temp merge, disable 1 line, SetValid

This commit is contained in:
Your Name
2023-10-12 16:16:44 -07:00
492 changed files with 15533 additions and 9376 deletions

View File

@@ -22,7 +22,7 @@ TEST(ArrayInterface, Initialize) {
HostDeviceVector<size_t> u64_storage(storage.Size());
std::string u64_arr_str{ArrayInterfaceStr(linalg::TensorView<size_t const, 2>{
u64_storage.ConstHostSpan(), {kRows, kCols}, Context::kCpuId})};
u64_storage.ConstHostSpan(), {kRows, kCols}, DeviceOrd::CPU()})};
std::copy(storage.ConstHostVector().cbegin(), storage.ConstHostVector().cend(),
u64_storage.HostSpan().begin());
auto u64_arr = ArrayInterface<2>{u64_arr_str};

View File

@@ -12,6 +12,7 @@
#elif defined(XGBOOST_USE_HIP)
#include "../../../src/data/ellpack_page.hip.h"
#endif
#include "../../../src/data/ellpack_page.h"
#include "../../../src/tree/param.h" // TrainParam
#include "../helpers.h"
#include "../histogram_helpers.h"

View File

@@ -5,8 +5,10 @@
#include <xgboost/data.h>
#if defined(XGBOOST_USE_CUDA)
#include "../../../src/common/io.h" // for PrivateMmapConstStream, AlignedResourceReadStream...
#include "../../../src/data/ellpack_page.cuh"
#elif defined(XGBOOST_USE_HIP)
#include "../../../src/common/io.h" // for PrivateMmapConstStream, AlignedResourceReadStream...
#include "../../../src/data/ellpack_page.hip.h"
#endif
#include "../../../src/data/sparse_page_source.h"
@@ -14,8 +16,7 @@
#include "../filesystem.h" // dmlc::TemporaryDirectory
#include "../helpers.h"
namespace xgboost {
namespace data {
namespace xgboost::data {
TEST(EllpackPageRawFormat, IO) {
Context ctx{MakeCUDACtx(0)};
auto param = BatchParam{256, tree::TrainParam::DftSparseThreshold()};
@@ -26,15 +27,17 @@ TEST(EllpackPageRawFormat, IO) {
dmlc::TemporaryDirectory tmpdir;
std::string path = tmpdir.path + "/ellpack.page";
std::size_t n_bytes{0};
{
std::unique_ptr<dmlc::Stream> fo{dmlc::Stream::Create(path.c_str(), "w")};
auto fo = std::make_unique<common::AlignedFileWriteStream>(StringView{path}, "wb");
for (auto const &ellpack : m->GetBatches<EllpackPage>(&ctx, param)) {
format->Write(ellpack, fo.get());
n_bytes += format->Write(ellpack, fo.get());
}
}
EllpackPage page;
std::unique_ptr<dmlc::SeekStream> fi{dmlc::SeekStream::CreateForRead(path.c_str())};
std::unique_ptr<common::AlignedResourceReadStream> fi{
std::make_unique<common::PrivateMmapConstStream>(path.c_str(), 0, n_bytes)};
format->Read(&page, fi.get());
for (auto const &ellpack : m->GetBatches<EllpackPage>(&ctx, param)) {
@@ -48,5 +51,4 @@ TEST(EllpackPageRawFormat, IO) {
ASSERT_EQ(loaded->gidx_buffer.HostVector(), orig->gidx_buffer.HostVector());
}
}
} // namespace data
} // namespace xgboost
} // namespace xgboost::data

View File

@@ -26,28 +26,32 @@
#include "xgboost/context.h" // for Context
#include "xgboost/host_device_vector.h" // for HostDeviceVector
namespace xgboost {
namespace data {
TEST(GradientIndex, ExternalMemory) {
namespace xgboost::data {
TEST(GradientIndex, ExternalMemoryBaseRowID) {
Context ctx;
std::unique_ptr<DMatrix> dmat = CreateSparsePageDMatrix(10000);
auto p_fmat = RandomDataGenerator{4096, 256, 0.5}
.Device(ctx.gpu_id)
.Batches(8)
.GenerateSparsePageDMatrix("cache", true);
std::vector<size_t> base_rowids;
std::vector<float> hessian(dmat->Info().num_row_, 1);
for (auto const &page : dmat->GetBatches<GHistIndexMatrix>(&ctx, {64, hessian, true})) {
std::vector<float> hessian(p_fmat->Info().num_row_, 1);
for (auto const &page : p_fmat->GetBatches<GHistIndexMatrix>(&ctx, {64, hessian, true})) {
base_rowids.push_back(page.base_rowid);
}
size_t i = 0;
for (auto const &page : dmat->GetBatches<SparsePage>()) {
std::size_t i = 0;
for (auto const &page : p_fmat->GetBatches<SparsePage>()) {
ASSERT_EQ(base_rowids[i], page.base_rowid);
++i;
}
base_rowids.clear();
for (auto const &page : dmat->GetBatches<GHistIndexMatrix>(&ctx, {64, hessian, false})) {
for (auto const &page : p_fmat->GetBatches<GHistIndexMatrix>(&ctx, {64, hessian, false})) {
base_rowids.push_back(page.base_rowid);
}
i = 0;
for (auto const &page : dmat->GetBatches<SparsePage>()) {
for (auto const &page : p_fmat->GetBatches<SparsePage>()) {
ASSERT_EQ(base_rowids[i], page.base_rowid);
++i;
}
@@ -167,11 +171,10 @@ class GHistIndexMatrixTest : public testing::TestWithParam<std::tuple<float, flo
ASSERT_TRUE(Xy->SingleColBlock());
bst_bin_t constexpr kBins{17};
auto p = BatchParam{kBins, threshold};
Context gpu_ctx;
gpu_ctx.gpu_id = 0;
auto gpu_ctx = MakeCUDACtx(0);
for (auto const &page : Xy->GetBatches<EllpackPage>(
&gpu_ctx, BatchParam{kBins, tree::TrainParam::DftSparseThreshold()})) {
from_ellpack.reset(new GHistIndexMatrix{&ctx, Xy->Info(), page, p});
from_ellpack = std::make_unique<GHistIndexMatrix>(&ctx, Xy->Info(), page, p);
}
for (auto const &from_sparse_page : Xy->GetBatches<GHistIndexMatrix>(&ctx, p)) {
@@ -199,13 +202,15 @@ class GHistIndexMatrixTest : public testing::TestWithParam<std::tuple<float, flo
std::string from_sparse_buf;
{
common::MemoryBufferStream fo{&from_sparse_buf};
columns_from_sparse.Write(&fo);
common::AlignedMemWriteStream fo{&from_sparse_buf};
auto n_bytes = columns_from_sparse.Write(&fo);
ASSERT_EQ(fo.Tell(), n_bytes);
}
std::string from_ellpack_buf;
{
common::MemoryBufferStream fo{&from_ellpack_buf};
columns_from_sparse.Write(&fo);
common::AlignedMemWriteStream fo{&from_ellpack_buf};
auto n_bytes = columns_from_sparse.Write(&fo);
ASSERT_EQ(fo.Tell(), n_bytes);
}
ASSERT_EQ(from_sparse_buf, from_ellpack_buf);
}
@@ -228,6 +233,5 @@ INSTANTIATE_TEST_SUITE_P(GHistIndexMatrix, GHistIndexMatrixTest,
std::make_tuple(.5f, .6), // sparse columns
std::make_tuple(.6f, .4))); // dense columns
#endif // defined(XGBOOST_USE_CUDA) || defined(XGBOOST_USE_HIP)
} // namespace data
} // namespace xgboost
#endif // defined(XGBOOST_USE_CUDA)
} // namespace xgboost::data

View File

@@ -2,14 +2,18 @@
* Copyright 2021-2023, XGBoost contributors
*/
#include <gtest/gtest.h>
#include <xgboost/context.h> // for Context
#include <cstddef> // for size_t
#include <memory> // for unique_ptr
#include "../../../src/common/column_matrix.h"
#include "../../../src/data/gradient_index.h"
#include "../../../src/common/io.h" // for MmapResource, AlignedResourceReadStream...
#include "../../../src/data/gradient_index.h" // for GHistIndexMatrix
#include "../../../src/data/sparse_page_source.h"
#include "../helpers.h"
#include "../helpers.h" // for RandomDataGenerator
namespace xgboost {
namespace data {
namespace xgboost::data {
TEST(GHistIndexPageRawFormat, IO) {
Context ctx;
@@ -20,15 +24,18 @@ TEST(GHistIndexPageRawFormat, IO) {
std::string path = tmpdir.path + "/ghistindex.page";
auto batch = BatchParam{256, 0.5};
std::size_t bytes{0};
{
std::unique_ptr<dmlc::Stream> fo{dmlc::Stream::Create(path.c_str(), "w")};
auto fo = std::make_unique<common::AlignedFileWriteStream>(StringView{path}, "wb");
for (auto const &index : m->GetBatches<GHistIndexMatrix>(&ctx, batch)) {
format->Write(index, fo.get());
bytes += format->Write(index, fo.get());
}
}
GHistIndexMatrix page;
std::unique_ptr<dmlc::SeekStream> fi{dmlc::SeekStream::CreateForRead(path.c_str())};
std::unique_ptr<common::AlignedResourceReadStream> fi{
std::make_unique<common::PrivateMmapConstStream>(path, 0, bytes)};
format->Read(&page, fi.get());
for (auto const &gidx : m->GetBatches<GHistIndexMatrix>(&ctx, batch)) {
@@ -37,6 +44,8 @@ TEST(GHistIndexPageRawFormat, IO) {
ASSERT_EQ(loaded.cut.MinValues(), page.cut.MinValues());
ASSERT_EQ(loaded.cut.Values(), page.cut.Values());
ASSERT_EQ(loaded.base_rowid, page.base_rowid);
ASSERT_EQ(loaded.row_ptr.size(), page.row_ptr.size());
ASSERT_TRUE(std::equal(loaded.row_ptr.cbegin(), loaded.row_ptr.cend(), page.row_ptr.cbegin()));
ASSERT_EQ(loaded.IsDense(), page.IsDense());
ASSERT_TRUE(std::equal(loaded.index.begin(), loaded.index.end(), page.index.begin()));
ASSERT_TRUE(std::equal(loaded.index.Offset(), loaded.index.Offset() + loaded.index.OffsetSize(),
@@ -45,5 +54,4 @@ TEST(GHistIndexPageRawFormat, IO) {
ASSERT_EQ(loaded.Transpose().GetTypeSize(), loaded.Transpose().GetTypeSize());
}
}
} // namespace data
} // namespace xgboost
} // namespace xgboost::data

View File

@@ -12,8 +12,7 @@
#include "../helpers.h"
#include "xgboost/data.h" // DMatrix
namespace xgboost {
namespace data {
namespace xgboost::data {
TEST(IterativeDMatrix, Ref) {
Context ctx;
TestRefDMatrix<GHistIndexMatrix, NumpyArrayIterForTest>(
@@ -21,7 +20,7 @@ TEST(IterativeDMatrix, Ref) {
}
TEST(IterativeDMatrix, IsDense) {
int n_bins = 16;
bst_bin_t n_bins = 16;
auto test = [n_bins](float sparsity) {
NumpyArrayIterForTest iter(sparsity);
auto n_threads = 0;
@@ -38,5 +37,4 @@ TEST(IterativeDMatrix, IsDense) {
test(0.1);
test(1.0);
}
} // namespace data
} // namespace xgboost
} // namespace xgboost::data

View File

@@ -10,6 +10,7 @@
#include "../../../src/data/device_adapter.hip.h"
#include "../../../src/data/ellpack_page.hip.h"
#endif
#include "../../../src/data/ellpack_page.h"
#include "../../../src/data/iterative_dmatrix.h"
#include "../../../src/tree/param.h" // TrainParam
#include "../helpers.h"

View File

@@ -129,8 +129,8 @@ TEST(MetaInfo, SaveLoadBinary) {
EXPECT_EQ(inforead.group_ptr_, info.group_ptr_);
EXPECT_EQ(inforead.weights_.HostVector(), info.weights_.HostVector());
auto orig_margin = info.base_margin_.View(xgboost::Context::kCpuId);
auto read_margin = inforead.base_margin_.View(xgboost::Context::kCpuId);
auto orig_margin = info.base_margin_.View(xgboost::DeviceOrd::CPU());
auto read_margin = inforead.base_margin_.View(xgboost::DeviceOrd::CPU());
EXPECT_TRUE(std::equal(orig_margin.Values().cbegin(), orig_margin.Values().cend(),
read_margin.Values().cbegin()));
@@ -267,8 +267,8 @@ TEST(MetaInfo, Validate) {
xgboost::HostDeviceVector<xgboost::bst_group_t> d_groups{groups};
d_groups.SetDevice(0);
d_groups.DevicePointer(); // pull to device
std::string arr_interface_str{ArrayInterfaceStr(
xgboost::linalg::MakeVec(d_groups.ConstDevicePointer(), d_groups.Size(), 0))};
std::string arr_interface_str{ArrayInterfaceStr(xgboost::linalg::MakeVec(
d_groups.ConstDevicePointer(), d_groups.Size(), xgboost::DeviceOrd::CUDA(0)))};
EXPECT_THROW(info.SetInfo(ctx, "group", xgboost::StringView{arr_interface_str}), dmlc::Error);
#endif // defined(XGBOOST_USE_CUDA) || defined(XGBOOST_USE_HIP)
}
@@ -307,5 +307,5 @@ TEST(MetaInfo, HostExtend) {
}
namespace xgboost {
// CPU variant of the strided-data MetaInfo test; all checks live in TestMetaInfoStridedData.
// NOTE(review): the two TEST lines below share the same suite and test name and differ only in
// how the CPU device is spelled (Context::kCpuId vs. DeviceOrd::CPU()). They look like the
// before/after lines of a single change; GoogleTest cannot register two tests with an identical
// suite/name pair in one translation unit, so confirm that only one of them is meant to remain.
TEST(MetaInfo, CPUStridedData) { TestMetaInfoStridedData(Context::kCpuId); }
TEST(MetaInfo, CPUStridedData) { TestMetaInfoStridedData(DeviceOrd::CPU()); }
} // namespace xgboost

View File

@@ -74,7 +74,7 @@ TEST(MetaInfo, FromInterface) {
}
info.SetInfo(ctx, "base_margin", str.c_str());
auto const h_base_margin = info.base_margin_.View(Context::kCpuId);
auto const h_base_margin = info.base_margin_.View(DeviceOrd::CPU());
ASSERT_EQ(h_base_margin.Size(), d_data.size());
for (size_t i = 0; i < d_data.size(); ++i) {
ASSERT_EQ(h_base_margin(i), d_data[i]);
@@ -92,7 +92,7 @@ TEST(MetaInfo, FromInterface) {
}
// GPU variant of the strided-data MetaInfo test; all checks live in TestMetaInfoStridedData.
TEST(MetaInfo, GPUStridedData) {
// NOTE(review): the first call passes a raw integer ordinal, the second passes
// DeviceOrd::CUDA(0). These look like the before/after lines of a single change rather than
// two intended invocations; confirm which single call should remain.
TestMetaInfoStridedData(0);
TestMetaInfoStridedData(DeviceOrd::CUDA(0));
}
TEST(MetaInfo, Group) {

View File

@@ -14,10 +14,10 @@
#include "../../../src/data/array_interface.h"
namespace xgboost {
inline void TestMetaInfoStridedData(int32_t device) {
inline void TestMetaInfoStridedData(DeviceOrd device) {
MetaInfo info;
Context ctx;
ctx.UpdateAllowUnknown(Args{{"gpu_id", std::to_string(device)}});
ctx.UpdateAllowUnknown(Args{{"device", device.Name()}});
{
// labels
linalg::Tensor<float, 3> labels;
@@ -28,9 +28,9 @@ inline void TestMetaInfoStridedData(int32_t device) {
ASSERT_EQ(t_labels.Shape().size(), 2);
info.SetInfo(ctx, "label", StringView{ArrayInterfaceStr(t_labels)});
auto const& h_result = info.labels.View(-1);
auto const& h_result = info.labels.View(DeviceOrd::CPU());
ASSERT_EQ(h_result.Shape().size(), 2);
auto in_labels = labels.View(-1);
auto in_labels = labels.View(DeviceOrd::CPU());
linalg::ElementWiseKernelHost(h_result, omp_get_max_threads(), [&](size_t i, float& v_0) {
auto tup = linalg::UnravelIndex(i, h_result.Shape());
auto i0 = std::get<0>(tup);
@@ -62,9 +62,9 @@ inline void TestMetaInfoStridedData(int32_t device) {
ASSERT_EQ(t_margin.Shape().size(), 2);
info.SetInfo(ctx, "base_margin", StringView{ArrayInterfaceStr(t_margin)});
auto const& h_result = info.base_margin_.View(-1);
auto const& h_result = info.base_margin_.View(DeviceOrd::CPU());
ASSERT_EQ(h_result.Shape().size(), 2);
auto in_margin = base_margin.View(-1);
auto in_margin = base_margin.View(DeviceOrd::CPU());
linalg::ElementWiseKernelHost(h_result, omp_get_max_threads(), [&](size_t i, float v_0) {
auto tup = linalg::UnravelIndex(i, h_result.Shape());
auto i0 = std::get<0>(tup);

View File

@@ -298,8 +298,8 @@ TEST(SimpleDMatrix, Slice) {
ASSERT_EQ(p_m->Info().weights_.HostVector().at(ridx),
out->Info().weights_.HostVector().at(i));
auto out_margin = out->Info().base_margin_.View(Context::kCpuId);
auto in_margin = margin.View(Context::kCpuId);
auto out_margin = out->Info().base_margin_.View(DeviceOrd::CPU());
auto in_margin = margin.View(DeviceOrd::CPU());
for (size_t j = 0; j < kClasses; ++j) {
ASSERT_EQ(out_margin(i, j), in_margin(ridx, j));
}
@@ -372,8 +372,8 @@ TEST(SimpleDMatrix, SliceCol) {
out->Info().labels_upper_bound_.HostVector().at(i));
ASSERT_EQ(p_m->Info().weights_.HostVector().at(i), out->Info().weights_.HostVector().at(i));
auto out_margin = out->Info().base_margin_.View(Context::kCpuId);
auto in_margin = margin.View(Context::kCpuId);
auto out_margin = out->Info().base_margin_.View(DeviceOrd::CPU());
auto in_margin = margin.View(DeviceOrd::CPU());
for (size_t j = 0; j < kClasses; ++j) {
ASSERT_EQ(out_margin(i, j), in_margin(i, j));
}

View File

@@ -76,9 +76,11 @@ TEST(SparsePageDMatrix, LoadFile) {
// allow caller to retain pages so they can process multiple pages at the same time.
template <typename Page>
void TestRetainPage() {
auto m = CreateSparsePageDMatrix(10000);
std::size_t n_batches = 4;
auto p_fmat = RandomDataGenerator{1024, 128, 0.5f}.Batches(n_batches).GenerateSparsePageDMatrix(
"cache", true);
Context ctx;
auto batches = m->GetBatches<Page>(&ctx);
auto batches = p_fmat->GetBatches<Page>(&ctx);
auto begin = batches.begin();
auto end = batches.end();
@@ -94,7 +96,7 @@ void TestRetainPage() {
}
ASSERT_EQ(pages.back().Size(), (*it).Size());
}
ASSERT_GE(iterators.size(), 2);
ASSERT_GE(iterators.size(), n_batches);
for (size_t i = 0; i < iterators.size(); ++i) {
ASSERT_EQ((*iterators[i]).Size(), pages.at(i).Size());
@@ -102,7 +104,7 @@ void TestRetainPage() {
}
// make sure it's const and the caller can not modify the content of page.
for (auto &page : m->GetBatches<Page>({&ctx})) {
for (auto &page : p_fmat->GetBatches<Page>({&ctx})) {
static_assert(std::is_const<std::remove_reference_t<decltype(page)>>::value);
}
}
@@ -248,7 +250,7 @@ auto TestSparsePageDMatrixDeterminism(int32_t threads) {
auto cache_name =
data::MakeId(filename, dynamic_cast<data::SparsePageDMatrix *>(sparse.get())) + ".row.page";
std::string cache = common::LoadSequentialFile(cache_name);
auto cache = common::LoadSequentialFile(cache_name);
return cache;
}
@@ -256,7 +258,7 @@ TEST(SparsePageDMatrix, Determinism) {
#if defined(_MSC_VER)
return;
#endif // defined(_MSC_VER)
std::vector<std::string> caches;
std::vector<std::vector<char>> caches;
for (size_t i = 1; i < 18; i += 2) {
caches.emplace_back(TestSparsePageDMatrixDeterminism(i));
}

View File

@@ -9,6 +9,7 @@
#elif defined(XGBOOST_USE_HIP)
#include "../../../src/data/ellpack_page.hip.h"
#endif
#include "../../../src/data/ellpack_page.h"
#include "../../../src/data/sparse_page_dmatrix.h"
#include "../../../src/tree/param.h" // TrainParam
#include "../filesystem.h" // dmlc::TemporaryDirectory

View File

@@ -2,20 +2,20 @@
* Copyright 2021-2023, XGBoost contributors
*/
#include <gtest/gtest.h>
#include <xgboost/data.h> // for CSCPage, SortedCSCPage, SparsePage
#include <xgboost/data.h> // for CSCPage, SortedCSCPage, SparsePage
#include <memory> // for allocator, unique_ptr, __shared_ptr_ac...
#include <string> // for char_traits, operator+, basic_string
#include <memory> // for allocator, unique_ptr, __shared_ptr_ac...
#include <string> // for char_traits, operator+, basic_string
#include "../../../src/common/io.h" // for PrivateMmapConstStream, AlignedResourceReadStream...
#include "../../../src/data/sparse_page_writer.h" // for CreatePageFormat
#include "../helpers.h" // for RandomDataGenerator
#include "dmlc/filesystem.h" // for TemporaryDirectory
#include "dmlc/io.h" // for SeekStream, Stream
#include "dmlc/io.h" // for Stream
#include "gtest/gtest_pred_impl.h" // for Test, AssertionResult, ASSERT_EQ, TEST
#include "xgboost/context.h" // for Context
namespace xgboost {
namespace data {
namespace xgboost::data {
template <typename S> void TestSparsePageRawFormat() {
std::unique_ptr<SparsePageFormat<S>> format{CreatePageFormat<S>("raw")};
Context ctx;
@@ -25,17 +25,19 @@ template <typename S> void TestSparsePageRawFormat() {
dmlc::TemporaryDirectory tmpdir;
std::string path = tmpdir.path + "/sparse.page";
S orig;
std::size_t n_bytes{0};
{
// block code to flush the stream
std::unique_ptr<dmlc::Stream> fo{dmlc::Stream::Create(path.c_str(), "w")};
auto fo = std::make_unique<common::AlignedFileWriteStream>(StringView{path}, "wb");
for (auto const &page : m->GetBatches<S>(&ctx)) {
orig.Push(page);
format->Write(page, fo.get());
n_bytes = format->Write(page, fo.get());
}
}
S page;
std::unique_ptr<dmlc::SeekStream> fi{dmlc::SeekStream::CreateForRead(path.c_str())};
std::unique_ptr<common::AlignedResourceReadStream> fi{
std::make_unique<common::PrivateMmapConstStream>(path.c_str(), 0, n_bytes)};
format->Read(&page, fi.get());
for (size_t i = 0; i < orig.data.Size(); ++i) {
ASSERT_EQ(page.data.HostVector()[i].fvalue,
@@ -59,5 +61,4 @@ TEST(SparsePageRawFormat, CSCPage) {
// Runs the templated raw-format write/read check (TestSparsePageRawFormat) with
// SortedCSCPage as the page type.
TEST(SparsePageRawFormat, SortedCSCPage) {
TestSparsePageRawFormat<SortedCSCPage>();
}
} // namespace data
} // namespace xgboost
} // namespace xgboost::data