Use Booster context in DMatrix. (#8896)

- Pass context from booster to DMatrix.
- Use context instead of integer for `n_threads`.
- Check the consistency of the configuration for `max_bin`.
- Test for all combinations of initialization options.
This commit is contained in:
Jiaming Yuan
2023-04-28 21:47:14 +08:00
committed by GitHub
parent 1f9a57d17b
commit 08ce495b5d
67 changed files with 1283 additions and 935 deletions

View File

@@ -14,11 +14,12 @@ TEST(DenseColumn, Test) {
int32_t max_num_bins[] = {static_cast<int32_t>(std::numeric_limits<uint8_t>::max()) + 1,
static_cast<int32_t>(std::numeric_limits<uint16_t>::max()) + 1,
static_cast<int32_t>(std::numeric_limits<uint16_t>::max()) + 2};
auto ctx = CreateEmptyGenericParam(Context::kCpuId);
BinTypeSize last{kUint8BinsTypeSize};
for (int32_t max_num_bin : max_num_bins) {
auto dmat = RandomDataGenerator(100, 10, 0.0).GenerateDMatrix();
auto sparse_thresh = 0.2;
GHistIndexMatrix gmat{dmat.get(), max_num_bin, sparse_thresh, false, AllThreadsForTest()};
GHistIndexMatrix gmat{&ctx, dmat.get(), max_num_bin, sparse_thresh, false};
ColumnMatrix column_matrix;
for (auto const& page : dmat->GetBatches<SparsePage>()) {
column_matrix.InitFromSparse(page, gmat, sparse_thresh, AllThreadsForTest());
@@ -62,9 +63,10 @@ TEST(SparseColumn, Test) {
int32_t max_num_bins[] = {static_cast<int32_t>(std::numeric_limits<uint8_t>::max()) + 1,
static_cast<int32_t>(std::numeric_limits<uint16_t>::max()) + 1,
static_cast<int32_t>(std::numeric_limits<uint16_t>::max()) + 2};
auto ctx = CreateEmptyGenericParam(Context::kCpuId);
for (int32_t max_num_bin : max_num_bins) {
auto dmat = RandomDataGenerator(100, 1, 0.85).GenerateDMatrix();
GHistIndexMatrix gmat{dmat.get(), max_num_bin, 0.5f, false, AllThreadsForTest()};
GHistIndexMatrix gmat{&ctx, dmat.get(), max_num_bin, 0.5f, false};
ColumnMatrix column_matrix;
for (auto const& page : dmat->GetBatches<SparsePage>()) {
column_matrix.InitFromSparse(page, gmat, 1.0, AllThreadsForTest());
@@ -90,9 +92,10 @@ TEST(DenseColumnWithMissing, Test) {
int32_t max_num_bins[] = {static_cast<int32_t>(std::numeric_limits<uint8_t>::max()) + 1,
static_cast<int32_t>(std::numeric_limits<uint16_t>::max()) + 1,
static_cast<int32_t>(std::numeric_limits<uint16_t>::max()) + 2};
auto ctx = CreateEmptyGenericParam(Context::kCpuId);
for (int32_t max_num_bin : max_num_bins) {
auto dmat = RandomDataGenerator(100, 1, 0.5).GenerateDMatrix();
GHistIndexMatrix gmat(dmat.get(), max_num_bin, 0.2, false, AllThreadsForTest());
GHistIndexMatrix gmat(&ctx, dmat.get(), max_num_bin, 0.2, false);
ColumnMatrix column_matrix;
for (auto const& page : dmat->GetBatches<SparsePage>()) {
column_matrix.InitFromSparse(page, gmat, 0.2, AllThreadsForTest());

View File

@@ -156,6 +156,7 @@ TEST(CutsBuilder, SearchGroupInd) {
}
TEST(HistUtil, DenseCutsCategorical) {
auto ctx = CreateEmptyGenericParam(Context::kCpuId);
int categorical_sizes[] = {2, 6, 8, 12};
int num_bins = 256;
int sizes[] = {25, 100, 1000};
@@ -165,7 +166,7 @@ TEST(HistUtil, DenseCutsCategorical) {
std::vector<float> x_sorted(x);
std::sort(x_sorted.begin(), x_sorted.end());
auto dmat = GetDMatrixFromData(x, n, 1);
HistogramCuts cuts = SketchOnDMatrix(dmat.get(), num_bins, AllThreadsForTest());
HistogramCuts cuts = SketchOnDMatrix(&ctx, dmat.get(), num_bins);
auto cuts_from_sketch = cuts.Values();
EXPECT_LT(cuts.MinValues()[0], x_sorted.front());
EXPECT_GT(cuts_from_sketch.front(), x_sorted.front());
@@ -176,6 +177,7 @@ TEST(HistUtil, DenseCutsCategorical) {
}
TEST(HistUtil, DenseCutsAccuracyTest) {
auto ctx = CreateEmptyGenericParam(Context::kCpuId);
int bin_sizes[] = {2, 16, 256, 512};
int sizes[] = {100};
int num_columns = 5;
@@ -183,7 +185,7 @@ TEST(HistUtil, DenseCutsAccuracyTest) {
auto x = GenerateRandom(num_rows, num_columns);
auto dmat = GetDMatrixFromData(x, num_rows, num_columns);
for (auto num_bins : bin_sizes) {
HistogramCuts cuts = SketchOnDMatrix(dmat.get(), num_bins, AllThreadsForTest());
HistogramCuts cuts = SketchOnDMatrix(&ctx, dmat.get(), num_bins);
ValidateCuts(cuts, dmat.get(), num_bins);
}
}
@@ -193,6 +195,7 @@ TEST(HistUtil, DenseCutsAccuracyTestWeights) {
int bin_sizes[] = {2, 16, 256, 512};
int sizes[] = {100, 1000, 1500};
int num_columns = 5;
auto ctx = CreateEmptyGenericParam(Context::kCpuId);
for (auto num_rows : sizes) {
auto x = GenerateRandom(num_rows, num_columns);
auto dmat = GetDMatrixFromData(x, num_rows, num_columns);
@@ -200,11 +203,11 @@ TEST(HistUtil, DenseCutsAccuracyTestWeights) {
dmat->Info().weights_.HostVector() = w;
for (auto num_bins : bin_sizes) {
{
HistogramCuts cuts = SketchOnDMatrix(dmat.get(), num_bins, AllThreadsForTest(), true);
HistogramCuts cuts = SketchOnDMatrix(&ctx, dmat.get(), num_bins, true);
ValidateCuts(cuts, dmat.get(), num_bins);
}
{
HistogramCuts cuts = SketchOnDMatrix(dmat.get(), num_bins, AllThreadsForTest(), false);
HistogramCuts cuts = SketchOnDMatrix(&ctx, dmat.get(), num_bins, false);
ValidateCuts(cuts, dmat.get(), num_bins);
}
}
@@ -215,6 +218,7 @@ void TestQuantileWithHessian(bool use_sorted) {
int bin_sizes[] = {2, 16, 256, 512};
int sizes[] = {1000, 1500};
int num_columns = 5;
auto ctx = CreateEmptyGenericParam(Context::kCpuId);
for (auto num_rows : sizes) {
auto x = GenerateRandom(num_rows, num_columns);
auto dmat = GetDMatrixFromData(x, num_rows, num_columns);
@@ -225,15 +229,13 @@ void TestQuantileWithHessian(bool use_sorted) {
dmat->Info().weights_.HostVector() = w;
for (auto num_bins : bin_sizes) {
HistogramCuts cuts_hess =
SketchOnDMatrix(dmat.get(), num_bins, AllThreadsForTest(), use_sorted, hessian);
HistogramCuts cuts_hess = SketchOnDMatrix(&ctx, dmat.get(), num_bins, use_sorted, hessian);
for (size_t i = 0; i < w.size(); ++i) {
dmat->Info().weights_.HostVector()[i] = w[i] * hessian[i];
}
ValidateCuts(cuts_hess, dmat.get(), num_bins);
HistogramCuts cuts_wh =
SketchOnDMatrix(dmat.get(), num_bins, AllThreadsForTest(), use_sorted);
HistogramCuts cuts_wh = SketchOnDMatrix(&ctx, dmat.get(), num_bins, use_sorted);
ValidateCuts(cuts_wh, dmat.get(), num_bins);
ASSERT_EQ(cuts_hess.Values().size(), cuts_wh.Values().size());
@@ -255,12 +257,13 @@ TEST(HistUtil, DenseCutsExternalMemory) {
int bin_sizes[] = {2, 16, 256, 512};
int sizes[] = {100, 1000, 1500};
int num_columns = 5;
auto ctx = CreateEmptyGenericParam(Context::kCpuId);
for (auto num_rows : sizes) {
auto x = GenerateRandom(num_rows, num_columns);
dmlc::TemporaryDirectory tmpdir;
auto dmat = GetExternalMemoryDMatrixFromData(x, num_rows, num_columns, tmpdir);
for (auto num_bins : bin_sizes) {
HistogramCuts cuts = SketchOnDMatrix(dmat.get(), num_bins, AllThreadsForTest());
HistogramCuts cuts = SketchOnDMatrix(&ctx, dmat.get(), num_bins);
ValidateCuts(cuts, dmat.get(), num_bins);
}
}
@@ -275,12 +278,12 @@ TEST(HistUtil, IndexBinBound) {
kUint32BinsTypeSize};
size_t constexpr kRows = 100;
size_t constexpr kCols = 10;
auto ctx = CreateEmptyGenericParam(Context::kCpuId);
size_t bin_id = 0;
for (auto max_bin : bin_sizes) {
auto p_fmat = RandomDataGenerator(kRows, kCols, 0).GenerateDMatrix();
GHistIndexMatrix hmat(p_fmat.get(), max_bin, 0.5, false, AllThreadsForTest());
GHistIndexMatrix hmat(&ctx, p_fmat.get(), max_bin, 0.5, false);
EXPECT_EQ(hmat.index.Size(), kRows*kCols);
EXPECT_EQ(expected_bin_type_sizes[bin_id++], hmat.index.GetBinTypeSize());
}
@@ -300,10 +303,11 @@ TEST(HistUtil, IndexBinData) {
static_cast<uint64_t>(std::numeric_limits<uint16_t>::max()) + 2 };
size_t constexpr kRows = 100;
size_t constexpr kCols = 10;
auto ctx = CreateEmptyGenericParam(Context::kCpuId);
for (auto max_bin : kBinSizes) {
auto p_fmat = RandomDataGenerator(kRows, kCols, 0).GenerateDMatrix();
GHistIndexMatrix hmat(p_fmat.get(), max_bin, 0.5, false, AllThreadsForTest());
GHistIndexMatrix hmat(&ctx, p_fmat.get(), max_bin, 0.5, false);
uint32_t const* offsets = hmat.index.Offset();
EXPECT_EQ(hmat.index.Size(), kRows*kCols);
switch (max_bin) {
@@ -327,10 +331,10 @@ void TestSketchFromWeights(bool with_group) {
size_t constexpr kRows = 300, kCols = 20, kBins = 256;
size_t constexpr kGroups = 10;
auto m = RandomDataGenerator{kRows, kCols, 0}.Device(0).GenerateDMatrix();
common::HistogramCuts cuts = SketchOnDMatrix(m.get(), kBins, AllThreadsForTest());
auto ctx = CreateEmptyGenericParam(Context::kCpuId);
common::HistogramCuts cuts = SketchOnDMatrix(&ctx, m.get(), kBins);
MetaInfo info;
Context ctx;
auto& h_weights = info.weights_.HostVector();
if (with_group) {
h_weights.resize(kGroups);
@@ -363,7 +367,7 @@ void TestSketchFromWeights(bool with_group) {
if (with_group) {
m->Info().weights_ = decltype(m->Info().weights_)(); // remove weight
HistogramCuts non_weighted = SketchOnDMatrix(m.get(), kBins, AllThreadsForTest());
HistogramCuts non_weighted = SketchOnDMatrix(&ctx, m.get(), kBins);
for (size_t i = 0; i < cuts.Values().size(); ++i) {
EXPECT_EQ(cuts.Values()[i], non_weighted.Values()[i]);
}
@@ -382,7 +386,7 @@ void TestSketchFromWeights(bool with_group) {
for (size_t i = 0; i < h_weights.size(); ++i) {
h_weights[i] = static_cast<float>(i + 1) / static_cast<float>(kGroups);
}
HistogramCuts weighted = SketchOnDMatrix(m.get(), kBins, AllThreadsForTest());
HistogramCuts weighted = SketchOnDMatrix(&ctx, m.get(), kBins);
ValidateCuts(weighted, m.get(), kBins);
}
}
@@ -393,11 +397,12 @@ TEST(HistUtil, SketchFromWeights) {
}
TEST(HistUtil, SketchCategoricalFeatures) {
TestCategoricalSketch(1000, 256, 32, false, [](DMatrix* p_fmat, int32_t num_bins) {
return SketchOnDMatrix(p_fmat, num_bins, AllThreadsForTest());
auto ctx = CreateEmptyGenericParam(Context::kCpuId);
TestCategoricalSketch(1000, 256, 32, false, [&ctx](DMatrix* p_fmat, int32_t num_bins) {
return SketchOnDMatrix(&ctx, p_fmat, num_bins);
});
TestCategoricalSketch(1000, 256, 32, true, [](DMatrix* p_fmat, int32_t num_bins) {
return SketchOnDMatrix(p_fmat, num_bins, AllThreadsForTest());
TestCategoricalSketch(1000, 256, 32, true, [&ctx](DMatrix* p_fmat, int32_t num_bins) {
return SketchOnDMatrix(&ctx, p_fmat, num_bins);
});
}
} // namespace common

View File

@@ -25,9 +25,9 @@ namespace xgboost {
namespace common {
template <typename AdapterT>
HistogramCuts GetHostCuts(AdapterT *adapter, int num_bins, float missing) {
HistogramCuts GetHostCuts(Context const* ctx, AdapterT* adapter, int num_bins, float missing) {
data::SimpleDMatrix dmat(adapter, missing, 1);
HistogramCuts cuts = SketchOnDMatrix(&dmat, num_bins, AllThreadsForTest());
HistogramCuts cuts = SketchOnDMatrix(ctx, &dmat, num_bins);
return cuts;
}
@@ -39,7 +39,9 @@ TEST(HistUtil, DeviceSketch) {
auto dmat = GetDMatrixFromData(x, num_rows, num_columns);
auto device_cuts = DeviceSketch(0, dmat.get(), num_bins);
HistogramCuts host_cuts = SketchOnDMatrix(dmat.get(), num_bins, AllThreadsForTest());
Context ctx;
HistogramCuts host_cuts = SketchOnDMatrix(&ctx, dmat.get(), num_bins);
EXPECT_EQ(device_cuts.Values(), host_cuts.Values());
EXPECT_EQ(device_cuts.Ptrs(), host_cuts.Ptrs());
@@ -308,7 +310,8 @@ TEST(HistUtil, AdapterDeviceSketch) {
data::CupyAdapter adapter(str);
auto device_cuts = MakeUnweightedCutsForTest(adapter, num_bins, missing);
auto host_cuts = GetHostCuts(&adapter, num_bins, missing);
auto ctx = CreateEmptyGenericParam(Context::kCpuId);
auto host_cuts = GetHostCuts(&ctx, &adapter, num_bins, missing);
EXPECT_EQ(device_cuts.Values(), host_cuts.Values());
EXPECT_EQ(device_cuts.Ptrs(), host_cuts.Ptrs());

View File

@@ -16,7 +16,8 @@ TEST(Quantile, LoadBalance) {
size_t constexpr kRows = 1000, kCols = 100;
auto m = RandomDataGenerator{kRows, kCols, 0}.GenerateDMatrix();
std::vector<bst_feature_t> cols_ptr;
for (auto const& page : m->GetBatches<SparsePage>()) {
Context ctx;
for (auto const& page : m->GetBatches<SparsePage>(&ctx)) {
data::SparsePageAdapterBatch adapter{page.GetView()};
cols_ptr = LoadBalance(adapter, page.data.Size(), kCols, 13, [](auto) { return true; });
}
@@ -43,6 +44,7 @@ void PushPage(HostSketchContainer* container, SparsePage const& page, MetaInfo c
template <bool use_column>
void DoTestDistributedQuantile(size_t rows, size_t cols) {
Context ctx;
auto const world = collective::GetWorldSize();
std::vector<MetaInfo> infos(2);
auto& h_weights = infos.front().weights_.HostVector();
@@ -51,7 +53,7 @@ void DoTestDistributedQuantile(size_t rows, size_t cols) {
SimpleRealUniformDistribution<float> dist(3, 1000);
std::generate(h_weights.begin(), h_weights.end(), [&]() { return dist(&lcg); });
std::vector<bst_row_t> column_size(cols, rows);
size_t n_bins = 64;
bst_bin_t n_bins = 64;
// Generate cuts for distributed environment.
auto sparsity = 0.5f;
@@ -72,15 +74,15 @@ void DoTestDistributedQuantile(size_t rows, size_t cols) {
std::vector<float> hessian(rows, 1.0);
auto hess = Span<float const>{hessian};
ContainerType<use_column> sketch_distributed(n_bins, m->Info().feature_types.ConstHostSpan(),
column_size, false, AllThreadsForTest());
ContainerType<use_column> sketch_distributed(
&ctx, n_bins, m->Info().feature_types.ConstHostSpan(), column_size, false);
if (use_column) {
for (auto const& page : m->GetBatches<SortedCSCPage>()) {
for (auto const& page : m->GetBatches<SortedCSCPage>(&ctx)) {
PushPage(&sketch_distributed, page, m->Info(), hess);
}
} else {
for (auto const& page : m->GetBatches<SparsePage>()) {
for (auto const& page : m->GetBatches<SparsePage>(&ctx)) {
PushPage(&sketch_distributed, page, m->Info(), hess);
}
}
@@ -93,8 +95,8 @@ void DoTestDistributedQuantile(size_t rows, size_t cols) {
CHECK_EQ(collective::GetWorldSize(), 1);
std::for_each(column_size.begin(), column_size.end(), [=](auto& size) { size *= world; });
m->Info().num_row_ = world * rows;
ContainerType<use_column> sketch_on_single_node(n_bins, m->Info().feature_types.ConstHostSpan(),
column_size, false, AllThreadsForTest());
ContainerType<use_column> sketch_on_single_node(
&ctx, n_bins, m->Info().feature_types.ConstHostSpan(), column_size, false);
m->Info().num_row_ = rows;
for (auto rank = 0; rank < world; ++rank) {
@@ -106,7 +108,7 @@ void DoTestDistributedQuantile(size_t rows, size_t cols) {
.Upper(1.0f)
.GenerateDMatrix();
if (use_column) {
for (auto const& page : m->GetBatches<SortedCSCPage>()) {
for (auto const& page : m->GetBatches<SortedCSCPage>(&ctx)) {
PushPage(&sketch_on_single_node, page, m->Info(), hess);
}
} else {
@@ -172,6 +174,7 @@ TEST(Quantile, SortedDistributed) {
namespace {
template <bool use_column>
void DoTestColSplitQuantile(size_t rows, size_t cols) {
Context ctx;
auto const world = collective::GetWorldSize();
auto const rank = collective::GetRank();
@@ -204,17 +207,17 @@ void DoTestColSplitQuantile(size_t rows, size_t cols) {
// Generate cuts for distributed environment.
HistogramCuts distributed_cuts;
{
ContainerType<use_column> sketch_distributed(n_bins, m->Info().feature_types.ConstHostSpan(),
column_size, false, AllThreadsForTest());
ContainerType<use_column> sketch_distributed(
&ctx, n_bins, m->Info().feature_types.ConstHostSpan(), column_size, false);
std::vector<float> hessian(rows, 1.0);
auto hess = Span<float const>{hessian};
if (use_column) {
for (auto const& page : m->GetBatches<SortedCSCPage>()) {
for (auto const& page : m->GetBatches<SortedCSCPage>(&ctx)) {
PushPage(&sketch_distributed, page, m->Info(), hess);
}
} else {
for (auto const& page : m->GetBatches<SparsePage>()) {
for (auto const& page : m->GetBatches<SparsePage>(&ctx)) {
PushPage(&sketch_distributed, page, m->Info(), hess);
}
}
@@ -227,17 +230,17 @@ void DoTestColSplitQuantile(size_t rows, size_t cols) {
CHECK_EQ(collective::GetWorldSize(), 1);
HistogramCuts single_node_cuts;
{
ContainerType<use_column> sketch_on_single_node(n_bins, m->Info().feature_types.ConstHostSpan(),
column_size, false, AllThreadsForTest());
ContainerType<use_column> sketch_on_single_node(
&ctx, n_bins, m->Info().feature_types.ConstHostSpan(), column_size, false);
std::vector<float> hessian(rows, 1.0);
auto hess = Span<float const>{hessian};
if (use_column) {
for (auto const& page : m->GetBatches<SortedCSCPage>()) {
for (auto const& page : m->GetBatches<SortedCSCPage>(&ctx)) {
PushPage(&sketch_on_single_node, page, m->Info(), hess);
}
} else {
for (auto const& page : m->GetBatches<SparsePage>()) {
for (auto const& page : m->GetBatches<SparsePage>(&ctx)) {
PushPage(&sketch_on_single_node, page, m->Info(), hess);
}
}
@@ -299,8 +302,10 @@ namespace {
void TestSameOnAllWorkers() {
auto const world = collective::GetWorldSize();
constexpr size_t kRows = 1000, kCols = 100;
auto ctx = CreateEmptyGenericParam(Context::kCpuId);
RunWithSeedsAndBins(
kRows, [=](int32_t seed, size_t n_bins, MetaInfo const&) {
kRows, [=, &ctx](int32_t seed, size_t n_bins, MetaInfo const&) {
auto rank = collective::GetRank();
HostDeviceVector<float> storage;
std::vector<FeatureType> ft(kCols);
@@ -314,7 +319,7 @@ void TestSameOnAllWorkers() {
.MaxCategory(17)
.Seed(rank + seed)
.GenerateDMatrix();
auto cuts = SketchOnDMatrix(m.get(), n_bins, AllThreadsForTest());
auto cuts = SketchOnDMatrix(&ctx, m.get(), n_bins);
std::vector<float> cut_values(cuts.Values().size() * world, 0);
std::vector<
typename std::remove_reference_t<decltype(cuts.Ptrs())>::value_type>

View File

@@ -1,17 +1,17 @@
/*!
* Copyright 2019-2020 XGBoost contributors
/**
* Copyright 2019-2023, XGBoost contributors
*/
#include <xgboost/base.h>
#include <utility>
#include "../helpers.h"
#include "../histogram_helpers.h"
#include "gtest/gtest.h"
#include "../../../src/common/categorical.h"
#include "../../../src/common/hist_util.h"
#include "../../../src/data/ellpack_page.cuh"
#include "../../../src/tree/param.h" // TrainParam
#include "../helpers.h"
#include "../histogram_helpers.h"
#include "gtest/gtest.h"
namespace xgboost {
@@ -19,7 +19,10 @@ TEST(EllpackPage, EmptyDMatrix) {
constexpr int kNRows = 0, kNCols = 0, kMaxBin = 256;
constexpr float kSparsity = 0;
auto dmat = RandomDataGenerator(kNRows, kNCols, kSparsity).GenerateDMatrix();
auto& page = *dmat->GetBatches<EllpackPage>({0, kMaxBin}).begin();
Context ctx{MakeCUDACtx(0)};
auto& page = *dmat->GetBatches<EllpackPage>(
&ctx, BatchParam{kMaxBin, tree::TrainParam::DftSparseThreshold()})
.begin();
auto impl = page.Impl();
ASSERT_EQ(impl->row_stride, 0);
ASSERT_EQ(impl->Cuts().TotalBins(), 0);
@@ -87,8 +90,9 @@ TEST(EllpackPage, FromCategoricalBasic) {
auto& h_ft = m->Info().feature_types.HostVector();
h_ft.resize(kCols, FeatureType::kCategorical);
BatchParam p{0, max_bins};
auto ellpack = EllpackPage(m.get(), p);
Context ctx{MakeCUDACtx(0)};
auto p = BatchParam{max_bins, tree::TrainParam::DftSparseThreshold()};
auto ellpack = EllpackPage(&ctx, m.get(), p);
auto accessor = ellpack.Impl()->GetDeviceAccessor(0);
ASSERT_EQ(kCats, accessor.NumBins());
@@ -142,8 +146,9 @@ TEST(EllpackPage, Copy) {
dmlc::TemporaryDirectory tmpdir;
std::unique_ptr<DMatrix>
dmat(CreateSparsePageDMatrixWithRC(kRows, kCols, kPageSize, true, tmpdir));
BatchParam param{0, 256};
auto page = (*dmat->GetBatches<EllpackPage>(param).begin()).Impl();
Context ctx{MakeCUDACtx(0)};
auto param = BatchParam{256, tree::TrainParam::DftSparseThreshold()};
auto page = (*dmat->GetBatches<EllpackPage>(&ctx, param).begin()).Impl();
// Create an empty result page.
EllpackPageImpl result(0, page->Cuts(), page->is_dense, page->row_stride,
@@ -151,7 +156,7 @@ TEST(EllpackPage, Copy) {
// Copy batch pages into the result page.
size_t offset = 0;
for (auto& batch : dmat->GetBatches<EllpackPage>(param)) {
for (auto& batch : dmat->GetBatches<EllpackPage>(&ctx, param)) {
size_t num_elements = result.Copy(0, batch.Impl(), offset);
offset += num_elements;
}
@@ -161,7 +166,7 @@ TEST(EllpackPage, Copy) {
thrust::device_vector<bst_float> row_result_d(kCols);
std::vector<bst_float> row(kCols);
std::vector<bst_float> row_result(kCols);
for (auto& page : dmat->GetBatches<EllpackPage>(param)) {
for (auto& page : dmat->GetBatches<EllpackPage>(&ctx, param)) {
auto impl = page.Impl();
EXPECT_EQ(impl->base_rowid, current_row);
@@ -186,10 +191,11 @@ TEST(EllpackPage, Compact) {
// Create a DMatrix with multiple batches.
dmlc::TemporaryDirectory tmpdir;
std::unique_ptr<DMatrix>
dmat(CreateSparsePageDMatrixWithRC(kRows, kCols, kPageSize, true, tmpdir));
BatchParam param{0, 256};
auto page = (*dmat->GetBatches<EllpackPage>(param).begin()).Impl();
std::unique_ptr<DMatrix> dmat(
CreateSparsePageDMatrixWithRC(kRows, kCols, kPageSize, true, tmpdir));
Context ctx{MakeCUDACtx(0)};
auto param = BatchParam{256, tree::TrainParam::DftSparseThreshold()};
auto page = (*dmat->GetBatches<EllpackPage>(&ctx, param).begin()).Impl();
// Create an empty result page.
EllpackPageImpl result(0, page->Cuts(), page->is_dense, page->row_stride,
@@ -201,7 +207,7 @@ TEST(EllpackPage, Compact) {
SIZE_MAX};
thrust::device_vector<size_t> row_indexes_d = row_indexes_h;
common::Span<size_t> row_indexes_span(row_indexes_d.data().get(), kRows);
for (auto& batch : dmat->GetBatches<EllpackPage>(param)) {
for (auto& batch : dmat->GetBatches<EllpackPage>(&ctx, param)) {
result.Compact(0, batch.Impl(), row_indexes_span);
}
@@ -210,7 +216,7 @@ TEST(EllpackPage, Compact) {
thrust::device_vector<bst_float> row_result_d(kCols);
std::vector<bst_float> row(kCols);
std::vector<bst_float> row_result(kCols);
for (auto& page : dmat->GetBatches<EllpackPage>(param)) {
for (auto& page : dmat->GetBatches<EllpackPage>(&ctx, param)) {
auto impl = page.Impl();
ASSERT_EQ(impl->base_rowid, current_row);
@@ -245,15 +251,17 @@ class EllpackPageTest : public testing::TestWithParam<float> {
// device.
size_t n_samples{128}, n_features{13};
Context ctx;
ctx.gpu_id = 0;
Context gpu_ctx{MakeCUDACtx(0)};
auto Xy = RandomDataGenerator{n_samples, n_features, sparsity}.GenerateDMatrix(true);
std::unique_ptr<EllpackPageImpl> from_ghist;
ASSERT_TRUE(Xy->SingleColBlock());
for (auto const& page : Xy->GetBatches<GHistIndexMatrix>(BatchParam{17, 0.6})) {
from_ghist.reset(new EllpackPageImpl{&ctx, page, {}});
for (auto const& page : Xy->GetBatches<GHistIndexMatrix>(&ctx, BatchParam{17, 0.6})) {
from_ghist.reset(new EllpackPageImpl{&gpu_ctx, page, {}});
}
for (auto const& page : Xy->GetBatches<EllpackPage>(BatchParam{0, 17})) {
for (auto const& page : Xy->GetBatches<EllpackPage>(
&gpu_ctx, BatchParam{17, tree::TrainParam::DftSparseThreshold()})) {
auto from_sparse_page = page.Impl();
ASSERT_EQ(from_sparse_page->is_dense, from_ghist->is_dense);
ASSERT_EQ(from_sparse_page->base_rowid, 0);

View File

@@ -1,17 +1,21 @@
/*!
* Copyright 2021 XGBoost contributors
/**
* Copyright 2021-2023, XGBoost contributors
*/
#include <gtest/gtest.h>
#include <xgboost/data.h>
#include "../../../src/data/ellpack_page.cuh"
#include "../../../src/data/sparse_page_source.h"
#include "../filesystem.h" // dmlc::TemporaryDirectory
#include "../../../src/tree/param.h" // TrainParam
#include "../filesystem.h" // dmlc::TemporaryDirectory
#include "../helpers.h"
namespace xgboost {
namespace data {
TEST(EllpackPageRawFormat, IO) {
Context ctx{MakeCUDACtx(0)};
auto param = BatchParam{256, tree::TrainParam::DftSparseThreshold()};
std::unique_ptr<SparsePageFormat<EllpackPage>> format{CreatePageFormat<EllpackPage>("raw")};
auto m = RandomDataGenerator{100, 14, 0.5}.GenerateDMatrix();
@@ -20,7 +24,7 @@ TEST(EllpackPageRawFormat, IO) {
{
std::unique_ptr<dmlc::Stream> fo{dmlc::Stream::Create(path.c_str(), "w")};
for (auto const &ellpack : m->GetBatches<EllpackPage>({0, 256})) {
for (auto const &ellpack : m->GetBatches<EllpackPage>(&ctx, param)) {
format->Write(ellpack, fo.get());
}
}
@@ -29,7 +33,7 @@ TEST(EllpackPageRawFormat, IO) {
std::unique_ptr<dmlc::SeekStream> fi{dmlc::SeekStream::CreateForRead(path.c_str())};
format->Read(&page, fi.get());
for (auto const &ellpack : m->GetBatches<EllpackPage>({0, 256})) {
for (auto const &ellpack : m->GetBatches<EllpackPage>(&ctx, param)) {
auto loaded = page.Impl();
auto orig = ellpack.Impl();
ASSERT_EQ(loaded->Cuts().Ptrs(), orig->Cuts().Ptrs());

View File

@@ -2,20 +2,38 @@
* Copyright 2021-2023 by XGBoost contributors
*/
#include <gtest/gtest.h>
#include <xgboost/data.h>
#include <xgboost/data.h> // for BatchIterator, BatchSet, DMatrix, BatchParam
#include "../../../src/common/column_matrix.h"
#include "../../../src/common/io.h" // MemoryBufferStream
#include "../../../src/data/gradient_index.h"
#include "../helpers.h"
#include <algorithm> // for sort, unique
#include <cmath> // for isnan
#include <cstddef> // for size_t
#include <limits> // for numeric_limits
#include <memory> // for shared_ptr, __shared_ptr_access, unique_ptr
#include <string> // for string
#include <tuple> // for make_tuple, tie, tuple
#include <utility> // for move
#include <vector> // for vector
#include "../../../src/common/categorical.h" // for AsCat
#include "../../../src/common/column_matrix.h" // for ColumnMatrix
#include "../../../src/common/hist_util.h" // for Index, HistogramCuts, SketchOnDMatrix
#include "../../../src/common/io.h" // for MemoryBufferStream
#include "../../../src/data/adapter.h" // for SparsePageAdapterBatch
#include "../../../src/data/gradient_index.h" // for GHistIndexMatrix
#include "../../../src/tree/param.h" // for TrainParam
#include "../helpers.h" // for CreateEmptyGenericParam, GenerateRandomCa...
#include "xgboost/base.h" // for bst_bin_t
#include "xgboost/context.h" // for Context
#include "xgboost/host_device_vector.h" // for HostDeviceVector
namespace xgboost {
namespace data {
TEST(GradientIndex, ExternalMemory) {
auto ctx = CreateEmptyGenericParam(Context::kCpuId);
std::unique_ptr<DMatrix> dmat = CreateSparsePageDMatrix(10000);
std::vector<size_t> base_rowids;
std::vector<float> hessian(dmat->Info().num_row_, 1);
for (auto const &page : dmat->GetBatches<GHistIndexMatrix>({64, hessian, true})) {
for (auto const &page : dmat->GetBatches<GHistIndexMatrix>(&ctx, {64, hessian, true})) {
base_rowids.push_back(page.base_rowid);
}
size_t i = 0;
@@ -24,9 +42,8 @@ TEST(GradientIndex, ExternalMemory) {
++i;
}
base_rowids.clear();
for (auto const &page : dmat->GetBatches<GHistIndexMatrix>({64, hessian, false})) {
for (auto const &page : dmat->GetBatches<GHistIndexMatrix>(&ctx, {64, hessian, false})) {
base_rowids.push_back(page.base_rowid);
}
i = 0;
@@ -41,12 +58,13 @@ TEST(GradientIndex, FromCategoricalBasic) {
size_t max_bins = 8;
auto x = GenerateRandomCategoricalSingleColumn(kRows, kCats);
auto m = GetDMatrixFromData(x, kRows, 1);
auto ctx = CreateEmptyGenericParam(Context::kCpuId);
auto &h_ft = m->Info().feature_types.HostVector();
h_ft.resize(kCols, FeatureType::kCategorical);
BatchParam p(max_bins, 0.8);
GHistIndexMatrix gidx(m.get(), max_bins, p.sparse_thresh, false, AllThreadsForTest(), {});
GHistIndexMatrix gidx(&ctx, m.get(), max_bins, p.sparse_thresh, false, {});
auto x_copy = x;
std::sort(x_copy.begin(), x_copy.end());
@@ -80,11 +98,11 @@ TEST(GradientIndex, FromCategoricalLarge) {
BatchParam p{max_bins, 0.8};
{
GHistIndexMatrix gidx(m.get(), max_bins, p.sparse_thresh, false, AllThreadsForTest(), {});
GHistIndexMatrix gidx{&ctx, m.get(), max_bins, p.sparse_thresh, false, {}};
ASSERT_TRUE(gidx.index.GetBinTypeSize() == common::kUint16BinsTypeSize);
}
{
for (auto const &page : m->GetBatches<GHistIndexMatrix>(p)) {
for (auto const &page : m->GetBatches<GHistIndexMatrix>(&ctx, p)) {
common::HistogramCuts cut = page.cut;
GHistIndexMatrix gidx{m->Info(), std::move(cut), max_bins};
ASSERT_EQ(gidx.MaxNumBinPerFeat(), kCats);
@@ -96,10 +114,11 @@ TEST(GradientIndex, PushBatch) {
size_t constexpr kRows = 64, kCols = 4;
bst_bin_t max_bins = 64;
float st = 0.5;
Context ctx;
auto test = [&](float sparisty) {
auto m = RandomDataGenerator{kRows, kCols, sparisty}.GenerateDMatrix(true);
auto cuts = common::SketchOnDMatrix(m.get(), max_bins, AllThreadsForTest(), false, {});
auto cuts = common::SketchOnDMatrix(&ctx, m.get(), max_bins, false, {});
common::HistogramCuts copy_cuts = cuts;
ASSERT_EQ(m->Info().num_row_, kRows);
@@ -112,7 +131,7 @@ TEST(GradientIndex, PushBatch) {
m->Info().num_row_);
gmat.PushAdapterBatchColumns(m->Ctx(), batch, std::numeric_limits<float>::quiet_NaN(), 0);
}
for (auto const &page : m->GetBatches<GHistIndexMatrix>(BatchParam{max_bins, st})) {
for (auto const &page : m->GetBatches<GHistIndexMatrix>(&ctx, BatchParam{max_bins, st})) {
for (size_t i = 0; i < kRows; ++i) {
for (size_t j = 0; j < kCols; ++j) {
auto v0 = gmat.GetFvalue(i, j, false);
@@ -143,17 +162,19 @@ class GHistIndexMatrixTest : public testing::TestWithParam<std::tuple<float, flo
// device.
size_t n_samples{128}, n_features{13};
Context ctx;
ctx.gpu_id = 0;
auto Xy = RandomDataGenerator{n_samples, n_features, 1 - density}.GenerateDMatrix(true);
std::unique_ptr<GHistIndexMatrix> from_ellpack;
ASSERT_TRUE(Xy->SingleColBlock());
bst_bin_t constexpr kBins{17};
auto p = BatchParam{kBins, threshold};
for (auto const &page : Xy->GetBatches<EllpackPage>(BatchParam{0, kBins})) {
Context gpu_ctx;
gpu_ctx.gpu_id = 0;
for (auto const &page : Xy->GetBatches<EllpackPage>(
&gpu_ctx, BatchParam{kBins, tree::TrainParam::DftSparseThreshold()})) {
from_ellpack.reset(new GHistIndexMatrix{&ctx, Xy->Info(), page, p});
}
for (auto const &from_sparse_page : Xy->GetBatches<GHistIndexMatrix>(p)) {
for (auto const &from_sparse_page : Xy->GetBatches<GHistIndexMatrix>(&ctx, p)) {
ASSERT_EQ(from_sparse_page.IsDense(), from_ellpack->IsDense());
ASSERT_EQ(from_sparse_page.base_rowid, 0);
ASSERT_EQ(from_sparse_page.base_rowid, from_ellpack->base_rowid);

View File

@@ -1,5 +1,5 @@
/*!
* Copyright 2021 XGBoost contributors
/**
* Copyright 2021-2023, XGBoost contributors
*/
#include <gtest/gtest.h>
@@ -11,6 +11,8 @@
namespace xgboost {
namespace data {
TEST(GHistIndexPageRawFormat, IO) {
Context ctx;
std::unique_ptr<SparsePageFormat<GHistIndexMatrix>> format{
CreatePageFormat<GHistIndexMatrix>("raw")};
auto m = RandomDataGenerator{100, 14, 0.5}.GenerateDMatrix();
@@ -20,7 +22,7 @@ TEST(GHistIndexPageRawFormat, IO) {
{
std::unique_ptr<dmlc::Stream> fo{dmlc::Stream::Create(path.c_str(), "w")};
for (auto const &index : m->GetBatches<GHistIndexMatrix>(batch)) {
for (auto const &index : m->GetBatches<GHistIndexMatrix>(&ctx, batch)) {
format->Write(index, fo.get());
}
}
@@ -29,7 +31,7 @@ TEST(GHistIndexPageRawFormat, IO) {
std::unique_ptr<dmlc::SeekStream> fi{dmlc::SeekStream::CreateForRead(path.c_str())};
format->Read(&page, fi.get());
for (auto const &gidx : m->GetBatches<GHistIndexMatrix>(batch)) {
for (auto const &gidx : m->GetBatches<GHistIndexMatrix>(&ctx, batch)) {
auto const &loaded = gidx;
ASSERT_EQ(loaded.cut.Ptrs(), page.cut.Ptrs());
ASSERT_EQ(loaded.cut.MinValues(), page.cut.MinValues());
@@ -43,5 +45,5 @@ TEST(GHistIndexPageRawFormat, IO) {
ASSERT_EQ(loaded.Transpose().GetTypeSize(), loaded.Transpose().GetTypeSize());
}
}
} // namespace data
} // namespace xgboost
} // namespace data
} // namespace xgboost

View File

@@ -15,8 +15,9 @@
namespace xgboost {
namespace data {
TEST(IterativeDMatrix, Ref) {
Context ctx;
TestRefDMatrix<GHistIndexMatrix, NumpyArrayIterForTest>(
[&](GHistIndexMatrix const& page) { return page.cut; });
&ctx, [&](GHistIndexMatrix const& page) { return page.cut; });
}
TEST(IterativeDMatrix, IsDense) {

View File

@@ -1,11 +1,12 @@
/*!
* Copyright 2020-2022 XGBoost contributors
/**
* Copyright 2020-2023, XGBoost contributors
*/
#include <gtest/gtest.h>
#include "../../../src/data/device_adapter.cuh"
#include "../../../src/data/ellpack_page.cuh"
#include "../../../src/data/iterative_dmatrix.h"
#include "../../../src/tree/param.h" // TrainParam
#include "../helpers.h"
#include "test_iterative_dmatrix.h"
@@ -13,15 +14,17 @@ namespace xgboost {
namespace data {
void TestEquivalent(float sparsity) {
Context ctx{MakeCUDACtx(0)};
CudaArrayIterForTest iter{sparsity};
IterativeDMatrix m(&iter, iter.Proxy(), nullptr, Reset, Next,
std::numeric_limits<float>::quiet_NaN(), 0, 256);
size_t offset = 0;
auto first = (*m.GetEllpackBatches({}).begin()).Impl();
std::size_t offset = 0;
auto first = (*m.GetEllpackBatches(&ctx, {}).begin()).Impl();
std::unique_ptr<EllpackPageImpl> page_concatenated {
new EllpackPageImpl(0, first->Cuts(), first->is_dense,
first->row_stride, 1000 * 100)};
for (auto& batch : m.GetBatches<EllpackPage>({})) {
for (auto& batch : m.GetBatches<EllpackPage>(&ctx, {})) {
auto page = batch.Impl();
size_t num_elements = page_concatenated->Copy(0, page, offset);
offset += num_elements;
@@ -34,8 +37,8 @@ void TestEquivalent(float sparsity) {
auto adapter = CupyAdapter(interface_str);
std::unique_ptr<DMatrix> dm{
DMatrix::Create(&adapter, std::numeric_limits<float>::quiet_NaN(), 0)};
BatchParam bp {0, 256};
for (auto& ellpack : dm->GetBatches<EllpackPage>(bp)) {
auto bp = BatchParam{256, tree::TrainParam::DftSparseThreshold()};
for (auto& ellpack : dm->GetBatches<EllpackPage>(&ctx, bp)) {
auto from_data = ellpack.Impl()->GetDeviceAccessor(0);
std::vector<float> cuts_from_iter(from_iter.gidx_fvalue_map.size());
@@ -92,7 +95,8 @@ TEST(IterativeDeviceDMatrix, RowMajor) {
std::numeric_limits<float>::quiet_NaN(), 0, 256);
size_t n_batches = 0;
std::string interface_str = iter.AsArray();
for (auto& ellpack : m.GetBatches<EllpackPage>({})) {
Context ctx{MakeCUDACtx(0)};
for (auto& ellpack : m.GetBatches<EllpackPage>(&ctx, {})) {
n_batches ++;
auto impl = ellpack.Impl();
common::CompressedIterator<uint32_t> iterator(
@@ -140,7 +144,10 @@ TEST(IterativeDeviceDMatrix, RowMajorMissing) {
IterativeDMatrix m(&iter, iter.Proxy(), nullptr, Reset, Next,
std::numeric_limits<float>::quiet_NaN(), 0, 256);
auto &ellpack = *m.GetBatches<EllpackPage>({0, 256}).begin();
auto ctx = MakeCUDACtx(0);
auto& ellpack =
*m.GetBatches<EllpackPage>(&ctx, BatchParam{256, tree::TrainParam::DftSparseThreshold()})
.begin();
auto impl = ellpack.Impl();
common::CompressedIterator<uint32_t> iterator(
impl->gidx_buffer.HostVector().data(), impl->NumSymbols());
@@ -171,8 +178,9 @@ TEST(IterativeDeviceDMatrix, IsDense) {
}
TEST(IterativeDeviceDMatrix, Ref) {
Context ctx{MakeCUDACtx(0)};
TestRefDMatrix<EllpackPage, CudaArrayIterForTest>(
[](EllpackPage const& page) { return page.Impl()->Cuts(); });
&ctx, [](EllpackPage const& page) { return page.Impl()->Cuts(); });
}
} // namespace data
} // namespace xgboost

View File

@@ -1,8 +1,11 @@
/*!
* Copyright 2022 XGBoost contributors
/**
* Copyright 2022-2023, XGBoost contributors
*/
#pragma once
#include <memory> // std::make_shared
#include <xgboost/context.h> // for Context
#include <limits> // for numeric_limits
#include <memory> // for make_shared
#include "../../../src/data/iterative_dmatrix.h"
#include "../helpers.h"
@@ -10,7 +13,7 @@
namespace xgboost {
namespace data {
template <typename Page, typename Iter, typename Cuts>
void TestRefDMatrix(Cuts&& get_cuts) {
void TestRefDMatrix(Context const* ctx, Cuts&& get_cuts) {
int n_bins = 256;
Iter iter(0.3, 2048);
auto m = std::make_shared<IterativeDMatrix>(&iter, iter.Proxy(), nullptr, Reset, Next,
@@ -20,8 +23,8 @@ void TestRefDMatrix(Cuts&& get_cuts) {
auto m_1 = std::make_shared<IterativeDMatrix>(&iter_1, iter_1.Proxy(), m, Reset, Next,
std::numeric_limits<float>::quiet_NaN(), 0, n_bins);
for (auto const& page_0 : m->template GetBatches<Page>({})) {
for (auto const& page_1 : m_1->template GetBatches<Page>({})) {
for (auto const& page_0 : m->template GetBatches<Page>(ctx, {})) {
for (auto const& page_1 : m_1->template GetBatches<Page>(ctx, {})) {
auto const& cuts_0 = get_cuts(page_0);
auto const& cuts_1 = get_cuts(page_1);
ASSERT_EQ(cuts_0.Values(), cuts_1.Values());
@@ -32,8 +35,8 @@ void TestRefDMatrix(Cuts&& get_cuts) {
m_1 = std::make_shared<IterativeDMatrix>(&iter_1, iter_1.Proxy(), nullptr, Reset, Next,
std::numeric_limits<float>::quiet_NaN(), 0, n_bins);
for (auto const& page_0 : m->template GetBatches<Page>({})) {
for (auto const& page_1 : m_1->template GetBatches<Page>({})) {
for (auto const& page_0 : m->template GetBatches<Page>(ctx, {})) {
for (auto const& page_1 : m_1->template GetBatches<Page>(ctx, {})) {
auto const& cuts_0 = get_cuts(page_0);
auto const& cuts_1 = get_cuts(page_1);
ASSERT_NE(cuts_0.Values(), cuts_1.Values());
@@ -45,8 +48,8 @@ void TestRefDMatrix(Cuts&& get_cuts) {
auto dm = RandomDataGenerator(2048, Iter::Cols(), 0.5).GenerateDMatrix(true);
auto dqm = std::make_shared<IterativeDMatrix>(&iter_1, iter_1.Proxy(), dm, Reset, Next,
std::numeric_limits<float>::quiet_NaN(), 0, n_bins);
for (auto const& page_0 : dm->template GetBatches<Page>({})) {
for (auto const& page_1 : dqm->template GetBatches<Page>({})) {
for (auto const& page_0 : dm->template GetBatches<Page>(ctx, {})) {
for (auto const& page_1 : dqm->template GetBatches<Page>(ctx, {})) {
auto const& cuts_0 = get_cuts(page_0);
auto const& cuts_1 = get_cuts(page_1);
ASSERT_EQ(cuts_0.Values(), cuts_1.Values());

View File

@@ -61,6 +61,7 @@ TEST(SimpleDMatrix, RowAccess) {
}
TEST(SimpleDMatrix, ColAccessWithoutBatches) {
Context ctx;
dmlc::TemporaryDirectory tempdir;
const std::string tmp_file = tempdir.path + "/simple.libsvm";
CreateSimpleTestData(tmp_file);
@@ -70,7 +71,7 @@ TEST(SimpleDMatrix, ColAccessWithoutBatches) {
// Loop over the batches and assert the data is as expected
int64_t num_col_batch = 0;
for (const auto &batch : dmat->GetBatches<xgboost::SortedCSCPage>()) {
for (const auto &batch : dmat->GetBatches<xgboost::SortedCSCPage>(&ctx)) {
num_col_batch += 1;
EXPECT_EQ(batch.Size(), dmat->Info().num_col_)
<< "Expected batch size = number of cells as #batches is 1.";

View File

@@ -23,7 +23,7 @@ std::string UriSVM(std::string name, std::string cache) {
} // namespace
template <typename Page>
void TestSparseDMatrixLoadFile() {
void TestSparseDMatrixLoadFile(Context const* ctx) {
dmlc::TemporaryDirectory tmpdir;
auto opath = tmpdir.path + "/1-based.svm";
CreateBigTestData(opath, 3 * 64, false);
@@ -48,7 +48,7 @@ void TestSparseDMatrixLoadFile() {
data::SimpleDMatrix simple{&adapter, std::numeric_limits<float>::quiet_NaN(),
1};
Page out;
for (auto const& page : m.GetBatches<Page>()) {
for (auto const &page : m.GetBatches<Page>(ctx)) {
if (std::is_same<Page, SparsePage>::value) {
out.Push(page);
} else {
@@ -58,7 +58,7 @@ void TestSparseDMatrixLoadFile() {
ASSERT_EQ(m.Info().num_col_, simple.Info().num_col_);
ASSERT_EQ(m.Info().num_row_, simple.Info().num_row_);
for (auto const& page : simple.GetBatches<Page>()) {
for (auto const& page : simple.GetBatches<Page>(ctx)) {
ASSERT_EQ(page.offset.HostVector(), out.offset.HostVector());
for (size_t i = 0; i < page.data.Size(); ++i) {
ASSERT_EQ(page.data.HostVector()[i].fvalue, out.data.HostVector()[i].fvalue);
@@ -67,16 +67,18 @@ void TestSparseDMatrixLoadFile() {
}
TEST(SparsePageDMatrix, LoadFile) {
TestSparseDMatrixLoadFile<SparsePage>();
TestSparseDMatrixLoadFile<CSCPage>();
TestSparseDMatrixLoadFile<SortedCSCPage>();
auto ctx = CreateEmptyGenericParam(Context::kCpuId);
TestSparseDMatrixLoadFile<SparsePage>(&ctx);
TestSparseDMatrixLoadFile<CSCPage>(&ctx);
TestSparseDMatrixLoadFile<SortedCSCPage>(&ctx);
}
// allow caller to retain pages so they can process multiple pages at the same time.
template <typename Page>
void TestRetainPage() {
auto m = CreateSparsePageDMatrix(10000);
auto batches = m->GetBatches<Page>();
auto ctx = CreateEmptyGenericParam(Context::kCpuId);
auto batches = m->GetBatches<Page>(&ctx);
auto begin = batches.begin();
auto end = batches.end();
@@ -100,7 +102,7 @@ void TestRetainPage() {
}
// make sure it's const and the caller can not modify the content of page.
for (auto& page : m->GetBatches<Page>()) {
for (auto &page : m->GetBatches<Page>({&ctx})) {
static_assert(std::is_const<std::remove_reference_t<decltype(page)>>::value);
}
}
@@ -143,10 +145,11 @@ TEST(SparsePageDMatrix, ColAccess) {
const std::string tmp_file = tempdir.path + "/simple.libsvm";
CreateSimpleTestData(tmp_file);
xgboost::DMatrix *dmat = xgboost::DMatrix::Load(UriSVM(tmp_file, tmp_file));
auto ctx = CreateEmptyGenericParam(Context::kCpuId);
// Loop over the batches and assert the data is as expected
size_t iter = 0;
for (auto const &col_batch : dmat->GetBatches<xgboost::SortedCSCPage>()) {
for (auto const &col_batch : dmat->GetBatches<xgboost::SortedCSCPage>(&ctx)) {
auto col_page = col_batch.GetView();
ASSERT_EQ(col_page.Size(), dmat->Info().num_col_);
if (iter == 1) {
@@ -164,7 +167,7 @@ TEST(SparsePageDMatrix, ColAccess) {
// Loop over the batches and assert the data is as expected
iter = 0;
for (auto const &col_batch : dmat->GetBatches<xgboost::CSCPage>()) {
for (auto const &col_batch : dmat->GetBatches<xgboost::CSCPage>(&ctx)) {
auto col_page = col_batch.GetView();
EXPECT_EQ(col_page.Size(), dmat->Info().num_col_);
if (iter == 0) {
@@ -182,9 +185,9 @@ TEST(SparsePageDMatrix, ColAccess) {
TEST(SparsePageDMatrix, ThreadSafetyException) {
size_t constexpr kEntriesPerCol = 3;
size_t constexpr kEntries = 64 * kEntriesPerCol * 2;
Context ctx;
std::unique_ptr<xgboost::DMatrix> dmat =
xgboost::CreateSparsePageDMatrix(kEntries);
std::unique_ptr<xgboost::DMatrix> dmat = xgboost::CreateSparsePageDMatrix(kEntries);
int threads = 1000;
@@ -221,7 +224,8 @@ TEST(SparsePageDMatrix, ColAccessBatches) {
// Create multiple sparse pages
std::unique_ptr<xgboost::DMatrix> dmat{xgboost::CreateSparsePageDMatrix(kEntries)};
ASSERT_EQ(dmat->Ctx()->Threads(), AllThreadsForTest());
for (auto const &page : dmat->GetBatches<xgboost::CSCPage>()) {
auto ctx = CreateEmptyGenericParam(Context::kCpuId);
for (auto const &page : dmat->GetBatches<xgboost::CSCPage>(&ctx)) {
ASSERT_EQ(dmat->Info().num_col_, page.Size());
}
}

View File

@@ -1,15 +1,20 @@
/**
* Copyright 2019-2023 by XGBoost Contributors
*/
#include <xgboost/data.h> // for DMatrix
#include "../../../src/common/compressed_iterator.h"
#include "../../../src/data/ellpack_page.cuh"
#include "../../../src/data/sparse_page_dmatrix.h"
#include "../filesystem.h" // dmlc::TemporaryDirectory
#include "../../../src/tree/param.h" // TrainParam
#include "../filesystem.h" // dmlc::TemporaryDirectory
#include "../helpers.h"
namespace xgboost {
TEST(SparsePageDMatrix, EllpackPage) {
Context ctx{MakeCUDACtx(0)};
auto param = BatchParam{256, tree::TrainParam::DftSparseThreshold()};
dmlc::TemporaryDirectory tempdir;
const std::string tmp_file = tempdir.path + "/simple.libsvm";
CreateSimpleTestData(tmp_file);
@@ -17,7 +22,7 @@ TEST(SparsePageDMatrix, EllpackPage) {
// Loop over the batches and assert the data is as expected
size_t n = 0;
for (const auto& batch : dmat->GetBatches<EllpackPage>({0, 256})) {
for (const auto& batch : dmat->GetBatches<EllpackPage>(&ctx, param)) {
n += batch.Size();
}
EXPECT_EQ(n, dmat->Info().num_row_);
@@ -37,6 +42,8 @@ TEST(SparsePageDMatrix, EllpackPage) {
}
TEST(SparsePageDMatrix, MultipleEllpackPages) {
Context ctx{MakeCUDACtx(0)};
auto param = BatchParam{256, tree::TrainParam::DftSparseThreshold()};
dmlc::TemporaryDirectory tmpdir;
std::string filename = tmpdir.path + "/big.libsvm";
size_t constexpr kPageSize = 64, kEntriesPerCol = 3;
@@ -46,7 +53,7 @@ TEST(SparsePageDMatrix, MultipleEllpackPages) {
// Loop over the batches and count the records
int64_t batch_count = 0;
int64_t row_count = 0;
for (const auto& batch : dmat->GetBatches<EllpackPage>({0, 256})) {
for (const auto& batch : dmat->GetBatches<EllpackPage>(&ctx, param)) {
EXPECT_LT(batch.Size(), dmat->Info().num_row_);
batch_count++;
row_count += batch.Size();
@@ -61,8 +68,11 @@ TEST(SparsePageDMatrix, MultipleEllpackPages) {
}
TEST(SparsePageDMatrix, RetainEllpackPage) {
Context ctx{MakeCUDACtx(0)};
auto param = BatchParam{32, tree::TrainParam::DftSparseThreshold()};
auto m = CreateSparsePageDMatrix(10000);
auto batches = m->GetBatches<EllpackPage>({0, 32});
auto batches = m->GetBatches<EllpackPage>(&ctx, param);
auto begin = batches.begin();
auto end = batches.end();
@@ -87,7 +97,7 @@ TEST(SparsePageDMatrix, RetainEllpackPage) {
}
// make sure it's const and the caller can not modify the content of page.
for (auto& page : m->GetBatches<EllpackPage>({0, 32})) {
for (auto& page : m->GetBatches<EllpackPage>(&ctx, param)) {
static_assert(std::is_const<std::remove_reference_t<decltype(page)>>::value);
}
@@ -98,6 +108,7 @@ TEST(SparsePageDMatrix, RetainEllpackPage) {
}
TEST(SparsePageDMatrix, EllpackPageContent) {
auto ctx = CreateEmptyGenericParam(0);
constexpr size_t kRows = 6;
constexpr size_t kCols = 2;
constexpr size_t kPageSize = 1;
@@ -110,8 +121,8 @@ TEST(SparsePageDMatrix, EllpackPageContent) {
std::unique_ptr<DMatrix>
dmat_ext(CreateSparsePageDMatrixWithRC(kRows, kCols, kPageSize, true, tmpdir));
BatchParam param{0, 2};
auto impl = (*dmat->GetBatches<EllpackPage>(param).begin()).Impl();
auto param = BatchParam{2, tree::TrainParam::DftSparseThreshold()};
auto impl = (*dmat->GetBatches<EllpackPage>(&ctx, param).begin()).Impl();
EXPECT_EQ(impl->base_rowid, 0);
EXPECT_EQ(impl->n_rows, kRows);
EXPECT_FALSE(impl->is_dense);
@@ -120,7 +131,7 @@ TEST(SparsePageDMatrix, EllpackPageContent) {
std::unique_ptr<EllpackPageImpl> impl_ext;
size_t offset = 0;
for (auto& batch : dmat_ext->GetBatches<EllpackPage>(param)) {
for (auto& batch : dmat_ext->GetBatches<EllpackPage>(&ctx, param)) {
if (!impl_ext) {
impl_ext.reset(new EllpackPageImpl(
batch.Impl()->gidx_buffer.DeviceIdx(), batch.Impl()->Cuts(),
@@ -170,8 +181,9 @@ TEST(SparsePageDMatrix, MultipleEllpackPageContent) {
std::unique_ptr<DMatrix>
dmat_ext(CreateSparsePageDMatrixWithRC(kRows, kCols, kPageSize, true, tmpdir));
BatchParam param{0, kMaxBins};
auto impl = (*dmat->GetBatches<EllpackPage>(param).begin()).Impl();
Context ctx{MakeCUDACtx(0)};
auto param = BatchParam{kMaxBins, tree::TrainParam::DftSparseThreshold()};
auto impl = (*dmat->GetBatches<EllpackPage>(&ctx, param).begin()).Impl();
EXPECT_EQ(impl->base_rowid, 0);
EXPECT_EQ(impl->n_rows, kRows);
@@ -180,7 +192,7 @@ TEST(SparsePageDMatrix, MultipleEllpackPageContent) {
thrust::device_vector<bst_float> row_ext_d(kCols);
std::vector<bst_float> row(kCols);
std::vector<bst_float> row_ext(kCols);
for (auto& page : dmat_ext->GetBatches<EllpackPage>(param)) {
for (auto& page : dmat_ext->GetBatches<EllpackPage>(&ctx, param)) {
auto impl_ext = page.Impl();
EXPECT_EQ(impl_ext->base_rowid, current_row);
@@ -211,10 +223,11 @@ TEST(SparsePageDMatrix, EllpackPageMultipleLoops) {
std::unique_ptr<DMatrix>
dmat_ext(CreateSparsePageDMatrixWithRC(kRows, kCols, kPageSize, true, tmpdir));
BatchParam param{0, kMaxBins};
Context ctx{MakeCUDACtx(0)};
auto param = BatchParam{kMaxBins, tree::TrainParam::DftSparseThreshold()};
size_t current_row = 0;
for (auto& page : dmat_ext->GetBatches<EllpackPage>(param)) {
for (auto& page : dmat_ext->GetBatches<EllpackPage>(&ctx, param)) {
auto impl_ext = page.Impl();
EXPECT_EQ(impl_ext->base_rowid, current_row);
current_row += impl_ext->n_rows;

View File

@@ -1,17 +1,24 @@
/*!
* Copyright 2021 XGBoost contributors
/**
* Copyright 2021-2023, XGBoost contributors
*/
#include <gtest/gtest.h>
#include <xgboost/data.h>
#include <xgboost/data.h> // for CSCPage, SortedCSCPage, SparsePage
#include "../../../src/data/sparse_page_source.h"
#include "../filesystem.h" // dmlc::TemporaryDirectory
#include "../helpers.h"
#include <memory> // for allocator, unique_ptr, __shared_ptr_ac...
#include <string> // for char_traits, operator+, basic_string
#include "../../../src/data/sparse_page_writer.h" // for CreatePageFormat
#include "../helpers.h" // for RandomDataGenerator
#include "dmlc/filesystem.h" // for TemporaryDirectory
#include "dmlc/io.h" // for SeekStream, Stream
#include "gtest/gtest_pred_impl.h" // for Test, AssertionResult, ASSERT_EQ, TEST
#include "xgboost/context.h" // for Context
namespace xgboost {
namespace data {
template <typename S> void TestSparsePageRawFormat() {
std::unique_ptr<SparsePageFormat<S>> format{CreatePageFormat<S>("raw")};
Context ctx;
auto m = RandomDataGenerator{100, 14, 0.5}.GenerateDMatrix();
ASSERT_TRUE(m->SingleColBlock());
@@ -21,7 +28,7 @@ template <typename S> void TestSparsePageRawFormat() {
{
// block code to flush the stream
std::unique_ptr<dmlc::Stream> fo{dmlc::Stream::Create(path.c_str(), "w")};
for (auto const &page : m->GetBatches<S>()) {
for (auto const &page : m->GetBatches<S>(&ctx)) {
orig.Push(page);
format->Write(page, fo.get());
}

View File

@@ -388,6 +388,11 @@ inline Context CreateEmptyGenericParam(int gpu_id) {
return tparam;
}
/**
* \brief Make a context that uses CUDA.
*/
inline Context MakeCUDACtx(std::int32_t device) { return Context{}.MakeCUDA(device); }
inline HostDeviceVector<GradientPair> GenerateRandomGradients(const size_t n_rows,
float lower= 0.0f, float upper = 1.0f) {
xgboost::SimpleLCG gen;

View File

@@ -203,7 +203,11 @@ void TestLearnerSerialization(Args args, FeatureMap const& fmap, std::shared_ptr
learner->Save(&mem_out);
ASSERT_EQ(model_at_kiter, serialised_model_tmp);
learner->SetParam("gpu_id", "0");
for (auto const& [key, value] : args) {
if (key == "tree_method" && value == "gpu_hist") {
learner->SetParam("gpu_id", "0");
}
}
// Pull data to device
for (auto &batch : p_dmat->GetBatches<SparsePage>()) {
batch.data.SetDevice(0);

View File

@@ -1,12 +1,13 @@
/*!
* Copyright 2020-2021 by XGBoost Contributors
/**
* Copyright 2020-2023, XGBoost Contributors
*/
#include <gtest/gtest.h>
#include "../../../../src/data/ellpack_page.cuh"
#include "../../../../src/tree/gpu_hist/gradient_based_sampler.cuh"
#include "../../../../src/tree/param.h"
#include "../../filesystem.h" // dmlc::TemporaryDirectory
#include "../../../../src/tree/param.h" // TrainParam
#include "../../filesystem.h" // dmlc::TemporaryDirectory
#include "../../helpers.h"
namespace xgboost {
@@ -31,14 +32,15 @@ void VerifySampling(size_t page_size,
}
gpair.SetDevice(0);
BatchParam param{0, 256};
auto page = (*dmat->GetBatches<EllpackPage>(param).begin()).Impl();
Context ctx{MakeCUDACtx(0)};
auto param = BatchParam{256, tree::TrainParam::DftSparseThreshold()};
auto page = (*dmat->GetBatches<EllpackPage>(&ctx, param).begin()).Impl();
if (page_size != 0) {
EXPECT_NE(page->n_rows, kRows);
}
GradientBasedSampler sampler(page, kRows, param, subsample, sampling_method);
auto sample = sampler.Sample(gpair.DeviceSpan(), dmat.get());
GradientBasedSampler sampler(&ctx, page, kRows, param, subsample, sampling_method);
auto sample = sampler.Sample(&ctx, gpair.DeviceSpan(), dmat.get());
if (fixed_size_sampling) {
EXPECT_EQ(sample.sample_rows, kRows);
@@ -86,12 +88,13 @@ TEST(GradientBasedSampler, NoSamplingExternalMemory) {
auto gpair = GenerateRandomGradients(kRows);
gpair.SetDevice(0);
BatchParam param{0, 256};
auto page = (*dmat->GetBatches<EllpackPage>(param).begin()).Impl();
Context ctx{MakeCUDACtx(0)};
auto param = BatchParam{256, tree::TrainParam::DftSparseThreshold()};
auto page = (*dmat->GetBatches<EllpackPage>(&ctx, param).begin()).Impl();
EXPECT_NE(page->n_rows, kRows);
GradientBasedSampler sampler(page, kRows, param, kSubsample, TrainParam::kUniform);
auto sample = sampler.Sample(gpair.DeviceSpan(), dmat.get());
GradientBasedSampler sampler(&ctx, page, kRows, param, kSubsample, TrainParam::kUniform);
auto sample = sampler.Sample(&ctx, gpair.DeviceSpan(), dmat.get());
auto sampled_page = sample.page;
EXPECT_EQ(sample.sample_rows, kRows);
EXPECT_EQ(sample.gpair.size(), gpair.Size());
@@ -103,7 +106,7 @@ TEST(GradientBasedSampler, NoSamplingExternalMemory) {
ci(buffer.data(), sampled_page->NumSymbols());
size_t offset = 0;
for (auto& batch : dmat->GetBatches<EllpackPage>(param)) {
for (auto& batch : dmat->GetBatches<EllpackPage>(&ctx, param)) {
auto page = batch.Impl();
std::vector<common::CompressedByteT> page_buffer(page->gidx_buffer.HostVector());
common::CompressedIterator<common::CompressedByteT>

View File

@@ -1,9 +1,14 @@
/**
* Copyright 2020-2023, XGBoost Contributors
*/
#include <gtest/gtest.h>
#include <vector>
#include "../../../../src/common/categorical.h"
#include "../../../../src/tree/gpu_hist/histogram.cuh"
#include "../../../../src/tree/gpu_hist/row_partitioner.cuh"
#include "../../../../src/tree/param.h" // TrainParam
#include "../../categorical_helpers.h"
#include "../../helpers.h"
@@ -11,15 +16,15 @@ namespace xgboost {
namespace tree {
void TestDeterministicHistogram(bool is_dense, int shm_size) {
Context ctx = CreateEmptyGenericParam(0);
Context ctx = MakeCUDACtx(0);
size_t constexpr kBins = 256, kCols = 120, kRows = 16384, kRounds = 16;
float constexpr kLower = -1e-2, kUpper = 1e2;
float sparsity = is_dense ? 0.0f : 0.5f;
auto matrix = RandomDataGenerator(kRows, kCols, sparsity).GenerateDMatrix();
BatchParam batch_param{0, static_cast<int32_t>(kBins)};
auto batch_param = BatchParam{kBins, tree::TrainParam::DftSparseThreshold()};
for (auto const& batch : matrix->GetBatches<EllpackPage>(batch_param)) {
for (auto const& batch : matrix->GetBatches<EllpackPage>(&ctx, batch_param)) {
auto* page = batch.Impl();
tree::RowPartitioner row_partitioner(0, kRows);
@@ -114,13 +119,13 @@ void ValidateCategoricalHistogram(size_t n_categories, common::Span<GradientPair
// Test 1 vs rest categorical histogram is equivalent to one hot encoded data.
void TestGPUHistogramCategorical(size_t num_categories) {
auto ctx = CreateEmptyGenericParam(0);
auto ctx = MakeCUDACtx(0);
size_t constexpr kRows = 340;
size_t constexpr kBins = 256;
auto x = GenerateRandomCategoricalSingleColumn(kRows, num_categories);
auto cat_m = GetDMatrixFromData(x, kRows, 1);
cat_m->Info().feature_types.HostVector().push_back(FeatureType::kCategorical);
BatchParam batch_param{0, static_cast<int32_t>(kBins)};
auto batch_param = BatchParam{kBins, tree::TrainParam::DftSparseThreshold()};
tree::RowPartitioner row_partitioner(0, kRows);
auto ridx = row_partitioner.GetRows(0);
dh::device_vector<GradientPairInt64> cat_hist(num_categories);
@@ -130,7 +135,7 @@ void TestGPUHistogramCategorical(size_t num_categories) {
/**
* Generate hist with cat data.
*/
for (auto const &batch : cat_m->GetBatches<EllpackPage>(batch_param)) {
for (auto const &batch : cat_m->GetBatches<EllpackPage>(&ctx, batch_param)) {
auto* page = batch.Impl();
FeatureGroups single_group(page->Cuts());
BuildGradientHistogram(ctx.CUDACtx(), page->GetDeviceAccessor(0),
@@ -144,7 +149,7 @@ void TestGPUHistogramCategorical(size_t num_categories) {
auto x_encoded = OneHotEncodeFeature(x, num_categories);
auto encode_m = GetDMatrixFromData(x_encoded, kRows, num_categories);
dh::device_vector<GradientPairInt64> encode_hist(2 * num_categories);
for (auto const &batch : encode_m->GetBatches<EllpackPage>(batch_param)) {
for (auto const &batch : encode_m->GetBatches<EllpackPage>(&ctx, batch_param)) {
auto* page = batch.Impl();
FeatureGroups single_group(page->Cuts());
BuildGradientHistogram(ctx.CUDACtx(), page->GetDeviceAccessor(0),

View File

@@ -41,7 +41,7 @@ void TestEvaluateSplits(bool force_read_by_column) {
size_t constexpr kMaxBins = 4;
// dense, no missing values
GHistIndexMatrix gmat(dmat.get(), kMaxBins, 0.5, false, AllThreadsForTest());
GHistIndexMatrix gmat(&ctx, dmat.get(), kMaxBins, 0.5, false);
common::RowSetCollection row_set_collection;
std::vector<size_t> &row_indices = *row_set_collection.Data();
row_indices.resize(kRows);
@@ -228,7 +228,7 @@ auto CompareOneHotAndPartition(bool onehot) {
auto evaluator = HistEvaluator<CPUExpandEntry>{&ctx, &param, dmat->Info(), sampler};
std::vector<CPUExpandEntry> entries(1);
for (auto const &gmat : dmat->GetBatches<GHistIndexMatrix>({32, param.sparse_threshold})) {
for (auto const &gmat : dmat->GetBatches<GHistIndexMatrix>(&ctx, {32, param.sparse_threshold})) {
common::HistCollection hist;
entries.front().nid = 0;

View File

@@ -25,6 +25,7 @@ void InitRowPartitionForTest(common::RowSetCollection *row_set, size_t n_samples
} // anonymous namespace
void TestAddHistRows(bool is_distributed) {
auto ctx = CreateEmptyGenericParam(Context::kCpuId);
std::vector<CPUExpandEntry> nodes_for_explicit_hist_build_;
std::vector<CPUExpandEntry> nodes_for_subtraction_trick_;
int starting_index = std::numeric_limits<int>::max();
@@ -32,9 +33,9 @@ void TestAddHistRows(bool is_distributed) {
size_t constexpr kNRows = 8, kNCols = 16;
int32_t constexpr kMaxBins = 4;
auto p_fmat =
RandomDataGenerator(kNRows, kNCols, 0.8).Seed(3).GenerateDMatrix();
auto const &gmat = *(p_fmat->GetBatches<GHistIndexMatrix>(BatchParam{kMaxBins, 0.5}).begin());
auto p_fmat = RandomDataGenerator(kNRows, kNCols, 0.8).Seed(3).GenerateDMatrix();
auto const &gmat =
*(p_fmat->GetBatches<GHistIndexMatrix>(&ctx, BatchParam{kMaxBins, 0.5}).begin());
RegTree tree;
@@ -73,6 +74,7 @@ TEST(CPUHistogram, AddRows) {
void TestSyncHist(bool is_distributed) {
size_t constexpr kNRows = 8, kNCols = 16;
int32_t constexpr kMaxBins = 4;
auto ctx = CreateEmptyGenericParam(Context::kCpuId);
std::vector<CPUExpandEntry> nodes_for_explicit_hist_build_;
std::vector<CPUExpandEntry> nodes_for_subtraction_trick_;
@@ -80,9 +82,9 @@ void TestSyncHist(bool is_distributed) {
int sync_count = 0;
RegTree tree;
auto p_fmat =
RandomDataGenerator(kNRows, kNCols, 0.8).Seed(3).GenerateDMatrix();
auto const &gmat = *(p_fmat->GetBatches<GHistIndexMatrix>(BatchParam{kMaxBins, 0.5}).begin());
auto p_fmat = RandomDataGenerator(kNRows, kNCols, 0.8).Seed(3).GenerateDMatrix();
auto const &gmat =
*(p_fmat->GetBatches<GHistIndexMatrix>(&ctx, BatchParam{kMaxBins, 0.5}).begin());
HistogramBuilder<CPUExpandEntry> histogram;
uint32_t total_bins = gmat.cut.Ptrs().back();
@@ -227,12 +229,15 @@ TEST(CPUHistogram, SyncHist) {
void TestBuildHistogram(bool is_distributed, bool force_read_by_column, bool is_col_split) {
size_t constexpr kNRows = 8, kNCols = 16;
int32_t constexpr kMaxBins = 4;
auto p_fmat = RandomDataGenerator(kNRows, kNCols, 0.8).Seed(3).GenerateDMatrix();
auto ctx = CreateEmptyGenericParam(Context::kCpuId);
auto p_fmat =
RandomDataGenerator(kNRows, kNCols, 0.8).Seed(3).GenerateDMatrix();
if (is_col_split) {
p_fmat = std::shared_ptr<DMatrix>{
p_fmat->SliceCol(collective::GetWorldSize(), collective::GetRank())};
}
auto const &gmat = *(p_fmat->GetBatches<GHistIndexMatrix>(BatchParam{kMaxBins, 0.5}).begin());
auto const &gmat =
*(p_fmat->GetBatches<GHistIndexMatrix>(&ctx, BatchParam{kMaxBins, 0.5}).begin());
uint32_t total_bins = gmat.cut.Ptrs().back();
static double constexpr kEps = 1e-6;
@@ -257,9 +262,9 @@ void TestBuildHistogram(bool is_distributed, bool force_read_by_column, bool is_
CPUExpandEntry node{RegTree::kRoot, tree.GetDepth(0)};
std::vector<CPUExpandEntry> nodes_for_explicit_hist_build;
nodes_for_explicit_hist_build.push_back(node);
for (auto const &gidx : p_fmat->GetBatches<GHistIndexMatrix>({kMaxBins, 0.5})) {
histogram.BuildHist(0, gidx, &tree, row_set_collection,
nodes_for_explicit_hist_build, {}, gpair, force_read_by_column);
for (auto const &gidx : p_fmat->GetBatches<GHistIndexMatrix>(&ctx, {kMaxBins, 0.5})) {
histogram.BuildHist(0, gidx, &tree, row_set_collection, nodes_for_explicit_hist_build, {},
gpair, force_read_by_column);
}
// Check if number of histogram bins is correct
@@ -325,6 +330,8 @@ void TestHistogramCategorical(size_t n_categories, bool force_read_by_column) {
auto x = GenerateRandomCategoricalSingleColumn(kRows, n_categories);
auto cat_m = GetDMatrixFromData(x, kRows, 1);
cat_m->Info().feature_types.HostVector().push_back(FeatureType::kCategorical);
auto ctx = CreateEmptyGenericParam(Context::kCpuId);
BatchParam batch_param{0, static_cast<int32_t>(kBins)};
RegTree tree;
@@ -345,12 +352,11 @@ void TestHistogramCategorical(size_t n_categories, bool force_read_by_column) {
* Generate hist with cat data.
*/
HistogramBuilder<CPUExpandEntry> cat_hist;
for (auto const &gidx : cat_m->GetBatches<GHistIndexMatrix>({kBins, 0.5})) {
for (auto const &gidx : cat_m->GetBatches<GHistIndexMatrix>(&ctx, {kBins, 0.5})) {
auto total_bins = gidx.cut.TotalBins();
cat_hist.Reset(total_bins, {kBins, 0.5}, omp_get_max_threads(), 1, false, false);
cat_hist.BuildHist(0, gidx, &tree, row_set_collection,
nodes_for_explicit_hist_build, {}, gpair.HostVector(),
force_read_by_column);
cat_hist.BuildHist(0, gidx, &tree, row_set_collection, nodes_for_explicit_hist_build, {},
gpair.HostVector(), force_read_by_column);
}
/**
@@ -359,12 +365,11 @@ void TestHistogramCategorical(size_t n_categories, bool force_read_by_column) {
auto x_encoded = OneHotEncodeFeature(x, n_categories);
auto encode_m = GetDMatrixFromData(x_encoded, kRows, n_categories);
HistogramBuilder<CPUExpandEntry> onehot_hist;
for (auto const &gidx : encode_m->GetBatches<GHistIndexMatrix>({kBins, 0.5})) {
for (auto const &gidx : encode_m->GetBatches<GHistIndexMatrix>(&ctx, {kBins, 0.5})) {
auto total_bins = gidx.cut.TotalBins();
onehot_hist.Reset(total_bins, {kBins, 0.5}, omp_get_max_threads(), 1, false, false);
onehot_hist.BuildHist(0, gidx, &tree, row_set_collection, nodes_for_explicit_hist_build, {},
gpair.HostVector(),
force_read_by_column);
gpair.HostVector(), force_read_by_column);
}
auto cat = cat_hist.Histogram()[0];
@@ -382,8 +387,8 @@ TEST(CPUHistogram, Categorical) {
}
}
namespace {
void TestHistogramExternalMemory(BatchParam batch_param, bool is_approx, bool force_read_by_column) {
Context ctx;
void TestHistogramExternalMemory(Context const *ctx, BatchParam batch_param, bool is_approx,
bool force_read_by_column) {
size_t constexpr kEntries = 1 << 16;
auto m = CreateSparsePageDMatrix(kEntries, "cache");
@@ -410,7 +415,7 @@ void TestHistogramExternalMemory(BatchParam batch_param, bool is_approx, bool fo
* Multi page
*/
std::vector<common::RowSetCollection> rows_set;
for (auto const &page : m->GetBatches<GHistIndexMatrix>(batch_param)) {
for (auto const &page : m->GetBatches<GHistIndexMatrix>(ctx, batch_param)) {
CHECK_LT(page.base_rowid, m->Info().num_row_);
auto n_rows_in_node = page.Size();
partition_size[0] = std::max(partition_size[0], n_rows_in_node);
@@ -426,12 +431,12 @@ void TestHistogramExternalMemory(BatchParam batch_param, bool is_approx, bool fo
1, [&](size_t nidx_in_set) { return partition_size.at(nidx_in_set); },
256};
multi_build.Reset(total_bins, batch_param, ctx.Threads(), rows_set.size(), false, false);
multi_build.Reset(total_bins, batch_param, ctx->Threads(), rows_set.size(), false, false);
size_t page_idx{0};
for (auto const &page : m->GetBatches<GHistIndexMatrix>(batch_param)) {
multi_build.BuildHist(page_idx, space, page, &tree, rows_set.at(page_idx), nodes, {},
h_gpair, force_read_by_column);
for (auto const &page : m->GetBatches<GHistIndexMatrix>(ctx, batch_param)) {
multi_build.BuildHist(page_idx, space, page, &tree, rows_set.at(page_idx), nodes, {}, h_gpair,
force_read_by_column);
++page_idx;
}
ASSERT_EQ(page_idx, 2);
@@ -447,16 +452,16 @@ void TestHistogramExternalMemory(BatchParam batch_param, bool is_approx, bool fo
common::RowSetCollection row_set_collection;
InitRowPartitionForTest(&row_set_collection, n_samples);
single_build.Reset(total_bins, batch_param, ctx.Threads(), 1, false, false);
single_build.Reset(total_bins, batch_param, ctx->Threads(), 1, false, false);
SparsePage concat;
std::vector<float> hess(m->Info().num_row_, 1.0f);
for (auto const& page : m->GetBatches<SparsePage>()) {
concat.Push(page);
}
auto cut = common::SketchOnDMatrix(m.get(), batch_param.max_bin, ctx.Threads(), false, hess);
auto cut = common::SketchOnDMatrix(ctx, m.get(), batch_param.max_bin, false, hess);
GHistIndexMatrix gmat(concat, {}, cut, batch_param.max_bin, false,
std::numeric_limits<double>::quiet_NaN(), ctx.Threads());
std::numeric_limits<double>::quiet_NaN(), ctx->Threads());
single_build.BuildHist(0, gmat, &tree, row_set_collection, nodes, {}, h_gpair, force_read_by_column);
single_page = single_build.Histogram()[0];
}
@@ -470,16 +475,17 @@ void TestHistogramExternalMemory(BatchParam batch_param, bool is_approx, bool fo
TEST(CPUHistogram, ExternalMemory) {
int32_t constexpr kBins = 256;
TestHistogramExternalMemory(BatchParam{kBins, common::Span<float>{}, false}, true, false);
TestHistogramExternalMemory(BatchParam{kBins, common::Span<float>{}, false}, true, true);
auto ctx = CreateEmptyGenericParam(Context::kCpuId);
TestHistogramExternalMemory(&ctx, BatchParam{kBins, common::Span<float>{}, false}, true, false);
TestHistogramExternalMemory(&ctx, BatchParam{kBins, common::Span<float>{}, false}, true, true);
float sparse_thresh{0.5};
TestHistogramExternalMemory({kBins, sparse_thresh}, false, false);
TestHistogramExternalMemory({kBins, sparse_thresh}, false, true);
TestHistogramExternalMemory(&ctx, {kBins, sparse_thresh}, false, false);
TestHistogramExternalMemory(&ctx, {kBins, sparse_thresh}, false, true);
sparse_thresh = std::numeric_limits<float>::quiet_NaN();
TestHistogramExternalMemory({kBins, sparse_thresh}, false, false);
TestHistogramExternalMemory({kBins, sparse_thresh}, false, true);
TestHistogramExternalMemory(&ctx, {kBins, sparse_thresh}, false, false);
TestHistogramExternalMemory(&ctx, {kBins, sparse_thresh}, false, true);
}
} // namespace tree
} // namespace xgboost

View File

@@ -34,7 +34,7 @@ TEST(Approx, Partitioner) {
std::vector<CPUExpandEntry> candidates{{0, 0}};
candidates.front().split.loss_chg = 0.4;
for (auto const& page : Xy->GetBatches<GHistIndexMatrix>({64, hess, true})) {
for (auto const& page : Xy->GetBatches<GHistIndexMatrix>(&ctx, {64, hess, true})) {
bst_feature_t const split_ind = 0;
{
auto min_value = page.cut.MinValues()[split_ind];
@@ -84,7 +84,7 @@ void TestColumnSplitPartitioner(size_t n_samples, size_t base_rowid, std::shared
Context ctx;
ctx.InitAllowUnknown(Args{});
for (auto const& page : dmat->GetBatches<GHistIndexMatrix>({64, *hess, true})) {
for (auto const& page : dmat->GetBatches<GHistIndexMatrix>(&ctx, {64, *hess, true})) {
{
RegTree tree;
CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid, true};
@@ -133,7 +133,7 @@ TEST(Approx, PartitionerColSplit) {
Context ctx;
ctx.InitAllowUnknown(Args{});
CommonRowPartitioner mid_partitioner{&ctx, n_samples, base_rowid, false};
for (auto const& page : Xy->GetBatches<GHistIndexMatrix>({64, hess, true})) {
for (auto const& page : Xy->GetBatches<GHistIndexMatrix>(&ctx, {64, hess, true})) {
bst_feature_t const split_ind = 0;
min_value = page.cut.MinValues()[split_ind];

View File

@@ -43,7 +43,7 @@ void TestLeafPartition(size_t n_samples) {
std::vector<size_t> h_nptr;
float split_value{0};
for (auto const& page : Xy->GetBatches<GHistIndexMatrix>({Context::kCpuId, 64})) {
for (auto const& page : Xy->GetBatches<GHistIndexMatrix>(&ctx, BatchParam{64, 0.2})) {
bst_feature_t const split_ind = 0;
auto ptr = page.cut.Ptrs()[split_ind + 1];
split_value = page.cut.Values().at(ptr / 2);

View File

@@ -208,17 +208,16 @@ TEST(GpuHist, TestHistogramIndex) {
TestHistogramIndexImpl();
}
void UpdateTree(HostDeviceVector<GradientPair>* gpair, DMatrix* dmat,
size_t gpu_page_size, RegTree* tree,
HostDeviceVector<bst_float>* preds, float subsample = 1.0f,
const std::string& sampling_method = "uniform",
void UpdateTree(Context const* ctx, HostDeviceVector<GradientPair>* gpair, DMatrix* dmat,
size_t gpu_page_size, RegTree* tree, HostDeviceVector<bst_float>* preds,
float subsample = 1.0f, const std::string& sampling_method = "uniform",
int max_bin = 2) {
if (gpu_page_size > 0) {
// Loop over the batches and count the records
int64_t batch_count = 0;
int64_t row_count = 0;
for (const auto& batch : dmat->GetBatches<EllpackPage>({0, max_bin})) {
for (const auto& batch : dmat->GetBatches<EllpackPage>(
ctx, BatchParam{max_bin, TrainParam::DftSparseThreshold()})) {
EXPECT_LT(batch.Size(), dmat->Info().num_row_);
batch_count++;
row_count += batch.Size();
@@ -239,14 +238,13 @@ void UpdateTree(HostDeviceVector<GradientPair>* gpair, DMatrix* dmat,
TrainParam param;
param.UpdateAllowUnknown(args);
Context ctx(CreateEmptyGenericParam(0));
ObjInfo task{ObjInfo::kRegression};
tree::GPUHistMaker hist_maker{&ctx, &task};
tree::GPUHistMaker hist_maker{ctx, &task};
std::vector<HostDeviceVector<bst_node_t>> position(1);
hist_maker.Update(&param, gpair, dmat, common::Span<HostDeviceVector<bst_node_t>>{position},
{tree});
auto cache = linalg::MakeTensorView(&ctx, preds->DeviceSpan(), preds->Size(), 1);
auto cache = linalg::MakeTensorView(ctx, preds->DeviceSpan(), preds->Size(), 1);
hist_maker.UpdatePredictionCache(dmat, cache);
}
@@ -264,12 +262,13 @@ TEST(GpuHist, UniformSampling) {
// Build a tree using the in-memory DMatrix.
RegTree tree;
HostDeviceVector<bst_float> preds(kRows, 0.0, 0);
UpdateTree(&gpair, dmat.get(), 0, &tree, &preds, 1.0, "uniform", kRows);
Context ctx(CreateEmptyGenericParam(0));
UpdateTree(&ctx, &gpair, dmat.get(), 0, &tree, &preds, 1.0, "uniform", kRows);
// Build another tree using sampling.
RegTree tree_sampling;
HostDeviceVector<bst_float> preds_sampling(kRows, 0.0, 0);
UpdateTree(&gpair, dmat.get(), 0, &tree_sampling, &preds_sampling, kSubsample,
"uniform", kRows);
UpdateTree(&ctx, &gpair, dmat.get(), 0, &tree_sampling, &preds_sampling, kSubsample, "uniform",
kRows);
// Make sure the predictions are the same.
auto preds_h = preds.ConstHostVector();
@@ -293,12 +292,13 @@ TEST(GpuHist, GradientBasedSampling) {
// Build a tree using the in-memory DMatrix.
RegTree tree;
HostDeviceVector<bst_float> preds(kRows, 0.0, 0);
UpdateTree(&gpair, dmat.get(), 0, &tree, &preds, 1.0, "uniform", kRows);
Context ctx(CreateEmptyGenericParam(0));
UpdateTree(&ctx, &gpair, dmat.get(), 0, &tree, &preds, 1.0, "uniform", kRows);
// Build another tree using sampling.
RegTree tree_sampling;
HostDeviceVector<bst_float> preds_sampling(kRows, 0.0, 0);
UpdateTree(&gpair, dmat.get(), 0, &tree_sampling, &preds_sampling, kSubsample,
UpdateTree(&ctx, &gpair, dmat.get(), 0, &tree_sampling, &preds_sampling, kSubsample,
"gradient_based", kRows);
// Make sure the predictions are the same.
@@ -327,12 +327,13 @@ TEST(GpuHist, ExternalMemory) {
// Build a tree using the in-memory DMatrix.
RegTree tree;
Context ctx(CreateEmptyGenericParam(0));
HostDeviceVector<bst_float> preds(kRows, 0.0, 0);
UpdateTree(&gpair, dmat.get(), 0, &tree, &preds, 1.0, "uniform", kRows);
UpdateTree(&ctx, &gpair, dmat.get(), 0, &tree, &preds, 1.0, "uniform", kRows);
// Build another tree using multiple ELLPACK pages.
RegTree tree_ext;
HostDeviceVector<bst_float> preds_ext(kRows, 0.0, 0);
UpdateTree(&gpair, dmat_ext.get(), kPageSize, &tree_ext, &preds_ext, 1.0, "uniform", kRows);
UpdateTree(&ctx, &gpair, dmat_ext.get(), kPageSize, &tree_ext, &preds_ext, 1.0, "uniform", kRows);
// Make sure the predictions are the same.
auto preds_h = preds.ConstHostVector();
@@ -364,17 +365,17 @@ TEST(GpuHist, ExternalMemoryWithSampling) {
// Build a tree using the in-memory DMatrix.
auto rng = common::GlobalRandom();
Context ctx(CreateEmptyGenericParam(0));
RegTree tree;
HostDeviceVector<bst_float> preds(kRows, 0.0, 0);
UpdateTree(&gpair, dmat.get(), 0, &tree, &preds, kSubsample, kSamplingMethod,
kRows);
UpdateTree(&ctx, &gpair, dmat.get(), 0, &tree, &preds, kSubsample, kSamplingMethod, kRows);
// Build another tree using multiple ELLPACK pages.
common::GlobalRandom() = rng;
RegTree tree_ext;
HostDeviceVector<bst_float> preds_ext(kRows, 0.0, 0);
UpdateTree(&gpair, dmat_ext.get(), kPageSize, &tree_ext, &preds_ext,
kSubsample, kSamplingMethod, kRows);
UpdateTree(&ctx, &gpair, dmat_ext.get(), kPageSize, &tree_ext, &preds_ext, kSubsample,
kSamplingMethod, kRows);
// Make sure the predictions are the same.
auto preds_h = preds.ConstHostVector();

View File

@@ -36,7 +36,7 @@ void TestPartitioner(bst_target_t n_targets) {
std::vector<ExpandEntry> candidates{{0, 0}};
candidates.front().split.loss_chg = 0.4;
auto cuts = common::SketchOnDMatrix(Xy.get(), 64, ctx.Threads());
auto cuts = common::SketchOnDMatrix(&ctx, Xy.get(), 64);
for (auto const& page : Xy->GetBatches<SparsePage>()) {
GHistIndexMatrix gmat(page, {}, cuts, 64, true, 0.5, ctx.Threads());

View File

@@ -15,16 +15,17 @@ class DMatrixForTest : public data::SimpleDMatrix {
public:
using SimpleDMatrix::SimpleDMatrix;
BatchSet<GHistIndexMatrix> GetGradientIndex(const BatchParam& param) override {
BatchSet<GHistIndexMatrix> GetGradientIndex(Context const* ctx,
const BatchParam& param) override {
auto backup = this->gradient_index_;
auto iter = SimpleDMatrix::GetGradientIndex(param);
auto iter = SimpleDMatrix::GetGradientIndex(ctx, param);
n_regen_ += (backup != this->gradient_index_);
return iter;
}
BatchSet<EllpackPage> GetEllpackBatches(const BatchParam& param) override {
BatchSet<EllpackPage> GetEllpackBatches(Context const* ctx, const BatchParam& param) override {
auto backup = this->ellpack_page_;
auto iter = SimpleDMatrix::GetEllpackBatches(param);
auto iter = SimpleDMatrix::GetEllpackBatches(ctx, param);
n_regen_ += (backup != this->ellpack_page_);
return iter;
}
@@ -50,8 +51,8 @@ class RegenTest : public ::testing::Test {
HostDeviceVector<float> storage;
auto dense = RandomDataGenerator{kRows, kCols, 0.5}.GenerateArrayInterface(&storage);
auto adapter = data::ArrayAdapter(StringView{dense});
p_fmat_ = std::shared_ptr<DMatrix>(new DMatrixForTest{
&adapter, std::numeric_limits<float>::quiet_NaN(), AllThreadsForTest()});
p_fmat_ = std::shared_ptr<DMatrix>(
new DMatrixForTest{&adapter, std::numeric_limits<float>::quiet_NaN(), AllThreadsForTest()});
p_fmat_->Info().labels.Reshape(256, 1);
auto labels = p_fmat_->Info().labels.Data();
@@ -74,7 +75,7 @@ class RegenTest : public ::testing::Test {
auto for_test = dynamic_cast<DMatrixForTest*>(p_fmat_.get());
CHECK(for_test);
auto backup = for_test->NumRegen();
for_test->GetBatches<Page>(BatchParam{});
for_test->GetBatches<Page>(p_fmat_->Ctx(), BatchParam{});
CHECK_EQ(for_test->NumRegen(), backup);
if (reset) {

View File

@@ -18,6 +18,7 @@ class TestQuantileDMatrix:
@pytest.mark.skipif(**tm.no_cupy())
def test_dmatrix_feature_weights(self) -> None:
import cupy as cp
rng = cp.random.RandomState(1994)
data = rng.randn(5, 5)
m = xgb.DMatrix(data)
@@ -26,23 +27,91 @@ class TestQuantileDMatrix:
m.set_info(feature_weights=feature_weights)
cp.testing.assert_array_equal(
cp.array(m.get_float_info('feature_weights')),
feature_weights.astype(np.float32))
cp.array(m.get_float_info("feature_weights")),
feature_weights.astype(np.float32),
)
@pytest.mark.skipif(**tm.no_cupy())
def test_dmatrix_cupy_init(self) -> None:
import cupy as cp
data = cp.random.randn(5, 5)
xgb.QuantileDMatrix(data, cp.ones(5, dtype=np.float64))
@pytest.mark.parametrize(
"on_device,tree_method",
[(True, "hist"), (False, "gpu_hist"), (False, "hist"), (True, "gpu_hist")],
)
def test_initialization(self, on_device: bool, tree_method: str) -> None:
n_samples, n_features, max_bin = 64, 3, 16
X, y, w = tm.make_batches(
n_samples,
n_features=n_features,
n_batches=1,
use_cupy=on_device,
)
# Init SparsePage
Xy = xgb.DMatrix(X[0], y[0], weight=w[0])
# Init GIDX/Ellpack
xgb.train(
{"tree_method": tree_method, "max_bin": max_bin},
Xy,
num_boost_round=1,
)
# query cuts from GIDX/Ellpack
qXy = xgb.QuantileDMatrix(X[0], y[0], weight=w[0], max_bin=max_bin, ref=Xy)
tm.predictor_equal(Xy, qXy)
with pytest.raises(ValueError, match="Inconsistent"):
# max_bin changed.
xgb.QuantileDMatrix(X[0], y[0], weight=w[0], max_bin=max_bin - 1, ref=Xy)
# No error, DMatrix can be modified for different training session.
xgb.train(
{"tree_method": tree_method, "max_bin": max_bin - 1},
Xy,
num_boost_round=1,
)
# Init Ellpack/GIDX
Xy = xgb.QuantileDMatrix(X[0], y[0], weight=w[0], max_bin=max_bin)
# Init GIDX/Ellpack
xgb.train(
{"tree_method": tree_method, "max_bin": max_bin},
Xy,
num_boost_round=1,
)
# query cuts from GIDX/Ellpack
qXy = xgb.QuantileDMatrix(X[0], y[0], weight=w[0], max_bin=max_bin, ref=Xy)
tm.predictor_equal(Xy, qXy)
with pytest.raises(ValueError, match="Inconsistent"):
# max_bin changed.
xgb.QuantileDMatrix(X[0], y[0], weight=w[0], max_bin=max_bin - 1, ref=Xy)
Xy = xgb.DMatrix(X[0], y[0], weight=w[0])
booster0 = xgb.train(
{"tree_method": "hist", "max_bin": max_bin, "max_depth": 4},
Xy,
num_boost_round=1,
)
booster1 = xgb.train(
{"tree_method": "gpu_hist", "max_bin": max_bin, "max_depth": 4},
Xy,
num_boost_round=1,
)
qXy = xgb.QuantileDMatrix(X[0], y[0], weight=w[0], max_bin=max_bin, ref=Xy)
predt0 = booster0.predict(qXy)
predt1 = booster1.predict(qXy)
np.testing.assert_allclose(predt0, predt1)
@pytest.mark.skipif(**tm.no_cupy())
@pytest.mark.parametrize(
"tree_method,max_bin", [
("hist", 16), ("gpu_hist", 16), ("hist", 64), ("gpu_hist", 64)
]
"tree_method,max_bin",
[("hist", 16), ("gpu_hist", 16), ("hist", 64), ("gpu_hist", 64)],
)
def test_interoperability(self, tree_method: str, max_bin: int) -> None:
import cupy as cp
n_samples = 64
n_features = 3
X, y, w = tm.make_batches(
@@ -75,6 +144,7 @@ class TestQuantileDMatrix:
@pytest.mark.skipif(**tm.no_cupy())
def test_metainfo(self) -> None:
import cupy as cp
rng = cp.random.RandomState(1994)
rows = 10
@@ -98,6 +168,7 @@ class TestQuantileDMatrix:
@pytest.mark.skipif(**tm.no_cudf())
def test_ref_dmatrix(self) -> None:
import cupy as cp
rng = cp.random.RandomState(1994)
self.cputest.run_ref_dmatrix(rng, "gpu_hist", False)
@@ -158,5 +229,6 @@ class TestQuantileDMatrix:
@pytest.mark.skipif(**tm.no_cupy())
def test_check_inf(self) -> None:
import cupy as cp
rng = cp.random.default_rng(1994)
check_inf(rng)

View File

@@ -153,12 +153,18 @@ class TestGPUUpdaters:
tm.dataset_strategy
)
@settings(deadline=None, max_examples=20, print_blob=True)
def test_gpu_hist_device_dmatrix(self, param, num_rounds, dataset):
def test_gpu_hist_device_dmatrix(
self, param: dict, num_rounds: int, dataset: tm.TestDataset
) -> None:
# We cannot handle empty dataset yet
assume(len(dataset.y) > 0)
param['tree_method'] = 'gpu_hist'
param = dataset.set_params(param)
result = train_result(param, dataset.get_device_dmat(), num_rounds)
result = train_result(
param,
dataset.get_device_dmat(max_bin=param.get("max_bin", None)),
num_rounds
)
note(result)
assert tm.non_increasing(result['train'][dataset.metric], tolerance=1e-3)