merge 23Mar01
This commit is contained in:
@@ -1,12 +1,13 @@
|
||||
/*!
|
||||
* Copyright 2020-2021 by XGBoost Contributors
|
||||
/**
|
||||
* Copyright 2020-2023, XGBoost Contributors
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include "../../../../src/data/ellpack_page.cuh"
|
||||
#include "../../../../src/tree/gpu_hist/gradient_based_sampler.cuh"
|
||||
#include "../../../../src/tree/param.h"
|
||||
#include "../../filesystem.h" // dmlc::TemporaryDirectory
|
||||
#include "../../../../src/tree/param.h" // TrainParam
|
||||
#include "../../filesystem.h" // dmlc::TemporaryDirectory
|
||||
#include "../../helpers.h"
|
||||
|
||||
namespace xgboost {
|
||||
@@ -31,14 +32,15 @@ void VerifySampling(size_t page_size,
|
||||
}
|
||||
gpair.SetDevice(0);
|
||||
|
||||
BatchParam param{0, 256};
|
||||
auto page = (*dmat->GetBatches<EllpackPage>(param).begin()).Impl();
|
||||
Context ctx{MakeCUDACtx(0)};
|
||||
auto param = BatchParam{256, tree::TrainParam::DftSparseThreshold()};
|
||||
auto page = (*dmat->GetBatches<EllpackPage>(&ctx, param).begin()).Impl();
|
||||
if (page_size != 0) {
|
||||
EXPECT_NE(page->n_rows, kRows);
|
||||
}
|
||||
|
||||
GradientBasedSampler sampler(page, kRows, param, subsample, sampling_method);
|
||||
auto sample = sampler.Sample(gpair.DeviceSpan(), dmat.get());
|
||||
GradientBasedSampler sampler(&ctx, page, kRows, param, subsample, sampling_method);
|
||||
auto sample = sampler.Sample(&ctx, gpair.DeviceSpan(), dmat.get());
|
||||
|
||||
if (fixed_size_sampling) {
|
||||
EXPECT_EQ(sample.sample_rows, kRows);
|
||||
@@ -86,12 +88,13 @@ TEST(GradientBasedSampler, NoSamplingExternalMemory) {
|
||||
auto gpair = GenerateRandomGradients(kRows);
|
||||
gpair.SetDevice(0);
|
||||
|
||||
BatchParam param{0, 256};
|
||||
auto page = (*dmat->GetBatches<EllpackPage>(param).begin()).Impl();
|
||||
Context ctx{MakeCUDACtx(0)};
|
||||
auto param = BatchParam{256, tree::TrainParam::DftSparseThreshold()};
|
||||
auto page = (*dmat->GetBatches<EllpackPage>(&ctx, param).begin()).Impl();
|
||||
EXPECT_NE(page->n_rows, kRows);
|
||||
|
||||
GradientBasedSampler sampler(page, kRows, param, kSubsample, TrainParam::kUniform);
|
||||
auto sample = sampler.Sample(gpair.DeviceSpan(), dmat.get());
|
||||
GradientBasedSampler sampler(&ctx, page, kRows, param, kSubsample, TrainParam::kUniform);
|
||||
auto sample = sampler.Sample(&ctx, gpair.DeviceSpan(), dmat.get());
|
||||
auto sampled_page = sample.page;
|
||||
EXPECT_EQ(sample.sample_rows, kRows);
|
||||
EXPECT_EQ(sample.gpair.size(), gpair.Size());
|
||||
@@ -103,7 +106,7 @@ TEST(GradientBasedSampler, NoSamplingExternalMemory) {
|
||||
ci(buffer.data(), sampled_page->NumSymbols());
|
||||
|
||||
size_t offset = 0;
|
||||
for (auto& batch : dmat->GetBatches<EllpackPage>(param)) {
|
||||
for (auto& batch : dmat->GetBatches<EllpackPage>(&ctx, param)) {
|
||||
auto page = batch.Impl();
|
||||
std::vector<common::CompressedByteT> page_buffer(page->gidx_buffer.HostVector());
|
||||
common::CompressedIterator<common::CompressedByteT>
|
||||
|
||||
@@ -1,9 +1,14 @@
|
||||
/**
|
||||
* Copyright 2020-2023, XGBoost Contributors
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "../../../../src/common/categorical.h"
|
||||
#include "../../../../src/tree/gpu_hist/histogram.cuh"
|
||||
#include "../../../../src/tree/gpu_hist/row_partitioner.cuh"
|
||||
#include "../../../../src/tree/param.h" // TrainParam
|
||||
#include "../../categorical_helpers.h"
|
||||
#include "../../helpers.h"
|
||||
|
||||
@@ -11,15 +16,15 @@ namespace xgboost {
|
||||
namespace tree {
|
||||
|
||||
void TestDeterministicHistogram(bool is_dense, int shm_size) {
|
||||
Context ctx = CreateEmptyGenericParam(0);
|
||||
Context ctx = MakeCUDACtx(0);
|
||||
size_t constexpr kBins = 256, kCols = 120, kRows = 16384, kRounds = 16;
|
||||
float constexpr kLower = -1e-2, kUpper = 1e2;
|
||||
|
||||
float sparsity = is_dense ? 0.0f : 0.5f;
|
||||
auto matrix = RandomDataGenerator(kRows, kCols, sparsity).GenerateDMatrix();
|
||||
BatchParam batch_param{0, static_cast<int32_t>(kBins)};
|
||||
auto batch_param = BatchParam{kBins, tree::TrainParam::DftSparseThreshold()};
|
||||
|
||||
for (auto const& batch : matrix->GetBatches<EllpackPage>(batch_param)) {
|
||||
for (auto const& batch : matrix->GetBatches<EllpackPage>(&ctx, batch_param)) {
|
||||
auto* page = batch.Impl();
|
||||
|
||||
tree::RowPartitioner row_partitioner(0, kRows);
|
||||
@@ -132,13 +137,13 @@ void ValidateCategoricalHistogram(size_t n_categories, common::Span<GradientPair
|
||||
|
||||
// Test 1 vs rest categorical histogram is equivalent to one hot encoded data.
|
||||
void TestGPUHistogramCategorical(size_t num_categories) {
|
||||
auto ctx = CreateEmptyGenericParam(0);
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
size_t constexpr kRows = 340;
|
||||
size_t constexpr kBins = 256;
|
||||
auto x = GenerateRandomCategoricalSingleColumn(kRows, num_categories);
|
||||
auto cat_m = GetDMatrixFromData(x, kRows, 1);
|
||||
cat_m->Info().feature_types.HostVector().push_back(FeatureType::kCategorical);
|
||||
BatchParam batch_param{0, static_cast<int32_t>(kBins)};
|
||||
auto batch_param = BatchParam{kBins, tree::TrainParam::DftSparseThreshold()};
|
||||
tree::RowPartitioner row_partitioner(0, kRows);
|
||||
auto ridx = row_partitioner.GetRows(0);
|
||||
dh::device_vector<GradientPairInt64> cat_hist(num_categories);
|
||||
@@ -148,7 +153,7 @@ void TestGPUHistogramCategorical(size_t num_categories) {
|
||||
/**
|
||||
* Generate hist with cat data.
|
||||
*/
|
||||
for (auto const &batch : cat_m->GetBatches<EllpackPage>(batch_param)) {
|
||||
for (auto const &batch : cat_m->GetBatches<EllpackPage>(&ctx, batch_param)) {
|
||||
auto* page = batch.Impl();
|
||||
FeatureGroups single_group(page->Cuts());
|
||||
BuildGradientHistogram(ctx.CUDACtx(), page->GetDeviceAccessor(0),
|
||||
@@ -162,7 +167,7 @@ void TestGPUHistogramCategorical(size_t num_categories) {
|
||||
auto x_encoded = OneHotEncodeFeature(x, num_categories);
|
||||
auto encode_m = GetDMatrixFromData(x_encoded, kRows, num_categories);
|
||||
dh::device_vector<GradientPairInt64> encode_hist(2 * num_categories);
|
||||
for (auto const &batch : encode_m->GetBatches<EllpackPage>(batch_param)) {
|
||||
for (auto const &batch : encode_m->GetBatches<EllpackPage>(&ctx, batch_param)) {
|
||||
auto* page = batch.Impl();
|
||||
FeatureGroups single_group(page->Cuts());
|
||||
BuildGradientHistogram(ctx.CUDACtx(), page->GetDeviceAccessor(0),
|
||||
|
||||
@@ -41,7 +41,7 @@ void TestEvaluateSplits(bool force_read_by_column) {
|
||||
|
||||
size_t constexpr kMaxBins = 4;
|
||||
// dense, no missing values
|
||||
GHistIndexMatrix gmat(dmat.get(), kMaxBins, 0.5, false, AllThreadsForTest());
|
||||
GHistIndexMatrix gmat(&ctx, dmat.get(), kMaxBins, 0.5, false);
|
||||
common::RowSetCollection row_set_collection;
|
||||
std::vector<size_t> &row_indices = *row_set_collection.Data();
|
||||
row_indices.resize(kRows);
|
||||
@@ -228,7 +228,7 @@ auto CompareOneHotAndPartition(bool onehot) {
|
||||
auto evaluator = HistEvaluator<CPUExpandEntry>{&ctx, ¶m, dmat->Info(), sampler};
|
||||
std::vector<CPUExpandEntry> entries(1);
|
||||
|
||||
for (auto const &gmat : dmat->GetBatches<GHistIndexMatrix>({32, param.sparse_threshold})) {
|
||||
for (auto const &gmat : dmat->GetBatches<GHistIndexMatrix>(&ctx, {32, param.sparse_threshold})) {
|
||||
common::HistCollection hist;
|
||||
|
||||
entries.front().nid = 0;
|
||||
|
||||
@@ -25,6 +25,7 @@ void InitRowPartitionForTest(common::RowSetCollection *row_set, size_t n_samples
|
||||
} // anonymous namespace
|
||||
|
||||
void TestAddHistRows(bool is_distributed) {
|
||||
auto ctx = CreateEmptyGenericParam(Context::kCpuId);
|
||||
std::vector<CPUExpandEntry> nodes_for_explicit_hist_build_;
|
||||
std::vector<CPUExpandEntry> nodes_for_subtraction_trick_;
|
||||
int starting_index = std::numeric_limits<int>::max();
|
||||
@@ -32,9 +33,9 @@ void TestAddHistRows(bool is_distributed) {
|
||||
|
||||
size_t constexpr kNRows = 8, kNCols = 16;
|
||||
int32_t constexpr kMaxBins = 4;
|
||||
auto p_fmat =
|
||||
RandomDataGenerator(kNRows, kNCols, 0.8).Seed(3).GenerateDMatrix();
|
||||
auto const &gmat = *(p_fmat->GetBatches<GHistIndexMatrix>(BatchParam{kMaxBins, 0.5}).begin());
|
||||
auto p_fmat = RandomDataGenerator(kNRows, kNCols, 0.8).Seed(3).GenerateDMatrix();
|
||||
auto const &gmat =
|
||||
*(p_fmat->GetBatches<GHistIndexMatrix>(&ctx, BatchParam{kMaxBins, 0.5}).begin());
|
||||
|
||||
RegTree tree;
|
||||
|
||||
@@ -73,6 +74,7 @@ TEST(CPUHistogram, AddRows) {
|
||||
void TestSyncHist(bool is_distributed) {
|
||||
size_t constexpr kNRows = 8, kNCols = 16;
|
||||
int32_t constexpr kMaxBins = 4;
|
||||
auto ctx = CreateEmptyGenericParam(Context::kCpuId);
|
||||
|
||||
std::vector<CPUExpandEntry> nodes_for_explicit_hist_build_;
|
||||
std::vector<CPUExpandEntry> nodes_for_subtraction_trick_;
|
||||
@@ -80,9 +82,9 @@ void TestSyncHist(bool is_distributed) {
|
||||
int sync_count = 0;
|
||||
RegTree tree;
|
||||
|
||||
auto p_fmat =
|
||||
RandomDataGenerator(kNRows, kNCols, 0.8).Seed(3).GenerateDMatrix();
|
||||
auto const &gmat = *(p_fmat->GetBatches<GHistIndexMatrix>(BatchParam{kMaxBins, 0.5}).begin());
|
||||
auto p_fmat = RandomDataGenerator(kNRows, kNCols, 0.8).Seed(3).GenerateDMatrix();
|
||||
auto const &gmat =
|
||||
*(p_fmat->GetBatches<GHistIndexMatrix>(&ctx, BatchParam{kMaxBins, 0.5}).begin());
|
||||
|
||||
HistogramBuilder<CPUExpandEntry> histogram;
|
||||
uint32_t total_bins = gmat.cut.Ptrs().back();
|
||||
@@ -227,12 +229,15 @@ TEST(CPUHistogram, SyncHist) {
|
||||
void TestBuildHistogram(bool is_distributed, bool force_read_by_column, bool is_col_split) {
|
||||
size_t constexpr kNRows = 8, kNCols = 16;
|
||||
int32_t constexpr kMaxBins = 4;
|
||||
auto p_fmat = RandomDataGenerator(kNRows, kNCols, 0.8).Seed(3).GenerateDMatrix();
|
||||
auto ctx = CreateEmptyGenericParam(Context::kCpuId);
|
||||
auto p_fmat =
|
||||
RandomDataGenerator(kNRows, kNCols, 0.8).Seed(3).GenerateDMatrix();
|
||||
if (is_col_split) {
|
||||
p_fmat = std::shared_ptr<DMatrix>{
|
||||
p_fmat->SliceCol(collective::GetWorldSize(), collective::GetRank())};
|
||||
}
|
||||
auto const &gmat = *(p_fmat->GetBatches<GHistIndexMatrix>(BatchParam{kMaxBins, 0.5}).begin());
|
||||
auto const &gmat =
|
||||
*(p_fmat->GetBatches<GHistIndexMatrix>(&ctx, BatchParam{kMaxBins, 0.5}).begin());
|
||||
uint32_t total_bins = gmat.cut.Ptrs().back();
|
||||
|
||||
static double constexpr kEps = 1e-6;
|
||||
@@ -257,9 +262,9 @@ void TestBuildHistogram(bool is_distributed, bool force_read_by_column, bool is_
|
||||
CPUExpandEntry node{RegTree::kRoot, tree.GetDepth(0)};
|
||||
std::vector<CPUExpandEntry> nodes_for_explicit_hist_build;
|
||||
nodes_for_explicit_hist_build.push_back(node);
|
||||
for (auto const &gidx : p_fmat->GetBatches<GHistIndexMatrix>({kMaxBins, 0.5})) {
|
||||
histogram.BuildHist(0, gidx, &tree, row_set_collection,
|
||||
nodes_for_explicit_hist_build, {}, gpair, force_read_by_column);
|
||||
for (auto const &gidx : p_fmat->GetBatches<GHistIndexMatrix>(&ctx, {kMaxBins, 0.5})) {
|
||||
histogram.BuildHist(0, gidx, &tree, row_set_collection, nodes_for_explicit_hist_build, {},
|
||||
gpair, force_read_by_column);
|
||||
}
|
||||
|
||||
// Check if number of histogram bins is correct
|
||||
@@ -325,6 +330,8 @@ void TestHistogramCategorical(size_t n_categories, bool force_read_by_column) {
|
||||
auto x = GenerateRandomCategoricalSingleColumn(kRows, n_categories);
|
||||
auto cat_m = GetDMatrixFromData(x, kRows, 1);
|
||||
cat_m->Info().feature_types.HostVector().push_back(FeatureType::kCategorical);
|
||||
auto ctx = CreateEmptyGenericParam(Context::kCpuId);
|
||||
|
||||
BatchParam batch_param{0, static_cast<int32_t>(kBins)};
|
||||
|
||||
RegTree tree;
|
||||
@@ -345,12 +352,11 @@ void TestHistogramCategorical(size_t n_categories, bool force_read_by_column) {
|
||||
* Generate hist with cat data.
|
||||
*/
|
||||
HistogramBuilder<CPUExpandEntry> cat_hist;
|
||||
for (auto const &gidx : cat_m->GetBatches<GHistIndexMatrix>({kBins, 0.5})) {
|
||||
for (auto const &gidx : cat_m->GetBatches<GHistIndexMatrix>(&ctx, {kBins, 0.5})) {
|
||||
auto total_bins = gidx.cut.TotalBins();
|
||||
cat_hist.Reset(total_bins, {kBins, 0.5}, omp_get_max_threads(), 1, false, false);
|
||||
cat_hist.BuildHist(0, gidx, &tree, row_set_collection,
|
||||
nodes_for_explicit_hist_build, {}, gpair.HostVector(),
|
||||
force_read_by_column);
|
||||
cat_hist.BuildHist(0, gidx, &tree, row_set_collection, nodes_for_explicit_hist_build, {},
|
||||
gpair.HostVector(), force_read_by_column);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -359,12 +365,11 @@ void TestHistogramCategorical(size_t n_categories, bool force_read_by_column) {
|
||||
auto x_encoded = OneHotEncodeFeature(x, n_categories);
|
||||
auto encode_m = GetDMatrixFromData(x_encoded, kRows, n_categories);
|
||||
HistogramBuilder<CPUExpandEntry> onehot_hist;
|
||||
for (auto const &gidx : encode_m->GetBatches<GHistIndexMatrix>({kBins, 0.5})) {
|
||||
for (auto const &gidx : encode_m->GetBatches<GHistIndexMatrix>(&ctx, {kBins, 0.5})) {
|
||||
auto total_bins = gidx.cut.TotalBins();
|
||||
onehot_hist.Reset(total_bins, {kBins, 0.5}, omp_get_max_threads(), 1, false, false);
|
||||
onehot_hist.BuildHist(0, gidx, &tree, row_set_collection, nodes_for_explicit_hist_build, {},
|
||||
gpair.HostVector(),
|
||||
force_read_by_column);
|
||||
gpair.HostVector(), force_read_by_column);
|
||||
}
|
||||
|
||||
auto cat = cat_hist.Histogram()[0];
|
||||
@@ -382,8 +387,8 @@ TEST(CPUHistogram, Categorical) {
|
||||
}
|
||||
}
|
||||
namespace {
|
||||
void TestHistogramExternalMemory(BatchParam batch_param, bool is_approx, bool force_read_by_column) {
|
||||
Context ctx;
|
||||
void TestHistogramExternalMemory(Context const *ctx, BatchParam batch_param, bool is_approx,
|
||||
bool force_read_by_column) {
|
||||
size_t constexpr kEntries = 1 << 16;
|
||||
auto m = CreateSparsePageDMatrix(kEntries, "cache");
|
||||
|
||||
@@ -410,7 +415,7 @@ void TestHistogramExternalMemory(BatchParam batch_param, bool is_approx, bool fo
|
||||
* Multi page
|
||||
*/
|
||||
std::vector<common::RowSetCollection> rows_set;
|
||||
for (auto const &page : m->GetBatches<GHistIndexMatrix>(batch_param)) {
|
||||
for (auto const &page : m->GetBatches<GHistIndexMatrix>(ctx, batch_param)) {
|
||||
CHECK_LT(page.base_rowid, m->Info().num_row_);
|
||||
auto n_rows_in_node = page.Size();
|
||||
partition_size[0] = std::max(partition_size[0], n_rows_in_node);
|
||||
@@ -426,12 +431,12 @@ void TestHistogramExternalMemory(BatchParam batch_param, bool is_approx, bool fo
|
||||
1, [&](size_t nidx_in_set) { return partition_size.at(nidx_in_set); },
|
||||
256};
|
||||
|
||||
multi_build.Reset(total_bins, batch_param, ctx.Threads(), rows_set.size(), false, false);
|
||||
multi_build.Reset(total_bins, batch_param, ctx->Threads(), rows_set.size(), false, false);
|
||||
|
||||
size_t page_idx{0};
|
||||
for (auto const &page : m->GetBatches<GHistIndexMatrix>(batch_param)) {
|
||||
multi_build.BuildHist(page_idx, space, page, &tree, rows_set.at(page_idx), nodes, {},
|
||||
h_gpair, force_read_by_column);
|
||||
for (auto const &page : m->GetBatches<GHistIndexMatrix>(ctx, batch_param)) {
|
||||
multi_build.BuildHist(page_idx, space, page, &tree, rows_set.at(page_idx), nodes, {}, h_gpair,
|
||||
force_read_by_column);
|
||||
++page_idx;
|
||||
}
|
||||
ASSERT_EQ(page_idx, 2);
|
||||
@@ -447,16 +452,16 @@ void TestHistogramExternalMemory(BatchParam batch_param, bool is_approx, bool fo
|
||||
common::RowSetCollection row_set_collection;
|
||||
InitRowPartitionForTest(&row_set_collection, n_samples);
|
||||
|
||||
single_build.Reset(total_bins, batch_param, ctx.Threads(), 1, false, false);
|
||||
single_build.Reset(total_bins, batch_param, ctx->Threads(), 1, false, false);
|
||||
SparsePage concat;
|
||||
std::vector<float> hess(m->Info().num_row_, 1.0f);
|
||||
for (auto const& page : m->GetBatches<SparsePage>()) {
|
||||
concat.Push(page);
|
||||
}
|
||||
|
||||
auto cut = common::SketchOnDMatrix(m.get(), batch_param.max_bin, ctx.Threads(), false, hess);
|
||||
auto cut = common::SketchOnDMatrix(ctx, m.get(), batch_param.max_bin, false, hess);
|
||||
GHistIndexMatrix gmat(concat, {}, cut, batch_param.max_bin, false,
|
||||
std::numeric_limits<double>::quiet_NaN(), ctx.Threads());
|
||||
std::numeric_limits<double>::quiet_NaN(), ctx->Threads());
|
||||
single_build.BuildHist(0, gmat, &tree, row_set_collection, nodes, {}, h_gpair, force_read_by_column);
|
||||
single_page = single_build.Histogram()[0];
|
||||
}
|
||||
@@ -470,16 +475,17 @@ void TestHistogramExternalMemory(BatchParam batch_param, bool is_approx, bool fo
|
||||
|
||||
TEST(CPUHistogram, ExternalMemory) {
|
||||
int32_t constexpr kBins = 256;
|
||||
TestHistogramExternalMemory(BatchParam{kBins, common::Span<float>{}, false}, true, false);
|
||||
TestHistogramExternalMemory(BatchParam{kBins, common::Span<float>{}, false}, true, true);
|
||||
auto ctx = CreateEmptyGenericParam(Context::kCpuId);
|
||||
|
||||
TestHistogramExternalMemory(&ctx, BatchParam{kBins, common::Span<float>{}, false}, true, false);
|
||||
TestHistogramExternalMemory(&ctx, BatchParam{kBins, common::Span<float>{}, false}, true, true);
|
||||
|
||||
float sparse_thresh{0.5};
|
||||
TestHistogramExternalMemory({kBins, sparse_thresh}, false, false);
|
||||
TestHistogramExternalMemory({kBins, sparse_thresh}, false, true);
|
||||
TestHistogramExternalMemory(&ctx, {kBins, sparse_thresh}, false, false);
|
||||
TestHistogramExternalMemory(&ctx, {kBins, sparse_thresh}, false, true);
|
||||
sparse_thresh = std::numeric_limits<float>::quiet_NaN();
|
||||
TestHistogramExternalMemory({kBins, sparse_thresh}, false, false);
|
||||
TestHistogramExternalMemory({kBins, sparse_thresh}, false, true);
|
||||
|
||||
TestHistogramExternalMemory(&ctx, {kBins, sparse_thresh}, false, false);
|
||||
TestHistogramExternalMemory(&ctx, {kBins, sparse_thresh}, false, true);
|
||||
}
|
||||
} // namespace tree
|
||||
} // namespace xgboost
|
||||
|
||||
@@ -34,7 +34,7 @@ TEST(Approx, Partitioner) {
|
||||
std::vector<CPUExpandEntry> candidates{{0, 0}};
|
||||
candidates.front().split.loss_chg = 0.4;
|
||||
|
||||
for (auto const& page : Xy->GetBatches<GHistIndexMatrix>({64, hess, true})) {
|
||||
for (auto const& page : Xy->GetBatches<GHistIndexMatrix>(&ctx, {64, hess, true})) {
|
||||
bst_feature_t const split_ind = 0;
|
||||
{
|
||||
auto min_value = page.cut.MinValues()[split_ind];
|
||||
@@ -84,7 +84,7 @@ void TestColumnSplitPartitioner(size_t n_samples, size_t base_rowid, std::shared
|
||||
|
||||
Context ctx;
|
||||
ctx.InitAllowUnknown(Args{});
|
||||
for (auto const& page : dmat->GetBatches<GHistIndexMatrix>({64, *hess, true})) {
|
||||
for (auto const& page : dmat->GetBatches<GHistIndexMatrix>(&ctx, {64, *hess, true})) {
|
||||
{
|
||||
RegTree tree;
|
||||
CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid, true};
|
||||
@@ -133,7 +133,7 @@ TEST(Approx, PartitionerColSplit) {
|
||||
Context ctx;
|
||||
ctx.InitAllowUnknown(Args{});
|
||||
CommonRowPartitioner mid_partitioner{&ctx, n_samples, base_rowid, false};
|
||||
for (auto const& page : Xy->GetBatches<GHistIndexMatrix>({64, hess, true})) {
|
||||
for (auto const& page : Xy->GetBatches<GHistIndexMatrix>(&ctx, {64, hess, true})) {
|
||||
bst_feature_t const split_ind = 0;
|
||||
min_value = page.cut.MinValues()[split_ind];
|
||||
|
||||
|
||||
@@ -43,7 +43,7 @@ void TestLeafPartition(size_t n_samples) {
|
||||
|
||||
std::vector<size_t> h_nptr;
|
||||
float split_value{0};
|
||||
for (auto const& page : Xy->GetBatches<GHistIndexMatrix>({Context::kCpuId, 64})) {
|
||||
for (auto const& page : Xy->GetBatches<GHistIndexMatrix>(&ctx, BatchParam{64, 0.2})) {
|
||||
bst_feature_t const split_ind = 0;
|
||||
auto ptr = page.cut.Ptrs()[split_ind + 1];
|
||||
split_value = page.cut.Values().at(ptr / 2);
|
||||
|
||||
@@ -218,17 +218,16 @@ TEST(GpuHist, TestHistogramIndex) {
|
||||
TestHistogramIndexImpl();
|
||||
}
|
||||
|
||||
void UpdateTree(HostDeviceVector<GradientPair>* gpair, DMatrix* dmat,
|
||||
size_t gpu_page_size, RegTree* tree,
|
||||
HostDeviceVector<bst_float>* preds, float subsample = 1.0f,
|
||||
const std::string& sampling_method = "uniform",
|
||||
void UpdateTree(Context const* ctx, HostDeviceVector<GradientPair>* gpair, DMatrix* dmat,
|
||||
size_t gpu_page_size, RegTree* tree, HostDeviceVector<bst_float>* preds,
|
||||
float subsample = 1.0f, const std::string& sampling_method = "uniform",
|
||||
int max_bin = 2) {
|
||||
|
||||
if (gpu_page_size > 0) {
|
||||
// Loop over the batches and count the records
|
||||
int64_t batch_count = 0;
|
||||
int64_t row_count = 0;
|
||||
for (const auto& batch : dmat->GetBatches<EllpackPage>({0, max_bin})) {
|
||||
for (const auto& batch : dmat->GetBatches<EllpackPage>(
|
||||
ctx, BatchParam{max_bin, TrainParam::DftSparseThreshold()})) {
|
||||
EXPECT_LT(batch.Size(), dmat->Info().num_row_);
|
||||
batch_count++;
|
||||
row_count += batch.Size();
|
||||
@@ -249,14 +248,13 @@ void UpdateTree(HostDeviceVector<GradientPair>* gpair, DMatrix* dmat,
|
||||
TrainParam param;
|
||||
param.UpdateAllowUnknown(args);
|
||||
|
||||
Context ctx(CreateEmptyGenericParam(0));
|
||||
ObjInfo task{ObjInfo::kRegression};
|
||||
tree::GPUHistMaker hist_maker{&ctx, &task};
|
||||
tree::GPUHistMaker hist_maker{ctx, &task};
|
||||
|
||||
std::vector<HostDeviceVector<bst_node_t>> position(1);
|
||||
hist_maker.Update(¶m, gpair, dmat, common::Span<HostDeviceVector<bst_node_t>>{position},
|
||||
{tree});
|
||||
auto cache = linalg::MakeTensorView(&ctx, preds->DeviceSpan(), preds->Size(), 1);
|
||||
auto cache = linalg::MakeTensorView(ctx, preds->DeviceSpan(), preds->Size(), 1);
|
||||
hist_maker.UpdatePredictionCache(dmat, cache);
|
||||
}
|
||||
|
||||
@@ -274,12 +272,13 @@ TEST(GpuHist, UniformSampling) {
|
||||
// Build a tree using the in-memory DMatrix.
|
||||
RegTree tree;
|
||||
HostDeviceVector<bst_float> preds(kRows, 0.0, 0);
|
||||
UpdateTree(&gpair, dmat.get(), 0, &tree, &preds, 1.0, "uniform", kRows);
|
||||
Context ctx(CreateEmptyGenericParam(0));
|
||||
UpdateTree(&ctx, &gpair, dmat.get(), 0, &tree, &preds, 1.0, "uniform", kRows);
|
||||
// Build another tree using sampling.
|
||||
RegTree tree_sampling;
|
||||
HostDeviceVector<bst_float> preds_sampling(kRows, 0.0, 0);
|
||||
UpdateTree(&gpair, dmat.get(), 0, &tree_sampling, &preds_sampling, kSubsample,
|
||||
"uniform", kRows);
|
||||
UpdateTree(&ctx, &gpair, dmat.get(), 0, &tree_sampling, &preds_sampling, kSubsample, "uniform",
|
||||
kRows);
|
||||
|
||||
// Make sure the predictions are the same.
|
||||
auto preds_h = preds.ConstHostVector();
|
||||
@@ -303,12 +302,13 @@ TEST(GpuHist, GradientBasedSampling) {
|
||||
// Build a tree using the in-memory DMatrix.
|
||||
RegTree tree;
|
||||
HostDeviceVector<bst_float> preds(kRows, 0.0, 0);
|
||||
UpdateTree(&gpair, dmat.get(), 0, &tree, &preds, 1.0, "uniform", kRows);
|
||||
Context ctx(CreateEmptyGenericParam(0));
|
||||
UpdateTree(&ctx, &gpair, dmat.get(), 0, &tree, &preds, 1.0, "uniform", kRows);
|
||||
|
||||
// Build another tree using sampling.
|
||||
RegTree tree_sampling;
|
||||
HostDeviceVector<bst_float> preds_sampling(kRows, 0.0, 0);
|
||||
UpdateTree(&gpair, dmat.get(), 0, &tree_sampling, &preds_sampling, kSubsample,
|
||||
UpdateTree(&ctx, &gpair, dmat.get(), 0, &tree_sampling, &preds_sampling, kSubsample,
|
||||
"gradient_based", kRows);
|
||||
|
||||
// Make sure the predictions are the same.
|
||||
@@ -337,12 +337,13 @@ TEST(GpuHist, ExternalMemory) {
|
||||
|
||||
// Build a tree using the in-memory DMatrix.
|
||||
RegTree tree;
|
||||
Context ctx(CreateEmptyGenericParam(0));
|
||||
HostDeviceVector<bst_float> preds(kRows, 0.0, 0);
|
||||
UpdateTree(&gpair, dmat.get(), 0, &tree, &preds, 1.0, "uniform", kRows);
|
||||
UpdateTree(&ctx, &gpair, dmat.get(), 0, &tree, &preds, 1.0, "uniform", kRows);
|
||||
// Build another tree using multiple ELLPACK pages.
|
||||
RegTree tree_ext;
|
||||
HostDeviceVector<bst_float> preds_ext(kRows, 0.0, 0);
|
||||
UpdateTree(&gpair, dmat_ext.get(), kPageSize, &tree_ext, &preds_ext, 1.0, "uniform", kRows);
|
||||
UpdateTree(&ctx, &gpair, dmat_ext.get(), kPageSize, &tree_ext, &preds_ext, 1.0, "uniform", kRows);
|
||||
|
||||
// Make sure the predictions are the same.
|
||||
auto preds_h = preds.ConstHostVector();
|
||||
@@ -374,17 +375,17 @@ TEST(GpuHist, ExternalMemoryWithSampling) {
|
||||
// Build a tree using the in-memory DMatrix.
|
||||
auto rng = common::GlobalRandom();
|
||||
|
||||
Context ctx(CreateEmptyGenericParam(0));
|
||||
RegTree tree;
|
||||
HostDeviceVector<bst_float> preds(kRows, 0.0, 0);
|
||||
UpdateTree(&gpair, dmat.get(), 0, &tree, &preds, kSubsample, kSamplingMethod,
|
||||
kRows);
|
||||
UpdateTree(&ctx, &gpair, dmat.get(), 0, &tree, &preds, kSubsample, kSamplingMethod, kRows);
|
||||
|
||||
// Build another tree using multiple ELLPACK pages.
|
||||
common::GlobalRandom() = rng;
|
||||
RegTree tree_ext;
|
||||
HostDeviceVector<bst_float> preds_ext(kRows, 0.0, 0);
|
||||
UpdateTree(&gpair, dmat_ext.get(), kPageSize, &tree_ext, &preds_ext,
|
||||
kSubsample, kSamplingMethod, kRows);
|
||||
UpdateTree(&ctx, &gpair, dmat_ext.get(), kPageSize, &tree_ext, &preds_ext, kSubsample,
|
||||
kSamplingMethod, kRows);
|
||||
|
||||
// Make sure the predictions are the same.
|
||||
auto preds_h = preds.ConstHostVector();
|
||||
|
||||
@@ -36,7 +36,7 @@ void TestPartitioner(bst_target_t n_targets) {
|
||||
std::vector<ExpandEntry> candidates{{0, 0}};
|
||||
candidates.front().split.loss_chg = 0.4;
|
||||
|
||||
auto cuts = common::SketchOnDMatrix(Xy.get(), 64, ctx.Threads());
|
||||
auto cuts = common::SketchOnDMatrix(&ctx, Xy.get(), 64);
|
||||
|
||||
for (auto const& page : Xy->GetBatches<SparsePage>()) {
|
||||
GHistIndexMatrix gmat(page, {}, cuts, 64, true, 0.5, ctx.Threads());
|
||||
|
||||
@@ -15,16 +15,17 @@ class DMatrixForTest : public data::SimpleDMatrix {
|
||||
|
||||
public:
|
||||
using SimpleDMatrix::SimpleDMatrix;
|
||||
BatchSet<GHistIndexMatrix> GetGradientIndex(const BatchParam& param) override {
|
||||
BatchSet<GHistIndexMatrix> GetGradientIndex(Context const* ctx,
|
||||
const BatchParam& param) override {
|
||||
auto backup = this->gradient_index_;
|
||||
auto iter = SimpleDMatrix::GetGradientIndex(param);
|
||||
auto iter = SimpleDMatrix::GetGradientIndex(ctx, param);
|
||||
n_regen_ += (backup != this->gradient_index_);
|
||||
return iter;
|
||||
}
|
||||
|
||||
BatchSet<EllpackPage> GetEllpackBatches(const BatchParam& param) override {
|
||||
BatchSet<EllpackPage> GetEllpackBatches(Context const* ctx, const BatchParam& param) override {
|
||||
auto backup = this->ellpack_page_;
|
||||
auto iter = SimpleDMatrix::GetEllpackBatches(param);
|
||||
auto iter = SimpleDMatrix::GetEllpackBatches(ctx, param);
|
||||
n_regen_ += (backup != this->ellpack_page_);
|
||||
return iter;
|
||||
}
|
||||
@@ -50,8 +51,8 @@ class RegenTest : public ::testing::Test {
|
||||
HostDeviceVector<float> storage;
|
||||
auto dense = RandomDataGenerator{kRows, kCols, 0.5}.GenerateArrayInterface(&storage);
|
||||
auto adapter = data::ArrayAdapter(StringView{dense});
|
||||
p_fmat_ = std::shared_ptr<DMatrix>(new DMatrixForTest{
|
||||
&adapter, std::numeric_limits<float>::quiet_NaN(), AllThreadsForTest()});
|
||||
p_fmat_ = std::shared_ptr<DMatrix>(
|
||||
new DMatrixForTest{&adapter, std::numeric_limits<float>::quiet_NaN(), AllThreadsForTest()});
|
||||
|
||||
p_fmat_->Info().labels.Reshape(256, 1);
|
||||
auto labels = p_fmat_->Info().labels.Data();
|
||||
@@ -74,7 +75,7 @@ class RegenTest : public ::testing::Test {
|
||||
auto for_test = dynamic_cast<DMatrixForTest*>(p_fmat_.get());
|
||||
CHECK(for_test);
|
||||
auto backup = for_test->NumRegen();
|
||||
for_test->GetBatches<Page>(BatchParam{});
|
||||
for_test->GetBatches<Page>(p_fmat_->Ctx(), BatchParam{});
|
||||
CHECK_EQ(for_test->NumRegen(), backup);
|
||||
|
||||
if (reset) {
|
||||
|
||||
Reference in New Issue
Block a user