Merge branch 'master' into dev-hui

amdsc21
2023-03-08 00:39:33 +01:00
221 changed files with 3122 additions and 1486 deletions

View File

@@ -9,12 +9,14 @@
#include "../../../../src/tree/hist/evaluate_splits.h"
#include "../test_evaluate_splits.h"
#include "../../helpers.h"
#include "xgboost/context.h" // Context
namespace xgboost {
namespace tree {
void TestEvaluateSplits(bool force_read_by_column) {
Context ctx;
ctx.nthread = 4;
int static constexpr kRows = 8, kCols = 16;
int32_t n_threads = std::min(omp_get_max_threads(), 4);
auto sampler = std::make_shared<common::ColumnSampler>();
TrainParam param;
@@ -22,7 +24,7 @@ void TestEvaluateSplits(bool force_read_by_column) {
auto dmat = RandomDataGenerator(kRows, kCols, 0).Seed(3).GenerateDMatrix();
auto evaluator = HistEvaluator<CPUExpandEntry>{param, dmat->Info(), n_threads, sampler};
auto evaluator = HistEvaluator<CPUExpandEntry>{&ctx, &param, dmat->Info(), sampler};
common::HistCollection hist;
std::vector<GradientPair> row_gpairs = {
{1.23f, 0.24f}, {0.24f, 0.25f}, {0.26f, 0.27f}, {2.27f, 0.28f},
@@ -86,13 +88,15 @@ TEST(HistEvaluator, Evaluate) {
}
TEST(HistEvaluator, Apply) {
Context ctx;
ctx.nthread = 4;
RegTree tree;
int static constexpr kNRows = 8, kNCols = 16;
TrainParam param;
param.UpdateAllowUnknown(Args{{"min_child_weight", "0"}, {"reg_lambda", "0.0"}});
auto dmat = RandomDataGenerator(kNRows, kNCols, 0).Seed(3).GenerateDMatrix();
auto sampler = std::make_shared<common::ColumnSampler>();
auto evaluator_ = HistEvaluator<CPUExpandEntry>{param, dmat->Info(), 4, sampler};
auto evaluator_ = HistEvaluator<CPUExpandEntry>{&ctx, &param, dmat->Info(), sampler};
CPUExpandEntry entry{0, 0, 10.0f};
entry.split.left_sum = GradStats{0.4, 0.6f};
@@ -115,10 +119,11 @@ TEST(HistEvaluator, Apply) {
}
TEST_F(TestPartitionBasedSplit, CPUHist) {
Context ctx;
// check the evaluator is returning the optimal split
std::vector<FeatureType> ft{FeatureType::kCategorical};
auto sampler = std::make_shared<common::ColumnSampler>();
HistEvaluator<CPUExpandEntry> evaluator{param_, info_, AllThreadsForTest(), sampler};
HistEvaluator<CPUExpandEntry> evaluator{&ctx, &param_, info_, sampler};
evaluator.InitRoot(GradStats{total_gpair_});
RegTree tree;
std::vector<CPUExpandEntry> entries(1);
@@ -128,6 +133,7 @@ TEST_F(TestPartitionBasedSplit, CPUHist) {
namespace {
auto CompareOneHotAndPartition(bool onehot) {
Context ctx;
int static constexpr kRows = 128, kCols = 1;
std::vector<FeatureType> ft(kCols, FeatureType::kCategorical);
@@ -147,8 +153,7 @@ auto CompareOneHotAndPartition(bool onehot) {
RandomDataGenerator(kRows, kCols, 0).Seed(3).Type(ft).MaxCategory(n_cats).GenerateDMatrix();
auto sampler = std::make_shared<common::ColumnSampler>();
auto evaluator =
HistEvaluator<CPUExpandEntry>{param, dmat->Info(), AllThreadsForTest(), sampler};
auto evaluator = HistEvaluator<CPUExpandEntry>{&ctx, &param, dmat->Info(), sampler};
std::vector<CPUExpandEntry> entries(1);
for (auto const &gmat : dmat->GetBatches<GHistIndexMatrix>({32, param.sparse_threshold})) {
@@ -198,8 +203,8 @@ TEST_F(TestCategoricalSplitWithMissing, HistEvaluator) {
MetaInfo info;
info.num_col_ = 1;
info.feature_types = {FeatureType::kCategorical};
auto evaluator =
HistEvaluator<CPUExpandEntry>{param_, info, AllThreadsForTest(), sampler};
Context ctx;
auto evaluator = HistEvaluator<CPUExpandEntry>{&ctx, &param_, info, sampler};
evaluator.InitRoot(GradStats{parent_sum_});
std::vector<CPUExpandEntry> entries(1);
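Every hunk in this file follows the same migration: HistEvaluator no longer receives an explicit thread count or a bare TrainParam, it takes a Context pointer (which carries nthread) and a TrainParam pointer. Below is a minimal sketch of the new construction pattern, using only names that appear in the diff; the wrapper function itself is illustrative, not part of the change.

// Illustrative wrapper; HistEvaluator, Context, TrainParam and the helpers come from the diff.
void MakeEvaluatorSketch(std::shared_ptr<DMatrix> dmat) {
  Context ctx;
  ctx.nthread = 4;  // thread count now lives in the Context instead of a separate argument
  TrainParam param;
  param.UpdateAllowUnknown(Args{{"min_child_weight", "0"}, {"reg_lambda", "0.0"}});
  auto sampler = std::make_shared<common::ColumnSampler>();
  // Old: HistEvaluator<CPUExpandEntry>{param, dmat->Info(), n_threads, sampler}
  // New: Context and TrainParam are passed by pointer, MetaInfo stays by reference.
  auto evaluator = HistEvaluator<CPUExpandEntry>{&ctx, &param, dmat->Info(), sampler};
  (void)evaluator;
}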

View File

@@ -48,7 +48,7 @@ void TestAddHistRows(bool is_distributed) {
HistogramBuilder<CPUExpandEntry> histogram_builder;
histogram_builder.Reset(gmat.cut.TotalBins(), {kMaxBins, 0.5}, omp_get_max_threads(), 1,
is_distributed);
is_distributed, false);
histogram_builder.AddHistRows(&starting_index, &sync_count,
nodes_for_explicit_hist_build_,
nodes_for_subtraction_trick_, &tree);
@@ -86,7 +86,7 @@ void TestSyncHist(bool is_distributed) {
HistogramBuilder<CPUExpandEntry> histogram;
uint32_t total_bins = gmat.cut.Ptrs().back();
histogram.Reset(total_bins, {kMaxBins, 0.5}, omp_get_max_threads(), 1, is_distributed);
histogram.Reset(total_bins, {kMaxBins, 0.5}, omp_get_max_threads(), 1, is_distributed, false);
common::RowSetCollection row_set_collection_;
{
@@ -226,11 +226,14 @@ TEST(CPUHistogram, SyncHist) {
TestSyncHist(false);
}
void TestBuildHistogram(bool is_distributed, bool force_read_by_column) {
void TestBuildHistogram(bool is_distributed, bool force_read_by_column, bool is_col_split) {
size_t constexpr kNRows = 8, kNCols = 16;
int32_t constexpr kMaxBins = 4;
auto p_fmat =
RandomDataGenerator(kNRows, kNCols, 0.8).Seed(3).GenerateDMatrix();
auto p_fmat = RandomDataGenerator(kNRows, kNCols, 0.8).Seed(3).GenerateDMatrix();
if (is_col_split) {
p_fmat = std::shared_ptr<DMatrix>{
p_fmat->SliceCol(collective::GetWorldSize(), collective::GetRank())};
}
auto const &gmat = *(p_fmat->GetBatches<GHistIndexMatrix>(BatchParam{kMaxBins, 0.5}).begin());
uint32_t total_bins = gmat.cut.Ptrs().back();
@@ -241,7 +244,8 @@ void TestBuildHistogram(bool is_distributed, bool force_read_by_column) {
bst_node_t nid = 0;
HistogramBuilder<CPUExpandEntry> histogram;
histogram.Reset(total_bins, {kMaxBins, 0.5}, omp_get_max_threads(), 1, is_distributed);
histogram.Reset(total_bins, {kMaxBins, 0.5}, omp_get_max_threads(), 1, is_distributed,
is_col_split);
RegTree tree;
@@ -284,11 +288,16 @@ void TestBuildHistogram(bool is_distributed, bool force_read_by_column) {
}
TEST(CPUHistogram, BuildHist) {
TestBuildHistogram(true, false);
TestBuildHistogram(false, false);
TestBuildHistogram(true, true);
TestBuildHistogram(false, true);
TestBuildHistogram(true, false, false);
TestBuildHistogram(false, false, false);
TestBuildHistogram(true, true, false);
TestBuildHistogram(false, true, false);
}
TEST(CPUHistogram, BuildHistColSplit) {
auto constexpr kWorkers = 4;
RunWithInMemoryCommunicator(kWorkers, TestBuildHistogram, true, true, true);
RunWithInMemoryCommunicator(kWorkers, TestBuildHistogram, true, false, true);
}
namespace {
@@ -340,7 +349,7 @@ void TestHistogramCategorical(size_t n_categories, bool force_read_by_column) {
HistogramBuilder<CPUExpandEntry> cat_hist;
for (auto const &gidx : cat_m->GetBatches<GHistIndexMatrix>({kBins, 0.5})) {
auto total_bins = gidx.cut.TotalBins();
cat_hist.Reset(total_bins, {kBins, 0.5}, omp_get_max_threads(), 1, false);
cat_hist.Reset(total_bins, {kBins, 0.5}, omp_get_max_threads(), 1, false, false);
cat_hist.BuildHist(0, gidx, &tree, row_set_collection,
nodes_for_explicit_hist_build, {}, gpair.HostVector(),
force_read_by_column);
@@ -354,7 +363,7 @@ void TestHistogramCategorical(size_t n_categories, bool force_read_by_column) {
HistogramBuilder<CPUExpandEntry> onehot_hist;
for (auto const &gidx : encode_m->GetBatches<GHistIndexMatrix>({kBins, 0.5})) {
auto total_bins = gidx.cut.TotalBins();
onehot_hist.Reset(total_bins, {kBins, 0.5}, omp_get_max_threads(), 1, false);
onehot_hist.Reset(total_bins, {kBins, 0.5}, omp_get_max_threads(), 1, false, false);
onehot_hist.BuildHist(0, gidx, &tree, row_set_collection, nodes_for_explicit_hist_build, {},
gpair.HostVector(),
force_read_by_column);
@@ -419,7 +428,7 @@ void TestHistogramExternalMemory(BatchParam batch_param, bool is_approx, bool fo
1, [&](size_t nidx_in_set) { return partition_size.at(nidx_in_set); },
256};
multi_build.Reset(total_bins, batch_param, ctx.Threads(), rows_set.size(), false);
multi_build.Reset(total_bins, batch_param, ctx.Threads(), rows_set.size(), false, false);
size_t page_idx{0};
for (auto const &page : m->GetBatches<GHistIndexMatrix>(batch_param)) {
@@ -440,7 +449,7 @@ void TestHistogramExternalMemory(BatchParam batch_param, bool is_approx, bool fo
common::RowSetCollection row_set_collection;
InitRowPartitionForTest(&row_set_collection, n_samples);
single_build.Reset(total_bins, batch_param, ctx.Threads(), 1, false);
single_build.Reset(total_bins, batch_param, ctx.Threads(), 1, false, false);
SparsePage concat;
std::vector<float> hess(m->Info().num_row_, 1.0f);
for (auto const& page : m->GetBatches<SparsePage>()) {
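The recurring change in this file is a new trailing is_col_split flag on HistogramBuilder::Reset, plus a column-split variant of the build test that slices the DMatrix per worker. A condensed sketch of that flow follows, assuming the same test helpers as the diff (RandomDataGenerator, collective::GetWorldSize/GetRank); the wrapper name is made up.

// Illustrative only: the Reset signature and SliceCol call mirror the hunks above.
void BuildHistSketch(bool is_distributed, bool is_col_split) {
  std::size_t constexpr kNRows = 8, kNCols = 16;
  std::int32_t constexpr kMaxBins = 4;
  auto p_fmat = RandomDataGenerator(kNRows, kNCols, 0.8).Seed(3).GenerateDMatrix();
  if (is_col_split) {
    // Keep only this worker's slice of the feature columns.
    p_fmat = std::shared_ptr<DMatrix>{
        p_fmat->SliceCol(collective::GetWorldSize(), collective::GetRank())};
  }
  auto const &gmat = *(p_fmat->GetBatches<GHistIndexMatrix>(BatchParam{kMaxBins, 0.5}).begin());
  HistogramBuilder<CPUExpandEntry> histogram;
  // Reset now receives the column-split flag as its last argument.
  histogram.Reset(gmat.cut.Ptrs().back(), {kMaxBins, 0.5}, omp_get_max_threads(), 1,
                  is_distributed, is_col_split);
}

The column-split case is then exercised through RunWithInMemoryCommunicator(kWorkers, TestBuildHistogram, ..., true), which runs the same body once per simulated worker.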

View File

@@ -10,29 +10,36 @@
namespace xgboost {
namespace tree {
TEST(Approx, Partitioner) {
size_t n_samples = 1024, n_features = 1, base_rowid = 0;
Context ctx;
CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid};
ASSERT_EQ(partitioner.base_rowid, base_rowid);
ASSERT_EQ(partitioner.Size(), 1);
ASSERT_EQ(partitioner.Partitions()[0].Size(), n_samples);
auto Xy = RandomDataGenerator{n_samples, n_features, 0}.GenerateDMatrix(true);
ctx.InitAllowUnknown(Args{});
std::vector<CPUExpandEntry> candidates{{0, 0, 0.4}};
namespace {
std::vector<float> GenerateHess(size_t n_samples) {
auto grad = GenerateRandomGradients(n_samples);
std::vector<float> hess(grad.Size());
std::transform(grad.HostVector().cbegin(), grad.HostVector().cend(), hess.begin(),
[](auto gpair) { return gpair.GetHess(); });
return hess;
}
} // anonymous namespace
TEST(Approx, Partitioner) {
size_t n_samples = 1024, n_features = 1, base_rowid = 0;
Context ctx;
ctx.InitAllowUnknown(Args{});
CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid, false};
ASSERT_EQ(partitioner.base_rowid, base_rowid);
ASSERT_EQ(partitioner.Size(), 1);
ASSERT_EQ(partitioner.Partitions()[0].Size(), n_samples);
auto const Xy = RandomDataGenerator{n_samples, n_features, 0}.GenerateDMatrix(true);
auto hess = GenerateHess(n_samples);
std::vector<CPUExpandEntry> candidates{{0, 0, 0.4}};
for (auto const& page : Xy->GetBatches<GHistIndexMatrix>({64, hess, true})) {
bst_feature_t const split_ind = 0;
{
auto min_value = page.cut.MinValues()[split_ind];
RegTree tree;
CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid};
CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid, false};
GetSplit(&tree, min_value, &candidates);
partitioner.UpdatePosition(&ctx, page, candidates, &tree);
ASSERT_EQ(partitioner.Size(), 3);
@@ -40,7 +47,7 @@ TEST(Approx, Partitioner) {
ASSERT_EQ(partitioner[2].Size(), n_samples);
}
{
CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid};
CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid, false};
auto ptr = page.cut.Ptrs()[split_ind + 1];
float split_value = page.cut.Values().at(ptr / 2);
RegTree tree;
@@ -66,12 +73,85 @@ TEST(Approx, Partitioner) {
}
}
namespace {
void TestColumnSplitPartitioner(size_t n_samples, size_t base_rowid, std::shared_ptr<DMatrix> Xy,
std::vector<float>* hess, float min_value, float mid_value,
CommonRowPartitioner const& expected_mid_partitioner) {
auto dmat =
std::unique_ptr<DMatrix>{Xy->SliceCol(collective::GetWorldSize(), collective::GetRank())};
std::vector<CPUExpandEntry> candidates{{0, 0, 0.4}};
Context ctx;
ctx.InitAllowUnknown(Args{});
for (auto const& page : dmat->GetBatches<GHistIndexMatrix>({64, *hess, true})) {
{
RegTree tree;
CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid, true};
GetSplit(&tree, min_value, &candidates);
partitioner.UpdatePosition(&ctx, page, candidates, &tree);
ASSERT_EQ(partitioner.Size(), 3);
ASSERT_EQ(partitioner[1].Size(), 0);
ASSERT_EQ(partitioner[2].Size(), n_samples);
}
{
CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid, true};
RegTree tree;
GetSplit(&tree, mid_value, &candidates);
partitioner.UpdatePosition(&ctx, page, candidates, &tree);
auto left_nidx = tree[RegTree::kRoot].LeftChild();
auto elem = partitioner[left_nidx];
ASSERT_LT(elem.Size(), n_samples);
ASSERT_GT(elem.Size(), 1);
auto expected_elem = expected_mid_partitioner[left_nidx];
ASSERT_EQ(elem.Size(), expected_elem.Size());
for (auto it = elem.begin, eit = expected_elem.begin; it != elem.end; ++it, ++eit) {
ASSERT_EQ(*it, *eit);
}
auto right_nidx = tree[RegTree::kRoot].RightChild();
elem = partitioner[right_nidx];
expected_elem = expected_mid_partitioner[right_nidx];
ASSERT_EQ(elem.Size(), expected_elem.Size());
for (auto it = elem.begin, eit = expected_elem.begin; it != elem.end; ++it, ++eit) {
ASSERT_EQ(*it, *eit);
}
}
}
}
} // anonymous namespace
TEST(Approx, PartitionerColSplit) {
size_t n_samples = 1024, n_features = 16, base_rowid = 0;
auto const Xy = RandomDataGenerator{n_samples, n_features, 0}.GenerateDMatrix(true);
auto hess = GenerateHess(n_samples);
std::vector<CPUExpandEntry> candidates{{0, 0, 0.4}};
float min_value, mid_value;
Context ctx;
ctx.InitAllowUnknown(Args{});
CommonRowPartitioner mid_partitioner{&ctx, n_samples, base_rowid, false};
for (auto const& page : Xy->GetBatches<GHistIndexMatrix>({64, hess, true})) {
bst_feature_t const split_ind = 0;
min_value = page.cut.MinValues()[split_ind];
auto ptr = page.cut.Ptrs()[split_ind + 1];
mid_value = page.cut.Values().at(ptr / 2);
RegTree tree;
GetSplit(&tree, mid_value, &candidates);
mid_partitioner.UpdatePosition(&ctx, page, candidates, &tree);
}
auto constexpr kWorkers = 4;
RunWithInMemoryCommunicator(kWorkers, TestColumnSplitPartitioner, n_samples, base_rowid, Xy,
&hess, min_value, mid_value, mid_partitioner);
}
namespace {
void TestLeafPartition(size_t n_samples) {
size_t const n_features = 2, base_rowid = 0;
Context ctx;
common::RowSetCollection row_set;
CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid};
CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid, false};
auto Xy = RandomDataGenerator{n_samples, n_features, 0}.GenerateDMatrix(true);
std::vector<CPUExpandEntry> candidates{{0, 0, 0.4}};
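CommonRowPartitioner gains the same trailing flag, and the new column-split test checks that each worker, seeing only its slice of the columns, partitions rows exactly as the single-node partitioner did. A trimmed sketch of that comparison is below; the function name is illustrative, the calls mirror the diff.

// Illustrative comparison of a column-split partitioner against a full-matrix one.
void ComparePartitionsSketch(Context *ctx, GHistIndexMatrix const &page,
                             std::vector<CPUExpandEntry> const &candidates, RegTree *tree,
                             std::size_t n_samples, std::size_t base_rowid,
                             CommonRowPartitioner const &expected) {
  // tree already contains the candidate split (GetSplit was applied by the caller).
  CommonRowPartitioner partitioner{ctx, n_samples, base_rowid, /*is_col_split=*/true};
  partitioner.UpdatePosition(ctx, page, candidates, tree);
  auto left_nidx = (*tree)[RegTree::kRoot].LeftChild();
  // Row sets must match the single-node result element for element.
  ASSERT_EQ(partitioner[left_nidx].Size(), expected[left_nidx].Size());
}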

View File

@@ -1,5 +1,5 @@
/*!
* Copyright 2022 by XGBoost Contributors
/**
* Copyright 2022-2023 by XGBoost Contributors
*/
#include <gtest/gtest.h>
#include <xgboost/data.h>
@@ -12,8 +12,7 @@
#include "../../../src/tree/split_evaluator.h"
#include "../helpers.h"
namespace xgboost {
namespace tree {
namespace xgboost::tree {
/**
* \brief Enumerate all possible partitions for categorical split.
*/
@@ -151,5 +150,4 @@ class TestCategoricalSplitWithMissing : public testing::Test {
ASSERT_EQ(right_sum.GetHess(), parent_sum_.GetHess() - left_sum.GetHess());
}
};
} // namespace tree
} // namespace xgboost
} // namespace xgboost::tree

View File

@@ -1,5 +1,5 @@
/*!
* Copyright 2017-2022 XGBoost contributors
/**
* Copyright 2017-2023 by XGBoost contributors
*/
#include <gtest/gtest.h>
#include <thrust/device_vector.h>
@@ -13,6 +13,7 @@
#include "../../../src/common/common.h"
#include "../../../src/data/sparse_page_source.h"
#include "../../../src/tree/constraints.cuh"
#include "../../../src/tree/param.h" // for TrainParam
#include "../../../src/tree/updater_gpu_common.cuh"
#include "../../../src/tree/updater_gpu_hist.cu"
#include "../filesystem.h" // dmlc::TemporaryDirectory
@@ -21,8 +22,7 @@
#include "xgboost/context.h"
#include "xgboost/json.h"
namespace xgboost {
namespace tree {
namespace xgboost::tree {
TEST(GpuHist, DeviceHistogram) {
// Ensures that node allocates correctly after reaching `kStopGrowingSize`.
dh::safe_cuda(cudaSetDevice(0));
@@ -83,11 +83,12 @@ void TestBuildHist(bool use_shared_memory_histograms) {
int const kNRows = 16, kNCols = 8;
TrainParam param;
std::vector<std::pair<std::string, std::string>> args {
{"max_depth", "6"},
{"max_leaves", "0"},
Args args{
{"max_depth", "6"},
{"max_leaves", "0"},
};
param.Init(args);
auto page = BuildEllpackPage(kNRows, kNCols);
BatchParam batch_param{};
Context ctx{CreateEmptyGenericParam(0)};
@@ -168,7 +169,6 @@ void TestHistogramIndexImpl() {
int constexpr kNRows = 1000, kNCols = 10;
// Build 2 matrices and build a histogram maker with that
Context ctx(CreateEmptyGenericParam(0));
tree::GPUHistMaker hist_maker{&ctx, ObjInfo{ObjInfo::kRegression}},
hist_maker_ext{&ctx, ObjInfo{ObjInfo::kRegression}};
@@ -179,15 +179,14 @@ void TestHistogramIndexImpl() {
std::unique_ptr<DMatrix> hist_maker_ext_dmat(
CreateSparsePageDMatrixWithRC(kNRows, kNCols, 128UL, true, tempdir));
std::vector<std::pair<std::string, std::string>> training_params = {
{"max_depth", "10"},
{"max_leaves", "0"}
};
Args training_params = {{"max_depth", "10"}, {"max_leaves", "0"}};
TrainParam param;
param.UpdateAllowUnknown(training_params);
hist_maker.Configure(training_params);
hist_maker.InitDataOnce(hist_maker_dmat.get());
hist_maker.InitDataOnce(&param, hist_maker_dmat.get());
hist_maker_ext.Configure(training_params);
hist_maker_ext.InitDataOnce(hist_maker_ext_dmat.get());
hist_maker_ext.InitDataOnce(&param, hist_maker_ext_dmat.get());
// Extract the device maker from the histogram makers and from that its compressed
// histogram index
@@ -237,13 +236,15 @@ void UpdateTree(HostDeviceVector<GradientPair>* gpair, DMatrix* dmat,
{"subsample", std::to_string(subsample)},
{"sampling_method", sampling_method},
};
TrainParam param;
param.UpdateAllowUnknown(args);
Context ctx(CreateEmptyGenericParam(0));
tree::GPUHistMaker hist_maker{&ctx, ObjInfo{ObjInfo::kRegression}};
hist_maker.Configure(args);
std::vector<HostDeviceVector<bst_node_t>> position(1);
hist_maker.Update(gpair, dmat, common::Span<HostDeviceVector<bst_node_t>>{position}, {tree});
hist_maker.Update(&param, gpair, dmat, common::Span<HostDeviceVector<bst_node_t>>{position},
{tree});
auto cache = linalg::VectorView<float>{preds->DeviceSpan(), {preds->Size()}, 0};
hist_maker.UpdatePredictionCache(dmat, cache);
}
@@ -391,13 +392,11 @@ TEST(GpuHist, ConfigIO) {
Json j_updater { Object() };
updater->SaveConfig(&j_updater);
ASSERT_TRUE(IsA<Object>(j_updater["gpu_hist_train_param"]));
ASSERT_TRUE(IsA<Object>(j_updater["train_param"]));
updater->LoadConfig(j_updater);
Json j_updater_roundtrip { Object() };
updater->SaveConfig(&j_updater_roundtrip);
ASSERT_TRUE(IsA<Object>(j_updater_roundtrip["gpu_hist_train_param"]));
ASSERT_TRUE(IsA<Object>(j_updater_roundtrip["train_param"]));
ASSERT_EQ(j_updater, j_updater_roundtrip);
}
@@ -414,5 +413,4 @@ TEST(GpuHist, MaxDepth) {
ASSERT_THROW({learner->UpdateOneIter(0, p_mat);}, dmlc::Error);
}
} // namespace tree
} // namespace xgboost
} // namespace xgboost::tree
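Two patterns repeat across this GPU test: the verbose std::vector<std::pair<std::string, std::string>> gives way to the Args alias, and hyperparameters now travel in a TrainParam passed by pointer into InitDataOnce and Update. A hedged sketch of the updated call sequence; the wrapper is illustrative and data preparation is omitted.

// Illustrative: the post-change GPUHistMaker calls from the hunks above.
void GpuUpdateSketch(Context *ctx, DMatrix *dmat, HostDeviceVector<GradientPair> *gpair,
                     RegTree *tree) {
  Args args{{"max_depth", "6"}, {"max_leaves", "0"}};
  TrainParam param;
  param.UpdateAllowUnknown(args);
  tree::GPUHistMaker hist_maker{ctx, ObjInfo{ObjInfo::kRegression}};
  hist_maker.Configure(args);
  // Both InitDataOnce(&param, dmat) and Update(&param, ...) now take the TrainParam explicitly.
  std::vector<HostDeviceVector<bst_node_t>> position(1);
  hist_maker.Update(&param, gpair, dmat, common::Span<HostDeviceVector<bst_node_t>>{position},
                    {tree});
}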

View File

@@ -1,33 +1,42 @@
/**
* Copyright 2019-2023 by XGBoost Contributors
*/
#include <gtest/gtest.h>
#include <xgboost/tree_model.h>
#include <xgboost/tree_updater.h>
#include "../../../src/tree/param.h" // for TrainParam
#include "../helpers.h"
namespace xgboost {
namespace tree {
namespace xgboost::tree {
std::shared_ptr<DMatrix> GenerateDMatrix(std::size_t rows, std::size_t cols) {
return RandomDataGenerator{rows, cols, 0.6f}.Seed(3).GenerateDMatrix();
}
TEST(GrowHistMaker, InteractionConstraint) {
size_t constexpr kRows = 32;
size_t constexpr kCols = 16;
Context ctx;
auto p_dmat = RandomDataGenerator{kRows, kCols, 0.6f}.Seed(3).GenerateDMatrix();
HostDeviceVector<GradientPair> gradients (kRows);
std::vector<GradientPair>& h_gradients = gradients.HostVector();
std::unique_ptr<HostDeviceVector<GradientPair>> GenerateGradients(std::size_t rows) {
auto p_gradients = std::make_unique<HostDeviceVector<GradientPair>>(rows);
auto& h_gradients = p_gradients->HostVector();
xgboost::SimpleLCG gen;
xgboost::SimpleRealUniformDistribution<bst_float> dist(0.0f, 1.0f);
for (size_t i = 0; i < kRows; ++i) {
bst_float grad = dist(&gen);
bst_float hess = dist(&gen);
h_gradients[i] = GradientPair(grad, hess);
for (std::size_t i = 0; i < rows; ++i) {
auto grad = dist(&gen);
auto hess = dist(&gen);
h_gradients[i] = GradientPair{grad, hess};
}
return p_gradients;
}
TEST(GrowHistMaker, InteractionConstraint) {
auto constexpr kRows = 32;
auto constexpr kCols = 16;
auto p_dmat = GenerateDMatrix(kRows, kCols);
auto p_gradients = GenerateGradients(kRows);
Context ctx;
{
// With constraints
RegTree tree;
@@ -35,11 +44,11 @@ TEST(GrowHistMaker, InteractionConstraint) {
std::unique_ptr<TreeUpdater> updater{
TreeUpdater::Create("grow_histmaker", &ctx, ObjInfo{ObjInfo::kRegression})};
updater->Configure(Args{
{"interaction_constraints", "[[0, 1]]"},
{"num_feature", std::to_string(kCols)}});
TrainParam param;
param.UpdateAllowUnknown(
Args{{"interaction_constraints", "[[0, 1]]"}, {"num_feature", std::to_string(kCols)}});
std::vector<HostDeviceVector<bst_node_t>> position(1);
updater->Update(&gradients, p_dmat.get(), position, {&tree});
updater->Update(&param, p_gradients.get(), p_dmat.get(), position, {&tree});
ASSERT_EQ(tree.NumExtraNodes(), 4);
ASSERT_EQ(tree[0].SplitIndex(), 1);
@@ -54,9 +63,10 @@ TEST(GrowHistMaker, InteractionConstraint) {
std::unique_ptr<TreeUpdater> updater{
TreeUpdater::Create("grow_histmaker", &ctx, ObjInfo{ObjInfo::kRegression})};
updater->Configure(Args{{"num_feature", std::to_string(kCols)}});
std::vector<HostDeviceVector<bst_node_t>> position(1);
updater->Update(&gradients, p_dmat.get(), position, {&tree});
TrainParam param;
param.Init(Args{});
updater->Update(&param, p_gradients.get(), p_dmat.get(), position, {&tree});
ASSERT_EQ(tree.NumExtraNodes(), 10);
ASSERT_EQ(tree[0].SplitIndex(), 1);
@@ -66,5 +76,53 @@ TEST(GrowHistMaker, InteractionConstraint) {
}
}
} // namespace tree
} // namespace xgboost
namespace {
void TestColumnSplit(int32_t rows, int32_t cols, RegTree const& expected_tree) {
auto p_dmat = GenerateDMatrix(rows, cols);
auto p_gradients = GenerateGradients(rows);
Context ctx;
std::unique_ptr<TreeUpdater> updater{
TreeUpdater::Create("grow_histmaker", &ctx, ObjInfo{ObjInfo::kRegression})};
std::vector<HostDeviceVector<bst_node_t>> position(1);
std::unique_ptr<DMatrix> sliced{
p_dmat->SliceCol(collective::GetWorldSize(), collective::GetRank())};
RegTree tree;
tree.param.num_feature = cols;
TrainParam param;
param.Init(Args{});
updater->Update(&param, p_gradients.get(), sliced.get(), position, {&tree});
EXPECT_EQ(tree.NumExtraNodes(), 10);
EXPECT_EQ(tree[0].SplitIndex(), 1);
EXPECT_NE(tree[tree[0].LeftChild()].SplitIndex(), 0);
EXPECT_NE(tree[tree[0].RightChild()].SplitIndex(), 0);
EXPECT_EQ(tree, expected_tree);
}
} // anonymous namespace
TEST(GrowHistMaker, ColumnSplit) {
auto constexpr kRows = 32;
auto constexpr kCols = 16;
RegTree expected_tree;
expected_tree.param.num_feature = kCols;
{
auto p_dmat = GenerateDMatrix(kRows, kCols);
auto p_gradients = GenerateGradients(kRows);
Context ctx;
std::unique_ptr<TreeUpdater> updater{
TreeUpdater::Create("grow_histmaker", &ctx, ObjInfo{ObjInfo::kRegression})};
std::vector<HostDeviceVector<bst_node_t>> position(1);
TrainParam param;
param.Init(Args{});
updater->Update(&param, p_gradients.get(), p_dmat.get(), position, {&expected_tree});
}
auto constexpr kWorldSize = 2;
RunWithInMemoryCommunicator(kWorldSize, TestColumnSplit, kRows, kCols, std::cref(expected_tree));
}
} // namespace xgboost::tree
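The new GrowHistMaker.ColumnSplit test uses a replicate-and-compare scheme: one tree is grown on the full matrix, then RunWithInMemoryCommunicator replays the update on per-worker column slices and requires the resulting tree to be identical, presumably because the workers synchronize the best split for each node. A small illustrative driver for that pattern; only the wrapper name is invented.

// Illustrative driver for the replicate-and-compare pattern used by GrowHistMaker.ColumnSplit.
void RunColumnSplitCheck(RegTree const &expected_tree, std::int32_t rows, std::int32_t cols) {
  auto constexpr kWorldSize = 2;
  // Each simulated worker runs TestColumnSplit: slice its columns with SliceCol(), train one
  // tree with the same TrainParam, and EXPECT_EQ it against expected_tree.
  RunWithInMemoryCommunicator(kWorldSize, TestColumnSplit, rows, cols, std::cref(expected_tree));
}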

View File

@@ -7,6 +7,7 @@
#include <memory>
#include "../../../src/tree/param.h" // for TrainParam
#include "../helpers.h"
namespace xgboost {
@@ -75,9 +76,11 @@ class TestPredictionCache : public ::testing::Test {
RegTree tree;
std::vector<RegTree *> trees{&tree};
auto gpair = GenerateRandomGradients(n_samples_);
updater->Configure(Args{{"max_bin", "64"}});
tree::TrainParam param;
param.UpdateAllowUnknown(Args{{"max_bin", "64"}});
std::vector<HostDeviceVector<bst_node_t>> position(1);
updater->Update(&gpair, Xy_.get(), position, trees);
updater->Update(&param, &gpair, Xy_.get(), position, trees);
HostDeviceVector<float> out_prediction_cached;
out_prediction_cached.SetDevice(ctx.gpu_id);
out_prediction_cached.Resize(n_samples_);

View File

@@ -1,28 +1,26 @@
/*!
* Copyright 2018-2019 by Contributors
/**
* Copyright 2018-2023 by XGBoost Contributors
*/
#include <gtest/gtest.h>
#include <xgboost/data.h>
#include <xgboost/host_device_vector.h>
#include <xgboost/tree_updater.h>
#include <xgboost/learner.h>
#include <gtest/gtest.h>
#include <vector>
#include <string>
#include <memory>
#include <xgboost/tree_updater.h>
#include <memory>
#include <string>
#include <vector>
#include "../../../src/tree/param.h" // for TrainParam
#include "../helpers.h"
namespace xgboost {
namespace tree {
namespace xgboost::tree {
TEST(Updater, Prune) {
int constexpr kCols = 16;
std::vector<std::pair<std::string, std::string>> cfg;
cfg.emplace_back(std::pair<std::string, std::string>("num_feature",
std::to_string(kCols)));
cfg.emplace_back(std::pair<std::string, std::string>(
"min_split_loss", "10"));
cfg.emplace_back("num_feature", std::to_string(kCols));
cfg.emplace_back("min_split_loss", "10");
// These data are just place holders.
HostDeviceVector<GradientPair> gpair =
@@ -38,28 +36,30 @@ TEST(Updater, Prune) {
tree.param.UpdateAllowUnknown(cfg);
std::vector<RegTree*> trees {&tree};
// prepare pruner
TrainParam param;
param.UpdateAllowUnknown(cfg);
std::unique_ptr<TreeUpdater> pruner(
TreeUpdater::Create("prune", &ctx, ObjInfo{ObjInfo::kRegression}));
pruner->Configure(cfg);
// loss_chg < min_split_loss;
std::vector<HostDeviceVector<bst_node_t>> position(trees.size());
tree.ExpandNode(0, 0, 0, true, 0.0f, 0.3f, 0.4f, 0.0f, 0.0f,
/*left_sum=*/0.0f, /*right_sum=*/0.0f);
pruner->Update(&gpair, p_dmat.get(), position, trees);
pruner->Update(&param, &gpair, p_dmat.get(), position, trees);
ASSERT_EQ(tree.NumExtraNodes(), 0);
// loss_chg > min_split_loss;
tree.ExpandNode(0, 0, 0, true, 0.0f, 0.3f, 0.4f, 11.0f, 0.0f,
/*left_sum=*/0.0f, /*right_sum=*/0.0f);
pruner->Update(&gpair, p_dmat.get(), position, trees);
pruner->Update(&param, &gpair, p_dmat.get(), position, trees);
ASSERT_EQ(tree.NumExtraNodes(), 2);
// loss_chg == min_split_loss;
tree.Stat(0).loss_chg = 10;
pruner->Update(&gpair, p_dmat.get(), position, trees);
pruner->Update(&param, &gpair, p_dmat.get(), position, trees);
ASSERT_EQ(tree.NumExtraNodes(), 2);
@@ -73,20 +73,20 @@ TEST(Updater, Prune) {
0, 0.5f, true, 0.3, 0.4, 0.5,
/*loss_chg=*/19.0f, 0.0f,
/*left_sum=*/0.0f, /*right_sum=*/0.0f);
cfg.emplace_back(std::make_pair("max_depth", "1"));
pruner->Configure(cfg);
pruner->Update(&gpair, p_dmat.get(), position, trees);
cfg.emplace_back("max_depth", "1");
param.UpdateAllowUnknown(cfg);
pruner->Update(&param, &gpair, p_dmat.get(), position, trees);
ASSERT_EQ(tree.NumExtraNodes(), 2);
tree.ExpandNode(tree[0].LeftChild(),
0, 0.5f, true, 0.3, 0.4, 0.5,
/*loss_chg=*/18.0f, 0.0f,
/*left_sum=*/0.0f, /*right_sum=*/0.0f);
cfg.emplace_back(std::make_pair("min_split_loss", "0"));
pruner->Configure(cfg);
pruner->Update(&gpair, p_dmat.get(), position, trees);
cfg.emplace_back("min_split_loss", "0");
param.UpdateAllowUnknown(cfg);
pruner->Update(&param, &gpair, p_dmat.get(), position, trees);
ASSERT_EQ(tree.NumExtraNodes(), 2);
}
} // namespace tree
} // namespace xgboost
} // namespace xgboost::tree

View File

@@ -23,7 +23,7 @@ TEST(QuantileHist, Partitioner) {
Context ctx;
ctx.InitAllowUnknown(Args{});
CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid};
CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid, false};
ASSERT_EQ(partitioner.base_rowid, base_rowid);
ASSERT_EQ(partitioner.Size(), 1);
ASSERT_EQ(partitioner.Partitions()[0].Size(), n_samples);
@@ -41,7 +41,7 @@ TEST(QuantileHist, Partitioner) {
{
auto min_value = gmat.cut.MinValues()[split_ind];
RegTree tree;
CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid};
CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid, false};
GetSplit(&tree, min_value, &candidates);
partitioner.UpdatePosition<false, true>(&ctx, gmat, column_indices, candidates, &tree);
ASSERT_EQ(partitioner.Size(), 3);
@@ -49,7 +49,7 @@ TEST(QuantileHist, Partitioner) {
ASSERT_EQ(partitioner[2].Size(), n_samples);
}
{
CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid};
CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid, false};
auto ptr = gmat.cut.Ptrs()[split_ind + 1];
float split_value = gmat.cut.Values().at(ptr / 2);
RegTree tree;

View File

@@ -1,14 +1,15 @@
/*!
* Copyright 2018-2019 by Contributors
/**
* Copyright 2018-2023 by XGBoost Contributors
*/
#include <gtest/gtest.h>
#include <xgboost/host_device_vector.h>
#include <xgboost/tree_updater.h>
#include <gtest/gtest.h>
#include <vector>
#include <string>
#include <memory>
#include <string>
#include <vector>
#include "../../../src/tree/param.h" // for TrainParam
#include "../helpers.h"
namespace xgboost {
@@ -43,9 +44,11 @@ TEST(Updater, Refresh) {
tree.Stat(cleft).base_weight = 1.2;
tree.Stat(cright).base_weight = 1.3;
refresher->Configure(cfg);
std::vector<HostDeviceVector<bst_node_t>> position;
refresher->Update(&gpair, p_dmat.get(), position, trees);
tree::TrainParam param;
param.UpdateAllowUnknown(cfg);
refresher->Update(&param, &gpair, p_dmat.get(), position, trees);
bst_float constexpr kEps = 1e-6;
ASSERT_NEAR(-0.183392, tree[cright].LeafValue(), kEps);

View File

@@ -1,7 +1,11 @@
/**
* Copyright 2020-2023 by XGBoost Contributors
*/
#include <gtest/gtest.h>
#include <xgboost/tree_model.h>
#include <xgboost/tree_updater.h>
#include "../../../src/tree/param.h" // for TrainParam
#include "../helpers.h"
namespace xgboost {
@@ -21,6 +25,9 @@ class UpdaterTreeStatTest : public ::testing::Test {
}
void RunTest(std::string updater) {
tree::TrainParam param;
param.Init(Args{});
Context ctx(updater == "grow_gpu_hist" ? CreateEmptyGenericParam(0)
: CreateEmptyGenericParam(Context::kCpuId));
auto up = std::unique_ptr<TreeUpdater>{
@@ -29,7 +36,7 @@ class UpdaterTreeStatTest : public ::testing::Test {
RegTree tree;
tree.param.num_feature = kCols;
std::vector<HostDeviceVector<bst_node_t>> position(1);
up->Update(&gpairs_, p_dmat_.get(), position, {&tree});
up->Update(&param, &gpairs_, p_dmat_.get(), position, {&tree});
tree.WalkTree([&tree](bst_node_t nidx) {
if (tree[nidx].IsLeaf()) {
@@ -69,28 +76,33 @@ class UpdaterEtaTest : public ::testing::Test {
void RunTest(std::string updater) {
Context ctx(updater == "grow_gpu_hist" ? CreateEmptyGenericParam(0)
: CreateEmptyGenericParam(Context::kCpuId));
float eta = 0.4;
auto up_0 = std::unique_ptr<TreeUpdater>{
TreeUpdater::Create(updater, &ctx, ObjInfo{ObjInfo::kClassification})};
up_0->Configure(Args{{"eta", std::to_string(eta)}});
up_0->Configure(Args{});
tree::TrainParam param0;
param0.Init(Args{{"eta", std::to_string(eta)}});
auto up_1 = std::unique_ptr<TreeUpdater>{
TreeUpdater::Create(updater, &ctx, ObjInfo{ObjInfo::kClassification})};
up_1->Configure(Args{{"eta", "1.0"}});
tree::TrainParam param1;
param1.Init(Args{{"eta", "1.0"}});
for (size_t iter = 0; iter < 4; ++iter) {
RegTree tree_0;
{
tree_0.param.num_feature = kCols;
std::vector<HostDeviceVector<bst_node_t>> position(1);
up_0->Update(&gpairs_, p_dmat_.get(), position, {&tree_0});
up_0->Update(&param0, &gpairs_, p_dmat_.get(), position, {&tree_0});
}
RegTree tree_1;
{
tree_1.param.num_feature = kCols;
std::vector<HostDeviceVector<bst_node_t>> position(1);
up_1->Update(&gpairs_, p_dmat_.get(), position, {&tree_1});
up_1->Update(&param1, &gpairs_, p_dmat_.get(), position, {&tree_1});
}
tree_0.WalkTree([&](bst_node_t nidx) {
if (tree_0[nidx].IsLeaf()) {
@@ -139,17 +151,18 @@ class TestMinSplitLoss : public ::testing::Test {
// test gamma
{"gamma", std::to_string(gamma)}};
tree::TrainParam param;
param.UpdateAllowUnknown(args);
Context ctx(updater == "grow_gpu_hist" ? CreateEmptyGenericParam(0)
: CreateEmptyGenericParam(Context::kCpuId));
std::cout << ctx.gpu_id << std::endl;
auto up = std::unique_ptr<TreeUpdater>{
TreeUpdater::Create(updater, &ctx, ObjInfo{ObjInfo::kRegression})};
up->Configure(args);
up->Configure({});
RegTree tree;
std::vector<HostDeviceVector<bst_node_t>> position(1);
up->Update(&gpair_, dmat_.get(), position, {&tree});
up->Update(&param, &gpair_, dmat_.get(), position, {&tree});
auto n_nodes = tree.NumExtraNodes();
return n_nodes;
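The updater tests at the end show the same migration applied to per-run hyperparameters: eta and gamma are no longer pushed through updater->Configure but live in a TrainParam handed to each Update call, so two settings can be compared simply by holding two params. A compressed sketch, assuming the fixture supplies gpairs_ and p_dmat_ and using grow_histmaker as a stand-in for the parameterized updater name.

// Illustrative: two TrainParam objects drive the same updater type with different eta values.
tree::TrainParam param0, param1;
param0.Init(Args{{"eta", "0.4"}});
param1.Init(Args{{"eta", "1.0"}});
auto up_0 = std::unique_ptr<TreeUpdater>{
    TreeUpdater::Create("grow_histmaker", &ctx, ObjInfo{ObjInfo::kClassification})};
auto up_1 = std::unique_ptr<TreeUpdater>{
    TreeUpdater::Create("grow_histmaker", &ctx, ObjInfo{ObjInfo::kClassification})};
RegTree tree_0, tree_1;
tree_0.param.num_feature = kCols;
tree_1.param.num_feature = kCols;
std::vector<HostDeviceVector<bst_node_t>> position(1);
up_0->Update(&param0, &gpairs_, p_dmat_.get(), position, {&tree_0});
up_1->Update(&param1, &gpairs_, p_dmat_.get(), position, {&tree_1});
// The fixture then walks both trees and compares leaf values to verify the eta scaling.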