initial merge
This commit is contained in:
@@ -304,7 +304,7 @@ void TestEvaluateSingleSplit(bool is_categorical) {
|
||||
thrust::device_vector<bst_feature_t> feature_set = std::vector<bst_feature_t>{0, 1};
|
||||
|
||||
// Setup gradients so that second feature gets higher gain
|
||||
auto feature_histogram = ConvertToInteger({ {-0.5, 0.5}, {0.5, 0.5}, {-1.0, 0.5}, {1.0, 0.5}});
|
||||
auto feature_histogram = ConvertToInteger({{-0.5, 0.5}, {0.5, 0.5}, {-1.0, 0.5}, {1.0, 0.5}});
|
||||
|
||||
dh::device_vector<FeatureType> feature_types(feature_set.size(),
|
||||
FeatureType::kCategorical);
|
||||
|
||||
@@ -1,18 +1,27 @@
|
||||
/**
|
||||
* Copyright 2021-2023 by XGBoost Contributors
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
#include <xgboost/base.h>
|
||||
|
||||
#include "../../../../src/common/hist_util.h"
|
||||
#include "../../../../src/tree/common_row_partitioner.h"
|
||||
#include "../../../../src/tree/hist/evaluate_splits.h"
|
||||
#include "../test_evaluate_splits.h"
|
||||
#include "../../helpers.h"
|
||||
#include "xgboost/context.h" // Context
|
||||
|
||||
namespace xgboost {
|
||||
namespace tree {
|
||||
#include <gtest/gtest.h>
|
||||
#include <xgboost/base.h> // for GradientPairPrecise, Args, Gradie...
|
||||
#include <xgboost/context.h> // for Context
|
||||
#include <xgboost/data.h> // for FeatureType, DMatrix, MetaInfo
|
||||
#include <xgboost/logging.h> // for CHECK_EQ
|
||||
#include <xgboost/tree_model.h> // for RegTree, RTreeNodeStat
|
||||
|
||||
#include <memory> // for make_shared, shared_ptr, addressof
|
||||
|
||||
#include "../../../../src/common/hist_util.h" // for HistCollection, HistogramCuts
|
||||
#include "../../../../src/common/random.h" // for ColumnSampler
|
||||
#include "../../../../src/common/row_set.h" // for RowSetCollection
|
||||
#include "../../../../src/data/gradient_index.h" // for GHistIndexMatrix
|
||||
#include "../../../../src/tree/hist/evaluate_splits.h" // for HistEvaluator
|
||||
#include "../../../../src/tree/hist/expand_entry.h" // for CPUExpandEntry
|
||||
#include "../../../../src/tree/param.h" // for GradStats, TrainParam
|
||||
#include "../../helpers.h" // for RandomDataGenerator, AllThreadsFo...
|
||||
|
||||
namespace xgboost::tree {
|
||||
void TestEvaluateSplits(bool force_read_by_column) {
|
||||
Context ctx;
|
||||
ctx.nthread = 4;
|
||||
@@ -87,6 +96,68 @@ TEST(HistEvaluator, Evaluate) {
|
||||
TestEvaluateSplits(true);
|
||||
}
|
||||
|
||||
TEST(HistMultiEvaluator, Evaluate) {
|
||||
Context ctx;
|
||||
ctx.nthread = 1;
|
||||
|
||||
TrainParam param;
|
||||
param.Init(Args{{"min_child_weight", "0"}, {"reg_lambda", "0"}});
|
||||
auto sampler = std::make_shared<common::ColumnSampler>();
|
||||
|
||||
std::size_t n_samples = 3;
|
||||
bst_feature_t n_features = 2;
|
||||
bst_target_t n_targets = 2;
|
||||
bst_bin_t n_bins = 2;
|
||||
|
||||
auto p_fmat =
|
||||
RandomDataGenerator{n_samples, n_features, 0.5}.Targets(n_targets).GenerateDMatrix(true);
|
||||
|
||||
HistMultiEvaluator evaluator{&ctx, p_fmat->Info(), ¶m, sampler};
|
||||
std::vector<common::HistCollection> histogram(n_targets);
|
||||
linalg::Vector<GradientPairPrecise> root_sum({2}, Context::kCpuId);
|
||||
for (bst_target_t t{0}; t < n_targets; ++t) {
|
||||
auto &hist = histogram[t];
|
||||
hist.Init(n_bins * n_features);
|
||||
hist.AddHistRow(0);
|
||||
hist.AllocateAllData();
|
||||
auto node_hist = hist[0];
|
||||
node_hist[0] = {-0.5, 0.5};
|
||||
node_hist[1] = {2.0, 0.5};
|
||||
node_hist[2] = {0.5, 0.5};
|
||||
node_hist[3] = {1.0, 0.5};
|
||||
|
||||
root_sum(t) += node_hist[0];
|
||||
root_sum(t) += node_hist[1];
|
||||
}
|
||||
|
||||
RegTree tree{n_targets, n_features};
|
||||
auto weight = evaluator.InitRoot(root_sum.HostView());
|
||||
tree.SetLeaf(RegTree::kRoot, weight.HostView());
|
||||
auto w = weight.HostView();
|
||||
ASSERT_EQ(w.Size(), n_targets);
|
||||
ASSERT_EQ(w(0), -1.5);
|
||||
ASSERT_EQ(w(1), -1.5);
|
||||
|
||||
common::HistogramCuts cuts;
|
||||
cuts.cut_ptrs_ = {0, 2, 4};
|
||||
cuts.cut_values_ = {0.5, 1.0, 2.0, 3.0};
|
||||
cuts.min_vals_ = {-0.2, 1.8};
|
||||
|
||||
std::vector<MultiExpandEntry> entries(1, {/*nidx=*/0, /*depth=*/0});
|
||||
|
||||
std::vector<common::HistCollection const *> ptrs;
|
||||
std::transform(histogram.cbegin(), histogram.cend(), std::back_inserter(ptrs),
|
||||
[](auto const &h) { return std::addressof(h); });
|
||||
|
||||
evaluator.EvaluateSplits(tree, ptrs, cuts, &entries);
|
||||
|
||||
ASSERT_EQ(entries.front().split.loss_chg, 12.5);
|
||||
ASSERT_EQ(entries.front().split.split_value, 0.5);
|
||||
ASSERT_EQ(entries.front().split.SplitIndex(), 0);
|
||||
|
||||
ASSERT_EQ(sampler->GetFeatureSet(0)->Size(), n_features);
|
||||
}
|
||||
|
||||
TEST(HistEvaluator, Apply) {
|
||||
Context ctx;
|
||||
ctx.nthread = 4;
|
||||
@@ -98,7 +169,8 @@ TEST(HistEvaluator, Apply) {
|
||||
auto sampler = std::make_shared<common::ColumnSampler>();
|
||||
auto evaluator_ = HistEvaluator<CPUExpandEntry>{&ctx, ¶m, dmat->Info(), sampler};
|
||||
|
||||
CPUExpandEntry entry{0, 0, 10.0f};
|
||||
CPUExpandEntry entry{0, 0};
|
||||
entry.split.loss_chg = 10.0f;
|
||||
entry.split.left_sum = GradStats{0.4, 0.6f};
|
||||
entry.split.right_sum = GradStats{0.5, 0.5f};
|
||||
|
||||
@@ -210,12 +282,11 @@ TEST_F(TestCategoricalSplitWithMissing, HistEvaluator) {
|
||||
std::vector<CPUExpandEntry> entries(1);
|
||||
RegTree tree;
|
||||
evaluator.EvaluateSplits(hist, cuts_, info.feature_types.ConstHostSpan(), tree, &entries);
|
||||
auto const& split = entries.front().split;
|
||||
auto const &split = entries.front().split;
|
||||
|
||||
this->CheckResult(split.loss_chg, split.SplitIndex(), split.split_value, split.is_cat,
|
||||
split.DefaultLeft(),
|
||||
GradientPairPrecise{split.left_sum.GetGrad(), split.left_sum.GetHess()},
|
||||
GradientPairPrecise{split.right_sum.GetGrad(), split.right_sum.GetHess()});
|
||||
}
|
||||
} // namespace tree
|
||||
} // namespace xgboost
|
||||
} // namespace xgboost::tree
|
||||
|
||||
@@ -41,10 +41,10 @@ void TestAddHistRows(bool is_distributed) {
|
||||
tree.ExpandNode(0, 0, 0, false, 0, 0, 0, 0, 0, 0, 0);
|
||||
tree.ExpandNode(tree[0].LeftChild(), 0, 0, false, 0, 0, 0, 0, 0, 0, 0);
|
||||
tree.ExpandNode(tree[0].RightChild(), 0, 0, false, 0, 0, 0, 0, 0, 0, 0);
|
||||
nodes_for_explicit_hist_build_.emplace_back(3, tree.GetDepth(3), 0.0f);
|
||||
nodes_for_explicit_hist_build_.emplace_back(4, tree.GetDepth(4), 0.0f);
|
||||
nodes_for_subtraction_trick_.emplace_back(5, tree.GetDepth(5), 0.0f);
|
||||
nodes_for_subtraction_trick_.emplace_back(6, tree.GetDepth(6), 0.0f);
|
||||
nodes_for_explicit_hist_build_.emplace_back(3, tree.GetDepth(3));
|
||||
nodes_for_explicit_hist_build_.emplace_back(4, tree.GetDepth(4));
|
||||
nodes_for_subtraction_trick_.emplace_back(5, tree.GetDepth(5));
|
||||
nodes_for_subtraction_trick_.emplace_back(6, tree.GetDepth(6));
|
||||
|
||||
HistogramBuilder<CPUExpandEntry> histogram_builder;
|
||||
histogram_builder.Reset(gmat.cut.TotalBins(), {kMaxBins, 0.5}, omp_get_max_threads(), 1,
|
||||
@@ -98,7 +98,7 @@ void TestSyncHist(bool is_distributed) {
|
||||
}
|
||||
|
||||
// level 0
|
||||
nodes_for_explicit_hist_build_.emplace_back(0, tree.GetDepth(0), 0.0f);
|
||||
nodes_for_explicit_hist_build_.emplace_back(0, tree.GetDepth(0));
|
||||
histogram.AddHistRows(&starting_index, &sync_count,
|
||||
nodes_for_explicit_hist_build_,
|
||||
nodes_for_subtraction_trick_, &tree);
|
||||
@@ -108,10 +108,8 @@ void TestSyncHist(bool is_distributed) {
|
||||
nodes_for_subtraction_trick_.clear();
|
||||
|
||||
// level 1
|
||||
nodes_for_explicit_hist_build_.emplace_back(tree[0].LeftChild(),
|
||||
tree.GetDepth(1), 0.0f);
|
||||
nodes_for_subtraction_trick_.emplace_back(tree[0].RightChild(),
|
||||
tree.GetDepth(2), 0.0f);
|
||||
nodes_for_explicit_hist_build_.emplace_back(tree[0].LeftChild(), tree.GetDepth(1));
|
||||
nodes_for_subtraction_trick_.emplace_back(tree[0].RightChild(), tree.GetDepth(2));
|
||||
|
||||
histogram.AddHistRows(&starting_index, &sync_count,
|
||||
nodes_for_explicit_hist_build_,
|
||||
@@ -123,10 +121,10 @@ void TestSyncHist(bool is_distributed) {
|
||||
nodes_for_explicit_hist_build_.clear();
|
||||
nodes_for_subtraction_trick_.clear();
|
||||
// level 2
|
||||
nodes_for_explicit_hist_build_.emplace_back(3, tree.GetDepth(3), 0.0f);
|
||||
nodes_for_subtraction_trick_.emplace_back(4, tree.GetDepth(4), 0.0f);
|
||||
nodes_for_explicit_hist_build_.emplace_back(5, tree.GetDepth(5), 0.0f);
|
||||
nodes_for_subtraction_trick_.emplace_back(6, tree.GetDepth(6), 0.0f);
|
||||
nodes_for_explicit_hist_build_.emplace_back(3, tree.GetDepth(3));
|
||||
nodes_for_subtraction_trick_.emplace_back(4, tree.GetDepth(4));
|
||||
nodes_for_explicit_hist_build_.emplace_back(5, tree.GetDepth(5));
|
||||
nodes_for_subtraction_trick_.emplace_back(6, tree.GetDepth(6));
|
||||
|
||||
histogram.AddHistRows(&starting_index, &sync_count,
|
||||
nodes_for_explicit_hist_build_,
|
||||
@@ -256,7 +254,7 @@ void TestBuildHistogram(bool is_distributed, bool force_read_by_column, bool is_
|
||||
std::iota(row_indices.begin(), row_indices.end(), 0);
|
||||
row_set_collection.Init();
|
||||
|
||||
CPUExpandEntry node(RegTree::kRoot, tree.GetDepth(0), 0.0f);
|
||||
CPUExpandEntry node{RegTree::kRoot, tree.GetDepth(0)};
|
||||
std::vector<CPUExpandEntry> nodes_for_explicit_hist_build;
|
||||
nodes_for_explicit_hist_build.push_back(node);
|
||||
for (auto const &gidx : p_fmat->GetBatches<GHistIndexMatrix>({kMaxBins, 0.5})) {
|
||||
@@ -330,7 +328,7 @@ void TestHistogramCategorical(size_t n_categories, bool force_read_by_column) {
|
||||
BatchParam batch_param{0, static_cast<int32_t>(kBins)};
|
||||
|
||||
RegTree tree;
|
||||
CPUExpandEntry node(RegTree::kRoot, tree.GetDepth(0), 0.0f);
|
||||
CPUExpandEntry node{RegTree::kRoot, tree.GetDepth(0)};
|
||||
std::vector<CPUExpandEntry> nodes_for_explicit_hist_build;
|
||||
nodes_for_explicit_hist_build.push_back(node);
|
||||
|
||||
@@ -403,7 +401,7 @@ void TestHistogramExternalMemory(BatchParam batch_param, bool is_approx, bool fo
|
||||
|
||||
RegTree tree;
|
||||
std::vector<CPUExpandEntry> nodes;
|
||||
nodes.emplace_back(0, tree.GetDepth(0), 0.0f);
|
||||
nodes.emplace_back(0, tree.GetDepth(0));
|
||||
|
||||
common::GHistRow multi_page;
|
||||
HistogramBuilder<CPUExpandEntry> multi_build;
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*!
|
||||
* Copyright 2021-2022, XGBoost contributors.
|
||||
/**
|
||||
* Copyright 2021-2023 by XGBoost contributors.
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
@@ -10,7 +10,6 @@
|
||||
|
||||
namespace xgboost {
|
||||
namespace tree {
|
||||
|
||||
namespace {
|
||||
std::vector<float> GenerateHess(size_t n_samples) {
|
||||
auto grad = GenerateRandomGradients(n_samples);
|
||||
@@ -32,7 +31,8 @@ TEST(Approx, Partitioner) {
|
||||
|
||||
auto const Xy = RandomDataGenerator{n_samples, n_features, 0}.GenerateDMatrix(true);
|
||||
auto hess = GenerateHess(n_samples);
|
||||
std::vector<CPUExpandEntry> candidates{{0, 0, 0.4}};
|
||||
std::vector<CPUExpandEntry> candidates{{0, 0}};
|
||||
candidates.front().split.loss_chg = 0.4;
|
||||
|
||||
for (auto const& page : Xy->GetBatches<GHistIndexMatrix>({64, hess, true})) {
|
||||
bst_feature_t const split_ind = 0;
|
||||
@@ -79,7 +79,9 @@ void TestColumnSplitPartitioner(size_t n_samples, size_t base_rowid, std::shared
|
||||
CommonRowPartitioner const& expected_mid_partitioner) {
|
||||
auto dmat =
|
||||
std::unique_ptr<DMatrix>{Xy->SliceCol(collective::GetWorldSize(), collective::GetRank())};
|
||||
std::vector<CPUExpandEntry> candidates{{0, 0, 0.4}};
|
||||
std::vector<CPUExpandEntry> candidates{{0, 0}};
|
||||
candidates.front().split.loss_chg = 0.4;
|
||||
|
||||
Context ctx;
|
||||
ctx.InitAllowUnknown(Args{});
|
||||
for (auto const& page : dmat->GetBatches<GHistIndexMatrix>({64, *hess, true})) {
|
||||
@@ -124,7 +126,8 @@ TEST(Approx, PartitionerColSplit) {
|
||||
size_t n_samples = 1024, n_features = 16, base_rowid = 0;
|
||||
auto const Xy = RandomDataGenerator{n_samples, n_features, 0}.GenerateDMatrix(true);
|
||||
auto hess = GenerateHess(n_samples);
|
||||
std::vector<CPUExpandEntry> candidates{{0, 0, 0.4}};
|
||||
std::vector<CPUExpandEntry> candidates{{0, 0}};
|
||||
candidates.front().split.loss_chg = 0.4;
|
||||
|
||||
float min_value, mid_value;
|
||||
Context ctx;
|
||||
@@ -145,77 +148,5 @@ TEST(Approx, PartitionerColSplit) {
|
||||
RunWithInMemoryCommunicator(kWorkers, TestColumnSplitPartitioner, n_samples, base_rowid, Xy,
|
||||
&hess, min_value, mid_value, mid_partitioner);
|
||||
}
|
||||
|
||||
namespace {
|
||||
void TestLeafPartition(size_t n_samples) {
|
||||
size_t const n_features = 2, base_rowid = 0;
|
||||
Context ctx;
|
||||
common::RowSetCollection row_set;
|
||||
CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid, false};
|
||||
|
||||
auto Xy = RandomDataGenerator{n_samples, n_features, 0}.GenerateDMatrix(true);
|
||||
std::vector<CPUExpandEntry> candidates{{0, 0, 0.4}};
|
||||
RegTree tree;
|
||||
std::vector<float> hess(n_samples, 0);
|
||||
// emulate sampling
|
||||
auto not_sampled = [](size_t i) {
|
||||
size_t const kSampleFactor{3};
|
||||
return i % kSampleFactor != 0;
|
||||
};
|
||||
for (size_t i = 0; i < hess.size(); ++i) {
|
||||
if (not_sampled(i)) {
|
||||
hess[i] = 1.0f;
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<size_t> h_nptr;
|
||||
float split_value{0};
|
||||
for (auto const& page : Xy->GetBatches<GHistIndexMatrix>({Context::kCpuId, 64})) {
|
||||
bst_feature_t const split_ind = 0;
|
||||
auto ptr = page.cut.Ptrs()[split_ind + 1];
|
||||
split_value = page.cut.Values().at(ptr / 2);
|
||||
GetSplit(&tree, split_value, &candidates);
|
||||
partitioner.UpdatePosition(&ctx, page, candidates, &tree);
|
||||
std::vector<bst_node_t> position;
|
||||
partitioner.LeafPartition(&ctx, tree, hess, &position);
|
||||
std::sort(position.begin(), position.end());
|
||||
size_t beg = std::distance(
|
||||
position.begin(),
|
||||
std::find_if(position.begin(), position.end(), [&](bst_node_t nidx) { return nidx >= 0; }));
|
||||
std::vector<size_t> nptr;
|
||||
common::RunLengthEncode(position.cbegin() + beg, position.cend(), &nptr);
|
||||
std::transform(nptr.begin(), nptr.end(), nptr.begin(), [&](size_t x) { return x + beg; });
|
||||
auto n_uniques = std::unique(position.begin() + beg, position.end()) - (position.begin() + beg);
|
||||
ASSERT_EQ(nptr.size(), n_uniques + 1);
|
||||
ASSERT_EQ(nptr[0], beg);
|
||||
ASSERT_EQ(nptr.back(), n_samples);
|
||||
|
||||
h_nptr = nptr;
|
||||
}
|
||||
|
||||
if (h_nptr.front() == n_samples) {
|
||||
return;
|
||||
}
|
||||
|
||||
ASSERT_GE(h_nptr.size(), 2);
|
||||
|
||||
for (auto const& page : Xy->GetBatches<SparsePage>()) {
|
||||
auto batch = page.GetView();
|
||||
size_t left{0};
|
||||
for (size_t i = 0; i < batch.Size(); ++i) {
|
||||
if (not_sampled(i) && batch[i].front().fvalue < split_value) {
|
||||
left++;
|
||||
}
|
||||
}
|
||||
ASSERT_EQ(left, h_nptr[1] - h_nptr[0]); // equal to number of sampled assigned to left
|
||||
}
|
||||
}
|
||||
} // anonymous namespace
|
||||
|
||||
TEST(Approx, LeafPartition) {
|
||||
for (auto n_samples : {0ul, 1ul, 128ul, 256ul}) {
|
||||
TestLeafPartition(n_samples);
|
||||
}
|
||||
}
|
||||
} // namespace tree
|
||||
} // namespace xgboost
|
||||
|
||||
93
tests/cpp/tree/test_common_partitioner.cc
Normal file
93
tests/cpp/tree/test_common_partitioner.cc
Normal file
@@ -0,0 +1,93 @@
|
||||
/**
|
||||
* Copyright 2022-2023 by XGBoost contributors.
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
#include <xgboost/base.h> // for bst_node_t
|
||||
#include <xgboost/context.h> // for Context
|
||||
|
||||
#include <algorithm> // for transform
|
||||
#include <iterator> // for distance
|
||||
#include <vector> // for vector
|
||||
|
||||
#include "../../../src/common/numeric.h" // for ==RunLengthEncode
|
||||
#include "../../../src/common/row_set.h" // for RowSetCollection
|
||||
#include "../../../src/data/gradient_index.h" // for GHistIndexMatrix
|
||||
#include "../../../src/tree/common_row_partitioner.h"
|
||||
#include "../../../src/tree/hist/expand_entry.h" // for CPUExpandEntry
|
||||
#include "../helpers.h" // for RandomDataGenerator
|
||||
#include "test_partitioner.h" // for GetSplit
|
||||
|
||||
namespace xgboost::tree {
|
||||
namespace {
|
||||
void TestLeafPartition(size_t n_samples) {
|
||||
size_t const n_features = 2, base_rowid = 0;
|
||||
Context ctx;
|
||||
common::RowSetCollection row_set;
|
||||
CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid, false};
|
||||
|
||||
auto Xy = RandomDataGenerator{n_samples, n_features, 0}.GenerateDMatrix(true);
|
||||
std::vector<CPUExpandEntry> candidates{{0, 0}};
|
||||
candidates.front().split.loss_chg = 0.4;
|
||||
RegTree tree;
|
||||
std::vector<float> hess(n_samples, 0);
|
||||
// emulate sampling
|
||||
auto not_sampled = [](size_t i) {
|
||||
size_t const kSampleFactor{3};
|
||||
return i % kSampleFactor != 0;
|
||||
};
|
||||
for (size_t i = 0; i < hess.size(); ++i) {
|
||||
if (not_sampled(i)) {
|
||||
hess[i] = 1.0f;
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<size_t> h_nptr;
|
||||
float split_value{0};
|
||||
for (auto const& page : Xy->GetBatches<GHistIndexMatrix>({Context::kCpuId, 64})) {
|
||||
bst_feature_t const split_ind = 0;
|
||||
auto ptr = page.cut.Ptrs()[split_ind + 1];
|
||||
split_value = page.cut.Values().at(ptr / 2);
|
||||
GetSplit(&tree, split_value, &candidates);
|
||||
partitioner.UpdatePosition(&ctx, page, candidates, &tree);
|
||||
std::vector<bst_node_t> position;
|
||||
partitioner.LeafPartition(&ctx, tree, hess, &position);
|
||||
std::sort(position.begin(), position.end());
|
||||
size_t beg = std::distance(
|
||||
position.begin(),
|
||||
std::find_if(position.begin(), position.end(), [&](bst_node_t nidx) { return nidx >= 0; }));
|
||||
std::vector<size_t> nptr;
|
||||
common::RunLengthEncode(position.cbegin() + beg, position.cend(), &nptr);
|
||||
std::transform(nptr.begin(), nptr.end(), nptr.begin(), [&](size_t x) { return x + beg; });
|
||||
auto n_uniques = std::unique(position.begin() + beg, position.end()) - (position.begin() + beg);
|
||||
ASSERT_EQ(nptr.size(), n_uniques + 1);
|
||||
ASSERT_EQ(nptr[0], beg);
|
||||
ASSERT_EQ(nptr.back(), n_samples);
|
||||
|
||||
h_nptr = nptr;
|
||||
}
|
||||
|
||||
if (h_nptr.front() == n_samples) {
|
||||
return;
|
||||
}
|
||||
|
||||
ASSERT_GE(h_nptr.size(), 2);
|
||||
|
||||
for (auto const& page : Xy->GetBatches<SparsePage>()) {
|
||||
auto batch = page.GetView();
|
||||
size_t left{0};
|
||||
for (size_t i = 0; i < batch.Size(); ++i) {
|
||||
if (not_sampled(i) && batch[i].front().fvalue < split_value) {
|
||||
left++;
|
||||
}
|
||||
}
|
||||
ASSERT_EQ(left, h_nptr[1] - h_nptr[0]); // equal to number of sampled assigned to left
|
||||
}
|
||||
}
|
||||
} // anonymous namespace
|
||||
|
||||
TEST(CommonRowPartitioner, LeafPartition) {
|
||||
for (auto n_samples : {0ul, 1ul, 128ul, 256ul}) {
|
||||
TestLeafPartition(n_samples);
|
||||
}
|
||||
}
|
||||
} // namespace xgboost::tree
|
||||
@@ -2,15 +2,26 @@
|
||||
* Copyright 2022-2023 by XGBoost Contributors
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
#include <xgboost/data.h>
|
||||
#include <xgboost/base.h> // for GradientPairInternal, GradientPairPrecise
|
||||
#include <xgboost/data.h> // for MetaInfo
|
||||
#include <xgboost/host_device_vector.h> // for HostDeviceVector
|
||||
#include <xgboost/span.h> // for operator!=, Span, SpanIterator
|
||||
|
||||
#include <algorithm> // next_permutation
|
||||
#include <numeric> // iota
|
||||
#include <algorithm> // for max, max_element, next_permutation, copy
|
||||
#include <cmath> // for isnan
|
||||
#include <cstddef> // for size_t
|
||||
#include <cstdint> // for int32_t, uint64_t, uint32_t
|
||||
#include <limits> // for numeric_limits
|
||||
#include <numeric> // for iota
|
||||
#include <tuple> // for make_tuple, tie, tuple
|
||||
#include <utility> // for pair
|
||||
#include <vector> // for vector
|
||||
|
||||
#include "../../../src/common/hist_util.h" // HistogramCuts,HistCollection
|
||||
#include "../../../src/tree/param.h" // TrainParam
|
||||
#include "../../../src/tree/split_evaluator.h"
|
||||
#include "../helpers.h"
|
||||
#include "../../../src/common/hist_util.h" // for HistogramCuts, HistCollection, GHistRow
|
||||
#include "../../../src/tree/param.h" // for TrainParam, GradStats
|
||||
#include "../../../src/tree/split_evaluator.h" // for TreeEvaluator
|
||||
#include "../helpers.h" // for SimpleLCG, SimpleRealUniformDistribution
|
||||
#include "gtest/gtest_pred_impl.h" // for AssertionResult, ASSERT_EQ, ASSERT_TRUE
|
||||
|
||||
namespace xgboost::tree {
|
||||
/**
|
||||
|
||||
@@ -21,7 +21,8 @@ void TestFitStump(Context const *ctx) {
|
||||
}
|
||||
}
|
||||
linalg::Vector<float> out;
|
||||
FitStump(ctx, gpair, kTargets, &out);
|
||||
MetaInfo info;
|
||||
FitStump(ctx, info, gpair, kTargets, &out);
|
||||
auto h_out = out.HostView();
|
||||
for (auto it = linalg::cbegin(h_out); it != linalg::cend(h_out); ++it) {
|
||||
// sum_hess == kRows
|
||||
|
||||
@@ -40,8 +40,7 @@ TEST(GrowHistMaker, InteractionConstraint)
|
||||
ObjInfo task{ObjInfo::kRegression};
|
||||
{
|
||||
// With constraints
|
||||
RegTree tree;
|
||||
tree.param.num_feature = kCols;
|
||||
RegTree tree{1, kCols};
|
||||
|
||||
std::unique_ptr<TreeUpdater> updater{TreeUpdater::Create("grow_histmaker", &ctx, &task)};
|
||||
TrainParam param;
|
||||
@@ -58,8 +57,7 @@ TEST(GrowHistMaker, InteractionConstraint)
|
||||
}
|
||||
{
|
||||
// Without constraints
|
||||
RegTree tree;
|
||||
tree.param.num_feature = kCols;
|
||||
RegTree tree{1u, kCols};
|
||||
|
||||
std::unique_ptr<TreeUpdater> updater{TreeUpdater::Create("grow_histmaker", &ctx, &task)};
|
||||
std::vector<HostDeviceVector<bst_node_t>> position(1);
|
||||
@@ -76,7 +74,7 @@ TEST(GrowHistMaker, InteractionConstraint)
|
||||
}
|
||||
|
||||
namespace {
|
||||
void TestColumnSplit(int32_t rows, int32_t cols, RegTree const& expected_tree) {
|
||||
void TestColumnSplit(int32_t rows, bst_feature_t cols, RegTree const& expected_tree) {
|
||||
auto p_dmat = GenerateDMatrix(rows, cols);
|
||||
auto p_gradients = GenerateGradients(rows);
|
||||
Context ctx;
|
||||
@@ -87,8 +85,7 @@ void TestColumnSplit(int32_t rows, int32_t cols, RegTree const& expected_tree) {
|
||||
std::unique_ptr<DMatrix> sliced{
|
||||
p_dmat->SliceCol(collective::GetWorldSize(), collective::GetRank())};
|
||||
|
||||
RegTree tree;
|
||||
tree.param.num_feature = cols;
|
||||
RegTree tree{1u, cols};
|
||||
TrainParam param;
|
||||
param.Init(Args{});
|
||||
updater->Update(¶m, p_gradients.get(), sliced.get(), position, {&tree});
|
||||
@@ -107,8 +104,7 @@ TEST(GrowHistMaker, ColumnSplit) {
|
||||
auto constexpr kRows = 32;
|
||||
auto constexpr kCols = 16;
|
||||
|
||||
RegTree expected_tree;
|
||||
expected_tree.param.num_feature = kCols;
|
||||
RegTree expected_tree{1u, kCols};
|
||||
ObjInfo task{ObjInfo::kRegression};
|
||||
{
|
||||
auto p_dmat = GenerateDMatrix(kRows, kCols);
|
||||
|
||||
@@ -17,8 +17,8 @@ TEST(MultiTargetTree, JsonIO) {
|
||||
linalg::Vector<float> right_weight{{3.0f, 4.0f, 5.0f}, {3ul}, Context::kCpuId};
|
||||
tree.ExpandNode(RegTree::kRoot, /*split_idx=*/1, 0.5f, true, base_weight.HostView(),
|
||||
left_weight.HostView(), right_weight.HostView());
|
||||
ASSERT_EQ(tree.param.num_nodes, 3);
|
||||
ASSERT_EQ(tree.param.size_leaf_vector, 3);
|
||||
ASSERT_EQ(tree.NumNodes(), 3);
|
||||
ASSERT_EQ(tree.NumTargets(), 3);
|
||||
ASSERT_EQ(tree.GetMultiTargetTree()->Size(), 3);
|
||||
ASSERT_EQ(tree.Size(), 3);
|
||||
|
||||
@@ -26,20 +26,19 @@ TEST(MultiTargetTree, JsonIO) {
|
||||
tree.SaveModel(&jtree);
|
||||
|
||||
auto check_jtree = [](Json jtree, RegTree const& tree) {
|
||||
ASSERT_EQ(get<String const>(jtree["tree_param"]["num_nodes"]),
|
||||
std::to_string(tree.param.num_nodes));
|
||||
ASSERT_EQ(get<String const>(jtree["tree_param"]["num_nodes"]), std::to_string(tree.NumNodes()));
|
||||
ASSERT_EQ(get<F32Array const>(jtree["base_weights"]).size(),
|
||||
tree.param.num_nodes * tree.param.size_leaf_vector);
|
||||
ASSERT_EQ(get<I32Array const>(jtree["parents"]).size(), tree.param.num_nodes);
|
||||
ASSERT_EQ(get<I32Array const>(jtree["left_children"]).size(), tree.param.num_nodes);
|
||||
ASSERT_EQ(get<I32Array const>(jtree["right_children"]).size(), tree.param.num_nodes);
|
||||
tree.NumNodes() * tree.NumTargets());
|
||||
ASSERT_EQ(get<I32Array const>(jtree["parents"]).size(), tree.NumNodes());
|
||||
ASSERT_EQ(get<I32Array const>(jtree["left_children"]).size(), tree.NumNodes());
|
||||
ASSERT_EQ(get<I32Array const>(jtree["right_children"]).size(), tree.NumNodes());
|
||||
};
|
||||
check_jtree(jtree, tree);
|
||||
|
||||
RegTree loaded;
|
||||
loaded.LoadModel(jtree);
|
||||
ASSERT_TRUE(loaded.IsMultiTarget());
|
||||
ASSERT_EQ(loaded.param.num_nodes, 3);
|
||||
ASSERT_EQ(loaded.NumNodes(), 3);
|
||||
|
||||
Json jtree1{Object{}};
|
||||
loaded.SaveModel(&jtree1);
|
||||
|
||||
@@ -1,17 +1,20 @@
|
||||
/*!
|
||||
* Copyright 2021-2022, XGBoost contributors.
|
||||
/**
|
||||
* Copyright 2021-2023 by XGBoost contributors.
|
||||
*/
|
||||
#ifndef XGBOOST_TESTS_CPP_TREE_TEST_PARTITIONER_H_
|
||||
#define XGBOOST_TESTS_CPP_TREE_TEST_PARTITIONER_H_
|
||||
#include <xgboost/tree_model.h>
|
||||
#include <xgboost/context.h> // for Context
|
||||
#include <xgboost/linalg.h> // for Constant, Vector
|
||||
#include <xgboost/logging.h> // for CHECK
|
||||
#include <xgboost/tree_model.h> // for RegTree
|
||||
|
||||
#include <vector>
|
||||
#include <vector> // for vector
|
||||
|
||||
#include "../../../src/tree/hist/expand_entry.h"
|
||||
#include "../../../src/tree/hist/expand_entry.h" // for CPUExpandEntry, MultiExpandEntry
|
||||
|
||||
namespace xgboost {
|
||||
namespace tree {
|
||||
namespace xgboost::tree {
|
||||
inline void GetSplit(RegTree *tree, float split_value, std::vector<CPUExpandEntry> *candidates) {
|
||||
CHECK(!tree->IsMultiTarget());
|
||||
tree->ExpandNode(
|
||||
/*nid=*/RegTree::kRoot, /*split_index=*/0, /*split_value=*/split_value,
|
||||
/*default_left=*/true, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
|
||||
@@ -21,6 +24,22 @@ inline void GetSplit(RegTree *tree, float split_value, std::vector<CPUExpandEntr
|
||||
candidates->front().split.sindex = 0;
|
||||
candidates->front().split.sindex |= (1U << 31);
|
||||
}
|
||||
} // namespace tree
|
||||
} // namespace xgboost
|
||||
|
||||
inline void GetMultiSplitForTest(RegTree *tree, float split_value,
|
||||
std::vector<MultiExpandEntry> *candidates) {
|
||||
CHECK(tree->IsMultiTarget());
|
||||
auto n_targets = tree->NumTargets();
|
||||
Context ctx;
|
||||
linalg::Vector<float> base_weight{linalg::Constant(&ctx, 0.0f, n_targets)};
|
||||
linalg::Vector<float> left_weight{linalg::Constant(&ctx, 0.0f, n_targets)};
|
||||
linalg::Vector<float> right_weight{linalg::Constant(&ctx, 0.0f, n_targets)};
|
||||
|
||||
tree->ExpandNode(/*nidx=*/RegTree::kRoot, /*split_index=*/0, /*split_value=*/split_value,
|
||||
/*default_left=*/true, base_weight.HostView(), left_weight.HostView(),
|
||||
right_weight.HostView());
|
||||
candidates->front().split.split_value = split_value;
|
||||
candidates->front().split.sindex = 0;
|
||||
candidates->front().split.sindex |= (1U << 31);
|
||||
}
|
||||
} // namespace xgboost::tree
|
||||
#endif // XGBOOST_TESTS_CPP_TREE_TEST_PARTITIONER_H_
|
||||
|
||||
@@ -32,8 +32,7 @@ TEST(Updater, Prune) {
|
||||
auto ctx = CreateEmptyGenericParam(GPUIDX);
|
||||
|
||||
// prepare tree
|
||||
RegTree tree = RegTree();
|
||||
tree.param.UpdateAllowUnknown(cfg);
|
||||
RegTree tree = RegTree{1u, kCols};
|
||||
std::vector<RegTree*> trees {&tree};
|
||||
// prepare pruner
|
||||
TrainParam param;
|
||||
|
||||
@@ -1,25 +1,29 @@
|
||||
/*!
|
||||
* Copyright 2018-2022 by XGBoost Contributors
|
||||
/**
|
||||
* Copyright 2018-2023 by XGBoost Contributors
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
#include <xgboost/host_device_vector.h>
|
||||
#include <xgboost/tree_updater.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstddef> // for size_t
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "../../../src/tree/common_row_partitioner.h"
|
||||
#include "../../../src/tree/hist/expand_entry.h" // for MultiExpandEntry, CPUExpandEntry
|
||||
#include "../../../src/tree/param.h"
|
||||
#include "../../../src/tree/split_evaluator.h"
|
||||
#include "../../../src/tree/common_row_partitioner.h"
|
||||
#include "../helpers.h"
|
||||
#include "test_partitioner.h"
|
||||
#include "xgboost/data.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace tree {
|
||||
TEST(QuantileHist, Partitioner) {
|
||||
size_t n_samples = 1024, n_features = 1, base_rowid = 0;
|
||||
namespace xgboost::tree {
|
||||
template <typename ExpandEntry>
|
||||
void TestPartitioner(bst_target_t n_targets) {
|
||||
std::size_t n_samples = 1024, base_rowid = 0;
|
||||
bst_feature_t n_features = 1;
|
||||
|
||||
Context ctx;
|
||||
ctx.InitAllowUnknown(Args{});
|
||||
|
||||
@@ -29,7 +33,8 @@ TEST(QuantileHist, Partitioner) {
|
||||
ASSERT_EQ(partitioner.Partitions()[0].Size(), n_samples);
|
||||
|
||||
auto Xy = RandomDataGenerator{n_samples, n_features, 0}.GenerateDMatrix(true);
|
||||
std::vector<CPUExpandEntry> candidates{{0, 0, 0.4}};
|
||||
std::vector<ExpandEntry> candidates{{0, 0}};
|
||||
candidates.front().split.loss_chg = 0.4;
|
||||
|
||||
auto cuts = common::SketchOnDMatrix(Xy.get(), 64, ctx.Threads());
|
||||
|
||||
@@ -40,9 +45,13 @@ TEST(QuantileHist, Partitioner) {
|
||||
column_indices.InitFromSparse(page, gmat, 0.5, ctx.Threads());
|
||||
{
|
||||
auto min_value = gmat.cut.MinValues()[split_ind];
|
||||
RegTree tree;
|
||||
RegTree tree{n_targets, n_features};
|
||||
CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid, false};
|
||||
GetSplit(&tree, min_value, &candidates);
|
||||
if constexpr (std::is_same<ExpandEntry, CPUExpandEntry>::value) {
|
||||
GetSplit(&tree, min_value, &candidates);
|
||||
} else {
|
||||
GetMultiSplitForTest(&tree, min_value, &candidates);
|
||||
}
|
||||
partitioner.UpdatePosition<false, true>(&ctx, gmat, column_indices, candidates, &tree);
|
||||
ASSERT_EQ(partitioner.Size(), 3);
|
||||
ASSERT_EQ(partitioner[1].Size(), 0);
|
||||
@@ -52,9 +61,13 @@ TEST(QuantileHist, Partitioner) {
|
||||
CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid, false};
|
||||
auto ptr = gmat.cut.Ptrs()[split_ind + 1];
|
||||
float split_value = gmat.cut.Values().at(ptr / 2);
|
||||
RegTree tree;
|
||||
GetSplit(&tree, split_value, &candidates);
|
||||
auto left_nidx = tree[RegTree::kRoot].LeftChild();
|
||||
RegTree tree{n_targets, n_features};
|
||||
if constexpr (std::is_same<ExpandEntry, CPUExpandEntry>::value) {
|
||||
GetSplit(&tree, split_value, &candidates);
|
||||
} else {
|
||||
GetMultiSplitForTest(&tree, split_value, &candidates);
|
||||
}
|
||||
auto left_nidx = tree.LeftChild(RegTree::kRoot);
|
||||
partitioner.UpdatePosition<false, true>(&ctx, gmat, column_indices, candidates, &tree);
|
||||
|
||||
auto elem = partitioner[left_nidx];
|
||||
@@ -64,14 +77,17 @@ TEST(QuantileHist, Partitioner) {
|
||||
auto value = gmat.cut.Values().at(gmat.index[*it]);
|
||||
ASSERT_LE(value, split_value);
|
||||
}
|
||||
auto right_nidx = tree[RegTree::kRoot].RightChild();
|
||||
auto right_nidx = tree.RightChild(RegTree::kRoot);
|
||||
elem = partitioner[right_nidx];
|
||||
for (auto it = elem.begin; it != elem.end; ++it) {
|
||||
auto value = gmat.cut.Values().at(gmat.index[*it]);
|
||||
ASSERT_GT(value, split_value) << *it;
|
||||
ASSERT_GT(value, split_value);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} // namespace tree
|
||||
} // namespace xgboost
|
||||
|
||||
TEST(QuantileHist, Partitioner) { TestPartitioner<CPUExpandEntry>(1); }
|
||||
|
||||
TEST(QuantileHist, MultiPartitioner) { TestPartitioner<MultiExpandEntry>(3); }
|
||||
} // namespace xgboost::tree
|
||||
|
||||
@@ -28,9 +28,8 @@ TEST(Updater, Refresh) {
|
||||
{"num_feature", std::to_string(kCols)},
|
||||
{"reg_lambda", "1"}};
|
||||
|
||||
RegTree tree = RegTree();
|
||||
RegTree tree = RegTree{1u, kCols};
|
||||
auto ctx = CreateEmptyGenericParam(GPUIDX);
|
||||
tree.param.UpdateAllowUnknown(cfg);
|
||||
std::vector<RegTree*> trees{&tree};
|
||||
|
||||
ObjInfo task{ObjInfo::kRegression};
|
||||
|
||||
@@ -11,9 +11,8 @@
|
||||
namespace xgboost {
|
||||
TEST(Tree, ModelShape) {
|
||||
bst_feature_t n_features = std::numeric_limits<uint32_t>::max();
|
||||
RegTree tree;
|
||||
tree.param.UpdateAllowUnknown(Args{{"num_feature", std::to_string(n_features)}});
|
||||
ASSERT_EQ(tree.param.num_feature, n_features);
|
||||
RegTree tree{1u, n_features};
|
||||
ASSERT_EQ(tree.NumFeatures(), n_features);
|
||||
|
||||
dmlc::TemporaryDirectory tempdir;
|
||||
const std::string tmp_file = tempdir.path + "/tree.model";
|
||||
@@ -27,7 +26,7 @@ TEST(Tree, ModelShape) {
|
||||
RegTree new_tree;
|
||||
std::unique_ptr<dmlc::Stream> fi(dmlc::Stream::Create(tmp_file.c_str(), "r"));
|
||||
new_tree.Load(fi.get());
|
||||
ASSERT_EQ(new_tree.param.num_feature, n_features);
|
||||
ASSERT_EQ(new_tree.NumFeatures(), n_features);
|
||||
}
|
||||
{
|
||||
// json
|
||||
@@ -39,7 +38,7 @@ TEST(Tree, ModelShape) {
|
||||
|
||||
auto j_loaded = Json::Load(StringView{dumped.data(), dumped.size()});
|
||||
new_tree.LoadModel(j_loaded);
|
||||
ASSERT_EQ(new_tree.param.num_feature, n_features);
|
||||
ASSERT_EQ(new_tree.NumFeatures(), n_features);
|
||||
}
|
||||
{
|
||||
// ubjson
|
||||
@@ -51,7 +50,7 @@ TEST(Tree, ModelShape) {
|
||||
|
||||
auto j_loaded = Json::Load(StringView{dumped.data(), dumped.size()}, std::ios::binary);
|
||||
new_tree.LoadModel(j_loaded);
|
||||
ASSERT_EQ(new_tree.param.num_feature, n_features);
|
||||
ASSERT_EQ(new_tree.NumFeatures(), n_features);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -488,8 +487,7 @@ TEST(Tree, JsonIO) {
|
||||
|
||||
RegTree loaded_tree;
|
||||
loaded_tree.LoadModel(j_tree);
|
||||
ASSERT_EQ(loaded_tree.param.num_nodes, 3);
|
||||
|
||||
ASSERT_EQ(loaded_tree.NumNodes(), 3);
|
||||
ASSERT_TRUE(loaded_tree == tree);
|
||||
|
||||
auto left = tree[0].LeftChild();
|
||||
|
||||
@@ -37,8 +37,7 @@ class UpdaterTreeStatTest : public ::testing::Test {
|
||||
: CreateEmptyGenericParam(Context::kCpuId));
|
||||
auto up = std::unique_ptr<TreeUpdater>{TreeUpdater::Create(updater, &ctx, &task)};
|
||||
up->Configure(Args{});
|
||||
RegTree tree;
|
||||
tree.param.num_feature = kCols;
|
||||
RegTree tree{1u, kCols};
|
||||
std::vector<HostDeviceVector<bst_node_t>> position(1);
|
||||
up->Update(¶m, &gpairs_, p_dmat_.get(), position, {&tree});
|
||||
|
||||
@@ -95,16 +94,14 @@ class UpdaterEtaTest : public ::testing::Test {
|
||||
param1.Init(Args{{"eta", "1.0"}});
|
||||
|
||||
for (size_t iter = 0; iter < 4; ++iter) {
|
||||
RegTree tree_0;
|
||||
RegTree tree_0{1u, kCols};
|
||||
{
|
||||
tree_0.param.num_feature = kCols;
|
||||
std::vector<HostDeviceVector<bst_node_t>> position(1);
|
||||
up_0->Update(¶m0, &gpairs_, p_dmat_.get(), position, {&tree_0});
|
||||
}
|
||||
|
||||
RegTree tree_1;
|
||||
RegTree tree_1{1u, kCols};
|
||||
{
|
||||
tree_1.param.num_feature = kCols;
|
||||
std::vector<HostDeviceVector<bst_node_t>> position(1);
|
||||
up_1->Update(¶m1, &gpairs_, p_dmat_.get(), position, {&tree_1});
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user