Define multi expand entry. (#8895)

This commit is contained in:
Jiaming Yuan
2023-03-13 19:31:05 +08:00
committed by GitHub
parent bbee355b45
commit 5ba3509dd3
7 changed files with 125 additions and 60 deletions

View File

@@ -98,7 +98,8 @@ TEST(HistEvaluator, Apply) {
auto sampler = std::make_shared<common::ColumnSampler>();
auto evaluator_ = HistEvaluator<CPUExpandEntry>{&ctx, &param, dmat->Info(), sampler};
CPUExpandEntry entry{0, 0, 10.0f};
CPUExpandEntry entry{0, 0};
entry.split.loss_chg = 10.0f;
entry.split.left_sum = GradStats{0.4, 0.6f};
entry.split.right_sum = GradStats{0.5, 0.5f};

View File

@@ -41,10 +41,10 @@ void TestAddHistRows(bool is_distributed) {
tree.ExpandNode(0, 0, 0, false, 0, 0, 0, 0, 0, 0, 0);
tree.ExpandNode(tree[0].LeftChild(), 0, 0, false, 0, 0, 0, 0, 0, 0, 0);
tree.ExpandNode(tree[0].RightChild(), 0, 0, false, 0, 0, 0, 0, 0, 0, 0);
nodes_for_explicit_hist_build_.emplace_back(3, tree.GetDepth(3), 0.0f);
nodes_for_explicit_hist_build_.emplace_back(4, tree.GetDepth(4), 0.0f);
nodes_for_subtraction_trick_.emplace_back(5, tree.GetDepth(5), 0.0f);
nodes_for_subtraction_trick_.emplace_back(6, tree.GetDepth(6), 0.0f);
nodes_for_explicit_hist_build_.emplace_back(3, tree.GetDepth(3));
nodes_for_explicit_hist_build_.emplace_back(4, tree.GetDepth(4));
nodes_for_subtraction_trick_.emplace_back(5, tree.GetDepth(5));
nodes_for_subtraction_trick_.emplace_back(6, tree.GetDepth(6));
HistogramBuilder<CPUExpandEntry> histogram_builder;
histogram_builder.Reset(gmat.cut.TotalBins(), {kMaxBins, 0.5}, omp_get_max_threads(), 1,
@@ -98,7 +98,7 @@ void TestSyncHist(bool is_distributed) {
}
// level 0
nodes_for_explicit_hist_build_.emplace_back(0, tree.GetDepth(0), 0.0f);
nodes_for_explicit_hist_build_.emplace_back(0, tree.GetDepth(0));
histogram.AddHistRows(&starting_index, &sync_count,
nodes_for_explicit_hist_build_,
nodes_for_subtraction_trick_, &tree);
@@ -108,10 +108,8 @@ void TestSyncHist(bool is_distributed) {
nodes_for_subtraction_trick_.clear();
// level 1
nodes_for_explicit_hist_build_.emplace_back(tree[0].LeftChild(),
tree.GetDepth(1), 0.0f);
nodes_for_subtraction_trick_.emplace_back(tree[0].RightChild(),
tree.GetDepth(2), 0.0f);
nodes_for_explicit_hist_build_.emplace_back(tree[0].LeftChild(), tree.GetDepth(1));
nodes_for_subtraction_trick_.emplace_back(tree[0].RightChild(), tree.GetDepth(2));
histogram.AddHistRows(&starting_index, &sync_count,
nodes_for_explicit_hist_build_,
@@ -123,10 +121,10 @@ void TestSyncHist(bool is_distributed) {
nodes_for_explicit_hist_build_.clear();
nodes_for_subtraction_trick_.clear();
// level 2
nodes_for_explicit_hist_build_.emplace_back(3, tree.GetDepth(3), 0.0f);
nodes_for_subtraction_trick_.emplace_back(4, tree.GetDepth(4), 0.0f);
nodes_for_explicit_hist_build_.emplace_back(5, tree.GetDepth(5), 0.0f);
nodes_for_subtraction_trick_.emplace_back(6, tree.GetDepth(6), 0.0f);
nodes_for_explicit_hist_build_.emplace_back(3, tree.GetDepth(3));
nodes_for_subtraction_trick_.emplace_back(4, tree.GetDepth(4));
nodes_for_explicit_hist_build_.emplace_back(5, tree.GetDepth(5));
nodes_for_subtraction_trick_.emplace_back(6, tree.GetDepth(6));
histogram.AddHistRows(&starting_index, &sync_count,
nodes_for_explicit_hist_build_,
@@ -256,7 +254,7 @@ void TestBuildHistogram(bool is_distributed, bool force_read_by_column, bool is_
std::iota(row_indices.begin(), row_indices.end(), 0);
row_set_collection.Init();
CPUExpandEntry node(RegTree::kRoot, tree.GetDepth(0), 0.0f);
CPUExpandEntry node{RegTree::kRoot, tree.GetDepth(0)};
std::vector<CPUExpandEntry> nodes_for_explicit_hist_build;
nodes_for_explicit_hist_build.push_back(node);
for (auto const &gidx : p_fmat->GetBatches<GHistIndexMatrix>({kMaxBins, 0.5})) {
@@ -330,7 +328,7 @@ void TestHistogramCategorical(size_t n_categories, bool force_read_by_column) {
BatchParam batch_param{0, static_cast<int32_t>(kBins)};
RegTree tree;
CPUExpandEntry node(RegTree::kRoot, tree.GetDepth(0), 0.0f);
CPUExpandEntry node{RegTree::kRoot, tree.GetDepth(0)};
std::vector<CPUExpandEntry> nodes_for_explicit_hist_build;
nodes_for_explicit_hist_build.push_back(node);
@@ -403,7 +401,7 @@ void TestHistogramExternalMemory(BatchParam batch_param, bool is_approx, bool fo
RegTree tree;
std::vector<CPUExpandEntry> nodes;
nodes.emplace_back(0, tree.GetDepth(0), 0.0f);
nodes.emplace_back(0, tree.GetDepth(0));
common::GHistRow multi_page;
HistogramBuilder<CPUExpandEntry> multi_build;

View File

@@ -1,5 +1,5 @@
/*!
* Copyright 2021-2022, XGBoost contributors.
/**
* Copyright 2021-2023 by XGBoost contributors.
*/
#include <gtest/gtest.h>
@@ -10,7 +10,6 @@
namespace xgboost {
namespace tree {
namespace {
std::vector<float> GenerateHess(size_t n_samples) {
auto grad = GenerateRandomGradients(n_samples);
@@ -32,7 +31,8 @@ TEST(Approx, Partitioner) {
auto const Xy = RandomDataGenerator{n_samples, n_features, 0}.GenerateDMatrix(true);
auto hess = GenerateHess(n_samples);
std::vector<CPUExpandEntry> candidates{{0, 0, 0.4}};
std::vector<CPUExpandEntry> candidates{{0, 0}};
candidates.front().split.loss_chg = 0.4;
for (auto const& page : Xy->GetBatches<GHistIndexMatrix>({64, hess, true})) {
bst_feature_t const split_ind = 0;
@@ -79,7 +79,9 @@ void TestColumnSplitPartitioner(size_t n_samples, size_t base_rowid, std::shared
CommonRowPartitioner const& expected_mid_partitioner) {
auto dmat =
std::unique_ptr<DMatrix>{Xy->SliceCol(collective::GetWorldSize(), collective::GetRank())};
std::vector<CPUExpandEntry> candidates{{0, 0, 0.4}};
std::vector<CPUExpandEntry> candidates{{0, 0}};
candidates.front().split.loss_chg = 0.4;
Context ctx;
ctx.InitAllowUnknown(Args{});
for (auto const& page : dmat->GetBatches<GHistIndexMatrix>({64, *hess, true})) {
@@ -124,7 +126,8 @@ TEST(Approx, PartitionerColSplit) {
size_t n_samples = 1024, n_features = 16, base_rowid = 0;
auto const Xy = RandomDataGenerator{n_samples, n_features, 0}.GenerateDMatrix(true);
auto hess = GenerateHess(n_samples);
std::vector<CPUExpandEntry> candidates{{0, 0, 0.4}};
std::vector<CPUExpandEntry> candidates{{0, 0}};
candidates.front().split.loss_chg = 0.4;
float min_value, mid_value;
Context ctx;
@@ -154,7 +157,8 @@ void TestLeafPartition(size_t n_samples) {
CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid, false};
auto Xy = RandomDataGenerator{n_samples, n_features, 0}.GenerateDMatrix(true);
std::vector<CPUExpandEntry> candidates{{0, 0, 0.4}};
std::vector<CPUExpandEntry> candidates{{0, 0}};
candidates.front().split.loss_chg = 0.4;
RegTree tree;
std::vector<float> hess(n_samples, 0);
// emulate sampling

View File

@@ -29,7 +29,8 @@ TEST(QuantileHist, Partitioner) {
ASSERT_EQ(partitioner.Partitions()[0].Size(), n_samples);
auto Xy = RandomDataGenerator{n_samples, n_features, 0}.GenerateDMatrix(true);
std::vector<CPUExpandEntry> candidates{{0, 0, 0.4}};
std::vector<CPUExpandEntry> candidates{{0, 0}};
candidates.front().split.loss_chg = 0.4;
auto cuts = common::SketchOnDMatrix(Xy.get(), 64, ctx.Threads());