Partitioner for multi-target tree. (#8922)
This commit is contained in:
@@ -1,15 +1,17 @@
|
||||
/**
|
||||
* Copyright 2020-2023 by XGBoost contributors
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
#include <vector>
|
||||
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "../../../src/common/row_set.h"
|
||||
#include "../../../src/common/partition_builder.h"
|
||||
#include "../../../src/common/row_set.h"
|
||||
#include "../helpers.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace common {
|
||||
|
||||
namespace xgboost::common {
|
||||
TEST(PartitionBuilder, BasicTest) {
|
||||
constexpr size_t kBlockSize = 16;
|
||||
constexpr size_t kNodes = 5;
|
||||
@@ -74,6 +76,4 @@ TEST(PartitionBuilder, BasicTest) {
|
||||
ASSERT_EQ(n_right, (kBlockSize - rows_for_left_node[nid]) * tasks[nid]);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace common
|
||||
} // namespace xgboost
|
||||
} // namespace xgboost::common
|
||||
|
||||
@@ -148,78 +148,5 @@ TEST(Approx, PartitionerColSplit) {
|
||||
RunWithInMemoryCommunicator(kWorkers, TestColumnSplitPartitioner, n_samples, base_rowid, Xy,
|
||||
&hess, min_value, mid_value, mid_partitioner);
|
||||
}
|
||||
|
||||
namespace {
|
||||
void TestLeafPartition(size_t n_samples) {
|
||||
size_t const n_features = 2, base_rowid = 0;
|
||||
Context ctx;
|
||||
common::RowSetCollection row_set;
|
||||
CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid, false};
|
||||
|
||||
auto Xy = RandomDataGenerator{n_samples, n_features, 0}.GenerateDMatrix(true);
|
||||
std::vector<CPUExpandEntry> candidates{{0, 0}};
|
||||
candidates.front().split.loss_chg = 0.4;
|
||||
RegTree tree;
|
||||
std::vector<float> hess(n_samples, 0);
|
||||
// emulate sampling
|
||||
auto not_sampled = [](size_t i) {
|
||||
size_t const kSampleFactor{3};
|
||||
return i % kSampleFactor != 0;
|
||||
};
|
||||
for (size_t i = 0; i < hess.size(); ++i) {
|
||||
if (not_sampled(i)) {
|
||||
hess[i] = 1.0f;
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<size_t> h_nptr;
|
||||
float split_value{0};
|
||||
for (auto const& page : Xy->GetBatches<GHistIndexMatrix>({Context::kCpuId, 64})) {
|
||||
bst_feature_t const split_ind = 0;
|
||||
auto ptr = page.cut.Ptrs()[split_ind + 1];
|
||||
split_value = page.cut.Values().at(ptr / 2);
|
||||
GetSplit(&tree, split_value, &candidates);
|
||||
partitioner.UpdatePosition(&ctx, page, candidates, &tree);
|
||||
std::vector<bst_node_t> position;
|
||||
partitioner.LeafPartition(&ctx, tree, hess, &position);
|
||||
std::sort(position.begin(), position.end());
|
||||
size_t beg = std::distance(
|
||||
position.begin(),
|
||||
std::find_if(position.begin(), position.end(), [&](bst_node_t nidx) { return nidx >= 0; }));
|
||||
std::vector<size_t> nptr;
|
||||
common::RunLengthEncode(position.cbegin() + beg, position.cend(), &nptr);
|
||||
std::transform(nptr.begin(), nptr.end(), nptr.begin(), [&](size_t x) { return x + beg; });
|
||||
auto n_uniques = std::unique(position.begin() + beg, position.end()) - (position.begin() + beg);
|
||||
ASSERT_EQ(nptr.size(), n_uniques + 1);
|
||||
ASSERT_EQ(nptr[0], beg);
|
||||
ASSERT_EQ(nptr.back(), n_samples);
|
||||
|
||||
h_nptr = nptr;
|
||||
}
|
||||
|
||||
if (h_nptr.front() == n_samples) {
|
||||
return;
|
||||
}
|
||||
|
||||
ASSERT_GE(h_nptr.size(), 2);
|
||||
|
||||
for (auto const& page : Xy->GetBatches<SparsePage>()) {
|
||||
auto batch = page.GetView();
|
||||
size_t left{0};
|
||||
for (size_t i = 0; i < batch.Size(); ++i) {
|
||||
if (not_sampled(i) && batch[i].front().fvalue < split_value) {
|
||||
left++;
|
||||
}
|
||||
}
|
||||
ASSERT_EQ(left, h_nptr[1] - h_nptr[0]); // equal to number of sampled assigned to left
|
||||
}
|
||||
}
|
||||
} // anonymous namespace
|
||||
|
||||
// Run the leaf-partition check for an empty input, a single row, and two larger sizes.
TEST(Approx, LeafPartition) {
  unsigned long const kSampleCounts[] = {0ul, 1ul, 128ul, 256ul};
  for (auto n_rows : kSampleCounts) {
    TestLeafPartition(n_rows);
  }
}
|
||||
} // namespace tree
|
||||
} // namespace xgboost
|
||||
|
||||
93
tests/cpp/tree/test_common_partitioner.cc
Normal file
93
tests/cpp/tree/test_common_partitioner.cc
Normal file
@@ -0,0 +1,93 @@
|
||||
/**
|
||||
* Copyright 2022-2023 by XGBoost contributors.
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
#include <xgboost/base.h> // for bst_node_t
|
||||
#include <xgboost/context.h> // for Context
|
||||
|
||||
#include <algorithm> // for transform
|
||||
#include <iterator> // for distance
|
||||
#include <vector> // for vector
|
||||
|
||||
#include "../../../src/common/numeric.h" // for RunLengthEncode
|
||||
#include "../../../src/common/row_set.h" // for RowSetCollection
|
||||
#include "../../../src/data/gradient_index.h" // for GHistIndexMatrix
|
||||
#include "../../../src/tree/common_row_partitioner.h"
|
||||
#include "../../../src/tree/hist/expand_entry.h" // for CPUExpandEntry
|
||||
#include "../helpers.h" // for RandomDataGenerator
|
||||
#include "test_partitioner.h" // for GetSplit
|
||||
|
||||
namespace xgboost::tree {
|
||||
namespace {
|
||||
void TestLeafPartition(size_t n_samples) {
|
||||
size_t const n_features = 2, base_rowid = 0;
|
||||
Context ctx;
|
||||
common::RowSetCollection row_set;
|
||||
CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid, false};
|
||||
|
||||
auto Xy = RandomDataGenerator{n_samples, n_features, 0}.GenerateDMatrix(true);
|
||||
std::vector<CPUExpandEntry> candidates{{0, 0}};
|
||||
candidates.front().split.loss_chg = 0.4;
|
||||
RegTree tree;
|
||||
std::vector<float> hess(n_samples, 0);
|
||||
// emulate sampling
|
||||
auto not_sampled = [](size_t i) {
|
||||
size_t const kSampleFactor{3};
|
||||
return i % kSampleFactor != 0;
|
||||
};
|
||||
for (size_t i = 0; i < hess.size(); ++i) {
|
||||
if (not_sampled(i)) {
|
||||
hess[i] = 1.0f;
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<size_t> h_nptr;
|
||||
float split_value{0};
|
||||
for (auto const& page : Xy->GetBatches<GHistIndexMatrix>({Context::kCpuId, 64})) {
|
||||
bst_feature_t const split_ind = 0;
|
||||
auto ptr = page.cut.Ptrs()[split_ind + 1];
|
||||
split_value = page.cut.Values().at(ptr / 2);
|
||||
GetSplit(&tree, split_value, &candidates);
|
||||
partitioner.UpdatePosition(&ctx, page, candidates, &tree);
|
||||
std::vector<bst_node_t> position;
|
||||
partitioner.LeafPartition(&ctx, tree, hess, &position);
|
||||
std::sort(position.begin(), position.end());
|
||||
size_t beg = std::distance(
|
||||
position.begin(),
|
||||
std::find_if(position.begin(), position.end(), [&](bst_node_t nidx) { return nidx >= 0; }));
|
||||
std::vector<size_t> nptr;
|
||||
common::RunLengthEncode(position.cbegin() + beg, position.cend(), &nptr);
|
||||
std::transform(nptr.begin(), nptr.end(), nptr.begin(), [&](size_t x) { return x + beg; });
|
||||
auto n_uniques = std::unique(position.begin() + beg, position.end()) - (position.begin() + beg);
|
||||
ASSERT_EQ(nptr.size(), n_uniques + 1);
|
||||
ASSERT_EQ(nptr[0], beg);
|
||||
ASSERT_EQ(nptr.back(), n_samples);
|
||||
|
||||
h_nptr = nptr;
|
||||
}
|
||||
|
||||
if (h_nptr.front() == n_samples) {
|
||||
return;
|
||||
}
|
||||
|
||||
ASSERT_GE(h_nptr.size(), 2);
|
||||
|
||||
for (auto const& page : Xy->GetBatches<SparsePage>()) {
|
||||
auto batch = page.GetView();
|
||||
size_t left{0};
|
||||
for (size_t i = 0; i < batch.Size(); ++i) {
|
||||
if (not_sampled(i) && batch[i].front().fvalue < split_value) {
|
||||
left++;
|
||||
}
|
||||
}
|
||||
ASSERT_EQ(left, h_nptr[1] - h_nptr[0]); // equal to number of sampled assigned to left
|
||||
}
|
||||
}
|
||||
} // anonymous namespace
|
||||
|
||||
// Run the leaf-partition check for an empty input, a single row, and two larger sizes.
TEST(CommonRowPartitioner, LeafPartition) {
  unsigned long const kSampleCounts[] = {0ul, 1ul, 128ul, 256ul};
  for (auto n_rows : kSampleCounts) {
    TestLeafPartition(n_rows);
  }
}
|
||||
} // namespace xgboost::tree
|
||||
@@ -1,17 +1,20 @@
|
||||
/*!
|
||||
* Copyright 2021-2022, XGBoost contributors.
|
||||
/**
|
||||
* Copyright 2021-2023 by XGBoost contributors.
|
||||
*/
|
||||
#ifndef XGBOOST_TESTS_CPP_TREE_TEST_PARTITIONER_H_
|
||||
#define XGBOOST_TESTS_CPP_TREE_TEST_PARTITIONER_H_
|
||||
#include <xgboost/tree_model.h>
|
||||
#include <xgboost/context.h> // for Context
|
||||
#include <xgboost/linalg.h> // for Constant, Vector
|
||||
#include <xgboost/logging.h> // for CHECK
|
||||
#include <xgboost/tree_model.h> // for RegTree
|
||||
|
||||
#include <vector>
|
||||
#include <vector> // for vector
|
||||
|
||||
#include "../../../src/tree/hist/expand_entry.h"
|
||||
#include "../../../src/tree/hist/expand_entry.h" // for CPUExpandEntry, MultiExpandEntry
|
||||
|
||||
namespace xgboost {
|
||||
namespace tree {
|
||||
namespace xgboost::tree {
|
||||
inline void GetSplit(RegTree *tree, float split_value, std::vector<CPUExpandEntry> *candidates) {
|
||||
CHECK(!tree->IsMultiTarget());
|
||||
tree->ExpandNode(
|
||||
/*nid=*/RegTree::kRoot, /*split_index=*/0, /*split_value=*/split_value,
|
||||
/*default_left=*/true, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
|
||||
@@ -21,6 +24,22 @@ inline void GetSplit(RegTree *tree, float split_value, std::vector<CPUExpandEntr
|
||||
candidates->front().split.sindex = 0;
|
||||
candidates->front().split.sindex |= (1U << 31);
|
||||
}
|
||||
} // namespace tree
|
||||
} // namespace xgboost
|
||||
|
||||
/**
 * \brief Test helper: split the root of a multi-target tree on feature 0 and record the
 *        same split in the first expand candidate.
 *
 * \param tree        Tree to expand at its root; must be multi-target (enforced by CHECK).
 * \param split_value Threshold used for the root split.
 * \param candidates  Candidate list whose first entry has its split overwritten; must be
 *                    non-empty.
 */
inline void GetMultiSplitForTest(RegTree *tree, float split_value,
                                 std::vector<MultiExpandEntry> *candidates) {
  CHECK(tree->IsMultiTarget());

  Context ctx;
  auto n_targets = tree->NumTargets();
  // Zero weights, one entry per target, for the parent and for each child.
  linalg::Vector<float> parent_w{linalg::Constant(&ctx, 0.0f, n_targets)};
  linalg::Vector<float> left_w{linalg::Constant(&ctx, 0.0f, n_targets)};
  linalg::Vector<float> right_w{linalg::Constant(&ctx, 0.0f, n_targets)};

  tree->ExpandNode(/*nidx=*/RegTree::kRoot, /*split_index=*/0, /*split_value=*/split_value,
                   /*default_left=*/true, parent_w.HostView(), left_w.HostView(),
                   right_w.HostView());

  auto &root_split = candidates->front().split;
  root_split.split_value = split_value;
  // Feature index 0 with bit 31 set. NOTE(review): the high bit presumably encodes the
  // default-left direction, matching default_left=true above -- confirm against SplitEntry.
  root_split.sindex = (1U << 31);
}
|
||||
} // namespace xgboost::tree
|
||||
#endif // XGBOOST_TESTS_CPP_TREE_TEST_PARTITIONER_H_
|
||||
|
||||
@@ -1,25 +1,29 @@
|
||||
/*!
|
||||
* Copyright 2018-2022 by XGBoost Contributors
|
||||
/**
|
||||
* Copyright 2018-2023 by XGBoost Contributors
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
#include <xgboost/host_device_vector.h>
|
||||
#include <xgboost/tree_updater.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstddef> // for size_t
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "../../../src/tree/common_row_partitioner.h"
|
||||
#include "../../../src/tree/hist/expand_entry.h" // for MultiExpandEntry, CPUExpandEntry
|
||||
#include "../../../src/tree/param.h"
|
||||
#include "../../../src/tree/split_evaluator.h"
|
||||
#include "../../../src/tree/common_row_partitioner.h"
|
||||
#include "../helpers.h"
|
||||
#include "test_partitioner.h"
|
||||
#include "xgboost/data.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace tree {
|
||||
TEST(QuantileHist, Partitioner) {
|
||||
size_t n_samples = 1024, n_features = 1, base_rowid = 0;
|
||||
namespace xgboost::tree {
|
||||
template <typename ExpandEntry>
|
||||
void TestPartitioner(bst_target_t n_targets) {
|
||||
std::size_t n_samples = 1024, base_rowid = 0;
|
||||
bst_feature_t n_features = 1;
|
||||
|
||||
Context ctx;
|
||||
ctx.InitAllowUnknown(Args{});
|
||||
|
||||
@@ -29,7 +33,7 @@ TEST(QuantileHist, Partitioner) {
|
||||
ASSERT_EQ(partitioner.Partitions()[0].Size(), n_samples);
|
||||
|
||||
auto Xy = RandomDataGenerator{n_samples, n_features, 0}.GenerateDMatrix(true);
|
||||
std::vector<CPUExpandEntry> candidates{{0, 0}};
|
||||
std::vector<ExpandEntry> candidates{{0, 0}};
|
||||
candidates.front().split.loss_chg = 0.4;
|
||||
|
||||
auto cuts = common::SketchOnDMatrix(Xy.get(), 64, ctx.Threads());
|
||||
@@ -41,9 +45,13 @@ TEST(QuantileHist, Partitioner) {
|
||||
column_indices.InitFromSparse(page, gmat, 0.5, ctx.Threads());
|
||||
{
|
||||
auto min_value = gmat.cut.MinValues()[split_ind];
|
||||
RegTree tree;
|
||||
RegTree tree{n_targets, n_features};
|
||||
CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid, false};
|
||||
GetSplit(&tree, min_value, &candidates);
|
||||
if constexpr (std::is_same<ExpandEntry, CPUExpandEntry>::value) {
|
||||
GetSplit(&tree, min_value, &candidates);
|
||||
} else {
|
||||
GetMultiSplitForTest(&tree, min_value, &candidates);
|
||||
}
|
||||
partitioner.UpdatePosition<false, true>(&ctx, gmat, column_indices, candidates, &tree);
|
||||
ASSERT_EQ(partitioner.Size(), 3);
|
||||
ASSERT_EQ(partitioner[1].Size(), 0);
|
||||
@@ -53,9 +61,13 @@ TEST(QuantileHist, Partitioner) {
|
||||
CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid, false};
|
||||
auto ptr = gmat.cut.Ptrs()[split_ind + 1];
|
||||
float split_value = gmat.cut.Values().at(ptr / 2);
|
||||
RegTree tree;
|
||||
GetSplit(&tree, split_value, &candidates);
|
||||
auto left_nidx = tree[RegTree::kRoot].LeftChild();
|
||||
RegTree tree{n_targets, n_features};
|
||||
if constexpr (std::is_same<ExpandEntry, CPUExpandEntry>::value) {
|
||||
GetSplit(&tree, split_value, &candidates);
|
||||
} else {
|
||||
GetMultiSplitForTest(&tree, split_value, &candidates);
|
||||
}
|
||||
auto left_nidx = tree.LeftChild(RegTree::kRoot);
|
||||
partitioner.UpdatePosition<false, true>(&ctx, gmat, column_indices, candidates, &tree);
|
||||
|
||||
auto elem = partitioner[left_nidx];
|
||||
@@ -65,14 +77,17 @@ TEST(QuantileHist, Partitioner) {
|
||||
auto value = gmat.cut.Values().at(gmat.index[*it]);
|
||||
ASSERT_LE(value, split_value);
|
||||
}
|
||||
auto right_nidx = tree[RegTree::kRoot].RightChild();
|
||||
auto right_nidx = tree.RightChild(RegTree::kRoot);
|
||||
elem = partitioner[right_nidx];
|
||||
for (auto it = elem.begin; it != elem.end; ++it) {
|
||||
auto value = gmat.cut.Values().at(gmat.index[*it]);
|
||||
ASSERT_GT(value, split_value) << *it;
|
||||
ASSERT_GT(value, split_value);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} // namespace tree
|
||||
} // namespace xgboost
|
||||
|
||||
// Partitioner check with CPUExpandEntry candidates and n_targets = 1.
TEST(QuantileHist, Partitioner) {
  TestPartitioner<CPUExpandEntry>(1);
}
|
||||
|
||||
// Partitioner check with MultiExpandEntry candidates and n_targets = 3.
TEST(QuantileHist, MultiPartitioner) {
  TestPartitioner<MultiExpandEntry>(3);
}
|
||||
} // namespace xgboost::tree
|
||||
|
||||
Reference in New Issue
Block a user