sync up May15 2023
This commit is contained in:
@@ -6,6 +6,8 @@
|
||||
#include <string>
|
||||
|
||||
#include "../../../src/tree/constraints.h"
|
||||
#include "../../../src/tree/hist/evaluate_splits.h"
|
||||
#include "../helpers.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace tree {
|
||||
@@ -56,5 +58,37 @@ TEST(CPUFeatureInteractionConstraint, Basic) {
|
||||
ASSERT_FALSE(constraints.Query(1, 5));
|
||||
}
|
||||
|
||||
// Verifies that a HistEvaluator configured with monotone constraints still reports
// `has_constraint` on its split evaluator after a root split has been applied.
TEST(CPUMonoConstraint, Basic) {
  std::size_t const kRows{64};
  std::size_t const kCols{16};
  Context ctx;

  // Build the `monotone_constraints` argument: one entry (value 1) per feature, dumped as
  // a JSON integer array, then the first and last characters of the dump (presumably the
  // JSON brackets) are replaced with parentheses.
  TrainParam param;
  std::vector<std::int32_t> mono(kCols, 1);
  I32Array arr;
  for (auto const constraint : mono) {
    arr.GetArray().push_back(constraint);
  }
  Json jarr{std::move(arr)};
  std::string str_mono;
  Json::Dump(jarr, &str_mono);
  str_mono.front() = '(';
  str_mono.back() = ')';

  param.UpdateAllowUnknown(Args{{"monotone_constraints", str_mono}});

  auto Xy = RandomDataGenerator{kRows, kCols, 0.0}.GenerateDMatrix(true);
  auto sampler = std::make_shared<common::ColumnSampler>();

  HistEvaluator<CPUExpandEntry> evaluator{&ctx, &param, Xy->Info(), sampler};
  evaluator.InitRoot(GradStats{2.0, 2.0});

  // Hand-craft a split on feature 0 for the root node and apply it to a fresh tree.
  SplitEntry split;
  split.Update(1.0f, 0, 3.0, false, false, GradStats{1.0, 1.0}, GradStats{1.0, 1.0});
  CPUExpandEntry entry{0, 0, split};
  RegTree tree{1, static_cast<bst_feature_t>(kCols)};
  evaluator.ApplyTreeSplit(entry, &tree);

  ASSERT_TRUE(evaluator.Evaluator().has_constraint);
}
|
||||
} // namespace tree
|
||||
} // namespace xgboost
|
||||
|
||||
@@ -90,13 +90,16 @@ void TestColumnSplit(int32_t rows, bst_feature_t cols, RegTree const& expected_t
|
||||
param.Init(Args{});
|
||||
updater->Update(&param, p_gradients.get(), sliced.get(), position, {&tree});
|
||||
|
||||
EXPECT_EQ(tree.NumExtraNodes(), 10);
|
||||
EXPECT_EQ(tree[0].SplitIndex(), 1);
|
||||
ASSERT_EQ(tree.NumExtraNodes(), 10);
|
||||
ASSERT_EQ(tree[0].SplitIndex(), 1);
|
||||
|
||||
EXPECT_NE(tree[tree[0].LeftChild()].SplitIndex(), 0);
|
||||
EXPECT_NE(tree[tree[0].RightChild()].SplitIndex(), 0);
|
||||
ASSERT_NE(tree[tree[0].LeftChild()].SplitIndex(), 0);
|
||||
ASSERT_NE(tree[tree[0].RightChild()].SplitIndex(), 0);
|
||||
|
||||
EXPECT_EQ(tree, expected_tree);
|
||||
FeatureMap fmap;
|
||||
auto json = tree.DumpModel(fmap, false, "json");
|
||||
auto expected_json = expected_tree.DumpModel(fmap, false, "json");
|
||||
ASSERT_EQ(json, expected_json);
|
||||
}
|
||||
} // anonymous namespace
|
||||
|
||||
|
||||
@@ -19,6 +19,8 @@
|
||||
#include "xgboost/data.h"
|
||||
|
||||
namespace xgboost::tree {
|
||||
|
||||
namespace {
|
||||
template <typename ExpandEntry>
|
||||
void TestPartitioner(bst_target_t n_targets) {
|
||||
std::size_t n_samples = 1024, base_rowid = 0;
|
||||
@@ -86,8 +88,117 @@ void TestPartitioner(bst_target_t n_targets) {
|
||||
}
|
||||
}
|
||||
}
|
||||
} // anonymous namespace
|
||||
|
||||
// Runs the row-partitioner test instantiated with CPUExpandEntry and a single target.
TEST(QuantileHist, Partitioner) {
  bst_target_t const n_targets{1};
  TestPartitioner<CPUExpandEntry>(n_targets);
}
|
||||
|
||||
// Runs the row-partitioner test instantiated with MultiExpandEntry and three targets.
TEST(QuantileHist, MultiPartitioner) {
  bst_target_t const n_targets{3};
  TestPartitioner<MultiExpandEntry>(n_targets);
}
|
||||
|
||||
namespace {
|
||||
|
||||
template <typename ExpandEntry>
|
||||
void VerifyColumnSplitPartitioner(bst_target_t n_targets, size_t n_samples,
|
||||
bst_feature_t n_features, size_t base_rowid,
|
||||
std::shared_ptr<DMatrix> Xy, float min_value, float mid_value,
|
||||
CommonRowPartitioner const& expected_mid_partitioner) {
|
||||
auto dmat =
|
||||
std::unique_ptr<DMatrix>{Xy->SliceCol(collective::GetWorldSize(), collective::GetRank())};
|
||||
|
||||
Context ctx;
|
||||
ctx.InitAllowUnknown(Args{});
|
||||
|
||||
std::vector<ExpandEntry> candidates{{0, 0}};
|
||||
candidates.front().split.loss_chg = 0.4;
|
||||
auto cuts = common::SketchOnDMatrix(&ctx, dmat.get(), 64);
|
||||
|
||||
for (auto const& page : Xy->GetBatches<SparsePage>()) {
|
||||
GHistIndexMatrix gmat(page, {}, cuts, 64, true, 0.5, ctx.Threads());
|
||||
bst_feature_t const split_ind = 0;
|
||||
common::ColumnMatrix column_indices;
|
||||
column_indices.InitFromSparse(page, gmat, 0.5, ctx.Threads());
|
||||
{
|
||||
RegTree tree{n_targets, n_features};
|
||||
CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid, true};
|
||||
if constexpr (std::is_same<ExpandEntry, CPUExpandEntry>::value) {
|
||||
GetSplit(&tree, min_value, &candidates);
|
||||
} else {
|
||||
GetMultiSplitForTest(&tree, min_value, &candidates);
|
||||
}
|
||||
partitioner.UpdatePosition<false, true>(&ctx, gmat, column_indices, candidates, &tree);
|
||||
ASSERT_EQ(partitioner.Size(), 3);
|
||||
ASSERT_EQ(partitioner[1].Size(), 0);
|
||||
ASSERT_EQ(partitioner[2].Size(), n_samples);
|
||||
}
|
||||
{
|
||||
RegTree tree{n_targets, n_features};
|
||||
CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid, true};
|
||||
if constexpr (std::is_same<ExpandEntry, CPUExpandEntry>::value) {
|
||||
GetSplit(&tree, mid_value, &candidates);
|
||||
} else {
|
||||
GetMultiSplitForTest(&tree, mid_value, &candidates);
|
||||
}
|
||||
auto left_nidx = tree.LeftChild(RegTree::kRoot);
|
||||
partitioner.UpdatePosition<false, true>(&ctx, gmat, column_indices, candidates, &tree);
|
||||
|
||||
auto elem = partitioner[left_nidx];
|
||||
ASSERT_LT(elem.Size(), n_samples);
|
||||
ASSERT_GT(elem.Size(), 1);
|
||||
auto expected_elem = expected_mid_partitioner[left_nidx];
|
||||
ASSERT_EQ(elem.Size(), expected_elem.Size());
|
||||
for (auto it = elem.begin, eit = expected_elem.begin; it != elem.end; ++it, ++eit) {
|
||||
ASSERT_EQ(*it, *eit);
|
||||
}
|
||||
|
||||
auto right_nidx = tree.RightChild(RegTree::kRoot);
|
||||
elem = partitioner[right_nidx];
|
||||
expected_elem = expected_mid_partitioner[right_nidx];
|
||||
ASSERT_EQ(elem.Size(), expected_elem.Size());
|
||||
for (auto it = elem.begin, eit = expected_elem.begin; it != elem.end; ++it, ++eit) {
|
||||
ASSERT_EQ(*it, *eit);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <typename ExpandEntry>
|
||||
void TestColumnSplitPartitioner(bst_target_t n_targets) {
|
||||
std::size_t n_samples = 1024, base_rowid = 0;
|
||||
bst_feature_t n_features = 16;
|
||||
auto Xy = RandomDataGenerator{n_samples, n_features, 0}.GenerateDMatrix(true);
|
||||
std::vector<ExpandEntry> candidates{{0, 0}};
|
||||
candidates.front().split.loss_chg = 0.4;
|
||||
|
||||
Context ctx;
|
||||
ctx.InitAllowUnknown(Args{});
|
||||
auto cuts = common::SketchOnDMatrix(&ctx, Xy.get(), 64);
|
||||
|
||||
float min_value, mid_value;
|
||||
CommonRowPartitioner mid_partitioner{&ctx, n_samples, base_rowid, false};
|
||||
for (auto const& page : Xy->GetBatches<SparsePage>()) {
|
||||
GHistIndexMatrix gmat(page, {}, cuts, 64, true, 0.5, ctx.Threads());
|
||||
bst_feature_t const split_ind = 0;
|
||||
common::ColumnMatrix column_indices;
|
||||
column_indices.InitFromSparse(page, gmat, 0.5, ctx.Threads());
|
||||
min_value = gmat.cut.MinValues()[split_ind];
|
||||
|
||||
auto ptr = gmat.cut.Ptrs()[split_ind + 1];
|
||||
mid_value = gmat.cut.Values().at(ptr / 2);
|
||||
RegTree tree{n_targets, n_features};
|
||||
if constexpr (std::is_same<ExpandEntry, CPUExpandEntry>::value) {
|
||||
GetSplit(&tree, mid_value, &candidates);
|
||||
} else {
|
||||
GetMultiSplitForTest(&tree, mid_value, &candidates);
|
||||
}
|
||||
mid_partitioner.UpdatePosition<false, true>(&ctx, gmat, column_indices, candidates, &tree);
|
||||
}
|
||||
|
||||
auto constexpr kWorkers = 4;
|
||||
RunWithInMemoryCommunicator(kWorkers, VerifyColumnSplitPartitioner<ExpandEntry>, n_targets,
|
||||
n_samples, n_features, base_rowid, Xy, min_value, mid_value, mid_partitioner);
|
||||
}
|
||||
} // anonymous namespace
|
||||
|
||||
// Runs the column-split partitioner test instantiated with CPUExpandEntry and a single
// target.
TEST(QuantileHist, PartitionerColSplit) {
  bst_target_t const n_targets{1};
  TestColumnSplitPartitioner<CPUExpandEntry>(n_targets);
}
|
||||
|
||||
// Runs the column-split partitioner test instantiated with MultiExpandEntry and three
// targets.
TEST(QuantileHist, MultiPartitionerColSplit) {
  bst_target_t const n_targets{3};
  TestColumnSplitPartitioner<MultiExpandEntry>(n_targets);
}
|
||||
} // namespace xgboost::tree
|
||||
|
||||
Reference in New Issue
Block a user