/** * Copyright 2020-2024, XGBoost Contributors */ #include #include // for Context #include // for ObjInfo #include // for RegTree #include // for TreeUpdater #include // for unique_ptr #include "../../../src/tree/param.h" // for TrainParam #include "../helpers.h" namespace xgboost { /** * @brief Test the tree statistic (like sum Hessian) is correct. */ class UpdaterTreeStatTest : public ::testing::Test { protected: std::shared_ptr p_dmat_; linalg::Matrix gpairs_; size_t constexpr static kRows = 10; size_t constexpr static kCols = 10; protected: void SetUp() override { p_dmat_ = RandomDataGenerator(kRows, kCols, .5f).GenerateDMatrix(true); auto g = GenerateRandomGradients(kRows); gpairs_.Reshape(kRows, 1); gpairs_.Data()->Copy(g); } void RunTest(Context const* ctx, std::string updater) { tree::TrainParam param; ObjInfo task{ObjInfo::kRegression}; param.Init(Args{}); auto up = std::unique_ptr{TreeUpdater::Create(updater, ctx, &task)}; up->Configure(Args{}); RegTree tree{1u, kCols}; std::vector> position(1); up->Update(¶m, &gpairs_, p_dmat_.get(), position, {&tree}); tree.WalkTree([&tree](bst_node_t nidx) { if (tree[nidx].IsLeaf()) { // 1.0 is the default `min_child_weight`. CHECK_GE(tree.Stat(nidx).sum_hess, 1.0); } return true; }); } }; #if defined(XGBOOST_USE_CUDA) TEST_F(UpdaterTreeStatTest, GpuHist) { auto ctx = MakeCUDACtx(0); this->RunTest(&ctx, "grow_gpu_hist"); } TEST_F(UpdaterTreeStatTest, GpuApprox) { auto ctx = MakeCUDACtx(0); this->RunTest(&ctx, "grow_gpu_approx"); } #endif // defined(XGBOOST_USE_CUDA) TEST_F(UpdaterTreeStatTest, Hist) { Context ctx; this->RunTest(&ctx, "grow_quantile_histmaker"); } TEST_F(UpdaterTreeStatTest, Exact) { Context ctx; this->RunTest(&ctx, "grow_colmaker"); } TEST_F(UpdaterTreeStatTest, Approx) { Context ctx; this->RunTest(&ctx, "grow_histmaker"); } /** * @brief Test changing learning rate doesn't change internal splits. */ class TestSplitWithEta : public ::testing::Test { protected: void Run(Context const* ctx, bst_target_t n_targets, std::string name) { auto Xy = RandomDataGenerator{512, 64, 0.2}.Targets(n_targets).GenerateDMatrix(true); auto gen_tree = [&](float eta) { auto tree = std::make_unique(n_targets, static_cast(Xy->Info().num_col_)); std::vector trees{tree.get()}; ObjInfo task{ObjInfo::kRegression}; std::unique_ptr updater{TreeUpdater::Create(name, ctx, &task)}; updater->Configure({}); auto grad = GenerateRandomGradients(ctx, Xy->Info().num_row_, n_targets); CHECK_EQ(grad.Shape(1), n_targets); tree::TrainParam param; param.Init(Args{{"learning_rate", std::to_string(eta)}}); HostDeviceVector position; updater->Update(¶m, &grad, Xy.get(), common::Span{&position, 1}, trees); CHECK_EQ(tree->NumTargets(), n_targets); if (n_targets > 1) { CHECK(tree->IsMultiTarget()); } return tree; }; auto eta_ratio = 8.0f; auto p_tree0 = gen_tree(0.1f); auto p_tree1 = gen_tree(0.1f * eta_ratio); // Just to make sure we are not testing a stump. CHECK_GE(p_tree0->NumExtraNodes(), 32); bst_node_t n_nodes{0}; p_tree0->WalkTree([&](bst_node_t nidx) { if (p_tree0->IsLeaf(nidx)) { CHECK(p_tree1->IsLeaf(nidx)); if (p_tree0->IsMultiTarget()) { CHECK(p_tree1->IsMultiTarget()); auto leaf_0 = p_tree0->GetMultiTargetTree()->LeafValue(nidx); auto leaf_1 = p_tree1->GetMultiTargetTree()->LeafValue(nidx); CHECK_EQ(leaf_0.Size(), leaf_1.Size()); for (std::size_t i = 0; i < leaf_0.Size(); ++i) { CHECK_EQ(leaf_0(i) * eta_ratio, leaf_1(i)); } CHECK(std::isnan(p_tree0->SplitCond(nidx))); CHECK(std::isnan(p_tree1->SplitCond(nidx))); } else { // NON-mt tree reuses split cond for leaf value. auto leaf_0 = p_tree0->SplitCond(nidx); auto leaf_1 = p_tree1->SplitCond(nidx); CHECK_EQ(leaf_0 * eta_ratio, leaf_1); } } else { CHECK(!p_tree1->IsLeaf(nidx)); CHECK_EQ(p_tree0->SplitCond(nidx), p_tree1->SplitCond(nidx)); } n_nodes++; return true; }); ASSERT_EQ(n_nodes, p_tree0->NumExtraNodes() + 1); } }; TEST_F(TestSplitWithEta, HistMulti) { Context ctx; bst_target_t n_targets{3}; this->Run(&ctx, n_targets, "grow_quantile_histmaker"); } TEST_F(TestSplitWithEta, Hist) { Context ctx; bst_target_t n_targets{1}; this->Run(&ctx, n_targets, "grow_quantile_histmaker"); } TEST_F(TestSplitWithEta, Approx) { Context ctx; bst_target_t n_targets{1}; this->Run(&ctx, n_targets, "grow_histmaker"); } TEST_F(TestSplitWithEta, Exact) { Context ctx; bst_target_t n_targets{1}; this->Run(&ctx, n_targets, "grow_colmaker"); } #if defined(XGBOOST_USE_CUDA) TEST_F(TestSplitWithEta, GpuHist) { auto ctx = MakeCUDACtx(0); bst_target_t n_targets{1}; this->Run(&ctx, n_targets, "grow_gpu_hist"); } TEST_F(TestSplitWithEta, GpuApprox) { auto ctx = MakeCUDACtx(0); bst_target_t n_targets{1}; this->Run(&ctx, n_targets, "grow_gpu_approx"); } #endif // defined(XGBOOST_USE_CUDA) class TestMinSplitLoss : public ::testing::Test { std::shared_ptr dmat_; linalg::Matrix gpair_; void SetUp() override { constexpr size_t kRows = 32; constexpr size_t kCols = 16; constexpr float kSparsity = 0.6; dmat_ = RandomDataGenerator(kRows, kCols, kSparsity).Seed(3).GenerateDMatrix(); gpair_.Reshape(kRows, 1); gpair_.Data()->Copy(GenerateRandomGradients(kRows)); } std::int32_t Update(Context const* ctx, std::string updater, float gamma) { Args args{{"max_depth", "1"}, {"max_leaves", "0"}, // Disable all other parameters. {"colsample_bynode", "1"}, {"colsample_bylevel", "1"}, {"colsample_bytree", "1"}, {"min_child_weight", "0.01"}, {"reg_alpha", "0"}, {"reg_lambda", "0"}, {"max_delta_step", "0"}, // test gamma {"gamma", std::to_string(gamma)}}; tree::TrainParam param; param.UpdateAllowUnknown(args); ObjInfo task{ObjInfo::kRegression}; auto up = std::unique_ptr{TreeUpdater::Create(updater, ctx, &task)}; up->Configure({}); RegTree tree; std::vector> position(1); up->Update(¶m, &gpair_, dmat_.get(), position, {&tree}); auto n_nodes = tree.NumExtraNodes(); return n_nodes; } public: void RunTest(Context const* ctx, std::string updater) { { int32_t n_nodes = Update(ctx, updater, 0.01); // This is not strictly verified, meaning the numeber `2` is whatever GPU_Hist retured // when writing this test, and only used for testing larger gamma (below) does prevent // building tree. ASSERT_EQ(n_nodes, 2); } { int32_t n_nodes = Update(ctx, updater, 100.0); // No new nodes with gamma == 100. ASSERT_EQ(n_nodes, static_cast(0)); } } }; /* Exact tree method requires a pruner as an additional updater, so not tested here. */ TEST_F(TestMinSplitLoss, Approx) { Context ctx; this->RunTest(&ctx, "grow_histmaker"); } TEST_F(TestMinSplitLoss, Hist) { Context ctx; this->RunTest(&ctx, "grow_quantile_histmaker"); } #if defined(XGBOOST_USE_CUDA) TEST_F(TestMinSplitLoss, GpuHist) { auto ctx = MakeCUDACtx(0); this->RunTest(&ctx, "grow_gpu_hist"); } TEST_F(TestMinSplitLoss, GpuApprox) { auto ctx = MakeCUDACtx(0); this->RunTest(&ctx, "grow_gpu_approx"); } #endif // defined(XGBOOST_USE_CUDA) } // namespace xgboost