Support learning rate for zero-hessian objectives. (#8866)
This commit is contained in:
@@ -6,8 +6,9 @@
|
||||
#include <xgboost/json.h>
|
||||
#include <xgboost/objective.h>
|
||||
|
||||
#include "../../../src/common/linalg_op.h" // begin,end
|
||||
#include "../../../src/common/linalg_op.h" // for begin, end
|
||||
#include "../../../src/objective/adaptive.h"
|
||||
#include "../../../src/tree/param.h" // for TrainParam
|
||||
#include "../helpers.h"
|
||||
#include "xgboost/base.h"
|
||||
#include "xgboost/data.h"
|
||||
@@ -408,9 +409,13 @@ TEST(Objective, DeclareUnifiedTest(AbsoluteError)) {
|
||||
h_predt[i] = labels[i] + i;
|
||||
}
|
||||
|
||||
obj->UpdateTreeLeaf(position, info, predt, 0, &tree);
|
||||
ASSERT_EQ(tree[1].LeafValue(), -1);
|
||||
ASSERT_EQ(tree[2].LeafValue(), -4);
|
||||
tree::TrainParam param;
|
||||
param.Init(Args{});
|
||||
auto lr = param.learning_rate;
|
||||
|
||||
obj->UpdateTreeLeaf(position, info, param.learning_rate, predt, 0, &tree);
|
||||
ASSERT_EQ(tree[1].LeafValue(), -1.0f * lr);
|
||||
ASSERT_EQ(tree[2].LeafValue(), -4.0f * lr);
|
||||
}
|
||||
|
||||
TEST(Objective, DeclareUnifiedTest(AbsoluteErrorLeaf)) {
|
||||
@@ -457,11 +462,16 @@ TEST(Objective, DeclareUnifiedTest(AbsoluteErrorLeaf)) {
|
||||
ASSERT_EQ(tree.GetNumLeaves(), 4);
|
||||
|
||||
auto empty_leaf = tree[4].LeafValue();
|
||||
obj->UpdateTreeLeaf(position, info, predt, t, &tree);
|
||||
ASSERT_EQ(tree[3].LeafValue(), -5);
|
||||
ASSERT_EQ(tree[4].LeafValue(), empty_leaf);
|
||||
ASSERT_EQ(tree[5].LeafValue(), -10);
|
||||
ASSERT_EQ(tree[6].LeafValue(), -14);
|
||||
|
||||
tree::TrainParam param;
|
||||
param.Init(Args{});
|
||||
auto lr = param.learning_rate;
|
||||
|
||||
obj->UpdateTreeLeaf(position, info, lr, predt, t, &tree);
|
||||
ASSERT_EQ(tree[3].LeafValue(), -5.0f * lr);
|
||||
ASSERT_EQ(tree[4].LeafValue(), empty_leaf * lr);
|
||||
ASSERT_EQ(tree[5].LeafValue(), -10.0f * lr);
|
||||
ASSERT_EQ(tree[6].LeafValue(), -14.0f * lr);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -24,7 +24,7 @@ void TestEvaluateSplits(bool force_read_by_column) {
|
||||
|
||||
auto dmat = RandomDataGenerator(kRows, kCols, 0).Seed(3).GenerateDMatrix();
|
||||
|
||||
auto evaluator = HistEvaluator<CPUExpandEntry>{&ctx, param, dmat->Info(), sampler};
|
||||
auto evaluator = HistEvaluator<CPUExpandEntry>{&ctx, &param, dmat->Info(), sampler};
|
||||
common::HistCollection hist;
|
||||
std::vector<GradientPair> row_gpairs = {
|
||||
{1.23f, 0.24f}, {0.24f, 0.25f}, {0.26f, 0.27f}, {2.27f, 0.28f},
|
||||
@@ -96,7 +96,7 @@ TEST(HistEvaluator, Apply) {
|
||||
param.UpdateAllowUnknown(Args{{"min_child_weight", "0"}, {"reg_lambda", "0.0"}});
|
||||
auto dmat = RandomDataGenerator(kNRows, kNCols, 0).Seed(3).GenerateDMatrix();
|
||||
auto sampler = std::make_shared<common::ColumnSampler>();
|
||||
auto evaluator_ = HistEvaluator<CPUExpandEntry>{&ctx, param, dmat->Info(), sampler};
|
||||
auto evaluator_ = HistEvaluator<CPUExpandEntry>{&ctx, &param, dmat->Info(), sampler};
|
||||
|
||||
CPUExpandEntry entry{0, 0, 10.0f};
|
||||
entry.split.left_sum = GradStats{0.4, 0.6f};
|
||||
@@ -123,7 +123,7 @@ TEST_F(TestPartitionBasedSplit, CPUHist) {
|
||||
// check the evaluator is returning the optimal split
|
||||
std::vector<FeatureType> ft{FeatureType::kCategorical};
|
||||
auto sampler = std::make_shared<common::ColumnSampler>();
|
||||
HistEvaluator<CPUExpandEntry> evaluator{&ctx, param_, info_, sampler};
|
||||
HistEvaluator<CPUExpandEntry> evaluator{&ctx, &param_, info_, sampler};
|
||||
evaluator.InitRoot(GradStats{total_gpair_});
|
||||
RegTree tree;
|
||||
std::vector<CPUExpandEntry> entries(1);
|
||||
@@ -153,7 +153,7 @@ auto CompareOneHotAndPartition(bool onehot) {
|
||||
RandomDataGenerator(kRows, kCols, 0).Seed(3).Type(ft).MaxCategory(n_cats).GenerateDMatrix();
|
||||
|
||||
auto sampler = std::make_shared<common::ColumnSampler>();
|
||||
auto evaluator = HistEvaluator<CPUExpandEntry>{&ctx, param, dmat->Info(), sampler};
|
||||
auto evaluator = HistEvaluator<CPUExpandEntry>{&ctx, &param, dmat->Info(), sampler};
|
||||
std::vector<CPUExpandEntry> entries(1);
|
||||
|
||||
for (auto const &gmat : dmat->GetBatches<GHistIndexMatrix>({32, param.sparse_threshold})) {
|
||||
@@ -204,7 +204,7 @@ TEST_F(TestCategoricalSplitWithMissing, HistEvaluator) {
|
||||
info.num_col_ = 1;
|
||||
info.feature_types = {FeatureType::kCategorical};
|
||||
Context ctx;
|
||||
auto evaluator = HistEvaluator<CPUExpandEntry>{&ctx, param_, info, sampler};
|
||||
auto evaluator = HistEvaluator<CPUExpandEntry>{&ctx, &param_, info, sampler};
|
||||
evaluator.InitRoot(GradStats{parent_sum_});
|
||||
|
||||
std::vector<CPUExpandEntry> entries(1);
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*!
|
||||
* Copyright 2022 by XGBoost Contributors
|
||||
/**
|
||||
* Copyright 2022-2023 by XGBoost Contributors
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
#include <xgboost/data.h>
|
||||
@@ -12,8 +12,7 @@
|
||||
#include "../../../src/tree/split_evaluator.h"
|
||||
#include "../helpers.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace tree {
|
||||
namespace xgboost::tree {
|
||||
/**
|
||||
* \brief Enumerate all possible partitions for categorical split.
|
||||
*/
|
||||
@@ -151,5 +150,4 @@ class TestCategoricalSplitWithMissing : public testing::Test {
|
||||
ASSERT_EQ(right_sum.GetHess(), parent_sum_.GetHess() - left_sum.GetHess());
|
||||
}
|
||||
};
|
||||
} // namespace tree
|
||||
} // namespace xgboost
|
||||
} // namespace xgboost::tree
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*!
|
||||
* Copyright 2017-2022 XGBoost contributors
|
||||
/**
|
||||
* Copyright 2017-2023 by XGBoost contributors
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
#include <thrust/device_vector.h>
|
||||
@@ -13,6 +13,7 @@
|
||||
#include "../../../src/common/common.h"
|
||||
#include "../../../src/data/sparse_page_source.h"
|
||||
#include "../../../src/tree/constraints.cuh"
|
||||
#include "../../../src/tree/param.h" // for TrainParam
|
||||
#include "../../../src/tree/updater_gpu_common.cuh"
|
||||
#include "../../../src/tree/updater_gpu_hist.cu"
|
||||
#include "../filesystem.h" // dmlc::TemporaryDirectory
|
||||
@@ -21,8 +22,7 @@
|
||||
#include "xgboost/context.h"
|
||||
#include "xgboost/json.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace tree {
|
||||
namespace xgboost::tree {
|
||||
TEST(GpuHist, DeviceHistogram) {
|
||||
// Ensures that node allocates correctly after reaching `kStopGrowingSize`.
|
||||
dh::safe_cuda(cudaSetDevice(0));
|
||||
@@ -83,11 +83,12 @@ void TestBuildHist(bool use_shared_memory_histograms) {
|
||||
int const kNRows = 16, kNCols = 8;
|
||||
|
||||
TrainParam param;
|
||||
std::vector<std::pair<std::string, std::string>> args {
|
||||
{"max_depth", "6"},
|
||||
{"max_leaves", "0"},
|
||||
Args args{
|
||||
{"max_depth", "6"},
|
||||
{"max_leaves", "0"},
|
||||
};
|
||||
param.Init(args);
|
||||
|
||||
auto page = BuildEllpackPage(kNRows, kNCols);
|
||||
BatchParam batch_param{};
|
||||
Context ctx{CreateEmptyGenericParam(0)};
|
||||
@@ -168,7 +169,6 @@ void TestHistogramIndexImpl() {
|
||||
int constexpr kNRows = 1000, kNCols = 10;
|
||||
|
||||
// Build 2 matrices and build a histogram maker with that
|
||||
|
||||
Context ctx(CreateEmptyGenericParam(0));
|
||||
tree::GPUHistMaker hist_maker{&ctx, ObjInfo{ObjInfo::kRegression}},
|
||||
hist_maker_ext{&ctx, ObjInfo{ObjInfo::kRegression}};
|
||||
@@ -179,15 +179,14 @@ void TestHistogramIndexImpl() {
|
||||
std::unique_ptr<DMatrix> hist_maker_ext_dmat(
|
||||
CreateSparsePageDMatrixWithRC(kNRows, kNCols, 128UL, true, tempdir));
|
||||
|
||||
std::vector<std::pair<std::string, std::string>> training_params = {
|
||||
{"max_depth", "10"},
|
||||
{"max_leaves", "0"}
|
||||
};
|
||||
Args training_params = {{"max_depth", "10"}, {"max_leaves", "0"}};
|
||||
TrainParam param;
|
||||
param.UpdateAllowUnknown(training_params);
|
||||
|
||||
hist_maker.Configure(training_params);
|
||||
hist_maker.InitDataOnce(hist_maker_dmat.get());
|
||||
hist_maker.InitDataOnce(&param, hist_maker_dmat.get());
|
||||
hist_maker_ext.Configure(training_params);
|
||||
hist_maker_ext.InitDataOnce(hist_maker_ext_dmat.get());
|
||||
hist_maker_ext.InitDataOnce(&param, hist_maker_ext_dmat.get());
|
||||
|
||||
// Extract the device maker from the histogram makers and from that its compressed
|
||||
// histogram index
|
||||
@@ -237,13 +236,15 @@ void UpdateTree(HostDeviceVector<GradientPair>* gpair, DMatrix* dmat,
|
||||
{"subsample", std::to_string(subsample)},
|
||||
{"sampling_method", sampling_method},
|
||||
};
|
||||
TrainParam param;
|
||||
param.UpdateAllowUnknown(args);
|
||||
|
||||
Context ctx(CreateEmptyGenericParam(0));
|
||||
tree::GPUHistMaker hist_maker{&ctx,ObjInfo{ObjInfo::kRegression}};
|
||||
hist_maker.Configure(args);
|
||||
|
||||
std::vector<HostDeviceVector<bst_node_t>> position(1);
|
||||
hist_maker.Update(gpair, dmat, common::Span<HostDeviceVector<bst_node_t>>{position}, {tree});
|
||||
hist_maker.Update(&param, gpair, dmat, common::Span<HostDeviceVector<bst_node_t>>{position},
|
||||
{tree});
|
||||
auto cache = linalg::VectorView<float>{preds->DeviceSpan(), {preds->Size()}, 0};
|
||||
hist_maker.UpdatePredictionCache(dmat, cache);
|
||||
}
|
||||
@@ -391,13 +392,11 @@ TEST(GpuHist, ConfigIO) {
|
||||
Json j_updater { Object() };
|
||||
updater->SaveConfig(&j_updater);
|
||||
ASSERT_TRUE(IsA<Object>(j_updater["gpu_hist_train_param"]));
|
||||
ASSERT_TRUE(IsA<Object>(j_updater["train_param"]));
|
||||
updater->LoadConfig(j_updater);
|
||||
|
||||
Json j_updater_roundtrip { Object() };
|
||||
updater->SaveConfig(&j_updater_roundtrip);
|
||||
ASSERT_TRUE(IsA<Object>(j_updater_roundtrip["gpu_hist_train_param"]));
|
||||
ASSERT_TRUE(IsA<Object>(j_updater_roundtrip["train_param"]));
|
||||
|
||||
ASSERT_EQ(j_updater, j_updater_roundtrip);
|
||||
}
|
||||
@@ -414,5 +413,4 @@ TEST(GpuHist, MaxDepth) {
|
||||
|
||||
ASSERT_THROW({learner->UpdateOneIter(0, p_mat);}, dmlc::Error);
|
||||
}
|
||||
} // namespace tree
|
||||
} // namespace xgboost
|
||||
} // namespace xgboost::tree
|
||||
|
||||
@@ -5,11 +5,10 @@
|
||||
#include <xgboost/tree_model.h>
|
||||
#include <xgboost/tree_updater.h>
|
||||
|
||||
#include "../../../src/tree/param.h" // for TrainParam
|
||||
#include "../helpers.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace tree {
|
||||
|
||||
namespace xgboost::tree {
|
||||
std::shared_ptr<DMatrix> GenerateDMatrix(std::size_t rows, std::size_t cols){
|
||||
return RandomDataGenerator{rows, cols, 0.6f}.Seed(3).GenerateDMatrix();
|
||||
}
|
||||
@@ -45,11 +44,11 @@ TEST(GrowHistMaker, InteractionConstraint)
|
||||
|
||||
std::unique_ptr<TreeUpdater> updater{
|
||||
TreeUpdater::Create("grow_histmaker", &ctx, ObjInfo{ObjInfo::kRegression})};
|
||||
updater->Configure(Args{
|
||||
{"interaction_constraints", "[[0, 1]]"},
|
||||
{"num_feature", std::to_string(kCols)}});
|
||||
TrainParam param;
|
||||
param.UpdateAllowUnknown(
|
||||
Args{{"interaction_constraints", "[[0, 1]]"}, {"num_feature", std::to_string(kCols)}});
|
||||
std::vector<HostDeviceVector<bst_node_t>> position(1);
|
||||
updater->Update(p_gradients.get(), p_dmat.get(), position, {&tree});
|
||||
updater->Update(&param, p_gradients.get(), p_dmat.get(), position, {&tree});
|
||||
|
||||
ASSERT_EQ(tree.NumExtraNodes(), 4);
|
||||
ASSERT_EQ(tree[0].SplitIndex(), 1);
|
||||
@@ -64,9 +63,10 @@ TEST(GrowHistMaker, InteractionConstraint)
|
||||
|
||||
std::unique_ptr<TreeUpdater> updater{
|
||||
TreeUpdater::Create("grow_histmaker", &ctx, ObjInfo{ObjInfo::kRegression})};
|
||||
updater->Configure(Args{{"num_feature", std::to_string(kCols)}});
|
||||
std::vector<HostDeviceVector<bst_node_t>> position(1);
|
||||
updater->Update(p_gradients.get(), p_dmat.get(), position, {&tree});
|
||||
TrainParam param;
|
||||
param.Init(Args{});
|
||||
updater->Update(&param, p_gradients.get(), p_dmat.get(), position, {&tree});
|
||||
|
||||
ASSERT_EQ(tree.NumExtraNodes(), 10);
|
||||
ASSERT_EQ(tree[0].SplitIndex(), 1);
|
||||
@@ -83,7 +83,6 @@ void TestColumnSplit(int32_t rows, int32_t cols, RegTree const& expected_tree) {
|
||||
Context ctx;
|
||||
std::unique_ptr<TreeUpdater> updater{
|
||||
TreeUpdater::Create("grow_histmaker", &ctx, ObjInfo{ObjInfo::kRegression})};
|
||||
updater->Configure(Args{{"num_feature", std::to_string(cols)}});
|
||||
std::vector<HostDeviceVector<bst_node_t>> position(1);
|
||||
|
||||
std::unique_ptr<DMatrix> sliced{
|
||||
@@ -91,7 +90,9 @@ void TestColumnSplit(int32_t rows, int32_t cols, RegTree const& expected_tree) {
|
||||
|
||||
RegTree tree;
|
||||
tree.param.num_feature = cols;
|
||||
updater->Update(p_gradients.get(), sliced.get(), position, {&tree});
|
||||
TrainParam param;
|
||||
param.Init(Args{});
|
||||
updater->Update(&param, p_gradients.get(), sliced.get(), position, {&tree});
|
||||
|
||||
EXPECT_EQ(tree.NumExtraNodes(), 10);
|
||||
EXPECT_EQ(tree[0].SplitIndex(), 1);
|
||||
@@ -115,14 +116,13 @@ TEST(GrowHistMaker, ColumnSplit) {
|
||||
Context ctx;
|
||||
std::unique_ptr<TreeUpdater> updater{
|
||||
TreeUpdater::Create("grow_histmaker", &ctx, ObjInfo{ObjInfo::kRegression})};
|
||||
updater->Configure(Args{{"num_feature", std::to_string(kCols)}});
|
||||
std::vector<HostDeviceVector<bst_node_t>> position(1);
|
||||
updater->Update(p_gradients.get(), p_dmat.get(), position, {&expected_tree});
|
||||
TrainParam param;
|
||||
param.Init(Args{});
|
||||
updater->Update(&param, p_gradients.get(), p_dmat.get(), position, {&expected_tree});
|
||||
}
|
||||
|
||||
auto constexpr kWorldSize = 2;
|
||||
RunWithInMemoryCommunicator(kWorldSize, TestColumnSplit, kRows, kCols, std::cref(expected_tree));
|
||||
}
|
||||
|
||||
} // namespace tree
|
||||
} // namespace xgboost
|
||||
} // namespace xgboost::tree
|
||||
|
||||
@@ -7,6 +7,7 @@
|
||||
|
||||
#include <memory>
|
||||
|
||||
#include "../../../src/tree/param.h" // for TrainParam
|
||||
#include "../helpers.h"
|
||||
|
||||
namespace xgboost {
|
||||
@@ -75,9 +76,11 @@ class TestPredictionCache : public ::testing::Test {
|
||||
RegTree tree;
|
||||
std::vector<RegTree *> trees{&tree};
|
||||
auto gpair = GenerateRandomGradients(n_samples_);
|
||||
updater->Configure(Args{{"max_bin", "64"}});
|
||||
tree::TrainParam param;
|
||||
param.UpdateAllowUnknown(Args{{"max_bin", "64"}});
|
||||
|
||||
std::vector<HostDeviceVector<bst_node_t>> position(1);
|
||||
updater->Update(&gpair, Xy_.get(), position, trees);
|
||||
updater->Update(&param, &gpair, Xy_.get(), position, trees);
|
||||
HostDeviceVector<float> out_prediction_cached;
|
||||
out_prediction_cached.SetDevice(ctx.gpu_id);
|
||||
out_prediction_cached.Resize(n_samples_);
|
||||
|
||||
@@ -1,20 +1,20 @@
|
||||
/*!
|
||||
* Copyright 2018-2019 by Contributors
|
||||
/**
|
||||
* Copyright 2018-2023 by XGBoost Contributors
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
#include <xgboost/data.h>
|
||||
#include <xgboost/host_device_vector.h>
|
||||
#include <xgboost/tree_updater.h>
|
||||
#include <xgboost/learner.h>
|
||||
#include <gtest/gtest.h>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <memory>
|
||||
#include <xgboost/tree_updater.h>
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "../../../src/tree/param.h" // for TrainParam
|
||||
#include "../helpers.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace tree {
|
||||
|
||||
namespace xgboost::tree {
|
||||
TEST(Updater, Prune) {
|
||||
int constexpr kCols = 16;
|
||||
|
||||
@@ -36,28 +36,30 @@ TEST(Updater, Prune) {
|
||||
tree.param.UpdateAllowUnknown(cfg);
|
||||
std::vector<RegTree*> trees {&tree};
|
||||
// prepare pruner
|
||||
TrainParam param;
|
||||
param.UpdateAllowUnknown(cfg);
|
||||
|
||||
std::unique_ptr<TreeUpdater> pruner(
|
||||
TreeUpdater::Create("prune", &ctx, ObjInfo{ObjInfo::kRegression}));
|
||||
pruner->Configure(cfg);
|
||||
|
||||
// loss_chg < min_split_loss;
|
||||
std::vector<HostDeviceVector<bst_node_t>> position(trees.size());
|
||||
tree.ExpandNode(0, 0, 0, true, 0.0f, 0.3f, 0.4f, 0.0f, 0.0f,
|
||||
/*left_sum=*/0.0f, /*right_sum=*/0.0f);
|
||||
pruner->Update(&gpair, p_dmat.get(), position, trees);
|
||||
pruner->Update(&param, &gpair, p_dmat.get(), position, trees);
|
||||
|
||||
ASSERT_EQ(tree.NumExtraNodes(), 0);
|
||||
|
||||
// loss_chg > min_split_loss;
|
||||
tree.ExpandNode(0, 0, 0, true, 0.0f, 0.3f, 0.4f, 11.0f, 0.0f,
|
||||
/*left_sum=*/0.0f, /*right_sum=*/0.0f);
|
||||
pruner->Update(&gpair, p_dmat.get(), position, trees);
|
||||
pruner->Update(&param, &gpair, p_dmat.get(), position, trees);
|
||||
|
||||
ASSERT_EQ(tree.NumExtraNodes(), 2);
|
||||
|
||||
// loss_chg == min_split_loss;
|
||||
tree.Stat(0).loss_chg = 10;
|
||||
pruner->Update(&gpair, p_dmat.get(), position, trees);
|
||||
pruner->Update(&param, &gpair, p_dmat.get(), position, trees);
|
||||
|
||||
ASSERT_EQ(tree.NumExtraNodes(), 2);
|
||||
|
||||
@@ -71,10 +73,10 @@ TEST(Updater, Prune) {
|
||||
0, 0.5f, true, 0.3, 0.4, 0.5,
|
||||
/*loss_chg=*/19.0f, 0.0f,
|
||||
/*left_sum=*/0.0f, /*right_sum=*/0.0f);
|
||||
cfg.emplace_back("max_depth", "1");
|
||||
pruner->Configure(cfg);
|
||||
pruner->Update(&gpair, p_dmat.get(), position, trees);
|
||||
|
||||
cfg.emplace_back("max_depth", "1");
|
||||
param.UpdateAllowUnknown(cfg);
|
||||
pruner->Update(&param, &gpair, p_dmat.get(), position, trees);
|
||||
ASSERT_EQ(tree.NumExtraNodes(), 2);
|
||||
|
||||
tree.ExpandNode(tree[0].LeftChild(),
|
||||
@@ -82,9 +84,9 @@ TEST(Updater, Prune) {
|
||||
/*loss_chg=*/18.0f, 0.0f,
|
||||
/*left_sum=*/0.0f, /*right_sum=*/0.0f);
|
||||
cfg.emplace_back("min_split_loss", "0");
|
||||
pruner->Configure(cfg);
|
||||
pruner->Update(&gpair, p_dmat.get(), position, trees);
|
||||
param.UpdateAllowUnknown(cfg);
|
||||
|
||||
pruner->Update(&param, &gpair, p_dmat.get(), position, trees);
|
||||
ASSERT_EQ(tree.NumExtraNodes(), 2);
|
||||
}
|
||||
} // namespace tree
|
||||
} // namespace xgboost
|
||||
} // namespace xgboost::tree
|
||||
|
||||
@@ -1,14 +1,15 @@
|
||||
/*!
|
||||
* Copyright 2018-2019 by Contributors
|
||||
/**
|
||||
* Copyright 2018-2023 by XGBoost Contributors
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
#include <xgboost/host_device_vector.h>
|
||||
#include <xgboost/tree_updater.h>
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "../../../src/tree/param.h" // for TrainParam
|
||||
#include "../helpers.h"
|
||||
|
||||
namespace xgboost {
|
||||
@@ -43,9 +44,11 @@ TEST(Updater, Refresh) {
|
||||
tree.Stat(cleft).base_weight = 1.2;
|
||||
tree.Stat(cright).base_weight = 1.3;
|
||||
|
||||
refresher->Configure(cfg);
|
||||
std::vector<HostDeviceVector<bst_node_t>> position;
|
||||
refresher->Update(&gpair, p_dmat.get(), position, trees);
|
||||
tree::TrainParam param;
|
||||
param.UpdateAllowUnknown(cfg);
|
||||
|
||||
refresher->Update(&param, &gpair, p_dmat.get(), position, trees);
|
||||
|
||||
bst_float constexpr kEps = 1e-6;
|
||||
ASSERT_NEAR(-0.183392, tree[cright].LeafValue(), kEps);
|
||||
|
||||
@@ -1,7 +1,11 @@
|
||||
/**
|
||||
* Copyright 2020-2023 by XGBoost Contributors
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
#include <xgboost/tree_model.h>
|
||||
#include <xgboost/tree_updater.h>
|
||||
|
||||
#include "../../../src/tree/param.h" // for TrainParam
|
||||
#include "../helpers.h"
|
||||
|
||||
namespace xgboost {
|
||||
@@ -21,6 +25,9 @@ class UpdaterTreeStatTest : public ::testing::Test {
|
||||
}
|
||||
|
||||
void RunTest(std::string updater) {
|
||||
tree::TrainParam param;
|
||||
param.Init(Args{});
|
||||
|
||||
Context ctx(updater == "grow_gpu_hist" ? CreateEmptyGenericParam(0)
|
||||
: CreateEmptyGenericParam(Context::kCpuId));
|
||||
auto up = std::unique_ptr<TreeUpdater>{
|
||||
@@ -29,7 +36,7 @@ class UpdaterTreeStatTest : public ::testing::Test {
|
||||
RegTree tree;
|
||||
tree.param.num_feature = kCols;
|
||||
std::vector<HostDeviceVector<bst_node_t>> position(1);
|
||||
up->Update(&gpairs_, p_dmat_.get(), position, {&tree});
|
||||
up->Update(&param, &gpairs_, p_dmat_.get(), position, {&tree});
|
||||
|
||||
tree.WalkTree([&tree](bst_node_t nidx) {
|
||||
if (tree[nidx].IsLeaf()) {
|
||||
@@ -69,28 +76,33 @@ class UpdaterEtaTest : public ::testing::Test {
|
||||
void RunTest(std::string updater) {
|
||||
Context ctx(updater == "grow_gpu_hist" ? CreateEmptyGenericParam(0)
|
||||
: CreateEmptyGenericParam(Context::kCpuId));
|
||||
|
||||
float eta = 0.4;
|
||||
auto up_0 = std::unique_ptr<TreeUpdater>{
|
||||
TreeUpdater::Create(updater, &ctx, ObjInfo{ObjInfo::kClassification})};
|
||||
up_0->Configure(Args{{"eta", std::to_string(eta)}});
|
||||
up_0->Configure(Args{});
|
||||
tree::TrainParam param0;
|
||||
param0.Init(Args{{"eta", std::to_string(eta)}});
|
||||
|
||||
auto up_1 = std::unique_ptr<TreeUpdater>{
|
||||
TreeUpdater::Create(updater, &ctx, ObjInfo{ObjInfo::kClassification})};
|
||||
up_1->Configure(Args{{"eta", "1.0"}});
|
||||
tree::TrainParam param1;
|
||||
param1.Init(Args{{"eta", "1.0"}});
|
||||
|
||||
for (size_t iter = 0; iter < 4; ++iter) {
|
||||
RegTree tree_0;
|
||||
{
|
||||
tree_0.param.num_feature = kCols;
|
||||
std::vector<HostDeviceVector<bst_node_t>> position(1);
|
||||
up_0->Update(&gpairs_, p_dmat_.get(), position, {&tree_0});
|
||||
up_0->Update(&param0, &gpairs_, p_dmat_.get(), position, {&tree_0});
|
||||
}
|
||||
|
||||
RegTree tree_1;
|
||||
{
|
||||
tree_1.param.num_feature = kCols;
|
||||
std::vector<HostDeviceVector<bst_node_t>> position(1);
|
||||
up_1->Update(&gpairs_, p_dmat_.get(), position, {&tree_1});
|
||||
up_1->Update(&param1, &gpairs_, p_dmat_.get(), position, {&tree_1});
|
||||
}
|
||||
tree_0.WalkTree([&](bst_node_t nidx) {
|
||||
if (tree_0[nidx].IsLeaf()) {
|
||||
@@ -139,17 +151,18 @@ class TestMinSplitLoss : public ::testing::Test {
|
||||
|
||||
// test gamma
|
||||
{"gamma", std::to_string(gamma)}};
|
||||
tree::TrainParam param;
|
||||
param.UpdateAllowUnknown(args);
|
||||
|
||||
Context ctx(updater == "grow_gpu_hist" ? CreateEmptyGenericParam(0)
|
||||
: CreateEmptyGenericParam(Context::kCpuId));
|
||||
std::cout << ctx.gpu_id << std::endl;
|
||||
auto up = std::unique_ptr<TreeUpdater>{
|
||||
TreeUpdater::Create(updater, &ctx, ObjInfo{ObjInfo::kRegression})};
|
||||
up->Configure(args);
|
||||
up->Configure({});
|
||||
|
||||
RegTree tree;
|
||||
std::vector<HostDeviceVector<bst_node_t>> position(1);
|
||||
up->Update(&gpair_, dmat_.get(), position, {&tree});
|
||||
up->Update(&param, &gpair_, dmat_.get(), position, {&tree});
|
||||
|
||||
auto n_nodes = tree.NumExtraNodes();
|
||||
return n_nodes;
|
||||
|
||||
@@ -42,9 +42,15 @@ class TestGPUBasicModels:
|
||||
def test_custom_objective(self):
|
||||
self.cpu_test_bm.run_custom_objective("gpu_hist")
|
||||
|
||||
def test_eta_decay_gpu_hist(self):
|
||||
def test_eta_decay(self):
|
||||
self.cpu_test_cb.run_eta_decay('gpu_hist')
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"objective", ["binary:logistic", "reg:absoluteerror", "reg:quantileerror"]
|
||||
)
|
||||
def test_eta_decay_leaf_output(self, objective) -> None:
|
||||
self.cpu_test_cb.run_eta_decay_leaf_output("gpu_hist", objective)
|
||||
|
||||
def test_deterministic_gpu_hist(self):
|
||||
kRows = 1000
|
||||
kCols = 64
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
import json
|
||||
import os
|
||||
import tempfile
|
||||
from contextlib import nullcontext
|
||||
@@ -355,47 +356,125 @@ class TestCallbacks:
|
||||
with warning_check:
|
||||
xgb.cv(param, dtrain, num_round, callbacks=[scheduler(eta_decay)])
|
||||
|
||||
@pytest.mark.parametrize("tree_method", ["hist", "approx", "exact"])
|
||||
def run_eta_decay_leaf_output(self, tree_method: str, objective: str) -> None:
|
||||
# check decay has effect on leaf output.
|
||||
num_round = 4
|
||||
scheduler = xgb.callback.LearningRateScheduler
|
||||
|
||||
dpath = tm.data_dir(__file__)
|
||||
dtrain = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.train"))
|
||||
dtest = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.test"))
|
||||
watchlist = [(dtest, 'eval'), (dtrain, 'train')]
|
||||
|
||||
param = {
|
||||
"max_depth": 2,
|
||||
"objective": objective,
|
||||
"eval_metric": "error",
|
||||
"tree_method": tree_method,
|
||||
}
|
||||
if objective == "reg:quantileerror":
|
||||
param["quantile_alpha"] = 0.3
|
||||
|
||||
def eta_decay_0(i):
|
||||
return num_round / (i + 1)
|
||||
|
||||
bst0 = xgb.train(
|
||||
param,
|
||||
dtrain,
|
||||
num_round,
|
||||
watchlist,
|
||||
callbacks=[scheduler(eta_decay_0)],
|
||||
)
|
||||
|
||||
def eta_decay_1(i: int) -> float:
|
||||
if i > 1:
|
||||
return 5.0
|
||||
return num_round / (i + 1)
|
||||
|
||||
bst1 = xgb.train(
|
||||
param,
|
||||
dtrain,
|
||||
num_round,
|
||||
watchlist,
|
||||
callbacks=[scheduler(eta_decay_1)],
|
||||
)
|
||||
bst_json0 = bst0.save_raw(raw_format="json")
|
||||
bst_json1 = bst1.save_raw(raw_format="json")
|
||||
|
||||
j0 = json.loads(bst_json0)
|
||||
j1 = json.loads(bst_json1)
|
||||
|
||||
tree_2th_0 = j0["learner"]["gradient_booster"]["model"]["trees"][2]
|
||||
tree_2th_1 = j1["learner"]["gradient_booster"]["model"]["trees"][2]
|
||||
assert tree_2th_0["base_weights"] == tree_2th_1["base_weights"]
|
||||
assert tree_2th_0["split_conditions"] == tree_2th_1["split_conditions"]
|
||||
|
||||
tree_3th_0 = j0["learner"]["gradient_booster"]["model"]["trees"][3]
|
||||
tree_3th_1 = j1["learner"]["gradient_booster"]["model"]["trees"][3]
|
||||
assert tree_3th_0["base_weights"] != tree_3th_1["base_weights"]
|
||||
assert tree_3th_0["split_conditions"] != tree_3th_1["split_conditions"]
|
||||
|
||||
@pytest.mark.parametrize("tree_method", ["hist", "approx", "approx"])
|
||||
def test_eta_decay(self, tree_method):
|
||||
self.run_eta_decay(tree_method)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"tree_method,objective",
|
||||
[
|
||||
("hist", "binary:logistic"),
|
||||
("hist", "reg:absoluteerror"),
|
||||
("hist", "reg:quantileerror"),
|
||||
("approx", "binary:logistic"),
|
||||
("approx", "reg:absoluteerror"),
|
||||
("approx", "reg:quantileerror"),
|
||||
],
|
||||
)
|
||||
def test_eta_decay_leaf_output(self, tree_method: str, objective: str) -> None:
|
||||
self.run_eta_decay_leaf_output(tree_method, objective)
|
||||
|
||||
def test_check_point(self):
|
||||
from sklearn.datasets import load_breast_cancer
|
||||
|
||||
X, y = load_breast_cancer(return_X_y=True)
|
||||
m = xgb.DMatrix(X, y)
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
check_point = xgb.callback.TrainingCheckPoint(directory=tmpdir,
|
||||
iterations=1,
|
||||
name='model')
|
||||
xgb.train({'objective': 'binary:logistic'}, m,
|
||||
num_boost_round=10,
|
||||
verbose_eval=False,
|
||||
callbacks=[check_point])
|
||||
check_point = xgb.callback.TrainingCheckPoint(
|
||||
directory=tmpdir, iterations=1, name="model"
|
||||
)
|
||||
xgb.train(
|
||||
{"objective": "binary:logistic"},
|
||||
m,
|
||||
num_boost_round=10,
|
||||
verbose_eval=False,
|
||||
callbacks=[check_point],
|
||||
)
|
||||
for i in range(1, 10):
|
||||
assert os.path.exists(
|
||||
os.path.join(tmpdir, 'model_' + str(i) + '.json'))
|
||||
assert os.path.exists(os.path.join(tmpdir, "model_" + str(i) + ".json"))
|
||||
|
||||
check_point = xgb.callback.TrainingCheckPoint(directory=tmpdir,
|
||||
iterations=1,
|
||||
as_pickle=True,
|
||||
name='model')
|
||||
xgb.train({'objective': 'binary:logistic'}, m,
|
||||
num_boost_round=10,
|
||||
verbose_eval=False,
|
||||
callbacks=[check_point])
|
||||
check_point = xgb.callback.TrainingCheckPoint(
|
||||
directory=tmpdir, iterations=1, as_pickle=True, name="model"
|
||||
)
|
||||
xgb.train(
|
||||
{"objective": "binary:logistic"},
|
||||
m,
|
||||
num_boost_round=10,
|
||||
verbose_eval=False,
|
||||
callbacks=[check_point],
|
||||
)
|
||||
for i in range(1, 10):
|
||||
assert os.path.exists(
|
||||
os.path.join(tmpdir, 'model_' + str(i) + '.pkl'))
|
||||
assert os.path.exists(os.path.join(tmpdir, "model_" + str(i) + ".pkl"))
|
||||
|
||||
def test_callback_list(self):
|
||||
X, y = tm.get_california_housing()
|
||||
m = xgb.DMatrix(X, y)
|
||||
callbacks = [xgb.callback.EarlyStopping(rounds=10)]
|
||||
for i in range(4):
|
||||
xgb.train({'objective': 'reg:squarederror',
|
||||
'eval_metric': 'rmse'}, m,
|
||||
evals=[(m, 'Train')],
|
||||
num_boost_round=1,
|
||||
verbose_eval=True,
|
||||
callbacks=callbacks)
|
||||
xgb.train(
|
||||
{"objective": "reg:squarederror", "eval_metric": "rmse"},
|
||||
m,
|
||||
evals=[(m, "Train")],
|
||||
num_boost_round=1,
|
||||
verbose_eval=True,
|
||||
callbacks=callbacks,
|
||||
)
|
||||
assert len(callbacks) == 1
|
||||
|
||||
@@ -51,11 +51,8 @@ class TestPickling:
|
||||
|
||||
def test_model_pickling_json(self):
|
||||
def check(config):
|
||||
updater = config["learner"]["gradient_booster"]["updater"]
|
||||
if params["tree_method"] == "exact":
|
||||
subsample = updater["grow_colmaker"]["train_param"]["subsample"]
|
||||
else:
|
||||
subsample = updater["grow_quantile_histmaker"]["train_param"]["subsample"]
|
||||
tree_param = config["learner"]["gradient_booster"]["tree_train_param"]
|
||||
subsample = tree_param["subsample"]
|
||||
assert float(subsample) == 0.5
|
||||
|
||||
params = {"nthread": 8, "tree_method": "hist", "subsample": 0.5}
|
||||
|
||||
@@ -447,7 +447,8 @@ class TestTreeMethod:
|
||||
{
|
||||
"tree_method": tree_method,
|
||||
"objective": "reg:absoluteerror",
|
||||
"subsample": 0.8
|
||||
"subsample": 0.8,
|
||||
"eta": 1.0,
|
||||
},
|
||||
Xy,
|
||||
num_boost_round=10,
|
||||
|
||||
@@ -1018,14 +1018,18 @@ def test_XGBClassifier_resume():
|
||||
|
||||
|
||||
def test_constraint_parameters():
|
||||
reg = xgb.XGBRegressor(interaction_constraints='[[0, 1], [2, 3, 4]]')
|
||||
reg = xgb.XGBRegressor(interaction_constraints="[[0, 1], [2, 3, 4]]")
|
||||
X = np.random.randn(10, 10)
|
||||
y = np.random.randn(10)
|
||||
reg.fit(X, y)
|
||||
|
||||
config = json.loads(reg.get_booster().save_config())
|
||||
assert config['learner']['gradient_booster']['updater']['grow_colmaker'][
|
||||
'train_param']['interaction_constraints'] == '[[0, 1], [2, 3, 4]]'
|
||||
assert (
|
||||
config["learner"]["gradient_booster"]["tree_train_param"][
|
||||
"interaction_constraints"
|
||||
]
|
||||
== "[[0, 1], [2, 3, 4]]"
|
||||
)
|
||||
|
||||
|
||||
def test_parameter_validation():
|
||||
|
||||
@@ -422,10 +422,10 @@ class XgboostLocalClusterTestCase(SparkLocalClusterTestCase):
|
||||
self.assertTrue(hasattr(classifier, "max_depth"))
|
||||
self.assertEqual(classifier.getOrDefault(classifier.max_depth), 7)
|
||||
booster_config = json.loads(model.get_booster().save_config())
|
||||
max_depth = booster_config["learner"]["gradient_booster"]["updater"][
|
||||
"grow_histmaker"
|
||||
]["train_param"]["max_depth"]
|
||||
self.assertEqual(int(max_depth), 7)
|
||||
max_depth = booster_config["learner"]["gradient_booster"]["tree_train_param"][
|
||||
"max_depth"
|
||||
]
|
||||
assert int(max_depth) == 7
|
||||
|
||||
def test_repartition(self):
|
||||
# The following test case has a few partitioned datasets that are either
|
||||
|
||||
Reference in New Issue
Block a user