Rewrite approx (#7214)
This PR rewrites the approx tree method to use the codebase from hist for better performance and code sharing. The rewrite has many benefits: - Support for both `max_leaves` and `max_depth`. - Support for `grow_policy`. - Support for monotonic constraints. - Support for feature weights. - Support for easier bin configuration (`max_bin`). - Support for categorical data. - Faster performance for most datasets (many times faster). - Support for the prediction cache. - Significantly better performance for external memory. - Unifies the code base between approx and hist.
This commit is contained in:
@@ -72,5 +72,58 @@ TEST(Approx, Partitioner) {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Verify the prediction cache of the approx (grow_histmaker) updater, first through
 * the raw TreeUpdater interface, then end-to-end by comparing a trained Learner's
 * cached predictions against predictions from a model reloaded from JSON.
 */
TEST(Approx, PredictionCache) {
  size_t n_samples = 2048, n_features = 13;
  auto Xy = RandomDataGenerator{n_samples, n_features, 0}.GenerateDMatrix(true);

  {
    // NOTE(review): the global OpenMP thread count is pinned to 1 while the context
    // requests 8 — presumably the ctx setting is expected to win; confirm intent.
    omp_set_num_threads(1);
    GenericParameter ctx;
    ctx.InitAllowUnknown(Args{{"nthread", "8"}});
    std::unique_ptr<TreeUpdater> updater{
        TreeUpdater::Create("grow_histmaker", &ctx, ObjInfo{ObjInfo::kRegression})};
    updater->Configure(Args{{"max_bin", "64"}});

    RegTree tree;
    std::vector<RegTree *> trees{&tree};
    auto gradients = GenerateRandomGradients(n_samples);
    updater->Update(&gradients, Xy.get(), trees);

    // After one update the updater must be able to serve predictions from its cache.
    HostDeviceVector<float> cached_predt;
    cached_predt.Resize(n_samples);
    auto cache = linalg::VectorView<float>{
        cached_predt.HostSpan(), {cached_predt.Size()}, GenericParameter::kCpuId};
    ASSERT_TRUE(updater->UpdatePredictionCache(Xy.get(), cache));
  }

  // Full training loop: cached predictions come from the learner that trained.
  std::unique_ptr<Learner> learner{Learner::Create({Xy})};
  learner->SetParam("tree_method", "approx");
  learner->SetParam("nthread", "0");
  learner->Configure();

  for (size_t iter = 0; iter < 8; ++iter) {
    learner->UpdateOneIter(iter, Xy);
  }

  HostDeviceVector<float> out_prediction_cached;
  learner->Predict(Xy, false, &out_prediction_cached, 0, 0);

  Json model{Object()};
  learner->SaveModel(&model);

  // Reload the model into a fresh learner and predict without any cache.
  HostDeviceVector<float> out_prediction;
  {
    std::unique_ptr<Learner> fresh{Learner::Create({Xy})};
    fresh->LoadModel(model);
    fresh->Predict(Xy, false, &out_prediction, 0, 0);
  }

  auto const h_cached = out_prediction_cached.ConstHostSpan();
  auto const h_fresh = out_prediction.ConstHostSpan();

  // Cached and freshly-computed predictions must agree element-wise.
  ASSERT_EQ(h_fresh.size(), h_cached.size());
  for (size_t i = 0; i < h_fresh.size(); ++i) {
    ASSERT_NEAR(h_fresh[i], h_cached[i], kRtEps);
  }
}
|
||||
} // namespace tree
|
||||
} // namespace xgboost
|
||||
|
||||
@@ -315,57 +315,6 @@ TEST(GpuHist, TestHistogramIndex) {
|
||||
TestHistogramIndexImpl();
|
||||
}
|
||||
|
||||
// gamma is an alias of min_split_loss
|
||||
// gamma is an alias of min_split_loss.
//
// Builds a single depth-1 tree with the GPU hist maker under the given gamma and
// returns the number of extra nodes, so the caller can check whether gamma blocked
// the split.
int32_t TestMinSplitLoss(DMatrix* dmat, float gamma, HostDeviceVector<GradientPair>* gpair) {
  Args args{{"max_depth", "1"},
            {"max_leaves", "0"},
            // Disable all other parameters.
            {"colsample_bynode", "1"},
            {"colsample_bylevel", "1"},
            {"colsample_bytree", "1"},
            {"min_child_weight", "0.01"},
            {"reg_alpha", "0"},
            {"reg_lambda", "0"},
            {"max_delta_step", "0"},
            // test gamma
            {"gamma", std::to_string(gamma)}};

  tree::GPUHistMakerSpecialised<GradientPairPrecise> hist_maker{ObjInfo{ObjInfo::kRegression}};
  GenericParameter generic_param(CreateEmptyGenericParam(0));
  hist_maker.Configure(args, &generic_param);

  RegTree tree;
  hist_maker.Update(gpair, dmat, {&tree});
  return tree.NumExtraNodes();
}
|
||||
|
||||
/**
 * A small gamma permits a split while a huge gamma must prevent any tree growth.
 */
TEST(GpuHist, MinSplitLoss) {
  constexpr size_t kRows = 32;
  constexpr size_t kCols = 16;
  constexpr float kSparsity = 0.6;
  auto dmat = RandomDataGenerator(kRows, kCols, kSparsity).Seed(3).GenerateDMatrix();
  auto gpair = GenerateRandomGradients(kRows);

  {
    // The value 2 is not strictly verified: it is simply what GPU hist returned when
    // this test was written, and serves only as a baseline showing that a small gamma
    // does not prevent tree growth.
    int32_t n_nodes = TestMinSplitLoss(dmat.get(), 0.01, &gpair);
    ASSERT_EQ(n_nodes, 2);
  }
  {
    // No new nodes with gamma == 100.
    int32_t n_nodes = TestMinSplitLoss(dmat.get(), 100.0, &gpair);
    ASSERT_EQ(n_nodes, static_cast<decltype(n_nodes)>(0));
  }
}
|
||||
|
||||
void UpdateTree(HostDeviceVector<GradientPair>* gpair, DMatrix* dmat,
|
||||
size_t gpu_page_size, RegTree* tree,
|
||||
HostDeviceVector<bst_float>* preds, float subsample = 1.0f,
|
||||
|
||||
@@ -61,7 +61,7 @@ class TestGrowPolicy : public ::testing::Test {
|
||||
}
|
||||
};
|
||||
|
||||
/**
 * The rewritten approx method supports grow_policy; exercise both policies.
 * (Previously this test was DISABLED_; the diff span contained both the old and the
 * new header — keep only the enabled version added by the rewrite.)
 */
TEST_F(TestGrowPolicy, Approx) {
  this->TestTreeGrowPolicy("approx", "depthwise");
  this->TestTreeGrowPolicy("approx", "lossguide");
}
|
||||
|
||||
@@ -114,4 +114,70 @@ TEST_F(UpdaterEtaTest, Approx) { this->RunTest("grow_histmaker"); }
|
||||
#if defined(XGBOOST_USE_CUDA)
|
||||
TEST_F(UpdaterEtaTest, GpuHist) { this->RunTest("grow_gpu_hist"); }
|
||||
#endif // defined(XGBOOST_USE_CUDA)
|
||||
|
||||
class TestMinSplitLoss : public ::testing::Test {
|
||||
std::shared_ptr<DMatrix> dmat_;
|
||||
HostDeviceVector<GradientPair> gpair_;
|
||||
|
||||
void SetUp() override {
|
||||
constexpr size_t kRows = 32;
|
||||
constexpr size_t kCols = 16;
|
||||
constexpr float kSparsity = 0.6;
|
||||
dmat_ = RandomDataGenerator(kRows, kCols, kSparsity).Seed(3).GenerateDMatrix();
|
||||
gpair_ = GenerateRandomGradients(kRows);
|
||||
}
|
||||
|
||||
int32_t Update(std::string updater, float gamma) {
|
||||
Args args{{"max_depth", "1"},
|
||||
{"max_leaves", "0"},
|
||||
|
||||
// Disable all other parameters.
|
||||
{"colsample_bynode", "1"},
|
||||
{"colsample_bylevel", "1"},
|
||||
{"colsample_bytree", "1"},
|
||||
{"min_child_weight", "0.01"},
|
||||
{"reg_alpha", "0"},
|
||||
{"reg_lambda", "0"},
|
||||
{"max_delta_step", "0"},
|
||||
|
||||
// test gamma
|
||||
{"gamma", std::to_string(gamma)}};
|
||||
|
||||
GenericParameter generic_param(CreateEmptyGenericParam(0));
|
||||
auto up = std::unique_ptr<TreeUpdater>{
|
||||
TreeUpdater::Create(updater, &generic_param, ObjInfo{ObjInfo::kRegression})};
|
||||
up->Configure(args);
|
||||
|
||||
RegTree tree;
|
||||
up->Update(&gpair_, dmat_.get(), {&tree});
|
||||
|
||||
auto n_nodes = tree.NumExtraNodes();
|
||||
return n_nodes;
|
||||
}
|
||||
|
||||
public:
|
||||
void RunTest(std::string updater) {
|
||||
{
|
||||
int32_t n_nodes = Update(updater, 0.01);
|
||||
// This is not strictly verified, meaning the numeber `2` is whatever GPU_Hist retured
|
||||
// when writing this test, and only used for testing larger gamma (below) does prevent
|
||||
// building tree.
|
||||
ASSERT_EQ(n_nodes, 2);
|
||||
}
|
||||
{
|
||||
int32_t n_nodes = Update(updater, 100.0);
|
||||
// No new nodes with gamma == 100.
|
||||
ASSERT_EQ(n_nodes, static_cast<decltype(n_nodes)>(0));
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
/* The exact tree method requires a pruner as an additional updater, so it is not
 * tested here. */
TEST_F(TestMinSplitLoss, Approx) { this->RunTest("grow_histmaker"); }
TEST_F(TestMinSplitLoss, Hist) { this->RunTest("grow_quantile_histmaker"); }
#if defined(XGBOOST_USE_CUDA)
TEST_F(TestMinSplitLoss, GpuHist) { this->RunTest("grow_gpu_hist"); }
#endif  // defined(XGBOOST_USE_CUDA)
}  // namespace xgboost
|
||||
|
||||
Reference in New Issue
Block a user