Rewrite approx (#7214)

This PR rewrites the `approx` tree method to build on the `hist` codebase, for better performance and code sharing.

The rewrite has many benefits (a short configuration sketch follows the list):
- Support for both `max_leaves` and `max_depth`.
- Support for `grow_policy`.
- Support for monotonic constraints.
- Support for feature weights.
- Support for easier bin configuration (`max_bin`).
- Support for categorical data.
- Faster performance on most datasets (often many times faster).
- Support for prediction cache.
- Significantly better performance for external memory.
- Unifies the code base between `approx` and `hist`.
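
As a rough sketch only, here is how several of the newly supported options could be combined through the C++ `Learner` API that the tests below exercise. The parameter names are XGBoost's documented ones; `Xy` (a `std::shared_ptr<DMatrix>`) and the monotone-constraint tuple are illustrative assumptions, not code from this PR:

    // Sketch: train with tree_method=approx using options the old
    // implementation did not support.
    std::unique_ptr<Learner> learner{Learner::Create({Xy})};
    learner->SetParam("tree_method", "approx");
    learner->SetParam("grow_policy", "lossguide");        // newly supported
    learner->SetParam("max_leaves", "31");                // newly supported
    learner->SetParam("max_bin", "64");                   // simpler bin configuration
    learner->SetParam("monotone_constraints", "(1,-1)");  // e.g. for two features
    learner->Configure();
    for (size_t i = 0; i < 4; ++i) {
      learner->UpdateOneIter(i, Xy);
    }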
Commit 001503186c by Jiaming Yuan, 2022-01-10 21:15:05 +08:00 (parent ed95e77752).
22 changed files with 635 additions and 264 deletions.


@@ -72,5 +72,58 @@ TEST(Approx, Partitioner) {
    }
  }
}
TEST(Approx, PredictionCache) {
  size_t n_samples = 2048, n_features = 13;
  auto Xy = RandomDataGenerator{n_samples, n_features, 0}.GenerateDMatrix(true);

  {
    // Drive the updater directly and have it fill the prediction cache.
    omp_set_num_threads(1);
    GenericParameter ctx;
    ctx.InitAllowUnknown(Args{{"nthread", "8"}});
    std::unique_ptr<TreeUpdater> approx{
        TreeUpdater::Create("grow_histmaker", &ctx, ObjInfo{ObjInfo::kRegression})};
    RegTree tree;
    std::vector<RegTree *> trees{&tree};
    auto gpair = GenerateRandomGradients(n_samples);
    approx->Configure(Args{{"max_bin", "64"}});
    approx->Update(&gpair, Xy.get(), trees);

    HostDeviceVector<float> out_prediction_cached;
    out_prediction_cached.Resize(n_samples);
    auto cache = linalg::VectorView<float>{
        out_prediction_cached.HostSpan(), {out_prediction_cached.Size()}, GenericParameter::kCpuId};
    // The updater must be able to service the cache request (see the note
    // after this file on the bool return value).
    ASSERT_TRUE(approx->UpdatePredictionCache(Xy.get(), cache));
  }

  // Train through the Learner, predict (served from the cache), then compare
  // against predictions from a freshly loaded copy of the same model.
  std::unique_ptr<Learner> learner{Learner::Create({Xy})};
  learner->SetParam("tree_method", "approx");
  learner->SetParam("nthread", "0");
  learner->Configure();
  for (size_t i = 0; i < 8; ++i) {
    learner->UpdateOneIter(i, Xy);
  }

  HostDeviceVector<float> out_prediction_cached;
  learner->Predict(Xy, false, &out_prediction_cached, 0, 0);

  Json model{Object()};
  learner->SaveModel(&model);

  HostDeviceVector<float> out_prediction;
  {
    std::unique_ptr<Learner> learner{Learner::Create({Xy})};
    learner->LoadModel(model);
    learner->Predict(Xy, false, &out_prediction, 0, 0);
  }

  auto const h_predt_cached = out_prediction_cached.ConstHostSpan();
  auto const h_predt = out_prediction.ConstHostSpan();
  ASSERT_EQ(h_predt.size(), h_predt_cached.size());
  for (size_t i = 0; i < h_predt.size(); ++i) {
    ASSERT_NEAR(h_predt[i], h_predt_cached[i], kRtEps);
  }
}
} // namespace tree
} // namespace xgboost
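
A hedged aside on the `ASSERT_TRUE` above: `UpdatePredictionCache` returns a `bool`, which lets an updater decline the request. Below is a minimal sketch of the fallback pattern a caller might use (hypothetical caller code, not from this PR; it reuses `approx`, `Xy`, and `n_samples` from the test above):

    HostDeviceVector<float> preds;
    preds.Resize(n_samples);
    auto view = linalg::VectorView<float>{
        preds.HostSpan(), {preds.Size()}, GenericParameter::kCpuId};
    if (!approx->UpdatePredictionCache(Xy.get(), view)) {
      // The updater declined (e.g. a DMatrix it was not trained on);
      // fall back to a full prediction pass here.
    }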


@@ -315,57 +315,6 @@ TEST(GpuHist, TestHistogramIndex) {
  TestHistogramIndexImpl();
}
// gamma is an alias of min_split_loss
int32_t TestMinSplitLoss(DMatrix* dmat, float gamma, HostDeviceVector<GradientPair>* gpair) {
  Args args{
      {"max_depth", "1"},
      {"max_leaves", "0"},
      // Disable all other parameters.
      {"colsample_bynode", "1"},
      {"colsample_bylevel", "1"},
      {"colsample_bytree", "1"},
      {"min_child_weight", "0.01"},
      {"reg_alpha", "0"},
      {"reg_lambda", "0"},
      {"max_delta_step", "0"},
      // test gamma
      {"gamma", std::to_string(gamma)}};
  tree::GPUHistMakerSpecialised<GradientPairPrecise> hist_maker{ObjInfo{ObjInfo::kRegression}};
  GenericParameter generic_param(CreateEmptyGenericParam(0));
  hist_maker.Configure(args, &generic_param);
  RegTree tree;
  hist_maker.Update(gpair, dmat, {&tree});
  auto n_nodes = tree.NumExtraNodes();
  return n_nodes;
}
TEST(GpuHist, MinSplitLoss) {
  constexpr size_t kRows = 32;
  constexpr size_t kCols = 16;
  constexpr float kSparsity = 0.6;
  auto dmat = RandomDataGenerator(kRows, kCols, kSparsity).Seed(3).GenerateDMatrix();
  auto gpair = GenerateRandomGradients(kRows);

  {
    int32_t n_nodes = TestMinSplitLoss(dmat.get(), 0.01, &gpair);
    // This is not strictly verified: the number `2` is simply what GPU hist
    // returned when this test was written; it serves as a baseline showing
    // that the larger gamma below does prevent the tree from growing.
    ASSERT_EQ(n_nodes, 2);
  }
  {
    int32_t n_nodes = TestMinSplitLoss(dmat.get(), 100.0, &gpair);
    // No new nodes with gamma == 100.
    ASSERT_EQ(n_nodes, static_cast<decltype(n_nodes)>(0));
  }
}
void UpdateTree(HostDeviceVector<GradientPair>* gpair, DMatrix* dmat,
size_t gpu_page_size, RegTree* tree,
HostDeviceVector<bst_float>* preds, float subsample = 1.0f,


@@ -61,7 +61,7 @@ class TestGrowPolicy : public ::testing::Test {
}
};
TEST_F(TestGrowPolicy, DISABLED_Approx) {
TEST_F(TestGrowPolicy, Approx) {
  this->TestTreeGrowPolicy("approx", "depthwise");
  this->TestTreeGrowPolicy("approx", "lossguide");
}
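
Re-enabling this test covers both growth strategies that `approx` now supports. As a brief illustration (a sketch assuming a configured `Learner` named `learner`, as in the example near the top; the parameter values are XGBoost's documented ones):

    // depthwise: expand the nodes closest to the root first.
    learner->SetParam("grow_policy", "depthwise");
    // lossguide: expand the leaf with the highest loss reduction first,
    // commonly paired with a max_leaves budget.
    learner->SetParam("grow_policy", "lossguide");
    learner->SetParam("max_leaves", "16");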


@@ -114,4 +114,70 @@ TEST_F(UpdaterEtaTest, Approx) { this->RunTest("grow_histmaker"); }
#if defined(XGBOOST_USE_CUDA)
TEST_F(UpdaterEtaTest, GpuHist) { this->RunTest("grow_gpu_hist"); }
#endif // defined(XGBOOST_USE_CUDA)
class TestMinSplitLoss : public ::testing::Test {
  std::shared_ptr<DMatrix> dmat_;
  HostDeviceVector<GradientPair> gpair_;

  void SetUp() override {
    constexpr size_t kRows = 32;
    constexpr size_t kCols = 16;
    constexpr float kSparsity = 0.6;
    dmat_ = RandomDataGenerator(kRows, kCols, kSparsity).Seed(3).GenerateDMatrix();
    gpair_ = GenerateRandomGradients(kRows);
  }

  int32_t Update(std::string updater, float gamma) {
    Args args{{"max_depth", "1"},
              {"max_leaves", "0"},
              // Disable all other parameters.
              {"colsample_bynode", "1"},
              {"colsample_bylevel", "1"},
              {"colsample_bytree", "1"},
              {"min_child_weight", "0.01"},
              {"reg_alpha", "0"},
              {"reg_lambda", "0"},
              {"max_delta_step", "0"},
              // test gamma
              {"gamma", std::to_string(gamma)}};
    GenericParameter generic_param(CreateEmptyGenericParam(0));
    auto up = std::unique_ptr<TreeUpdater>{
        TreeUpdater::Create(updater, &generic_param, ObjInfo{ObjInfo::kRegression})};
    up->Configure(args);
    RegTree tree;
    up->Update(&gpair_, dmat_.get(), {&tree});
    auto n_nodes = tree.NumExtraNodes();
    return n_nodes;
  }
 public:
  void RunTest(std::string updater) {
    {
      int32_t n_nodes = Update(updater, 0.01);
      // This is not strictly verified: the number `2` is simply what GPU hist
      // returned when this test was written; it serves as a baseline showing
      // that the larger gamma below does prevent the tree from growing.
      ASSERT_EQ(n_nodes, 2);
    }
    {
      int32_t n_nodes = Update(updater, 100.0);
      // No new nodes with gamma == 100 (see the note on the split gain at the
      // end of this section).
      ASSERT_EQ(n_nodes, static_cast<decltype(n_nodes)>(0));
    }
  }
};
/* The exact tree method requires a pruner as an additional updater, so it is not tested here. */
TEST_F(TestMinSplitLoss, Approx) { this->RunTest("grow_histmaker"); }
TEST_F(TestMinSplitLoss, Hist) { this->RunTest("grow_quantile_histmaker"); }
#if defined(XGBOOST_USE_CUDA)
TEST_F(TestMinSplitLoss, GpuHist) { this->RunTest("grow_gpu_hist"); }
#endif // defined(XGBOOST_USE_CUDA)
} // namespace xgboost
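
A closing note on why a large `gamma` stops tree growth entirely: `gamma` (alias `min_split_loss`) is subtracted from every candidate split's gain. Using the split-gain formula from the XGBoost paper,

\[
\text{Gain} = \frac{1}{2}\left[\frac{G_L^2}{H_L+\lambda} + \frac{G_R^2}{H_R+\lambda} - \frac{(G_L+G_R)^2}{H_L+H_R+\lambda}\right] - \gamma,
\]

a split is kept only when the gain is positive. With `gamma = 100`, no candidate split on this 32-row random dataset clears the threshold, so `NumExtraNodes()` stays at zero; with `gamma = 0.01`, the depth-1 tree gains two extra nodes (one split producing two children).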