Prepare gradient index for Quantile DMatrix. (#8103)

* Prepare gradient index for Quantile DMatrix.
  - Implement push batch with adapter batch.
  - Implement `GetFvalue` for prediction.
2022-07-22 17:26:33 +08:00
parent 1be09848a7
commit 4a4e5c7c18
7 changed files with 254 additions and 70 deletions
--- a/tests/cpp/data/test_gradient_index.cc
+++ b/tests/cpp/data/test_gradient_index.cc
@@ -4,6 +4,7 @@
 #include <gtest/gtest.h>
 #include <xgboost/data.h>

+#include "../../../src/common/column_matrix.h"
 #include "../../../src/data/gradient_index.h"
 #include "../helpers.h"

@@ -65,5 +66,46 @@ TEST(GradientIndex, FromCategoricalBasic) {
    ASSERT_EQ(common::AsCat(x[i]), common::AsCat(bin_value));
  }
 }
+
+TEST(GradientIndex, PushBatch) {  // pushed index vs. GetBatches<GHistIndexMatrix>, via GetFvalue
+  size_t constexpr kRows = 64, kCols = 4;
+  bst_bin_t max_bins = 64;
+  float st = 0.5;  // forwarded to PushAdapterBatch and to BatchParam below
+
+  auto test = [&](float sparsity) {
+    auto m = RandomDataGenerator{kRows, kCols, sparsity}.GenerateDMatrix(true);
+    auto cuts = common::SketchOnDMatrix(m.get(), max_bins, common::OmpGetNumThreads(0), false, {});
+    common::HistogramCuts copy_cuts = cuts;  // this copy is the one moved into gmat
+
+    ASSERT_EQ(m->Info().num_row_, kRows);
+    ASSERT_EQ(m->Info().num_col_, kCols);
+    GHistIndexMatrix gmat{m->Info(), std::move(copy_cuts), max_bins};
+
+    for (auto const &page : m->GetBatches<SparsePage>()) {  // push every sparse page into gmat
+      SparsePageAdapterBatch batch{page.GetView()};
+      gmat.PushAdapterBatch(m->Ctx(), 0, 0, batch, std::numeric_limits<float>::quiet_NaN(), {}, st,
+                            m->Info().num_row_);
+      gmat.PushAdapterBatchColumns(m->Ctx(), batch, std::numeric_limits<float>::quiet_NaN(), 0);
+    }
+    for (auto const &page : m->GetBatches<GHistIndexMatrix>(BatchParam{max_bins, st})) {
+      for (size_t i = 0; i < kRows; ++i) {
+        for (size_t j = 0; j < kCols; ++j) {
+          auto v0 = gmat.GetFvalue(i, j, false);  // value from the index built by pushing
+          auto v1 = page.GetFvalue(i, j, false);  // value from the page GetBatches yields
+          if (sparsity == 0.0) {  // with zero sparsity no entry may come back as NaN
+            ASSERT_FALSE(std::isnan(v0));
+          }
+          if (!std::isnan(v0)) {  // only non-NaN entries of gmat are compared
+            ASSERT_EQ(v0, v1);
+          }
+        }
+      }
+    }
+  };
+
+  test(0.0f);
+  test(0.5f);
+  test(0.9f);
+}
 }  // namespace data
 }  // namespace xgboost
--- a/tests/cpp/predictor/test_predictor.cc
+++ b/tests/cpp/predictor/test_predictor.cc
@@ -66,6 +66,14 @@ void TestTrainingPrediction(size_t rows, size_t bins,
      learner->UpdateOneIter(i, p_hist);
    }

+    Json model{Object{}};
+    learner->SaveModel(&model);
+
+    learner.reset(Learner::Create({}));
+    learner->LoadModel(model);
+    learner->SetParam("predictor", predictor);
+    learner->Configure();
+
    HostDeviceVector<float> from_full;
    learner->Predict(p_full, false, &from_full, 0, 0);

--- a/tests/cpp/tree/hist/test_histogram.cc
+++ b/tests/cpp/tree/hist/test_histogram.cc
@@ -419,9 +419,8 @@ void TestHistogramExternalMemory(BatchParam batch_param, bool is_approx) {

    auto cut = common::SketchOnDMatrix(m.get(), batch_param.max_bin, common::OmpGetNumThreads(0),
                                       false, hess);
-    GHistIndexMatrix gmat;
-    gmat.Init(concat, {}, cut, batch_param.max_bin, false, std::numeric_limits<double>::quiet_NaN(),
-              common::OmpGetNumThreads(0));
+    GHistIndexMatrix gmat(concat, {}, cut, batch_param.max_bin, false,
+                          std::numeric_limits<double>::quiet_NaN(), common::OmpGetNumThreads(0));
    single_build.BuildHist(0, gmat, &tree, row_set_collection, nodes, {}, h_gpair);
    single_page = single_build.Histogram()[0];
  }
--- a/tests/cpp/tree/test_quantile_hist.cc
+++ b/tests/cpp/tree/test_quantile_hist.cc
@@ -34,8 +34,7 @@ TEST(QuantileHist, Partitioner) {
  auto cuts = common::SketchOnDMatrix(Xy.get(), 64, ctx.Threads());

  for (auto const& page : Xy->GetBatches<SparsePage>()) {
-    GHistIndexMatrix gmat;
-    gmat.Init(page, {}, cuts, 64, true, 0.5, ctx.Threads());
+    GHistIndexMatrix gmat(page, {}, cuts, 64, true, 0.5, ctx.Threads());
    bst_feature_t const split_ind = 0;
    common::ColumnMatrix column_indices;
    column_indices.Init(page, gmat, 0.5, ctx.Threads());