Move prediction cache to Learner. (#5220)

* Move prediction cache into Learner. * Clean-ups - Remove duplicated cache in Learner and GBM. - Remove ad-hoc fix of invalid cache. - Remove `PredictFromCache` in predictors. - Remove prediction cache for linear altogether, as it's only moving the prediction into training process but doesn't provide any actual overall speed gain. - The cache is now unique to Learner, which means the ownership is no longer shared by any other components. * Changes - Add version to prediction cache. - Use weak ptr to check expired DMatrix. - Pass shared pointer instead of raw pointer.
2020-02-14 13:04:23 +08:00
parent 24ad9dec0b
commit c35cdecddd
19 changed files with 457 additions and 372 deletions
--- a/tests/cpp/predictor/test_gpu_predictor.cu
+++ b/tests/cpp/predictor/test_gpu_predictor.cu
@@ -1,6 +1,5 @@
-
 /*!
- * Copyright 2017-2019 XGBoost contributors
+ * Copyright 2017-2020 XGBoost contributors
 */
 #include <dmlc/filesystem.h>
 #include <xgboost/c_api.h>
@@ -19,12 +18,11 @@ namespace predictor {
 TEST(GpuPredictor, Basic) {
  auto cpu_lparam = CreateEmptyGenericParam(-1);
  auto gpu_lparam = CreateEmptyGenericParam(0);
-  auto cache = std::make_shared<std::unordered_map<DMatrix*, PredictionCacheEntry>>();

  std::unique_ptr<Predictor> gpu_predictor =
-      std::unique_ptr<Predictor>(Predictor::Create("gpu_predictor", &gpu_lparam, cache));
+      std::unique_ptr<Predictor>(Predictor::Create("gpu_predictor", &gpu_lparam));
  std::unique_ptr<Predictor> cpu_predictor =
-      std::unique_ptr<Predictor>(Predictor::Create("cpu_predictor", &cpu_lparam, cache));
+      std::unique_ptr<Predictor>(Predictor::Create("cpu_predictor", &cpu_lparam));

  gpu_predictor->Configure({});
  cpu_predictor->Configure({});
@@ -41,16 +39,17 @@ TEST(GpuPredictor, Basic) {
    gbm::GBTreeModel model = CreateTestModel(&param);

    // Test predict batch
-    HostDeviceVector<float> gpu_out_predictions;
-    HostDeviceVector<float> cpu_out_predictions;
+    PredictionCacheEntry gpu_out_predictions;
+    PredictionCacheEntry cpu_out_predictions;

    gpu_predictor->PredictBatch((*dmat).get(), &gpu_out_predictions, model, 0);
+    ASSERT_EQ(model.trees.size(), gpu_out_predictions.version);
    cpu_predictor->PredictBatch((*dmat).get(), &cpu_out_predictions, model, 0);

-    std::vector<float>& gpu_out_predictions_h = gpu_out_predictions.HostVector();
-    std::vector<float>& cpu_out_predictions_h = cpu_out_predictions.HostVector();
+    std::vector<float>& gpu_out_predictions_h = gpu_out_predictions.predictions.HostVector();
+    std::vector<float>& cpu_out_predictions_h = cpu_out_predictions.predictions.HostVector();
    float abs_tolerance = 0.001;
-    for (int j = 0; j < gpu_out_predictions.Size(); j++) {
+    for (int j = 0; j < gpu_out_predictions.predictions.Size(); j++) {
      ASSERT_NEAR(gpu_out_predictions_h[j], cpu_out_predictions_h[j], abs_tolerance);
    }
    delete dmat;
@@ -59,9 +58,8 @@ TEST(GpuPredictor, Basic) {

 TEST(gpu_predictor, ExternalMemoryTest) {
  auto lparam = CreateEmptyGenericParam(0);
-  auto cache = std::make_shared<std::unordered_map<DMatrix*, PredictionCacheEntry>>();
  std::unique_ptr<Predictor> gpu_predictor =
-      std::unique_ptr<Predictor>(Predictor::Create("gpu_predictor", &lparam, cache));
+      std::unique_ptr<Predictor>(Predictor::Create("gpu_predictor", &lparam));
  gpu_predictor->Configure({});

  LearnerModelParam param;
@@ -70,7 +68,7 @@ TEST(gpu_predictor, ExternalMemoryTest) {
  param.num_output_group = n_classes;
  param.base_score = 0.5;

-  gbm::GBTreeModel model = CreateTestModel(&param);
+  gbm::GBTreeModel model = CreateTestModel(&param, n_classes);
  std::vector<std::unique_ptr<DMatrix>> dmats;
  dmlc::TemporaryDirectory tmpdir;
  std::string file0 = tmpdir.path + "/big_0.libsvm";
@@ -82,10 +80,10 @@ TEST(gpu_predictor, ExternalMemoryTest) {

  for (const auto& dmat: dmats) {
    dmat->Info().base_margin_.Resize(dmat->Info().num_row_ * n_classes, 0.5);
-    HostDeviceVector<float> out_predictions;
+    PredictionCacheEntry out_predictions;
    gpu_predictor->PredictBatch(dmat.get(), &out_predictions, model, 0);
-    EXPECT_EQ(out_predictions.Size(), dmat->Info().num_row_ * n_classes);
-    const std::vector<float> &host_vector = out_predictions.ConstHostVector();
+    EXPECT_EQ(out_predictions.predictions.Size(), dmat->Info().num_row_ * n_classes);
+    const std::vector<float> &host_vector = out_predictions.predictions.ConstHostVector();
    for (int i = 0; i < host_vector.size() / n_classes; i++) {
      ASSERT_EQ(host_vector[i * n_classes], 2.0);
      ASSERT_EQ(host_vector[i * n_classes + 1], 0.5);