Remove update prediction cache from predictors. (#5312)

Move this function into gbtree, and uses only updater for doing so. As now the predictor knows exactly how many trees to predict, there's no need for it to update the prediction cache.
2020-02-17 11:35:47 +08:00
parent e433a379e4
commit 0110754a76
16 changed files with 118 additions and 164 deletions
--- a/src/gbm/gbtree.cc
+++ b/src/gbm/gbtree.cc
@@ -296,8 +296,15 @@ void GBTree::CommitModel(std::vector<std::vector<std::unique_ptr<RegTree>>>&& ne
    num_new_trees += new_trees[gid].size();
    model_.CommitModel(std::move(new_trees[gid]), gid);
  }
-  CHECK(configured_);
-  GetPredictor()->UpdatePredictionCache(model_, &updaters_, num_new_trees, m, predts);
+  auto* out = &predts->predictions;
+  if (model_.learner_model_param_->num_output_group == 1 &&
+      updaters_.size() > 0 &&
+      num_new_trees == 1 &&
+      out->Size() > 0 &&
+      updaters_.back()->UpdatePredictionCache(m, out)) {
+    auto delta = num_new_trees / model_.learner_model_param_->num_output_group;
+    predts->Update(delta);
+  }
  monitor_.Stop("CommitModel");
 }

@@ -357,6 +364,76 @@ void GBTree::SaveModel(Json* p_out) const {
  model_.SaveModel(&model);
 }

+void GBTree::PredictBatch(DMatrix* p_fmat,
+                          PredictionCacheEntry* out_preds,
+                          bool training,
+                          unsigned ntree_limit) {
+  CHECK(configured_);
+  GetPredictor(&out_preds->predictions, p_fmat)
+      ->PredictBatch(p_fmat, out_preds, model_, 0, ntree_limit);
+}
+
+std::unique_ptr<Predictor> const &
+GBTree::GetPredictor(HostDeviceVector<float> const *out_pred,
+                     DMatrix *f_dmat) const {
+  CHECK(configured_);
+  if (tparam_.predictor != PredictorType::kAuto) {
+    if (tparam_.predictor == PredictorType::kGPUPredictor) {
+#if defined(XGBOOST_USE_CUDA)
+      CHECK(gpu_predictor_);
+      return gpu_predictor_;
+#else
+      this->AssertGPUSupport();
+#endif  // defined(XGBOOST_USE_CUDA)
+    }
+    CHECK(cpu_predictor_);
+    return cpu_predictor_;
+  }
+
+  auto on_device =
+      f_dmat &&
+      (*(f_dmat->GetBatches<SparsePage>().begin())).data.DeviceCanRead();
+
+  // Use GPU Predictor if data is already on device.
+  if (on_device) {
+#if defined(XGBOOST_USE_CUDA)
+    CHECK(gpu_predictor_);
+    return gpu_predictor_;
+#else
+    LOG(FATAL) << "Data is on CUDA device, but XGBoost is not compiled with "
+                  "CUDA support.";
+    return cpu_predictor_;
+#endif  // defined(XGBOOST_USE_CUDA)
+  }
+
+  // GPU_Hist by default has prediction cache calculated from quantile values,
+  // so GPU Predictor is not used for training dataset.  But when XGBoost
+  // performs continue training with an existing model, the prediction cache is
+  // not availbale and number of trees doesn't equal zero, the whole training
+  // dataset got copied into GPU for precise prediction.  This condition tries
+  // to avoid such copy by calling CPU Predictor instead.
+  if ((out_pred && out_pred->Size() == 0) && (model_.param.num_trees != 0) &&
+      // FIXME(trivialfis): Implement a better method for testing whether data
+      // is on device after DMatrix refactoring is done.
+      !on_device) {
+    CHECK(cpu_predictor_);
+    return cpu_predictor_;
+  }
+
+  if (tparam_.tree_method == TreeMethod::kGPUHist) {
+#if defined(XGBOOST_USE_CUDA)
+    CHECK(gpu_predictor_);
+    return gpu_predictor_;
+#else
+    this->AssertGPUSupport();
+    return cpu_predictor_;
+#endif  // defined(XGBOOST_USE_CUDA)
+  }
+
+  CHECK(cpu_predictor_);
+  return cpu_predictor_;
+}
+
 class Dart : public GBTree {
 public:
  explicit Dart(LearnerModelParam const* booster_config) :
--- a/src/gbm/gbtree.h
+++ b/src/gbm/gbtree.h
@@ -201,11 +201,7 @@ class GBTree : public GradientBooster {
  void PredictBatch(DMatrix* p_fmat,
                    PredictionCacheEntry* out_preds,
                    bool training,
-                    unsigned ntree_limit) override {
-    CHECK(configured_);
-    GetPredictor(&out_preds->predictions, p_fmat)->PredictBatch(
-        p_fmat, out_preds, model_, 0, ntree_limit);
-  }
+                    unsigned ntree_limit) override;

  void PredictInstance(const SparsePage::Inst& inst,
                       std::vector<bst_float>* out_preds,
@@ -256,62 +252,7 @@ class GBTree : public GradientBooster {
                     std::vector<std::unique_ptr<RegTree> >* ret);

  std::unique_ptr<Predictor> const& GetPredictor(HostDeviceVector<float> const* out_pred = nullptr,
-                                                 DMatrix* f_dmat = nullptr) const {
-    CHECK(configured_);
-    if (tparam_.predictor != PredictorType::kAuto) {
-      if (tparam_.predictor == PredictorType::kGPUPredictor) {
-#if defined(XGBOOST_USE_CUDA)
-        CHECK(gpu_predictor_);
-        return gpu_predictor_;
-#else
-        this->AssertGPUSupport();
-#endif  // defined(XGBOOST_USE_CUDA)
-      }
-      CHECK(cpu_predictor_);
-      return cpu_predictor_;
-    }
-
-    auto on_device = f_dmat && (*(f_dmat->GetBatches<SparsePage>().begin())).data.DeviceCanRead();
-
-    // Use GPU Predictor if data is already on device.
-    if (on_device) {
-#if defined(XGBOOST_USE_CUDA)
-      CHECK(gpu_predictor_);
-      return gpu_predictor_;
-#else
-      LOG(FATAL) << "Data is on CUDA device, but XGBoost is not compiled with CUDA support.";
-      return cpu_predictor_;
-#endif  // defined(XGBOOST_USE_CUDA)
-    }
-
-    // GPU_Hist by default has prediction cache calculated from quantile values, so GPU
-    // Predictor is not used for training dataset.  But when XGBoost performs continue
-    // training with an existing model, the prediction cache is not availbale and number
-    // of trees doesn't equal zero, the whole training dataset got copied into GPU for
-    // precise prediction.  This condition tries to avoid such copy by calling CPU
-    // Predictor instead.
-    if ((out_pred && out_pred->Size() == 0) &&
-        (model_.param.num_trees != 0) &&
-        // FIXME(trivialfis): Implement a better method for testing whether data is on
-        // device after DMatrix refactoring is done.
-        !on_device) {
-      CHECK(cpu_predictor_);
-      return cpu_predictor_;
-    }
-
-    if (tparam_.tree_method == TreeMethod::kGPUHist) {
-#if defined(XGBOOST_USE_CUDA)
-      CHECK(gpu_predictor_);
-      return gpu_predictor_;
-#else
-      this->AssertGPUSupport();
-      return cpu_predictor_;
-#endif  // defined(XGBOOST_USE_CUDA)
-    }
-
-    CHECK(cpu_predictor_);
-    return cpu_predictor_;
-  }
+                                                 DMatrix* f_dmat = nullptr) const;

  // commit new trees all at once
  virtual void CommitModel(std::vector<std::vector<std::unique_ptr<RegTree>>>&& new_trees,