Simplify inplace-predict. (#7910)

Pass the `X` as part of Proxy DMatrix instead of an independent `dmlc::any`.
2022-05-18 17:52:00 +08:00
parent 19775ffe15
commit 765097d514
17 changed files with 317 additions and 297 deletions
--- a/src/gbm/gbtree.cc
+++ b/src/gbm/gbtree.cc
@@ -795,88 +795,75 @@ class Dart : public GBTree {
    this->PredictBatchImpl(p_fmat, p_out_preds, training, layer_begin, layer_end);
  }

-  void InplacePredict(dmlc::any const &x, std::shared_ptr<DMatrix> p_m,
-                      float missing, PredictionCacheEntry *out_preds,
-                      uint32_t layer_begin, unsigned layer_end) const override {
+  void InplacePredict(std::shared_ptr<DMatrix> p_fmat, float missing,
+                      PredictionCacheEntry* p_out_preds, uint32_t layer_begin,
+                      unsigned layer_end) const override {
    uint32_t tree_begin, tree_end;
    std::tie(tree_begin, tree_end) = detail::LayerToTree(model_, layer_begin, layer_end);
-    std::vector<Predictor const *> predictors{
+    auto n_groups = model_.learner_model_param->num_output_group;
+
+    std::vector<Predictor const*> predictors {
      cpu_predictor_.get(),
 #if defined(XGBOOST_USE_CUDA)
      gpu_predictor_.get()
 #endif  // defined(XGBOOST_USE_CUDA)
    };
-    Predictor const * predictor {nullptr};
-
-    MetaInfo info;
+    Predictor const* predictor{nullptr};
    StringView msg{"Unsupported data type for inplace predict."};
-    int32_t device = GenericParameter::kCpuId;
+
    PredictionCacheEntry predts;
-    // Inplace predict is not used for training, so no need to drop tree.
-    for (size_t i = tree_begin; i < tree_end; ++i) {
+    if (ctx_->gpu_id != Context::kCpuId) {
+      predts.predictions.SetDevice(ctx_->gpu_id);
+    }
+    predts.predictions.Resize(p_fmat->Info().num_row_ * n_groups, 0);
+
+    auto predict_impl = [&](size_t i) {
+      predts.predictions.Fill(0);
      if (tparam_.predictor == PredictorType::kAuto) {
        // Try both predictor implementations
        bool success = false;
-        for (auto const &p : predictors) {
-          if (p && p->InplacePredict(x, nullptr, model_, missing, &predts, i,
-                                     i + 1)) {
+        for (auto const& p : predictors) {
+          if (p && p->InplacePredict(p_fmat, model_, missing, &predts, i, i + 1)) {
            success = true;
            predictor = p;
-#if defined(XGBOOST_USE_CUDA)
-            device = predts.predictions.DeviceIdx();
-#endif  // defined(XGBOOST_USE_CUDA)
            break;
          }
        }
        CHECK(success) << msg;
      } else {
-        // No base margin from meta info for each tree
        predictor = this->GetPredictor().get();
-        bool success = predictor->InplacePredict(x, nullptr, model_, missing,
-                                                 &predts, i, i + 1);
-        device = predts.predictions.DeviceIdx();
+        bool success = predictor->InplacePredict(p_fmat, model_, missing, &predts, i, i + 1);
        CHECK(success) << msg << std::endl
                       << "Current Predictor: "
-                       << (tparam_.predictor == PredictorType::kCPUPredictor
-                               ? "cpu_predictor"
-                               : "gpu_predictor");
+                       << (tparam_.predictor == PredictorType::kCPUPredictor ? "cpu_predictor"
+                                                                             : "gpu_predictor");
      }
+    };

-      auto w = this->weight_drop_.at(i);
-      size_t n_groups = model_.learner_model_param->num_output_group;
-      auto n_rows = predts.predictions.Size() / n_groups;
-
+    // Inplace predict is not used for training, so no need to drop tree.
+    for (size_t i = tree_begin; i < tree_end; ++i) {
+      predict_impl(i);
      if (i == tree_begin) {
-        // base margin is added here.
-        if (p_m) {
-          p_m->Info().num_row_ = n_rows;
-          predictor->InitOutPredictions(p_m->Info(), &out_preds->predictions,
-                                        model_);
-        } else {
-          info.num_row_ = n_rows;
-          predictor->InitOutPredictions(info, &out_preds->predictions, model_);
-        }
+        predictor->InitOutPredictions(p_fmat->Info(), &p_out_preds->predictions, model_);
      }
-
      // Multiple the tree weight
-      CHECK_EQ(predts.predictions.Size(), out_preds->predictions.Size());
+      auto w = this->weight_drop_.at(i);
      auto group = model_.tree_info.at(i);
+      CHECK_EQ(predts.predictions.Size(), p_out_preds->predictions.Size());

-      if (device == GenericParameter::kCpuId) {
-        auto &h_predts = predts.predictions.HostVector();
-        auto &h_out_predts = out_preds->predictions.HostVector();
+      size_t n_rows = p_fmat->Info().num_row_;
+      if (predts.predictions.DeviceIdx() != Context::kCpuId) {
+        p_out_preds->predictions.SetDevice(predts.predictions.DeviceIdx());
+        GPUDartInplacePredictInc(p_out_preds->predictions.DeviceSpan(),
+                                 predts.predictions.DeviceSpan(), w, n_rows,
+                                 model_.learner_model_param->base_score, n_groups, group);
+      } else {
+        auto& h_predts = predts.predictions.HostVector();
+        auto& h_out_predts = p_out_preds->predictions.HostVector();
        common::ParallelFor(n_rows, ctx_->Threads(), [&](auto ridx) {
          const size_t offset = ridx * n_groups + group;
-          // Need to remove the base margin from individual tree.
          h_out_predts[offset] += (h_predts[offset] - model_.learner_model_param->base_score) * w;
        });
-      } else {
-        out_preds->predictions.SetDevice(device);
-        predts.predictions.SetDevice(device);
-        GPUDartInplacePredictInc(out_preds->predictions.DeviceSpan(),
-                                 predts.predictions.DeviceSpan(), w, n_rows,
-                                 model_.learner_model_param->base_score,
-                                 n_groups, group);
      }
    }
  }
--- a/src/gbm/gbtree.h
+++ b/src/gbm/gbtree.h
@@ -261,8 +261,7 @@ class GBTree : public GradientBooster {
  void PredictBatch(DMatrix *p_fmat, PredictionCacheEntry *out_preds,
                    bool training, unsigned layer_begin, unsigned layer_end) override;

-  void InplacePredict(dmlc::any const &x, std::shared_ptr<DMatrix> p_m,
-                      float missing, PredictionCacheEntry *out_preds,
+  void InplacePredict(std::shared_ptr<DMatrix> p_m, float missing, PredictionCacheEntry* out_preds,
                      uint32_t layer_begin, unsigned layer_end) const override {
    CHECK(configured_);
    uint32_t tree_begin, tree_end;
@@ -278,15 +277,14 @@ class GBTree : public GradientBooster {
    if (tparam_.predictor == PredictorType::kAuto) {
      // Try both predictor implementations
      for (auto const &p : predictors) {
-        if (p && p->InplacePredict(x, p_m, model_, missing, out_preds,
-                                   tree_begin, tree_end)) {
+        if (p && p->InplacePredict(p_m, model_, missing, out_preds, tree_begin, tree_end)) {
          return;
        }
      }
      LOG(FATAL) << msg;
    } else {
-      bool success = this->GetPredictor()->InplacePredict(
-          x, p_m, model_, missing, out_preds, tree_begin, tree_end);
+      bool success = this->GetPredictor()->InplacePredict(p_m, model_, missing, out_preds,
+                                                          tree_begin, tree_end);
      CHECK(success) << msg << std::endl
                     << "Current Predictor: "
                     << (tparam_.predictor == PredictorType::kCPUPredictor