Move prediction cache to Learner. (#5220)

* Move prediction cache into Learner.

* Clean-ups
  - Remove the duplicated cache in Learner and GBM.
  - Remove the ad-hoc fix for invalid cache.
  - Remove `PredictFromCache` in predictors.
  - Remove the prediction cache for linear altogether, as it only moves the prediction into the training process and doesn't provide any actual overall speed gain.
  - The cache is now unique to Learner, which means its ownership is no longer shared with any other component.

* Changes
  - Add a version to the prediction cache.
  - Use a weak pointer to check for an expired DMatrix.
  - Pass a shared pointer instead of a raw pointer.
2020-02-14 13:04:23 +08:00
parent 24ad9dec0b
commit c35cdecddd
19 changed files with 457 additions and 372 deletions
--- a/src/gbm/gblinear.cc
+++ b/src/gbm/gblinear.cc
@@ -15,6 +15,7 @@

 #include "xgboost/gbm.h"
 #include "xgboost/json.h"
+#include "xgboost/predictor.h"
 #include "xgboost/linear_updater.h"
 #include "xgboost/logging.h"
 #include "xgboost/learner.h"
@@ -50,21 +51,14 @@ struct GBLinearTrainParam : public XGBoostParameter<GBLinearTrainParam> {
 */
 class GBLinear : public GradientBooster {
 public:
-  explicit GBLinear(const std::vector<std::shared_ptr<DMatrix> > &cache,
-                    LearnerModelParam const* learner_model_param)
+  explicit GBLinear(LearnerModelParam const* learner_model_param)
      : learner_model_param_{learner_model_param},
        model_{learner_model_param_},
        previous_model_{learner_model_param_},
        sum_instance_weight_(0),
        sum_weight_complete_(false),
-        is_converged_(false) {
-    // Add matrices to the prediction cache
-    for (auto &d : cache) {
-      PredictionCacheEntry e;
-      e.data = d;
-      cache_[d.get()] = std::move(e);
-    }
-  }
+        is_converged_(false) {}
+
  void Configure(const Args& cfg) override {
    if (model_.weight.size() == 0) {
      model_.Configure(cfg);
@@ -118,7 +112,7 @@ class GBLinear : public GradientBooster {

  void DoBoost(DMatrix *p_fmat,
               HostDeviceVector<GradientPair> *in_gpair,
-               ObjFunction* obj) override {
+               PredictionCacheEntry* predt) override {
    monitor_.Start("DoBoost");

    model_.LazyInitModel();
@@ -127,28 +121,19 @@ class GBLinear : public GradientBooster {
    if (!this->CheckConvergence()) {
      updater_->Update(in_gpair, p_fmat, &model_, sum_instance_weight_);
    }
-    this->UpdatePredictionCache();

    monitor_.Stop("DoBoost");
  }

  void PredictBatch(DMatrix *p_fmat,
-                    HostDeviceVector<bst_float> *out_preds,
+                    PredictionCacheEntry *predts,
                    bool training,
                    unsigned ntree_limit) override {
    monitor_.Start("PredictBatch");
+    auto* out_preds = &predts->predictions;
    CHECK_EQ(ntree_limit, 0U)
        << "GBLinear::Predict ntrees is only valid for gbtree predictor";
-
-    // Try to predict from cache
-    auto it = cache_.find(p_fmat);
-    if (it != cache_.end() && it->second.predictions.size() != 0) {
-      std::vector<bst_float> &y = it->second.predictions;
-      out_preds->Resize(y.size());
-      std::copy(y.begin(), y.end(), out_preds->HostVector().begin());
-    } else {
-      this->PredictBatchInternal(p_fmat, &out_preds->HostVector());
-    }
+    this->PredictBatchInternal(p_fmat, &out_preds->HostVector());
    monitor_.Stop("PredictBatch");
  }
  // add base margin
@@ -258,7 +243,8 @@ class GBLinear : public GradientBooster {
        const size_t ridx = batch.base_rowid + i;
        // loop over output groups
        for (int gid = 0; gid < ngroup; ++gid) {
-          bst_float margin =  (base_margin.size() != 0) ?
+          bst_float margin =
+              (base_margin.size() != 0) ?
              base_margin[ridx * ngroup + gid] : learner_model_param_->base_score;
          this->Pred(batch[i], &preds[ridx * ngroup], gid, margin);
        }
@@ -266,17 +252,6 @@ class GBLinear : public GradientBooster {
    }
    monitor_.Stop("PredictBatchInternal");
  }
-  void UpdatePredictionCache() {
-    // update cache entry
-    for (auto &kv : cache_) {
-      PredictionCacheEntry &e = kv.second;
-      if (e.predictions.size() == 0) {
-        size_t n = model_.learner_model_param_->num_output_group * e.data->Info().num_row_;
-        e.predictions.resize(n);
-      }
-      this->PredictBatchInternal(e.data.get(), &e.predictions);
-    }
-  }

  bool CheckConvergence() {
    if (param_.tolerance == 0.0f) return false;
@@ -327,22 +302,6 @@ class GBLinear : public GradientBooster {
  bool sum_weight_complete_;
  common::Monitor monitor_;
  bool is_converged_;
-
-  /**
-   * \struct  PredictionCacheEntry
-   *
-   * \brief Contains pointer to input matrix and associated cached predictions.
-   */
-  struct PredictionCacheEntry {
-    std::shared_ptr<DMatrix> data;
-    std::vector<bst_float> predictions;
-  };
-
-  /**
-   * \brief Map of matrices and associated cached predictions to facilitate
-   * storing and looking up predictions.
-   */
-  std::unordered_map<DMatrix*, PredictionCacheEntry> cache_;
 };

 // register the objective functions
@@ -350,9 +309,8 @@ DMLC_REGISTER_PARAMETER(GBLinearTrainParam);

 XGBOOST_REGISTER_GBM(GBLinear, "gblinear")
    .describe("Linear booster, implement generalized linear model.")
-    .set_body([](const std::vector<std::shared_ptr<DMatrix> > &cache,
-                 LearnerModelParam const* booster_config) {
-      return new GBLinear(cache, booster_config);
+    .set_body([](LearnerModelParam const* booster_config) {
+      return new GBLinear(booster_config);
    });
 }  // namespace gbm
 }  // namespace xgboost
--- a/src/gbm/gblinear_model.h
+++ b/src/gbm/gblinear_model.h
@@ -55,8 +55,9 @@ class GBLinearModel : public Model {
  std::vector<bst_float> weight;
  // initialize the model parameter
  inline void LazyInitModel() {
-    if (!weight.empty())
+    if (!weight.empty()) {
      return;
+    }
    // bias is the last weight
    weight.resize((learner_model_param_->num_feature + 1) *
                  learner_model_param_->num_output_group);
--- a/src/gbm/gbm.cc
+++ b/src/gbm/gbm.cc
@@ -1,5 +1,5 @@
 /*!
- * Copyright 2015 by Contributors
+ * Copyright 2015-2020 by Contributors
 * \file gbm.cc
 * \brief Registry of gradient boosters.
 */
@@ -20,13 +20,12 @@ namespace xgboost {
 GradientBooster* GradientBooster::Create(
    const std::string& name,
    GenericParameter const* generic_param,
-    LearnerModelParam const* learner_model_param,
-    const std::vector<std::shared_ptr<DMatrix> >& cache_mats) {
+    LearnerModelParam const* learner_model_param) {
  auto *e = ::dmlc::Registry< ::xgboost::GradientBoosterReg>::Get()->Find(name);
  if (e == nullptr) {
    LOG(FATAL) << "Unknown gbm type " << name;
  }
-  auto p_bst =  (e->body)(cache_mats, learner_model_param);
+  auto p_bst =  (e->body)(learner_model_param);
  p_bst->generic_param_ = generic_param;
  return p_bst;
 }
--- a/src/gbm/gbtree.cc
+++ b/src/gbm/gbtree.cc
@@ -1,5 +1,5 @@
 /*!
- * Copyright 2014-2019 by Contributors
+ * Copyright 2014-2020 by Contributors
 * \file gbtree.cc
 * \brief gradient boosted tree implementation.
 * \author Tianqi Chen
@@ -14,6 +14,7 @@
 #include <limits>
 #include <algorithm>

+#include "xgboost/data.h"
 #include "xgboost/gbm.h"
 #include "xgboost/logging.h"
 #include "xgboost/json.h"
@@ -47,14 +48,14 @@ void GBTree::Configure(const Args& cfg) {
  // configure predictors
  if (!cpu_predictor_) {
    cpu_predictor_ = std::unique_ptr<Predictor>(
-        Predictor::Create("cpu_predictor", this->generic_param_, cache_));
+        Predictor::Create("cpu_predictor", this->generic_param_));
  }
  cpu_predictor_->Configure(cfg);
 #if defined(XGBOOST_USE_CUDA)
  auto n_gpus = common::AllVisibleGPUs();
  if (!gpu_predictor_ && n_gpus != 0) {
    gpu_predictor_ = std::unique_ptr<Predictor>(
-        Predictor::Create("gpu_predictor", this->generic_param_, cache_));
+        Predictor::Create("gpu_predictor", this->generic_param_));
  }
  if (n_gpus != 0) {
    gpu_predictor_->Configure(cfg);
@@ -183,7 +184,7 @@ void GBTree::ConfigureUpdaters() {

 void GBTree::DoBoost(DMatrix* p_fmat,
                     HostDeviceVector<GradientPair>* in_gpair,
-                     ObjFunction* obj) {
+                     PredictionCacheEntry* predt) {
  std::vector<std::vector<std::unique_ptr<RegTree> > > new_trees;
  const int ngroup = model_.learner_model_param_->num_output_group;
  ConfigureWithKnownData(this->cfg_, p_fmat);
@@ -195,7 +196,7 @@ void GBTree::DoBoost(DMatrix* p_fmat,
    new_trees.push_back(std::move(ret));
  } else {
    CHECK_EQ(in_gpair->Size() % ngroup, 0U)
-        << "must have exactly ngroup*nrow gpairs";
+        << "must have exactly ngroup * nrow gpairs";
    // TODO(canonizer): perform this on GPU if HostDeviceVector has device set.
    HostDeviceVector<GradientPair> tmp(in_gpair->Size() / ngroup,
                                       GradientPair(),
@@ -214,7 +215,7 @@ void GBTree::DoBoost(DMatrix* p_fmat,
    }
  }
  monitor_.Stop("BoostNewTrees");
-  this->CommitModel(std::move(new_trees));
+  this->CommitModel(std::move(new_trees), p_fmat, predt);
 }

 void GBTree::InitUpdater(Args const& cfg) {
@@ -286,7 +287,9 @@ void GBTree::BoostNewTrees(HostDeviceVector<GradientPair>* gpair,
  }
 }

-void GBTree::CommitModel(std::vector<std::vector<std::unique_ptr<RegTree>>>&& new_trees) {
+void GBTree::CommitModel(std::vector<std::vector<std::unique_ptr<RegTree>>>&& new_trees,
+                         DMatrix* m,
+                         PredictionCacheEntry* predts) {
  monitor_.Start("CommitModel");
  int num_new_trees = 0;
  for (uint32_t gid = 0; gid < model_.learner_model_param_->num_output_group; ++gid) {
@@ -294,7 +297,7 @@ void GBTree::CommitModel(std::vector<std::vector<std::unique_ptr<RegTree>>>&& ne
    model_.CommitModel(std::move(new_trees[gid]), gid);
  }
  CHECK(configured_);
-  GetPredictor()->UpdatePredictionCache(model_, &updaters_, num_new_trees);
+  GetPredictor()->UpdatePredictionCache(model_, &updaters_, num_new_trees, m, predts);
  monitor_.Stop("CommitModel");
 }

@@ -303,13 +306,16 @@ void GBTree::LoadConfig(Json const& in) {
  fromJson(in["gbtree_train_param"], &tparam_);
  int32_t const n_gpus = xgboost::common::AllVisibleGPUs();
  if (n_gpus == 0 && tparam_.predictor == PredictorType::kGPUPredictor) {
+    LOG(WARNING)
+        << "Loading from a raw memory buffer on CPU only machine.  "
+           "Changing predictor to auto.";
    tparam_.UpdateAllowUnknown(Args{{"predictor", "auto"}});
  }
  if (n_gpus == 0 && tparam_.tree_method == TreeMethod::kGPUHist) {
    tparam_.UpdateAllowUnknown(Args{{"tree_method", "hist"}});
    LOG(WARNING)
        << "Loading from a raw memory buffer on CPU only machine.  "
-           "Change tree_method to hist.";
+           "Changing tree_method to hist.";
  }

  auto const& j_updaters = get<Object const>(in["updater"]);
@@ -415,7 +421,7 @@ class Dart : public GBTree {
  }

  void PredictBatch(DMatrix* p_fmat,
-                    HostDeviceVector<bst_float>* p_out_preds,
+                    PredictionCacheEntry* p_out_preds,
                    bool training,
                    unsigned ntree_limit) override {
    DropTrees(training);
@@ -426,7 +432,7 @@ class Dart : public GBTree {
    }
    size_t n = num_group * p_fmat->Info().num_row_;
    const auto &base_margin = p_fmat->Info().base_margin_.ConstHostVector();
-    auto& out_preds = p_out_preds->HostVector();
+    auto& out_preds = p_out_preds->predictions.HostVector();
    out_preds.resize(n);
    if (base_margin.size() != 0) {
      CHECK_EQ(out_preds.size(), n);
@@ -539,7 +545,9 @@ class Dart : public GBTree {

  // commit new trees all at once
  void
-  CommitModel(std::vector<std::vector<std::unique_ptr<RegTree>>>&& new_trees) override {
+  CommitModel(std::vector<std::vector<std::unique_ptr<RegTree>>>&& new_trees,
+              DMatrix* m,
+              PredictionCacheEntry* predts) override {
    int num_new_trees = 0;
    for (uint32_t gid = 0; gid < model_.learner_model_param_->num_output_group; ++gid) {
      num_new_trees += new_trees[gid].size();
@@ -681,16 +689,13 @@ DMLC_REGISTER_PARAMETER(DartTrainParam);

 XGBOOST_REGISTER_GBM(GBTree, "gbtree")
 .describe("Tree booster, gradient boosted trees.")
-.set_body([](const std::vector<std::shared_ptr<DMatrix> >& cached_mats,
-             LearnerModelParam const* booster_config) {
+.set_body([](LearnerModelParam const* booster_config) {
    auto* p = new GBTree(booster_config);
-    p->InitCache(cached_mats);
    return p;
  });
 XGBOOST_REGISTER_GBM(Dart, "dart")
 .describe("Tree booster, dart.")
-.set_body([](const std::vector<std::shared_ptr<DMatrix> >& cached_mats,
-             LearnerModelParam const* booster_config) {
+.set_body([](LearnerModelParam const* booster_config) {
    GBTree* p = new Dart(booster_config);
    return p;
  });
--- a/src/gbm/gbtree.h
+++ b/src/gbm/gbtree.h
@@ -16,6 +16,7 @@
 #include <string>
 #include <unordered_map>

+#include "xgboost/data.h"
 #include "xgboost/logging.h"
 #include "xgboost/gbm.h"
 #include "xgboost/predictor.h"
@@ -151,14 +152,8 @@ struct DartTrainParam : public XGBoostParameter<DartTrainParam> {
 // gradient boosted trees
 class GBTree : public GradientBooster {
 public:
-  explicit GBTree(LearnerModelParam const* booster_config) : model_(booster_config) {}
-
-  void InitCache(const std::vector<std::shared_ptr<DMatrix> > &cache) {
-    cache_ = std::make_shared<std::unordered_map<DMatrix*, PredictionCacheEntry>>();
-    for (std::shared_ptr<DMatrix> const& d : cache) {
-      (*cache_)[d.get()].data = d;
-    }
-  }
+  explicit GBTree(LearnerModelParam const* booster_config) :
+      model_(booster_config) {}

  void Configure(const Args& cfg) override;
  // Revise `tree_method` and `updater` parameters after seeing the training
@@ -171,7 +166,7 @@ class GBTree : public GradientBooster {
  /*! \brief Carry out one iteration of boosting */
  void DoBoost(DMatrix* p_fmat,
               HostDeviceVector<GradientPair>* in_gpair,
-               ObjFunction* obj) override;
+               PredictionCacheEntry* predt) override;

  bool UseGPU() const override {
    return
@@ -204,11 +199,12 @@ class GBTree : public GradientBooster {
  }

  void PredictBatch(DMatrix* p_fmat,
-                    HostDeviceVector<bst_float>* out_preds,
+                    PredictionCacheEntry* out_preds,
                    bool training,
                    unsigned ntree_limit) override {
    CHECK(configured_);
-    GetPredictor(out_preds, p_fmat)->PredictBatch(p_fmat, out_preds, model_, 0, ntree_limit);
+    GetPredictor(&out_preds->predictions, p_fmat)->PredictBatch(
+        p_fmat, out_preds, model_, 0, ntree_limit);
  }

  void PredictInstance(const SparsePage::Inst& inst,
@@ -318,7 +314,9 @@ class GBTree : public GradientBooster {
  }

  // commit new trees all at once
-  virtual void CommitModel(std::vector<std::vector<std::unique_ptr<RegTree>>>&& new_trees);
+  virtual void CommitModel(std::vector<std::vector<std::unique_ptr<RegTree>>>&& new_trees,
+                           DMatrix* m,
+                           PredictionCacheEntry* predts);

  // --- data structure ---
  GBTreeModel model_;
@@ -332,11 +330,6 @@ class GBTree : public GradientBooster {
  Args cfg_;
  // the updaters that can be applied to each of tree
  std::vector<std::unique_ptr<TreeUpdater>> updaters_;
-  /**
-   * \brief Map of matrices and associated cached predictions to facilitate
-   * storing and looking up predictions.
-   */
-  std::shared_ptr<std::unordered_map<DMatrix*, PredictionCacheEntry>> cache_;
  // Predictors
  std::unique_ptr<Predictor> cpu_predictor_;
 #if defined(XGBOOST_USE_CUDA)