Move prediction cache to Learner. (#5220)

* Move prediction cache into Learner.

* Clean-ups

- Remove duplicated cache in Learner and GBM.
- Remove ad-hoc fix of invalid cache.
- Remove `PredictFromCache` in predictors.
- Remove prediction cache for linear altogether, as it only moves the
  prediction into the training process but doesn't provide any actual overall
  speed gain.
- The cache is now unique to Learner, which means the ownership is no longer
  shared by any other components.

* Changes

- Add version to prediction cache.
- Use weak ptr to check expired DMatrix.
- Pass shared pointer instead of raw pointer.
This commit is contained in:
Jiaming Yuan
2020-02-14 13:04:23 +08:00
committed by GitHub
parent 24ad9dec0b
commit c35cdecddd
19 changed files with 457 additions and 372 deletions

View File

@@ -10,6 +10,7 @@
#include <algorithm>
#include <iomanip>
#include <limits>
#include <memory>
#include <sstream>
#include <string>
#include <stack>
@@ -17,6 +18,8 @@
#include <vector>
#include "xgboost/base.h"
#include "xgboost/data.h"
#include "xgboost/predictor.h"
#include "xgboost/feature_map.h"
#include "xgboost/gbm.h"
#include "xgboost/generic_parameters.h"
@@ -195,9 +198,12 @@ void GenericParameter::ConfigureGpuId(bool require_gpu) {
*/
class LearnerImpl : public Learner {
public:
explicit LearnerImpl(std::vector<std::shared_ptr<DMatrix> > cache)
: need_configuration_{true}, cache_(std::move(cache)) {
explicit LearnerImpl(std::vector<std::shared_ptr<DMatrix> > cache)
: need_configuration_{true} {
monitor_.Init("Learner");
for (std::shared_ptr<DMatrix> const& d : cache) {
cache_.Cache(d, GenericParameter::kCpuId);
}
}
// Configuration before data is known.
void Configure() override {
@@ -358,8 +364,7 @@ class LearnerImpl : public Learner {
name = get<String>(gradient_booster["name"]);
tparam_.UpdateAllowUnknown(Args{{"booster", name}});
gbm_.reset(GradientBooster::Create(tparam_.booster,
&generic_parameters_, &learner_model_param_,
cache_));
&generic_parameters_, &learner_model_param_));
gbm_->LoadModel(gradient_booster);
auto const& j_attributes = get<Object const>(learner.at("attributes"));
@@ -413,8 +418,7 @@ class LearnerImpl : public Learner {
tparam_.booster = get<String>(gradient_booster["name"]);
if (!gbm_) {
gbm_.reset(GradientBooster::Create(tparam_.booster,
&generic_parameters_, &learner_model_param_,
cache_));
&generic_parameters_, &learner_model_param_));
}
gbm_->LoadConfig(gradient_booster);
@@ -500,7 +504,7 @@ class LearnerImpl : public Learner {
obj_.reset(ObjFunction::Create(tparam_.objective, &generic_parameters_));
gbm_.reset(GradientBooster::Create(tparam_.booster, &generic_parameters_,
&learner_model_param_, cache_));
&learner_model_param_));
gbm_->Load(fi);
if (mparam_.contain_extra_attrs != 0) {
std::vector<std::pair<std::string, std::string> > attr;
@@ -726,17 +730,18 @@ class LearnerImpl : public Learner {
this->CheckDataSplitMode();
this->ValidateDMatrix(train.get());
auto& predt = this->cache_.Cache(train, generic_parameters_.gpu_id);
monitor_.Start("PredictRaw");
this->PredictRaw(train.get(), &preds_[train.get()], true);
this->PredictRaw(train.get(), &predt, true);
monitor_.Stop("PredictRaw");
TrainingObserver::Instance().Observe(preds_[train.get()], "Predictions");
monitor_.Start("GetGradient");
obj_->GetGradient(preds_[train.get()], train->Info(), iter, &gpair_);
obj_->GetGradient(predt.predictions, train->Info(), iter, &gpair_);
monitor_.Stop("GetGradient");
TrainingObserver::Instance().Observe(gpair_, "Gradients");
gbm_->DoBoost(train.get(), &gpair_, obj_.get());
gbm_->DoBoost(train.get(), &gpair_, &predt);
monitor_.Stop("UpdateOneIter");
}
@@ -749,12 +754,14 @@ class LearnerImpl : public Learner {
}
this->CheckDataSplitMode();
this->ValidateDMatrix(train.get());
this->cache_.Cache(train, generic_parameters_.gpu_id);
gbm_->DoBoost(train.get(), in_gpair);
gbm_->DoBoost(train.get(), in_gpair, &cache_.Entry(train.get()));
monitor_.Stop("BoostOneIter");
}
std::string EvalOneIter(int iter, const std::vector<std::shared_ptr<DMatrix>>& data_sets,
std::string EvalOneIter(int iter,
const std::vector<std::shared_ptr<DMatrix>>& data_sets,
const std::vector<std::string>& data_names) override {
monitor_.Start("EvalOneIter");
this->Configure();
@@ -766,14 +773,19 @@ class LearnerImpl : public Learner {
metrics_.back()->Configure({cfg_.begin(), cfg_.end()});
}
for (size_t i = 0; i < data_sets.size(); ++i) {
DMatrix * dmat = data_sets[i].get();
this->ValidateDMatrix(dmat);
this->PredictRaw(dmat, &preds_[dmat], false);
obj_->EvalTransform(&preds_[dmat]);
std::shared_ptr<DMatrix> m = data_sets[i];
auto &predt = this->cache_.Cache(m, generic_parameters_.gpu_id);
this->ValidateDMatrix(m.get());
this->PredictRaw(m.get(), &predt, false);
auto &out = output_predictions_.Cache(m, generic_parameters_.gpu_id).predictions;
out.Resize(predt.predictions.Size());
out.Copy(predt.predictions);
obj_->EvalTransform(&out);
for (auto& ev : metrics_) {
os << '\t' << data_names[i] << '-' << ev->Name() << ':'
<< ev->Eval(preds_[dmat], data_sets[i]->Info(),
tparam_.dsplit == DataSplitMode::kRow);
<< ev->Eval(out, m->Info(), tparam_.dsplit == DataSplitMode::kRow);
}
}
@@ -848,7 +860,12 @@ class LearnerImpl : public Learner {
} else if (pred_leaf) {
gbm_->PredictLeaf(data.get(), &out_preds->HostVector(), ntree_limit);
} else {
this->PredictRaw(data.get(), out_preds, training, ntree_limit);
auto& prediction = cache_.Cache(data, generic_parameters_.gpu_id);
this->PredictRaw(data.get(), &prediction, training, ntree_limit);
// Copy the prediction cache to output prediction. out_preds comes from C API
out_preds->SetDevice(generic_parameters_.gpu_id);
out_preds->Resize(prediction.predictions.Size());
out_preds->Copy(prediction.predictions);
if (!output_margin) {
obj_->PredTransform(out_preds);
}
@@ -868,11 +885,10 @@ class LearnerImpl : public Learner {
* predictor, when it equals 0, this means we are using all the trees
* \param training allow dropout when the DART booster is being used
*/
void PredictRaw(DMatrix* data, HostDeviceVector<bst_float>* out_preds,
void PredictRaw(DMatrix* data, PredictionCacheEntry* out_preds,
bool training,
unsigned ntree_limit = 0) const {
CHECK(gbm_ != nullptr)
<< "Predict must happen after Load or configuration";
CHECK(gbm_ != nullptr) << "Predict must happen after Load or configuration";
this->ValidateDMatrix(data);
gbm_->PredictBatch(data, out_preds, training, ntree_limit);
}
@@ -920,7 +936,7 @@ class LearnerImpl : public Learner {
void ConfigureGBM(LearnerTrainParam const& old, Args const& args) {
if (gbm_ == nullptr || old.booster != tparam_.booster) {
gbm_.reset(GradientBooster::Create(tparam_.booster, &generic_parameters_,
&learner_model_param_, cache_));
&learner_model_param_));
}
gbm_->Configure(args);
}
@@ -930,9 +946,10 @@ class LearnerImpl : public Learner {
// estimate feature bound
// TODO(hcho3): Change num_feature to 64-bit integer
unsigned num_feature = 0;
for (auto & matrix : cache_) {
CHECK(matrix != nullptr);
const uint64_t num_col = matrix->Info().num_col_;
for (auto & matrix : cache_.Container()) {
CHECK(matrix.first);
CHECK(!matrix.second.ref.expired());
const uint64_t num_col = matrix.first->Info().num_col_;
CHECK_LE(num_col, static_cast<uint64_t>(std::numeric_limits<unsigned>::max()))
<< "Unfortunately, XGBoost does not support data matrices with "
<< std::numeric_limits<unsigned>::max() << " features or greater";
@@ -990,13 +1007,12 @@ class LearnerImpl : public Learner {
// `enable_experimental_json_serialization' is set to false. Will be removed once JSON
// takes over.
std::string const serialisation_header_ { u8"CONFIG-offset:" };
// configurations
// User provided configurations
std::map<std::string, std::string> cfg_;
// Stores information like best-iteration for early stopping.
std::map<std::string, std::string> attributes_;
std::vector<std::string> metric_names_;
static std::string const kEvalMetric; // NOLINT
// temporal storages for prediction
std::map<DMatrix*, HostDeviceVector<bst_float>> preds_;
// gradient pairs
HostDeviceVector<GradientPair> gpair_;
bool need_configuration_;
@@ -1004,8 +1020,11 @@ class LearnerImpl : public Learner {
private:
/*! \brief random number transformation seed. */
static int32_t constexpr kRandSeedMagic = 127;
// internal cached dmatrix
std::vector<std::shared_ptr<DMatrix> > cache_;
// internal cached dmatrix for prediction.
PredictionContainer cache_;
/*! \brief Temporary storage to prediction. Useful for storing data transformed by
* objective function */
PredictionContainer output_predictions_;
common::Monitor monitor_;