Move prediction cache to Learner. (#5220)

* Move prediction cache into Learner. * Clean-ups - Remove duplicated cache in Learner and GBM. - Remove ad-hoc fix of invalid cache. - Remove `PredictFromCache` in predictors. - Remove prediction cache for linear altogether, as it's only moving the prediction into training process but doesn't provide any actual overall speed gain. - The cache is now unique to Learner, which means the ownership is no longer shared by any other components. * Changes - Add version to prediction cache. - Use weak ptr to check expired DMatrix. - Pass shared pointer instead of raw pointer.
2020-02-14 13:04:23 +08:00
parent 24ad9dec0b
commit c35cdecddd
19 changed files with 457 additions and 372 deletions
--- a/include/xgboost/gbm.h
+++ b/include/xgboost/gbm.h
@@ -1,5 +1,5 @@
 /*!
- * Copyright by Contributors
+ * Copyright 2014-2020 by Contributors
 * \file gbm.h
 * \brief Interface of gradient booster,
 *  that learns through gradient statistics.
@@ -18,6 +18,7 @@
 #include <utility>
 #include <string>
 #include <functional>
+#include <unordered_map>
 #include <memory>

 namespace xgboost {
@@ -28,6 +29,8 @@ class ObjFunction;

 struct GenericParameter;
 struct LearnerModelParam;
+struct PredictionCacheEntry;
+class PredictionContainer;

 /*!
 * \brief interface of gradient boosting model.
@@ -38,7 +41,7 @@ class GradientBooster : public Model, public Configurable {

 public:
  /*! \brief virtual destructor */
-  virtual ~GradientBooster() = default;
+  ~GradientBooster() override = default;
  /*!
   * \brief Set the configuration of gradient boosting.
   *  User must call configure once before InitModel and Training.
@@ -71,19 +74,22 @@ class GradientBooster : public Model, public Configurable {
   * \param obj The objective function, optional, can be nullptr when use customized version
   * the booster may change content of gpair
   */
-  virtual void DoBoost(DMatrix* p_fmat,
-                       HostDeviceVector<GradientPair>* in_gpair,
-                       ObjFunction* obj = nullptr) = 0;
+  virtual void DoBoost(DMatrix* p_fmat, HostDeviceVector<GradientPair>* in_gpair,
+                       PredictionCacheEntry *prediction) = 0;

  /*!
   * \brief generate predictions for given feature matrix
   * \param dmat feature matrix
   * \param out_preds output vector to hold the predictions
-   * \param ntree_limit limit the number of trees used in prediction, when it equals 0, this means
-   *    we do not limit number of trees, this parameter is only valid for gbtree, but not for gblinear
+   * \param training Whether the prediction value is used for training.  For dart booster
+   *                 drop out is performed during training.
+   * \param ntree_limit limit the number of trees used in prediction,
+   *                    when it equals 0, this means we do not limit
+   *                    number of trees, this parameter is only valid
+   *                    for gbtree, but not for gblinear
   */
  virtual void PredictBatch(DMatrix* dmat,
-                            HostDeviceVector<bst_float>* out_preds,
+                            PredictionCacheEntry* out_preds,
                            bool training,
                            unsigned ntree_limit = 0) = 0;
  /*!
@@ -158,8 +164,7 @@ class GradientBooster : public Model, public Configurable {
  static GradientBooster* Create(
      const std::string& name,
      GenericParameter const* generic_param,
-      LearnerModelParam const* learner_model_param,
-      const std::vector<std::shared_ptr<DMatrix> >& cache_mats);
+      LearnerModelParam const* learner_model_param);

  static void AssertGPUSupport() {
 #ifndef XGBOOST_USE_CUDA
@@ -174,8 +179,7 @@ class GradientBooster : public Model, public Configurable {
 struct GradientBoosterReg
    : public dmlc::FunctionRegEntryBase<
  GradientBoosterReg,
-  std::function<GradientBooster* (const std::vector<std::shared_ptr<DMatrix> > &cached_mats,
-                                  LearnerModelParam const* learner_model_param)> > {
+  std::function<GradientBooster* (LearnerModelParam const* learner_model_param)> > {
 };

 /*!
--- a/include/xgboost/predictor.h
+++ b/include/xgboost/predictor.h
@@ -1,5 +1,5 @@
 /*!
- * Copyright by Contributors
+ * Copyright 2017-2020 by Contributors
 * \file predictor.h
 * \brief Interface of predictor,
 *  performs predictions for a gradient booster.
@@ -32,47 +32,83 @@ namespace xgboost {
 * \brief Contains pointer to input matrix and associated cached predictions.
 */
 struct PredictionCacheEntry {
-  std::shared_ptr<DMatrix> data;
+  // A storage for caching prediction values
  HostDeviceVector<bst_float> predictions;
+  // The version of the current cache, corresponding to the number of layers of trees
+  uint32_t version;
+  // A weak pointer for checking whether the DMatrix object has expired.
+  std::weak_ptr< DMatrix > ref;
+
+  PredictionCacheEntry() : version { 0 } {}
+  /* \brief Update the cache entry by number of versions.
+   *
+   * \param v Added versions.
+   */
+  void Update(uint32_t v) {
+    version += v;
+  }
+};
+
+/* \brief A container for managed prediction caches.
+ */
+class PredictionContainer {
+  std::unordered_map<DMatrix *, PredictionCacheEntry> container_;
+  void ClearExpiredEntries();
+
+ public:
+  PredictionContainer() = default;
+  /* \brief Add a new DMatrix to the cache, at the same time this function will clear out
+   *        all expired caches by checking the `std::weak_ptr`.  Caching an existing
+   *        DMatrix won't renew it.
+   *
+   *  Passing in a `shared_ptr` is critical here.  First to create a `weak_ptr` inside the
+   *  entry this shared pointer is necessary.  More importantly, the life time of this
+   *  cache is tied to the shared pointer.
+   *
+   *  Another way to make a safe cache is to create a proxy to this entry, with another
+   *  shared pointer defined inside, and pass this proxy around instead of the real entry.
+   *  But that seems to be too messy.  In XGBoost, functions like `UpdateOneIter` will have
+   *  (memory) safe access to the DMatrix as long as it's passed in as a `shared_ptr`.
+   *
+   * \param m shared pointer to the DMatrix that needs to be cached.
+   * \param device Which device the cache should be allocated on.  Pass
+   *               GenericParameter::kCpuId for CPU or a non-negative integer for GPU id.
+   *
+   * \return the cache entry for passed in DMatrix, either an existing cache or newly
+   *         created.
+   */
+  PredictionCacheEntry& Cache(std::shared_ptr<DMatrix> m, int32_t device);
+  /* \brief Get a prediction cache entry.  This entry must be already allocated by `Cache`
+   *        method.  Otherwise a dmlc::Error is thrown.
+   *
+   * \param m pointer to the DMatrix.
+   * \return The prediction cache for passed in DMatrix.
+   */
+  PredictionCacheEntry& Entry(DMatrix* m);
+  /* \brief Get a const reference to the underlying hash map.  Clear expired caches before
+   *        returning.
+   */
+  decltype(container_) const& Container();
 };

 /**
 * \class Predictor
 *
- * \brief Performs prediction on individual training instances or batches of
- * instances for GBTree. The predictor also manages a prediction cache
- * associated with input matrices. If possible, it will use previously
- * calculated predictions instead of calculating new predictions.
- *        Prediction functions all take a GBTreeModel and a DMatrix as input and
- * output a vector of predictions. The predictor does not modify any state of
- * the model itself.
+ * \brief Performs prediction on individual training instances or batches of instances for
+ *        GBTree. Prediction functions all take a GBTreeModel and a DMatrix as input and
+ *        output a vector of predictions. The predictor does not modify any state of the
+ *        model itself.
 */
-
 class Predictor {
 protected:
  /*
   * \brief Runtime parameters.
   */
  GenericParameter const* generic_param_;
-  /**
-   * \brief Map of matrices and associated cached predictions to facilitate
-   * storing and looking up predictions.
-   */
-  std::shared_ptr<std::unordered_map<DMatrix*, PredictionCacheEntry>> cache_;
-
-  std::unordered_map<DMatrix*, PredictionCacheEntry>::iterator FindCache(DMatrix const* dmat) {
-    auto cache_emtry = std::find_if(
-        cache_->begin(), cache_->end(),
-        [dmat](std::pair<DMatrix *, PredictionCacheEntry const &> const &kv) {
-          return kv.second.data.get() == dmat;
-        });
-    return cache_emtry;
-  }

 public:
-  Predictor(GenericParameter const* generic_param,
-            std::shared_ptr<std::unordered_map<DMatrix*, PredictionCacheEntry>> cache) :
-      generic_param_{generic_param}, cache_{cache} {}
+  explicit Predictor(GenericParameter const* generic_param) :
+      generic_param_{generic_param} {}
  virtual ~Predictor() = default;

  /**
@@ -91,12 +127,11 @@ class Predictor {
   * \param           model       The model to predict from.
   * \param           tree_begin  The tree begin index.
   * \param           ntree_limit (Optional) The ntree limit. 0 means do not
-   * limit trees.
+   *                              limit trees.
   */
-
-  virtual void PredictBatch(DMatrix* dmat, HostDeviceVector<bst_float>* out_preds,
+  virtual void PredictBatch(DMatrix* dmat, PredictionCacheEntry* out_preds,
                            const gbm::GBTreeModel& model, int tree_begin,
-                            unsigned ntree_limit = 0) = 0;
+                            uint32_t const ntree_limit = 0) = 0;

  /**
   * \fn  virtual void Predictor::UpdatePredictionCache( const gbm::GBTreeModel
@@ -116,7 +151,9 @@ class Predictor {
  virtual void UpdatePredictionCache(
      const gbm::GBTreeModel& model,
      std::vector<std::unique_ptr<TreeUpdater>>* updaters,
-      int num_new_trees) = 0;
+      int num_new_trees,
+      DMatrix* m,
+      PredictionCacheEntry* predts) = 0;

  /**
   * \fn  virtual void Predictor::PredictInstance( const SparsePage::Inst&
@@ -200,8 +237,7 @@ class Predictor {
   * \param cache          Pointer to prediction cache.
   */
  static Predictor* Create(
-      std::string const& name, GenericParameter const* generic_param,
-      std::shared_ptr<std::unordered_map<DMatrix*, PredictionCacheEntry>> cache);
+      std::string const& name, GenericParameter const* generic_param);
 };

 /*!
@@ -209,9 +245,7 @@ class Predictor {
 */
 struct PredictorReg
    : public dmlc::FunctionRegEntryBase<
-  PredictorReg, std::function<Predictor*(
-      GenericParameter const*,
-      std::shared_ptr<std::unordered_map<DMatrix*, PredictionCacheEntry>>)>> {};
+  PredictorReg, std::function<Predictor*(GenericParameter const*)>> {};

 #define XGBOOST_REGISTER_PREDICTOR(UniqueId, Name)      \
  static DMLC_ATTRIBUTE_UNUSED ::xgboost::PredictorReg& \
--- a/include/xgboost/tree_model.h
+++ b/include/xgboost/tree_model.h
@@ -158,7 +158,7 @@ class RegTree : public Model {
    }
    /*! \brief whether this node is deleted */
    XGBOOST_DEVICE bool IsDeleted() const {
-      return sindex_ == std::numeric_limits<unsigned>::max();
+      return sindex_ == std::numeric_limits<uint32_t>::max();
    }
    /*! \brief whether current node is root */
    XGBOOST_DEVICE bool IsRoot() const { return parent_ == kInvalidNodeId; }