Replaced std::vector-based interfaces with HostDeviceVector-based interfaces. (#3116)

* Replaced std::vector-based interfaces with HostDeviceVector-based interfaces.
  - The replacement was performed in the learner, boosters, predictors, updaters, and objective functions.
  - Only interfaces used in training were replaced; interfaces like PredictInstance() still use std::vector.
  - Refactoring necessary for the replacement of interfaces was also performed, such as using HostDeviceVector in the prediction cache.
* Added HostDeviceVector-based interfaces for the custom objective function example plugin.
2018-02-28 01:00:04 +01:00
parent 11bfa8584d
commit d5992dd881
38 changed files with 371 additions and 519 deletions
--- a/include/xgboost/gbm.h
+++ b/include/xgboost/gbm.h
@@ -68,12 +68,9 @@ class GradientBooster {
   * \param obj The objective function, optional, can be nullptr when use customized version
   * the booster may change content of gpair
   */
-  virtual void DoBoost(DMatrix* p_fmat,
-                       std::vector<bst_gpair>* in_gpair,
-                       ObjFunction* obj = nullptr) = 0;
  virtual void DoBoost(DMatrix* p_fmat,
                       HostDeviceVector<bst_gpair>* in_gpair,
-                       ObjFunction* obj = nullptr);
+                       ObjFunction* obj = nullptr) = 0;

  /*!
   * \brief generate predictions for given feature matrix
@@ -82,12 +79,9 @@ class GradientBooster {
   * \param ntree_limit limit the number of trees used in prediction, when it equals 0, this means
   *    we do not limit number of trees, this parameter is only valid for gbtree, but not for gblinear
   */
-  virtual void PredictBatch(DMatrix* dmat,
-                       std::vector<bst_float>* out_preds,
-                       unsigned ntree_limit = 0) = 0;
  virtual void PredictBatch(DMatrix* dmat,
                            HostDeviceVector<bst_float>* out_preds,
-                            unsigned ntree_limit = 0);
+                            unsigned ntree_limit = 0) = 0;
  /*!
   * \brief online prediction function, predict score for one instance at a time
   *  NOTE: use the batch prediction interface if possible, batch prediction is usually
--- a/include/xgboost/learner.h
+++ b/include/xgboost/learner.h
@@ -84,7 +84,7 @@ class Learner : public rabit::Serializable {
   */
  virtual void BoostOneIter(int iter,
                            DMatrix* train,
-                            std::vector<bst_gpair>* in_gpair) = 0;
+                            HostDeviceVector<bst_gpair>* in_gpair) = 0;
  /*!
   * \brief evaluate the model for specific iteration using the configured metrics.
   * \param iter iteration number
@@ -109,7 +109,7 @@ class Learner : public rabit::Serializable {
   */
  virtual void Predict(DMatrix* data,
                       bool output_margin,
-                       std::vector<bst_float> *out_preds,
+                       HostDeviceVector<bst_float> *out_preds,
                       unsigned ntree_limit = 0,
                       bool pred_leaf = false,
                       bool pred_contribs = false,
@@ -169,7 +169,7 @@ class Learner : public rabit::Serializable {
   */
  inline void Predict(const SparseBatch::Inst &inst,
                      bool output_margin,
-                      std::vector<bst_float> *out_preds,
+                      HostDeviceVector<bst_float> *out_preds,
                      unsigned ntree_limit = 0) const;
  /*!
   * \brief Create a new instance of learner.
@@ -192,9 +192,9 @@ class Learner : public rabit::Serializable {
 // implementation of inline functions.
 inline void Learner::Predict(const SparseBatch::Inst& inst,
                             bool output_margin,
-                             std::vector<bst_float>* out_preds,
+                             HostDeviceVector<bst_float>* out_preds,
                             unsigned ntree_limit) const {
-  gbm_->PredictInstance(inst, out_preds, ntree_limit);
+  gbm_->PredictInstance(inst, &out_preds->data_h(), ntree_limit);
  if (!output_margin) {
    obj_->PredTransform(out_preds);
  }
--- a/include/xgboost/objective.h
+++ b/include/xgboost/objective.h
@@ -44,14 +44,10 @@ class ObjFunction {
   * \param iteration current iteration number.
   * \param out_gpair output of get gradient, saves gradient and second order gradient in
   */
-  virtual void GetGradient(const std::vector<bst_float>& preds,
-                           const MetaInfo& info,
-                           int iteration,
-                           std::vector<bst_gpair>* out_gpair) = 0;
  virtual void GetGradient(HostDeviceVector<bst_float>* preds,
                           const MetaInfo& info,
                           int iteration,
-                           HostDeviceVector<bst_gpair>* out_gpair);
+                           HostDeviceVector<bst_gpair>* out_gpair) = 0;

  /*! \return the default evaluation metric for the objective */
  virtual const char* DefaultEvalMetric() const = 0;
@@ -60,17 +56,13 @@ class ObjFunction {
   * \brief transform prediction values, this is only called when Prediction is called
   * \param io_preds prediction values, saves to this vector as well
   */
-  virtual void PredTransform(std::vector<bst_float> *io_preds) {}
-  virtual void PredTransform(HostDeviceVector<bst_float> *io_preds);
+  virtual void PredTransform(HostDeviceVector<bst_float> *io_preds) {}

  /*!
   * \brief transform prediction values, this is only called when Eval is called,
   *  usually it redirect to PredTransform
   * \param io_preds prediction values, saves to this vector as well
   */
-  virtual void EvalTransform(std::vector<bst_float> *io_preds) {
-    this->PredTransform(io_preds);
-  }
  virtual void EvalTransform(HostDeviceVector<bst_float> *io_preds) {
    this->PredTransform(io_preds);
  }
--- a/include/xgboost/predictor.h
+++ b/include/xgboost/predictor.h
@@ -63,22 +63,6 @@ class Predictor {
   * limit trees.
   */

-  virtual void PredictBatch(DMatrix* dmat, std::vector<bst_float>* out_preds,
-                            const gbm::GBTreeModel& model, int tree_begin,
-                            unsigned ntree_limit = 0) = 0;
-
-  /**
-   * \brief Generate batch predictions for a given feature matrix. May use
-   * cached predictions if available instead of calculating from scratch.
-   *
-   * \param [in,out]  dmat        Feature matrix.
-   * \param [in,out]  out_preds   The output preds.
-   * \param           model       The model to predict from.
-   * \param           tree_begin  The tree begin index.
-   * \param           ntree_limit (Optional) The ntree limit. 0 means do not
-   * limit trees.
-   */
-
  virtual void PredictBatch(DMatrix* dmat, HostDeviceVector<bst_float>* out_preds,
                            const gbm::GBTreeModel& model, int tree_begin,
                            unsigned ntree_limit = 0) = 0;
@@ -186,41 +170,14 @@ class Predictor {
  static Predictor* Create(std::string name);

 protected:
-  /**
-   * \fn  bool PredictFromCache(DMatrix* dmat, std::vector<bst_float>*
-   * out_preds, const gbm::GBTreeModel& model, unsigned ntree_limit = 0)
-   *
-   * \brief Attempt to predict from cache.
-   *
-   * \return  True if it succeeds, false if it fails.
-   */
-  bool PredictFromCache(DMatrix* dmat, std::vector<bst_float>* out_preds,
-                        const gbm::GBTreeModel& model,
-                        unsigned ntree_limit = 0);
-
-  /**
-   * \fn void Predictor::InitOutPredictions(const MetaInfo& info,
-   * std::vector<bst_float>* out_preds, const gbm::GBTreeModel& model) const;
-   *
-   * \brief  Init out predictions according to base margin.
-   *
-   * \param          info      Dmatrix info possibly containing base margin.
-   * \param [in,out] out_preds The out preds.
-   * \param          model     The model.
-   */
-  void InitOutPredictions(const MetaInfo& info,
-                          std::vector<bst_float>* out_preds,
-                          const gbm::GBTreeModel& model) const;
-
  /**
   * \struct  PredictionCacheEntry
   *
   * \brief Contains pointer to input matrix and associated cached predictions.
   */
-
  struct PredictionCacheEntry {
    std::shared_ptr<DMatrix> data;
-    std::vector<bst_float> predictions;
+    HostDeviceVector<bst_float> predictions;
  };

  /**
--- a/include/xgboost/tree_updater.h
+++ b/include/xgboost/tree_updater.h
@@ -40,12 +40,9 @@ class TreeUpdater {
   *         but maybe different random seeds, usually one tree is passed in at a time,
   *         there can be multiple trees when we train random forest style model
   */
-  virtual void Update(const std::vector<bst_gpair>& gpair,
-                      DMatrix* data,
-                      const std::vector<RegTree*>& trees) = 0;
  virtual void Update(HostDeviceVector<bst_gpair>* gpair,
                      DMatrix* data,
-                      const std::vector<RegTree*>& trees);
+                      const std::vector<RegTree*>& trees) = 0;

  /*!
   * \brief determines whether updater has enough knowledge about a given dataset
@@ -58,11 +55,9 @@ class TreeUpdater {
   *         updated by the time this function returns.
   */
  virtual bool UpdatePredictionCache(const DMatrix* data,
-                                     std::vector<bst_float>* out_preds) {
+                                     HostDeviceVector<bst_float>* out_preds) {
    return false;
  }
-  virtual bool UpdatePredictionCache(const DMatrix* data,
-                                     HostDeviceVector<bst_float>* out_preds);

  /*!
   * \brief Create a tree updater given name