Replaced std::vector-based interfaces with HostDeviceVector-based interfaces. (#3116)

* Replaced std::vector-based interfaces with HostDeviceVector-based interfaces.

- replacement was performed in the learner, boosters, predictors,
  updaters, and objective functions
- only interfaces used in training were replaced;
  interfaces like PredictInstance() still use std::vector
- refactoring necessary for replacement of interfaces was also performed,
  such as using HostDeviceVector in prediction cache

* HostDeviceVector-based interfaces for custom objective function example plugin.
This commit is contained in:
Andrew V. Adinetz
2018-02-28 01:00:04 +01:00
committed by Rory Mitchell
parent 11bfa8584d
commit d5992dd881
38 changed files with 371 additions and 519 deletions

View File

@@ -104,14 +104,43 @@ class CPUPredictor : public Predictor {
tree_begin, ntree_limit);
}
public:
void PredictBatch(DMatrix* dmat, HostDeviceVector<bst_float>* out_preds,
const gbm::GBTreeModel& model, int tree_begin,
unsigned ntree_limit = 0) override {
PredictBatch(dmat, &out_preds->data_h(), model, tree_begin, ntree_limit);
// Attempts to serve a prediction from the per-DMatrix cache. Succeeds only
// when the requested tree limit covers the whole model (so the cached full
// prediction is valid) and a non-empty cache entry exists for `dmat`.
// Returns true and fills `out_preds` on a hit, false otherwise.
bool PredictFromCache(DMatrix* dmat,
                      HostDeviceVector<bst_float>* out_preds,
                      const gbm::GBTreeModel& model,
                      unsigned ntree_limit) {
  // A nonzero limit smaller than the model's tree count means the cached
  // full-model prediction cannot be reused.
  const bool covers_full_model =
      ntree_limit == 0 ||
      ntree_limit * model.param.num_output_group >= model.trees.size();
  if (!covers_full_model) {
    return false;
  }
  auto entry = cache_.find(dmat);
  if (entry == cache_.end()) {
    return false;
  }
  HostDeviceVector<bst_float>& cached = entry->second.predictions;
  if (cached.size() == 0) {
    // Entry exists but has not been populated yet.
    return false;
  }
  out_preds->resize(cached.size());
  std::copy(cached.data_h().begin(), cached.data_h().end(),
            out_preds->data_h().begin());
  return true;
}
void PredictBatch(DMatrix* dmat, std::vector<bst_float>* out_preds,
void InitOutPredictions(const MetaInfo& info,
HostDeviceVector<bst_float>* out_preds,
const gbm::GBTreeModel& model) const {
size_t n = model.param.num_output_group * info.num_row;
const std::vector<bst_float>& base_margin = info.base_margin;
out_preds->resize(n);
std::vector<bst_float>& out_preds_h = out_preds->data_h();
if (base_margin.size() != 0) {
CHECK_EQ(out_preds->size(), n);
std::copy(base_margin.begin(), base_margin.end(), out_preds_h.begin());
} else {
std::fill(out_preds_h.begin(), out_preds_h.end(), model.base_margin);
}
}
public:
void PredictBatch(DMatrix* dmat, HostDeviceVector<bst_float>* out_preds,
const gbm::GBTreeModel& model, int tree_begin,
unsigned ntree_limit = 0) override {
if (this->PredictFromCache(dmat, out_preds, model, ntree_limit)) {
@@ -125,12 +154,14 @@ class CPUPredictor : public Predictor {
ntree_limit = static_cast<unsigned>(model.trees.size());
}
this->PredLoopInternal(dmat, out_preds, model, tree_begin, ntree_limit);
this->PredLoopInternal(dmat, &out_preds->data_h(), model,
tree_begin, ntree_limit);
}
void UpdatePredictionCache(const gbm::GBTreeModel& model,
std::vector<std::unique_ptr<TreeUpdater>>* updaters,
int num_new_trees) override {
void UpdatePredictionCache(
const gbm::GBTreeModel& model,
std::vector<std::unique_ptr<TreeUpdater>>* updaters,
int num_new_trees) override {
int old_ntree = model.trees.size() - num_new_trees;
// update cache entry
for (auto& kv : cache_) {
@@ -138,7 +169,7 @@ class CPUPredictor : public Predictor {
if (e.predictions.size() == 0) {
InitOutPredictions(e.data->info(), &(e.predictions), model);
PredLoopInternal(e.data.get(), &(e.predictions), model, 0,
PredLoopInternal(e.data.get(), &(e.predictions.data_h()), model, 0,
model.trees.size());
} else if (model.param.num_output_group == 1 && updaters->size() > 0 &&
num_new_trees == 1 &&
@@ -146,7 +177,7 @@ class CPUPredictor : public Predictor {
&(e.predictions))) {
{} // do nothing
} else {
PredLoopInternal(e.data.get(), &(e.predictions), model, old_ntree,
PredLoopInternal(e.data.get(), &(e.predictions.data_h()), model, old_ntree,
model.trees.size());
}
}

View File

@@ -256,8 +256,6 @@ class GPUPredictor : public xgboost::Predictor {
HostDeviceVector<bst_float> predictions;
};
std::unordered_map<DMatrix*, DevicePredictionCacheEntry> device_cache_;
private:
void DevicePredictInternal(DMatrix* dmat, HostDeviceVector<bst_float>* out_preds,
const gbm::GBTreeModel& model, size_t tree_begin,
@@ -337,25 +335,16 @@ class GPUPredictor : public xgboost::Predictor {
public:
GPUPredictor() : cpu_predictor(Predictor::Create("cpu_predictor")) {}
void PredictBatch(DMatrix* dmat, std::vector<bst_float>* out_preds,
const gbm::GBTreeModel& model, int tree_begin,
unsigned ntree_limit = 0) override {
HostDeviceVector<bst_float> out_preds_d;
PredictBatch(dmat, &out_preds_d, model, tree_begin, ntree_limit);
out_preds->resize(out_preds_d.size());
thrust::copy(out_preds_d.tbegin(param.gpu_id),
out_preds_d.tend(param.gpu_id), out_preds->begin());
}
void PredictBatch(DMatrix* dmat, HostDeviceVector<bst_float>* out_preds,
const gbm::GBTreeModel& model, int tree_begin,
unsigned ntree_limit = 0) override {
if (this->PredictFromCacheDevice(dmat, out_preds, model, ntree_limit)) {
if (this->PredictFromCache(dmat, out_preds, model, ntree_limit)) {
return;
}
this->InitOutPredictionsDevice(dmat->info(), out_preds, model);
this->InitOutPredictions(dmat->info(), out_preds, model);
int tree_end = ntree_limit * model.param.num_output_group;
if (ntree_limit == 0 || ntree_limit > model.trees.size()) {
tree_end = static_cast<unsigned>(model.trees.size());
}
@@ -363,13 +352,13 @@ class GPUPredictor : public xgboost::Predictor {
DevicePredictInternal(dmat, out_preds, model, tree_begin, tree_end);
}
void InitOutPredictionsDevice(const MetaInfo& info,
protected:
void InitOutPredictions(const MetaInfo& info,
HostDeviceVector<bst_float>* out_preds,
const gbm::GBTreeModel& model) const {
size_t n = model.param.num_output_group * info.num_row;
const std::vector<bst_float>& base_margin = info.base_margin;
out_preds->resize(n, param.gpu_id);
out_preds->resize(n, 0.0f, param.gpu_id);
if (base_margin.size() != 0) {
CHECK_EQ(out_preds->size(), n);
thrust::copy(base_margin.begin(), base_margin.end(), out_preds->tbegin(param.gpu_id));
@@ -380,29 +369,16 @@ class GPUPredictor : public xgboost::Predictor {
}
bool PredictFromCache(DMatrix* dmat,
std::vector<bst_float>* out_preds,
HostDeviceVector<bst_float>* out_preds,
const gbm::GBTreeModel& model,
unsigned ntree_limit) {
HostDeviceVector<bst_float> out_preds_d(0, -1);
bool result = PredictFromCacheDevice(dmat, &out_preds_d, model, ntree_limit);
if (!result) return false;
out_preds->resize(out_preds_d.size(), param.gpu_id);
thrust::copy(out_preds_d.tbegin(param.gpu_id),
out_preds_d.tend(param.gpu_id), out_preds->begin());
return true;
}
bool PredictFromCacheDevice(DMatrix* dmat,
HostDeviceVector<bst_float>* out_preds,
const gbm::GBTreeModel& model,
unsigned ntree_limit) {
if (ntree_limit == 0 ||
ntree_limit * model.param.num_output_group >= model.trees.size()) {
auto it = device_cache_.find(dmat);
if (it != device_cache_.end()) {
auto it = cache_.find(dmat);
if (it != cache_.end()) {
HostDeviceVector<bst_float>& y = it->second.predictions;
if (y.size() != 0) {
out_preds->resize(y.size(), param.gpu_id);
out_preds->resize(y.size(), 0.0f, param.gpu_id);
thrust::copy(y.tbegin(param.gpu_id), y.tend(param.gpu_id),
out_preds->tbegin(param.gpu_id));
return true;
@@ -418,15 +394,15 @@ class GPUPredictor : public xgboost::Predictor {
int num_new_trees) override {
auto old_ntree = model.trees.size() - num_new_trees;
// update cache entry
for (auto& kv : device_cache_) {
DevicePredictionCacheEntry& e = kv.second;
for (auto& kv : cache_) {
PredictionCacheEntry& e = kv.second;
DMatrix* dmat = kv.first;
HostDeviceVector<bst_float>& predictions = e.predictions;
if (predictions.size() == 0) {
// ensure that the device in predictions is correct
predictions.resize(0, param.gpu_id);
cpu_predictor->PredictBatch(dmat, &predictions.data_h(), model, 0,
predictions.resize(0, 0.0f, param.gpu_id);
cpu_predictor->PredictBatch(dmat, &predictions, model, 0,
static_cast<bst_uint>(model.trees.size()));
} else if (model.param.num_output_group == 1 && updaters->size() > 0 &&
num_new_trees == 1 &&
@@ -477,8 +453,6 @@ class GPUPredictor : public xgboost::Predictor {
Predictor::Init(cfg, cache);
cpu_predictor->Init(cfg, cache);
param.InitAllowUnknown(cfg);
for (const std::shared_ptr<DMatrix>& d : cache)
device_cache_[d.get()].data = d;
max_shared_memory_bytes = dh::max_shared_memory(param.gpu_id);
}

View File

@@ -11,43 +11,8 @@ namespace xgboost {
void Predictor::Init(
const std::vector<std::pair<std::string, std::string>>& cfg,
const std::vector<std::shared_ptr<DMatrix>>& cache) {
for (const std::shared_ptr<DMatrix>& d : cache) {
PredictionCacheEntry e;
e.data = d;
cache_[d.get()] = std::move(e);
}
}
bool Predictor::PredictFromCache(DMatrix* dmat,
std::vector<bst_float>* out_preds,
const gbm::GBTreeModel& model,
unsigned ntree_limit) {
if (ntree_limit == 0 ||
ntree_limit * model.param.num_output_group >= model.trees.size()) {
auto it = cache_.find(dmat);
if (it != cache_.end()) {
std::vector<bst_float>& y = it->second.predictions;
if (y.size() != 0) {
out_preds->resize(y.size());
std::copy(y.begin(), y.end(), out_preds->begin());
return true;
}
}
}
return false;
}
void Predictor::InitOutPredictions(const MetaInfo& info,
std::vector<bst_float>* out_preds,
const gbm::GBTreeModel& model) const {
size_t n = model.param.num_output_group * info.num_row;
const std::vector<bst_float>& base_margin = info.base_margin;
out_preds->resize(n);
if (base_margin.size() != 0) {
CHECK_EQ(out_preds->size(), n);
std::copy(base_margin.begin(), base_margin.end(), out_preds->begin());
} else {
std::fill(out_preds->begin(), out_preds->end(), model.base_margin);
}
for (const std::shared_ptr<DMatrix>& d : cache)
cache_[d.get()].data = d;
}
Predictor* Predictor::Create(std::string name) {
auto* e = ::dmlc::Registry<PredictorReg>::Get()->Find(name);