Objective function evaluation on GPU with minimal PCIe transfers (#2935)

* Added GPU objective function and no-copy interface.
  - xgboost::HostDeviceVector<T> syncs automatically between host and device
  - no-copy interfaces have been added
  - default implementations just sync the data to host and call the implementations that take std::vector
  - the GPU objective function, predictor, and histogram updater process data directly on the GPU
2018-01-12 14:03:39 +05:30
parent a187ed6c8f
commit 84ab74f3a5
23 changed files with 1036 additions and 127 deletions
--- a/src/predictor/cpu_predictor.cc
+++ b/src/predictor/cpu_predictor.cc
@@ -5,6 +5,7 @@
 #include <xgboost/tree_model.h>
 #include <xgboost/tree_updater.h>
 #include "dmlc/logging.h"
+#include "../common/host_device_vector.h"

 namespace xgboost {
 namespace predictor {
@@ -108,6 +109,12 @@ class CPUPredictor : public Predictor {
  }

 public:
+  void PredictBatch(DMatrix* dmat, HostDeviceVector<bst_float>* out_preds,
+                    const gbm::GBTreeModel& model, int tree_begin,
+                    unsigned ntree_limit = 0) override {  // forwards to the std::vector overload
+    PredictBatch(dmat, &out_preds->data_h(), model, tree_begin, ntree_limit);
+  }  // NOTE(review): data_h() presumably yields the host-side std::vector - confirm
+
  void PredictBatch(DMatrix* dmat, std::vector<bst_float>* out_preds,
                    const gbm::GBTreeModel& model, int tree_begin,
                    unsigned ntree_limit = 0) override {