Combine thread launches into single launch per tree for gpu_hist (#4343)

* Combine thread launches into single launch per tree for gpu_hist algorithm. * Address deprecation warning * Add manual column sampler constructor * Turn off omp dynamic to get a guaranteed number of threads * Enable openmp in cuda code
2019-04-29 09:58:34 +12:00
parent 146e83f3b3
commit 5e582b0fa7
10 changed files with 402 additions and 325 deletions
--- a/src/predictor/gpu_predictor.cu
+++ b/src/predictor/gpu_predictor.cu
@@ -342,7 +342,8 @@ class GPUPredictor : public xgboost::Predictor {
  }

 public:
-  GPUPredictor() : cpu_predictor_(Predictor::Create("cpu_predictor")) {}
+  GPUPredictor()                                               // NOLINT
+      : cpu_predictor_(Predictor::Create("cpu_predictor")) {}  // NOLINT

  void PredictBatch(DMatrix* dmat, HostDeviceVector<bst_float>* out_preds,
                    const gbm::GBTreeModel& model, int tree_begin,