Avoid repeated cuda API call in GPU predictor and only synchronize used GPUs (#2936)
This commit is contained in:
committed by
Rory Mitchell
parent
e8a6597957
commit
4d36036fe6
@@ -303,7 +303,7 @@ class GPUPredictor : public xgboost::Predictor {
     int shared_memory_bytes = static_cast<int>(
         sizeof(float) * device_matrix->p_mat->info().num_col * BLOCK_THREADS);
     bool use_shared = true;
-    if (shared_memory_bytes > dh::max_shared_memory(param.gpu_id)) {
+    if (shared_memory_bytes > max_shared_memory_bytes) {
       shared_memory_bytes = 0;
       use_shared = false;
     }
@@ -391,6 +391,7 @@ class GPUPredictor : public xgboost::Predictor {
     Predictor::Init(cfg, cache);
     cpu_predictor->Init(cfg, cache);
     param.InitAllowUnknown(cfg);
+    max_shared_memory_bytes = dh::max_shared_memory(param.gpu_id);
   }

  private:
@@ -401,6 +402,7 @@ class GPUPredictor : public xgboost::Predictor {
   thrust::device_vector<DevicePredictionNode> nodes;
   thrust::device_vector<size_t> tree_segments;
   thrust::device_vector<int> tree_group;
+  size_t max_shared_memory_bytes;
 };
 XGBOOST_REGISTER_PREDICTOR(GPUPredictor, "gpu_predictor")
 .describe("Make predictions using GPU.")
||||
Reference in New Issue
Block a user