Avoid repeated cuda API call in GPU predictor and only synchronize used GPUs (#2936)

This commit is contained in:
PSEUDOTENSOR / Jonathan McKinney
2017-12-08 22:00:42 -05:00
committed by Rory Mitchell
parent e8a6597957
commit 4d36036fe6
3 changed files with 36 additions and 23 deletions

View File

@@ -303,7 +303,7 @@ class GPUPredictor : public xgboost::Predictor {
int shared_memory_bytes = static_cast<int>(
sizeof(float) * device_matrix->p_mat->info().num_col * BLOCK_THREADS);
bool use_shared = true;
-    if (shared_memory_bytes > dh::max_shared_memory(param.gpu_id)) {
+    if (shared_memory_bytes > max_shared_memory_bytes) {
shared_memory_bytes = 0;
use_shared = false;
}
@@ -391,6 +391,7 @@ class GPUPredictor : public xgboost::Predictor {
Predictor::Init(cfg, cache);
cpu_predictor->Init(cfg, cache);
param.InitAllowUnknown(cfg);
+    max_shared_memory_bytes = dh::max_shared_memory(param.gpu_id);
}
private:
@@ -401,6 +402,7 @@ class GPUPredictor : public xgboost::Predictor {
thrust::device_vector<DevicePredictionNode> nodes;
thrust::device_vector<size_t> tree_segments;
thrust::device_vector<int> tree_group;
+  size_t max_shared_memory_bytes;
};
XGBOOST_REGISTER_PREDICTOR(GPUPredictor, "gpu_predictor")
.describe("Make predictions using GPU.")