Avoid repeated cuda API call in GPU predictor and only synchronize used GPUs (#2936)
This commit is contained in:
committed by
Rory Mitchell
parent
e8a6597957
commit
4d36036fe6
@@ -303,7 +303,7 @@ class GPUPredictor : public xgboost::Predictor {
     int shared_memory_bytes = static_cast<int>(
         sizeof(float) * device_matrix->p_mat->info().num_col * BLOCK_THREADS);
     bool use_shared = true;
-    if (shared_memory_bytes > dh::max_shared_memory(param.gpu_id)) {
+    if (shared_memory_bytes > max_shared_memory_bytes) {
       shared_memory_bytes = 0;
       use_shared = false;
     }
@@ -391,6 +391,7 @@ class GPUPredictor : public xgboost::Predictor {
     Predictor::Init(cfg, cache);
     cpu_predictor->Init(cfg, cache);
     param.InitAllowUnknown(cfg);
+    max_shared_memory_bytes = dh::max_shared_memory(param.gpu_id);
   }

  private:
@@ -401,6 +402,7 @@ class GPUPredictor : public xgboost::Predictor {
   thrust::device_vector<DevicePredictionNode> nodes;
   thrust::device_vector<size_t> tree_segments;
   thrust::device_vector<int> tree_group;
+  size_t max_shared_memory_bytes;
 };
 XGBOOST_REGISTER_PREDICTOR(GPUPredictor, "gpu_predictor")
 .describe("Make predictions using GPU.")
||||
Reference in New Issue
Block a user