Avoid repeated CUDA API calls in GPU predictor and only synchronize used GPUs (#2936)

This commit is contained in:
PSEUDOTENSOR / Jonathan McKinney
2017-12-08 22:00:42 -05:00
committed by Rory Mitchell
parent e8a6597957
commit 4d36036fe6
3 changed files with 36 additions and 23 deletions

View File

@@ -6,6 +6,7 @@
#include <iostream>
#include <map>
#include <string>
#include <vector>
namespace xgboost {
namespace common {
@@ -63,11 +64,21 @@ struct Monitor {
this->label = label;
}
void Start(const std::string &name) { timer_map[name].Start(); }
void Stop(const std::string &name) {
// Calls Start() on the timer stored under `name` in timer_map.
//
// When debug_verbose is set and the file is being compiled by a CUDA compiler
// (__CUDACC__ defined), the devices listed in dList are synchronized first via
// dh::synchronize_n_devices. Presumably this keeps still-running GPU work from
// being attributed to the wrong timer -- TODO confirm against the helper.
//
// name:  key of the timer in timer_map.
// dList: device ordinals to synchronize before the timer starts.
void Start(const std::string &name, std::vector<int> dList) {
  if (debug_verbose) {
#ifdef __CUDACC__
// NOTE(review): a header included inside a function body can only work if it
// expands to nothing at this point (e.g. its include guard was already
// tripped earlier in the translation unit). Consider hoisting this include
// to file scope under the same __CUDACC__ guard.
#include "device_helpers.cuh"
    // Synchronize just the devices in dList rather than every device; a
    // blanket dh::synchronize_all() here would make this call redundant.
    dh::synchronize_n_devices(dList.size(), dList);
#endif
  }
  timer_map[name].Start();
}
void Stop(const std::string &name) { timer_map[name].Stop(); }
void Stop(const std::string &name, std::vector<int> dList) {
if (debug_verbose) {
#ifdef __CUDACC__
#include "device_helpers.cuh"
dh::synchronize_n_devices(dList.size(), dList);
#endif
}
timer_map[name].Stop();