Avoid repeated CUDA API calls in the GPU predictor and synchronize only the GPUs in use (#2936)

2017-12-08 22:00:42 -05:00
parent e8a6597957
commit 4d36036fe6
3 changed files with 36 additions and 23 deletions
--- a/src/common/timer.h
+++ b/src/common/timer.h
@@ -6,6 +6,7 @@
 #include <iostream>
 #include <map>
 #include <string>
+#include <vector>

 namespace xgboost {
 namespace common {
@@ -63,11 +64,21 @@ struct Monitor {
    this->label = label;
  }
  void Start(const std::string &name) { timer_map[name].Start(); }
-  void Stop(const std::string &name) {
+  void Start(const std::string &name, std::vector<int> dList) {
    if (debug_verbose) {
 #ifdef __CUDACC__
 #include "device_helpers.cuh"
-      dh::synchronize_all();
+      dh::synchronize_n_devices(dList.size(), dList);
+#endif
+    }
+    timer_map[name].Start();
+  }
+  void Stop(const std::string &name) { timer_map[name].Stop(); }
+  void Stop(const std::string &name, std::vector<int> dList) {
+    if (debug_verbose) {
+#ifdef __CUDACC__
+#include "device_helpers.cuh"
+      dh::synchronize_n_devices(dList.size(), dList);
 #endif
    }
    timer_map[name].Stop();