Use NCCL group calls to prevent deadlock. (#4113)

* Launch all-reduce sequentially. * Fix gpu_exact test memory leak.
2019-02-08 06:12:39 +08:00
parent 05243642bb
commit f8ca2960fc
4 changed files with 18 additions and 15 deletions
--- a/src/common/device_helpers.cuh
+++ b/src/common/device_helpers.cuh
@@ -852,7 +852,7 @@ class AllReducer {
 #ifdef XGBOOST_USE_NCCL
  std::vector<ncclComm_t> comms;
  std::vector<cudaStream_t> streams;
-  std::vector<int> device_ordinals;
+  std::vector<int> device_ordinals;  // device id from CUDA
 #endif

 public: