Use NCCL group calls to prevent deadlock. (#4113)

* Launch all-reduce operations sequentially.
* Fix gpu_exact test memory leak.
This commit is contained in:
Jiaming Yuan
2019-02-08 06:12:39 +08:00
committed by GitHub
parent 05243642bb
commit f8ca2960fc
4 changed files with 18 additions and 15 deletions

View File

@@ -852,7 +852,7 @@ class AllReducer {
#ifdef XGBOOST_USE_NCCL
std::vector<ncclComm_t> comms;
std::vector<cudaStream_t> streams;
std::vector<int> device_ordinals;
std::vector<int> device_ordinals; // device id from CUDA
#endif
public: