Use NCCL group calls to prevent deadlock. (#4113)

* Launch all-reduce operations sequentially.
* Fix gpu_exact test memory leak.
This commit is contained in:
Jiaming Yuan
2019-02-08 06:12:39 +08:00
committed by GitHub
parent 05243642bb
commit f8ca2960fc
4 changed files with 18 additions and 15 deletions

View File

@@ -852,7 +852,7 @@ class AllReducer {
#ifdef XGBOOST_USE_NCCL
std::vector<ncclComm_t> comms;
std::vector<cudaStream_t> streams;
std::vector<int> device_ordinals;
std::vector<int> device_ordinals; // device id from CUDA
#endif
public:

View File

@@ -459,9 +459,8 @@ HostDeviceVector<T>& HostDeviceVector<T>::operator=
// Destructor: releases the implementation object held in impl_.
template <typename T>
HostDeviceVector<T>::~HostDeviceVector() {
  // Free the implementation exactly once. Deleting it again through a
  // second pointer that aliases impl_ would be a double delete, which is
  // undefined behaviour.
  delete impl_;
  // Clear the member so no dangling pointer is left behind.
  impl_ = nullptr;
}
template <typename T>