Use NCCL group calls to prevent deadlock. (#4113)
* Launch all-reduce sequentially. * Fix gpu_exact test memory leak.
This commit is contained in:
@@ -852,7 +852,7 @@ class AllReducer {
|
||||
#ifdef XGBOOST_USE_NCCL
|
||||
std::vector<ncclComm_t> comms;
|
||||
std::vector<cudaStream_t> streams;
|
||||
std::vector<int> device_ordinals;
|
||||
std::vector<int> device_ordinals; // device id from CUDA
|
||||
#endif
|
||||
|
||||
public:
|
||||
|
||||
@@ -459,9 +459,8 @@ HostDeviceVector<T>& HostDeviceVector<T>::operator=
|
||||
|
||||
template <typename T>
|
||||
HostDeviceVector<T>::~HostDeviceVector() {
|
||||
HostDeviceVectorImpl<T>* tmp = impl_;
|
||||
delete impl_;
|
||||
impl_ = nullptr;
|
||||
delete tmp;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
|
||||
Reference in New Issue
Block a user