* Update CUDA docker image and NCCL. (#8139) * Rest of the CI. * CPU test dependencies.
This commit is contained in:
@@ -38,6 +38,9 @@ void AllReducer::Init(int _device_ordinal) {
|
||||
|
||||
int32_t const rank = rabit::GetRank();
|
||||
int32_t const world = rabit::GetWorldSize();
|
||||
if (world == 1) {
|
||||
return;
|
||||
}
|
||||
|
||||
std::vector<uint64_t> uuids(world * kUuidLength, 0);
|
||||
auto s_uuid = xgboost::common::Span<uint64_t>{uuids.data(), uuids.size()};
|
||||
|
||||
@@ -775,13 +775,16 @@ class AllReducer {
|
||||
*/
|
||||
|
||||
void AllReduceSum(const double *sendbuff, double *recvbuff, int count) {
|
||||
if (rabit::GetWorldSize() == 1) {
|
||||
return;
|
||||
}
|
||||
#ifdef XGBOOST_USE_NCCL
|
||||
CHECK(initialised_);
|
||||
dh::safe_cuda(cudaSetDevice(device_ordinal_));
|
||||
dh::safe_nccl(ncclAllReduce(sendbuff, recvbuff, count, ncclDouble, ncclSum, comm_, stream_));
|
||||
allreduce_bytes_ += count * sizeof(double);
|
||||
allreduce_calls_ += 1;
|
||||
#endif
|
||||
#endif // XGBOOST_USE_NCCL
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -796,9 +799,12 @@ class AllReducer {
|
||||
|
||||
void AllGather(uint32_t const* data, size_t length,
|
||||
dh::caching_device_vector<uint32_t>* recvbuf) {
|
||||
size_t world = rabit::GetWorldSize();
|
||||
if (world == 1) {
|
||||
return;
|
||||
}
|
||||
#ifdef XGBOOST_USE_NCCL
|
||||
CHECK(initialised_);
|
||||
size_t world = rabit::GetWorldSize();
|
||||
recvbuf->resize(length * world);
|
||||
safe_nccl(ncclAllGather(data, recvbuf->data().get(), length, ncclUint32,
|
||||
comm_, stream_));
|
||||
@@ -813,9 +819,11 @@ class AllReducer {
|
||||
* \param recvbuff The recvbuff.
|
||||
* \param count Number of elements.
|
||||
*/
|
||||
|
||||
void AllReduceSum(const float *sendbuff, float *recvbuff, int count) {
|
||||
#ifdef XGBOOST_USE_NCCL
|
||||
if (rabit::GetWorldSize() == 1) {
|
||||
return;
|
||||
}
|
||||
CHECK(initialised_);
|
||||
dh::safe_cuda(cudaSetDevice(device_ordinal_));
|
||||
dh::safe_nccl(ncclAllReduce(sendbuff, recvbuff, count, ncclFloat, ncclSum, comm_, stream_));
|
||||
@@ -836,6 +844,9 @@ class AllReducer {
|
||||
|
||||
void AllReduceSum(const int64_t *sendbuff, int64_t *recvbuff, int count) {
|
||||
#ifdef XGBOOST_USE_NCCL
|
||||
if (rabit::GetWorldSize() == 1) {
|
||||
return;
|
||||
}
|
||||
CHECK(initialised_);
|
||||
|
||||
dh::safe_cuda(cudaSetDevice(device_ordinal_));
|
||||
@@ -845,6 +856,9 @@ class AllReducer {
|
||||
|
||||
void AllReduceSum(const uint32_t *sendbuff, uint32_t *recvbuff, int count) {
|
||||
#ifdef XGBOOST_USE_NCCL
|
||||
if (rabit::GetWorldSize() == 1) {
|
||||
return;
|
||||
}
|
||||
CHECK(initialised_);
|
||||
|
||||
dh::safe_cuda(cudaSetDevice(device_ordinal_));
|
||||
@@ -853,6 +867,9 @@ class AllReducer {
|
||||
}
|
||||
|
||||
void AllReduceSum(const uint64_t *sendbuff, uint64_t *recvbuff, int count) {
|
||||
if (rabit::GetWorldSize() == 1) {
|
||||
return;
|
||||
}
|
||||
#ifdef XGBOOST_USE_NCCL
|
||||
CHECK(initialised_);
|
||||
|
||||
@@ -867,12 +884,15 @@ class AllReducer {
|
||||
std::enable_if_t<std::is_same<size_t, T>::value &&
|
||||
!std::is_same<size_t, unsigned long long>::value> // NOLINT
|
||||
* = nullptr>
|
||||
void AllReduceSum(const T *sendbuff, T *recvbuff, int count) { // NOLINT
|
||||
void AllReduceSum(const T *sendbuff, T *recvbuff, int count) { // NOLINT
|
||||
#ifdef XGBOOST_USE_NCCL
|
||||
if (rabit::GetWorldSize() == 1) {
|
||||
return;
|
||||
}
|
||||
CHECK(initialised_);
|
||||
|
||||
dh::safe_cuda(cudaSetDevice(device_ordinal_));
|
||||
static_assert(sizeof(unsigned long long) == sizeof(uint64_t), ""); // NOLINT
|
||||
static_assert(sizeof(unsigned long long) == sizeof(uint64_t), ""); // NOLINT
|
||||
dh::safe_nccl(ncclAllReduce(sendbuff, recvbuff, count, ncclUint64, ncclSum, comm_, stream_));
|
||||
#endif
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user