Upgrading to NCCL2 (#3404)
* Upgrading to NCCL2
* Part II of the NCCL2 upgrade
  - Doc updates to build with nccl2
  - Dockerfile.gpu update for a correct CI build with nccl2
  - Updated the FindNccl package so that the NCCL_ROOT env-var takes precedence
* Upgrading to v9.2 for the CI workflow, since it has the nccl2 binaries available
* Added the NCCL2 license + copy the nccl binaries into the /usr location for the FindNccl module to find
* Set the LD_LIBRARY_PATH variable to pick up the nccl2 binary at runtime
* Need the nccl2 library download instructions inside Dockerfile.release as well
* Use NCCL2 as a static library
This commit is contained in:
committed by
Philip Hyunsu Cho
parent
a6331925d2
commit
2200939416
@@ -946,6 +946,24 @@ class AllReducer {
|
||||
#endif
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Calls ncclGroupStart() and passes its result to dh::safe_nccl.
|
||||
* Use in exactly the same way as ncclGroupStart.
|
||||
* The body is empty when XGBOOST_USE_NCCL is not defined, so the call is
|
||||
* then a no-op.
|
||||
* \note Presumably meant to be matched by a later GroupEnd() call, as with
|
||||
* the underlying NCCL group API - confirm against the NCCL documentation.
|
||||
*/
|
||||
void GroupStart() {
|
||||
#ifdef XGBOOST_USE_NCCL
|
||||
dh::safe_nccl(ncclGroupStart());
|
||||
#endif
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Calls ncclGroupEnd() and passes its result to dh::safe_nccl.
|
||||
* Use in exactly the same way as ncclGroupEnd.
|
||||
* The body is empty when XGBOOST_USE_NCCL is not defined, so the call is
|
||||
* then a no-op.
|
||||
* \note Presumably closes a group opened by an earlier GroupStart() call, as
|
||||
* with the underlying NCCL group API - confirm against the NCCL documentation.
|
||||
*/
|
||||
void GroupEnd() {
|
||||
#ifdef XGBOOST_USE_NCCL
|
||||
dh::safe_nccl(ncclGroupEnd());
|
||||
#endif
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Allreduce. Use in exactly the same way as NCCL but without needing
|
||||
* streams or comms.
|
||||
|
||||
@@ -810,6 +810,7 @@ class GPUHistMaker : public TreeUpdater {
|
||||
}
|
||||
|
||||
void AllReduceHist(int nidx) {
|
||||
reducer_.GroupStart();
|
||||
for (auto& shard : shards_) {
|
||||
auto d_node_hist = shard->hist.GetHistPtr(nidx);
|
||||
reducer_.AllReduceSum(
|
||||
@@ -818,6 +819,7 @@ class GPUHistMaker : public TreeUpdater {
|
||||
reinterpret_cast<GradientPairSumT::ValueT*>(d_node_hist),
|
||||
n_bins_ * (sizeof(GradientPairSumT) / sizeof(GradientPairSumT::ValueT)));
|
||||
}
|
||||
reducer_.GroupEnd();
|
||||
|
||||
reducer_.Synchronize();
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user