Fix RCCL: report RCCL_VERSION in build info and name RCCL in the network-error hint
This commit is contained in:
parent
843fdde61b
commit
db8420225b
@ -37,7 +37,7 @@ void XGBBuildInfoDevice(Json *p_info) {
|
||||
#elif defined(XGBOOST_USE_RCCL)
|
||||
info["USE_RCCL"] = Boolean{true};
|
||||
v = {Json{Integer{NCCL_MAJOR}}, Json{Integer{NCCL_MINOR}}, Json{Integer{NCCL_PATCH}}};
|
||||
info["NCCL_VERSION"] = v;
|
||||
info["RCCL_VERSION"] = v;
|
||||
#else
|
||||
info["USE_NCCL"] = Boolean{false};
|
||||
info["USE_RCCL"] = Boolean{false};
|
||||
|
||||
@ -110,7 +110,7 @@ inline ncclResult_t ThrowOnNcclError(ncclResult_t code, const char *file, int li
|
||||
ss << " CUDA error: " << thrust::system_error(err, thrust::cuda_category()).what() << "\n";
|
||||
} else if (code == ncclSystemError) {
|
||||
ss << " This might be caused by a network configuration issue. Please consider specifying "
|
||||
"the network interface for NCCL via environment variables listed in its reference: "
|
||||
"the network interface for RCCL via environment variables listed in its reference: "
|
||||
"`https://docs.nvidia.com/deeplearning/nccl/user-guide/docs/env.html`.\n";
|
||||
}
|
||||
LOG(FATAL) << ss.str();
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user