This commit is contained in:
amdsc21 2023-04-12 01:09:14 +02:00
parent 843fdde61b
commit db8420225b
2 changed files with 2 additions and 2 deletions

View File

@@ -37,7 +37,7 @@ void XGBBuildInfoDevice(Json *p_info) {
#elif defined(XGBOOST_USE_RCCL)
info["USE_RCCL"] = Boolean{true};
v = {Json{Integer{NCCL_MAJOR}}, Json{Integer{NCCL_MINOR}}, Json{Integer{NCCL_PATCH}}};
-info["NCCL_VERSION"] = v;
+info["RCCL_VERSION"] = v;
#else
info["USE_NCCL"] = Boolean{false};
info["USE_RCCL"] = Boolean{false};

View File

@@ -110,7 +110,7 @@ inline ncclResult_t ThrowOnNcclError(ncclResult_t code, const char *file, int li
ss << " CUDA error: " << thrust::system_error(err, thrust::cuda_category()).what() << "\n";
} else if (code == ncclSystemError) {
ss << " This might be caused by a network configuration issue. Please consider specifying "
-"the network interface for NCCL via environment variables listed in its reference: "
+"the network interface for RCCL via environment variables listed in its reference: "
"`https://docs.nvidia.com/deeplearning/nccl/user-guide/docs/env.html`.\n";
}
LOG(FATAL) << ss.str();