This commit is contained in:
amdsc21 2023-04-12 01:09:14 +02:00
parent 843fdde61b
commit db8420225b
2 changed files with 2 additions and 2 deletions

View File

@@ -37,7 +37,7 @@ void XGBBuildInfoDevice(Json *p_info) {
#elif defined(XGBOOST_USE_RCCL) #elif defined(XGBOOST_USE_RCCL)
info["USE_RCCL"] = Boolean{true}; info["USE_RCCL"] = Boolean{true};
v = {Json{Integer{NCCL_MAJOR}}, Json{Integer{NCCL_MINOR}}, Json{Integer{NCCL_PATCH}}}; v = {Json{Integer{NCCL_MAJOR}}, Json{Integer{NCCL_MINOR}}, Json{Integer{NCCL_PATCH}}};
info["NCCL_VERSION"] = v; info["RCCL_VERSION"] = v;
#else #else
info["USE_NCCL"] = Boolean{false}; info["USE_NCCL"] = Boolean{false};
info["USE_RCCL"] = Boolean{false}; info["USE_RCCL"] = Boolean{false};

View File

@@ -110,7 +110,7 @@ inline ncclResult_t ThrowOnNcclError(ncclResult_t code, const char *file, int li
ss << " CUDA error: " << thrust::system_error(err, thrust::cuda_category()).what() << "\n"; ss << " CUDA error: " << thrust::system_error(err, thrust::cuda_category()).what() << "\n";
} else if (code == ncclSystemError) { } else if (code == ncclSystemError) {
ss << " This might be caused by a network configuration issue. Please consider specifying " ss << " This might be caused by a network configuration issue. Please consider specifying "
"the network interface for NCCL via environment variables listed in its reference: " "the network interface for RCCL via environment variables listed in its reference: "
"`https://docs.nvidia.com/deeplearning/nccl/user-guide/docs/env.html`.\n"; "`https://docs.nvidia.com/deeplearning/nccl/user-guide/docs/env.html`.\n";
} }
LOG(FATAL) << ss.str(); LOG(FATAL) << ss.str();