[doc] [dask] Troubleshooting NCCL errors. (#8943)

This commit is contained in:
Jiaming Yuan
2023-03-22 22:17:26 +08:00
committed by GitHub
parent a551bed803
commit ea04d4c46c
3 changed files with 44 additions and 20 deletions

View File

@@ -1,10 +1,12 @@
/*!
* Copyright 2022 XGBoost contributors
/**
* Copyright 2022-2023, XGBoost contributors
*/
#ifdef XGBOOST_USE_NCCL
#include <gtest/gtest.h>
#include <string> // for string
#include "../../../src/collective/nccl_device_communicator.cuh"
namespace xgboost {
@@ -20,7 +22,15 @@ TEST(NcclDeviceCommunicatorSimpleTest, ThrowOnInvalidCommunicator) {
EXPECT_THROW(construct(), dmlc::Error);
}
/**
 * Passing `ncclSystemError` to `dh::safe_nccl` must raise a `dmlc::Error` whose message
 * contains the phrase "environment variables".
 *
 * The test fails explicitly when no exception is raised; otherwise a regression that
 * stops `dh::safe_nccl` from throwing would leave the catch handler (and the only
 * message check) unexecuted and the test would pass without verifying anything.
 */
TEST(NcclDeviceCommunicatorSimpleTest, SystemError) {
  bool caught = false;
  try {
    dh::safe_nccl(ncclSystemError);
  } catch (dmlc::Error const& e) {
    caught = true;
    auto const message = std::string{e.what()};
    // Print the actual message on failure so a wording change is easy to diagnose.
    ASSERT_TRUE(message.find("environment variables") != std::string::npos) << message;
  }
  // Reaching this point without entering the handler means nothing was thrown.
  ASSERT_TRUE(caught) << "dh::safe_nccl(ncclSystemError) did not throw dmlc::Error";
}
} // namespace collective
} // namespace xgboost
#endif
#endif // XGBOOST_USE_NCCL