From c2b85ab68a29e1f691ebe61f9d1180cb7484b81f Mon Sep 17 00:00:00 2001 From: Rong Ou Date: Tue, 1 Aug 2023 23:31:18 -0700 Subject: [PATCH] Clean up MGPU C++ tests (#9430) --- src/collective/communicator.cc | 3 +- src/collective/communicator.cu | 3 +- src/collective/communicator.h | 4 +- .../test_nccl_device_communicator.cu | 9 ++- tests/cpp/common/test_quantile.cu | 39 +++++-------- tests/cpp/helpers.h | 57 +++++++++++++------ tests/cpp/linear/test_json_io.h | 2 +- tests/cpp/linear/test_linear.cc | 4 +- tests/cpp/metric/test_auc.cc | 24 ++++---- tests/cpp/metric/test_auc.h | 12 ++-- tests/cpp/metric/test_elementwise_metric.cc | 40 ++++++------- tests/cpp/metric/test_elementwise_metric.h | 38 ++++++------- tests/cpp/metric/test_metric.cc | 10 +--- tests/cpp/metric/test_multiclass_metric.cc | 8 +-- tests/cpp/metric/test_multiclass_metric.h | 8 +-- tests/cpp/metric/test_rank_metric.cc | 18 +++--- tests/cpp/metric/test_rank_metric.h | 8 +-- tests/cpp/metric/test_survival_metric.cu | 12 ++-- tests/cpp/metric/test_survival_metric.h | 6 +- tests/cpp/objective/test_aft_obj.cc | 10 ++-- tests/cpp/objective/test_hinge.cc | 2 +- tests/cpp/objective/test_lambdarank_obj.cc | 2 +- tests/cpp/objective/test_multiclass_obj.cc | 6 +- tests/cpp/objective/test_quantile_obj.cc | 4 +- tests/cpp/objective/test_regression_obj.cc | 32 +++++------ tests/cpp/plugin/test_example_objective.cc | 2 +- tests/cpp/plugin/test_federated_adapter.cu | 18 +++--- tests/cpp/predictor/test_gpu_predictor.cu | 13 ++--- 28 files changed, 200 insertions(+), 194 deletions(-) diff --git a/src/collective/communicator.cc b/src/collective/communicator.cc index 22c85f3ad..e4c491c2b 100644 --- a/src/collective/communicator.cc +++ b/src/collective/communicator.cc @@ -41,7 +41,8 @@ void Communicator::Init(Json const& config) { #endif break; } - case CommunicatorType::kInMemory: { + case CommunicatorType::kInMemory: + case CommunicatorType::kInMemoryNccl: { communicator_.reset(InMemoryCommunicator::Create(config)); break; } diff --git a/src/collective/communicator.cu b/src/collective/communicator.cu index 915a3beca..a80eab6d5 100644 --- a/src/collective/communicator.cu +++ b/src/collective/communicator.cu @@ -34,9 +34,10 @@ DeviceCommunicator* Communicator::GetDevice(int device_ordinal) { device_communicator_.reset(new NcclDeviceCommunicator(device_ordinal, false)); break; case CommunicatorType::kFederated: + case CommunicatorType::kInMemory: device_communicator_.reset(new DeviceCommunicatorAdapter(device_ordinal)); break; - case CommunicatorType::kInMemory: + case CommunicatorType::kInMemoryNccl: device_communicator_.reset(new NcclDeviceCommunicator(device_ordinal, true)); break; default: diff --git a/src/collective/communicator.h b/src/collective/communicator.h index 6cda5e47c..def961513 100644 --- a/src/collective/communicator.h +++ b/src/collective/communicator.h @@ -69,7 +69,7 @@ enum class Operation { class DeviceCommunicator; -enum class CommunicatorType { kUnknown, kRabit, kFederated, kInMemory }; +enum class CommunicatorType { kUnknown, kRabit, kFederated, kInMemory, kInMemoryNccl }; /** \brief Case-insensitive string comparison. */ inline int CompareStringsCaseInsensitive(const char *s1, const char *s2) { @@ -220,6 +220,8 @@ class Communicator { result = CommunicatorType::kFederated; } else if (!CompareStringsCaseInsensitive("in-memory", str)) { result = CommunicatorType::kInMemory; + } else if (!CompareStringsCaseInsensitive("in-memory-nccl", str)) { + result = CommunicatorType::kInMemoryNccl; } else { LOG(FATAL) << "Unknown communicator type " << str; } diff --git a/tests/cpp/collective/test_nccl_device_communicator.cu b/tests/cpp/collective/test_nccl_device_communicator.cu index cd9cd26de..d6ed400b2 100644 --- a/tests/cpp/collective/test_nccl_device_communicator.cu +++ b/tests/cpp/collective/test_nccl_device_communicator.cu @@ -46,7 +46,8 @@ TEST(NcclDeviceCommunicator, MGPUAllReduceBitwiseAND) { if (n_gpus <= 1) { GTEST_SKIP() << "Skipping MGPUAllReduceBitwiseAND test with # GPUs = " << n_gpus; } - RunWithInMemoryCommunicator(n_gpus, VerifyAllReduceBitwiseAND); + auto constexpr kUseNccl = true; + RunWithInMemoryCommunicator(n_gpus, VerifyAllReduceBitwiseAND); } namespace { @@ -67,7 +68,8 @@ TEST(NcclDeviceCommunicator, MGPUAllReduceBitwiseOR) { if (n_gpus <= 1) { GTEST_SKIP() << "Skipping MGPUAllReduceBitwiseOR test with # GPUs = " << n_gpus; } - RunWithInMemoryCommunicator(n_gpus, VerifyAllReduceBitwiseOR); + auto constexpr kUseNccl = true; + RunWithInMemoryCommunicator(n_gpus, VerifyAllReduceBitwiseOR); } namespace { @@ -88,7 +90,8 @@ TEST(NcclDeviceCommunicator, MGPUAllReduceBitwiseXOR) { if (n_gpus <= 1) { GTEST_SKIP() << "Skipping MGPUAllReduceBitwiseXOR test with # GPUs = " << n_gpus; } - RunWithInMemoryCommunicator(n_gpus, VerifyAllReduceBitwiseXOR); + auto constexpr kUseNccl = true; + RunWithInMemoryCommunicator(n_gpus, VerifyAllReduceBitwiseXOR); } } // namespace collective diff --git a/tests/cpp/common/test_quantile.cu b/tests/cpp/common/test_quantile.cu index eda55ee47..28d698685 100644 --- a/tests/cpp/common/test_quantile.cu +++ b/tests/cpp/common/test_quantile.cu @@ -19,6 +19,9 @@ struct IsSorted { }; } namespace common { + +class MGPUQuantileTest : public BaseMGPUTest {}; + TEST(GPUQuantile, Basic) { constexpr size_t kRows = 1000, kCols = 100, kBins = 256; HostDeviceVector ft; @@ -344,12 +347,11 @@ TEST(GPUQuantile, MultiMerge) { } namespace { -void TestAllReduceBasic(int32_t n_gpus) { +void TestAllReduceBasic() { auto const world = collective::GetWorldSize(); - CHECK_EQ(world, n_gpus); constexpr size_t kRows = 1000, kCols = 100; RunWithSeedsAndBins(kRows, [=](int32_t seed, size_t n_bins, MetaInfo const& info) { - auto const device = collective::GetRank(); + auto const device = GetGPUId(); // Set up single node version; HostDeviceVector ft({}, device); @@ -422,12 +424,8 @@ void TestAllReduceBasic(int32_t n_gpus) { } } // anonymous namespace -TEST(GPUQuantile, MGPUAllReduceBasic) { - auto const n_gpus = AllVisibleGPUs(); - if (n_gpus <= 1) { - GTEST_SKIP() << "Skipping MGPUAllReduceBasic test with # GPUs = " << n_gpus; - } - RunWithInMemoryCommunicator(n_gpus, TestAllReduceBasic, n_gpus); +TEST_F(MGPUQuantileTest, AllReduceBasic) { + DoTest(TestAllReduceBasic); } namespace { @@ -442,7 +440,7 @@ void TestColumnSplitBasic() { }()}; // Generate cuts for distributed environment. - auto ctx = MakeCUDACtx(rank); + auto ctx = MakeCUDACtx(GetGPUId()); HistogramCuts distributed_cuts = common::DeviceSketch(&ctx, m.get(), kBins); // Generate cuts for single node environment @@ -474,23 +472,18 @@ void TestColumnSplitBasic() { } } // anonymous namespace -TEST(GPUQuantile, MGPUColumnSplitBasic) { - auto const n_gpus = AllVisibleGPUs(); - if (n_gpus <= 1) { - GTEST_SKIP() << "Skipping MGPUColumnSplitBasic test with # GPUs = " << n_gpus; - } - RunWithInMemoryCommunicator(n_gpus, TestColumnSplitBasic); +TEST_F(MGPUQuantileTest, ColumnSplitBasic) { + DoTest(TestColumnSplitBasic); } namespace { -void TestSameOnAllWorkers(std::int32_t n_gpus) { +void TestSameOnAllWorkers() { auto world = collective::GetWorldSize(); - CHECK_EQ(world, n_gpus); constexpr size_t kRows = 1000, kCols = 100; RunWithSeedsAndBins(kRows, [=](int32_t seed, size_t n_bins, MetaInfo const &info) { auto const rank = collective::GetRank(); - auto const device = rank; + auto const device = GetGPUId(); HostDeviceVector ft({}, device); SketchContainer sketch_distributed(ft, n_bins, kCols, kRows, device); HostDeviceVector storage({}, device); @@ -544,12 +537,8 @@ void TestSameOnAllWorkers(std::int32_t n_gpus) { } } // anonymous namespace -TEST(GPUQuantile, MGPUSameOnAllWorkers) { - auto const n_gpus = AllVisibleGPUs(); - if (n_gpus <= 1) { - GTEST_SKIP() << "Skipping MGPUSameOnAllWorkers test with # GPUs = " << n_gpus; - } - RunWithInMemoryCommunicator(n_gpus, TestSameOnAllWorkers, n_gpus); +TEST_F(MGPUQuantileTest, SameOnAllWorkers) { + DoTest(TestSameOnAllWorkers); } TEST(GPUQuantile, Push) { diff --git a/tests/cpp/helpers.h b/tests/cpp/helpers.h index b166109d9..6cb0b3405 100644 --- a/tests/cpp/helpers.h +++ b/tests/cpp/helpers.h @@ -34,24 +34,12 @@ #define DeclareUnifiedTest(name) name #endif -#if defined(__CUDACC__) -#define GPUIDX 0 -#else -#define GPUIDX -1 -#endif - #if defined(__CUDACC__) #define DeclareUnifiedDistributedTest(name) MGPU ## name #else #define DeclareUnifiedDistributedTest(name) name #endif -#if defined(__CUDACC__) -#define WORLD_SIZE_FOR_TEST (xgboost::common::AllVisibleGPUs()) -#else -#define WORLD_SIZE_FOR_TEST (3) -#endif - namespace xgboost { class ObjFunction; class Metric; @@ -522,11 +510,15 @@ inline LearnerModelParam MakeMP(bst_feature_t n_features, float base_score, uint inline std::int32_t AllThreadsForTest() { return Context{}.Threads(); } -template +template void RunWithInMemoryCommunicator(int32_t world_size, Function&& function, Args&&... args) { auto run = [&](auto rank) { Json config{JsonObject()}; - config["xgboost_communicator"] = String("in-memory"); + if constexpr (use_nccl) { + config["xgboost_communicator"] = String("in-memory-nccl"); + } else { + config["xgboost_communicator"] = String("in-memory"); + } config["in_memory_world_size"] = world_size; config["in_memory_rank"] = rank; xgboost::collective::Init(config); @@ -548,15 +540,44 @@ void RunWithInMemoryCommunicator(int32_t world_size, Function&& function, Args&& #endif } -class DeclareUnifiedDistributedTest(MetricTest) : public ::testing::Test { +inline int GetGPUId() { +#if defined(__CUDACC__) + auto const n_gpus = common::AllVisibleGPUs(); + return n_gpus == 1 ? 0 : collective::GetRank(); +#else + return -1; +#endif +} + +class BaseMGPUTest : public ::testing::Test { protected: int world_size_; + bool use_nccl_{false}; void SetUp() override { - world_size_ = WORLD_SIZE_FOR_TEST; - if (world_size_ <= 1) { - GTEST_SKIP() << "Skipping MGPU test with # GPUs = " << world_size_; + auto const n_gpus = common::AllVisibleGPUs(); + if (n_gpus <= 1) { + // Use a single GPU to simulate distributed environment. + world_size_ = 3; + // NCCL doesn't like sharing a single GPU, so we use the adapter instead. + use_nccl_ = false; + } else { + // Use multiple GPUs for real. + world_size_ = n_gpus; + use_nccl_ = true; + } + } + + template + void DoTest(Function&& function, Args&&... args) { + if (use_nccl_) { + RunWithInMemoryCommunicator(world_size_, function, args...); + } else { + RunWithInMemoryCommunicator(world_size_, function, args...); } } }; + +class DeclareUnifiedDistributedTest(MetricTest) : public BaseMGPUTest{}; + } // namespace xgboost diff --git a/tests/cpp/linear/test_json_io.h b/tests/cpp/linear/test_json_io.h index c423448e2..48d4497c3 100644 --- a/tests/cpp/linear/test_json_io.h +++ b/tests/cpp/linear/test_json_io.h @@ -12,7 +12,7 @@ namespace xgboost { inline void TestUpdaterJsonIO(std::string updater_str) { - Context ctx{MakeCUDACtx(GPUIDX)}; + Context ctx{MakeCUDACtx(GetGPUId())}; Json config_0 {Object() }; { diff --git a/tests/cpp/linear/test_linear.cc b/tests/cpp/linear/test_linear.cc index 6b2d17e10..f15a47e64 100644 --- a/tests/cpp/linear/test_linear.cc +++ b/tests/cpp/linear/test_linear.cc @@ -17,7 +17,7 @@ TEST(Linear, Shotgun) { auto p_fmat = xgboost::RandomDataGenerator(kRows, kCols, 0).GenerateDMatrix(); - auto ctx = MakeCUDACtx(GPUIDX); + auto ctx = MakeCUDACtx(GetGPUId()); LearnerModelParam mparam{MakeMP(kCols, .5, 1)}; { @@ -49,7 +49,7 @@ TEST(Linear, coordinate) { auto p_fmat = xgboost::RandomDataGenerator(kRows, kCols, 0).GenerateDMatrix(); - auto ctx = MakeCUDACtx(GPUIDX); + auto ctx = MakeCUDACtx(GetGPUId()); LearnerModelParam mparam{MakeMP(kCols, .5, 1)}; auto updater = std::unique_ptr( diff --git a/tests/cpp/metric/test_auc.cc b/tests/cpp/metric/test_auc.cc index de42bba53..eea54fc32 100644 --- a/tests/cpp/metric/test_auc.cc +++ b/tests/cpp/metric/test_auc.cc @@ -18,51 +18,51 @@ TEST(Metric, DeclareUnifiedTest(MultiClassPRAUC)) { VerifyMultiClassPRAUC(); } TEST(Metric, DeclareUnifiedTest(RankingPRAUC)) { VerifyRankingPRAUC(); } TEST_F(DeclareUnifiedDistributedTest(MetricTest), BinaryAUCRowSplit) { - RunWithInMemoryCommunicator(world_size_, &VerifyBinaryAUC, DataSplitMode::kRow); + DoTest(VerifyBinaryAUC, DataSplitMode::kRow); } TEST_F(DeclareUnifiedDistributedTest(MetricTest), BinaryAUCColumnSplit) { - RunWithInMemoryCommunicator(world_size_, &VerifyBinaryAUC, DataSplitMode::kCol); + DoTest(VerifyBinaryAUC, DataSplitMode::kCol); } TEST_F(DeclareUnifiedDistributedTest(MetricTest), MultiClassAUCRowSplit) { - RunWithInMemoryCommunicator(world_size_, &VerifyMultiClassAUC, DataSplitMode::kRow); + DoTest(VerifyMultiClassAUC, DataSplitMode::kRow); } TEST_F(DeclareUnifiedDistributedTest(MetricTest), MultiClassAUCColumnSplit) { - RunWithInMemoryCommunicator(world_size_, &VerifyMultiClassAUC, DataSplitMode::kCol); + DoTest(VerifyMultiClassAUC, DataSplitMode::kCol); } TEST_F(DeclareUnifiedDistributedTest(MetricTest), RankingAUCRowSplit) { - RunWithInMemoryCommunicator(world_size_, &VerifyRankingAUC, DataSplitMode::kRow); + DoTest(VerifyRankingAUC, DataSplitMode::kRow); } TEST_F(DeclareUnifiedDistributedTest(MetricTest), RankingAUCColumnSplit) { - RunWithInMemoryCommunicator(world_size_, &VerifyRankingAUC, DataSplitMode::kCol); + DoTest(VerifyRankingAUC, DataSplitMode::kCol); } TEST_F(DeclareUnifiedDistributedTest(MetricTest), PRAUCRowSplit) { - RunWithInMemoryCommunicator(world_size_, &VerifyPRAUC, DataSplitMode::kRow); + DoTest(VerifyPRAUC, DataSplitMode::kRow); } TEST_F(DeclareUnifiedDistributedTest(MetricTest), PRAUCColumnSplit) { - RunWithInMemoryCommunicator(world_size_, &VerifyPRAUC, DataSplitMode::kCol); + DoTest(VerifyPRAUC, DataSplitMode::kCol); } TEST_F(DeclareUnifiedDistributedTest(MetricTest), MultiClassPRAUCRowSplit) { - RunWithInMemoryCommunicator(world_size_, &VerifyMultiClassPRAUC, DataSplitMode::kRow); + DoTest(VerifyMultiClassPRAUC, DataSplitMode::kRow); } TEST_F(DeclareUnifiedDistributedTest(MetricTest), MultiClassPRAUCColumnSplit) { - RunWithInMemoryCommunicator(world_size_, &VerifyMultiClassPRAUC, DataSplitMode::kCol); + DoTest(VerifyMultiClassPRAUC, DataSplitMode::kCol); } TEST_F(DeclareUnifiedDistributedTest(MetricTest), RankingPRAUCRowSplit) { - RunWithInMemoryCommunicator(world_size_, &VerifyRankingPRAUC, DataSplitMode::kRow); + DoTest(VerifyRankingPRAUC, DataSplitMode::kRow); } TEST_F(DeclareUnifiedDistributedTest(MetricTest), RankingPRAUCColumnSplit) { - RunWithInMemoryCommunicator(world_size_, &VerifyRankingPRAUC, DataSplitMode::kCol); + DoTest(VerifyRankingPRAUC, DataSplitMode::kCol); } } // namespace metric } // namespace xgboost diff --git a/tests/cpp/metric/test_auc.h b/tests/cpp/metric/test_auc.h index 0dd3dd83e..cd0095ebb 100644 --- a/tests/cpp/metric/test_auc.h +++ b/tests/cpp/metric/test_auc.h @@ -11,7 +11,7 @@ namespace xgboost { namespace metric { inline void VerifyBinaryAUC(DataSplitMode data_split_mode = DataSplitMode::kRow) { - auto ctx = MakeCUDACtx(GPUIDX); + auto ctx = MakeCUDACtx(GetGPUId()); std::unique_ptr uni_ptr{Metric::Create("auc", &ctx)}; Metric* metric = uni_ptr.get(); ASSERT_STREQ(metric->Name(), "auc"); @@ -54,7 +54,7 @@ inline void VerifyBinaryAUC(DataSplitMode data_split_mode = DataSplitMode::kRow) } inline void VerifyMultiClassAUC(DataSplitMode data_split_mode = DataSplitMode::kRow) { - auto ctx = MakeCUDACtx(GPUIDX); + auto ctx = MakeCUDACtx(GetGPUId()); std::unique_ptr uni_ptr{Metric::Create("auc", &ctx)}; auto metric = uni_ptr.get(); @@ -115,7 +115,7 @@ inline void VerifyMultiClassAUC(DataSplitMode data_split_mode = DataSplitMode::k } inline void VerifyRankingAUC(DataSplitMode data_split_mode = DataSplitMode::kRow) { - auto ctx = MakeCUDACtx(GPUIDX); + auto ctx = MakeCUDACtx(GetGPUId()); std::unique_ptr metric{Metric::Create("auc", &ctx)}; // single group @@ -149,7 +149,7 @@ inline void VerifyRankingAUC(DataSplitMode data_split_mode = DataSplitMode::kRow } inline void VerifyPRAUC(DataSplitMode data_split_mode = DataSplitMode::kRow) { - auto ctx = MakeCUDACtx(GPUIDX); + auto ctx = MakeCUDACtx(GetGPUId()); xgboost::Metric* metric = xgboost::Metric::Create("aucpr", &ctx); ASSERT_STREQ(metric->Name(), "aucpr"); @@ -186,7 +186,7 @@ inline void VerifyPRAUC(DataSplitMode data_split_mode = DataSplitMode::kRow) { } inline void VerifyMultiClassPRAUC(DataSplitMode data_split_mode = DataSplitMode::kRow) { - auto ctx = MakeCUDACtx(GPUIDX); + auto ctx = MakeCUDACtx(GetGPUId()); std::unique_ptr metric{Metric::Create("aucpr", &ctx)}; @@ -210,7 +210,7 @@ inline void VerifyMultiClassPRAUC(DataSplitMode data_split_mode = DataSplitMode: } inline void VerifyRankingPRAUC(DataSplitMode data_split_mode = DataSplitMode::kRow) { - auto ctx = MakeCUDACtx(GPUIDX); + auto ctx = MakeCUDACtx(GetGPUId()); std::unique_ptr metric{Metric::Create("aucpr", &ctx)}; diff --git a/tests/cpp/metric/test_elementwise_metric.cc b/tests/cpp/metric/test_elementwise_metric.cc index 2407dde39..13021fb6a 100644 --- a/tests/cpp/metric/test_elementwise_metric.cc +++ b/tests/cpp/metric/test_elementwise_metric.cc @@ -26,83 +26,83 @@ TEST(Metric, DeclareUnifiedTest(MultiRMSE)) { VerifyMultiRMSE(); } TEST(Metric, DeclareUnifiedTest(Quantile)) { VerifyQuantile(); } TEST_F(DeclareUnifiedDistributedTest(MetricTest), RMSERowSplit) { - RunWithInMemoryCommunicator(world_size_, &VerifyRMSE, DataSplitMode::kRow); + DoTest(VerifyRMSE, DataSplitMode::kRow); } TEST_F(DeclareUnifiedDistributedTest(MetricTest), RMSEColumnSplit) { - RunWithInMemoryCommunicator(world_size_, &VerifyRMSE, DataSplitMode::kCol); + DoTest(VerifyRMSE, DataSplitMode::kCol); } TEST_F(DeclareUnifiedDistributedTest(MetricTest), RMSLERowSplit) { - RunWithInMemoryCommunicator(world_size_, &VerifyRMSLE, DataSplitMode::kRow); + DoTest(VerifyRMSLE, DataSplitMode::kRow); } TEST_F(DeclareUnifiedDistributedTest(MetricTest), RMSLEColumnSplit) { - RunWithInMemoryCommunicator(world_size_, &VerifyRMSLE, DataSplitMode::kCol); + DoTest(VerifyRMSLE, DataSplitMode::kCol); } TEST_F(DeclareUnifiedDistributedTest(MetricTest), MAERowSplit) { - RunWithInMemoryCommunicator(world_size_, &VerifyMAE, DataSplitMode::kRow); + DoTest(VerifyMAE, DataSplitMode::kRow); } TEST_F(DeclareUnifiedDistributedTest(MetricTest), MAEColumnSplit) { - RunWithInMemoryCommunicator(world_size_, &VerifyMAE, DataSplitMode::kCol); + DoTest(VerifyMAE, DataSplitMode::kCol); } TEST_F(DeclareUnifiedDistributedTest(MetricTest), MAPERowSplit) { - RunWithInMemoryCommunicator(world_size_, &VerifyMAPE, DataSplitMode::kRow); + DoTest(VerifyMAPE, DataSplitMode::kRow); } TEST_F(DeclareUnifiedDistributedTest(MetricTest), MAPEColumnSplit) { - RunWithInMemoryCommunicator(world_size_, &VerifyMAPE, DataSplitMode::kCol); + DoTest(VerifyMAPE, DataSplitMode::kCol); } TEST_F(DeclareUnifiedDistributedTest(MetricTest), MPHERowSplit) { - RunWithInMemoryCommunicator(world_size_, &VerifyMPHE, DataSplitMode::kRow); + DoTest(VerifyMPHE, DataSplitMode::kRow); } TEST_F(DeclareUnifiedDistributedTest(MetricTest), MPHEColumnSplit) { - RunWithInMemoryCommunicator(world_size_, &VerifyMPHE, DataSplitMode::kCol); + DoTest(VerifyMPHE, DataSplitMode::kCol); } TEST_F(DeclareUnifiedDistributedTest(MetricTest), LogLossRowSplit) { - RunWithInMemoryCommunicator(world_size_, &VerifyLogLoss, DataSplitMode::kRow); + DoTest(VerifyLogLoss, DataSplitMode::kRow); } TEST_F(DeclareUnifiedDistributedTest(MetricTest), LogLossColumnSplit) { - RunWithInMemoryCommunicator(world_size_, &VerifyLogLoss, DataSplitMode::kCol); + DoTest(VerifyLogLoss, DataSplitMode::kCol); } TEST_F(DeclareUnifiedDistributedTest(MetricTest), ErrorRowSplit) { - RunWithInMemoryCommunicator(world_size_, &VerifyError, DataSplitMode::kRow); + DoTest(VerifyError, DataSplitMode::kRow); } TEST_F(DeclareUnifiedDistributedTest(MetricTest), ErrorColumnSplit) { - RunWithInMemoryCommunicator(world_size_, &VerifyError, DataSplitMode::kCol); + DoTest(VerifyError, DataSplitMode::kCol); } TEST_F(DeclareUnifiedDistributedTest(MetricTest), PoissonNegLogLikRowSplit) { - RunWithInMemoryCommunicator(world_size_, &VerifyPoissonNegLogLik, DataSplitMode::kRow); + DoTest(VerifyPoissonNegLogLik, DataSplitMode::kRow); } TEST_F(DeclareUnifiedDistributedTest(MetricTest), PoissonNegLogLikColumnSplit) { - RunWithInMemoryCommunicator(world_size_, &VerifyPoissonNegLogLik, DataSplitMode::kCol); + DoTest(VerifyPoissonNegLogLik, DataSplitMode::kCol); } TEST_F(DeclareUnifiedDistributedTest(MetricTest), MultiRMSERowSplit) { - RunWithInMemoryCommunicator(world_size_, &VerifyMultiRMSE, DataSplitMode::kRow); + DoTest(VerifyMultiRMSE, DataSplitMode::kRow); } TEST_F(DeclareUnifiedDistributedTest(MetricTest), MultiRMSEColumnSplit) { - RunWithInMemoryCommunicator(world_size_, &VerifyMultiRMSE, DataSplitMode::kCol); + DoTest(VerifyMultiRMSE, DataSplitMode::kCol); } TEST_F(DeclareUnifiedDistributedTest(MetricTest), QuantileRowSplit) { - RunWithInMemoryCommunicator(world_size_, &VerifyQuantile, DataSplitMode::kRow); + DoTest(VerifyQuantile, DataSplitMode::kRow); } TEST_F(DeclareUnifiedDistributedTest(MetricTest), QuantileColumnSplit) { - RunWithInMemoryCommunicator(world_size_, &VerifyQuantile, DataSplitMode::kCol); + DoTest(VerifyQuantile, DataSplitMode::kCol); } } // namespace metric } // namespace xgboost diff --git a/tests/cpp/metric/test_elementwise_metric.h b/tests/cpp/metric/test_elementwise_metric.h index a32bb0438..9a3d3fe9f 100644 --- a/tests/cpp/metric/test_elementwise_metric.h +++ b/tests/cpp/metric/test_elementwise_metric.h @@ -46,7 +46,7 @@ inline void CheckDeterministicMetricElementWise(StringView name, int32_t device) } inline void VerifyRMSE(DataSplitMode data_split_mode = DataSplitMode::kRow) { - auto ctx = MakeCUDACtx(GPUIDX); + auto ctx = MakeCUDACtx(GetGPUId()); xgboost::Metric * metric = xgboost::Metric::Create("rmse", &ctx); metric->Configure({}); ASSERT_STREQ(metric->Name(), "rmse"); @@ -71,11 +71,11 @@ inline void VerifyRMSE(DataSplitMode data_split_mode = DataSplitMode::kRow) { 0.6708f, 0.001f); delete metric; - CheckDeterministicMetricElementWise(StringView{"rmse"}, GPUIDX); + CheckDeterministicMetricElementWise(StringView{"rmse"}, GetGPUId()); } inline void VerifyRMSLE(DataSplitMode data_split_mode = DataSplitMode::kRow) { - auto ctx = MakeCUDACtx(GPUIDX); + auto ctx = MakeCUDACtx(GetGPUId()); xgboost::Metric * metric = xgboost::Metric::Create("rmsle", &ctx); metric->Configure({}); ASSERT_STREQ(metric->Name(), "rmsle"); @@ -100,11 +100,11 @@ inline void VerifyRMSLE(DataSplitMode data_split_mode = DataSplitMode::kRow) { 0.2415f, 1e-4); delete metric; - CheckDeterministicMetricElementWise(StringView{"rmsle"}, GPUIDX); + CheckDeterministicMetricElementWise(StringView{"rmsle"}, GetGPUId()); } inline void VerifyMAE(DataSplitMode data_split_mode = DataSplitMode::kRow) { - auto ctx = MakeCUDACtx(GPUIDX); + auto ctx = MakeCUDACtx(GetGPUId()); xgboost::Metric * metric = xgboost::Metric::Create("mae", &ctx); metric->Configure({}); ASSERT_STREQ(metric->Name(), "mae"); @@ -129,11 +129,11 @@ inline void VerifyMAE(DataSplitMode data_split_mode = DataSplitMode::kRow) { 0.54f, 0.001f); delete metric; - CheckDeterministicMetricElementWise(StringView{"mae"}, GPUIDX); + CheckDeterministicMetricElementWise(StringView{"mae"}, GetGPUId()); } inline void VerifyMAPE(DataSplitMode data_split_mode = DataSplitMode::kRow) { - auto ctx = MakeCUDACtx(GPUIDX); + auto ctx = MakeCUDACtx(GetGPUId()); xgboost::Metric * metric = xgboost::Metric::Create("mape", &ctx); metric->Configure({}); ASSERT_STREQ(metric->Name(), "mape"); @@ -158,11 +158,11 @@ inline void VerifyMAPE(DataSplitMode data_split_mode = DataSplitMode::kRow) { 1.3250f, 0.001f); delete metric; - CheckDeterministicMetricElementWise(StringView{"mape"}, GPUIDX); + CheckDeterministicMetricElementWise(StringView{"mape"}, GetGPUId()); } inline void VerifyMPHE(DataSplitMode data_split_mode = DataSplitMode::kRow) { - auto ctx = MakeCUDACtx(GPUIDX); + auto ctx = MakeCUDACtx(GetGPUId()); std::unique_ptr metric{xgboost::Metric::Create("mphe", &ctx)}; metric->Configure({}); ASSERT_STREQ(metric->Name(), "mphe"); @@ -186,7 +186,7 @@ inline void VerifyMPHE(DataSplitMode data_split_mode = DataSplitMode::kRow) { { 1, 2, 9, 8}, {}, data_split_mode), 0.1922f, 1e-4); - CheckDeterministicMetricElementWise(StringView{"mphe"}, GPUIDX); + CheckDeterministicMetricElementWise(StringView{"mphe"}, GetGPUId()); metric->Configure({{"huber_slope", "0.1"}}); EXPECT_NEAR(GetMetricEval(metric.get(), @@ -197,7 +197,7 @@ inline void VerifyMPHE(DataSplitMode data_split_mode = DataSplitMode::kRow) { } inline void VerifyLogLoss(DataSplitMode data_split_mode = DataSplitMode::kRow) { - auto ctx = MakeCUDACtx(GPUIDX); + auto ctx = MakeCUDACtx(GetGPUId()); xgboost::Metric * metric = xgboost::Metric::Create("logloss", &ctx); metric->Configure({}); ASSERT_STREQ(metric->Name(), "logloss"); @@ -226,11 +226,11 @@ inline void VerifyLogLoss(DataSplitMode data_split_mode = DataSplitMode::kRow) { 1.3138f, 0.001f); delete metric; - CheckDeterministicMetricElementWise(StringView{"logloss"}, GPUIDX); + CheckDeterministicMetricElementWise(StringView{"logloss"}, GetGPUId()); } inline void VerifyError(DataSplitMode data_split_mode = DataSplitMode::kRow) { - auto ctx = MakeCUDACtx(GPUIDX); + auto ctx = MakeCUDACtx(GetGPUId()); xgboost::Metric * metric = xgboost::Metric::Create("error", &ctx); metric->Configure({}); ASSERT_STREQ(metric->Name(), "error"); @@ -288,11 +288,11 @@ inline void VerifyError(DataSplitMode data_split_mode = DataSplitMode::kRow) { 0.45f, 0.001f); delete metric; - CheckDeterministicMetricElementWise(StringView{"error@0.5"}, GPUIDX); + CheckDeterministicMetricElementWise(StringView{"error@0.5"}, GetGPUId()); } inline void VerifyPoissonNegLogLik(DataSplitMode data_split_mode = DataSplitMode::kRow) { - auto ctx = MakeCUDACtx(GPUIDX); + auto ctx = MakeCUDACtx(GetGPUId()); xgboost::Metric * metric = xgboost::Metric::Create("poisson-nloglik", &ctx); metric->Configure({}); ASSERT_STREQ(metric->Name(), "poisson-nloglik"); @@ -321,18 +321,18 @@ inline void VerifyPoissonNegLogLik(DataSplitMode data_split_mode = DataSplitMode 1.5783f, 0.001f); delete metric; - CheckDeterministicMetricElementWise(StringView{"poisson-nloglik"}, GPUIDX); + CheckDeterministicMetricElementWise(StringView{"poisson-nloglik"}, GetGPUId()); } inline void VerifyMultiRMSE(DataSplitMode data_split_mode = DataSplitMode::kRow) { size_t n_samples = 32, n_targets = 8; - linalg::Tensor y{{n_samples, n_targets}, GPUIDX}; + linalg::Tensor y{{n_samples, n_targets}, GetGPUId()}; auto &h_y = y.Data()->HostVector(); std::iota(h_y.begin(), h_y.end(), 0); HostDeviceVector predt(n_samples * n_targets, 0); - auto ctx = MakeCUDACtx(GPUIDX); + auto ctx = MakeCUDACtx(GetGPUId()); std::unique_ptr metric{Metric::Create("rmse", &ctx)}; metric->Configure({}); @@ -347,7 +347,7 @@ inline void VerifyMultiRMSE(DataSplitMode data_split_mode = DataSplitMode::kRow) } inline void VerifyQuantile(DataSplitMode data_split_mode = DataSplitMode::kRow) { - auto ctx = MakeCUDACtx(GPUIDX); + auto ctx = MakeCUDACtx(GetGPUId()); std::unique_ptr metric{Metric::Create("quantile", &ctx)}; HostDeviceVector predts{0.1f, 0.9f, 0.1f, 0.9f}; diff --git a/tests/cpp/metric/test_metric.cc b/tests/cpp/metric/test_metric.cc index d269dc746..c629a1481 100644 --- a/tests/cpp/metric/test_metric.cc +++ b/tests/cpp/metric/test_metric.cc @@ -4,18 +4,14 @@ #include "../helpers.h" namespace xgboost { TEST(Metric, UnknownMetric) { - auto ctx = MakeCUDACtx(GPUIDX); + auto ctx = MakeCUDACtx(GetGPUId()); xgboost::Metric* metric = nullptr; EXPECT_ANY_THROW(metric = xgboost::Metric::Create("unknown_name", &ctx)); EXPECT_NO_THROW(metric = xgboost::Metric::Create("rmse", &ctx)); - if (metric) { - delete metric; - } + delete metric; metric = nullptr; EXPECT_ANY_THROW(metric = xgboost::Metric::Create("unknown_name@1", &ctx)); EXPECT_NO_THROW(metric = xgboost::Metric::Create("error@0.5f", &ctx)); - if (metric) { - delete metric; - } + delete metric; } } // namespace xgboost diff --git a/tests/cpp/metric/test_multiclass_metric.cc b/tests/cpp/metric/test_multiclass_metric.cc index bfb638924..7fc8bc429 100644 --- a/tests/cpp/metric/test_multiclass_metric.cc +++ b/tests/cpp/metric/test_multiclass_metric.cc @@ -11,19 +11,19 @@ TEST(Metric, DeclareUnifiedTest(MultiClassError)) { VerifyMultiClassError(); } TEST(Metric, DeclareUnifiedTest(MultiClassLogLoss)) { VerifyMultiClassLogLoss(); } TEST_F(DeclareUnifiedDistributedTest(MetricTest), MultiClassErrorRowSplit) { - RunWithInMemoryCommunicator(world_size_, &VerifyMultiClassError, DataSplitMode::kRow); + DoTest(VerifyMultiClassError, DataSplitMode::kRow); } TEST_F(DeclareUnifiedDistributedTest(MetricTest), MultiClassErrorColumnSplit) { - RunWithInMemoryCommunicator(world_size_, &VerifyMultiClassError, DataSplitMode::kCol); + DoTest(VerifyMultiClassError, DataSplitMode::kCol); } TEST_F(DeclareUnifiedDistributedTest(MetricTest), MultiClassLogLossRowSplit) { - RunWithInMemoryCommunicator(world_size_, &VerifyMultiClassLogLoss, DataSplitMode::kRow); + DoTest(VerifyMultiClassLogLoss, DataSplitMode::kRow); } TEST_F(DeclareUnifiedDistributedTest(MetricTest), MultiClassLogLossColumnSplit) { - RunWithInMemoryCommunicator(world_size_, &VerifyMultiClassLogLoss, DataSplitMode::kCol); + DoTest(VerifyMultiClassLogLoss, DataSplitMode::kCol); } } // namespace metric } // namespace xgboost diff --git a/tests/cpp/metric/test_multiclass_metric.h b/tests/cpp/metric/test_multiclass_metric.h index 5fdead596..f147c91fa 100644 --- a/tests/cpp/metric/test_multiclass_metric.h +++ b/tests/cpp/metric/test_multiclass_metric.h @@ -60,8 +60,8 @@ inline void TestMultiClassError(int device, DataSplitMode data_split_mode) { } inline void VerifyMultiClassError(DataSplitMode data_split_mode = DataSplitMode::kRow) { - TestMultiClassError(GPUIDX, data_split_mode); - CheckDeterministicMetricMultiClass(StringView{"merror"}, GPUIDX); + TestMultiClassError(GetGPUId(), data_split_mode); + CheckDeterministicMetricMultiClass(StringView{"merror"}, GetGPUId()); } inline void TestMultiClassLogLoss(int device, DataSplitMode data_split_mode) { @@ -81,8 +81,8 @@ inline void TestMultiClassLogLoss(int device, DataSplitMode data_split_mode) { } inline void VerifyMultiClassLogLoss(DataSplitMode data_split_mode = DataSplitMode::kRow) { - TestMultiClassLogLoss(GPUIDX, data_split_mode); - CheckDeterministicMetricMultiClass(StringView{"mlogloss"}, GPUIDX); + TestMultiClassLogLoss(GetGPUId(), data_split_mode); + CheckDeterministicMetricMultiClass(StringView{"mlogloss"}, GetGPUId()); } } // namespace metric diff --git a/tests/cpp/metric/test_rank_metric.cc b/tests/cpp/metric/test_rank_metric.cc index 8c83dee5c..066e981b9 100644 --- a/tests/cpp/metric/test_rank_metric.cc +++ b/tests/cpp/metric/test_rank_metric.cc @@ -22,7 +22,7 @@ namespace metric { #if !defined(__CUDACC__) TEST(Metric, AMS) { - auto ctx = MakeCUDACtx(GPUIDX); + auto ctx = MakeCUDACtx(GetGPUId()); EXPECT_ANY_THROW(Metric::Create("ams", &ctx)); Metric* metric = Metric::Create("ams@0.5f", &ctx); ASSERT_STREQ(metric->Name(), "ams@0.5"); @@ -50,35 +50,35 @@ TEST(Metric, DeclareUnifiedTest(MAP)) { VerifyMAP(); } TEST(Metric, DeclareUnifiedTest(NDCGExpGain)) { VerifyNDCGExpGain(); } TEST_F(DeclareUnifiedDistributedTest(MetricTest), PrecisionRowSplit) { - RunWithInMemoryCommunicator(world_size_, &VerifyPrecision, DataSplitMode::kRow); + DoTest(VerifyPrecision, DataSplitMode::kRow); } TEST_F(DeclareUnifiedDistributedTest(MetricTest), PrecisionColumnSplit) { - RunWithInMemoryCommunicator(world_size_, &VerifyPrecision, DataSplitMode::kCol); + DoTest(VerifyPrecision, DataSplitMode::kCol); } TEST_F(DeclareUnifiedDistributedTest(MetricTest), NDCGRowSplit) { - RunWithInMemoryCommunicator(world_size_, &VerifyNDCG, DataSplitMode::kRow); + DoTest(VerifyNDCG, DataSplitMode::kRow); } TEST_F(DeclareUnifiedDistributedTest(MetricTest), NDCGColumnSplit) { - RunWithInMemoryCommunicator(world_size_, &VerifyNDCG, DataSplitMode::kCol); + DoTest(VerifyNDCG, DataSplitMode::kCol); } TEST_F(DeclareUnifiedDistributedTest(MetricTest), MAPRowSplit) { - RunWithInMemoryCommunicator(world_size_, &VerifyMAP, DataSplitMode::kRow); + DoTest(VerifyMAP, DataSplitMode::kRow); } TEST_F(DeclareUnifiedDistributedTest(MetricTest), MAPColumnSplit) { - RunWithInMemoryCommunicator(world_size_, &VerifyMAP, DataSplitMode::kCol); + DoTest(VerifyMAP, DataSplitMode::kCol); } TEST_F(DeclareUnifiedDistributedTest(MetricTest), NDCGExpGainRowSplit) { - RunWithInMemoryCommunicator(world_size_, &VerifyNDCGExpGain, DataSplitMode::kRow); + DoTest(VerifyNDCGExpGain, DataSplitMode::kRow); } TEST_F(DeclareUnifiedDistributedTest(MetricTest), NDCGExpGainColumnSplit) { - RunWithInMemoryCommunicator(world_size_, &VerifyNDCGExpGain, DataSplitMode::kCol); + DoTest(VerifyNDCGExpGain, DataSplitMode::kCol); } } // namespace metric } // namespace xgboost diff --git a/tests/cpp/metric/test_rank_metric.h b/tests/cpp/metric/test_rank_metric.h index 2f7785689..82d3725f5 100644 --- a/tests/cpp/metric/test_rank_metric.h +++ b/tests/cpp/metric/test_rank_metric.h @@ -20,7 +20,7 @@ namespace xgboost::metric { inline void VerifyPrecision(DataSplitMode data_split_mode = DataSplitMode::kRow) { - auto ctx = MakeCUDACtx(GPUIDX); + auto ctx = MakeCUDACtx(GetGPUId()); std::unique_ptr metric{Metric::Create("pre", &ctx)}; ASSERT_STREQ(metric->Name(), "pre"); EXPECT_NEAR(GetMetricEval(metric.get(), {0, 1}, {0, 1}, {}, {}, data_split_mode), 0.5, 1e-7); @@ -44,7 +44,7 @@ inline void VerifyPrecision(DataSplitMode data_split_mode = DataSplitMode::kRow) } inline void VerifyNDCG(DataSplitMode data_split_mode = DataSplitMode::kRow) { - auto ctx = MakeCUDACtx(GPUIDX); + auto ctx = MakeCUDACtx(GetGPUId()); Metric * metric = xgboost::Metric::Create("ndcg", &ctx); ASSERT_STREQ(metric->Name(), "ndcg"); EXPECT_ANY_THROW(GetMetricEval(metric, {0, 1}, {}, {}, {}, data_split_mode)); @@ -102,7 +102,7 @@ inline void VerifyNDCG(DataSplitMode data_split_mode = DataSplitMode::kRow) { } inline void VerifyMAP(DataSplitMode data_split_mode = DataSplitMode::kRow) { - auto ctx = MakeCUDACtx(GPUIDX); + auto ctx = MakeCUDACtx(GetGPUId()); Metric * metric = xgboost::Metric::Create("map", &ctx); ASSERT_STREQ(metric->Name(), "map"); EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}, {}, {}, data_split_mode), 1, kRtEps); @@ -150,7 +150,7 @@ inline void VerifyMAP(DataSplitMode data_split_mode = DataSplitMode::kRow) { } inline void VerifyNDCGExpGain(DataSplitMode data_split_mode = DataSplitMode::kRow) { - Context ctx = MakeCUDACtx(GPUIDX); + Context ctx = MakeCUDACtx(GetGPUId()); auto p_fmat = xgboost::RandomDataGenerator{0, 0, 0}.GenerateDMatrix(); MetaInfo& info = p_fmat->Info(); diff --git a/tests/cpp/metric/test_survival_metric.cu b/tests/cpp/metric/test_survival_metric.cu index e3f4501b5..da97b083b 100644 --- a/tests/cpp/metric/test_survival_metric.cu +++ b/tests/cpp/metric/test_survival_metric.cu @@ -12,26 +12,26 @@ namespace common { TEST(Metric, DeclareUnifiedTest(AFTNegLogLik)) { VerifyAFTNegLogLik(); } TEST_F(DeclareUnifiedDistributedTest(MetricTest), AFTNegLogLikRowSplit) { - RunWithInMemoryCommunicator(world_size_, &VerifyAFTNegLogLik, DataSplitMode::kRow); + DoTest(VerifyAFTNegLogLik, DataSplitMode::kRow); } TEST_F(DeclareUnifiedDistributedTest(MetricTest), AFTNegLogLikColumnSplit) { - RunWithInMemoryCommunicator(world_size_, &VerifyAFTNegLogLik, DataSplitMode::kCol); + DoTest(VerifyAFTNegLogLik, DataSplitMode::kCol); } TEST(Metric, DeclareUnifiedTest(IntervalRegressionAccuracy)) { VerifyIntervalRegressionAccuracy(); } TEST_F(DeclareUnifiedDistributedTest(MetricTest), IntervalRegressionAccuracyRowSplit) { - RunWithInMemoryCommunicator(world_size_, &VerifyIntervalRegressionAccuracy, DataSplitMode::kRow); + DoTest(VerifyIntervalRegressionAccuracy, DataSplitMode::kRow); } TEST_F(DeclareUnifiedDistributedTest(MetricTest), IntervalRegressionAccuracyColumnSplit) { - RunWithInMemoryCommunicator(world_size_, &VerifyIntervalRegressionAccuracy, DataSplitMode::kCol); + DoTest(VerifyIntervalRegressionAccuracy, DataSplitMode::kCol); } // Test configuration of AFT metric TEST(AFTNegLogLikMetric, DeclareUnifiedTest(Configuration)) { - auto ctx = MakeCUDACtx(GPUIDX); + auto ctx = MakeCUDACtx(GetGPUId()); std::unique_ptr metric(Metric::Create("aft-nloglik", &ctx)); metric->Configure({{"aft_loss_distribution", "normal"}, {"aft_loss_distribution_scale", "10"}}); @@ -42,7 +42,7 @@ TEST(AFTNegLogLikMetric, DeclareUnifiedTest(Configuration)) { EXPECT_EQ(get(aft_param_json["aft_loss_distribution"]), "normal"); EXPECT_EQ(get(aft_param_json["aft_loss_distribution_scale"]), "10"); - CheckDeterministicMetricElementWise(StringView{"aft-nloglik"}, GPUIDX); + CheckDeterministicMetricElementWise(StringView{"aft-nloglik"}, GetGPUId()); } } // namespace common } // namespace xgboost diff --git a/tests/cpp/metric/test_survival_metric.h b/tests/cpp/metric/test_survival_metric.h index 1626d3772..5baa5b5a4 100644 --- a/tests/cpp/metric/test_survival_metric.h +++ b/tests/cpp/metric/test_survival_metric.h @@ -48,7 +48,7 @@ inline void CheckDeterministicMetricElementWise(StringView name, int32_t device) } inline void VerifyAFTNegLogLik(DataSplitMode data_split_mode = DataSplitMode::kRow) { - auto ctx = MakeCUDACtx(GPUIDX); + auto ctx = MakeCUDACtx(GetGPUId()); /** * Test aggregate output from the AFT metric over a small test data set. @@ -79,7 +79,7 @@ inline void VerifyAFTNegLogLik(DataSplitMode data_split_mode = DataSplitMode::kR } inline void VerifyIntervalRegressionAccuracy(DataSplitMode data_split_mode = DataSplitMode::kRow) { - auto ctx = MakeCUDACtx(GPUIDX); + auto ctx = MakeCUDACtx(GetGPUId()); auto p_fmat = EmptyDMatrix(); MetaInfo& info = p_fmat->Info(); @@ -101,7 +101,7 @@ inline void VerifyIntervalRegressionAccuracy(DataSplitMode data_split_mode = Dat info.labels_lower_bound_.HostVector()[0] = 70.0f; EXPECT_FLOAT_EQ(metric->Evaluate(preds, p_fmat), 0.25f); - CheckDeterministicMetricElementWise(StringView{"interval-regression-accuracy"}, GPUIDX); + CheckDeterministicMetricElementWise(StringView{"interval-regression-accuracy"}, GetGPUId()); } } // namespace common } // namespace xgboost diff --git a/tests/cpp/objective/test_aft_obj.cc b/tests/cpp/objective/test_aft_obj.cc index 74973918c..60aebdf3a 100644 --- a/tests/cpp/objective/test_aft_obj.cc +++ b/tests/cpp/objective/test_aft_obj.cc @@ -16,7 +16,7 @@ namespace xgboost { namespace common { TEST(Objective, DeclareUnifiedTest(AFTObjConfiguration)) { - auto ctx = MakeCUDACtx(GPUIDX); + auto ctx = MakeCUDACtx(GetGPUId()); std::unique_ptr objective(ObjFunction::Create("survival:aft", &ctx)); objective->Configure({ {"aft_loss_distribution", "logistic"}, {"aft_loss_distribution_scale", "5"} }); @@ -77,7 +77,7 @@ static inline void CheckGPairOverGridPoints( } TEST(Objective, DeclareUnifiedTest(AFTObjGPairUncensoredLabels)) { - auto ctx = MakeCUDACtx(GPUIDX); + auto ctx = MakeCUDACtx(GetGPUId()); std::unique_ptr obj(ObjFunction::Create("survival:aft", &ctx)); CheckGPairOverGridPoints(obj.get(), 100.0f, 100.0f, "normal", @@ -101,7 +101,7 @@ TEST(Objective, DeclareUnifiedTest(AFTObjGPairUncensoredLabels)) { } TEST(Objective, DeclareUnifiedTest(AFTObjGPairLeftCensoredLabels)) { - auto ctx = MakeCUDACtx(GPUIDX); + auto ctx = MakeCUDACtx(GetGPUId()); std::unique_ptr obj(ObjFunction::Create("survival:aft", &ctx)); CheckGPairOverGridPoints(obj.get(), 0.0f, 20.0f, "normal", @@ -122,7 +122,7 @@ TEST(Objective, DeclareUnifiedTest(AFTObjGPairLeftCensoredLabels)) { } TEST(Objective, DeclareUnifiedTest(AFTObjGPairRightCensoredLabels)) { - auto ctx = MakeCUDACtx(GPUIDX); + auto ctx = MakeCUDACtx(GetGPUId()); std::unique_ptr obj(ObjFunction::Create("survival:aft", &ctx)); CheckGPairOverGridPoints(obj.get(), 60.0f, std::numeric_limits::infinity(), "normal", @@ -146,7 +146,7 @@ TEST(Objective, DeclareUnifiedTest(AFTObjGPairRightCensoredLabels)) { } TEST(Objective, DeclareUnifiedTest(AFTObjGPairIntervalCensoredLabels)) { - auto ctx = MakeCUDACtx(GPUIDX); + auto ctx = MakeCUDACtx(GetGPUId()); std::unique_ptr obj(ObjFunction::Create("survival:aft", &ctx)); CheckGPairOverGridPoints(obj.get(), 16.0f, 200.0f, "normal", diff --git a/tests/cpp/objective/test_hinge.cc b/tests/cpp/objective/test_hinge.cc index 17d2609d4..a4b8525fa 100644 --- a/tests/cpp/objective/test_hinge.cc +++ b/tests/cpp/objective/test_hinge.cc @@ -6,7 +6,7 @@ #include "../helpers.h" namespace xgboost { TEST(Objective, DeclareUnifiedTest(HingeObj)) { - Context ctx = MakeCUDACtx(GPUIDX); + Context ctx = MakeCUDACtx(GetGPUId()); std::unique_ptr obj{ObjFunction::Create("binary:hinge", &ctx)}; float eps = std::numeric_limits::min(); diff --git a/tests/cpp/objective/test_lambdarank_obj.cc b/tests/cpp/objective/test_lambdarank_obj.cc index c808e97f0..0c65780ae 100644 --- a/tests/cpp/objective/test_lambdarank_obj.cc +++ b/tests/cpp/objective/test_lambdarank_obj.cc @@ -71,7 +71,7 @@ void TestNDCGGPair(Context const* ctx) { HostDeviceVector predts{0, 1, 0, 1}; MetaInfo info; - info.labels = linalg::Tensor{{0, 1, 0, 1}, {4, 1}, GPUIDX}; + info.labels = linalg::Tensor{{0, 1, 0, 1}, {4, 1}, GetGPUId()}; info.group_ptr_ = {0, 2, 4}; info.num_row_ = 4; HostDeviceVector gpairs; diff --git a/tests/cpp/objective/test_multiclass_obj.cc b/tests/cpp/objective/test_multiclass_obj.cc index d028ef9cf..fa8fc27e4 100644 --- a/tests/cpp/objective/test_multiclass_obj.cc +++ b/tests/cpp/objective/test_multiclass_obj.cc @@ -9,7 +9,7 @@ namespace xgboost { TEST(Objective, DeclareUnifiedTest(SoftmaxMultiClassObjGPair)) { - Context ctx = MakeCUDACtx(GPUIDX); + Context ctx = MakeCUDACtx(GetGPUId()); std::vector> args {{"num_class", "3"}}; std::unique_ptr obj { ObjFunction::Create("multi:softmax", &ctx) @@ -36,7 +36,7 @@ TEST(Objective, DeclareUnifiedTest(SoftmaxMultiClassObjGPair)) { } TEST(Objective, DeclareUnifiedTest(SoftmaxMultiClassBasic)) { - auto ctx = MakeCUDACtx(GPUIDX); + auto ctx = MakeCUDACtx(GetGPUId()); std::vector> args{ std::pair("num_class", "3")}; @@ -57,7 +57,7 @@ TEST(Objective, DeclareUnifiedTest(SoftmaxMultiClassBasic)) { } TEST(Objective, DeclareUnifiedTest(SoftprobMultiClassBasic)) { - Context ctx = MakeCUDACtx(GPUIDX); + Context ctx = MakeCUDACtx(GetGPUId()); std::vector> args { std::pair("num_class", "3")}; diff --git a/tests/cpp/objective/test_quantile_obj.cc b/tests/cpp/objective/test_quantile_obj.cc index b263b4a8f..5078440bb 100644 --- a/tests/cpp/objective/test_quantile_obj.cc +++ b/tests/cpp/objective/test_quantile_obj.cc @@ -14,7 +14,7 @@ namespace xgboost { TEST(Objective, DeclareUnifiedTest(Quantile)) { - Context ctx = MakeCUDACtx(GPUIDX); + Context ctx = MakeCUDACtx(GetGPUId()); { Args args{{"quantile_alpha", "[0.6, 0.8]"}}; @@ -37,7 +37,7 @@ TEST(Objective, DeclareUnifiedTest(Quantile)) { } TEST(Objective, DeclareUnifiedTest(QuantileIntercept)) { - Context ctx = MakeCUDACtx(GPUIDX); + Context ctx = MakeCUDACtx(GetGPUId()); Args args{{"quantile_alpha", "[0.6, 0.8]"}}; std::unique_ptr obj{ObjFunction::Create("reg:quantileerror", &ctx)}; obj->Configure(args); diff --git a/tests/cpp/objective/test_regression_obj.cc b/tests/cpp/objective/test_regression_obj.cc index b8a40603b..635fae997 100644 --- a/tests/cpp/objective/test_regression_obj.cc +++ b/tests/cpp/objective/test_regression_obj.cc @@ -17,7 +17,7 @@ namespace xgboost { TEST(Objective, DeclareUnifiedTest(LinearRegressionGPair)) { - Context ctx = MakeCUDACtx(GPUIDX); + Context ctx = MakeCUDACtx(GetGPUId()); std::vector> args; std::unique_ptr obj{ObjFunction::Create("reg:squarederror", &ctx)}; @@ -39,7 +39,7 @@ TEST(Objective, DeclareUnifiedTest(LinearRegressionGPair)) { } TEST(Objective, DeclareUnifiedTest(SquaredLog)) { - Context ctx = MakeCUDACtx(GPUIDX); + Context ctx = MakeCUDACtx(GetGPUId()); std::vector> args; std::unique_ptr obj{ObjFunction::Create("reg:squaredlogerror", &ctx)}; @@ -62,7 +62,7 @@ TEST(Objective, DeclareUnifiedTest(SquaredLog)) { } TEST(Objective, DeclareUnifiedTest(PseudoHuber)) { - Context ctx = MakeCUDACtx(GPUIDX); + Context ctx = MakeCUDACtx(GetGPUId()); Args args; std::unique_ptr obj{ObjFunction::Create("reg:pseudohubererror", &ctx)}; @@ -91,7 +91,7 @@ TEST(Objective, DeclareUnifiedTest(PseudoHuber)) { } TEST(Objective, DeclareUnifiedTest(LogisticRegressionGPair)) { - Context ctx = MakeCUDACtx(GPUIDX); + Context ctx = MakeCUDACtx(GetGPUId()); std::vector> args; std::unique_ptr obj{ObjFunction::Create("reg:logistic", &ctx)}; @@ -107,7 +107,7 @@ TEST(Objective, DeclareUnifiedTest(LogisticRegressionGPair)) { } TEST(Objective, DeclareUnifiedTest(LogisticRegressionBasic)) { - Context ctx = MakeCUDACtx(GPUIDX); + Context ctx = MakeCUDACtx(GetGPUId()); std::vector> args; std::unique_ptr obj{ObjFunction::Create("reg:logistic", &ctx)}; @@ -136,7 +136,7 @@ TEST(Objective, DeclareUnifiedTest(LogisticRegressionBasic)) { } TEST(Objective, DeclareUnifiedTest(LogisticRawGPair)) { - Context ctx = MakeCUDACtx(GPUIDX); + Context ctx = MakeCUDACtx(GetGPUId()); std::vector> args; std::unique_ptr obj { ObjFunction::Create("binary:logitraw", &ctx) @@ -152,7 +152,7 @@ TEST(Objective, DeclareUnifiedTest(LogisticRawGPair)) { } TEST(Objective, DeclareUnifiedTest(PoissonRegressionGPair)) { - Context ctx = MakeCUDACtx(GPUIDX); + Context ctx = MakeCUDACtx(GetGPUId()); std::vector> args; std::unique_ptr obj { ObjFunction::Create("count:poisson", &ctx) @@ -176,7 +176,7 @@ TEST(Objective, DeclareUnifiedTest(PoissonRegressionGPair)) { } TEST(Objective, DeclareUnifiedTest(PoissonRegressionBasic)) { - Context ctx = MakeCUDACtx(GPUIDX); + Context ctx = MakeCUDACtx(GetGPUId()); std::vector> args; std::unique_ptr obj { ObjFunction::Create("count:poisson", &ctx) @@ -205,7 +205,7 @@ TEST(Objective, DeclareUnifiedTest(PoissonRegressionBasic)) { } TEST(Objective, DeclareUnifiedTest(GammaRegressionGPair)) { - Context ctx = MakeCUDACtx(GPUIDX); + Context ctx = MakeCUDACtx(GetGPUId()); std::vector> args; std::unique_ptr obj { ObjFunction::Create("reg:gamma", &ctx) @@ -227,7 +227,7 @@ TEST(Objective, DeclareUnifiedTest(GammaRegressionGPair)) { } TEST(Objective, DeclareUnifiedTest(GammaRegressionBasic)) { - Context ctx = MakeCUDACtx(GPUIDX); + Context ctx = MakeCUDACtx(GetGPUId()); std::vector> args; std::unique_ptr obj{ObjFunction::Create("reg:gamma", &ctx)}; @@ -256,7 +256,7 @@ TEST(Objective, DeclareUnifiedTest(GammaRegressionBasic)) { } TEST(Objective, DeclareUnifiedTest(TweedieRegressionGPair)) { - Context ctx = MakeCUDACtx(GPUIDX); + Context ctx = MakeCUDACtx(GetGPUId()); std::vector> args; std::unique_ptr obj{ObjFunction::Create("reg:tweedie", &ctx)}; @@ -280,7 +280,7 @@ TEST(Objective, DeclareUnifiedTest(TweedieRegressionGPair)) { #if defined(__CUDACC__) TEST(Objective, CPU_vs_CUDA) { - Context ctx = MakeCUDACtx(GPUIDX); + Context ctx = MakeCUDACtx(GetGPUId()); ObjFunction* obj = ObjFunction::Create("reg:squarederror", &ctx); HostDeviceVector cpu_out_preds; @@ -331,7 +331,7 @@ TEST(Objective, CPU_vs_CUDA) { #endif TEST(Objective, DeclareUnifiedTest(TweedieRegressionBasic)) { - Context ctx = MakeCUDACtx(GPUIDX); + Context ctx = MakeCUDACtx(GetGPUId()); std::vector> args; std::unique_ptr obj{ObjFunction::Create("reg:tweedie", &ctx)}; @@ -360,7 +360,7 @@ TEST(Objective, DeclareUnifiedTest(TweedieRegressionBasic)) { // CoxRegression not implemented in GPU code, no need for testing. #if !defined(__CUDACC__) TEST(Objective, CoxRegressionGPair) { - Context ctx = MakeCUDACtx(GPUIDX); + Context ctx = MakeCUDACtx(GetGPUId()); std::vector> args; std::unique_ptr obj{ObjFunction::Create("survival:cox", &ctx)}; @@ -375,7 +375,7 @@ TEST(Objective, CoxRegressionGPair) { #endif TEST(Objective, DeclareUnifiedTest(AbsoluteError)) { - Context ctx = MakeCUDACtx(GPUIDX); + Context ctx = MakeCUDACtx(GetGPUId()); std::unique_ptr obj{ObjFunction::Create("reg:absoluteerror", &ctx)}; obj->Configure({}); CheckConfigReload(obj, "reg:absoluteerror"); @@ -419,7 +419,7 @@ TEST(Objective, DeclareUnifiedTest(AbsoluteError)) { } TEST(Objective, DeclareUnifiedTest(AbsoluteErrorLeaf)) { - Context ctx = MakeCUDACtx(GPUIDX); + Context ctx = MakeCUDACtx(GetGPUId()); bst_target_t constexpr kTargets = 3, kRows = 16; std::unique_ptr obj{ObjFunction::Create("reg:absoluteerror", &ctx)}; obj->Configure({}); diff --git a/tests/cpp/plugin/test_example_objective.cc b/tests/cpp/plugin/test_example_objective.cc index ccb83c781..29fe2ad2b 100644 --- a/tests/cpp/plugin/test_example_objective.cc +++ b/tests/cpp/plugin/test_example_objective.cc @@ -5,7 +5,7 @@ namespace xgboost { TEST(Plugin, ExampleObjective) { - xgboost::Context ctx = MakeCUDACtx(GPUIDX); + xgboost::Context ctx = MakeCUDACtx(GetGPUId()); auto* obj = xgboost::ObjFunction::Create("mylogistic", &ctx); ASSERT_EQ(obj->DefaultEvalMetric(), std::string{"logloss"}); delete obj; diff --git a/tests/cpp/plugin/test_federated_adapter.cu b/tests/cpp/plugin/test_federated_adapter.cu index 134446f11..75422fcca 100644 --- a/tests/cpp/plugin/test_federated_adapter.cu +++ b/tests/cpp/plugin/test_federated_adapter.cu @@ -12,6 +12,7 @@ #include "../../../src/collective/communicator-inl.cuh" #include "../../../src/collective/device_communicator_adapter.cuh" #include "./helpers.h" +#include "../helpers.h" namespace xgboost::collective { @@ -26,10 +27,12 @@ namespace { void VerifyAllReduceSum() { auto const world_size = collective::GetWorldSize(); auto const rank = collective::GetRank(); + auto const device = GetGPUId(); int count = 3; + common::SetDevice(device); thrust::device_vector buffer(count, 0); thrust::sequence(buffer.begin(), buffer.end()); - collective::AllReduce(rank, buffer.data().get(), count); + collective::AllReduce(device, buffer.data().get(), count); thrust::host_vector host_buffer = buffer; EXPECT_EQ(host_buffer.size(), count); for (auto i = 0; i < count; i++) { @@ -39,10 +42,6 @@ void VerifyAllReduceSum() { } // anonymous namespace TEST_F(FederatedAdapterTest, MGPUAllReduceSum) { - auto const n_gpus = common::AllVisibleGPUs(); - if (n_gpus <= 1) { - GTEST_SKIP() << "Skipping MGPUAllReduceSum test with # GPUs = " << n_gpus; - } RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyAllReduceSum); } @@ -50,13 +49,15 @@ namespace { void VerifyAllGatherV() { auto const world_size = collective::GetWorldSize(); auto const rank = collective::GetRank(); + auto const device = GetGPUId(); int const count = rank + 2; + common::SetDevice(device); thrust::device_vector buffer(count, 0); thrust::sequence(buffer.begin(), buffer.end()); std::vector segments(world_size); dh::caching_device_vector receive_buffer{}; - collective::AllGatherV(rank, buffer.data().get(), count, &segments, &receive_buffer); + collective::AllGatherV(device, buffer.data().get(), count, &segments, &receive_buffer); EXPECT_EQ(segments[0], 2); EXPECT_EQ(segments[1], 3); @@ -70,11 +71,6 @@ void VerifyAllGatherV() { } // anonymous namespace TEST_F(FederatedAdapterTest, MGPUAllGatherV) { - auto const n_gpus = common::AllVisibleGPUs(); - if (n_gpus <= 1) { - GTEST_SKIP() << "Skipping MGPUAllGatherV test with # GPUs = " << n_gpus; - } RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyAllGatherV); } - } // namespace xgboost::collective diff --git a/tests/cpp/predictor/test_gpu_predictor.cu b/tests/cpp/predictor/test_gpu_predictor.cu index be0cad5ce..ecddf2288 100644 --- a/tests/cpp/predictor/test_gpu_predictor.cu +++ b/tests/cpp/predictor/test_gpu_predictor.cu @@ -60,7 +60,7 @@ void VerifyBasicColumnSplit(std::array, 32> const& expected_r auto const world_size = collective::GetWorldSize(); auto const rank = collective::GetRank(); - auto ctx = MakeCUDACtx(rank); + auto ctx = MakeCUDACtx(GetGPUId()); std::unique_ptr predictor = std::unique_ptr(Predictor::Create("gpu_predictor", &ctx)); predictor->Configure({}); @@ -85,12 +85,9 @@ void VerifyBasicColumnSplit(std::array, 32> const& expected_r } } // anonymous namespace -TEST(GPUPredictor, MGPUBasicColumnSplit) { - auto const n_gpus = common::AllVisibleGPUs(); - if (n_gpus <= 1) { - GTEST_SKIP() << "Skipping MGPUIBasicColumnSplit test with # GPUs = " << n_gpus; - } +class MGPUPredictorTest : public BaseMGPUTest {}; +TEST_F(MGPUPredictorTest, BasicColumnSplit) { auto ctx = MakeCUDACtx(0); std::unique_ptr predictor = std::unique_ptr(Predictor::Create("gpu_predictor", &ctx)); @@ -114,7 +111,7 @@ TEST(GPUPredictor, MGPUBasicColumnSplit) { result[i - 1] = out_predictions_h; } - RunWithInMemoryCommunicator(n_gpus, VerifyBasicColumnSplit, result); + DoTest(VerifyBasicColumnSplit, result); } TEST(GPUPredictor, EllpackBasic) { @@ -286,7 +283,7 @@ TEST(GPUPredictor, CategoricalPredictLeaf) { TEST(GPUPredictor, PredictLeafBasic) { size_t constexpr kRows = 5, kCols = 5; auto dmat = RandomDataGenerator(kRows, kCols, 0).Device(0).GenerateDMatrix(); - auto lparam = MakeCUDACtx(GPUIDX); + auto lparam = MakeCUDACtx(GetGPUId()); std::unique_ptr gpu_predictor = std::unique_ptr(Predictor::Create("gpu_predictor", &lparam)); gpu_predictor->Configure({});