From 6d1452074a3399dfd4a8e857cb64de3d705ef480 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Tue, 27 Sep 2022 21:18:23 +0800 Subject: [PATCH] Remove MGPU cpp tests. (#8276) Co-authored-by: Hyunsu Philip Cho --- src/common/common.cu | 8 ++++- src/common/common.h | 10 ++++++ src/learner.cc | 2 ++ tests/buildkite/pipeline.yml | 2 +- tests/cpp/common/test_host_device_vector.cu | 20 +++--------- tests/cpp/common/test_transform_range.cu | 35 --------------------- tests/cpp/metric/test_multiclass_metric.cc | 26 --------------- tests/cpp/predictor/test_cpu_predictor.cc | 4 +-- tests/cpp/predictor/test_gpu_predictor.cu | 18 +---------- tests/python-gpu/test_gpu_prediction.py | 31 +++++++++++++----- 10 files changed, 52 insertions(+), 104 deletions(-) delete mode 100644 tests/cpp/common/test_transform_range.cu diff --git a/src/common/common.cu b/src/common/common.cu index 4636a4cdc..b6965904a 100644 --- a/src/common/common.cu +++ b/src/common/common.cu @@ -1,11 +1,17 @@ /*! - * Copyright 2018 XGBoost contributors + * Copyright 2018-2022 XGBoost contributors */ #include "common.h" namespace xgboost { namespace common { +void SetDevice(std::int32_t device) { + if (device >= 0) { + dh::safe_cuda(cudaSetDevice(device)); + } +} + int AllVisibleGPUs() { int n_visgpus = 0; try { diff --git a/src/common/common.h b/src/common/common.h index 1eaf9ae7f..b2d7211c6 100644 --- a/src/common/common.h +++ b/src/common/common.h @@ -246,6 +246,16 @@ inline void AssertOneAPISupport() { #endif // XGBOOST_USE_ONEAPI } +void SetDevice(std::int32_t device); + +#if !defined(XGBOOST_USE_CUDA) +inline void SetDevice(std::int32_t device) { + if (device >= 0) { + AssertGPUSupport(); + } +} +#endif + template > diff --git a/src/learner.cc b/src/learner.cc index 2ee83fb71..0d69db764 100644 --- a/src/learner.cc +++ b/src/learner.cc @@ -327,6 +327,8 @@ void GenericParameter::ConfigureGpuId(bool require_gpu) { // Just set it to CPU, don't think about it. this->UpdateAllowUnknown(Args{{"gpu_id", std::to_string(kCpuId)}}); #endif // defined(XGBOOST_USE_CUDA) + + common::SetDevice(this->gpu_id); } int32_t GenericParameter::Threads() const { diff --git a/tests/buildkite/pipeline.yml b/tests/buildkite/pipeline.yml index 86763e75c..af5d88f53 100644 --- a/tests/buildkite/pipeline.yml +++ b/tests/buildkite/pipeline.yml @@ -78,7 +78,7 @@ steps: command: "tests/buildkite/test-cpp-gpu.sh" key: test-cpp-gpu agents: - queue: linux-amd64-mgpu + queue: linux-amd64-gpu - label: ":console: Run integration tests with JVM packages" command: "tests/buildkite/test-integration-jvm-packages.sh" key: test-integration-jvm-packages diff --git a/tests/cpp/common/test_host_device_vector.cu b/tests/cpp/common/test_host_device_vector.cu index f38038585..ade2537f9 100644 --- a/tests/cpp/common/test_host_device_vector.cu +++ b/tests/cpp/common/test_host_device_vector.cu @@ -11,13 +11,14 @@ namespace xgboost { namespace common { - -void SetDevice(int device) { +namespace { +void SetDeviceForTest(int device) { int n_devices; dh::safe_cuda(cudaGetDeviceCount(&n_devices)); device %= n_devices; dh::safe_cuda(cudaSetDevice(device)); } +} // namespace struct HostDeviceVectorSetDeviceHandler { template @@ -57,7 +58,7 @@ void InitHostDeviceVector(size_t n, int device, HostDeviceVector *v) { void PlusOne(HostDeviceVector *v) { int device = v->DeviceIdx(); - SetDevice(device); + SetDeviceForTest(device); thrust::transform(dh::tcbegin(*v), dh::tcend(*v), dh::tbegin(*v), [=]__device__(unsigned int a){ return a + 1; }); ASSERT_TRUE(v->DeviceCanWrite()); @@ -68,7 +69,7 @@ void CheckDevice(HostDeviceVector* v, unsigned int first, GPUAccess access) { ASSERT_EQ(v->Size(), size); - SetDevice(v->DeviceIdx()); + SetDeviceForTest(v->DeviceIdx()); ASSERT_TRUE(thrust::equal(dh::tcbegin(*v), dh::tcend(*v), thrust::make_counting_iterator(first))); @@ -182,16 +183,5 @@ TEST(HostDeviceVector, Empty) { ASSERT_FALSE(another.Empty()); ASSERT_TRUE(vec.Empty()); } - -TEST(HostDeviceVector, MGPU_Basic) { // NOLINT - if (AllVisibleGPUs() < 2) { - LOG(WARNING) << "Not testing in multi-gpu environment."; - return; - } - - size_t n = 1001; - int device = 1; - TestHostDeviceVector(n, device); -} } // namespace common } // namespace xgboost diff --git a/tests/cpp/common/test_transform_range.cu b/tests/cpp/common/test_transform_range.cu deleted file mode 100644 index 172d7aeb3..000000000 --- a/tests/cpp/common/test_transform_range.cu +++ /dev/null @@ -1,35 +0,0 @@ -/*! - * Copyright 2018-2022 by XGBoost Contributors - * \brief This converts all tests from CPU to GPU. - */ -#include "test_transform_range.cc" - -namespace xgboost { -namespace common { - -TEST(Transform, MGPU_SpecifiedGpuId) { // NOLINT - if (AllVisibleGPUs() < 2) { - LOG(WARNING) << "Not testing in multi-gpu environment."; - return; - } - // Use 1 GPU, Numbering of GPU starts from 1 - auto device = 1; - auto const size {256}; - std::vector h_in(size); - std::vector h_out(size); - std::iota(h_in.begin(), h_in.end(), 0); - std::vector h_sol(size); - std::iota(h_sol.begin(), h_sol.end(), 0); - - const HostDeviceVector in_vec {h_in, device}; - HostDeviceVector out_vec {h_out, device}; - - ASSERT_NO_THROW(Transform<>::Init(TestTransformRange{}, Range{0, size}, - common::OmpGetNumThreads(0), device) - .Eval(&out_vec, &in_vec)); - std::vector res = out_vec.HostVector(); - ASSERT_TRUE(std::equal(h_sol.begin(), h_sol.end(), res.begin())); -} - -} // namespace common -} // namespace xgboost diff --git a/tests/cpp/metric/test_multiclass_metric.cc b/tests/cpp/metric/test_multiclass_metric.cc index 80757abb3..a2c4be8fc 100644 --- a/tests/cpp/metric/test_multiclass_metric.cc +++ b/tests/cpp/metric/test_multiclass_metric.cc @@ -84,29 +84,3 @@ TEST(Metric, DeclareUnifiedTest(MultiClassLogLoss)) { TestMultiClassLogLoss(GPUIDX); xgboost::CheckDeterministicMetricMultiClass(xgboost::StringView{"mlogloss"}, GPUIDX); } - -#if defined(__CUDACC__) -namespace xgboost { -namespace common { -TEST(Metric, MGPU_MultiClassError) { - if (AllVisibleGPUs() < 2) { - LOG(WARNING) << "Not testing in multi-gpu environment."; - return; - } - - { - TestMultiClassError(0); - } - { - TestMultiClassError(1); - } - { - TestMultiClassLogLoss(0); - } - { - TestMultiClassLogLoss(1); - } -} -} // namespace common -} // namespace xgboost -#endif // defined(__CUDACC__) diff --git a/tests/cpp/predictor/test_cpu_predictor.cc b/tests/cpp/predictor/test_cpu_predictor.cc index 8db605be3..137cb36fe 100644 --- a/tests/cpp/predictor/test_cpu_predictor.cc +++ b/tests/cpp/predictor/test_cpu_predictor.cc @@ -172,7 +172,7 @@ TEST(CpuPredictor, InplacePredict) { std::string arr_str; Json::Dump(array_interface, &arr_str); x->SetArrayData(arr_str.data()); - TestInplacePrediction(x, "cpu_predictor", kRows, kCols, -1); + TestInplacePrediction(x, "cpu_predictor", kRows, kCols, Context::kCpuId); } { @@ -189,7 +189,7 @@ TEST(CpuPredictor, InplacePredict) { Json::Dump(col_interface, &col_str); std::shared_ptr x{new data::DMatrixProxy}; x->SetCSRData(rptr_str.data(), col_str.data(), data_str.data(), kCols, true); - TestInplacePrediction(x, "cpu_predictor", kRows, kCols, -1); + TestInplacePrediction(x, "cpu_predictor", kRows, kCols, Context::kCpuId); } } diff --git a/tests/cpp/predictor/test_gpu_predictor.cu b/tests/cpp/predictor/test_gpu_predictor.cu index 2a0b69cbd..4a3293dbe 100644 --- a/tests/cpp/predictor/test_gpu_predictor.cu +++ b/tests/cpp/predictor/test_gpu_predictor.cu @@ -140,26 +140,10 @@ TEST(GPUPredictor, InplacePredictCuDF) { TestInplacePrediction(p_fmat, "gpu_predictor", kRows, kCols, 0); } -TEST(GPUPredictor, MGPU_InplacePredict) { // NOLINT - int32_t n_gpus = xgboost::common::AllVisibleGPUs(); - if (n_gpus <= 1) { - LOG(WARNING) << "GPUPredictor.MGPU_InplacePredict is skipped."; - return; - } - size_t constexpr kRows{128}, kCols{64}; - RandomDataGenerator gen(kRows, kCols, 0.5); - gen.Device(1); - HostDeviceVector data; - std::string interface_str = gen.GenerateArrayInterface(&data); - std::shared_ptr p_fmat{new data::DMatrixProxy}; - dynamic_cast(p_fmat.get())->SetCUDAArray(interface_str.c_str()); - TestInplacePrediction(p_fmat, "gpu_predictor", kRows, kCols, 1); - EXPECT_THROW(TestInplacePrediction(p_fmat, "gpu_predictor", kRows, kCols, 0), dmlc::Error); -} - TEST(GpuPredictor, LesserFeatures) { TestPredictionWithLesserFeatures("gpu_predictor"); } + // Very basic test of empty model TEST(GPUPredictor, ShapStump) { cudaSetDevice(0); diff --git a/tests/python-gpu/test_gpu_prediction.py b/tests/python-gpu/test_gpu_prediction.py index 4e41e637f..8976113ca 100644 --- a/tests/python-gpu/test_gpu_prediction.py +++ b/tests/python-gpu/test_gpu_prediction.py @@ -148,10 +148,9 @@ class TestGPUPredict: from_dmatrix = booster.predict(dtrain) cp.testing.assert_allclose(from_inplace, from_dmatrix) - @pytest.mark.skipif(**tm.no_cupy()) - def test_inplace_predict_cupy(self): + def run_inplace_predict_cupy(self, device: int) -> None: import cupy as cp - cp.cuda.runtime.setDevice(0) + cp.cuda.runtime.setDevice(device) rows = 1000 cols = 10 missing = 11 # set to integer for testing @@ -166,15 +165,17 @@ class TestGPUPredict: dtrain = xgb.DMatrix(X, y) - booster = xgb.train({'tree_method': 'gpu_hist'}, dtrain, num_boost_round=10) + booster = xgb.train( + {'tree_method': 'gpu_hist', "gpu_id": device}, dtrain, num_boost_round=10 + ) test = xgb.DMatrix(X[:10, ...], missing=missing) predt_from_array = booster.inplace_predict(X[:10, ...], missing=missing) predt_from_dmatrix = booster.predict(test) - cp.testing.assert_allclose(predt_from_array, predt_from_dmatrix) def predict_dense(x): + cp.cuda.runtime.setDevice(device) inplace_predt = booster.inplace_predict(x) d = xgb.DMatrix(x) copied_predt = cp.array(booster.predict(d)) @@ -183,7 +184,8 @@ class TestGPUPredict: # Don't do this on Windows, see issue #5793 if sys.platform.startswith("win"): pytest.skip( - 'Multi-threaded in-place prediction with cuPy is not working on Windows') + 'Multi-threaded in-place prediction with cuPy is not working on Windows' + ) for i in range(10): run_threaded_predict(X, rows, predict_dense) @@ -196,13 +198,28 @@ class TestGPUPredict: missing_idx = [i for i in range(0, X.shape[1], 16)] X[:, missing_idx] = missing - reg = xgb.XGBRegressor(tree_method="gpu_hist", n_estimators=8, missing=missing) + reg = xgb.XGBRegressor( + tree_method="gpu_hist", n_estimators=8, missing=missing, gpu_id=device + ) reg.fit(X, y) gpu_predt = reg.predict(X) reg.set_params(predictor="cpu_predictor") cpu_predt = reg.predict(X) np.testing.assert_allclose(gpu_predt, cpu_predt, atol=1e-6) + cp.cuda.runtime.setDevice(0) + + @pytest.mark.skipif(**tm.no_cupy()) + def test_inplace_predict_cupy(self): + self.run_inplace_predict_cupy(0) + + @pytest.mark.skipif(**tm.no_cupy()) + @pytest.mark.mgpu + def test_inplace_predict_cupy_specified_device(self): + import cupy as cp + n_devices = cp.cuda.runtime.getDeviceCount() + for d in range(n_devices): + self.run_inplace_predict_cupy(d) @pytest.mark.skipif(**tm.no_cupy()) @pytest.mark.skipif(**tm.no_cudf())