From 4f75f514ce48936e654c6baddccb7fb864e16af2 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Wed, 17 Mar 2021 06:23:35 +0800 Subject: [PATCH] Fix GPU RF (#6755) * Fix sampling. --- src/tree/updater_gpu_hist.cu | 9 ++++++++- tests/cpp/tree/test_gpu_hist.cu | 5 ++++- tests/python-gpu/test_gpu_with_sklearn.py | 6 +++++- tests/python/test_with_sklearn.py | 15 +++++++++------ 4 files changed, 26 insertions(+), 9 deletions(-) diff --git a/src/tree/updater_gpu_hist.cu b/src/tree/updater_gpu_hist.cu index 14eec53a6..8b46f7468 100644 --- a/src/tree/updater_gpu_hist.cu +++ b/src/tree/updater_gpu_hist.cu @@ -169,6 +169,7 @@ struct GPUHistMakerDevice { std::unique_ptr row_partitioner; DeviceHistogram hist{}; + dh::caching_device_vector d_gpair; // storage for gpair; common::Span gpair; dh::caching_device_vector monotone_constraints; @@ -269,7 +270,13 @@ struct GPUHistMakerDevice { std::fill(node_sum_gradients.begin(), node_sum_gradients.end(), GradientPair()); - auto sample = sampler->Sample(dh_gpair->DeviceSpan(), dmat); + if (d_gpair.size() != dh_gpair->Size()) { + d_gpair.resize(dh_gpair->Size()); + } + thrust::copy(thrust::device, dh_gpair->ConstDevicePointer(), + dh_gpair->ConstDevicePointer() + dh_gpair->Size(), + d_gpair.begin()); + auto sample = sampler->Sample(dh::ToSpan(d_gpair), dmat); page = sample.page; gpair = sample.gpair; diff --git a/tests/cpp/tree/test_gpu_hist.cu b/tests/cpp/tree/test_gpu_hist.cu index 37f738c24..5d4f6e864 100644 --- a/tests/cpp/tree/test_gpu_hist.cu +++ b/tests/cpp/tree/test_gpu_hist.cu @@ -503,12 +503,15 @@ TEST(GpuHist, ExternalMemoryWithSampling) { auto gpair = GenerateRandomGradients(kRows); // Build a tree using the in-memory DMatrix. + auto rng = common::GlobalRandom(); + RegTree tree; HostDeviceVector preds(kRows, 0.0, 0); UpdateTree(&gpair, dmat.get(), 0, &tree, &preds, kSubsample, kSamplingMethod, kRows); // Build another tree using multiple ELLPACK pages. + common::GlobalRandom() = rng; RegTree tree_ext; HostDeviceVector preds_ext(kRows, 0.0, 0); UpdateTree(&gpair, dmat_ext.get(), kPageSize, &tree_ext, &preds_ext, @@ -518,7 +521,7 @@ TEST(GpuHist, ExternalMemoryWithSampling) { auto preds_h = preds.ConstHostVector(); auto preds_ext_h = preds_ext.ConstHostVector(); for (int i = 0; i < kRows; i++) { - EXPECT_NEAR(preds_h[i], preds_ext_h[i], 2e-3); + EXPECT_NEAR(preds_h[i], preds_ext_h[i], 1e-3); } } diff --git a/tests/python-gpu/test_gpu_with_sklearn.py b/tests/python-gpu/test_gpu_with_sklearn.py index bae0a5dd5..8f99cba97 100644 --- a/tests/python-gpu/test_gpu_with_sklearn.py +++ b/tests/python-gpu/test_gpu_with_sklearn.py @@ -33,4 +33,8 @@ def test_gpu_binary_classification(): def test_boost_from_prediction_gpu_hist(): - cpu_test = twskl.run_boost_from_prediction('gpu_hist') + twskl.run_boost_from_prediction('gpu_hist') + + +def test_num_parallel_tree(): + twskl.run_boston_housing_rf_regression("gpu_hist") diff --git a/tests/python/test_with_sklearn.py b/tests/python/test_with_sklearn.py index 29eaeb449..c5081fd83 100644 --- a/tests/python/test_with_sklearn.py +++ b/tests/python/test_with_sklearn.py @@ -357,23 +357,26 @@ def test_boston_housing_regression(): assert mean_squared_error(preds4, labels) < 350 -def test_boston_housing_rf_regression(): +def run_boston_housing_rf_regression(tree_method): from sklearn.metrics import mean_squared_error from sklearn.datasets import load_boston from sklearn.model_selection import KFold - boston = load_boston() - y = boston['target'] - X = boston['data'] + X, y = load_boston(return_X_y=True) kf = KFold(n_splits=2, shuffle=True, random_state=rng) for train_index, test_index in kf.split(X, y): - xgb_model = xgb.XGBRFRegressor(random_state=42).fit( - X[train_index], y[train_index]) + xgb_model = xgb.XGBRFRegressor(random_state=42, tree_method=tree_method).fit( + X[train_index], y[train_index] + ) preds = xgb_model.predict(X[test_index]) labels = y[test_index] assert mean_squared_error(preds, labels) < 35 +def test_boston_housing_rf_regression(): + run_boston_housing_rf_regression("hist") + + def test_parameter_tuning(): from sklearn.model_selection import GridSearchCV from sklearn.datasets import load_boston