From 58d783df16d54361a50b98e211e98a36d4d876ac Mon Sep 17 00:00:00 2001 From: Andy Adinets Date: Tue, 28 Aug 2018 19:50:52 +0200 Subject: [PATCH] Fixed issue 3605. (#3628) * Fixed issue 3605. - https://github.com/dmlc/xgboost/issues/3605 * Fixed the bug in a better way. * Added a test to catch the bug. * Fixed linter errors. --- src/common/hist_util.cu | 12 ++++++------ tests/python-gpu/test_gpu_updaters.py | 2 +- tests/python/regression_test_utilities.py | 12 +++++++++++- 3 files changed, 18 insertions(+), 8 deletions(-) diff --git a/src/common/hist_util.cu b/src/common/hist_util.cu index 894d87751..0beb4fb55 100644 --- a/src/common/hist_util.cu +++ b/src/common/hist_util.cu @@ -257,13 +257,13 @@ struct GPUSketcher { n_cuts_cur_[icol] = std::min(n_cuts_, n_unique); // if less elements than cuts: copy all elements with their weights if (n_cuts_ > n_unique) { - auto weights2_iter = weights2_.begin(); - auto fvalues_iter = fvalues_cur_.begin(); - auto cuts_iter = cuts_d_.begin() + icol * n_cuts_; + float* weights2_ptr = weights2_.data().get(); + float* fvalues_ptr = fvalues_cur_.data().get(); + WXQSketch::Entry* cuts_ptr = cuts_d_.data().get() + icol * n_cuts_; dh::LaunchN(device_, n_unique, [=]__device__(size_t i) { - bst_float rmax = weights2_iter[i]; - bst_float rmin = i > 0 ? weights2_iter[i - 1] : 0; - cuts_iter[i] = WXQSketch::Entry(rmin, rmax, rmax - rmin, fvalues_iter[i]); + bst_float rmax = weights2_ptr[i]; + bst_float rmin = i > 0 ? weights2_ptr[i - 1] : 0; + cuts_ptr[i] = WXQSketch::Entry(rmin, rmax, rmax - rmin, fvalues_ptr[i]); }); } else if (n_cuts_cur_[icol] > 0) { // if more elements than cuts: use binary search on cumulative weights diff --git a/tests/python-gpu/test_gpu_updaters.py b/tests/python-gpu/test_gpu_updaters.py index 2fdbfaee4..6be0d9cc6 100644 --- a/tests/python-gpu/test_gpu_updaters.py +++ b/tests/python-gpu/test_gpu_updaters.py @@ -14,7 +14,7 @@ def assert_gpu_results(cpu_results, gpu_results): assert np.allclose(cpu_res["eval"][-1], gpu_res["eval"][-1], 1e-2, 1e-2) datasets = ["Boston", "Cancer", "Digits", "Sparse regression", - "Sparse regression with weights"] + "Sparse regression with weights", "Small weights regression"] class TestGPU(unittest.TestCase): def test_gpu_exact(self): diff --git a/tests/python/regression_test_utilities.py b/tests/python/regression_test_utilities.py index 6918ed6a7..8c15c5c4e 100644 --- a/tests/python/regression_test_utilities.py +++ b/tests/python/regression_test_utilities.py @@ -55,12 +55,20 @@ def get_sparse(): def get_sparse_weights(): + return get_weights_regression(1, 10) + + +def get_small_weights(): + return get_weights_regression(1e-6, 1e-5) + + +def get_weights_regression(min_weight, max_weight): rng = np.random.RandomState(199) n = 10000 sparsity = 0.25 X, y = datasets.make_regression(n, random_state=rng) X = np.array([[np.nan if rng.uniform(0, 1) < sparsity else x for x in x_row] for x_row in X]) - w = np.array([rng.uniform(1, 10) for i in range(n)]) + w = np.array([rng.uniform(min_weight, max_weight) for i in range(n)]) return X, y, w @@ -130,6 +138,8 @@ def run_suite(param, num_rounds=10, select_datasets=None, scale_features=False): Dataset("Sparse regression", get_sparse, "reg:linear", "rmse"), Dataset("Sparse regression with weights", get_sparse_weights, "reg:linear", "rmse", has_weights=True), + Dataset("Small weights regression", get_small_weights, + "reg:linear", "rmse", has_weights=True), Dataset("Boston External Memory", get_boston, "reg:linear", "rmse", use_external_memory=True) ]