Fixed issue 3605. (#3628)

* Fixed issue 3605.

- https://github.com/dmlc/xgboost/issues/3605

* Fixed the bug in a better way.

* Added a test to catch the bug.

* Fixed linter errors.
This commit is contained in:
Andy Adinets 2018-08-28 19:50:52 +02:00 committed by Philip Hyunsu Cho
parent 78bea0d204
commit 58d783df16
3 changed files with 18 additions and 8 deletions

View File

@@ -257,13 +257,13 @@ struct GPUSketcher {
n_cuts_cur_[icol] = std::min(n_cuts_, n_unique);
// if less elements than cuts: copy all elements with their weights
if (n_cuts_ > n_unique) {
auto weights2_iter = weights2_.begin();
auto fvalues_iter = fvalues_cur_.begin();
auto cuts_iter = cuts_d_.begin() + icol * n_cuts_;
float* weights2_ptr = weights2_.data().get();
float* fvalues_ptr = fvalues_cur_.data().get();
WXQSketch::Entry* cuts_ptr = cuts_d_.data().get() + icol * n_cuts_;
dh::LaunchN(device_, n_unique, [=]__device__(size_t i) {
bst_float rmax = weights2_iter[i];
bst_float rmin = i > 0 ? weights2_iter[i - 1] : 0;
cuts_iter[i] = WXQSketch::Entry(rmin, rmax, rmax - rmin, fvalues_iter[i]);
bst_float rmax = weights2_ptr[i];
bst_float rmin = i > 0 ? weights2_ptr[i - 1] : 0;
cuts_ptr[i] = WXQSketch::Entry(rmin, rmax, rmax - rmin, fvalues_ptr[i]);
});
} else if (n_cuts_cur_[icol] > 0) {
// if more elements than cuts: use binary search on cumulative weights

View File

@@ -14,7 +14,7 @@ def assert_gpu_results(cpu_results, gpu_results):
assert np.allclose(cpu_res["eval"][-1], gpu_res["eval"][-1], 1e-2, 1e-2)
datasets = ["Boston", "Cancer", "Digits", "Sparse regression",
"Sparse regression with weights"]
"Sparse regression with weights", "Small weights regression"]
class TestGPU(unittest.TestCase):
def test_gpu_exact(self):

View File

@@ -55,12 +55,20 @@ def get_sparse():
def get_sparse_weights():
return get_weights_regression(1, 10)
def get_small_weights():
return get_weights_regression(1e-6, 1e-5)
def get_weights_regression(min_weight, max_weight):
rng = np.random.RandomState(199)
n = 10000
sparsity = 0.25
X, y = datasets.make_regression(n, random_state=rng)
X = np.array([[np.nan if rng.uniform(0, 1) < sparsity else x for x in x_row] for x_row in X])
w = np.array([rng.uniform(1, 10) for i in range(n)])
w = np.array([rng.uniform(min_weight, max_weight) for i in range(n)])
return X, y, w
@@ -130,6 +138,8 @@ def run_suite(param, num_rounds=10, select_datasets=None, scale_features=False):
Dataset("Sparse regression", get_sparse, "reg:linear", "rmse"),
Dataset("Sparse regression with weights", get_sparse_weights,
"reg:linear", "rmse", has_weights=True),
Dataset("Small weights regression", get_small_weights,
"reg:linear", "rmse", has_weights=True),
Dataset("Boston External Memory", get_boston, "reg:linear", "rmse",
use_external_memory=True)
]