Use hypothesis (#5759)
* Use hypothesis
* Allow int64 array interface for groups
* Add packages to Windows CI
* Add to travis
* Make sure device index is set correctly
* Fix dask-cudf test
* appveyor
This commit is contained in:
@@ -1,30 +1,50 @@
|
||||
import sys
|
||||
import pytest
|
||||
import unittest
|
||||
|
||||
sys.path.append('tests/python/')
|
||||
import test_linear # noqa: E402
|
||||
import testing as tm # noqa: E402
|
||||
from hypothesis import strategies, given, settings, assume
|
||||
import xgboost as xgb
|
||||
sys.path.append("tests/python")
|
||||
import testing as tm
|
||||
|
||||
|
||||
class TestGPULinear(unittest.TestCase):
    # Dataset names that test_gpu_coordinate passes to test_linear.run_suite.
    datasets = ["Boston", "Digits", "Cancer", "Sparse regression"]

    # Parameter grid: every key maps to a list of candidate values.
    # Only 'coordinate_selection' lists more than one candidate.
    common_param = {
        'booster': ['gblinear'],
        'updater': ['gpu_coord_descent'],
        'eta': [0.5],
        'top_k': [10],
        'tolerance': [1e-5],
        'alpha': [.1],
        'lambda': [0.005],
        'coordinate_selection': ['cyclic', 'random', 'greedy'],
    }
|
||||
# Hypothesis strategy that draws one gblinear parameter dictionary per example.
# The 'updater' key is deliberately absent; each test sets it itself.
parameter_strategy = strategies.fixed_dictionaries({
    'booster': strategies.just('gblinear'),
    'eta': strategies.floats(0.01, 0.25),
    'tolerance': strategies.floats(1e-5, 1e-2),
    'nthread': strategies.integers(1, 4),
    'feature_selector': strategies.sampled_from(
        ['cyclic', 'shuffle', 'greedy', 'thrifty']),
    'top_k': strategies.integers(1, 10),
})
|
||||
|
||||
@pytest.mark.skipif(**tm.no_sklearn())
def test_gpu_coordinate(self):
    # Sweep every combination of the class-level grid, pinned to GPU 0, and
    # run the shared linear-model checks on each combination's results.
    search_space = self.common_param.copy()
    search_space['gpu_id'] = [0]
    for combo in test_linear.parameter_combinations(search_space):
        suite_results = test_linear.run_suite(
            combo, 100, self.datasets, scale_features=True)
        test_linear.assert_regression_result(suite_results, 1e-2)
        test_linear.assert_classification_result(suite_results)
|
||||
def train_result(param, dmat, num_rounds):
    """Train on ``dmat`` and return the evaluation history.

    ``dmat`` is also registered as the only evaluation set, under the name
    ``'train'``, so the returned dictionary (the one handed to ``xgb.train``
    as ``evals_result``) is keyed by ``'train'``.
    """
    history = {}
    watchlist = [(dmat, 'train')]
    xgb.train(param, dmat, num_rounds, watchlist,
              verbose_eval=False, evals_result=history)
    return history
|
||||
|
||||
|
||||
class TestGPULinear:
    """Hypothesis-driven tests for the ``gpu_coord_descent`` updater."""

    @given(parameter_strategy, strategies.integers(10, 50),
           tm.dataset_strategy)
    @settings(deadline=None)
    def test_gpu_coordinate(self, param, num_rounds, dataset):
        """Check the whole training-metric history with ``tm.non_increasing``."""
        # Discard generated datasets that have no labels.
        assume(len(dataset.y) > 0)
        param['updater'] = 'gpu_coord_descent'
        param = dataset.set_params(param)
        history = train_result(param, dataset.get_dmat(), num_rounds)
        metric_values = history['train'][dataset.metric]
        assert tm.non_increasing(metric_values)

    # With regularisation the loss is not guaranteed to fall at every
    # iteration, so only a weaker property is checked here: the loss at the
    # last iteration has not increased relative to the first iteration.
    @given(parameter_strategy, strategies.integers(10, 50),
           tm.dataset_strategy, strategies.floats(1e-5, 2.0),
           strategies.floats(1e-5, 2.0))
    @settings(deadline=None)
    def test_gpu_coordinate_regularised(self, param, num_rounds, dataset, alpha, lambd):
        """Compare only the first and last training-metric values."""
        # Discard generated datasets that have no labels.
        assume(len(dataset.y) > 0)
        param.update({
            'updater': 'gpu_coord_descent',
            'alpha': alpha,
            'lambda': lambd,
        })
        param = dataset.set_params(param)
        history = train_result(param, dataset.get_dmat(), num_rounds)
        metric_values = history['train'][dataset.metric]
        endpoints = [metric_values[0], metric_values[-1]]
        assert tm.non_increasing(endpoints)
|
||||
|
||||
@@ -4,9 +4,13 @@ import unittest
|
||||
import numpy as np
|
||||
import subprocess
|
||||
import os
|
||||
import sys
|
||||
import json
|
||||
import pytest
|
||||
|
||||
sys.path.append("tests/python")
|
||||
import testing as tm
|
||||
|
||||
import xgboost as xgb
|
||||
from xgboost import XGBClassifier
|
||||
|
||||
@@ -90,7 +94,6 @@ class TestPickling(unittest.TestCase):
|
||||
)
|
||||
status = subprocess.call(args, env=env)
|
||||
assert status == 0
|
||||
|
||||
os.remove(model_path)
|
||||
|
||||
def test_pickled_predictor(self):
|
||||
|
||||
@@ -158,10 +158,10 @@ class TestGPUPredict(unittest.TestCase):
|
||||
rows = 1000
|
||||
cols = 10
|
||||
rng = np.random.RandomState(1994)
|
||||
cp.cuda.runtime.setDevice(0)
|
||||
X = rng.randn(rows, cols)
|
||||
X = pd.DataFrame(X)
|
||||
y = rng.randn(rows)
|
||||
|
||||
X = cudf.from_pandas(X)
|
||||
|
||||
dtrain = xgb.DMatrix(X, y)
|
||||
|
||||
@@ -1,74 +1,71 @@
|
||||
import numpy as np
|
||||
import sys
|
||||
import unittest
|
||||
import pytest
|
||||
import xgboost as xgb
|
||||
from hypothesis import given, strategies, assume, settings, note
|
||||
|
||||
sys.path.append("tests/python")
|
||||
import testing as tm
|
||||
from regression_test_utilities import run_suite, parameter_combinations, \
|
||||
assert_results_non_increasing
|
||||
|
||||
# Hypothesis strategy that draws one tree-updater parameter dictionary per
# example. The trailing filter keeps a draw only when max_depth > 0, or when
# max_leaves > 0 and grow_policy is 'lossguide'.
parameter_strategy = strategies.fixed_dictionaries({
    'max_depth': strategies.integers(0, 11),
    'max_leaves': strategies.integers(0, 256),
    'max_bin': strategies.integers(2, 1024),
    'grow_policy': strategies.sampled_from(['lossguide', 'depthwise']),
    'single_precision_histogram': strategies.booleans(),
    'min_child_weight': strategies.floats(0.5, 2.0),
    'seed': strategies.integers(0, 10),
    # We cannot enable subsampling as the training loss can increase
    # 'subsample': strategies.floats(0.5, 1.0),
    'colsample_bytree': strategies.floats(0.5, 1.0),
    'colsample_bylevel': strategies.floats(0.5, 1.0),
}).filter(
    lambda x: x['max_depth'] > 0 or (
        x['max_leaves'] > 0 and x['grow_policy'] == 'lossguide'))
|
||||
|
||||
|
||||
def assert_gpu_results(cpu_results, gpu_results):
    """Assert that two result sets end with roughly the same evaluation value.

    Parameters
    ----------
    cpu_results, gpu_results
        Iterables of mappings; each mapping's ``"eval"`` entry is a sequence
        whose last element is the final evaluation value of one run.

    Raises
    ------
    AssertionError
        If the two iterables differ in length, or if any pair of final
        values is not close under ``rtol=0.1`` and ``atol=0.1``.
    """
    cpu_results = list(cpu_results)
    gpu_results = list(gpu_results)
    # zip() stops at the shorter input, so without this check a result set
    # missing on one side would be skipped silently instead of failing.
    assert len(cpu_results) == len(gpu_results), (
        "result count mismatch: {} vs {}".format(
            len(cpu_results), len(gpu_results)))
    for cpu_res, gpu_res in zip(cpu_results, gpu_results):
        # Check final eval result roughly equivalent
        assert np.allclose(cpu_res["eval"][-1], gpu_res["eval"][-1],
                           rtol=1e-1, atol=1e-1)
|
||||
def train_result(param, dmat, num_rounds):
    """Train on ``dmat`` and return the evaluation history.

    ``dmat`` is also registered as the only evaluation set, under the name
    ``'train'``, so the returned dictionary (the one handed to ``xgb.train``
    as ``evals_result``) is keyed by ``'train'``.
    """
    history = {}
    watchlist = [(dmat, 'train')]
    xgb.train(param, dmat, num_rounds, watchlist,
              verbose_eval=False, evals_result=history)
    return history
|
||||
|
||||
|
||||
# Dataset names passed to run_suite via ``select_datasets``.
datasets = [
    "Boston",
    "Cancer",
    "Digits",
    "Sparse regression",
    "Sparse regression with weights",
    "Small weights regression",
]

# Parameter grid expanded by parameter_combinations; each key lists its
# candidate values.
test_param = parameter_combinations({
    'gpu_id': [0],
    'max_depth': [2, 8],
    'max_leaves': [255, 4],
    'max_bin': [4, 256],
    'grow_policy': ['lossguide'],
    'single_precision_histogram': [True],
    'min_child_weight': [0],
    'lambda': [0],
})
|
||||
|
||||
|
||||
class TestGPU(unittest.TestCase):
|
||||
def test_gpu_hist(self):
    # For each grid entry: run the suite with 'gpu_hist', check the results
    # with assert_results_non_increasing, then rerun with 'hist' and compare
    # the two result sets. The grid entry itself is modified in place.
    for config in test_param:
        config['tree_method'] = 'gpu_hist'
        from_gpu = run_suite(config, select_datasets=datasets)
        assert_results_non_increasing(from_gpu, 1e-2)

        config['tree_method'] = 'hist'
        from_cpu = run_suite(config, select_datasets=datasets)
        assert_gpu_results(from_cpu, from_gpu)
|
||||
class TestGPUUpdaters:
|
||||
@given(parameter_strategy, strategies.integers(1, 20),
       tm.dataset_strategy)
@settings(deadline=None)
def test_gpu_hist(self, param, num_rounds, dataset):
    """Train with ``gpu_hist`` and check the training-metric history."""
    param['tree_method'] = 'gpu_hist'
    param = dataset.set_params(param)
    history = train_result(param, dataset.get_dmat(), num_rounds)
    # Attach the full history to the Hypothesis report for failing examples.
    note(history)
    metric_values = history['train'][dataset.metric]
    assert tm.non_increasing(metric_values)
|
||||
|
||||
@pytest.mark.skipif(**tm.no_cupy())
|
||||
def test_gpu_hist_device_dmatrix(self):
    # DeviceDMatrix does not currently accept sparse formats
    dense_datasets = ["Boston", "Cancer", "Digits"]
    for config in test_param:
        config['tree_method'] = 'gpu_hist'

        # Run once with DeviceQuantileDMatrix (given the same max_bin as the
        # training parameters), once with run_suite's default matrix type,
        # then compare the two result sets.
        from_device_dmatrix = run_suite(
            config,
            select_datasets=dense_datasets,
            DMatrixT=xgb.DeviceQuantileDMatrix,
            dmatrix_params={'max_bin': config['max_bin']})
        assert_results_non_increasing(from_device_dmatrix, 1e-2)
        from_default = run_suite(config, select_datasets=dense_datasets)
        assert_gpu_results(from_default, from_device_dmatrix)
|
||||
@given(parameter_strategy, strategies.integers(1, 20),
       tm.dataset_strategy)
@settings(deadline=None)
def test_gpu_hist_device_dmatrix(self, param, num_rounds, dataset):
    """Train with ``gpu_hist`` on the matrix from ``dataset.get_device_dmat()``."""
    # We cannot handle empty dataset yet
    assume(len(dataset.y) > 0)
    param['tree_method'] = 'gpu_hist'
    param = dataset.set_params(param)
    history = train_result(param, dataset.get_device_dmat(), num_rounds)
    # Attach the full history to the Hypothesis report for failing examples.
    note(history)
    metric_values = history['train'][dataset.metric]
    assert tm.non_increasing(metric_values)
|
||||
|
||||
# NOTE(rongou): Because the `Boston` dataset is too small, this only tests
# external memory mode with a single page. To test multiple pages, set
# DMatrix::kPageSize to, say, 1024.
def test_external_memory(self):
    # Only the last grid entry is exercised: the loop exits after its first
    # pass. That entry is modified in place.
    for config in reversed(test_param):
        config['tree_method'] = 'gpu_hist'
        config['gpu_page_size'] = 1024

        in_memory = run_suite(config, select_datasets=["Boston"])
        assert_results_non_increasing(in_memory, 1e-2)

        external = run_suite(config, select_datasets=["Boston External Memory"])
        assert_results_non_increasing(external, 1e-2)

        assert_gpu_results(in_memory, external)
        break
|
||||
@given(parameter_strategy, strategies.integers(1, 20),
       tm.dataset_strategy)
@settings(deadline=None)
def test_external_memory(self, param, num_rounds, dataset):
    """Train with ``gpu_hist`` on the matrix from ``dataset.get_external_dmat()``."""
    # We cannot handle empty dataset yet
    assume(len(dataset.y) > 0)
    param['tree_method'] = 'gpu_hist'
    param = dataset.set_params(param)
    history = train_result(param, dataset.get_external_dmat(), num_rounds)
    metric_values = history['train'][dataset.metric]
    assert tm.non_increasing(metric_values)
|
||||
|
||||
def test_with_empty_dmatrix(self):
|
||||
def test_empty_dmatrix_prediction(self):
|
||||
# FIXME(trivialfis): This should be done with all updaters
|
||||
kRows = 0
|
||||
kCols = 100
|
||||
@@ -94,13 +91,10 @@ class TestGPU(unittest.TestCase):
|
||||
np.testing.assert_allclose(predictions, 0.5, 1e-6)
|
||||
|
||||
@pytest.mark.mgpu
|
||||
def test_specified_gpu_id_gpu_update(self):
    # Grid pinned to gpu_id 1; every combination is run through the suite
    # and checked with assert_results_non_increasing.
    grid = {
        'gpu_id': [1],
        'max_depth': [8],
        'max_leaves': [255, 4],
        'max_bin': [2, 64],
        'grow_policy': ['lossguide'],
        'tree_method': ['gpu_hist'],
    }
    for config in parameter_combinations(grid):
        suite_results = run_suite(config, select_datasets=datasets)
        assert_results_non_increasing(suite_results, 1e-2)
|
||||
@given(tm.dataset_strategy, strategies.integers(0, 10))
@settings(deadline=None, max_examples=10)
def test_specified_gpu_id_gpu_update(self, dataset, gpu_id):
    """Train ten rounds with ``gpu_hist`` on an explicitly chosen ``gpu_id``."""
    base_param = {'tree_method': 'gpu_hist', 'gpu_id': gpu_id}
    param = dataset.set_params(base_param)
    history = train_result(param, dataset.get_dmat(), 10)
    metric_values = history['train'][dataset.metric]
    assert tm.non_increasing(metric_values)
|
||||
|
||||
@@ -31,7 +31,8 @@ class TestDistributedGPU(unittest.TestCase):
|
||||
def test_dask_dataframe(self):
|
||||
with LocalCUDACluster() as cluster:
|
||||
with Client(cluster) as client:
|
||||
import cupy
|
||||
import cupy as cp
|
||||
cp.cuda.runtime.setDevice(0)
|
||||
X, y = generate_array()
|
||||
|
||||
X = dd.from_dask_array(X)
|
||||
@@ -59,8 +60,8 @@ class TestDistributedGPU(unittest.TestCase):
|
||||
single_node = out['booster'].predict(
|
||||
xgboost.DMatrix(X.compute()))
|
||||
|
||||
cupy.testing.assert_allclose(single_node, predictions)
|
||||
cupy.testing.assert_allclose(single_node, series_predictions)
|
||||
cp.testing.assert_allclose(single_node, predictions)
|
||||
np.testing.assert_allclose(single_node, series_predictions.to_array())
|
||||
|
||||
predt = dxgb.predict(client, out, X)
|
||||
assert isinstance(predt, dd.Series)
|
||||
@@ -73,7 +74,7 @@ class TestDistributedGPU(unittest.TestCase):
|
||||
is_df,
|
||||
meta=dd.utils.make_meta({'prediction': 'f4'}))
|
||||
|
||||
cupy.testing.assert_allclose(
|
||||
cp.testing.assert_allclose(
|
||||
predt.values.compute(), single_node)
|
||||
|
||||
@pytest.mark.skipif(**tm.no_cupy())
|
||||
@@ -81,11 +82,12 @@ class TestDistributedGPU(unittest.TestCase):
|
||||
def test_dask_array(self):
|
||||
with LocalCUDACluster() as cluster:
|
||||
with Client(cluster) as client:
|
||||
import cupy
|
||||
import cupy as cp
|
||||
cp.cuda.runtime.setDevice(0)
|
||||
X, y = generate_array()
|
||||
|
||||
X = X.map_blocks(cupy.asarray)
|
||||
y = y.map_blocks(cupy.asarray)
|
||||
X = X.map_blocks(cp.asarray)
|
||||
y = y.map_blocks(cp.asarray)
|
||||
dtrain = dxgb.DaskDMatrix(client, X, y)
|
||||
out = dxgb.train(client, {'tree_method': 'gpu_hist'},
|
||||
dtrain=dtrain,
|
||||
@@ -97,11 +99,11 @@ class TestDistributedGPU(unittest.TestCase):
|
||||
single_node = out['booster'].predict(
|
||||
xgboost.DMatrix(X.compute()))
|
||||
np.testing.assert_allclose(single_node, from_dmatrix)
|
||||
device = cupy.cuda.runtime.getDevice()
|
||||
device = cp.cuda.runtime.getDevice()
|
||||
assert device == inplace_predictions.device.id
|
||||
single_node = cupy.array(single_node)
|
||||
single_node = cp.array(single_node)
|
||||
assert device == single_node.device.id
|
||||
cupy.testing.assert_allclose(
|
||||
cp.testing.assert_allclose(
|
||||
single_node,
|
||||
inplace_predictions)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user