Use hypothesis (#5759)

* Use hypothesis

* Allow int64 array interface for groups

* Add packages to Windows CI

* Add to travis

* Make sure device index is set correctly

* Fix dask-cudf test

* appveyor
This commit is contained in:
Rory Mitchell
2020-06-16 12:45:59 +12:00
committed by GitHub
parent 02884b08aa
commit b47b5ac771
17 changed files with 411 additions and 439 deletions

View File

@@ -1,74 +1,71 @@
import numpy as np
import sys
import unittest
import pytest
import xgboost as xgb
from hypothesis import given, strategies, assume, settings, note
sys.path.append("tests/python")
import testing as tm
from regression_test_utilities import run_suite, parameter_combinations, \
assert_results_non_increasing
# Hypothesis strategy that yields one dict of training parameters per example.
parameter_strategy = strategies.fixed_dictionaries(
    {
        'max_depth': strategies.integers(0, 11),
        'max_leaves': strategies.integers(0, 256),
        'max_bin': strategies.integers(2, 1024),
        'grow_policy': strategies.sampled_from(['lossguide', 'depthwise']),
        'single_precision_histogram': strategies.booleans(),
        'min_child_weight': strategies.floats(0.5, 2.0),
        'seed': strategies.integers(0, 10),
        # We cannot enable subsampling as the training loss can increase
        # 'subsample': strategies.floats(0.5, 1.0),
        'colsample_bytree': strategies.floats(0.5, 1.0),
        'colsample_bylevel': strategies.floats(0.5, 1.0),
    }
).filter(
    # Reject a draw whose max_depth is 0 unless it also has a positive
    # max_leaves together with the 'lossguide' grow policy.
    lambda x: x['max_depth'] > 0
    or (x['max_leaves'] > 0 and x['grow_policy'] == 'lossguide')
)
def assert_gpu_results(cpu_results, gpu_results):
    """Assert that two result suites finish at roughly the same eval value.

    The suites are compared pairwise, in order.  Each record must provide an
    ``"eval"`` sequence; only its last entry is compared.

    Raises ``AssertionError`` when the suites differ in length or when any
    pair of final eval values is not close (rtol=0.1, atol=0.1).
    """
    cpu_results = list(cpu_results)
    gpu_results = list(gpu_results)
    # zip() stops at the shorter input, so without this check a missing
    # result would be skipped silently instead of failing the comparison.
    assert len(cpu_results) == len(gpu_results), (
        "result suites differ in length: {} vs {}".format(
            len(cpu_results), len(gpu_results)))
    for cpu_res, gpu_res in zip(cpu_results, gpu_results):
        # Check final eval result roughly equivalent
        assert np.allclose(cpu_res["eval"][-1], gpu_res["eval"][-1],
                           rtol=1e-1, atol=1e-1)
def train_result(param, dmat, num_rounds):
    """Train on ``dmat`` for ``num_rounds`` rounds and return the eval history.

    ``dmat`` is also passed as the only evaluation set, under the name
    'train'.  The returned dict is the one handed to ``xgb.train`` as
    ``evals_result``.
    """
    history = {}
    watchlist = [(dmat, 'train')]
    xgb.train(param, dmat, num_rounds, watchlist,
              evals_result=history, verbose_eval=False)
    return history
# Dataset names passed to run_suite() through `select_datasets`.
datasets = [
    "Boston",
    "Cancer",
    "Digits",
    "Sparse regression",
    "Sparse regression with weights",
    "Small weights regression",
]
# Parameter grid shared by the suite-based tests, which iterate over it as a
# sequence of parameter dicts.
test_param = parameter_combinations(
    {
        'gpu_id': [0],
        'max_depth': [2, 8],
        'max_leaves': [255, 4],
        'max_bin': [4, 256],
        'grow_policy': ['lossguide'],
        'single_precision_histogram': [True],
        'min_child_weight': [0],
        'lambda': [0],
    }
)
class TestGPU(unittest.TestCase):
    def test_gpu_hist(self):
        """Compare `gpu_hist` against `hist` for every entry of test_param.

        For each parameter set the suite is run with both tree methods; the
        GPU results must be non-increasing (tolerance 1e-2) and must end
        close to the CPU results.
        """
        for base_param in test_param:
            # Work on copies: test_param is module-level state, so setting
            # keys on its dicts in place would leak 'tree_method' into
            # every other test that reads it.
            gpu_param = dict(base_param, tree_method='gpu_hist')
            gpu_results = run_suite(gpu_param, select_datasets=datasets)
            assert_results_non_increasing(gpu_results, 1e-2)

            cpu_param = dict(base_param, tree_method='hist')
            cpu_results = run_suite(cpu_param, select_datasets=datasets)
            assert_gpu_results(cpu_results, gpu_results)
class TestGPUUpdaters:
    @given(parameter_strategy, strategies.integers(1, 20),
           tm.dataset_strategy)
    @settings(deadline=None)
    def test_gpu_hist(self, param, num_rounds, dataset):
        """Training with `gpu_hist` must give a non-increasing train metric."""
        param['tree_method'] = 'gpu_hist'
        full_param = dataset.set_params(param)
        history = train_result(full_param, dataset.get_dmat(), num_rounds)
        # Attach the history to the hypothesis report of a failing example.
        note(history)
        train_metric = history['train'][dataset.metric]
        assert tm.non_increasing(train_metric)
@pytest.mark.skipif(**tm.no_cupy())
def test_gpu_hist_device_dmatrix(self):
    """Compare `gpu_hist` on DeviceQuantileDMatrix with the default DMatrix.

    For every entry of test_param the suite is run twice on the dense
    datasets, once with ``xgb.DeviceQuantileDMatrix`` and once with the
    default matrix type.  The device results must be non-increasing
    (tolerance 1e-2) and both runs must end close to each other.
    """
    # DeviceDMatrix does not currently accept sparse formats
    device_dmatrix_datasets = ["Boston", "Cancer", "Digits"]
    for base_param in test_param:
        # Work on a copy: test_param is module-level state, so setting
        # 'tree_method' on its dicts in place would leak into other tests.
        param = dict(base_param, tree_method='gpu_hist')
        gpu_results_device_dmatrix = run_suite(
            param,
            select_datasets=device_dmatrix_datasets,
            DMatrixT=xgb.DeviceQuantileDMatrix,
            dmatrix_params={'max_bin': param['max_bin']})
        assert_results_non_increasing(gpu_results_device_dmatrix, 1e-2)
        gpu_results = run_suite(param, select_datasets=device_dmatrix_datasets)
        assert_gpu_results(gpu_results, gpu_results_device_dmatrix)
@given(parameter_strategy, strategies.integers(1, 20),
       tm.dataset_strategy)
@settings(deadline=None)
def test_gpu_hist_device_dmatrix(self, param, num_rounds, dataset):
    """`gpu_hist` on the dataset's device DMatrix must not increase the train metric."""
    # We cannot handle empty dataset yet
    has_rows = len(dataset.y) > 0
    assume(has_rows)
    param['tree_method'] = 'gpu_hist'
    full_param = dataset.set_params(param)
    history = train_result(full_param, dataset.get_device_dmat(), num_rounds)
    # Attach the history to the hypothesis report of a failing example.
    note(history)
    train_metric = history['train'][dataset.metric]
    assert tm.non_increasing(train_metric)
# NOTE(rongou): Because the `Boston` dataset is too small, this only tests external memory mode
# with a single page. To test multiple pages, set DMatrix::kPageSize to, say, 1024.
def test_external_memory(self):
    """Compare in-memory and external-memory `gpu_hist` runs on Boston.

    Only the last parameter set of test_param is exercised.  Both runs must
    be non-increasing (tolerance 1e-2) and must end close to each other.
    """
    base_param = next(reversed(test_param), None)
    if base_param is None:
        # Nothing to run when the parameter grid is empty.
        return
    # Work on a copy: test_param is module-level state, so writing
    # 'gpu_page_size' into its dict in place would change the parameters
    # seen by every test that runs afterwards.
    param = dict(base_param, tree_method='gpu_hist', gpu_page_size=1024)
    gpu_results = run_suite(param, select_datasets=["Boston"])
    assert_results_non_increasing(gpu_results, 1e-2)
    ext_mem_results = run_suite(param, select_datasets=["Boston External Memory"])
    assert_results_non_increasing(ext_mem_results, 1e-2)
    assert_gpu_results(gpu_results, ext_mem_results)
@given(parameter_strategy, strategies.integers(1, 20),
       tm.dataset_strategy)
@settings(deadline=None)
def test_external_memory(self, param, num_rounds, dataset):
    """`gpu_hist` on the dataset's external-memory DMatrix must not increase the train metric."""
    # We cannot handle empty dataset yet
    assume(len(dataset.y) > 0)
    param['tree_method'] = 'gpu_hist'
    param = dataset.set_params(param)
    external_result = train_result(param, dataset.get_external_dmat(), num_rounds)
    # Record the history so a failing example reports it, matching the
    # other property-based tests driven by parameter_strategy.
    note(external_result)
    assert tm.non_increasing(external_result['train'][dataset.metric])
def test_with_empty_dmatrix(self):
def test_empty_dmatrix_prediction(self):
# FIXME(trivialfis): This should be done with all updaters
kRows = 0
kCols = 100
@@ -94,13 +91,10 @@ class TestGPU(unittest.TestCase):
np.testing.assert_allclose(predictions, 0.5, 1e-6)
@pytest.mark.mgpu
def test_specified_gpu_id_gpu_update(self):
    """Run the suite with `gpu_hist` and gpu_id 1; results must be non-increasing."""
    grid = {
        'gpu_id': [1],
        'max_depth': [8],
        'max_leaves': [255, 4],
        'max_bin': [2, 64],
        'grow_policy': ['lossguide'],
        'tree_method': ['gpu_hist'],
    }
    for combination in parameter_combinations(grid):
        suite_results = run_suite(combination, select_datasets=datasets)
        assert_results_non_increasing(suite_results, 1e-2)
@given(tm.dataset_strategy, strategies.integers(0, 10))
@settings(deadline=None, max_examples=10)
def test_specified_gpu_id_gpu_update(self, dataset, gpu_id):
    """Ten rounds of `gpu_hist` with the drawn gpu_id must not increase the train metric."""
    base_param = {'tree_method': 'gpu_hist', 'gpu_id': gpu_id}
    full_param = dataset.set_params(base_param)
    history = train_result(full_param, dataset.get_dmat(), 10)
    train_metric = history['train'][dataset.metric]
    assert tm.non_increasing(train_metric)