Use hypothesis (#5759)

* Use hypothesis

* Allow int64 array interface for groups

* Add packages to Windows CI

* Add to travis

* Make sure device index is set correctly

* Fix dask-cudf test

* appveyor
This commit is contained in:
Rory Mitchell
2020-06-16 12:45:59 +12:00
committed by GitHub
parent 02884b08aa
commit b47b5ac771
17 changed files with 411 additions and 439 deletions

View File

@@ -3,28 +3,57 @@ import unittest
import pytest
import xgboost as xgb
import numpy as np
from hypothesis import given, strategies, settings, note
try:
from regression_test_utilities import run_suite, parameter_combinations, \
assert_results_non_increasing
except ImportError:
None
exact_parameter_strategy = strategies.fixed_dictionaries({
'nthread': strategies.integers(1, 4),
'max_depth': strategies.integers(1, 11),
'min_child_weight': strategies.floats(0.5, 2.0),
'alpha': strategies.floats(0.0, 2.0),
'lambda': strategies.floats(1e-5, 2.0),
'eta': strategies.floats(0.01, 0.5),
'gamma': strategies.floats(0.0, 2.0),
'seed': strategies.integers(0, 10),
# We cannot enable subsampling as the training loss can increase
# 'subsample': strategies.floats(0.5, 1.0),
'colsample_bytree': strategies.floats(0.5, 1.0),
'colsample_bylevel': strategies.floats(0.5, 1.0),
})
hist_parameter_strategy = strategies.fixed_dictionaries({
'max_depth': strategies.integers(1, 11),
'max_leaves': strategies.integers(0, 1024),
'max_bin': strategies.integers(2, 512),
'grow_policy': strategies.sampled_from(['lossguide', 'depthwise']),
}).filter(lambda x: (x['max_depth'] > 0 or x['max_leaves'] > 0) and (
x['max_depth'] > 0 or x['grow_policy'] == 'lossguide'))
class TestUpdaters(unittest.TestCase):
@pytest.mark.skipif(**tm.no_sklearn())
def test_histmaker(self):
variable_param = {'updater': ['grow_histmaker'], 'max_depth': [2, 8]}
for param in parameter_combinations(variable_param):
result = run_suite(param)
assert_results_non_increasing(result, 1e-2)
def train_result(param, dmat, num_rounds):
result = {}
xgb.train(param, dmat, num_rounds, [(dmat, 'train')], verbose_eval=False,
evals_result=result)
return result
@pytest.mark.skipif(**tm.no_sklearn())
def test_colmaker(self):
variable_param = {'updater': ['grow_colmaker'], 'max_depth': [2, 8]}
for param in parameter_combinations(variable_param):
result = run_suite(param)
assert_results_non_increasing(result, 1e-2)
class TestTreeMethod(unittest.TestCase):
@given(exact_parameter_strategy, strategies.integers(1, 20),
tm.dataset_strategy)
@settings(deadline=None)
def test_exact(self, param, num_rounds, dataset):
param['tree_method'] = 'exact'
param = dataset.set_params(param)
result = train_result(param, dataset.get_dmat(), num_rounds)
assert tm.non_increasing(result['train'][dataset.metric])
@given(exact_parameter_strategy, strategies.integers(1, 20),
tm.dataset_strategy)
@settings(deadline=None)
def test_approx(self, param, num_rounds, dataset):
param['tree_method'] = 'approx'
param = dataset.set_params(param)
result = train_result(param, dataset.get_dmat(), num_rounds)
assert tm.non_increasing(result['train'][dataset.metric], 1e-3)
@pytest.mark.skipif(**tm.no_sklearn())
def test_pruner(self):
@@ -50,19 +79,18 @@ class TestUpdaters(unittest.TestCase):
# Second prune should not change the tree
assert after_prune == second_prune
@pytest.mark.skipif(**tm.no_sklearn())
def test_fast_histmaker(self):
variable_param = {'tree_method': ['hist'],
'max_depth': [2, 8],
'max_bin': [2, 256],
'grow_policy': ['depthwise', 'lossguide'],
'max_leaves': [64, 0],
'verbosity': [0],
'single_precision_histogram': [True, False]}
for param in parameter_combinations(variable_param):
result = run_suite(param)
assert_results_non_increasing(result, 1e-2)
@given(exact_parameter_strategy, hist_parameter_strategy, strategies.integers(1, 20),
tm.dataset_strategy)
@settings(deadline=None)
def test_hist(self, param, hist_param, num_rounds, dataset):
param['tree_method'] = 'hist'
param = dataset.set_params(param)
param.update(hist_param)
result = train_result(param, dataset.get_dmat(), num_rounds)
note(result)
assert tm.non_increasing(result['train'][dataset.metric])
def test_hist_categorical(self):
# hist must be same as exact on all-categorial data
dpath = 'demo/data/'
ag_dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train')
@@ -87,7 +115,7 @@ class TestUpdaters(unittest.TestCase):
assert hist_res['test']['auc'] == exact_res['test']['auc']
@pytest.mark.skipif(**tm.no_sklearn())
def test_fast_histmaker_degenerate_case(self):
def test_hist_degenerate_case(self):
# Test a degenerate case where the quantile sketcher won't return any
# quantile points for a particular feature (the second feature in
# this example). Source: https://github.com/dmlc/xgboost/issues/2943