Use hypothesis (#5759)
* Use hypothesis for property-based testing.
* Allow the int64 array interface for groups.
* Add the required packages to the Windows CI configuration.
* Add the required packages to the Travis CI configuration.
* Make sure the device index is set correctly.
* Fix the dask-cudf test.
* Update the AppVeyor configuration.
This commit is contained in:
@@ -3,28 +3,57 @@ import unittest
|
||||
import pytest
|
||||
import xgboost as xgb
|
||||
import numpy as np
|
||||
from hypothesis import given, strategies, settings, note
|
||||
|
||||
try:
    from regression_test_utilities import run_suite, parameter_combinations, \
        assert_results_non_increasing
except ImportError:
    # Optional helper module; simply leave the names undefined when it is
    # not on the path.  `pass` is the idiomatic no-op here — the original
    # bare `None` expression statement did nothing and read like a mistake.
    pass
|
||||
# Hypothesis search space for booster parameters shared by the tree-method
# property tests.  'lambda' is quoted because it is a Python keyword.
exact_parameter_strategy = strategies.fixed_dictionaries({
    'nthread': strategies.integers(1, 4),
    'max_depth': strategies.integers(1, 11),
    'min_child_weight': strategies.floats(0.5, 2.0),
    'alpha': strategies.floats(0.0, 2.0),
    'lambda': strategies.floats(1e-5, 2.0),
    'eta': strategies.floats(0.01, 0.5),
    'gamma': strategies.floats(0.0, 2.0),
    'seed': strategies.integers(0, 10),
    # We cannot enable subsampling as the training loss can increase
    # 'subsample': strategies.floats(0.5, 1.0),
    'colsample_bytree': strategies.floats(0.5, 1.0),
    'colsample_bylevel': strategies.floats(0.5, 1.0),
})
|
||||
|
||||
# Search space for parameters specific to the 'hist' tree method.  The
# filter rejects draws where neither depth nor leaves bound the tree, or
# where only leaves bound it without the 'lossguide' policy.
hist_parameter_strategy = strategies.fixed_dictionaries({
    'max_depth': strategies.integers(1, 11),
    'max_leaves': strategies.integers(0, 1024),
    'max_bin': strategies.integers(2, 512),
    'grow_policy': strategies.sampled_from(['lossguide', 'depthwise']),
}).filter(
    # Logically equivalent to
    # (depth > 0 or leaves > 0) and (depth > 0 or policy == 'lossguide').
    lambda cfg: cfg['max_depth'] > 0
    or (cfg['max_leaves'] > 0 and cfg['grow_policy'] == 'lossguide')
)
|
||||
|
||||
|
||||
class TestUpdaters(unittest.TestCase):
|
||||
@pytest.mark.skipif(**tm.no_sklearn())
|
||||
def test_histmaker(self):
|
||||
variable_param = {'updater': ['grow_histmaker'], 'max_depth': [2, 8]}
|
||||
for param in parameter_combinations(variable_param):
|
||||
result = run_suite(param)
|
||||
assert_results_non_increasing(result, 1e-2)
|
||||
def train_result(param, dmat, num_rounds):
    """Train on ``dmat`` for ``num_rounds`` and return the evaluation history.

    The returned dict is populated via ``evals_result`` and keyed by the
    evaluation name ('train').
    """
    history = {}
    xgb.train(param, dmat, num_rounds,
              evals=[(dmat, 'train')],
              verbose_eval=False,
              evals_result=history)
    return history
|
||||
|
||||
@pytest.mark.skipif(**tm.no_sklearn())
|
||||
def test_colmaker(self):
|
||||
variable_param = {'updater': ['grow_colmaker'], 'max_depth': [2, 8]}
|
||||
for param in parameter_combinations(variable_param):
|
||||
result = run_suite(param)
|
||||
assert_results_non_increasing(result, 1e-2)
|
||||
|
||||
class TestTreeMethod(unittest.TestCase):
|
||||
@given(exact_parameter_strategy, strategies.integers(1, 20),
|
||||
tm.dataset_strategy)
|
||||
@settings(deadline=None)
|
||||
def test_exact(self, param, num_rounds, dataset):
|
||||
param['tree_method'] = 'exact'
|
||||
param = dataset.set_params(param)
|
||||
result = train_result(param, dataset.get_dmat(), num_rounds)
|
||||
assert tm.non_increasing(result['train'][dataset.metric])
|
||||
|
||||
@given(exact_parameter_strategy, strategies.integers(1, 20),
|
||||
tm.dataset_strategy)
|
||||
@settings(deadline=None)
|
||||
def test_approx(self, param, num_rounds, dataset):
|
||||
param['tree_method'] = 'approx'
|
||||
param = dataset.set_params(param)
|
||||
result = train_result(param, dataset.get_dmat(), num_rounds)
|
||||
assert tm.non_increasing(result['train'][dataset.metric], 1e-3)
|
||||
|
||||
@pytest.mark.skipif(**tm.no_sklearn())
|
||||
def test_pruner(self):
|
||||
@@ -50,19 +79,18 @@ class TestUpdaters(unittest.TestCase):
|
||||
# Second prune should not change the tree
|
||||
assert after_prune == second_prune
|
||||
|
||||
@pytest.mark.skipif(**tm.no_sklearn())
|
||||
def test_fast_histmaker(self):
|
||||
variable_param = {'tree_method': ['hist'],
|
||||
'max_depth': [2, 8],
|
||||
'max_bin': [2, 256],
|
||||
'grow_policy': ['depthwise', 'lossguide'],
|
||||
'max_leaves': [64, 0],
|
||||
'verbosity': [0],
|
||||
'single_precision_histogram': [True, False]}
|
||||
for param in parameter_combinations(variable_param):
|
||||
result = run_suite(param)
|
||||
assert_results_non_increasing(result, 1e-2)
|
||||
@given(exact_parameter_strategy, hist_parameter_strategy, strategies.integers(1, 20),
|
||||
tm.dataset_strategy)
|
||||
@settings(deadline=None)
|
||||
def test_hist(self, param, hist_param, num_rounds, dataset):
|
||||
param['tree_method'] = 'hist'
|
||||
param = dataset.set_params(param)
|
||||
param.update(hist_param)
|
||||
result = train_result(param, dataset.get_dmat(), num_rounds)
|
||||
note(result)
|
||||
assert tm.non_increasing(result['train'][dataset.metric])
|
||||
|
||||
def test_hist_categorical(self):
|
||||
# hist must be same as exact on all-categorial data
|
||||
dpath = 'demo/data/'
|
||||
ag_dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train')
|
||||
@@ -87,7 +115,7 @@ class TestUpdaters(unittest.TestCase):
|
||||
assert hist_res['test']['auc'] == exact_res['test']['auc']
|
||||
|
||||
@pytest.mark.skipif(**tm.no_sklearn())
|
||||
def test_fast_histmaker_degenerate_case(self):
|
||||
def test_hist_degenerate_case(self):
|
||||
# Test a degenerate case where the quantile sketcher won't return any
|
||||
# quantile points for a particular feature (the second feature in
|
||||
# this example). Source: https://github.com/dmlc/xgboost/issues/2943
|
||||
|
||||
Reference in New Issue
Block a user