Use hypothesis (#5759)

* Use hypothesis
* Allow int64 array interface for groups
* Add packages to Windows CI
* Add to travis
* Make sure device index is set correctly
* Fix dask-cudf test
* appveyor
2020-06-16 12:45:59 +12:00
parent 02884b08aa
commit b47b5ac771
17 changed files with 411 additions and 439 deletions
--- a/tests/python/test_updaters.py
+++ b/tests/python/test_updaters.py
@@ -3,28 +3,57 @@ import unittest
 import pytest
 import xgboost as xgb
 import numpy as np
+from hypothesis import given, strategies, settings, note

-try:
-    from regression_test_utilities import run_suite, parameter_combinations, \
-        assert_results_non_increasing
-except ImportError:
-    None
+# Hypothesis strategy producing one training-parameter dict per example.
+# It is the first strategy passed to @given for test_exact, test_approx and
+# test_hist; each key is drawn independently from the bounds given below.
+exact_parameter_strategy = strategies.fixed_dictionaries({
+    'nthread': strategies.integers(1, 4),
+    'max_depth': strategies.integers(1, 11),
+    'min_child_weight': strategies.floats(0.5, 2.0),
+    'alpha': strategies.floats(0.0, 2.0),
+    # NOTE(review): lower bound is 1e-5 rather than 0.0 (unlike 'alpha' and
+    # 'gamma'); presumably deliberate to keep some L2 regularisation on --
+    # confirm before widening.
+    'lambda': strategies.floats(1e-5, 2.0),
+    'eta': strategies.floats(0.01, 0.5),
+    'gamma': strategies.floats(0.0, 2.0),
+    'seed': strategies.integers(0, 10),
+    # We cannot enable subsampling as the training loss can increase
+    # 'subsample': strategies.floats(0.5, 1.0),
+    'colsample_bytree': strategies.floats(0.5, 1.0),
+    'colsample_bylevel': strategies.floats(0.5, 1.0),
+})
+
+# Additional parameters drawn only for test_hist, where they are merged on
+# top of the dict produced by exact_parameter_strategy.
+#
+# The trailing filter keeps an example only if
+#   (max_depth > 0 or max_leaves > 0) and
+#   (max_depth > 0 or grow_policy == 'lossguide').
+# With max_depth drawn from [1, 11] both clauses are currently always true,
+# so the filter rejects nothing; it only becomes active if the lower bound
+# of max_depth is relaxed to 0.
+# NOTE(review): presumably a guard against unbounded tree growth -- confirm.
+hist_parameter_strategy = strategies.fixed_dictionaries({
+    'max_depth': strategies.integers(1, 11),
+    'max_leaves': strategies.integers(0, 1024),
+    'max_bin': strategies.integers(2, 512),
+    'grow_policy': strategies.sampled_from(['lossguide', 'depthwise']),
+}).filter(lambda x: (x['max_depth'] > 0 or x['max_leaves'] > 0) and (
+    x['max_depth'] > 0 or x['grow_policy'] == 'lossguide'))


-class TestUpdaters(unittest.TestCase):
-    @pytest.mark.skipif(**tm.no_sklearn())
-    def test_histmaker(self):
-        variable_param = {'updater': ['grow_histmaker'], 'max_depth': [2, 8]}
-        for param in parameter_combinations(variable_param):
-            result = run_suite(param)
-            assert_results_non_increasing(result, 1e-2)
+def train_result(param, dmat, num_rounds):
+    """Run ``xgb.train`` and return the evaluation history it records.
+
+    ``dmat`` is used both as the training data and as the single watchlist
+    entry, registered under the name ``'train'``; callers in this file read
+    the history back as ``result['train'][metric]``.
+    """
+    history = {}
+    watchlist = [(dmat, 'train')]
+    xgb.train(param, dmat, num_rounds, watchlist, verbose_eval=False,
+              evals_result=history)
+    return history

-    @pytest.mark.skipif(**tm.no_sklearn())
-    def test_colmaker(self):
-        variable_param = {'updater': ['grow_colmaker'], 'max_depth': [2, 8]}
-        for param in parameter_combinations(variable_param):
-            result = run_suite(param)
-            assert_results_non_increasing(result, 1e-2)
+
+class TestTreeMethod(unittest.TestCase):
+    @given(exact_parameter_strategy, strategies.integers(1, 20),
+           tm.dataset_strategy)
+    @settings(deadline=None)
+    def test_exact(self, param, num_rounds, dataset):
+        # Property: with tree_method='exact', the per-round training metric
+        # must pass tm.non_increasing (called with its default tolerance).
+        param['tree_method'] = 'exact'
+        param = dataset.set_params(param)
+        history = train_result(param, dataset.get_dmat(), num_rounds)
+        train_metric = history['train'][dataset.metric]
+        assert tm.non_increasing(train_metric)
+
+    @given(exact_parameter_strategy, strategies.integers(1, 20),
+           tm.dataset_strategy)
+    @settings(deadline=None)
+    def test_approx(self, param, num_rounds, dataset):
+        # Same property as test_exact, but for tree_method='approx' and with
+        # an explicit second argument of 1e-3 passed to tm.non_increasing.
+        param['tree_method'] = 'approx'
+        param = dataset.set_params(param)
+        history = train_result(param, dataset.get_dmat(), num_rounds)
+        train_metric = history['train'][dataset.metric]
+        assert tm.non_increasing(train_metric, 1e-3)

    @pytest.mark.skipif(**tm.no_sklearn())
    def test_pruner(self):
@@ -50,19 +79,18 @@ class TestUpdaters(unittest.TestCase):
        # Second prune should not change the tree
        assert after_prune == second_prune

-    @pytest.mark.skipif(**tm.no_sklearn())
-    def test_fast_histmaker(self):
-        variable_param = {'tree_method': ['hist'],
-                          'max_depth': [2, 8],
-                          'max_bin': [2, 256],
-                          'grow_policy': ['depthwise', 'lossguide'],
-                          'max_leaves': [64, 0],
-                          'verbosity': [0],
-                          'single_precision_histogram': [True, False]}
-        for param in parameter_combinations(variable_param):
-            result = run_suite(param)
-            assert_results_non_increasing(result, 1e-2)
+    @given(exact_parameter_strategy, hist_parameter_strategy,
+           strategies.integers(1, 20), tm.dataset_strategy)
+    @settings(deadline=None)
+    def test_hist(self, param, hist_param, num_rounds, dataset):
+        # Property: with tree_method='hist', the per-round training metric
+        # must pass tm.non_increasing (called with its default tolerance).
+        param['tree_method'] = 'hist'
+        param = dataset.set_params(param)
+        # Merged last, so hist-specific draws (e.g. 'max_depth') override any
+        # same-named keys already present in param.
+        param.update(hist_param)
+        history = train_result(param, dataset.get_dmat(), num_rounds)
+        # Record the full history so hypothesis shows it for a failing example.
+        note(history)
+        train_metric = history['train'][dataset.metric]
+        assert tm.non_increasing(train_metric)

+    def test_hist_categorical(self):
        # hist must be same as exact on all-categorial data
        dpath = 'demo/data/'
        ag_dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train')
@@ -87,7 +115,7 @@ class TestUpdaters(unittest.TestCase):
        assert hist_res['test']['auc'] == exact_res['test']['auc']

    @pytest.mark.skipif(**tm.no_sklearn())
-    def test_fast_histmaker_degenerate_case(self):
+    def test_hist_degenerate_case(self):
        # Test a degenerate case where the quantile sketcher won't return any
        # quantile points for a particular feature (the second feature in
        # this example). Source: https://github.com/dmlc/xgboost/issues/2943