More categorical tests and disable shap sparse test. (#6219)

* Fix tree load with 32 categories.
2020-10-10 16:12:37 +08:00
parent c991eb612d
commit b5b24354b8
5 changed files with 120 additions and 27 deletions
--- a/tests/python-gpu/test_gpu_prediction.py
+++ b/tests/python-gpu/test_gpu_prediction.py
@@ -212,6 +212,10 @@ class TestGPUPredict(unittest.TestCase):
           tm.dataset_strategy, shap_parameter_strategy)
    @settings(deadline=None, max_examples=20)
    def test_shap_interactions(self, num_rounds, dataset, param):
+        if dataset.name == 'sparse':
+            issue = 'https://github.com/dmlc/xgboost/issues/6074'
+            pytest.xfail(reason=f'GPU shap with sparse is flaky: {issue}')
+
        param.update({"predictor": "gpu_predictor", "gpu_id": 0})
        param = dataset.set_params(param)
        dmat = dataset.get_dmat()
@@ -220,5 +224,6 @@ class TestGPUPredict(unittest.TestCase):
        shap = bst.predict(test_dmat, pred_interactions=True)
        margin = bst.predict(test_dmat, output_margin=True)
        assume(len(dataset.y) > 0)
-        assert np.allclose(np.sum(shap, axis=(len(shap.shape) - 1, len(shap.shape) - 2)), margin,
+        assert np.allclose(np.sum(shap, axis=(len(shap.shape) - 1, len(shap.shape) - 2)),
+                           margin,
                           1e-3, 1e-3)
--- a/tests/python-gpu/test_gpu_updaters.py
+++ b/tests/python-gpu/test_gpu_updaters.py
@@ -41,7 +41,24 @@ class TestGPUUpdaters:
        note(result)
        assert tm.non_increasing(result['train'][dataset.metric])

-    def run_categorical_basic(self, cat, onehot, label, rounds):
+    def run_categorical_basic(self, rows, cols, rounds, cats):
+        import pandas as pd
+        rng = np.random.RandomState(1994)
+
+        pd_dict = {}
+        for i in range(cols):
+            c = rng.randint(low=0, high=cats+1, size=rows)
+            pd_dict[str(i)] = pd.Series(c, dtype=np.int64)
+
+        df = pd.DataFrame(pd_dict)
+        label = df.iloc[:, 0]
+        for i in range(0, cols-1):
+            label += df.iloc[:, i]
+        label += 1
+        df = df.astype('category')
+        onehot = pd.get_dummies(df)
+        cat = df
+
        by_etl_results = {}
        by_builtin_results = {}

@@ -64,28 +81,20 @@ class TestGPUUpdaters:
            rtol=1e-3)
        assert tm.non_increasing(by_builtin_results['Train']['rmse'])

-    @given(strategies.integers(10, 400), strategies.integers(5, 10),
-           strategies.integers(1, 5), strategies.integers(4, 8))
+    @given(strategies.integers(10, 400), strategies.integers(3, 8),
+           strategies.integers(1, 5), strategies.integers(4, 7))
    @settings(deadline=None)
    @pytest.mark.skipif(**tm.no_pandas())
    def test_categorical(self, rows, cols, rounds, cats):
-        import pandas as pd
-        rng = np.random.RandomState(1994)
+        self.run_categorical_basic(rows, cols, rounds, cats)

-        pd_dict = {}
-        for i in range(cols):
-            c = rng.randint(low=0, high=cats+1, size=rows)
-            pd_dict[str(i)] = pd.Series(c, dtype=np.int64)
-
-        df = pd.DataFrame(pd_dict)
-        label = df.iloc[:, 0]
-        for i in range(0, cols-1):
-            label += df.iloc[:, i]
-        label += 1
-        df = df.astype('category')
-        x = pd.get_dummies(df)
-
-        self.run_categorical_basic(df, x, label, rounds)
+    def test_categorical_32_cat(self):
+        '''32 hits the bound of integer bitset, so special test'''
+        rows = 1000
+        cols = 10
+        cats = 32
+        rounds = 4
+        self.run_categorical_basic(rows, cols, rounds, cats)

    @pytest.mark.skipif(**tm.no_cupy())
    @given(parameter_strategy, strategies.integers(1, 20),