Cover approx tree method for categorical data tests. (#7569)

* Add tree to df tests.
* Add plotting tests.
* Add histogram tests.
2022-01-16 11:31:40 +08:00 · 2022-01-16 11:31:40 +08:00 · d6ea5cc1ed
commit d6ea5cc1ed
parent 465dc63833
4 changed files with 55 additions and 45 deletions
--- a/tests/python-gpu/test_gpu_parse_tree.py
+++ b/tests/python-gpu/test_gpu_parse_tree.py
@ -1,25 +1,14 @@
 import sys
 import pytest
 import xgboost as xgb
 sys.path.append("tests/python")
-import testing as tm
+from test_parse_tree import TestTreesToDataFrame
 def test_tree_to_df_categorical():
-    X, y = tm.make_categorical(100, 10, 31, False)
+    cputest = TestTreesToDataFrame()
-    Xy = xgb.DMatrix(X, y, enable_categorical=True)
+    cputest.run_tree_to_df_categorical("gpu_hist")
    booster = xgb.train({"tree_method": "gpu_hist"}, Xy, num_boost_round=10)
    df = booster.trees_to_dataframe()
    for _, x in df.iterrows():
        if x["Feature"] != "Leaf":
            assert len(x["Category"]) == 1
 def test_split_value_histograms():
-    X, y = tm.make_categorical(1000, 10, 13, False)
+    cputest = TestTreesToDataFrame()
-    reg = xgb.XGBRegressor(tree_method="gpu_hist", enable_categorical=True)
+    cputest.run_split_value_histograms("gpu_hist")
    reg.fit(X, y)
    with pytest.raises(ValueError, match="doesn't"):
        reg.get_booster().get_split_value_histogram("3", bins=5)
--- a/tests/python-gpu/test_gpu_plotting.py
+++ b/tests/python-gpu/test_gpu_plotting.py
@ -1,40 +1,17 @@
 import sys
 import xgboost as xgb
 import pytest
 import json
 sys.path.append("tests/python")
 import testing as tm
-
+import test_plotting as tp
 try:
    import matplotlib
    matplotlib.use("Agg")
    from matplotlib.axes import Axes
    from graphviz import Source
 except ImportError:
    pass
 pytestmark = pytest.mark.skipif(**tm.no_multiple(tm.no_matplotlib(), tm.no_graphviz()))
 class TestPlotting:
    cputest = tp.TestPlotting()
    @pytest.mark.skipif(**tm.no_pandas())
    def test_categorical(self):
-        X, y = tm.make_categorical(1000, 31, 19, onehot=False)
+        self.cputest.run_categorical("gpu_hist")
        reg = xgb.XGBRegressor(
            enable_categorical=True, n_estimators=10, tree_method="gpu_hist"
        )
        reg.fit(X, y)
        trees = reg.get_booster().get_dump(dump_format="json")
        for tree in trees:
            j_tree = json.loads(tree)
            assert "leaf" in j_tree.keys() or isinstance(
                j_tree["split_condition"], list
            )
        graph = xgb.to_graphviz(reg, num_trees=len(j_tree) - 1)
        assert isinstance(graph, Source)
        ax = xgb.plot_tree(reg, num_trees=len(j_tree) - 1)
        assert isinstance(ax, Axes)
--- a/tests/python/test_parse_tree.py
+++ b/tests/python/test_parse_tree.py
@ -12,7 +12,6 @@ rng = np.random.RandomState(1994)
 class TestTreesToDataFrame:
    def build_model(self, max_depth, num_round):
        dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train')
        param = {'max_depth': max_depth, 'objective': 'binary:logistic',
@ -48,3 +47,26 @@ class TestTreesToDataFrame:
        # test for equality of covers
        cover_from_df = df.Cover.sum()
        assert np.allclose(cover_from_dump, cover_from_df)
    def run_tree_to_df_categorical(self, tree_method: str) -> None:
        """Check the "Category" column of ``trees_to_dataframe`` for split nodes.

        Trains a 10-round booster with the given ``tree_method`` on data from
        ``tm.make_categorical`` (passed to ``DMatrix`` with
        ``enable_categorical=True``) and asserts that every row whose
        "Feature" is not "Leaf" carries at least one entry in "Category".
        """
        features, labels = tm.make_categorical(100, 10, 31, False)
        dtrain = xgb.DMatrix(features, labels, enable_categorical=True)
        params = {"tree_method": tree_method}
        booster = xgb.train(params, dtrain, num_boost_round=10)

        frame = booster.trees_to_dataframe()
        for _, row in frame.iterrows():
            # Leaf rows are not checked; only split rows must list categories.
            if row["Feature"] == "Leaf":
                continue
            assert len(row["Category"]) >= 1
    def test_tree_to_df_categorical(self) -> None:
        """Run the categorical tree-to-dataframe check with the "approx" method."""
        self.run_tree_to_df_categorical(tree_method="approx")
    def run_split_value_histograms(self, tree_method: str) -> None:
        """Check that a split-value histogram request is rejected on this model.

        Fits an ``XGBRegressor`` (``enable_categorical=True``) with the given
        ``tree_method`` on data from ``tm.make_categorical`` and expects
        ``get_split_value_histogram("3", bins=5)`` to raise ``ValueError``
        whose message matches ``"doesn't"``.
        """
        X, y = tm.make_categorical(1000, 10, 13, False)
        reg = xgb.XGBRegressor(tree_method=tree_method, enable_categorical=True)
        reg.fit(X, y)
        # Fetch the booster outside the ``raises`` block so that only the
        # histogram call itself can satisfy the expected ValueError.
        booster = reg.get_booster()
        with pytest.raises(ValueError, match="doesn't"):
            booster.get_split_value_histogram("3", bins=5)
    def test_split_value_histograms(self) -> None:
        """Run the split-value histogram check with the "approx" tree method."""
        self.run_split_value_histograms("approx")
--- a/tests/python/test_plotting.py
+++ b/tests/python/test_plotting.py
@ -1,4 +1,4 @@
-# -*- coding: utf-8 -*-
+import json
 import numpy as np
 import xgboost as xgb
 import testing as tm
@ -73,3 +73,25 @@ class TestPlotting:
        ax = xgb.plot_importance(bst, xlim=(0, 5), ylim=(10, 71))
        assert ax.get_xlim() == (0., 5.)
        assert ax.get_ylim() == (10., 71.)
    def run_categorical(self, tree_method: str) -> None:
        """Check JSON dumps and plotting for a model trained on categorical data.

        Fits a 10-estimator ``XGBRegressor`` (``enable_categorical=True``) with
        the given ``tree_method``, then verifies that:

        * the root of every JSON-dumped tree is either a leaf or has a list
          as its ``split_condition``;
        * ``xgb.to_graphviz`` returns a ``Source`` and ``xgb.plot_tree``
          returns an ``Axes`` for the last tree of the model.
        """
        X, y = tm.make_categorical(1000, 31, 19, onehot=False)
        reg = xgb.XGBRegressor(
            enable_categorical=True, n_estimators=10, tree_method=tree_method
        )
        reg.fit(X, y)

        trees = reg.get_booster().get_dump(dump_format="json")
        for tree in trees:
            j_tree = json.loads(tree)
            assert "leaf" in j_tree or isinstance(j_tree["split_condition"], list)

        # Index of the last dumped tree. The count must come from ``trees``;
        # the length of a parsed root-node dict is its number of JSON keys,
        # which is unrelated to how many trees the model contains.
        last_tree = len(trees) - 1

        graph = xgb.to_graphviz(reg, num_trees=last_tree)
        assert isinstance(graph, Source)
        ax = xgb.plot_tree(reg, num_trees=last_tree)
        assert isinstance(ax, Axes)
    @pytest.mark.skipif(**tm.no_pandas())
    def test_categorical(self) -> None:
        """Run the categorical dump/plot check with the "approx" tree method."""
        self.run_categorical(tree_method="approx")