Cover approx tree method for categorical data tests. (#7569)

* Add tree to df tests.
* Add plotting tests.
* Add histogram tests.
This commit is contained in:
Jiaming Yuan 2022-01-16 11:31:40 +08:00 committed by GitHub
parent 465dc63833
commit d6ea5cc1ed
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 55 additions and 45 deletions

View File

@ -1,25 +1,14 @@
import sys import sys
import pytest
import xgboost as xgb
sys.path.append("tests/python") sys.path.append("tests/python")
import testing as tm from test_parse_tree import TestTreesToDataFrame
def test_tree_to_df_categorical(): def test_tree_to_df_categorical():
X, y = tm.make_categorical(100, 10, 31, False) cputest = TestTreesToDataFrame()
Xy = xgb.DMatrix(X, y, enable_categorical=True) cputest.run_tree_to_df_categorical("gpu_hist")
booster = xgb.train({"tree_method": "gpu_hist"}, Xy, num_boost_round=10)
df = booster.trees_to_dataframe()
for _, x in df.iterrows():
if x["Feature"] != "Leaf":
assert len(x["Category"]) == 1
def test_split_value_histograms(): def test_split_value_histograms():
X, y = tm.make_categorical(1000, 10, 13, False) cputest = TestTreesToDataFrame()
reg = xgb.XGBRegressor(tree_method="gpu_hist", enable_categorical=True) cputest.run_split_value_histograms("gpu_hist")
reg.fit(X, y)
with pytest.raises(ValueError, match="doesn't"):
reg.get_booster().get_split_value_histogram("3", bins=5)

View File

@ -1,40 +1,17 @@
import sys import sys
import xgboost as xgb
import pytest import pytest
import json
sys.path.append("tests/python") sys.path.append("tests/python")
import testing as tm import testing as tm
import test_plotting as tp
try:
import matplotlib
matplotlib.use("Agg")
from matplotlib.axes import Axes
from graphviz import Source
except ImportError:
pass
pytestmark = pytest.mark.skipif(**tm.no_multiple(tm.no_matplotlib(), tm.no_graphviz())) pytestmark = pytest.mark.skipif(**tm.no_multiple(tm.no_matplotlib(), tm.no_graphviz()))
class TestPlotting: class TestPlotting:
cputest = tp.TestPlotting()
@pytest.mark.skipif(**tm.no_pandas()) @pytest.mark.skipif(**tm.no_pandas())
def test_categorical(self): def test_categorical(self):
X, y = tm.make_categorical(1000, 31, 19, onehot=False) self.cputest.run_categorical("gpu_hist")
reg = xgb.XGBRegressor(
enable_categorical=True, n_estimators=10, tree_method="gpu_hist"
)
reg.fit(X, y)
trees = reg.get_booster().get_dump(dump_format="json")
for tree in trees:
j_tree = json.loads(tree)
assert "leaf" in j_tree.keys() or isinstance(
j_tree["split_condition"], list
)
graph = xgb.to_graphviz(reg, num_trees=len(j_tree) - 1)
assert isinstance(graph, Source)
ax = xgb.plot_tree(reg, num_trees=len(j_tree) - 1)
assert isinstance(ax, Axes)

View File

@ -12,7 +12,6 @@ rng = np.random.RandomState(1994)
class TestTreesToDataFrame: class TestTreesToDataFrame:
def build_model(self, max_depth, num_round): def build_model(self, max_depth, num_round):
dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train') dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train')
param = {'max_depth': max_depth, 'objective': 'binary:logistic', param = {'max_depth': max_depth, 'objective': 'binary:logistic',
@ -48,3 +47,26 @@ class TestTreesToDataFrame:
# test for equality of covers # test for equality of covers
cover_from_df = df.Cover.sum() cover_from_df = df.Cover.sum()
assert np.allclose(cover_from_dump, cover_from_df) assert np.allclose(cover_from_dump, cover_from_df)
def run_tree_to_df_categorical(self, tree_method: str) -> None:
    """Check that split rows of the tree dataframe carry category lists.

    Trains a 10-round booster with ``tree_method`` on data produced by
    ``tm.make_categorical`` and asserts that every non-leaf row returned by
    ``trees_to_dataframe()`` has at least one entry in ``Category``.
    """
    X, y = tm.make_categorical(100, 10, 31, False)
    dtrain = xgb.DMatrix(X, y, enable_categorical=True)
    params = {"tree_method": tree_method}
    booster = xgb.train(params, dtrain, num_boost_round=10)
    frame = booster.trees_to_dataframe()
    for _, row in frame.iterrows():
        # Leaf rows are not checked; only split rows must list categories.
        if row["Feature"] == "Leaf":
            continue
        assert len(row["Category"]) >= 1
def test_tree_to_df_categorical(self) -> None:
    """Run the categorical tree-to-dataframe check with ``approx``."""
    self.run_tree_to_df_categorical(tree_method="approx")
def run_split_value_histograms(self, tree_method: str) -> None:
    """Check that a split-value histogram request raises ``ValueError``.

    Fits an ``XGBRegressor`` with ``tree_method`` on data produced by
    ``tm.make_categorical`` and asserts that asking the booster for the
    split-value histogram of feature ``"3"`` raises a ``ValueError`` whose
    message matches ``"doesn't"``.
    """
    X, y = tm.make_categorical(1000, 10, 13, False)
    reg = xgb.XGBRegressor(tree_method=tree_method, enable_categorical=True)
    reg.fit(X, y)
    # Fetch the booster outside the ``raises`` block so that only the
    # histogram call itself can satisfy the expected-error check.
    booster = reg.get_booster()
    with pytest.raises(ValueError, match="doesn't"):
        booster.get_split_value_histogram("3", bins=5)
def test_split_value_histograms(self) -> None:
    """Run the split-value histogram error check with ``approx``."""
    self.run_split_value_histograms("approx")

View File

@ -1,4 +1,4 @@
# -*- coding: utf-8 -*- import json
import numpy as np import numpy as np
import xgboost as xgb import xgboost as xgb
import testing as tm import testing as tm
@ -73,3 +73,25 @@ class TestPlotting:
ax = xgb.plot_importance(bst, xlim=(0, 5), ylim=(10, 71)) ax = xgb.plot_importance(bst, xlim=(0, 5), ylim=(10, 71))
assert ax.get_xlim() == (0., 5.) assert ax.get_xlim() == (0., 5.)
assert ax.get_ylim() == (10., 71.) assert ax.get_ylim() == (10., 71.)
def run_categorical(self, tree_method: str) -> None:
    """Check the JSON dump and plotting of a categorical model.

    Fits a 10-estimator ``XGBRegressor`` with ``tree_method`` on data
    produced by ``tm.make_categorical``, asserts that the root of every
    dumped tree is either a leaf or has a list-valued ``split_condition``,
    and then renders the last tree with ``to_graphviz`` and ``plot_tree``.
    """
    X, y = tm.make_categorical(1000, 31, 19, onehot=False)
    reg = xgb.XGBRegressor(
        enable_categorical=True, n_estimators=10, tree_method=tree_method
    )
    reg.fit(X, y)
    trees = reg.get_booster().get_dump(dump_format="json")
    for tree in trees:
        j_tree = json.loads(tree)
        assert "leaf" in j_tree or isinstance(j_tree["split_condition"], list)

    # Plot the last tree. The index must come from the number of dumped
    # trees; the number of keys in one tree's JSON dict is unrelated and is
    # only a valid index by coincidence.
    last_tree = len(trees) - 1
    graph = xgb.to_graphviz(reg, num_trees=last_tree)
    assert isinstance(graph, Source)
    ax = xgb.plot_tree(reg, num_trees=last_tree)
    assert isinstance(ax, Axes)
@pytest.mark.skipif(**tm.no_pandas())
def test_categorical(self) -> None:
    """Run the categorical dump-and-plot check with ``approx``."""
    self.run_categorical(tree_method="approx")