Cover approx tree method for categorical data tests. (#7569)
* Add tree to df tests.
* Add plotting tests.
* Add histogram tests.
This commit is contained in:
parent
465dc63833
commit
d6ea5cc1ed
@ -1,25 +1,14 @@
|
||||
import sys
|
||||
import pytest
|
||||
import xgboost as xgb
|
||||
|
||||
sys.path.append("tests/python")
|
||||
import testing as tm
|
||||
from test_parse_tree import TestTreesToDataFrame
|
||||
|
||||
|
||||
def test_tree_to_df_categorical():
    """Run the shared categorical ``trees_to_dataframe`` check with ``gpu_hist``.

    Training and the per-node assertions are implemented once in
    ``TestTreesToDataFrame.run_tree_to_df_categorical``; this test only
    selects the tree method, so nothing is trained or asserted twice.
    """
    cputest = TestTreesToDataFrame()
    cputest.run_tree_to_df_categorical("gpu_hist")
|
||||
|
||||
|
||||
def test_split_value_histograms():
    """Run the shared split-value-histogram check with ``gpu_hist``.

    The model fitting and the expected ``ValueError`` assertion are
    implemented once in ``TestTreesToDataFrame.run_split_value_histograms``;
    this test only selects the tree method.
    """
    cputest = TestTreesToDataFrame()
    cputest.run_split_value_histograms("gpu_hist")
|
||||
|
||||
@ -1,40 +1,17 @@
|
||||
import sys
|
||||
import xgboost as xgb
|
||||
import pytest
|
||||
import json
|
||||
|
||||
sys.path.append("tests/python")
|
||||
import testing as tm
|
||||
|
||||
try:
|
||||
import matplotlib
|
||||
|
||||
matplotlib.use("Agg")
|
||||
from matplotlib.axes import Axes
|
||||
from graphviz import Source
|
||||
except ImportError:
|
||||
pass
|
||||
import test_plotting as tp
|
||||
|
||||
|
||||
# Module-level pytest marker built from the ``tm`` helpers.
# NOTE(review): presumably skips every test in this module when matplotlib or
# graphviz is unavailable -- confirm against ``tm.no_multiple``.
pytestmark = pytest.mark.skipif(**tm.no_multiple(tm.no_matplotlib(), tm.no_graphviz()))
|
||||
|
||||
|
||||
class TestPlotting:
    """``gpu_hist`` variant of the plotting tests.

    The test logic is implemented on ``tp.TestPlotting``; the methods here
    only choose the tree method and delegate.
    """

    # Shared instance of the test class from ``test_plotting`` whose
    # ``run_*`` helpers take the tree method as an argument.
    cputest = tp.TestPlotting()

    @pytest.mark.skipif(**tm.no_pandas())
    def test_categorical(self):
        """Dump and plot a categorical model trained with ``gpu_hist``."""
        self.cputest.run_categorical("gpu_hist")
|
||||
|
||||
@ -12,7 +12,6 @@ rng = np.random.RandomState(1994)
|
||||
|
||||
|
||||
class TestTreesToDataFrame:
|
||||
|
||||
def build_model(self, max_depth, num_round):
|
||||
dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train')
|
||||
param = {'max_depth': max_depth, 'objective': 'binary:logistic',
|
||||
@ -48,3 +47,26 @@ class TestTreesToDataFrame:
|
||||
# test for equality of covers
|
||||
cover_from_df = df.Cover.sum()
|
||||
assert np.allclose(cover_from_dump, cover_from_df)
|
||||
|
||||
def run_tree_to_df_categorical(self, tree_method: str) -> None:
    """Train on categorical data and check split rows of the tree dataframe.

    Parameters
    ----------
    tree_method :
        Value passed as the ``tree_method`` training parameter.
    """
    features, labels = tm.make_categorical(100, 10, 31, False)
    dtrain = xgb.DMatrix(features, labels, enable_categorical=True)
    params = {"tree_method": tree_method}
    booster = xgb.train(params, dtrain, num_boost_round=10)
    tree_df = booster.trees_to_dataframe()
    for _, node in tree_df.iterrows():
        if node["Feature"] == "Leaf":
            continue
        # Every non-leaf row must list at least one category.
        assert len(node["Category"]) >= 1
|
||||
|
||||
def test_tree_to_df_categorical(self) -> None:
    """Check the categorical tree dataframe using the ``approx`` tree method."""
    tree_method = "approx"
    self.run_tree_to_df_categorical(tree_method)
|
||||
|
||||
def run_split_value_histograms(self, tree_method: str) -> None:
    """Check that a split-value histogram request raises on a categorical model.

    Fits an ``XGBRegressor`` with ``enable_categorical=True`` on data from
    ``tm.make_categorical`` and expects ``get_split_value_histogram`` for
    feature ``"3"`` to raise ``ValueError`` with a message matching
    ``"doesn't"``.

    Parameters
    ----------
    tree_method :
        Value passed as the regressor's ``tree_method``.
    """
    X, y = tm.make_categorical(1000, 10, 13, False)
    reg = xgb.XGBRegressor(tree_method=tree_method, enable_categorical=True)
    reg.fit(X, y)

    # Fetch the booster outside the ``raises`` scope so that only the
    # histogram call itself can satisfy the expected ValueError.
    booster = reg.get_booster()
    with pytest.raises(ValueError, match="doesn't"):
        booster.get_split_value_histogram("3", bins=5)
|
||||
|
||||
def test_split_value_histograms(self) -> None:
    """Run the split-value-histogram check using the ``approx`` tree method."""
    self.run_split_value_histograms("approx")
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
import json
|
||||
import numpy as np
|
||||
import xgboost as xgb
|
||||
import testing as tm
|
||||
@ -73,3 +73,25 @@ class TestPlotting:
|
||||
ax = xgb.plot_importance(bst, xlim=(0, 5), ylim=(10, 71))
|
||||
assert ax.get_xlim() == (0., 5.)
|
||||
assert ax.get_ylim() == (10., 71.)
|
||||
|
||||
def run_categorical(self, tree_method: str) -> None:
    """Fit a categorical regressor, validate its JSON dump and plot a tree.

    Each dumped tree's root must either be a leaf or carry a list-valued
    ``split_condition``. The last tree is then rendered with both
    ``xgb.to_graphviz`` and ``xgb.plot_tree``.

    Parameters
    ----------
    tree_method :
        Value passed as the regressor's ``tree_method``.
    """
    X, y = tm.make_categorical(1000, 31, 19, onehot=False)
    reg = xgb.XGBRegressor(
        enable_categorical=True, n_estimators=10, tree_method=tree_method
    )
    reg.fit(X, y)
    trees = reg.get_booster().get_dump(dump_format="json")
    for tree in trees:
        j_tree = json.loads(tree)
        assert "leaf" in j_tree or isinstance(j_tree["split_condition"], list)

    # Index the last tree by the number of dumped trees. The number of keys
    # in the last parsed root node is unrelated to how many trees exist and
    # only yields a valid index by coincidence.
    last_tree = len(trees) - 1
    graph = xgb.to_graphviz(reg, num_trees=last_tree)
    assert isinstance(graph, Source)
    ax = xgb.plot_tree(reg, num_trees=last_tree)
    assert isinstance(ax, Axes)
|
||||
|
||||
@pytest.mark.skipif(**tm.no_pandas())
def test_categorical(self) -> None:
    """Run the categorical dump-and-plot check with the ``approx`` tree method."""
    tree_method = "approx"
    self.run_categorical(tree_method)
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user