[backport] Use maximum category in sketch. (#7853) (#7866)

This commit is contained in:
Jiaming Yuan
2022-05-06 21:11:33 +08:00
committed by GitHub
parent b1b6246e35
commit c2508814ff
6 changed files with 178 additions and 78 deletions

View File

@@ -1,3 +1,5 @@
from random import choice
from string import ascii_lowercase
import testing as tm
import pytest
import xgboost as xgb
@@ -167,6 +169,30 @@ class TestTreeMethod:
def test_invalid_category(self) -> None:
    """Run the invalid-category check for each supported tree method."""
    # Same order as before: "approx" first, then "hist".
    for tree_method in ("approx", "hist"):
        self.run_invalid_category(tree_method)
def run_max_cat(self, tree_method: str) -> None:
    """Train on data that has fewer rows than declared categories.

    A categorical column is created from ``n_categories`` random
    three-letter strings and then cut down to its first ``n_samples``
    entries, so the training frame is much shorter than the list of
    strings it was built from.  A regressor is fitted on that frame and
    the training RMSE is required to never go up between rounds.

    Parameters
    ----------
    tree_method :
        Value forwarded to ``XGBRegressor(tree_method=...)``.
    """
    import pandas as pd

    n_categories = 100
    n_samples = 5

    # One random three-letter label per category slot.
    labels = [
        "".join(choice(ascii_lowercase) for _ in range(3))
        for _ in range(n_categories)
    ]
    column = pd.Series(labels, dtype="category")
    # Slice *after* building the categorical series, as the test intends the
    # data to be smaller than the number of categories.
    X = column[:n_samples].to_frame()
    y = pd.Series(range(n_samples))

    reg = xgb.XGBRegressor(
        enable_categorical=True,
        tree_method=tree_method,
        n_estimators=10,
    )
    reg.fit(X=X, y=y, eval_set=[(X, y)])

    rmse_history = reg.evals_result()["validation_0"]["rmse"]
    assert tm.non_increasing(rmse_history)
@pytest.mark.parametrize("tree_method", ["hist", "approx"])
def test_max_cat(self, tree_method: str) -> None:
    """Run ``run_max_cat`` once per parametrized tree method."""
    self.run_max_cat(tree_method=tree_method)
def run_categorical_basic(self, rows, cols, rounds, cats, tree_method):
onehot, label = tm.make_categorical(rows, cols, cats, True)