From e964654b8fcbfe36cacfe66f044d5d52ea135eb5 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Mon, 3 Jul 2023 22:06:17 +0800 Subject: [PATCH] [skl] Enable cat feature without specifying tree method. (#9353) --- python-package/xgboost/sklearn.py | 3 +-- tests/python/test_with_sklearn.py | 3 +-- tests/test_distributed/test_with_dask/test_with_dask.py | 6 +++--- 3 files changed, 5 insertions(+), 7 deletions(-) diff --git a/python-package/xgboost/sklearn.py b/python-package/xgboost/sklearn.py index 440cd34be..a46ba14d0 100644 --- a/python-package/xgboost/sklearn.py +++ b/python-package/xgboost/sklearn.py @@ -930,8 +930,7 @@ class XGBModel(XGBModelBase): callbacks = self.callbacks if self.callbacks is not None else callbacks tree_method = params.get("tree_method", None) - cat_support = {"gpu_hist", "approx", "hist"} - if self.enable_categorical and tree_method not in cat_support: + if self.enable_categorical and tree_method == "exact": raise ValueError( "Experimental support for categorical data is not implemented for" " current tree method yet." diff --git a/tests/python/test_with_sklearn.py b/tests/python/test_with_sklearn.py index f897d8afc..b4550dab2 100644 --- a/tests/python/test_with_sklearn.py +++ b/tests/python/test_with_sklearn.py @@ -1390,7 +1390,6 @@ def test_categorical(): X, y = tm.make_categorical(n_samples=32, n_features=2, n_categories=3, onehot=False) ft = ["c"] * X.shape[1] reg = xgb.XGBRegressor( - tree_method="hist", feature_types=ft, max_cat_to_onehot=1, enable_categorical=True, @@ -1409,7 +1408,7 @@ def test_categorical(): onehot, y = tm.make_categorical( n_samples=32, n_features=2, n_categories=3, onehot=True ) - reg = xgb.XGBRegressor(tree_method="hist") + reg = xgb.XGBRegressor() reg.fit(onehot, y, eval_set=[(onehot, y)]) from_enc = reg.evals_result()["validation_0"]["rmse"] predt_enc = reg.predict(onehot) diff --git a/tests/test_distributed/test_with_dask/test_with_dask.py b/tests/test_distributed/test_with_dask/test_with_dask.py index d6075481f..cab4188a8 100644 --- a/tests/test_distributed/test_with_dask/test_with_dask.py +++ b/tests/test_distributed/test_with_dask/test_with_dask.py @@ -308,7 +308,7 @@ def test_dask_sparse(client: "Client") -> None: def run_categorical(client: "Client", tree_method: str, X, X_onehot, y) -> None: - parameters = {"tree_method": tree_method, "max_cat_to_onehot": 9999} # force onehot + parameters = {"tree_method": tree_method, "max_cat_to_onehot": 9999} # force onehot rounds = 10 m = xgb.dask.DaskDMatrix(client, X_onehot, y, enable_categorical=True) by_etl_results = xgb.dask.train( @@ -364,9 +364,9 @@ def run_categorical(client: "Client", tree_method: str, X, X_onehot, y) -> None: check_model_output(reg.get_booster()) reg = xgb.dask.DaskXGBRegressor( - enable_categorical=True, n_estimators=10 + enable_categorical=True, n_estimators=10, tree_method="exact" ) - with pytest.raises(ValueError): + with pytest.raises(ValueError, match="categorical data"): reg.fit(X, y) # check partition based reg = xgb.dask.DaskXGBRegressor(