diff --git a/demo/guide-python/cat_in_the_dat.py b/demo/guide-python/cat_in_the_dat.py
index 29f55aba7..bd0381d13 100644
--- a/demo/guide-python/cat_in_the_dat.py
+++ b/demo/guide-python/cat_in_the_dat.py
@@ -63,7 +63,6 @@ def load_cat_in_the_dat() -> tuple[pd.DataFrame, pd.Series]:
 
 params = {
     "tree_method": "gpu_hist",
-    "use_label_encoder": False,
     "n_estimators": 32,
     "colsample_bylevel": 0.7,
 }
diff --git a/demo/guide-python/continuation.py b/demo/guide-python/continuation.py
index 22fbfc3f7..5cddc3108 100644
--- a/demo/guide-python/continuation.py
+++ b/demo/guide-python/continuation.py
@@ -14,13 +14,13 @@ def training_continuation(tmpdir: str, use_pickle: bool) -> None:
     """Basic training continuation."""
     # Train 128 iterations in 1 session
     X, y = load_breast_cancer(return_X_y=True)
-    clf = xgboost.XGBClassifier(n_estimators=128, use_label_encoder=False)
+    clf = xgboost.XGBClassifier(n_estimators=128)
     clf.fit(X, y, eval_set=[(X, y)], eval_metric="logloss")
     print("Total boosted rounds:", clf.get_booster().num_boosted_rounds())
 
     # Train 128 iterations in 2 sessions, with the first one runs for 32 iterations and
     # the second one runs for 96 iterations
-    clf = xgboost.XGBClassifier(n_estimators=32, use_label_encoder=False)
+    clf = xgboost.XGBClassifier(n_estimators=32)
     clf.fit(X, y, eval_set=[(X, y)], eval_metric="logloss")
     assert clf.get_booster().num_boosted_rounds() == 32
 
@@ -54,14 +54,14 @@ def training_continuation_early_stop(tmpdir: str, use_pickle: bool) -> None:
 
     n_estimators = 512
     X, y = load_breast_cancer(return_X_y=True)
-    clf = xgboost.XGBClassifier(n_estimators=n_estimators, use_label_encoder=False)
+    clf = xgboost.XGBClassifier(n_estimators=n_estimators)
     clf.fit(X, y, eval_set=[(X, y)], eval_metric="logloss", callbacks=[early_stop])
     print("Total boosted rounds:", clf.get_booster().num_boosted_rounds())
     best = clf.best_iteration
 
     # Train 512 iterations in 2 sessions, with the first one runs for 128 iterations and
     # the second one runs until early stop.
-    clf = xgboost.XGBClassifier(n_estimators=128, use_label_encoder=False)
+    clf = xgboost.XGBClassifier(n_estimators=128)
     # Reinitialize the early stop callback
     early_stop = xgboost.callback.EarlyStopping(
         rounds=early_stopping_rounds, save_best=True
@@ -79,15 +79,13 @@ def training_continuation_early_stop(tmpdir: str, use_pickle: bool) -> None:
     else:
         path = os.path.join(tmpdir, "model-first-128.json")
         clf.save_model(path)
-        loaded = xgboost.XGBClassifier(use_label_encoder=False)
+        loaded = xgboost.XGBClassifier()
         loaded.load_model(path)
 
     early_stop = xgboost.callback.EarlyStopping(
         rounds=early_stopping_rounds, save_best=True
     )
-    clf = xgboost.XGBClassifier(
-        n_estimators=n_estimators - 128, use_label_encoder=False
-    )
+    clf = xgboost.XGBClassifier(n_estimators=n_estimators - 128)
     clf.fit(
         X,
         y,
diff --git a/demo/guide-python/predict_first_ntree.py b/demo/guide-python/predict_first_ntree.py
index b56de0200..fb7837728 100644
--- a/demo/guide-python/predict_first_ntree.py
+++ b/demo/guide-python/predict_first_ntree.py
@@ -35,7 +35,7 @@ def native_interface():
 def sklearn_interface():
     X_train, y_train = load_svmlight_file(train)
     X_test, y_test = load_svmlight_file(test)
-    clf = xgb.XGBClassifier(n_estimators=3, max_depth=2, eta=1, use_label_encoder=False)
+    clf = xgb.XGBClassifier(n_estimators=3, max_depth=2, eta=1)
     clf.fit(X_train, y_train, eval_set=[(X_test, y_test)])
     assert clf.n_classes_ == 2
 
diff --git a/doc/tutorials/categorical.rst b/doc/tutorials/categorical.rst
index 7a185a113..3f106962d 100644
--- a/doc/tutorials/categorical.rst
+++ b/doc/tutorials/categorical.rst
@@ -36,9 +36,7 @@ parameter ``enable_categorical``:
 .. code:: python
 
   # Supported tree methods are `gpu_hist`, `approx`, and `hist`.
-  clf = xgb.XGBClassifier(
-      tree_method="gpu_hist", enable_categorical=True, use_label_encoder=False
-  )
+  clf = xgb.XGBClassifier(tree_method="gpu_hist", enable_categorical=True)
   # X is the dataframe we created in previous snippet
   clf.fit(X, y)
   # Must use JSON/UBJSON for serialization, otherwise the information is lost.
diff --git a/python-package/xgboost/sklearn.py b/python-package/xgboost/sklearn.py
index d27cc6354..0b4d4f5a9 100644
--- a/python-package/xgboost/sklearn.py
+++ b/python-package/xgboost/sklearn.py
@@ -1304,13 +1304,15 @@ class XGBClassifier(XGBModel, XGBClassifierBase):
         self,
         *,
         objective: _SklObjective = "binary:logistic",
-        use_label_encoder: bool = False,
+        use_label_encoder: Optional[bool] = None,
         **kwargs: Any
     ) -> None:
         # must match the parameters for `get_params`
         self.use_label_encoder = use_label_encoder
         if use_label_encoder is True:
             raise ValueError("Label encoder was removed in 1.6.")
+        if use_label_encoder is not None:
+            warnings.warn("`use_label_encoder` is deprecated in 2.0.0.")
         super().__init__(objective=objective, **kwargs)
 
     @_deprecate_positional_args
diff --git a/tests/python/test_training_continuation.py b/tests/python/test_training_continuation.py
index 44de6bed4..31a408170 100644
--- a/tests/python/test_training_continuation.py
+++ b/tests/python/test_training_continuation.py
@@ -152,16 +152,16 @@ class TestTrainingContinuation:
     def test_changed_parameter(self):
         from sklearn.datasets import load_breast_cancer
         X, y = load_breast_cancer(return_X_y=True)
-        clf = xgb.XGBClassifier(n_estimators=2, use_label_encoder=False)
+        clf = xgb.XGBClassifier(n_estimators=2)
         clf.fit(X, y, eval_set=[(X, y)], eval_metric="logloss")
         assert tm.non_increasing(clf.evals_result()["validation_0"]["logloss"])
 
         with tempfile.TemporaryDirectory() as tmpdir:
             clf.save_model(os.path.join(tmpdir, "clf.json"))
-            loaded = xgb.XGBClassifier(use_label_encoder=False)
+            loaded = xgb.XGBClassifier()
             loaded.load_model(os.path.join(tmpdir, "clf.json"))
 
-            clf = xgb.XGBClassifier(n_estimators=2, use_label_encoder=False)
+            clf = xgb.XGBClassifier(n_estimators=2)
             # change metric to error
             clf.fit(X, y, eval_set=[(X, y)], eval_metric="error")
             assert tm.non_increasing(clf.evals_result()["validation_0"]["error"])
diff --git a/tests/python/test_with_dask.py b/tests/python/test_with_dask.py
index 4e80409d4..c20291d74 100644
--- a/tests/python/test_with_dask.py
+++ b/tests/python/test_with_dask.py
@@ -777,9 +777,7 @@ def run_empty_dmatrix_auc(client: "Client", tree_method: str, n_workers: int) ->
     valid_X = dd.from_array(valid_X_, chunksize=n_samples)
     valid_y = dd.from_array(valid_y_, chunksize=n_samples)
 
-    cls = xgb.dask.DaskXGBClassifier(
-        tree_method=tree_method, n_estimators=2, use_label_encoder=False
-    )
+    cls = xgb.dask.DaskXGBClassifier(tree_method=tree_method, n_estimators=2)
     cls.fit(X, y, eval_metric=["auc", "aucpr"], eval_set=[(valid_X, valid_y)])
 
     # multiclass
@@ -808,9 +806,7 @@ def run_empty_dmatrix_auc(client: "Client", tree_method: str, n_workers: int) ->
     valid_X = dd.from_array(valid_X_, chunksize=n_samples)
     valid_y = dd.from_array(valid_y_, chunksize=n_samples)
 
-    cls = xgb.dask.DaskXGBClassifier(
-        tree_method=tree_method, n_estimators=2, use_label_encoder=False
-    )
+    cls = xgb.dask.DaskXGBClassifier(tree_method=tree_method, n_estimators=2)
     cls.fit(X, y, eval_metric=["auc", "aucpr"], eval_set=[(valid_X, valid_y)])
 
 
@@ -837,14 +833,10 @@ def run_auc(client: "Client", tree_method: str) -> None:
     valid_X = dd.from_array(valid_X_, chunksize=10)
     valid_y = dd.from_array(valid_y_, chunksize=10)
 
-    cls = xgb.XGBClassifier(
-        tree_method=tree_method, n_estimators=2, use_label_encoder=False
-    )
+    cls = xgb.XGBClassifier(tree_method=tree_method, n_estimators=2)
     cls.fit(X_, y_, eval_metric="auc", eval_set=[(valid_X_, valid_y_)])
 
-    dcls = xgb.dask.DaskXGBClassifier(
-        tree_method=tree_method, n_estimators=2, use_label_encoder=False
-    )
+    dcls = xgb.dask.DaskXGBClassifier(tree_method=tree_method, n_estimators=2)
     dcls.fit(X, y, eval_metric="auc", eval_set=[(valid_X, valid_y)])
 
     approx = dcls.evals_result()["validation_0"]["auc"]
@@ -1693,7 +1685,6 @@ def test_parallel_submits(client: "Client") -> None:
             verbosity=1,
             n_estimators=i + 1,
             eval_metric="merror",
-            use_label_encoder=False,
         )
         f = client.submit(cls.fit, X, y, pure=False)
         futures.append(f)
@@ -1786,7 +1777,6 @@ def test_parallel_submit_multi_clients() -> None:
                 verbosity=1,
                 n_estimators=i + 1,
                 eval_metric="merror",
-                use_label_encoder=False,
             )
             f = client.submit(cls.fit, X, y, pure=False)
             futures.append((client, f))
diff --git a/tests/python/test_with_sklearn.py b/tests/python/test_with_sklearn.py
index a2e70ae6d..cd1297f70 100644
--- a/tests/python/test_with_sklearn.py
+++ b/tests/python/test_with_sklearn.py
@@ -774,13 +774,12 @@ def save_load_model(model_path):
     X = digits['data']
     kf = KFold(n_splits=2, shuffle=True, random_state=rng)
     for train_index, test_index in kf.split(X, y):
-        xgb_model = xgb.XGBClassifier(use_label_encoder=False).fit(X[train_index], y[train_index])
+        xgb_model = xgb.XGBClassifier().fit(X[train_index], y[train_index])
         xgb_model.save_model(model_path)
 
         xgb_model = xgb.XGBClassifier()
         xgb_model.load_model(model_path)
 
-        assert xgb_model.use_label_encoder is False
         assert isinstance(xgb_model.classes_, np.ndarray)
         assert isinstance(xgb_model._Booster, xgb.Booster)
 
@@ -972,8 +971,8 @@ def test_deprecate_position_arg():
         model.fit(X, y, w)
 
     with pytest.warns(FutureWarning):
-        xgb.XGBClassifier(1, use_label_encoder=False)
-    model = xgb.XGBClassifier(n_estimators=1, use_label_encoder=False)
+        xgb.XGBClassifier(1)
+    model = xgb.XGBClassifier(n_estimators=1)
     with pytest.warns(FutureWarning):
         model.fit(X, y, w)
 
@@ -990,9 +989,6 @@ def test_deprecate_position_arg():
     with pytest.warns(FutureWarning):
         model.fit(X, y, w)
 
-    with pytest.raises(ValueError):
-        xgb.XGBRFClassifier(1, use_label_encoder=True)
-
     model = xgb.XGBRFClassifier(n_estimators=1)
     with pytest.warns(FutureWarning):
         model.fit(X, y, w)
@@ -1334,7 +1330,6 @@ def test_evaluation_metric():
     X, y = load_digits(n_class=10, return_X_y=True)
 
     clf = xgb.XGBClassifier(
-        use_label_encoder=False,
         tree_method="hist",
         eval_metric=merror,
         n_estimators=16,
@@ -1344,7 +1339,6 @@ def test_evaluation_metric():
     custom = clf.evals_result()
 
     clf = xgb.XGBClassifier(
-        use_label_encoder=False,
         tree_method="hist",
         eval_metric="merror",
         n_estimators=16,
@@ -1360,7 +1354,6 @@ def test_evaluation_metric():
     )
 
     clf = xgb.XGBRFClassifier(
-        use_label_encoder=False,
         tree_method="hist",
         n_estimators=16,
         objective=tm.softprob_obj(10),
         eval_metric=merror,
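
Note (not part of the patch): a minimal sketch of the constructor behaviour introduced by the `python-package/xgboost/sklearn.py` hunk above, useful for checking the deprecation path locally. The toy data, the `n_estimators` value, and the warning-capture scaffolding are illustrative assumptions; only the raise-on-True and warn-on-any-explicit-value paths come from the diff itself.

    import warnings

    import numpy as np
    import xgboost as xgb

    # Illustrative toy data, not taken from the patch.
    X = np.random.rand(64, 4)
    y = np.random.randint(0, 2, size=64)

    # Preferred usage after this change: omit `use_label_encoder` entirely.
    clf = xgb.XGBClassifier(n_estimators=4)
    clf.fit(X, y)

    # Passing any explicit value now emits the deprecation warning added in __init__.
    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        xgb.XGBClassifier(use_label_encoder=False)
    assert any("use_label_encoder" in str(w.message) for w in caught)

    # `use_label_encoder=True` still raises, since the encoder was removed in 1.6.
    try:
        xgb.XGBClassifier(use_label_encoder=True)
    except ValueError as err:
        print(err)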