Deprecate use_label_encoder in XGBClassifier. (#7822)
* Deprecate `use_label_encoder` in XGBClassifier.
* We have removed the encoder; now prepare to remove the indicator.
parent 5815df4c46
commit 52d4eda786
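For orientation before the hunks: after this change `use_label_encoder` is still accepted as a keyword argument, but it defaults to `None`, any explicit value triggers a deprecation warning, and `True` is rejected outright because the encoder itself was already removed. The following is a minimal sketch of the resulting behaviour, not code from the patch; the breast-cancer dataset and the `n_estimators=4` value are arbitrary choices for illustration, and the eager validation in `__init__` is taken from the `sklearn.py` hunk further down.

    import warnings
    import xgboost as xgb
    from sklearn.datasets import load_breast_cancer

    X, y = load_breast_cancer(return_X_y=True)

    # Preferred usage after this change: simply omit the flag.
    clf = xgb.XGBClassifier(n_estimators=4)
    clf.fit(X, y)

    # Passing any explicit value still works but emits a deprecation warning.
    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        xgb.XGBClassifier(use_label_encoder=False)
    assert any("use_label_encoder" in str(w.message) for w in caught)

    # Asking for the removed label encoder is rejected with a ValueError.
    try:
        xgb.XGBClassifier(use_label_encoder=True)
    except ValueError:
        pass

In other words, callers that still pass `use_label_encoder=False` keep working for now and only need to drop the argument to silence the warning.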
@@ -63,7 +63,6 @@ def load_cat_in_the_dat() -> tuple[pd.DataFrame, pd.Series]:

     params = {
         "tree_method": "gpu_hist",
-        "use_label_encoder": False,
         "n_estimators": 32,
         "colsample_bylevel": 0.7,
     }
@@ -14,13 +14,13 @@ def training_continuation(tmpdir: str, use_pickle: bool) -> None:
     """Basic training continuation."""
     # Train 128 iterations in 1 session
     X, y = load_breast_cancer(return_X_y=True)
-    clf = xgboost.XGBClassifier(n_estimators=128, use_label_encoder=False)
+    clf = xgboost.XGBClassifier(n_estimators=128)
     clf.fit(X, y, eval_set=[(X, y)], eval_metric="logloss")
     print("Total boosted rounds:", clf.get_booster().num_boosted_rounds())

     # Train 128 iterations in 2 sessions, with the first one runs for 32 iterations and
     # the second one runs for 96 iterations
-    clf = xgboost.XGBClassifier(n_estimators=32, use_label_encoder=False)
+    clf = xgboost.XGBClassifier(n_estimators=32)
     clf.fit(X, y, eval_set=[(X, y)], eval_metric="logloss")
     assert clf.get_booster().num_boosted_rounds() == 32

@@ -54,14 +54,14 @@ def training_continuation_early_stop(tmpdir: str, use_pickle: bool) -> None:
     n_estimators = 512

     X, y = load_breast_cancer(return_X_y=True)
-    clf = xgboost.XGBClassifier(n_estimators=n_estimators, use_label_encoder=False)
+    clf = xgboost.XGBClassifier(n_estimators=n_estimators)
     clf.fit(X, y, eval_set=[(X, y)], eval_metric="logloss", callbacks=[early_stop])
     print("Total boosted rounds:", clf.get_booster().num_boosted_rounds())
     best = clf.best_iteration

     # Train 512 iterations in 2 sessions, with the first one runs for 128 iterations and
     # the second one runs until early stop.
-    clf = xgboost.XGBClassifier(n_estimators=128, use_label_encoder=False)
+    clf = xgboost.XGBClassifier(n_estimators=128)
     # Reinitialize the early stop callback
     early_stop = xgboost.callback.EarlyStopping(
         rounds=early_stopping_rounds, save_best=True
@@ -79,15 +79,13 @@ def training_continuation_early_stop(tmpdir: str, use_pickle: bool) -> None:
     else:
         path = os.path.join(tmpdir, "model-first-128.json")
         clf.save_model(path)
-        loaded = xgboost.XGBClassifier(use_label_encoder=False)
+        loaded = xgboost.XGBClassifier()
         loaded.load_model(path)

     early_stop = xgboost.callback.EarlyStopping(
         rounds=early_stopping_rounds, save_best=True
     )
-    clf = xgboost.XGBClassifier(
-        n_estimators=n_estimators - 128, use_label_encoder=False
-    )
+    clf = xgboost.XGBClassifier(n_estimators=n_estimators - 128)
     clf.fit(
         X,
         y,
@@ -35,7 +35,7 @@ def native_interface():
 def sklearn_interface():
     X_train, y_train = load_svmlight_file(train)
     X_test, y_test = load_svmlight_file(test)
-    clf = xgb.XGBClassifier(n_estimators=3, max_depth=2, eta=1, use_label_encoder=False)
+    clf = xgb.XGBClassifier(n_estimators=3, max_depth=2, eta=1)
     clf.fit(X_train, y_train, eval_set=[(X_test, y_test)])
     assert clf.n_classes_ == 2

@@ -36,9 +36,7 @@ parameter ``enable_categorical``:
 .. code:: python

   # Supported tree methods are `gpu_hist`, `approx`, and `hist`.
-  clf = xgb.XGBClassifier(
-      tree_method="gpu_hist", enable_categorical=True, use_label_encoder=False
-  )
+  clf = xgb.XGBClassifier(tree_method="gpu_hist", enable_categorical=True)
   # X is the dataframe we created in previous snippet
   clf.fit(X, y)
   # Must use JSON/UBJSON for serialization, otherwise the information is lost.
@@ -1304,13 +1304,15 @@ class XGBClassifier(XGBModel, XGBClassifierBase):
         self,
         *,
         objective: _SklObjective = "binary:logistic",
-        use_label_encoder: bool = False,
+        use_label_encoder: Optional[bool] = None,
         **kwargs: Any
     ) -> None:
         # must match the parameters for `get_params`
         self.use_label_encoder = use_label_encoder
         if use_label_encoder is True:
             raise ValueError("Label encoder was removed in 1.6.")
+        if use_label_encoder is not None:
+            warnings.warn("`use_label_encoder` is deprecated in 2.0.0.")
         super().__init__(objective=objective, **kwargs)

     @_deprecate_positional_args
@@ -152,16 +152,16 @@ class TestTrainingContinuation:
     def test_changed_parameter(self):
         from sklearn.datasets import load_breast_cancer
         X, y = load_breast_cancer(return_X_y=True)
-        clf = xgb.XGBClassifier(n_estimators=2, use_label_encoder=False)
+        clf = xgb.XGBClassifier(n_estimators=2)
         clf.fit(X, y, eval_set=[(X, y)], eval_metric="logloss")
         assert tm.non_increasing(clf.evals_result()["validation_0"]["logloss"])

         with tempfile.TemporaryDirectory() as tmpdir:
             clf.save_model(os.path.join(tmpdir, "clf.json"))
-            loaded = xgb.XGBClassifier(use_label_encoder=False)
+            loaded = xgb.XGBClassifier()
             loaded.load_model(os.path.join(tmpdir, "clf.json"))

-        clf = xgb.XGBClassifier(n_estimators=2, use_label_encoder=False)
+        clf = xgb.XGBClassifier(n_estimators=2)
         # change metric to error
         clf.fit(X, y, eval_set=[(X, y)], eval_metric="error")
         assert tm.non_increasing(clf.evals_result()["validation_0"]["error"])
@@ -777,9 +777,7 @@ def run_empty_dmatrix_auc(client: "Client", tree_method: str, n_workers: int) ->
     valid_X = dd.from_array(valid_X_, chunksize=n_samples)
     valid_y = dd.from_array(valid_y_, chunksize=n_samples)

-    cls = xgb.dask.DaskXGBClassifier(
-        tree_method=tree_method, n_estimators=2, use_label_encoder=False
-    )
+    cls = xgb.dask.DaskXGBClassifier(tree_method=tree_method, n_estimators=2)
    cls.fit(X, y, eval_metric=["auc", "aucpr"], eval_set=[(valid_X, valid_y)])

     # multiclass
@@ -808,9 +806,7 @@ def run_empty_dmatrix_auc(client: "Client", tree_method: str, n_workers: int) ->
     valid_X = dd.from_array(valid_X_, chunksize=n_samples)
     valid_y = dd.from_array(valid_y_, chunksize=n_samples)

-    cls = xgb.dask.DaskXGBClassifier(
-        tree_method=tree_method, n_estimators=2, use_label_encoder=False
-    )
+    cls = xgb.dask.DaskXGBClassifier(tree_method=tree_method, n_estimators=2)
     cls.fit(X, y, eval_metric=["auc", "aucpr"], eval_set=[(valid_X, valid_y)])

@@ -837,14 +833,10 @@ def run_auc(client: "Client", tree_method: str) -> None:
     valid_X = dd.from_array(valid_X_, chunksize=10)
     valid_y = dd.from_array(valid_y_, chunksize=10)

-    cls = xgb.XGBClassifier(
-        tree_method=tree_method, n_estimators=2, use_label_encoder=False
-    )
+    cls = xgb.XGBClassifier(tree_method=tree_method, n_estimators=2)
     cls.fit(X_, y_, eval_metric="auc", eval_set=[(valid_X_, valid_y_)])

-    dcls = xgb.dask.DaskXGBClassifier(
-        tree_method=tree_method, n_estimators=2, use_label_encoder=False
-    )
+    dcls = xgb.dask.DaskXGBClassifier(tree_method=tree_method, n_estimators=2)
     dcls.fit(X, y, eval_metric="auc", eval_set=[(valid_X, valid_y)])

     approx = dcls.evals_result()["validation_0"]["auc"]
@@ -1693,7 +1685,6 @@ def test_parallel_submits(client: "Client") -> None:
             verbosity=1,
             n_estimators=i + 1,
             eval_metric="merror",
-            use_label_encoder=False,
         )
         f = client.submit(cls.fit, X, y, pure=False)
         futures.append(f)
@@ -1786,7 +1777,6 @@ def test_parallel_submit_multi_clients() -> None:
             verbosity=1,
             n_estimators=i + 1,
             eval_metric="merror",
-            use_label_encoder=False,
         )
         f = client.submit(cls.fit, X, y, pure=False)
         futures.append((client, f))
@@ -774,13 +774,12 @@ def save_load_model(model_path):
     X = digits['data']
     kf = KFold(n_splits=2, shuffle=True, random_state=rng)
     for train_index, test_index in kf.split(X, y):
-        xgb_model = xgb.XGBClassifier(use_label_encoder=False).fit(X[train_index], y[train_index])
+        xgb_model = xgb.XGBClassifier().fit(X[train_index], y[train_index])
         xgb_model.save_model(model_path)

         xgb_model = xgb.XGBClassifier()
         xgb_model.load_model(model_path)

-        assert xgb_model.use_label_encoder is False
         assert isinstance(xgb_model.classes_, np.ndarray)
         assert isinstance(xgb_model._Booster, xgb.Booster)

@@ -972,8 +971,8 @@ def test_deprecate_position_arg():
         model.fit(X, y, w)

     with pytest.warns(FutureWarning):
-        xgb.XGBClassifier(1, use_label_encoder=False)
-    model = xgb.XGBClassifier(n_estimators=1, use_label_encoder=False)
+        xgb.XGBClassifier(1)
+    model = xgb.XGBClassifier(n_estimators=1)
     with pytest.warns(FutureWarning):
         model.fit(X, y, w)

@@ -990,9 +989,6 @@ def test_deprecate_position_arg():
     with pytest.warns(FutureWarning):
         model.fit(X, y, w)

-    with pytest.raises(ValueError):
-        xgb.XGBRFClassifier(1, use_label_encoder=True)
-
     model = xgb.XGBRFClassifier(n_estimators=1)
     with pytest.warns(FutureWarning):
         model.fit(X, y, w)
@@ -1334,7 +1330,6 @@ def test_evaluation_metric():
     X, y = load_digits(n_class=10, return_X_y=True)

     clf = xgb.XGBClassifier(
-        use_label_encoder=False,
         tree_method="hist",
         eval_metric=merror,
         n_estimators=16,
@@ -1344,7 +1339,6 @@ def test_evaluation_metric():
     custom = clf.evals_result()

     clf = xgb.XGBClassifier(
-        use_label_encoder=False,
         tree_method="hist",
         eval_metric="merror",
         n_estimators=16,
@@ -1360,7 +1354,6 @@ def test_evaluation_metric():
     )

     clf = xgb.XGBRFClassifier(
-        use_label_encoder=False,
         tree_method="hist", n_estimators=16,
         objective=tm.softprob_obj(10),
         eval_metric=merror,