Deprecate use_label_encoder in XGBClassifier. (#7822)
* Deprecate `use_label_encoder` in XGBClassifier.
* The encoder itself was already removed; this prepares for removing the indicator flag as well.
parent 5815df4c46
commit 52d4eda786
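For readers of the changelog, here is a minimal sketch of the user-facing behaviour after this patch. It is illustrative only: the `n_estimators` value and the printed output are assumptions, and it must be run against a build that includes this commit.

import warnings

import xgboost

# Preferred usage: simply omit the flag. The encoder itself was removed in 1.6.
clf = xgboost.XGBClassifier(n_estimators=8)

# Passing the flag still works, but now emits a deprecation warning.
with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    xgboost.XGBClassifier(use_label_encoder=False)
print([str(w.message) for w in caught])

# Passing True keeps raising, because the encoder no longer exists.
try:
    xgboost.XGBClassifier(use_label_encoder=True)
except ValueError as err:
    print(err)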
@@ -63,7 +63,6 @@ def load_cat_in_the_dat() -> tuple[pd.DataFrame, pd.Series]:

 params = {
     "tree_method": "gpu_hist",
-    "use_label_encoder": False,
     "n_estimators": 32,
     "colsample_bylevel": 0.7,
 }

@@ -14,13 +14,13 @@ def training_continuation(tmpdir: str, use_pickle: bool) -> None:
     """Basic training continuation."""
     # Train 128 iterations in 1 session
     X, y = load_breast_cancer(return_X_y=True)
-    clf = xgboost.XGBClassifier(n_estimators=128, use_label_encoder=False)
+    clf = xgboost.XGBClassifier(n_estimators=128)
     clf.fit(X, y, eval_set=[(X, y)], eval_metric="logloss")
     print("Total boosted rounds:", clf.get_booster().num_boosted_rounds())

     # Train 128 iterations in 2 sessions, with the first one runs for 32 iterations and
     # the second one runs for 96 iterations
-    clf = xgboost.XGBClassifier(n_estimators=32, use_label_encoder=False)
+    clf = xgboost.XGBClassifier(n_estimators=32)
     clf.fit(X, y, eval_set=[(X, y)], eval_metric="logloss")
     assert clf.get_booster().num_boosted_rounds() == 32

@@ -54,14 +54,14 @@ def training_continuation_early_stop(tmpdir: str, use_pickle: bool) -> None:
     n_estimators = 512

     X, y = load_breast_cancer(return_X_y=True)
-    clf = xgboost.XGBClassifier(n_estimators=n_estimators, use_label_encoder=False)
+    clf = xgboost.XGBClassifier(n_estimators=n_estimators)
     clf.fit(X, y, eval_set=[(X, y)], eval_metric="logloss", callbacks=[early_stop])
     print("Total boosted rounds:", clf.get_booster().num_boosted_rounds())
     best = clf.best_iteration

     # Train 512 iterations in 2 sessions, with the first one runs for 128 iterations and
     # the second one runs until early stop.
-    clf = xgboost.XGBClassifier(n_estimators=128, use_label_encoder=False)
+    clf = xgboost.XGBClassifier(n_estimators=128)
     # Reinitialize the early stop callback
     early_stop = xgboost.callback.EarlyStopping(
         rounds=early_stopping_rounds, save_best=True
@@ -79,15 +79,13 @@ def training_continuation_early_stop(tmpdir: str, use_pickle: bool) -> None:
     else:
         path = os.path.join(tmpdir, "model-first-128.json")
         clf.save_model(path)
-        loaded = xgboost.XGBClassifier(use_label_encoder=False)
+        loaded = xgboost.XGBClassifier()
         loaded.load_model(path)

     early_stop = xgboost.callback.EarlyStopping(
         rounds=early_stopping_rounds, save_best=True
     )
-    clf = xgboost.XGBClassifier(
-        n_estimators=n_estimators - 128, use_label_encoder=False
-    )
+    clf = xgboost.XGBClassifier(n_estimators=n_estimators - 128)
     clf.fit(
         X,
         y,
@@ -35,7 +35,7 @@ def native_interface():
 def sklearn_interface():
     X_train, y_train = load_svmlight_file(train)
     X_test, y_test = load_svmlight_file(test)
-    clf = xgb.XGBClassifier(n_estimators=3, max_depth=2, eta=1, use_label_encoder=False)
+    clf = xgb.XGBClassifier(n_estimators=3, max_depth=2, eta=1)
     clf.fit(X_train, y_train, eval_set=[(X_test, y_test)])
     assert clf.n_classes_ == 2

@@ -36,9 +36,7 @@ parameter ``enable_categorical``:
 .. code:: python

   # Supported tree methods are `gpu_hist`, `approx`, and `hist`.
-  clf = xgb.XGBClassifier(
-      tree_method="gpu_hist", enable_categorical=True, use_label_encoder=False
-  )
+  clf = xgb.XGBClassifier(tree_method="gpu_hist", enable_categorical=True)
   # X is the dataframe we created in previous snippet
   clf.fit(X, y)
   # Must use JSON/UBJSON for serialization, otherwise the information is lost.
@@ -1304,13 +1304,15 @@ class XGBClassifier(XGBModel, XGBClassifierBase):
         self,
         *,
         objective: _SklObjective = "binary:logistic",
-        use_label_encoder: bool = False,
+        use_label_encoder: Optional[bool] = None,
         **kwargs: Any
     ) -> None:
         # must match the parameters for `get_params`
         self.use_label_encoder = use_label_encoder
         if use_label_encoder is True:
             raise ValueError("Label encoder was removed in 1.6.")
+        if use_label_encoder is not None:
+            warnings.warn("`use_label_encoder` is deprecated in 2.0.0.")
         super().__init__(objective=objective, **kwargs)

     @_deprecate_positional_args
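To make the constructor change above concrete, a hypothetical pytest sketch of how a caller observes it. This is not a test added by this commit; it assumes the default UserWarning category used by `warnings.warn` in the hunk above.

import pytest
import xgboost


def test_use_label_encoder_deprecation() -> None:
    # Any explicit value now triggers the deprecation warning added above.
    with pytest.warns(UserWarning, match="use_label_encoder"):
        xgboost.XGBClassifier(use_label_encoder=False)

    # True still raises, since the label encoder was removed back in 1.6.
    with pytest.raises(ValueError, match="removed in 1.6"):
        xgboost.XGBClassifier(use_label_encoder=True)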
@@ -152,16 +152,16 @@ class TestTrainingContinuation:
     def test_changed_parameter(self):
         from sklearn.datasets import load_breast_cancer
         X, y = load_breast_cancer(return_X_y=True)
-        clf = xgb.XGBClassifier(n_estimators=2, use_label_encoder=False)
+        clf = xgb.XGBClassifier(n_estimators=2)
         clf.fit(X, y, eval_set=[(X, y)], eval_metric="logloss")
         assert tm.non_increasing(clf.evals_result()["validation_0"]["logloss"])

         with tempfile.TemporaryDirectory() as tmpdir:
             clf.save_model(os.path.join(tmpdir, "clf.json"))
-            loaded = xgb.XGBClassifier(use_label_encoder=False)
+            loaded = xgb.XGBClassifier()
             loaded.load_model(os.path.join(tmpdir, "clf.json"))

-        clf = xgb.XGBClassifier(n_estimators=2, use_label_encoder=False)
+        clf = xgb.XGBClassifier(n_estimators=2)
         # change metric to error
         clf.fit(X, y, eval_set=[(X, y)], eval_metric="error")
         assert tm.non_increasing(clf.evals_result()["validation_0"]["error"])
@@ -777,9 +777,7 @@ def run_empty_dmatrix_auc(client: "Client", tree_method: str, n_workers: int) ->
     valid_X = dd.from_array(valid_X_, chunksize=n_samples)
     valid_y = dd.from_array(valid_y_, chunksize=n_samples)

-    cls = xgb.dask.DaskXGBClassifier(
-        tree_method=tree_method, n_estimators=2, use_label_encoder=False
-    )
+    cls = xgb.dask.DaskXGBClassifier(tree_method=tree_method, n_estimators=2)
     cls.fit(X, y, eval_metric=["auc", "aucpr"], eval_set=[(valid_X, valid_y)])

     # multiclass
@@ -808,9 +806,7 @@ def run_empty_dmatrix_auc(client: "Client", tree_method: str, n_workers: int) ->
     valid_X = dd.from_array(valid_X_, chunksize=n_samples)
     valid_y = dd.from_array(valid_y_, chunksize=n_samples)

-    cls = xgb.dask.DaskXGBClassifier(
-        tree_method=tree_method, n_estimators=2, use_label_encoder=False
-    )
+    cls = xgb.dask.DaskXGBClassifier(tree_method=tree_method, n_estimators=2)
     cls.fit(X, y, eval_metric=["auc", "aucpr"], eval_set=[(valid_X, valid_y)])


@@ -837,14 +833,10 @@ def run_auc(client: "Client", tree_method: str) -> None:
     valid_X = dd.from_array(valid_X_, chunksize=10)
     valid_y = dd.from_array(valid_y_, chunksize=10)

-    cls = xgb.XGBClassifier(
-        tree_method=tree_method, n_estimators=2, use_label_encoder=False
-    )
+    cls = xgb.XGBClassifier(tree_method=tree_method, n_estimators=2)
     cls.fit(X_, y_, eval_metric="auc", eval_set=[(valid_X_, valid_y_)])

-    dcls = xgb.dask.DaskXGBClassifier(
-        tree_method=tree_method, n_estimators=2, use_label_encoder=False
-    )
+    dcls = xgb.dask.DaskXGBClassifier(tree_method=tree_method, n_estimators=2)
     dcls.fit(X, y, eval_metric="auc", eval_set=[(valid_X, valid_y)])

     approx = dcls.evals_result()["validation_0"]["auc"]
@@ -1693,7 +1685,6 @@ def test_parallel_submits(client: "Client") -> None:
             verbosity=1,
             n_estimators=i + 1,
             eval_metric="merror",
-            use_label_encoder=False,
         )
         f = client.submit(cls.fit, X, y, pure=False)
         futures.append(f)
@@ -1786,7 +1777,6 @@ def test_parallel_submit_multi_clients() -> None:
             verbosity=1,
             n_estimators=i + 1,
             eval_metric="merror",
-            use_label_encoder=False,
         )
         f = client.submit(cls.fit, X, y, pure=False)
         futures.append((client, f))
@@ -774,13 +774,12 @@ def save_load_model(model_path):
     X = digits['data']
     kf = KFold(n_splits=2, shuffle=True, random_state=rng)
     for train_index, test_index in kf.split(X, y):
-        xgb_model = xgb.XGBClassifier(use_label_encoder=False).fit(X[train_index], y[train_index])
+        xgb_model = xgb.XGBClassifier().fit(X[train_index], y[train_index])
         xgb_model.save_model(model_path)

         xgb_model = xgb.XGBClassifier()
         xgb_model.load_model(model_path)

-        assert xgb_model.use_label_encoder is False
         assert isinstance(xgb_model.classes_, np.ndarray)
         assert isinstance(xgb_model._Booster, xgb.Booster)

@@ -972,8 +971,8 @@ def test_deprecate_position_arg():
         model.fit(X, y, w)

     with pytest.warns(FutureWarning):
-        xgb.XGBClassifier(1, use_label_encoder=False)
-    model = xgb.XGBClassifier(n_estimators=1, use_label_encoder=False)
+        xgb.XGBClassifier(1)
+    model = xgb.XGBClassifier(n_estimators=1)
     with pytest.warns(FutureWarning):
         model.fit(X, y, w)

@@ -990,9 +989,6 @@ def test_deprecate_position_arg():
     with pytest.warns(FutureWarning):
         model.fit(X, y, w)

-    with pytest.raises(ValueError):
-        xgb.XGBRFClassifier(1, use_label_encoder=True)
-
     model = xgb.XGBRFClassifier(n_estimators=1)
     with pytest.warns(FutureWarning):
         model.fit(X, y, w)
@@ -1334,7 +1330,6 @@ def test_evaluation_metric():
     X, y = load_digits(n_class=10, return_X_y=True)

     clf = xgb.XGBClassifier(
-        use_label_encoder=False,
         tree_method="hist",
         eval_metric=merror,
         n_estimators=16,
@@ -1344,7 +1339,6 @@ def test_evaluation_metric():
     custom = clf.evals_result()

     clf = xgb.XGBClassifier(
-        use_label_encoder=False,
         tree_method="hist",
         eval_metric="merror",
         n_estimators=16,
@@ -1360,7 +1354,6 @@ def test_evaluation_metric():
     )

     clf = xgb.XGBRFClassifier(
-        use_label_encoder=False,
         tree_method="hist", n_estimators=16,
         objective=tm.softprob_obj(10),
         eval_metric=merror,