Deprecate use_label_encoder in XGBClassifier. (#7822)

* Deprecate `use_label_encoder` in XGBClassifier.

* The label encoder itself has already been removed; this change prepares to remove the `use_label_encoder` indicator parameter as well.
This commit is contained in:
Jiaming Yuan
2022-04-21 13:14:02 +08:00
committed by GitHub
parent 5815df4c46
commit 52d4eda786
8 changed files with 21 additions and 41 deletions

View File

@@ -152,16 +152,16 @@ class TestTrainingContinuation:
def test_changed_parameter(self):
from sklearn.datasets import load_breast_cancer
X, y = load_breast_cancer(return_X_y=True)
clf = xgb.XGBClassifier(n_estimators=2, use_label_encoder=False)
clf = xgb.XGBClassifier(n_estimators=2)
clf.fit(X, y, eval_set=[(X, y)], eval_metric="logloss")
assert tm.non_increasing(clf.evals_result()["validation_0"]["logloss"])
with tempfile.TemporaryDirectory() as tmpdir:
clf.save_model(os.path.join(tmpdir, "clf.json"))
loaded = xgb.XGBClassifier(use_label_encoder=False)
loaded = xgb.XGBClassifier()
loaded.load_model(os.path.join(tmpdir, "clf.json"))
clf = xgb.XGBClassifier(n_estimators=2, use_label_encoder=False)
clf = xgb.XGBClassifier(n_estimators=2)
# change metric to error
clf.fit(X, y, eval_set=[(X, y)], eval_metric="error")
assert tm.non_increasing(clf.evals_result()["validation_0"]["error"])

View File

@@ -777,9 +777,7 @@ def run_empty_dmatrix_auc(client: "Client", tree_method: str, n_workers: int) ->
valid_X = dd.from_array(valid_X_, chunksize=n_samples)
valid_y = dd.from_array(valid_y_, chunksize=n_samples)
cls = xgb.dask.DaskXGBClassifier(
tree_method=tree_method, n_estimators=2, use_label_encoder=False
)
cls = xgb.dask.DaskXGBClassifier(tree_method=tree_method, n_estimators=2)
cls.fit(X, y, eval_metric=["auc", "aucpr"], eval_set=[(valid_X, valid_y)])
# multiclass
@@ -808,9 +806,7 @@ def run_empty_dmatrix_auc(client: "Client", tree_method: str, n_workers: int) ->
valid_X = dd.from_array(valid_X_, chunksize=n_samples)
valid_y = dd.from_array(valid_y_, chunksize=n_samples)
cls = xgb.dask.DaskXGBClassifier(
tree_method=tree_method, n_estimators=2, use_label_encoder=False
)
cls = xgb.dask.DaskXGBClassifier(tree_method=tree_method, n_estimators=2)
cls.fit(X, y, eval_metric=["auc", "aucpr"], eval_set=[(valid_X, valid_y)])
@@ -837,14 +833,10 @@ def run_auc(client: "Client", tree_method: str) -> None:
valid_X = dd.from_array(valid_X_, chunksize=10)
valid_y = dd.from_array(valid_y_, chunksize=10)
cls = xgb.XGBClassifier(
tree_method=tree_method, n_estimators=2, use_label_encoder=False
)
cls = xgb.XGBClassifier(tree_method=tree_method, n_estimators=2)
cls.fit(X_, y_, eval_metric="auc", eval_set=[(valid_X_, valid_y_)])
dcls = xgb.dask.DaskXGBClassifier(
tree_method=tree_method, n_estimators=2, use_label_encoder=False
)
dcls = xgb.dask.DaskXGBClassifier(tree_method=tree_method, n_estimators=2)
dcls.fit(X, y, eval_metric="auc", eval_set=[(valid_X, valid_y)])
approx = dcls.evals_result()["validation_0"]["auc"]
@@ -1693,7 +1685,6 @@ def test_parallel_submits(client: "Client") -> None:
verbosity=1,
n_estimators=i + 1,
eval_metric="merror",
use_label_encoder=False,
)
f = client.submit(cls.fit, X, y, pure=False)
futures.append(f)
@@ -1786,7 +1777,6 @@ def test_parallel_submit_multi_clients() -> None:
verbosity=1,
n_estimators=i + 1,
eval_metric="merror",
use_label_encoder=False,
)
f = client.submit(cls.fit, X, y, pure=False)
futures.append((client, f))

View File

@@ -774,13 +774,12 @@ def save_load_model(model_path):
X = digits['data']
kf = KFold(n_splits=2, shuffle=True, random_state=rng)
for train_index, test_index in kf.split(X, y):
xgb_model = xgb.XGBClassifier(use_label_encoder=False).fit(X[train_index], y[train_index])
xgb_model = xgb.XGBClassifier().fit(X[train_index], y[train_index])
xgb_model.save_model(model_path)
xgb_model = xgb.XGBClassifier()
xgb_model.load_model(model_path)
assert xgb_model.use_label_encoder is False
assert isinstance(xgb_model.classes_, np.ndarray)
assert isinstance(xgb_model._Booster, xgb.Booster)
@@ -972,8 +971,8 @@ def test_deprecate_position_arg():
model.fit(X, y, w)
with pytest.warns(FutureWarning):
xgb.XGBClassifier(1, use_label_encoder=False)
model = xgb.XGBClassifier(n_estimators=1, use_label_encoder=False)
xgb.XGBClassifier(1)
model = xgb.XGBClassifier(n_estimators=1)
with pytest.warns(FutureWarning):
model.fit(X, y, w)
@@ -990,9 +989,6 @@ def test_deprecate_position_arg():
with pytest.warns(FutureWarning):
model.fit(X, y, w)
with pytest.raises(ValueError):
xgb.XGBRFClassifier(1, use_label_encoder=True)
model = xgb.XGBRFClassifier(n_estimators=1)
with pytest.warns(FutureWarning):
model.fit(X, y, w)
@@ -1334,7 +1330,6 @@ def test_evaluation_metric():
X, y = load_digits(n_class=10, return_X_y=True)
clf = xgb.XGBClassifier(
use_label_encoder=False,
tree_method="hist",
eval_metric=merror,
n_estimators=16,
@@ -1344,7 +1339,6 @@ def test_evaluation_metric():
custom = clf.evals_result()
clf = xgb.XGBClassifier(
use_label_encoder=False,
tree_method="hist",
eval_metric="merror",
n_estimators=16,
@@ -1360,7 +1354,6 @@ def test_evaluation_metric():
)
clf = xgb.XGBRFClassifier(
use_label_encoder=False,
tree_method="hist", n_estimators=16,
objective=tm.softprob_obj(10),
eval_metric=merror,