[breaking] Remove deprecated parameters in the skl interface. (#9986)

This commit is contained in:
Jiaming Yuan
2024-01-15 20:40:05 +08:00
committed by GitHub
parent 2de85d3241
commit 0798e36d73
16 changed files with 418 additions and 462 deletions

View File

@@ -16,13 +16,14 @@ class TestCallbacks:
@classmethod
def setup_class(cls):
from sklearn.datasets import load_breast_cancer
X, y = load_breast_cancer(return_X_y=True)
cls.X = X
cls.y = y
split = int(X.shape[0]*0.8)
cls.X_train = X[: split, ...]
cls.y_train = y[: split, ...]
split = int(X.shape[0] * 0.8)
cls.X_train = X[:split, ...]
cls.y_train = y[:split, ...]
cls.X_valid = X[split:, ...]
cls.y_valid = y[split:, ...]
@@ -31,31 +32,32 @@ class TestCallbacks:
D_train: xgb.DMatrix,
D_valid: xgb.DMatrix,
rounds: int,
verbose_eval: Union[bool, int]
verbose_eval: Union[bool, int],
):
def check_output(output: str) -> None:
if int(verbose_eval) == 1:
# Should print each iteration info
assert len(output.split('\n')) == rounds
assert len(output.split("\n")) == rounds
elif int(verbose_eval) > rounds:
# Should print first and latest iteration info
assert len(output.split('\n')) == 2
assert len(output.split("\n")) == 2
else:
# Should print info by each period additionally to first and latest
# iteration
num_periods = rounds // int(verbose_eval)
# Extra information is required for latest iteration
is_extra_info_required = num_periods * int(verbose_eval) < (rounds - 1)
assert len(output.split('\n')) == (
assert len(output.split("\n")) == (
1 + num_periods + int(is_extra_info_required)
)
evals_result: xgb.callback.TrainingCallback.EvalsLog = {}
params = {'objective': 'binary:logistic', 'eval_metric': 'error'}
params = {"objective": "binary:logistic", "eval_metric": "error"}
with tm.captured_output() as (out, err):
xgb.train(
params, D_train,
evals=[(D_train, 'Train'), (D_valid, 'Valid')],
params,
D_train,
evals=[(D_train, "Train"), (D_valid, "Valid")],
num_boost_round=rounds,
evals_result=evals_result,
verbose_eval=verbose_eval,
@@ -73,14 +75,16 @@ class TestCallbacks:
D_valid = xgb.DMatrix(self.X_valid, self.y_valid)
evals_result = {}
rounds = 10
xgb.train({'objective': 'binary:logistic',
'eval_metric': 'error'}, D_train,
evals=[(D_train, 'Train'), (D_valid, 'Valid')],
num_boost_round=rounds,
evals_result=evals_result,
verbose_eval=True)
assert len(evals_result['Train']['error']) == rounds
assert len(evals_result['Valid']['error']) == rounds
xgb.train(
{"objective": "binary:logistic", "eval_metric": "error"},
D_train,
evals=[(D_train, "Train"), (D_valid, "Valid")],
num_boost_round=rounds,
evals_result=evals_result,
verbose_eval=True,
)
assert len(evals_result["Train"]["error"]) == rounds
assert len(evals_result["Valid"]["error"]) == rounds
self.run_evaluation_monitor(D_train, D_valid, rounds, True)
self.run_evaluation_monitor(D_train, D_valid, rounds, 2)
@@ -93,72 +97,83 @@ class TestCallbacks:
evals_result = {}
rounds = 30
early_stopping_rounds = 5
booster = xgb.train({'objective': 'binary:logistic',
'eval_metric': 'error'}, D_train,
evals=[(D_train, 'Train'), (D_valid, 'Valid')],
num_boost_round=rounds,
evals_result=evals_result,
verbose_eval=True,
early_stopping_rounds=early_stopping_rounds)
dump = booster.get_dump(dump_format='json')
booster = xgb.train(
{"objective": "binary:logistic", "eval_metric": "error"},
D_train,
evals=[(D_train, "Train"), (D_valid, "Valid")],
num_boost_round=rounds,
evals_result=evals_result,
verbose_eval=True,
early_stopping_rounds=early_stopping_rounds,
)
dump = booster.get_dump(dump_format="json")
assert len(dump) - booster.best_iteration == early_stopping_rounds + 1
def test_early_stopping_custom_eval(self):
D_train = xgb.DMatrix(self.X_train, self.y_train)
D_valid = xgb.DMatrix(self.X_valid, self.y_valid)
early_stopping_rounds = 5
booster = xgb.train({'objective': 'binary:logistic',
'eval_metric': 'error',
'tree_method': 'hist'}, D_train,
evals=[(D_train, 'Train'), (D_valid, 'Valid')],
feval=tm.eval_error_metric,
num_boost_round=1000,
early_stopping_rounds=early_stopping_rounds,
verbose_eval=False)
dump = booster.get_dump(dump_format='json')
booster = xgb.train(
{
"objective": "binary:logistic",
"eval_metric": "error",
"tree_method": "hist",
},
D_train,
evals=[(D_train, "Train"), (D_valid, "Valid")],
feval=tm.eval_error_metric,
num_boost_round=1000,
early_stopping_rounds=early_stopping_rounds,
verbose_eval=False,
)
dump = booster.get_dump(dump_format="json")
assert len(dump) - booster.best_iteration == early_stopping_rounds + 1
def test_early_stopping_customize(self):
D_train = xgb.DMatrix(self.X_train, self.y_train)
D_valid = xgb.DMatrix(self.X_valid, self.y_valid)
early_stopping_rounds = 5
early_stop = xgb.callback.EarlyStopping(rounds=early_stopping_rounds,
metric_name='CustomErr',
data_name='Train')
early_stop = xgb.callback.EarlyStopping(
rounds=early_stopping_rounds, metric_name="CustomErr", data_name="Train"
)
# Specify which dataset and which metric should be used for early stopping.
booster = xgb.train(
{'objective': 'binary:logistic',
'eval_metric': ['error', 'rmse'],
'tree_method': 'hist'}, D_train,
evals=[(D_train, 'Train'), (D_valid, 'Valid')],
{
"objective": "binary:logistic",
"eval_metric": ["error", "rmse"],
"tree_method": "hist",
},
D_train,
evals=[(D_train, "Train"), (D_valid, "Valid")],
feval=tm.eval_error_metric,
num_boost_round=1000,
callbacks=[early_stop],
verbose_eval=False)
dump = booster.get_dump(dump_format='json')
verbose_eval=False,
)
dump = booster.get_dump(dump_format="json")
assert len(dump) - booster.best_iteration == early_stopping_rounds + 1
assert len(early_stop.stopping_history['Train']['CustomErr']) == len(dump)
assert len(early_stop.stopping_history["Train"]["CustomErr"]) == len(dump)
rounds = 100
early_stop = xgb.callback.EarlyStopping(
rounds=early_stopping_rounds,
metric_name='CustomErr',
data_name='Train',
metric_name="CustomErr",
data_name="Train",
min_delta=100,
save_best=True,
)
booster = xgb.train(
{
'objective': 'binary:logistic',
'eval_metric': ['error', 'rmse'],
'tree_method': 'hist'
"objective": "binary:logistic",
"eval_metric": ["error", "rmse"],
"tree_method": "hist",
},
D_train,
evals=[(D_train, 'Train'), (D_valid, 'Valid')],
evals=[(D_train, "Train"), (D_valid, "Valid")],
feval=tm.eval_error_metric,
num_boost_round=rounds,
callbacks=[early_stop],
verbose_eval=False
verbose_eval=False,
)
# No iteration can be made with min_delta == 100
assert booster.best_iteration == 0
@@ -166,18 +181,20 @@ class TestCallbacks:
def test_early_stopping_skl(self):
from sklearn.datasets import load_breast_cancer
X, y = load_breast_cancer(return_X_y=True)
early_stopping_rounds = 5
cls = xgb.XGBClassifier(
early_stopping_rounds=early_stopping_rounds, eval_metric='error'
early_stopping_rounds=early_stopping_rounds, eval_metric="error"
)
cls.fit(X, y, eval_set=[(X, y)])
booster = cls.get_booster()
dump = booster.get_dump(dump_format='json')
dump = booster.get_dump(dump_format="json")
assert len(dump) - booster.best_iteration == early_stopping_rounds + 1
def test_early_stopping_custom_eval_skl(self):
from sklearn.datasets import load_breast_cancer
X, y = load_breast_cancer(return_X_y=True)
early_stopping_rounds = 5
early_stop = xgb.callback.EarlyStopping(rounds=early_stopping_rounds)
@@ -186,11 +203,12 @@ class TestCallbacks:
)
cls.fit(X, y, eval_set=[(X, y)])
booster = cls.get_booster()
dump = booster.get_dump(dump_format='json')
dump = booster.get_dump(dump_format="json")
assert len(dump) - booster.best_iteration == early_stopping_rounds + 1
def test_early_stopping_save_best_model(self):
from sklearn.datasets import load_breast_cancer
X, y = load_breast_cancer(return_X_y=True)
n_estimators = 100
early_stopping_rounds = 5
@@ -200,11 +218,11 @@ class TestCallbacks:
cls = xgb.XGBClassifier(
n_estimators=n_estimators,
eval_metric=tm.eval_error_metric_skl,
callbacks=[early_stop]
callbacks=[early_stop],
)
cls.fit(X, y, eval_set=[(X, y)])
booster = cls.get_booster()
dump = booster.get_dump(dump_format='json')
dump = booster.get_dump(dump_format="json")
assert len(dump) == booster.best_iteration + 1
early_stop = xgb.callback.EarlyStopping(
@@ -220,8 +238,9 @@ class TestCallbacks:
cls.fit(X, y, eval_set=[(X, y)])
# No error
early_stop = xgb.callback.EarlyStopping(rounds=early_stopping_rounds,
save_best=False)
early_stop = xgb.callback.EarlyStopping(
rounds=early_stopping_rounds, save_best=False
)
xgb.XGBClassifier(
booster="gblinear",
n_estimators=10,
@@ -231,14 +250,17 @@ class TestCallbacks:
def test_early_stopping_continuation(self):
from sklearn.datasets import load_breast_cancer
X, y = load_breast_cancer(return_X_y=True)
cls = xgb.XGBClassifier(eval_metric=tm.eval_error_metric_skl)
early_stopping_rounds = 5
early_stop = xgb.callback.EarlyStopping(
rounds=early_stopping_rounds, save_best=True
)
with pytest.warns(UserWarning):
cls.fit(X, y, eval_set=[(X, y)], callbacks=[early_stop])
cls = xgb.XGBClassifier(
eval_metric=tm.eval_error_metric_skl, callbacks=[early_stop]
)
cls.fit(X, y, eval_set=[(X, y)])
booster = cls.get_booster()
assert booster.num_boosted_rounds() == booster.best_iteration + 1
@@ -256,21 +278,10 @@ class TestCallbacks:
)
cls.fit(X, y, eval_set=[(X, y)])
booster = cls.get_booster()
assert booster.num_boosted_rounds() == \
booster.best_iteration + early_stopping_rounds + 1
def test_deprecated(self):
from sklearn.datasets import load_breast_cancer
X, y = load_breast_cancer(return_X_y=True)
early_stopping_rounds = 5
early_stop = xgb.callback.EarlyStopping(
rounds=early_stopping_rounds, save_best=True
)
clf = xgb.XGBClassifier(
eval_metric=tm.eval_error_metric_skl, callbacks=[early_stop]
)
with pytest.raises(ValueError, match=r".*set_params.*"):
clf.fit(X, y, eval_set=[(X, y)], callbacks=[early_stop])
assert (
booster.num_boosted_rounds()
== booster.best_iteration + early_stopping_rounds + 1
)
def run_eta_decay(self, tree_method):
"""Test learning rate scheduler, used by both CPU and GPU tests."""
@@ -343,7 +354,7 @@ class TestCallbacks:
callbacks=[scheduler([0, 0, 0, 0])],
evals_result=evals_result,
)
eval_errors_2 = list(map(float, evals_result['eval']['error']))
eval_errors_2 = list(map(float, evals_result["eval"]["error"]))
assert isinstance(bst, xgb.core.Booster)
# validation error should not decrease, if eta/learning_rate = 0
assert eval_errors_2[0] == eval_errors_2[-1]
@@ -361,7 +372,7 @@ class TestCallbacks:
callbacks=[scheduler(eta_decay)],
evals_result=evals_result,
)
eval_errors_3 = list(map(float, evals_result['eval']['error']))
eval_errors_3 = list(map(float, evals_result["eval"]["error"]))
assert isinstance(bst, xgb.core.Booster)