[breaking] Remove deprecated parameters in the skl interface. (#9986)
This commit is contained in:
@@ -16,13 +16,14 @@ class TestCallbacks:
|
||||
@classmethod
|
||||
def setup_class(cls):
|
||||
from sklearn.datasets import load_breast_cancer
|
||||
|
||||
X, y = load_breast_cancer(return_X_y=True)
|
||||
cls.X = X
|
||||
cls.y = y
|
||||
|
||||
split = int(X.shape[0]*0.8)
|
||||
cls.X_train = X[: split, ...]
|
||||
cls.y_train = y[: split, ...]
|
||||
split = int(X.shape[0] * 0.8)
|
||||
cls.X_train = X[:split, ...]
|
||||
cls.y_train = y[:split, ...]
|
||||
cls.X_valid = X[split:, ...]
|
||||
cls.y_valid = y[split:, ...]
|
||||
|
||||
@@ -31,31 +32,32 @@ class TestCallbacks:
|
||||
D_train: xgb.DMatrix,
|
||||
D_valid: xgb.DMatrix,
|
||||
rounds: int,
|
||||
verbose_eval: Union[bool, int]
|
||||
verbose_eval: Union[bool, int],
|
||||
):
|
||||
def check_output(output: str) -> None:
|
||||
if int(verbose_eval) == 1:
|
||||
# Should print each iteration info
|
||||
assert len(output.split('\n')) == rounds
|
||||
assert len(output.split("\n")) == rounds
|
||||
elif int(verbose_eval) > rounds:
|
||||
# Should print first and latest iteration info
|
||||
assert len(output.split('\n')) == 2
|
||||
assert len(output.split("\n")) == 2
|
||||
else:
|
||||
# Should print info by each period additionally to first and latest
|
||||
# iteration
|
||||
num_periods = rounds // int(verbose_eval)
|
||||
# Extra information is required for latest iteration
|
||||
is_extra_info_required = num_periods * int(verbose_eval) < (rounds - 1)
|
||||
assert len(output.split('\n')) == (
|
||||
assert len(output.split("\n")) == (
|
||||
1 + num_periods + int(is_extra_info_required)
|
||||
)
|
||||
|
||||
evals_result: xgb.callback.TrainingCallback.EvalsLog = {}
|
||||
params = {'objective': 'binary:logistic', 'eval_metric': 'error'}
|
||||
params = {"objective": "binary:logistic", "eval_metric": "error"}
|
||||
with tm.captured_output() as (out, err):
|
||||
xgb.train(
|
||||
params, D_train,
|
||||
evals=[(D_train, 'Train'), (D_valid, 'Valid')],
|
||||
params,
|
||||
D_train,
|
||||
evals=[(D_train, "Train"), (D_valid, "Valid")],
|
||||
num_boost_round=rounds,
|
||||
evals_result=evals_result,
|
||||
verbose_eval=verbose_eval,
|
||||
@@ -73,14 +75,16 @@ class TestCallbacks:
|
||||
D_valid = xgb.DMatrix(self.X_valid, self.y_valid)
|
||||
evals_result = {}
|
||||
rounds = 10
|
||||
xgb.train({'objective': 'binary:logistic',
|
||||
'eval_metric': 'error'}, D_train,
|
||||
evals=[(D_train, 'Train'), (D_valid, 'Valid')],
|
||||
num_boost_round=rounds,
|
||||
evals_result=evals_result,
|
||||
verbose_eval=True)
|
||||
assert len(evals_result['Train']['error']) == rounds
|
||||
assert len(evals_result['Valid']['error']) == rounds
|
||||
xgb.train(
|
||||
{"objective": "binary:logistic", "eval_metric": "error"},
|
||||
D_train,
|
||||
evals=[(D_train, "Train"), (D_valid, "Valid")],
|
||||
num_boost_round=rounds,
|
||||
evals_result=evals_result,
|
||||
verbose_eval=True,
|
||||
)
|
||||
assert len(evals_result["Train"]["error"]) == rounds
|
||||
assert len(evals_result["Valid"]["error"]) == rounds
|
||||
|
||||
self.run_evaluation_monitor(D_train, D_valid, rounds, True)
|
||||
self.run_evaluation_monitor(D_train, D_valid, rounds, 2)
|
||||
@@ -93,72 +97,83 @@ class TestCallbacks:
|
||||
evals_result = {}
|
||||
rounds = 30
|
||||
early_stopping_rounds = 5
|
||||
booster = xgb.train({'objective': 'binary:logistic',
|
||||
'eval_metric': 'error'}, D_train,
|
||||
evals=[(D_train, 'Train'), (D_valid, 'Valid')],
|
||||
num_boost_round=rounds,
|
||||
evals_result=evals_result,
|
||||
verbose_eval=True,
|
||||
early_stopping_rounds=early_stopping_rounds)
|
||||
dump = booster.get_dump(dump_format='json')
|
||||
booster = xgb.train(
|
||||
{"objective": "binary:logistic", "eval_metric": "error"},
|
||||
D_train,
|
||||
evals=[(D_train, "Train"), (D_valid, "Valid")],
|
||||
num_boost_round=rounds,
|
||||
evals_result=evals_result,
|
||||
verbose_eval=True,
|
||||
early_stopping_rounds=early_stopping_rounds,
|
||||
)
|
||||
dump = booster.get_dump(dump_format="json")
|
||||
assert len(dump) - booster.best_iteration == early_stopping_rounds + 1
|
||||
|
||||
def test_early_stopping_custom_eval(self):
|
||||
D_train = xgb.DMatrix(self.X_train, self.y_train)
|
||||
D_valid = xgb.DMatrix(self.X_valid, self.y_valid)
|
||||
early_stopping_rounds = 5
|
||||
booster = xgb.train({'objective': 'binary:logistic',
|
||||
'eval_metric': 'error',
|
||||
'tree_method': 'hist'}, D_train,
|
||||
evals=[(D_train, 'Train'), (D_valid, 'Valid')],
|
||||
feval=tm.eval_error_metric,
|
||||
num_boost_round=1000,
|
||||
early_stopping_rounds=early_stopping_rounds,
|
||||
verbose_eval=False)
|
||||
dump = booster.get_dump(dump_format='json')
|
||||
booster = xgb.train(
|
||||
{
|
||||
"objective": "binary:logistic",
|
||||
"eval_metric": "error",
|
||||
"tree_method": "hist",
|
||||
},
|
||||
D_train,
|
||||
evals=[(D_train, "Train"), (D_valid, "Valid")],
|
||||
feval=tm.eval_error_metric,
|
||||
num_boost_round=1000,
|
||||
early_stopping_rounds=early_stopping_rounds,
|
||||
verbose_eval=False,
|
||||
)
|
||||
dump = booster.get_dump(dump_format="json")
|
||||
assert len(dump) - booster.best_iteration == early_stopping_rounds + 1
|
||||
|
||||
def test_early_stopping_customize(self):
|
||||
D_train = xgb.DMatrix(self.X_train, self.y_train)
|
||||
D_valid = xgb.DMatrix(self.X_valid, self.y_valid)
|
||||
early_stopping_rounds = 5
|
||||
early_stop = xgb.callback.EarlyStopping(rounds=early_stopping_rounds,
|
||||
metric_name='CustomErr',
|
||||
data_name='Train')
|
||||
early_stop = xgb.callback.EarlyStopping(
|
||||
rounds=early_stopping_rounds, metric_name="CustomErr", data_name="Train"
|
||||
)
|
||||
# Specify which dataset and which metric should be used for early stopping.
|
||||
booster = xgb.train(
|
||||
{'objective': 'binary:logistic',
|
||||
'eval_metric': ['error', 'rmse'],
|
||||
'tree_method': 'hist'}, D_train,
|
||||
evals=[(D_train, 'Train'), (D_valid, 'Valid')],
|
||||
{
|
||||
"objective": "binary:logistic",
|
||||
"eval_metric": ["error", "rmse"],
|
||||
"tree_method": "hist",
|
||||
},
|
||||
D_train,
|
||||
evals=[(D_train, "Train"), (D_valid, "Valid")],
|
||||
feval=tm.eval_error_metric,
|
||||
num_boost_round=1000,
|
||||
callbacks=[early_stop],
|
||||
verbose_eval=False)
|
||||
dump = booster.get_dump(dump_format='json')
|
||||
verbose_eval=False,
|
||||
)
|
||||
dump = booster.get_dump(dump_format="json")
|
||||
assert len(dump) - booster.best_iteration == early_stopping_rounds + 1
|
||||
assert len(early_stop.stopping_history['Train']['CustomErr']) == len(dump)
|
||||
assert len(early_stop.stopping_history["Train"]["CustomErr"]) == len(dump)
|
||||
|
||||
rounds = 100
|
||||
early_stop = xgb.callback.EarlyStopping(
|
||||
rounds=early_stopping_rounds,
|
||||
metric_name='CustomErr',
|
||||
data_name='Train',
|
||||
metric_name="CustomErr",
|
||||
data_name="Train",
|
||||
min_delta=100,
|
||||
save_best=True,
|
||||
)
|
||||
booster = xgb.train(
|
||||
{
|
||||
'objective': 'binary:logistic',
|
||||
'eval_metric': ['error', 'rmse'],
|
||||
'tree_method': 'hist'
|
||||
"objective": "binary:logistic",
|
||||
"eval_metric": ["error", "rmse"],
|
||||
"tree_method": "hist",
|
||||
},
|
||||
D_train,
|
||||
evals=[(D_train, 'Train'), (D_valid, 'Valid')],
|
||||
evals=[(D_train, "Train"), (D_valid, "Valid")],
|
||||
feval=tm.eval_error_metric,
|
||||
num_boost_round=rounds,
|
||||
callbacks=[early_stop],
|
||||
verbose_eval=False
|
||||
verbose_eval=False,
|
||||
)
|
||||
# No iteration can be made with min_delta == 100
|
||||
assert booster.best_iteration == 0
|
||||
@@ -166,18 +181,20 @@ class TestCallbacks:
|
||||
|
||||
def test_early_stopping_skl(self):
|
||||
from sklearn.datasets import load_breast_cancer
|
||||
|
||||
X, y = load_breast_cancer(return_X_y=True)
|
||||
early_stopping_rounds = 5
|
||||
cls = xgb.XGBClassifier(
|
||||
early_stopping_rounds=early_stopping_rounds, eval_metric='error'
|
||||
early_stopping_rounds=early_stopping_rounds, eval_metric="error"
|
||||
)
|
||||
cls.fit(X, y, eval_set=[(X, y)])
|
||||
booster = cls.get_booster()
|
||||
dump = booster.get_dump(dump_format='json')
|
||||
dump = booster.get_dump(dump_format="json")
|
||||
assert len(dump) - booster.best_iteration == early_stopping_rounds + 1
|
||||
|
||||
def test_early_stopping_custom_eval_skl(self):
|
||||
from sklearn.datasets import load_breast_cancer
|
||||
|
||||
X, y = load_breast_cancer(return_X_y=True)
|
||||
early_stopping_rounds = 5
|
||||
early_stop = xgb.callback.EarlyStopping(rounds=early_stopping_rounds)
|
||||
@@ -186,11 +203,12 @@ class TestCallbacks:
|
||||
)
|
||||
cls.fit(X, y, eval_set=[(X, y)])
|
||||
booster = cls.get_booster()
|
||||
dump = booster.get_dump(dump_format='json')
|
||||
dump = booster.get_dump(dump_format="json")
|
||||
assert len(dump) - booster.best_iteration == early_stopping_rounds + 1
|
||||
|
||||
def test_early_stopping_save_best_model(self):
|
||||
from sklearn.datasets import load_breast_cancer
|
||||
|
||||
X, y = load_breast_cancer(return_X_y=True)
|
||||
n_estimators = 100
|
||||
early_stopping_rounds = 5
|
||||
@@ -200,11 +218,11 @@ class TestCallbacks:
|
||||
cls = xgb.XGBClassifier(
|
||||
n_estimators=n_estimators,
|
||||
eval_metric=tm.eval_error_metric_skl,
|
||||
callbacks=[early_stop]
|
||||
callbacks=[early_stop],
|
||||
)
|
||||
cls.fit(X, y, eval_set=[(X, y)])
|
||||
booster = cls.get_booster()
|
||||
dump = booster.get_dump(dump_format='json')
|
||||
dump = booster.get_dump(dump_format="json")
|
||||
assert len(dump) == booster.best_iteration + 1
|
||||
|
||||
early_stop = xgb.callback.EarlyStopping(
|
||||
@@ -220,8 +238,9 @@ class TestCallbacks:
|
||||
cls.fit(X, y, eval_set=[(X, y)])
|
||||
|
||||
# No error
|
||||
early_stop = xgb.callback.EarlyStopping(rounds=early_stopping_rounds,
|
||||
save_best=False)
|
||||
early_stop = xgb.callback.EarlyStopping(
|
||||
rounds=early_stopping_rounds, save_best=False
|
||||
)
|
||||
xgb.XGBClassifier(
|
||||
booster="gblinear",
|
||||
n_estimators=10,
|
||||
@@ -231,14 +250,17 @@ class TestCallbacks:
|
||||
|
||||
def test_early_stopping_continuation(self):
|
||||
from sklearn.datasets import load_breast_cancer
|
||||
|
||||
X, y = load_breast_cancer(return_X_y=True)
|
||||
cls = xgb.XGBClassifier(eval_metric=tm.eval_error_metric_skl)
|
||||
|
||||
early_stopping_rounds = 5
|
||||
early_stop = xgb.callback.EarlyStopping(
|
||||
rounds=early_stopping_rounds, save_best=True
|
||||
)
|
||||
with pytest.warns(UserWarning):
|
||||
cls.fit(X, y, eval_set=[(X, y)], callbacks=[early_stop])
|
||||
cls = xgb.XGBClassifier(
|
||||
eval_metric=tm.eval_error_metric_skl, callbacks=[early_stop]
|
||||
)
|
||||
cls.fit(X, y, eval_set=[(X, y)])
|
||||
|
||||
booster = cls.get_booster()
|
||||
assert booster.num_boosted_rounds() == booster.best_iteration + 1
|
||||
@@ -256,21 +278,10 @@ class TestCallbacks:
|
||||
)
|
||||
cls.fit(X, y, eval_set=[(X, y)])
|
||||
booster = cls.get_booster()
|
||||
assert booster.num_boosted_rounds() == \
|
||||
booster.best_iteration + early_stopping_rounds + 1
|
||||
|
||||
def test_deprecated(self):
|
||||
from sklearn.datasets import load_breast_cancer
|
||||
X, y = load_breast_cancer(return_X_y=True)
|
||||
early_stopping_rounds = 5
|
||||
early_stop = xgb.callback.EarlyStopping(
|
||||
rounds=early_stopping_rounds, save_best=True
|
||||
)
|
||||
clf = xgb.XGBClassifier(
|
||||
eval_metric=tm.eval_error_metric_skl, callbacks=[early_stop]
|
||||
)
|
||||
with pytest.raises(ValueError, match=r".*set_params.*"):
|
||||
clf.fit(X, y, eval_set=[(X, y)], callbacks=[early_stop])
|
||||
assert (
|
||||
booster.num_boosted_rounds()
|
||||
== booster.best_iteration + early_stopping_rounds + 1
|
||||
)
|
||||
|
||||
def run_eta_decay(self, tree_method):
|
||||
"""Test learning rate scheduler, used by both CPU and GPU tests."""
|
||||
@@ -343,7 +354,7 @@ class TestCallbacks:
|
||||
callbacks=[scheduler([0, 0, 0, 0])],
|
||||
evals_result=evals_result,
|
||||
)
|
||||
eval_errors_2 = list(map(float, evals_result['eval']['error']))
|
||||
eval_errors_2 = list(map(float, evals_result["eval"]["error"]))
|
||||
assert isinstance(bst, xgb.core.Booster)
|
||||
# validation error should not decrease, if eta/learning_rate = 0
|
||||
assert eval_errors_2[0] == eval_errors_2[-1]
|
||||
@@ -361,7 +372,7 @@ class TestCallbacks:
|
||||
callbacks=[scheduler(eta_decay)],
|
||||
evals_result=evals_result,
|
||||
)
|
||||
eval_errors_3 = list(map(float, evals_result['eval']['error']))
|
||||
eval_errors_3 = list(map(float, evals_result["eval"]["error"]))
|
||||
|
||||
assert isinstance(bst, xgb.core.Booster)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user