Fix learning rate scheduler with cv. (#6720)
* Expose more methods in CVPack and the packed booster. * Fix the cv context in deprecated callbacks. * Fix the documentation.
This commit is contained in:
parent
9c8523432a
commit
a9b4a95225
@ -20,6 +20,8 @@ def _get_callback_context(env):
|
|||||||
context = 'train'
|
context = 'train'
|
||||||
elif env.model is None and env.cvfolds is not None:
|
elif env.model is None and env.cvfolds is not None:
|
||||||
context = 'cv'
|
context = 'cv'
|
||||||
|
else:
|
||||||
|
raise ValueError("Unexpected input with both model and cvfolds.")
|
||||||
return context
|
return context
|
||||||
|
|
||||||
|
|
||||||
@ -751,7 +753,7 @@ class LegacyCallbacks:
|
|||||||
'''Called before each iteration.'''
|
'''Called before each iteration.'''
|
||||||
for cb in self.callbacks_before_iter:
|
for cb in self.callbacks_before_iter:
|
||||||
rank = rabit.get_rank()
|
rank = rabit.get_rank()
|
||||||
cb(CallbackEnv(model=model,
|
cb(CallbackEnv(model=None if self.cvfolds is not None else model,
|
||||||
cvfolds=self.cvfolds,
|
cvfolds=self.cvfolds,
|
||||||
iteration=epoch,
|
iteration=epoch,
|
||||||
begin_iteration=self.start_iteration,
|
begin_iteration=self.start_iteration,
|
||||||
@ -764,6 +766,7 @@ class LegacyCallbacks:
|
|||||||
'''Called after each iteration.'''
|
'''Called after each iteration.'''
|
||||||
evaluation_result_list = []
|
evaluation_result_list = []
|
||||||
if self.cvfolds is not None:
|
if self.cvfolds is not None:
|
||||||
|
# dtrain is not used here.
|
||||||
scores = model.eval(epoch, self.feval)
|
scores = model.eval(epoch, self.feval)
|
||||||
self.aggregated_cv = _aggcv(scores)
|
self.aggregated_cv = _aggcv(scores)
|
||||||
evaluation_result_list = self.aggregated_cv
|
evaluation_result_list = self.aggregated_cv
|
||||||
@ -782,7 +785,7 @@ class LegacyCallbacks:
|
|||||||
try:
|
try:
|
||||||
for cb in self.callbacks_after_iter:
|
for cb in self.callbacks_after_iter:
|
||||||
rank = rabit.get_rank()
|
rank = rabit.get_rank()
|
||||||
cb(CallbackEnv(model=model,
|
cb(CallbackEnv(model=None if self.cvfolds is not None else model,
|
||||||
cvfolds=self.cvfolds,
|
cvfolds=self.cvfolds,
|
||||||
iteration=epoch,
|
iteration=epoch,
|
||||||
begin_iteration=self.start_iteration,
|
begin_iteration=self.start_iteration,
|
||||||
|
|||||||
@ -180,7 +180,7 @@ def train(params, dtrain, num_boost_round=10, evals=(), obj=None, feval=None,
|
|||||||
|
|
||||||
.. code-block:: python
|
.. code-block:: python
|
||||||
|
|
||||||
[xgb.callback.reset_learning_rate(custom_rates)]
|
[xgb.callback.LearningRateScheduler(custom_rates)]
|
||||||
|
|
||||||
Returns
|
Returns
|
||||||
-------
|
-------
|
||||||
@ -207,6 +207,11 @@ class CVPack(object):
|
|||||||
self.watchlist = [(dtrain, 'train'), (dtest, 'test')]
|
self.watchlist = [(dtrain, 'train'), (dtest, 'test')]
|
||||||
self.bst = Booster(param, [dtrain, dtest])
|
self.bst = Booster(param, [dtrain, dtest])
|
||||||
|
|
||||||
|
def __getattr__(self, name):
|
||||||
|
def _inner(*args, **kwargs):
|
||||||
|
return getattr(self.bst, name)(*args, **kwargs)
|
||||||
|
return _inner
|
||||||
|
|
||||||
def update(self, iteration, fobj):
|
def update(self, iteration, fobj):
|
||||||
""""Update the boosters for one iteration"""
|
""""Update the boosters for one iteration"""
|
||||||
self.bst.update(self.dtrain, iteration, fobj)
|
self.bst.update(self.dtrain, iteration, fobj)
|
||||||
@ -239,15 +244,24 @@ class _PackedBooster:
|
|||||||
'''Redirect to booster attr.'''
|
'''Redirect to booster attr.'''
|
||||||
return self.cvfolds[0].bst.attr(key)
|
return self.cvfolds[0].bst.attr(key)
|
||||||
|
|
||||||
|
def set_param(self, params, value=None):
|
||||||
|
"""Iterate through folds for set_param"""
|
||||||
|
for f in self.cvfolds:
|
||||||
|
f.bst.set_param(params, value)
|
||||||
|
|
||||||
|
def num_boosted_rounds(self):
|
||||||
|
'''Number of boosted rounds.'''
|
||||||
|
return self.cvfolds[0].num_boosted_rounds()
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def best_iteration(self):
|
def best_iteration(self):
|
||||||
'''Get best_iteration'''
|
'''Get best_iteration'''
|
||||||
ret = self.cvfolds[0].bst.attr('best_iteration')
|
return int(self.cvfolds[0].bst.attr("best_iteration"))
|
||||||
return int(ret)
|
|
||||||
|
|
||||||
def num_boosted_rounds(self) -> int:
|
@property
|
||||||
'''Number of boosted rounds.'''
|
def best_score(self):
|
||||||
return self.cvfolds[0].bst.num_boosted_rounds()
|
"""Get best_score."""
|
||||||
|
return float(self.cvfolds[0].bst.attr("best_score"))
|
||||||
|
|
||||||
|
|
||||||
def groups_to_rows(groups, boundaries):
|
def groups_to_rows(groups, boundaries):
|
||||||
@ -419,7 +433,7 @@ def cv(params, dtrain, num_boost_round=10, nfold=3, stratified=False, folds=None
|
|||||||
|
|
||||||
.. code-block:: python
|
.. code-block:: python
|
||||||
|
|
||||||
[xgb.callback.reset_learning_rate(custom_rates)]
|
[xgb.callback.LearningRateScheduler(custom_rates)]
|
||||||
shuffle : bool
|
shuffle : bool
|
||||||
Shuffle data before creating folds.
|
Shuffle data before creating folds.
|
||||||
|
|
||||||
|
|||||||
@ -206,6 +206,7 @@ class TestCallbacks:
|
|||||||
booster.best_iteration + early_stopping_rounds + 1
|
booster.best_iteration + early_stopping_rounds + 1
|
||||||
|
|
||||||
def run_eta_decay(self, tree_method, deprecated_callback):
|
def run_eta_decay(self, tree_method, deprecated_callback):
|
||||||
|
"""Test learning rate scheduler, used by both CPU and GPU tests."""
|
||||||
if deprecated_callback:
|
if deprecated_callback:
|
||||||
scheduler = xgb.callback.reset_learning_rate
|
scheduler = xgb.callback.reset_learning_rate
|
||||||
else:
|
else:
|
||||||
@ -217,7 +218,10 @@ class TestCallbacks:
|
|||||||
watchlist = [(dtest, 'eval'), (dtrain, 'train')]
|
watchlist = [(dtest, 'eval'), (dtrain, 'train')]
|
||||||
num_round = 4
|
num_round = 4
|
||||||
|
|
||||||
warning_check = pytest.warns(UserWarning) if deprecated_callback else tm.noop_context()
|
if deprecated_callback:
|
||||||
|
warning_check = pytest.warns(UserWarning)
|
||||||
|
else:
|
||||||
|
warning_check = tm.noop_context()
|
||||||
|
|
||||||
# learning_rates as a list
|
# learning_rates as a list
|
||||||
# init eta with 0 to check whether learning_rates work
|
# init eta with 0 to check whether learning_rates work
|
||||||
@ -288,17 +292,22 @@ class TestCallbacks:
|
|||||||
for i in range(1, len(eval_errors_0)):
|
for i in range(1, len(eval_errors_0)):
|
||||||
assert eval_errors_3[i] != eval_errors_2[i]
|
assert eval_errors_3[i] != eval_errors_2[i]
|
||||||
|
|
||||||
def test_eta_decay_hist(self):
|
with warning_check:
|
||||||
self.run_eta_decay('hist', True)
|
xgb.cv(param, dtrain, num_round, callbacks=[scheduler(eta_decay)])
|
||||||
self.run_eta_decay('hist', False)
|
|
||||||
|
|
||||||
def test_eta_decay_approx(self):
|
@pytest.mark.parametrize(
|
||||||
self.run_eta_decay('approx', True)
|
"tree_method, deprecated_callback",
|
||||||
self.run_eta_decay('approx', False)
|
[
|
||||||
|
("hist", True),
|
||||||
def test_eta_decay_exact(self):
|
("hist", False),
|
||||||
self.run_eta_decay('exact', True)
|
("approx", True),
|
||||||
self.run_eta_decay('exact', False)
|
("approx", False),
|
||||||
|
("exact", True),
|
||||||
|
("exact", False),
|
||||||
|
],
|
||||||
|
)
|
||||||
|
def test_eta_decay(self, tree_method, deprecated_callback):
|
||||||
|
self.run_eta_decay(tree_method, deprecated_callback)
|
||||||
|
|
||||||
def test_check_point(self):
|
def test_check_point(self):
|
||||||
from sklearn.datasets import load_breast_cancer
|
from sklearn.datasets import load_breast_cancer
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user