From a9b4a9522579d89e9bed2d7b011dd92a9a20b721 Mon Sep 17 00:00:00 2001
From: Jiaming Yuan
Date: Sun, 28 Feb 2021 13:57:42 +0800
Subject: [PATCH] Fix learning rate scheduler with cv. (#6720)

* Expose more methods in cvpack and packed booster.
* Fix cv context in deprecated callbacks.
* Fix document.
---
 python-package/xgboost/callback.py |  7 +++++--
 python-package/xgboost/training.py | 28 +++++++++++++++++++++-------
 tests/python/test_callback.py      | 31 ++++++++++++++++++++-----------
 3 files changed, 46 insertions(+), 20 deletions(-)

diff --git a/python-package/xgboost/callback.py b/python-package/xgboost/callback.py
index 3c66ccfb8..0be6a9a37 100644
--- a/python-package/xgboost/callback.py
+++ b/python-package/xgboost/callback.py
@@ -20,6 +20,8 @@ def _get_callback_context(env):
         context = 'train'
     elif env.model is None and env.cvfolds is not None:
         context = 'cv'
+    else:
+        raise ValueError("Unexpected input with both model and cvfolds.")
     return context
 
 
@@ -751,7 +753,7 @@ class LegacyCallbacks:
         '''Called before each iteration.'''
         for cb in self.callbacks_before_iter:
             rank = rabit.get_rank()
-            cb(CallbackEnv(model=model,
+            cb(CallbackEnv(model=None if self.cvfolds is not None else model,
                            cvfolds=self.cvfolds,
                            iteration=epoch,
                            begin_iteration=self.start_iteration,
@@ -764,6 +766,7 @@ class LegacyCallbacks:
         '''Called after each iteration.'''
         evaluation_result_list = []
         if self.cvfolds is not None:
+            # dtrain is not used here.
             scores = model.eval(epoch, self.feval)
             self.aggregated_cv = _aggcv(scores)
             evaluation_result_list = self.aggregated_cv
@@ -782,7 +785,7 @@ class LegacyCallbacks:
         try:
             for cb in self.callbacks_after_iter:
                 rank = rabit.get_rank()
-                cb(CallbackEnv(model=model,
+                cb(CallbackEnv(model=None if self.cvfolds is not None else model,
                                cvfolds=self.cvfolds,
                                iteration=epoch,
                                begin_iteration=self.start_iteration,
diff --git a/python-package/xgboost/training.py b/python-package/xgboost/training.py
index ef488baa6..65b4303c5 100644
--- a/python-package/xgboost/training.py
+++ b/python-package/xgboost/training.py
@@ -180,7 +180,7 @@ def train(params, dtrain, num_boost_round=10, evals=(), obj=None, feval=None,
 
         .. code-block:: python
 
-            [xgb.callback.reset_learning_rate(custom_rates)]
+            [xgb.callback.LearningRateScheduler(custom_rates)]
 
     Returns
     -------
@@ -207,6 +207,11 @@ class CVPack(object):
         self.watchlist = [(dtrain, 'train'), (dtest, 'test')]
         self.bst = Booster(param, [dtrain, dtest])
 
+    def __getattr__(self, name):
+        def _inner(*args, **kwargs):
+            return getattr(self.bst, name)(*args, **kwargs)
+        return _inner
+
     def update(self, iteration, fobj):
         """"Update the boosters for one iteration"""
         self.bst.update(self.dtrain, iteration, fobj)
@@ -239,15 +244,24 @@ class _PackedBooster:
         '''Redirect to booster attr.'''
         return self.cvfolds[0].bst.attr(key)
 
+    def set_param(self, params, value=None):
+        """Iterate through folds for set_param"""
+        for f in self.cvfolds:
+            f.bst.set_param(params, value)
+
+    def num_boosted_rounds(self):
+        '''Number of boosted rounds.'''
+        return self.cvfolds[0].num_boosted_rounds()
+
     @property
     def best_iteration(self):
         '''Get best_iteration'''
-        ret = self.cvfolds[0].bst.attr('best_iteration')
-        return int(ret)
+        return int(self.cvfolds[0].bst.attr("best_iteration"))
 
-    def num_boosted_rounds(self) -> int:
-        '''Number of boosted rounds.'''
-        return self.cvfolds[0].bst.num_boosted_rounds()
+    @property
+    def best_score(self):
+        """Get best_score."""
+        return float(self.cvfolds[0].bst.attr("best_score"))
 
 
 def groups_to_rows(groups, boundaries):
@@ -419,7 +433,7 @@ def cv(params, dtrain, num_boost_round=10, nfold=3, stratified=False, folds=None
 
         .. code-block:: python
 
-            [xgb.callback.reset_learning_rate(custom_rates)]
+            [xgb.callback.LearningRateScheduler(custom_rates)]
 
     shuffle : bool
         Shuffle data before creating folds.
diff --git a/tests/python/test_callback.py b/tests/python/test_callback.py
index d1b7f17ab..e9214822e 100644
--- a/tests/python/test_callback.py
+++ b/tests/python/test_callback.py
@@ -206,6 +206,7 @@ class TestCallbacks:
             booster.best_iteration + early_stopping_rounds + 1
 
     def run_eta_decay(self, tree_method, deprecated_callback):
+        """Test learning rate scheduler, used by both CPU and GPU tests."""
         if deprecated_callback:
             scheduler = xgb.callback.reset_learning_rate
         else:
@@ -217,7 +218,10 @@ class TestCallbacks:
         watchlist = [(dtest, 'eval'), (dtrain, 'train')]
         num_round = 4
 
-        warning_check = pytest.warns(UserWarning) if deprecated_callback else tm.noop_context()
+        if deprecated_callback:
+            warning_check = pytest.warns(UserWarning)
+        else:
+            warning_check = tm.noop_context()
 
         # learning_rates as a list
         # init eta with 0 to check whether learning_rates work
@@ -288,17 +292,22 @@ class TestCallbacks:
         for i in range(1, len(eval_errors_0)):
             assert eval_errors_3[i] != eval_errors_2[i]
 
-    def test_eta_decay_hist(self):
-        self.run_eta_decay('hist', True)
-        self.run_eta_decay('hist', False)
+        with warning_check:
+            xgb.cv(param, dtrain, num_round, callbacks=[scheduler(eta_decay)])
 
-    def test_eta_decay_approx(self):
-        self.run_eta_decay('approx', True)
-        self.run_eta_decay('approx', False)
-
-    def test_eta_decay_exact(self):
-        self.run_eta_decay('exact', True)
-        self.run_eta_decay('exact', False)
+    @pytest.mark.parametrize(
+        "tree_method, deprecated_callback",
+        [
+            ("hist", True),
+            ("hist", False),
+            ("approx", True),
+            ("approx", False),
+            ("exact", True),
+            ("exact", False),
+        ],
+    )
+    def test_eta_decay(self, tree_method, deprecated_callback):
+        self.run_eta_decay(tree_method, deprecated_callback)
 
     def test_check_point(self):
         from sklearn.datasets import load_breast_cancer
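
Usage note (not part of the patch): a minimal sketch of what this change
fixes -- passing a learning rate scheduler callback to xgb.cv. Before the
patch, _PackedBooster exposed no set_param, so LearningRateScheduler could
not propagate the scheduled eta to the CV folds. The dataset and parameter
values below are illustrative assumptions, not taken from the patch:

    import numpy as np
    import xgboost as xgb

    # Illustrative synthetic data; any DMatrix works here.
    rng = np.random.RandomState(1994)
    X = rng.randn(100, 10)
    y = rng.randint(0, 2, size=100)
    dtrain = xgb.DMatrix(X, label=y)

    # One learning rate per boosting round; a callable taking the round
    # index is also accepted.
    scheduler = xgb.callback.LearningRateScheduler([0.3, 0.2, 0.1, 0.05])

    # With this patch, _PackedBooster.set_param forwards the call to every
    # fold's Booster, so the scheduled rates take effect in each fold.
    xgb.cv(
        {"objective": "binary:logistic", "tree_method": "hist"},
        dtrain,
        num_boost_round=4,
        nfold=3,
        callbacks=[scheduler],
    )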