From 184e2eac7dd7b1df324ab5c7fca32c810981fd8b Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Tue, 10 Nov 2020 07:47:48 +0800 Subject: [PATCH] Add period to evaluation monitor. (#6348) --- python-package/xgboost/callback.py | 22 ++++++++++++++++++++-- python-package/xgboost/training.py | 7 +++++-- tests/python-gpu/load_pickle.py | 4 ++-- tests/python-gpu/test_gpu_pickling.py | 5 ++--- tests/python/test_basic.py | 26 ++------------------------ tests/python/test_callback.py | 19 ++++++++++++++++++- tests/python/test_with_sklearn.py | 7 ++----- tests/python/testing.py | 22 +++++++++++++++++++++- 8 files changed, 72 insertions(+), 40 deletions(-) diff --git a/python-package/xgboost/callback.py b/python-package/xgboost/callback.py index 7c8bdaff3..b69ada690 100644 --- a/python-package/xgboost/callback.py +++ b/python-package/xgboost/callback.py @@ -583,12 +583,18 @@ class EvaluationMonitor(TrainingCallback): Extra user defined metric. rank : int Which worker should be used for printing the result. + period : int + How many epochs between printing. show_stdv : bool Used in cv to show standard deviation. Users should not specify it. ''' - def __init__(self, rank=0, show_stdv=False): + def __init__(self, rank=0, period=1, show_stdv=False): self.printer_rank = rank self.show_stdv = show_stdv + self.period = period + assert period > 0 + # last skipped evaluation message, useful when early stopping and period are used together. 
+ self._lastest = None super().__init__() def _fmt_metric(self, data, metric, score, std): @@ -601,6 +607,7 @@ class EvaluationMonitor(TrainingCallback): def after_iteration(self, model, epoch, evals_log): if not evals_log: return False + msg = f'[{epoch}]' if rabit.get_rank() == self.printer_rank: for data, metric in evals_log.items(): @@ -613,9 +620,20 @@ class EvaluationMonitor(TrainingCallback): stdv = None msg += self._fmt_metric(data, metric_name, score, stdv) msg += '\n' - rabit.tracker_print(msg) + + if (epoch % self.period) != 0: + rabit.tracker_print(msg) + self._lastest = None + else: + # This message is skipped for now; remember it so it can be printed after training. + self._lastest = msg return False + def after_training(self, model): + if rabit.get_rank() == self.printer_rank and self._lastest is not None: + rabit.tracker_print(self._lastest) + return model + class TrainingCheckPoint(TrainingCallback): '''Checkpointing operation. diff --git a/python-package/xgboost/training.py b/python-package/xgboost/training.py index cd7097832..c13c8dc70 100644 --- a/python-package/xgboost/training.py +++ b/python-package/xgboost/training.py @@ -92,7 +92,8 @@ def _train_internal(params, dtrain, assert all(isinstance(c, callback.TrainingCallback) for c in callbacks), "You can't mix new and old callback styles." if verbose_eval: - callbacks.append(callback.EvaluationMonitor()) + verbose_eval = 1 if verbose_eval is True else verbose_eval + callbacks.append(callback.EvaluationMonitor(period=verbose_eval)) if early_stopping_rounds: callbacks.append(callback.EarlyStopping( rounds=early_stopping_rounds, maximize=maximize)) @@ -485,7 +486,9 @@ def cv(params, dtrain, num_boost_round=10, nfold=3, stratified=False, folds=None assert all(isinstance(c, callback.TrainingCallback) for c in callbacks), "You can't mix new and old callback styles." 
if isinstance(verbose_eval, bool) and verbose_eval: - callbacks.append(callback.EvaluationMonitor(show_stdv=show_stdv)) + verbose_eval = 1 if verbose_eval is True else verbose_eval + callbacks.append(callback.EvaluationMonitor(period=verbose_eval, + show_stdv=show_stdv)) if early_stopping_rounds: callbacks.append(callback.EarlyStopping( rounds=early_stopping_rounds, maximize=maximize)) diff --git a/tests/python-gpu/load_pickle.py b/tests/python-gpu/load_pickle.py index 2a75e612d..d36a26264 100644 --- a/tests/python-gpu/load_pickle.py +++ b/tests/python-gpu/load_pickle.py @@ -11,7 +11,7 @@ import sys from test_gpu_pickling import build_dataset, model_path, load_pickle sys.path.append("tests/python") -import test_basic as tb +import testing as tm class TestLoadPickle(unittest.TestCase): @@ -61,7 +61,7 @@ class TestLoadPickle(unittest.TestCase): rng = np.random.RandomState(1994) X = rng.randn(10, 10) y = rng.randn(10) - with tb.captured_output() as (out, err): + with tm.captured_output() as (out, err): # Test no thrust exception is thrown with pytest.raises(xgb.core.XGBoostError): xgb.train({'tree_method': 'gpu_hist'}, xgb.DMatrix(X, y)) diff --git a/tests/python-gpu/test_gpu_pickling.py b/tests/python-gpu/test_gpu_pickling.py index b0aacea90..049ace98f 100644 --- a/tests/python-gpu/test_gpu_pickling.py +++ b/tests/python-gpu/test_gpu_pickling.py @@ -7,13 +7,12 @@ import os import sys import json import pytest +import xgboost as xgb +from xgboost import XGBClassifier sys.path.append("tests/python") import testing as tm -import xgboost as xgb -from xgboost import XGBClassifier - model_path = './model.pkl' diff --git a/tests/python/test_basic.py b/tests/python/test_basic.py index 1e3505788..e0af63d49 100644 --- a/tests/python/test_basic.py +++ b/tests/python/test_basic.py @@ -1,7 +1,4 @@ # -*- coding: utf-8 -*- -import sys -from contextlib import contextmanager -from io import StringIO import numpy as np import os import xgboost as xgb @@ -9,29 +6,12 @@ import unittest 
import json from pathlib import Path import tempfile +import testing as tm dpath = 'demo/data/' rng = np.random.RandomState(1994) -@contextmanager -def captured_output(): - """Reassign stdout temporarily in order to test printed statements - Taken from: - https://stackoverflow.com/questions/4219717/how-to-assert-output-with-nosetest-unittest-in-python - - Also works for pytest. - - """ - new_out, new_err = StringIO(), StringIO() - old_out, old_err = sys.stdout, sys.stderr - try: - sys.stdout, sys.stderr = new_out, new_err - yield sys.stdout, sys.stderr - finally: - sys.stdout, sys.stderr = old_out, old_err - - class TestBasic(unittest.TestCase): def test_compat(self): from xgboost.compat import lazy_isinstance @@ -181,7 +161,6 @@ class TestBasic(unittest.TestCase): assert dm.num_row() == row assert dm.num_col() == cols - def test_cv(self): dm = xgb.DMatrix(dpath + 'agaricus.txt.train') params = {'max_depth': 2, 'eta': 1, 'verbosity': 0, @@ -236,7 +215,7 @@ class TestBasic(unittest.TestCase): print([fold.dtest.get_label() for fold in cbackenv.cvfolds]) # Run cross validation and capture standard out to test callback result - with captured_output() as (out, err): + with tm.captured_output() as (out, err): xgb.cv( params, dm, num_boost_round=1, folds=folds, callbacks=[cb], as_pandas=False @@ -257,7 +236,6 @@ class TestBasicPathLike(unittest.TestCase): assert dtrain.num_row() == 6513 assert dtrain.num_col() == 127 - def test_DMatrix_save_to_path(self): """Saving to a binary file using pathlib from a DMatrix.""" data = np.random.randn(100, 2) diff --git a/tests/python/test_callback.py b/tests/python/test_callback.py index b12354556..114f1203c 100644 --- a/tests/python/test_callback.py +++ b/tests/python/test_callback.py @@ -34,10 +34,27 @@ class TestCallbacks(unittest.TestCase): num_boost_round=rounds, evals_result=evals_result, verbose_eval=True) - print('evals_result:', evals_result) assert len(evals_result['Train']['error']) == rounds assert 
len(evals_result['Valid']['error']) == rounds + with tm.captured_output() as (out, err): + xgb.train({'objective': 'binary:logistic', + 'eval_metric': 'error'}, D_train, + evals=[(D_train, 'Train'), (D_valid, 'Valid')], + num_boost_round=rounds, + evals_result=evals_result, + verbose_eval=2) + output: str = out.getvalue().strip() + + pos = 0 + msg = 'Train-error' + for i in range(rounds // 2): + pos = output.find('Train-error', pos) + assert pos != -1 + pos += len(msg) + + assert output.find('Train-error', pos) == -1 + def test_early_stopping(self): D_train = xgb.DMatrix(self.X_train, self.y_train) D_valid = xgb.DMatrix(self.X_valid, self.y_valid) diff --git a/tests/python/test_with_sklearn.py b/tests/python/test_with_sklearn.py index 7ffcb67b8..1514517f8 100644 --- a/tests/python/test_with_sklearn.py +++ b/tests/python/test_with_sklearn.py @@ -2,7 +2,6 @@ import collections import importlib.util import numpy as np import xgboost as xgb -from xgboost.sklearn import XGBoostLabelEncoder import testing as tm import tempfile import os @@ -11,8 +10,6 @@ import pytest import unittest import json -from test_basic import captured_output - rng = np.random.RandomState(1994) pytestmark = pytest.mark.skipif(**tm.no_sklearn()) @@ -872,7 +869,7 @@ def test_parameter_validation(): reg = xgb.XGBRegressor(foo='bar', verbosity=1) X = np.random.randn(10, 10) y = np.random.randn(10) - with captured_output() as (out, err): + with tm.captured_output() as (out, err): reg.fit(X, y) output = out.getvalue().strip() @@ -882,7 +879,7 @@ def test_parameter_validation(): importance_type='gain', verbosity=1) X = np.random.randn(10, 10) y = np.random.randn(10) - with captured_output() as (out, err): + with tm.captured_output() as (out, err): reg.fit(X, y) output = out.getvalue().strip() diff --git a/tests/python/testing.py b/tests/python/testing.py index f4b8654e3..feb5f4a07 100644 --- a/tests/python/testing.py +++ b/tests/python/testing.py @@ -1,6 +1,8 @@ # coding: utf-8 import os -import 
platform +import sys +from contextlib import contextmanager +from io import StringIO from xgboost.compat import SKLEARN_INSTALLED, PANDAS_INSTALLED from xgboost.compat import DASK_INSTALLED import pytest @@ -281,6 +283,24 @@ class DirectoryExcursion: os.remove(f) +@contextmanager +def captured_output(): + """Reassign stdout temporarily in order to test printed statements + Taken from: + https://stackoverflow.com/questions/4219717/how-to-assert-output-with-nosetest-unittest-in-python + + Also works for pytest. + + """ + new_out, new_err = StringIO(), StringIO() + old_out, old_err = sys.stdout, sys.stderr + try: + sys.stdout, sys.stderr = new_out, new_err + yield sys.stdout, sys.stderr + finally: + sys.stdout, sys.stderr = old_out, old_err + + CURDIR = os.path.normpath(os.path.abspath(os.path.dirname(__file__))) PROJECT_ROOT = os.path.normpath( os.path.join(CURDIR, os.path.pardir, os.path.pardir))