Add period to evaluation monitor. (#6348)

This commit is contained in:
Jiaming Yuan 2020-11-10 07:47:48 +08:00 committed by GitHub
parent d411f98d26
commit 184e2eac7d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 72 additions and 40 deletions

View File

@ -583,12 +583,18 @@ class EvaluationMonitor(TrainingCallback):
Extra user defined metric.
rank : int
Which worker should be used for printing the result.
period : int
How many epochs between printing.
show_stdv : bool
Used in cv to show standard deviation. Users should not specify it.
'''
def __init__(self, rank=0, show_stdv=False):
def __init__(self, rank=0, period=1, show_stdv=False):
    '''Store the printing configuration for the monitor.

    Parameters
    ----------
    rank : int
        Which worker should be used for printing the result.
    period : int
        How many epochs between printing.  Must be positive.
    show_stdv : bool
        Used in cv to show standard deviation.
    '''
    self.period = period
    self.printer_rank = rank
    self.show_stdv = show_stdv
    assert period > 0
    # Most recent message that was built but not printed; ``after_training``
    # emits it so the final result is not lost when rounds are skipped.
    self._lastest = None
    super().__init__()
def _fmt_metric(self, data, metric, score, std):
@ -601,6 +607,7 @@ class EvaluationMonitor(TrainingCallback):
def after_iteration(self, model, epoch, evals_log):
if not evals_log:
return False
msg = f'[{epoch}]'
if rabit.get_rank() == self.printer_rank:
for data, metric in evals_log.items():
@ -613,9 +620,20 @@ class EvaluationMonitor(TrainingCallback):
stdv = None
msg += self._fmt_metric(data, metric_name, score, stdv)
msg += '\n'
rabit.tracker_print(msg)
# Print on every ``period``-th epoch, starting with epoch 0, so that the
# default ``period=1`` reports every round.  Testing ``!= 0`` here would
# print only the off-period rounds and nothing at all for ``period=1``.
if epoch % self.period == 0:
    rabit.tracker_print(msg)
    self._lastest = None
else:
    # This round is skipped; remember its message so that
    # ``after_training`` can still report the final result.
    self._lastest = msg
return False
def after_training(self, model):
    '''Print the last withheld evaluation message, if any, then return
    ``model`` unchanged.'''
    # Only the designated printer worker ever emits output.
    if rabit.get_rank() != self.printer_rank:
        return model
    pending = self._lastest
    if pending is not None:
        rabit.tracker_print(pending)
    return model
class TrainingCheckPoint(TrainingCallback):
'''Checkpointing operation.

View File

@ -92,7 +92,8 @@ def _train_internal(params, dtrain,
assert all(isinstance(c, callback.TrainingCallback)
for c in callbacks), "You can't mix new and old callback styles."
if verbose_eval:
callbacks.append(callback.EvaluationMonitor())
verbose_eval = 1 if verbose_eval is True else verbose_eval
callbacks.append(callback.EvaluationMonitor(period=verbose_eval))
if early_stopping_rounds:
callbacks.append(callback.EarlyStopping(
rounds=early_stopping_rounds, maximize=maximize))
@ -485,7 +486,9 @@ def cv(params, dtrain, num_boost_round=10, nfold=3, stratified=False, folds=None
assert all(isinstance(c, callback.TrainingCallback)
for c in callbacks), "You can't mix new and old callback styles."
if isinstance(verbose_eval, bool) and verbose_eval:
callbacks.append(callback.EvaluationMonitor(show_stdv=show_stdv))
verbose_eval = 1 if verbose_eval is True else verbose_eval
callbacks.append(callback.EvaluationMonitor(period=verbose_eval,
show_stdv=show_stdv))
if early_stopping_rounds:
callbacks.append(callback.EarlyStopping(
rounds=early_stopping_rounds, maximize=maximize))

View File

@ -11,7 +11,7 @@ import sys
from test_gpu_pickling import build_dataset, model_path, load_pickle
sys.path.append("tests/python")
import test_basic as tb
import testing as tm
class TestLoadPickle(unittest.TestCase):
@ -61,7 +61,7 @@ class TestLoadPickle(unittest.TestCase):
rng = np.random.RandomState(1994)
X = rng.randn(10, 10)
y = rng.randn(10)
with tb.captured_output() as (out, err):
with tm.captured_output() as (out, err):
# Test no thrust exception is thrown
with pytest.raises(xgb.core.XGBoostError):
xgb.train({'tree_method': 'gpu_hist'}, xgb.DMatrix(X, y))

View File

@ -7,13 +7,12 @@ import os
import sys
import json
import pytest
import xgboost as xgb
from xgboost import XGBClassifier
sys.path.append("tests/python")
import testing as tm
import xgboost as xgb
from xgboost import XGBClassifier
model_path = './model.pkl'

View File

@ -1,7 +1,4 @@
# -*- coding: utf-8 -*-
import sys
from contextlib import contextmanager
from io import StringIO
import numpy as np
import os
import xgboost as xgb
@ -9,29 +6,12 @@ import unittest
import json
from pathlib import Path
import tempfile
import testing as tm
dpath = 'demo/data/'
rng = np.random.RandomState(1994)
@contextmanager
def captured_output():
"""Reassign stdout temporarily in order to test printed statements
Taken from:
https://stackoverflow.com/questions/4219717/how-to-assert-output-with-nosetest-unittest-in-python
Also works for pytest.
"""
new_out, new_err = StringIO(), StringIO()
old_out, old_err = sys.stdout, sys.stderr
try:
sys.stdout, sys.stderr = new_out, new_err
yield sys.stdout, sys.stderr
finally:
sys.stdout, sys.stderr = old_out, old_err
class TestBasic(unittest.TestCase):
def test_compat(self):
from xgboost.compat import lazy_isinstance
@ -181,7 +161,6 @@ class TestBasic(unittest.TestCase):
assert dm.num_row() == row
assert dm.num_col() == cols
def test_cv(self):
dm = xgb.DMatrix(dpath + 'agaricus.txt.train')
params = {'max_depth': 2, 'eta': 1, 'verbosity': 0,
@ -236,7 +215,7 @@ class TestBasic(unittest.TestCase):
print([fold.dtest.get_label() for fold in cbackenv.cvfolds])
# Run cross validation and capture standard out to test callback result
with captured_output() as (out, err):
with tm.captured_output() as (out, err):
xgb.cv(
params, dm, num_boost_round=1, folds=folds, callbacks=[cb],
as_pandas=False
@ -257,7 +236,6 @@ class TestBasicPathLike(unittest.TestCase):
assert dtrain.num_row() == 6513
assert dtrain.num_col() == 127
def test_DMatrix_save_to_path(self):
"""Saving to a binary file using pathlib from a DMatrix."""
data = np.random.randn(100, 2)

View File

@ -34,10 +34,27 @@ class TestCallbacks(unittest.TestCase):
num_boost_round=rounds,
evals_result=evals_result,
verbose_eval=True)
print('evals_result:', evals_result)
assert len(evals_result['Train']['error']) == rounds
assert len(evals_result['Valid']['error']) == rounds
with tm.captured_output() as (out, err):
xgb.train({'objective': 'binary:logistic',
'eval_metric': 'error'}, D_train,
evals=[(D_train, 'Train'), (D_valid, 'Valid')],
num_boost_round=rounds,
evals_result=evals_result,
verbose_eval=2)
output: str = out.getvalue().strip()
pos = 0
msg = 'Train-error'
for i in range(rounds // 2):
pos = output.find('Train-error', pos)
assert pos != -1
pos += len(msg)
assert output.find('Train-error', pos) == -1
def test_early_stopping(self):
D_train = xgb.DMatrix(self.X_train, self.y_train)
D_valid = xgb.DMatrix(self.X_valid, self.y_valid)

View File

@ -2,7 +2,6 @@ import collections
import importlib.util
import numpy as np
import xgboost as xgb
from xgboost.sklearn import XGBoostLabelEncoder
import testing as tm
import tempfile
import os
@ -11,8 +10,6 @@ import pytest
import unittest
import json
from test_basic import captured_output
rng = np.random.RandomState(1994)
pytestmark = pytest.mark.skipif(**tm.no_sklearn())
@ -872,7 +869,7 @@ def test_parameter_validation():
reg = xgb.XGBRegressor(foo='bar', verbosity=1)
X = np.random.randn(10, 10)
y = np.random.randn(10)
with captured_output() as (out, err):
with tm.captured_output() as (out, err):
reg.fit(X, y)
output = out.getvalue().strip()
@ -882,7 +879,7 @@ def test_parameter_validation():
importance_type='gain', verbosity=1)
X = np.random.randn(10, 10)
y = np.random.randn(10)
with captured_output() as (out, err):
with tm.captured_output() as (out, err):
reg.fit(X, y)
output = out.getvalue().strip()

View File

@ -1,6 +1,8 @@
# coding: utf-8
import os
import platform
import sys
from contextlib import contextmanager
from io import StringIO
from xgboost.compat import SKLEARN_INSTALLED, PANDAS_INSTALLED
from xgboost.compat import DASK_INSTALLED
import pytest
@ -281,6 +283,24 @@ class DirectoryExcursion:
os.remove(f)
@contextmanager
def captured_output():
    """Temporarily swap ``sys.stdout`` and ``sys.stderr`` for in-memory buffers.

    Yields the ``(stdout, stderr)`` pair of ``StringIO`` objects so that a test
    can inspect what was printed inside the ``with`` block.  The previous
    streams are put back on exit, even if the block raises.

    Adapted from:
    https://stackoverflow.com/questions/4219717/how-to-assert-output-with-nosetest-unittest-in-python

    Also works for pytest.
    """
    buffers = StringIO(), StringIO()
    saved_streams = sys.stdout, sys.stderr
    try:
        sys.stdout, sys.stderr = buffers
        yield buffers
    finally:
        sys.stdout, sys.stderr = saved_streams
CURDIR = os.path.normpath(os.path.abspath(os.path.dirname(__file__)))
PROJECT_ROOT = os.path.normpath(
os.path.join(CURDIR, os.path.pardir, os.path.pardir))