From 184e2eac7dd7b1df324ab5c7fca32c810981fd8b Mon Sep 17 00:00:00 2001
From: Jiaming Yuan <jm.yuan@outlook.com>
Date: Tue, 10 Nov 2020 07:47:48 +0800
Subject: [PATCH] Add period to evaluation monitor. (#6348)

---
 python-package/xgboost/callback.py    | 22 ++++++++++++++++++++--
 python-package/xgboost/training.py    |  7 +++++--
 tests/python-gpu/load_pickle.py       |  4 ++--
 tests/python-gpu/test_gpu_pickling.py |  5 ++---
 tests/python/test_basic.py            | 26 ++------------------------
 tests/python/test_callback.py         | 19 ++++++++++++++++++-
 tests/python/test_with_sklearn.py     |  7 ++-----
 tests/python/testing.py               | 22 +++++++++++++++++++++-
 8 files changed, 72 insertions(+), 40 deletions(-)

diff --git a/python-package/xgboost/callback.py b/python-package/xgboost/callback.py
index 7c8bdaff3..b69ada690 100644
--- a/python-package/xgboost/callback.py
+++ b/python-package/xgboost/callback.py
@@ -583,12 +583,18 @@ class EvaluationMonitor(TrainingCallback):
         Extra user defined metric.
     rank : int
         Which worker should be used for printing the result.
+    period : int
+        How many epochs between printing.
     show_stdv : bool
         Used in cv to show standard deviation.  Users should not specify it.
     '''
-    def __init__(self, rank=0, show_stdv=False):
+    def __init__(self, rank=0, period=1, show_stdv=False):
         self.printer_rank = rank
         self.show_stdv = show_stdv
+        if period <= 0:  # explicit check: a bare assert is stripped under `python -O`
+            raise ValueError(f'`period` must be a positive integer, got {period}.')
+        self.period = period
+        self._lastest = None  # last message withheld by `period`; printed by after_training
         super().__init__()
 
     def _fmt_metric(self, data, metric, score, std):
@@ -601,6 +607,7 @@ class EvaluationMonitor(TrainingCallback):
     def after_iteration(self, model, epoch, evals_log):
         if not evals_log:
             return False
+
         msg = f'[{epoch}]'
         if rabit.get_rank() == self.printer_rank:
             for data, metric in evals_log.items():
@@ -613,9 +620,20 @@ class EvaluationMonitor(TrainingCallback):
                         stdv = None
                     msg += self._fmt_metric(data, metric_name, score, stdv)
             msg += '\n'
-            rabit.tracker_print(msg)
+
+            if (epoch + 1) % self.period == 0:  # every `period`-th round; all rounds if 1
+                rabit.tracker_print(msg)
+                self._lastest = None
+            else:
+                # Skipped by `period`; remember it so after_training can print the last round.
+                self._lastest = msg
         return False
 
+    def after_training(self, model):
+        if rabit.get_rank() == self.printer_rank and self._lastest is not None:
+            rabit.tracker_print(self._lastest)  # flush the message last withheld by `period`
+        return model
+
 
 class TrainingCheckPoint(TrainingCallback):
     '''Checkpointing operation.
diff --git a/python-package/xgboost/training.py b/python-package/xgboost/training.py
index cd7097832..c13c8dc70 100644
--- a/python-package/xgboost/training.py
+++ b/python-package/xgboost/training.py
@@ -92,7 +92,8 @@ def _train_internal(params, dtrain,
         assert all(isinstance(c, callback.TrainingCallback)
                    for c in callbacks), "You can't mix new and old callback styles."
         if verbose_eval:
-            callbacks.append(callback.EvaluationMonitor())
+            verbose_eval = 1 if verbose_eval is True else verbose_eval
+            callbacks.append(callback.EvaluationMonitor(period=verbose_eval))
         if early_stopping_rounds:
             callbacks.append(callback.EarlyStopping(
                 rounds=early_stopping_rounds, maximize=maximize))
@@ -485,7 +486,9 @@ def cv(params, dtrain, num_boost_round=10, nfold=3, stratified=False, folds=None
         assert all(isinstance(c, callback.TrainingCallback)
                    for c in callbacks), "You can't mix new and old callback styles."
         if isinstance(verbose_eval, bool) and verbose_eval:
-            callbacks.append(callback.EvaluationMonitor(show_stdv=show_stdv))
+            verbose_eval = 1 if verbose_eval is True else verbose_eval
+            callbacks.append(callback.EvaluationMonitor(period=verbose_eval,
+                                                        show_stdv=show_stdv))
         if early_stopping_rounds:
             callbacks.append(callback.EarlyStopping(
                 rounds=early_stopping_rounds, maximize=maximize))
diff --git a/tests/python-gpu/load_pickle.py b/tests/python-gpu/load_pickle.py
index 2a75e612d..d36a26264 100644
--- a/tests/python-gpu/load_pickle.py
+++ b/tests/python-gpu/load_pickle.py
@@ -11,7 +11,7 @@ import sys
 from test_gpu_pickling import build_dataset, model_path, load_pickle
 
 sys.path.append("tests/python")
-import test_basic as tb
+import testing as tm
 
 
 class TestLoadPickle(unittest.TestCase):
@@ -61,7 +61,7 @@ class TestLoadPickle(unittest.TestCase):
         rng = np.random.RandomState(1994)
         X = rng.randn(10, 10)
         y = rng.randn(10)
-        with tb.captured_output() as (out, err):
+        with tm.captured_output() as (out, err):
             # Test no thrust exception is thrown
             with pytest.raises(xgb.core.XGBoostError):
                 xgb.train({'tree_method': 'gpu_hist'}, xgb.DMatrix(X, y))
diff --git a/tests/python-gpu/test_gpu_pickling.py b/tests/python-gpu/test_gpu_pickling.py
index b0aacea90..049ace98f 100644
--- a/tests/python-gpu/test_gpu_pickling.py
+++ b/tests/python-gpu/test_gpu_pickling.py
@@ -7,13 +7,12 @@ import os
 import sys
 import json
 import pytest
+import xgboost as xgb
+from xgboost import XGBClassifier
 
 sys.path.append("tests/python")
 import testing as tm
 
-import xgboost as xgb
-from xgboost import XGBClassifier
-
 model_path = './model.pkl'
 
 
diff --git a/tests/python/test_basic.py b/tests/python/test_basic.py
index 1e3505788..e0af63d49 100644
--- a/tests/python/test_basic.py
+++ b/tests/python/test_basic.py
@@ -1,7 +1,4 @@
 # -*- coding: utf-8 -*-
-import sys
-from contextlib import contextmanager
-from io import StringIO
 import numpy as np
 import os
 import xgboost as xgb
@@ -9,29 +6,12 @@ import unittest
 import json
 from pathlib import Path
 import tempfile
+import testing as tm
 
 dpath = 'demo/data/'
 rng = np.random.RandomState(1994)
 
 
-@contextmanager
-def captured_output():
-    """Reassign stdout temporarily in order to test printed statements
-    Taken from:
-    https://stackoverflow.com/questions/4219717/how-to-assert-output-with-nosetest-unittest-in-python
-
-    Also works for pytest.
-
-    """
-    new_out, new_err = StringIO(), StringIO()
-    old_out, old_err = sys.stdout, sys.stderr
-    try:
-        sys.stdout, sys.stderr = new_out, new_err
-        yield sys.stdout, sys.stderr
-    finally:
-        sys.stdout, sys.stderr = old_out, old_err
-
-
 class TestBasic(unittest.TestCase):
     def test_compat(self):
         from xgboost.compat import lazy_isinstance
@@ -181,7 +161,6 @@ class TestBasic(unittest.TestCase):
             assert dm.num_row() == row
             assert dm.num_col() == cols
 
-
     def test_cv(self):
         dm = xgb.DMatrix(dpath + 'agaricus.txt.train')
         params = {'max_depth': 2, 'eta': 1, 'verbosity': 0,
@@ -236,7 +215,7 @@ class TestBasic(unittest.TestCase):
             print([fold.dtest.get_label() for fold in cbackenv.cvfolds])
 
         # Run cross validation and capture standard out to test callback result
-        with captured_output() as (out, err):
+        with tm.captured_output() as (out, err):
             xgb.cv(
                 params, dm, num_boost_round=1, folds=folds, callbacks=[cb],
                 as_pandas=False
@@ -257,7 +236,6 @@ class TestBasicPathLike(unittest.TestCase):
         assert dtrain.num_row() == 6513
         assert dtrain.num_col() == 127
 
-
     def test_DMatrix_save_to_path(self):
         """Saving to a binary file using pathlib from a DMatrix."""
         data = np.random.randn(100, 2)
diff --git a/tests/python/test_callback.py b/tests/python/test_callback.py
index b12354556..114f1203c 100644
--- a/tests/python/test_callback.py
+++ b/tests/python/test_callback.py
@@ -34,10 +34,27 @@ class TestCallbacks(unittest.TestCase):
                   num_boost_round=rounds,
                   evals_result=evals_result,
                   verbose_eval=True)
-        print('evals_result:', evals_result)
         assert len(evals_result['Train']['error']) == rounds
         assert len(evals_result['Valid']['error']) == rounds
 
+        with tm.captured_output() as (out, err):
+            xgb.train({'objective': 'binary:logistic',
+                       'eval_metric': 'error'}, D_train,
+                      evals=[(D_train, 'Train'), (D_valid, 'Valid')],
+                      num_boost_round=rounds,
+                      evals_result=evals_result,
+                      verbose_eval=2)
+            output: str = out.getvalue().strip()
+
+        pos = 0
+        msg = 'Train-error'
+        for i in range(rounds // 2):
+            pos = output.find('Train-error', pos)
+            assert pos != -1
+            pos += len(msg)
+
+        assert output.find('Train-error', pos) == -1
+
     def test_early_stopping(self):
         D_train = xgb.DMatrix(self.X_train, self.y_train)
         D_valid = xgb.DMatrix(self.X_valid, self.y_valid)
diff --git a/tests/python/test_with_sklearn.py b/tests/python/test_with_sklearn.py
index 7ffcb67b8..1514517f8 100644
--- a/tests/python/test_with_sklearn.py
+++ b/tests/python/test_with_sklearn.py
@@ -2,7 +2,6 @@ import collections
 import importlib.util
 import numpy as np
 import xgboost as xgb
-from xgboost.sklearn import XGBoostLabelEncoder
 import testing as tm
 import tempfile
 import os
@@ -11,8 +10,6 @@ import pytest
 import unittest
 import json
 
-from test_basic import captured_output
-
 rng = np.random.RandomState(1994)
 
 pytestmark = pytest.mark.skipif(**tm.no_sklearn())
@@ -872,7 +869,7 @@ def test_parameter_validation():
     reg = xgb.XGBRegressor(foo='bar', verbosity=1)
     X = np.random.randn(10, 10)
     y = np.random.randn(10)
-    with captured_output() as (out, err):
+    with tm.captured_output() as (out, err):
         reg.fit(X, y)
         output = out.getvalue().strip()
 
@@ -882,7 +879,7 @@ def test_parameter_validation():
                            importance_type='gain', verbosity=1)
     X = np.random.randn(10, 10)
     y = np.random.randn(10)
-    with captured_output() as (out, err):
+    with tm.captured_output() as (out, err):
         reg.fit(X, y)
         output = out.getvalue().strip()
 
diff --git a/tests/python/testing.py b/tests/python/testing.py
index f4b8654e3..feb5f4a07 100644
--- a/tests/python/testing.py
+++ b/tests/python/testing.py
@@ -1,6 +1,8 @@
 # coding: utf-8
 import os
-import platform
+import sys
+from contextlib import contextmanager
+from io import StringIO
 from xgboost.compat import SKLEARN_INSTALLED, PANDAS_INSTALLED
 from xgboost.compat import DASK_INSTALLED
 import pytest
@@ -281,6 +283,24 @@ class DirectoryExcursion:
                 os.remove(f)
 
 
+@contextmanager
+def captured_output():
+    """Swap ``sys.stdout`` and ``sys.stderr`` for in-memory buffers inside a ``with`` block.
+
+    Yields the replacement streams as ``(out, err)`` so a test can assert on printed text;
+    the previous streams are put back on exit, including when the body raises.  Adapted from
+    https://stackoverflow.com/questions/4219717/how-to-assert-output-with-nosetest-unittest-in-python
+    and usable from both unittest and pytest.
+    """
+    saved_streams = (sys.stdout, sys.stderr)
+    buf_out, buf_err = StringIO(), StringIO()
+    try:
+        sys.stdout, sys.stderr = buf_out, buf_err
+        yield sys.stdout, sys.stderr
+    finally:
+        sys.stdout, sys.stderr = saved_streams
+
+
 CURDIR = os.path.normpath(os.path.abspath(os.path.dirname(__file__)))
 PROJECT_ROOT = os.path.normpath(
     os.path.join(CURDIR, os.path.pardir, os.path.pardir))