Merge pull request #690 from rcarneva/master

modifying cv show_progress to allow print-every-n behavior
This commit is contained in:
Yuan (Terry) Tang 2015-12-16 17:29:21 -06:00
commit 4a15939c13

View File

@ -283,11 +283,14 @@ def mknfold(dall, nfold, param, seed, evals=(), fpreproc=None):
ret.append(CVPack(dtrain, dtest, plst))
return ret
def aggcv(rlist, show_stdv=True, show_progress=None, as_pandas=True):
def aggcv(rlist, show_stdv=True, show_progress=None, as_pandas=True, trial=0):
# pylint: disable=invalid-name
"""
Aggregate cross-validation results.
If show_progress is True, progress is displayed on every call. If
show_progress is an integer, progress is displayed only when `trial`
is a multiple of `show_progress`.
"""
cvmap = {}
idx = rlist[0].split()[0]
@ -321,8 +324,6 @@ def aggcv(rlist, show_stdv=True, show_progress=None, as_pandas=True):
index.extend([k + '-mean', k + '-std'])
results.extend([mean, std])
if as_pandas:
try:
import pandas as pd
@ -336,8 +337,9 @@ def aggcv(rlist, show_stdv=True, show_progress=None, as_pandas=True):
if show_progress is None:
show_progress = True
if show_progress:
if (isinstance(show_progress, int) and trial % show_progress == 0) or (isinstance(show_progress, bool) and show_progress):
sys.stderr.write(msg + '\n')
sys.stderr.flush()
return results
@ -376,9 +378,11 @@ def cv(params, dtrain, num_boost_round=10, nfold=3, metrics=(),
as_pandas : bool, default True
Return pd.DataFrame when pandas is installed.
If False or pandas is not installed, return np.ndarray
show_progress : bool or None, default None
show_progress : bool, int, or None, default None
Whether to display the progress. If None, progress will be displayed
when np.ndarray is returned.
when np.ndarray is returned. If True, progress will be displayed at
every boosting stage. If an integer is given, progress will be displayed
once every `show_progress` boosting stages.
show_stdv : bool, default True
Whether to display the standard deviation in progress.
Results are not affected, and always contains std.
@ -418,7 +422,7 @@ def cv(params, dtrain, num_boost_round=10, nfold=3, metrics=(),
fold.update(i, obj)
res = aggcv([f.eval(i, feval) for f in cvfolds],
show_stdv=show_stdv, show_progress=show_progress,
as_pandas=as_pandas)
as_pandas=as_pandas, trial=i)
results.append(res)
if early_stopping_rounds is not None: