Merge pull request #690 from rcarneva/master

modifying cv show_progress to allow print-every-n behavior
This commit is contained in:
Yuan (Terry) Tang 2015-12-16 17:29:21 -06:00
commit 4a15939c13

View File

@ -283,11 +283,14 @@ def mknfold(dall, nfold, param, seed, evals=(), fpreproc=None):
ret.append(CVPack(dtrain, dtest, plst)) ret.append(CVPack(dtrain, dtest, plst))
return ret return ret
def aggcv(rlist, show_stdv=True, show_progress=None, as_pandas=True, trial=0):
def aggcv(rlist, show_stdv=True, show_progress=None, as_pandas=True):
# pylint: disable=invalid-name # pylint: disable=invalid-name
""" """
Aggregate cross-validation results. Aggregate cross-validation results.
If show_progress is True, progress is displayed in every call. If
show_progress is an integer, progress is only displayed on calls where
`trial` is a multiple of `show_progress`.
""" """
cvmap = {} cvmap = {}
idx = rlist[0].split()[0] idx = rlist[0].split()[0]
@ -321,8 +324,6 @@ def aggcv(rlist, show_stdv=True, show_progress=None, as_pandas=True):
index.extend([k + '-mean', k + '-std']) index.extend([k + '-mean', k + '-std'])
results.extend([mean, std]) results.extend([mean, std])
if as_pandas: if as_pandas:
try: try:
import pandas as pd import pandas as pd
@ -336,8 +337,9 @@ def aggcv(rlist, show_stdv=True, show_progress=None, as_pandas=True):
if show_progress is None: if show_progress is None:
show_progress = True show_progress = True
if show_progress: if (isinstance(show_progress, int) and trial % show_progress == 0) or (isinstance(show_progress, bool) and show_progress):
sys.stderr.write(msg + '\n') sys.stderr.write(msg + '\n')
sys.stderr.flush()
return results return results
@ -376,9 +378,11 @@ def cv(params, dtrain, num_boost_round=10, nfold=3, metrics=(),
as_pandas : bool, default True as_pandas : bool, default True
Return pd.DataFrame when pandas is installed. Return pd.DataFrame when pandas is installed.
If False or pandas is not installed, return np.ndarray If False or pandas is not installed, return np.ndarray
show_progress : bool or None, default None show_progress : bool, int, or None, default None
Whether to display the progress. If None, progress will be displayed Whether to display the progress. If None, progress will be displayed
when np.ndarray is returned. when np.ndarray is returned. If True, progress will be displayed at
every boosting stage. If an integer is given, progress will be displayed
once every `show_progress` boosting stages.
show_stdv : bool, default True show_stdv : bool, default True
Whether to display the standard deviation in progress. Whether to display the standard deviation in progress.
Results are not affected, and always contains std. Results are not affected, and always contains std.
@ -418,7 +422,7 @@ def cv(params, dtrain, num_boost_round=10, nfold=3, metrics=(),
fold.update(i, obj) fold.update(i, obj)
res = aggcv([f.eval(i, feval) for f in cvfolds], res = aggcv([f.eval(i, feval) for f in cvfolds],
show_stdv=show_stdv, show_progress=show_progress, show_stdv=show_stdv, show_progress=show_progress,
as_pandas=as_pandas) as_pandas=as_pandas, trial=i)
results.append(res) results.append(res)
if early_stopping_rounds is not None: if early_stopping_rounds is not None: