From 1ca737ed55b8d634ada91077c73cc5a348097796 Mon Sep 17 00:00:00 2001
From: quansie
Date: Sun, 11 Oct 2015 01:09:05 +0200
Subject: [PATCH] Update training.py

Made changes to training.py to make sure all eval_metric information
gets passed to evals_result. The previous version lost and mislabeled
data in evals_result when more than one eval_metric was used.

The structure of evals_result is now:

    evals_result[eval_name][eval_metric] = list of metric values

Example:

    >>> dtrain = xgb.DMatrix('agaricus.txt.train', silent=True)
    >>> dtest = xgb.DMatrix('agaricus.txt.test', silent=True)
    >>> param = [('max_depth', 2), ('objective', 'binary:logistic'),
    ...          ('bst:eta', 0.01), ('eval_metric', 'logloss'),
    ...          ('eval_metric', 'error')]
    >>> watchlist = [(dtest, 'eval'), (dtrain, 'train')]
    >>> num_round = 3
    >>> evals_result = {}
    >>> bst = xgb.train(param, dtrain, num_round, watchlist,
    ...                 evals_result=evals_result)
    >>> print(evals_result['eval']['logloss'])
    >>> print(evals_result)

Prints:

    ['0.684877', '0.676767', '0.668817']
    {'train': {'logloss': ['0.684954', '0.676917', '0.669036'],
               'error': ['0.04652', '0.04652', '0.04652']},
     'eval': {'logloss': ['0.684877', '0.676767', '0.668817'],
              'error': ['0.042831', '0.042831', '0.042831']}}
---
 python-package/xgboost/training.py | 32 +++++++++++++++++++++++-------
 1 file changed, 25 insertions(+), 7 deletions(-)

diff --git a/python-package/xgboost/training.py b/python-package/xgboost/training.py
index a6a7c203b..8ad439678 100644
--- a/python-package/xgboost/training.py
+++ b/python-package/xgboost/training.py
@@ -56,7 +56,7 @@ def train(params, dtrain, num_boost_round=10, evals=(), obj=None, feval=None,
         else:
             evals_name = [d[1] for d in evals]
         evals_result.clear()
-        evals_result.update({key: [] for key in evals_name})
+        evals_result.update({key: {} for key in evals_name})
 
     if not early_stopping_rounds:
         for i in range(num_boost_round):
@@ -71,9 +71,18 @@ def train(params, dtrain, num_boost_round=10, evals=(), obj=None, feval=None,
                 if verbose_eval:
                     sys.stderr.write(msg + '\n')
                 if evals_result is not None:
-                    res = re.findall(":-?([0-9.]+).", msg)
-                    for key, val in zip(evals_name, res):
-                        evals_result[key].append(val)
+                    res = re.findall("([0-9a-zA-Z@]+[-]*):-?([0-9.]+).", msg)
+                    for key in evals_name:
+                        evals_idx = evals_name.index(key)
+                        res_per_eval = len(res) // len(evals_name)
+                        for r in range(res_per_eval):
+                            res_item = res[(evals_idx*res_per_eval) + r]
+                            res_key = res_item[0]
+                            res_val = res_item[1]
+                            if res_key in evals_result[key]:
+                                evals_result[key][res_key].append(res_val)
+                            else:
+                                evals_result[key][res_key] = [res_val]
         return bst
 
     else:
@@ -119,9 +128,18 @@ def train(params, dtrain, num_boost_round=10, evals=(), obj=None, feval=None,
                 sys.stderr.write(msg + '\n')
 
             if evals_result is not None:
-                res = re.findall(":-?([0-9.]+).", msg)
-                for key, val in zip(evals_name, res):
-                    evals_result[key].append(val)
+                res = re.findall("([0-9a-zA-Z@]+[-]*):-?([0-9.]+).", msg)
+                for key in evals_name:
+                    evals_idx = evals_name.index(key)
+                    res_per_eval = len(res) // len(evals_name)
+                    for r in range(res_per_eval):
+                        res_item = res[(evals_idx*res_per_eval) + r]
+                        res_key = res_item[0]
+                        res_val = res_item[1]
+                        if res_key in evals_result[key]:
+                            evals_result[key][res_key].append(res_val)
+                        else:
+                            evals_result[key][res_key] = [res_val]
 
             score = float(msg.rsplit(':', 1)[1])
             if (maximize_score and score > best_score) or \
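
Note (not part of the patch): the grouping logic the patch implements can
be exercised in isolation. The sketch below is a minimal illustration; the
sample message string is an assumed example of what bst.eval_set returns
for the watchlist in the commit message, and the pattern is simplified by
dropping the original regex's trailing '.', which expects a separator
character after each value.

    import re

    # Assumed sample evaluation line for watchlist
    # [(dtest, 'eval'), (dtrain, 'train')] with two eval_metrics.
    msg = '[0]\teval-logloss:0.684877\teval-error:0.042831' \
          '\ttrain-logloss:0.684954\ttrain-error:0.046520'
    evals_name = ['eval', 'train']

    # Same idea as the patch: collect (metric, value) pairs, then assign
    # each consecutive block of len(res) // len(evals_name) pairs to one
    # evaluation set, in watchlist order.
    res = re.findall(r"([0-9a-zA-Z@]+[-]*):-?([0-9.]+)", msg)
    evals_result = {key: {} for key in evals_name}
    res_per_eval = len(res) // len(evals_name)
    for evals_idx, key in enumerate(evals_name):
        block = res[evals_idx * res_per_eval:(evals_idx + 1) * res_per_eval]
        for name, val in block:
            evals_result[key].setdefault(name, []).append(val)

    print(evals_result)
    # {'eval': {'logloss': ['0.684877'], 'error': ['0.042831']},
    #  'train': {'logloss': ['0.684954'], 'error': ['0.046520']}}

This grouping assumes bst.eval_set reports the same metrics, in the same
order, for every entry in the watchlist; that is what lets the flat list
of regex matches be split into equal per-eval blocks.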