python: multiple eval_metrics changes

- allows feval to return a list of tuples (name, error/score value)
- changed behavior for multiple eval_metrics in conjunction with
early_stopping: Instead of raising an error, the last passed evel_metric
(or last entry in return value of feval) is used for early stopping
- allows list of eval_metrics in dict-typed params
- unittest for new features / behavior

documentation updated

- example for assigning a list to 'eval_metric'
- note about early stopping on last passed eval metric

- info msg for used eval metric added
This commit is contained in:
FrozenFingerz
2015-11-03 11:22:00 +01:00
parent 190e58a8c6
commit b59018aa05
4 changed files with 129 additions and 10 deletions

View File

@@ -61,6 +61,17 @@ def train(params, dtrain, num_boost_round=10, evals=(), obj=None, feval=None,
booster : a trained booster model
"""
evals = list(evals)
if isinstance(params, dict) \
and 'eval_metric' in params \
and isinstance(params['eval_metric'], list):
params = dict((k, v) for k, v in params.items())
eval_metrics = params['eval_metric']
params.pop("eval_metric", None)
params = list(params.items())
for eval_metric in eval_metrics:
params += [('eval_metric', eval_metric)]
bst = Booster(params, [dtrain] + [d[0] for d in evals])
ntrees = 0
if xgb_model is not None:
if not isinstance(xgb_model, STRING_TYPES):
@@ -70,7 +81,6 @@ def train(params, dtrain, num_boost_round=10, evals=(), obj=None, feval=None,
else:
bst = Booster(params, [dtrain] + [d[0] for d in evals])
if evals_result is not None:
if not isinstance(evals_result, dict):
raise TypeError('evals_result has to be a dictionary')
@@ -120,9 +130,11 @@ def train(params, dtrain, num_boost_round=10, evals=(), obj=None, feval=None,
# is params a list of tuples? are we using multiple eval metrics?
if isinstance(params, list):
if len(params) != len(dict(params).items()):
raise ValueError('Check your params.'\
'Early stopping works with single eval metric only.')
params = dict(params)
params = dict(params)
sys.stderr.write("Multiple eval metrics has been passed: " \
"'{0}' will be used for early stopping.\n\n".format(params['eval_metric']))
else:
params = dict(params)
# either minimize loss or maximize AUC/MAP/NDCG
maximize_score = False