Made eval_results for sklearn output the same structure as in the new training.py

Changed the name of eval_results to evals_result, so that the naming is the same in training.py and sklearn.py

Made the structure of evals_result the same as in training.py, although the names of the keys are different:

In sklearn.py you cannot name your eval sets; they are automatically called 'validation_0', 'validation_1', etc.
The dict evals_result will output something like: {'validation_0': {'logloss': ['0.674800', '0.657121']}, 'validation_1': {'logloss': ['0.63776', '0.58372']}}

In training.py you can name your multiple evals_result with a watchlist like: watchlist  = [(dtest,'eval'), (dtrain,'train')]
The dict evals_result will output something like: {'train': {'logloss': ['0.68495', '0.67691']}, 'eval': {'logloss': ['0.684877', '0.676767']}}

You can access the evals_result using the evals_result() function.
This commit is contained in:
Johan Manders 2015-10-14 12:51:46 +02:00
parent e339cdec52
commit e960a09ff4

View File

@ -165,7 +165,7 @@ class XGBModel(XGBModelBase):
""" """
trainDmatrix = DMatrix(X, label=y, missing=self.missing) trainDmatrix = DMatrix(X, label=y, missing=self.missing)
eval_results = {} evals_result = {}
if eval_set is not None: if eval_set is not None:
evals = list(DMatrix(x[0], label=x[1]) for x in eval_set) evals = list(DMatrix(x[0], label=x[1]) for x in eval_set)
evals = list(zip(evals, ["validation_{}".format(i) for i in evals = list(zip(evals, ["validation_{}".format(i) for i in
@ -185,13 +185,14 @@ class XGBModel(XGBModelBase):
self._Booster = train(params, trainDmatrix, self._Booster = train(params, trainDmatrix,
self.n_estimators, evals=evals, self.n_estimators, evals=evals,
early_stopping_rounds=early_stopping_rounds, early_stopping_rounds=early_stopping_rounds,
evals_result=eval_results, feval=feval, evals_result=evals_result, feval=feval,
verbose_eval=verbose) verbose_eval=verbose)
if eval_results: if evals_result:
for val in eval_results.items(): for val in evals_result.items():
eval_results[val[0]] = [np.array(v[1], dtype=float) for v in val[1].items()] evals_result_key = val[1].keys()[0]
self.eval_results = eval_results evals_result[val[0]][evals_result_key] = val[1][evals_result_key]
self.evals_result_ = evals_result
if early_stopping_rounds is not None: if early_stopping_rounds is not None:
self.best_score = self._Booster.best_score self.best_score = self._Booster.best_score
@ -203,6 +204,41 @@ class XGBModel(XGBModelBase):
test_dmatrix = DMatrix(data, missing=self.missing) test_dmatrix = DMatrix(data, missing=self.missing)
return self.booster().predict(test_dmatrix) return self.booster().predict(test_dmatrix)
def evals_result(self):
    """Return the evaluation results.

    If eval_set is passed to the `fit` function, you can call evals_result() to
    get evaluation results for all passed eval_sets. When eval_metric is also
    passed to the `fit` function, the evals_result will contain the eval_metrics
    passed to the `fit` function.

    Returns
    -------
    evals_result : dictionary
        Mapping of validation set name ('validation_0', 'validation_1', ...)
        to a dict of metric name -> list of per-iteration scores.

    Raises
    ------
    RuntimeError
        If `fit` was not called with an `eval_set` (no results available).

    Example
    -------
    param_dist = {'objective':'binary:logistic', 'n_estimators':2}

    clf = xgb.XGBModel(**param_dist)
    clf.fit(X_train, y_train,
            eval_set=[(X_train, y_train), (X_test, y_test)],
            eval_metric='logloss',
            verbose=True)
    evals_result = clf.evals_result()

    The variable evals_result will contain:
    {'validation_0': {'logloss': ['0.604835', '0.531479']}, 'validation_1': {'logloss': ['0.41965', '0.17686']}}
    """
    # `evals_result_` is only assigned by fit() when an eval_set was
    # supplied; use getattr so that calling this before fit() reaches the
    # clear error below instead of raising AttributeError.
    if getattr(self, 'evals_result_', None):
        evals_result = self.evals_result_
    else:
        # The original code raised the undefined name `Error`, which would
        # surface as a NameError at runtime. Raise a defined exception with
        # the same message instead.
        # NOTE(review): if this module imports XGBoostError from .core,
        # prefer raising that here — confirm against the file's imports.
        raise RuntimeError('No results.')
    return evals_result
class XGBClassifier(XGBModel, XGBClassifierBase): class XGBClassifier(XGBModel, XGBClassifierBase):
# pylint: disable=missing-docstring,too-many-arguments,invalid-name # pylint: disable=missing-docstring,too-many-arguments,invalid-name
@ -259,7 +295,7 @@ class XGBClassifier(XGBModel, XGBClassifierBase):
If `verbose` and an evaluation set is used, writes the evaluation If `verbose` and an evaluation set is used, writes the evaluation
metric measured on the validation set to stderr. metric measured on the validation set to stderr.
""" """
eval_results = {} evals_result = {}
self.classes_ = list(np.unique(y)) self.classes_ = list(np.unique(y))
self.n_classes_ = len(self.classes_) self.n_classes_ = len(self.classes_)
if self.n_classes_ > 2: if self.n_classes_ > 2:
@ -299,13 +335,14 @@ class XGBClassifier(XGBModel, XGBClassifierBase):
self._Booster = train(xgb_options, train_dmatrix, self.n_estimators, self._Booster = train(xgb_options, train_dmatrix, self.n_estimators,
evals=evals, evals=evals,
early_stopping_rounds=early_stopping_rounds, early_stopping_rounds=early_stopping_rounds,
evals_result=eval_results, feval=feval, evals_result=evals_result, feval=feval,
verbose_eval=verbose) verbose_eval=verbose)
if eval_results: if evals_result:
for val in eval_results.items(): for val in evals_result.items():
eval_results[val[0]] = [np.array(v[1], dtype=float) for v in val[1].items()] evals_result_key = val[1].keys()[0]
self.eval_results = eval_results evals_result[val[0]][evals_result_key] = val[1][evals_result_key]
self.evals_result_ = evals_result
if early_stopping_rounds is not None: if early_stopping_rounds is not None:
self.best_score = self._Booster.best_score self.best_score = self._Booster.best_score
@ -333,6 +370,41 @@ class XGBClassifier(XGBModel, XGBClassifierBase):
classzero_probs = 1.0 - classone_probs classzero_probs = 1.0 - classone_probs
return np.vstack((classzero_probs, classone_probs)).transpose() return np.vstack((classzero_probs, classone_probs)).transpose()
def evals_result(self):
    """Return the evaluation results.

    If eval_set is passed to the `fit` function, you can call evals_result() to
    get evaluation results for all passed eval_sets. When eval_metric is also
    passed to the `fit` function, the evals_result will contain the eval_metrics
    passed to the `fit` function.

    Returns
    -------
    evals_result : dictionary
        Mapping of validation set name ('validation_0', 'validation_1', ...)
        to a dict of metric name -> list of per-iteration scores.

    Raises
    ------
    RuntimeError
        If `fit` was not called with an `eval_set` (no results available).

    Example
    -------
    param_dist = {'objective':'binary:logistic', 'n_estimators':2}

    clf = xgb.XGBClassifier(**param_dist)
    clf.fit(X_train, y_train,
            eval_set=[(X_train, y_train), (X_test, y_test)],
            eval_metric='logloss',
            verbose=True)
    evals_result = clf.evals_result()

    The variable evals_result will contain:
    {'validation_0': {'logloss': ['0.604835', '0.531479']}, 'validation_1': {'logloss': ['0.41965', '0.17686']}}
    """
    # `evals_result_` is only assigned by fit() when an eval_set was
    # supplied; use getattr so that calling this before fit() reaches the
    # clear error below instead of raising AttributeError.
    if getattr(self, 'evals_result_', None):
        evals_result = self.evals_result_
    else:
        # The original code raised the undefined name `Error`, which would
        # surface as a NameError at runtime. Raise a defined exception with
        # the same message instead.
        # NOTE(review): if this module imports XGBoostError from .core,
        # prefer raising that here — confirm against the file's imports.
        raise RuntimeError('No results.')
    return evals_result
class XGBRegressor(XGBModel, XGBRegressorBase): class XGBRegressor(XGBModel, XGBRegressorBase):
# pylint: disable=missing-docstring # pylint: disable=missing-docstring
__doc__ = """Implementation of the scikit-learn API for XGBoost regression. __doc__ = """Implementation of the scikit-learn API for XGBoost regression.