From 40566cdbbafd5fa09e0945a5a155bc681988ed1b Mon Sep 17 00:00:00 2001 From: Johan Manders Date: Mon, 12 Oct 2015 16:31:23 +0200 Subject: [PATCH 01/12] update sklearn.py because evals_result in training.py changed Because I changed the training.py, the sklearn.py had to be changed also to be able to read all the data from evals_result. --- python-package/xgboost/sklearn.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/python-package/xgboost/sklearn.py b/python-package/xgboost/sklearn.py index a2761c5ab..b3d973928 100644 --- a/python-package/xgboost/sklearn.py +++ b/python-package/xgboost/sklearn.py @@ -187,10 +187,11 @@ class XGBModel(XGBModelBase): early_stopping_rounds=early_stopping_rounds, evals_result=eval_results, feval=feval, verbose_eval=verbose) + if eval_results: - eval_results = {k: np.array(v, dtype=float) - for k, v in eval_results.items()} - eval_results = {k: np.array(v) for k, v in eval_results.items()} + for val in eval_results.items(): + for k, v in val[1].items(): + eval_results[val[0]] = np.array(v, dtype=float) self.eval_results = eval_results if early_stopping_rounds is not None: @@ -303,8 +304,9 @@ class XGBClassifier(XGBModel, XGBClassifierBase): verbose_eval=verbose) if eval_results: - eval_results = {k: np.array(v, dtype=float) - for k, v in eval_results.items()} + for val in eval_results.items(): + for k, v in val[1].items(): + eval_results[val[0]] = np.array(v, dtype=float) self.eval_results = eval_results if early_stopping_rounds is not None: From e339cdec5222fa858a090a905a35ecf22b47e997 Mon Sep 17 00:00:00 2001 From: Johan Manders Date: Mon, 12 Oct 2015 16:47:24 +0200 Subject: [PATCH 02/12] Too many branches and unused key --- python-package/xgboost/sklearn.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/python-package/xgboost/sklearn.py b/python-package/xgboost/sklearn.py index b3d973928..3c279bdb6 100644 --- a/python-package/xgboost/sklearn.py +++ 
b/python-package/xgboost/sklearn.py @@ -190,8 +190,7 @@ class XGBModel(XGBModelBase): if eval_results: for val in eval_results.items(): - for k, v in val[1].items(): - eval_results[val[0]] = np.array(v, dtype=float) + eval_results[val[0]] = [np.array(v[1], dtype=float) for v in val[1].items()] self.eval_results = eval_results if early_stopping_rounds is not None: @@ -305,8 +304,7 @@ class XGBClassifier(XGBModel, XGBClassifierBase): if eval_results: for val in eval_results.items(): - for k, v in val[1].items(): - eval_results[val[0]] = np.array(v, dtype=float) + eval_results[val[0]] = [np.array(v[1], dtype=float) for v in val[1].items()] self.eval_results = eval_results if early_stopping_rounds is not None: From e960a09ff4d6bfbd9dba8cbb0da22631938062b0 Mon Sep 17 00:00:00 2001 From: Johan Manders Date: Wed, 14 Oct 2015 12:51:46 +0200 Subject: [PATCH 03/12] Made eval_results for sklearn output the same structure as in the new training.py Changed the name of eval_results to evals_result, so that the naming is the same in training.py and sklearn.py Made the structure of evals_result the same as in training.py, the names of the keys are different: In sklearn.py you cannot name your evals_result, but they are automatically called 'validation_0', 'validation_1' etc. The dict evals_result will output something like: {'validation_0': {'logloss': ['0.674800', '0.657121']}, 'validation_1': {'logloss': ['0.63776', '0.58372']}} In training.py you can name your multiple evals_result with a watchlist like: watchlist = [(dtest,'eval'), (dtrain,'train')] The dict evals_result will output something like: {'train': {'logloss': ['0.68495', '0.67691']}, 'eval': {'logloss': ['0.684877', '0.676767']}} You can access the evals_result using the evals_result() function. 
--- python-package/xgboost/sklearn.py | 96 +++++++++++++++++++++++++++---- 1 file changed, 84 insertions(+), 12 deletions(-) diff --git a/python-package/xgboost/sklearn.py b/python-package/xgboost/sklearn.py index 3c279bdb6..958866b49 100644 --- a/python-package/xgboost/sklearn.py +++ b/python-package/xgboost/sklearn.py @@ -165,7 +165,7 @@ class XGBModel(XGBModelBase): """ trainDmatrix = DMatrix(X, label=y, missing=self.missing) - eval_results = {} + evals_result = {} if eval_set is not None: evals = list(DMatrix(x[0], label=x[1]) for x in eval_set) evals = list(zip(evals, ["validation_{}".format(i) for i in @@ -185,13 +185,14 @@ class XGBModel(XGBModelBase): self._Booster = train(params, trainDmatrix, self.n_estimators, evals=evals, early_stopping_rounds=early_stopping_rounds, - evals_result=eval_results, feval=feval, + evals_result=evals_result, feval=feval, verbose_eval=verbose) - if eval_results: - for val in eval_results.items(): - eval_results[val[0]] = [np.array(v[1], dtype=float) for v in val[1].items()] - self.eval_results = eval_results + if evals_result: + for val in evals_result.items(): + evals_result_key = val[1].keys()[0] + evals_result[val[0]][evals_result_key] = val[1][evals_result_key] + self.evals_result_ = evals_result if early_stopping_rounds is not None: self.best_score = self._Booster.best_score @@ -202,6 +203,41 @@ class XGBModel(XGBModelBase): # pylint: disable=missing-docstring,invalid-name test_dmatrix = DMatrix(data, missing=self.missing) return self.booster().predict(test_dmatrix) + + def evals_result(self): + """Return the evaluation results. + + If eval_set is passed to the `fit` function, you can call evals_result() to + get evaluation results for all passed eval_sets. 
When eval_metric is also + passed to the `fit` function, the evals_result will contain the eval_metrics + passed to the `fit` function + + Returns + ------- + evals_result : dictionary + + Example + ------- + param_dist = {'objective':'binary:logistic', 'n_estimators':2} + + clf = xgb.XGBModel(**param_dist) + + clf.fit(X_train, y_train, + eval_set=[(X_train, y_train), (X_test, y_test)], + eval_metric='logloss', + verbose=True) + + evals_result = clf.evals_result() + + The variable evals_result will contain: + {'validation_0': {'logloss': ['0.604835', '0.531479']}, 'validation_1': {'logloss': ['0.41965', '0.17686']}} + """ + if self.evals_result_: + evals_result = self.evals_result_ + else: + raise Error('No results.') + + return evals_result class XGBClassifier(XGBModel, XGBClassifierBase): @@ -259,7 +295,7 @@ class XGBClassifier(XGBModel, XGBClassifierBase): If `verbose` and an evaluation set is used, writes the evaluation metric measured on the validation set to stderr. """ - eval_results = {} + evals_result = {} self.classes_ = list(np.unique(y)) self.n_classes_ = len(self.classes_) if self.n_classes_ > 2: @@ -299,13 +335,14 @@ class XGBClassifier(XGBModel, XGBClassifierBase): self._Booster = train(xgb_options, train_dmatrix, self.n_estimators, evals=evals, early_stopping_rounds=early_stopping_rounds, - evals_result=eval_results, feval=feval, + evals_result=evals_result, feval=feval, verbose_eval=verbose) - if eval_results: - for val in eval_results.items(): - eval_results[val[0]] = [np.array(v[1], dtype=float) for v in val[1].items()] - self.eval_results = eval_results + if evals_result: + for val in evals_result.items(): + evals_result_key = val[1].keys()[0] + evals_result[val[0]][evals_result_key] = val[1][evals_result_key] + self.evals_result_ = evals_result if early_stopping_rounds is not None: self.best_score = self._Booster.best_score @@ -332,6 +369,41 @@ class XGBClassifier(XGBModel, XGBClassifierBase): classone_probs = class_probs classzero_probs = 1.0 
- classone_probs return np.vstack((classzero_probs, classone_probs)).transpose() + + def evals_result(self): + """Return the evaluation results. + + If eval_set is passed to the `fit` function, you can call evals_result() to + get evaluation results for all passed eval_sets. When eval_metric is also + passed to the `fit` function, the evals_result will contain the eval_metrics + passed to the `fit` function + + Returns + ------- + evals_result : dictionary + + Example + ------- + param_dist = {'objective':'binary:logistic', 'n_estimators':2} + + clf = xgb.XGBClassifier(**param_dist) + + clf.fit(X_train, y_train, + eval_set=[(X_train, y_train), (X_test, y_test)], + eval_metric='logloss', + verbose=True) + + evals_result = clf.evals_result() + + The variable evals_result will contain: + {'validation_0': {'logloss': ['0.604835', '0.531479']}, 'validation_1': {'logloss': ['0.41965', '0.17686']}} + """ + if self.evals_result_: + evals_result = self.evals_result_ + else: + raise Error('No results.') + + return evals_result class XGBRegressor(XGBModel, XGBRegressorBase): # pylint: disable=missing-docstring From 9c8420a4dceb4cc7468b8ec4b67f48e61c5cdfe5 Mon Sep 17 00:00:00 2001 From: Johan Manders Date: Wed, 14 Oct 2015 12:53:42 +0200 Subject: [PATCH 04/12] Updated the documentation a bit Will upload some demos for guide-python later. --- python-package/xgboost/training.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/python-package/xgboost/training.py b/python-package/xgboost/training.py index 50d359b15..4841803b4 100644 --- a/python-package/xgboost/training.py +++ b/python-package/xgboost/training.py @@ -38,7 +38,10 @@ def train(params, dtrain, num_boost_round=10, evals=(), obj=None, feval=None, If early stopping occurs, the model will have two additional fields: bst.best_score and bst.best_iteration. 
evals_result: dict - This dictionary stores the evaluation results of all the items in watchlist + This dictionary stores the evaluation results of all the items in watchlist. + Example: with a watchlist containing [(dtest,'eval'), (dtrain,'train')] and + and a paramater containing ('eval_metric', 'logloss') + Returns: {'train': {'logloss': ['0.48253', '0.35953']}, 'eval': {'logloss': ['0.480385', '0.357756']}} verbose_eval : bool If `verbose_eval` then the evaluation metric on the validation set, if given, is printed at each boosting stage. From 67f3c687b8199f43ed6a89413c9489514f889b00 Mon Sep 17 00:00:00 2001 From: Johan Manders Date: Wed, 14 Oct 2015 13:06:14 +0200 Subject: [PATCH 05/12] Added Johan Manders to the list, asked by Tianqi Chen --- CONTRIBUTORS.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md index 32a6745f0..6233f7ce0 100644 --- a/CONTRIBUTORS.md +++ b/CONTRIBUTORS.md @@ -50,3 +50,4 @@ List of Contributors * [Hongliang Liu](https://github.com/phunterlau) - Hongliang is the maintainer of xgboost python PyPI package for pip installation. 
* [Huayi Zhang](https://github.com/irachex) +* [Johan Manders](https://github.com/johanmanders) From 6e2bdcbbbc55d8f467e1014cbfc5c31faa501221 Mon Sep 17 00:00:00 2001 From: Johan Manders Date: Wed, 14 Oct 2015 13:22:39 +0200 Subject: [PATCH 06/12] Demo for accessing eval metrics in xgboost --- demo/guide-python/evals_result.py | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) create mode 100644 demo/guide-python/evals_result.py diff --git a/demo/guide-python/evals_result.py b/demo/guide-python/evals_result.py new file mode 100644 index 000000000..e07ba8572 --- /dev/null +++ b/demo/guide-python/evals_result.py @@ -0,0 +1,29 @@ +import xgboost as xgb +## +# This script demonstrate how to access the eval metrics in xgboost +## +dtrain = xgb.DMatrix('../data/agaricus.txt.train', silent=True) +dtest = xgb.DMatrix('../data/agaricus.txt.test', silent=True) + +param = [('max_depth', 2), ('objective', 'binary:logistic'), ('eval_metric', 'logloss'), ('eval_metric', 'error')] + +num_round = 2 +watchlist = [(dtest,'eval'), (dtrain,'train')] + +evals_result = {} +bst = xgb.train(param, dtrain, num_round, watchlist, evals_result=evals_result) + +print('Access logloss metric directly from evals_result:') +print(evals_result['eval']['logloss']) + +print('') +print('Access metrics through a loop:') +for e_name, e_mtrs in evals_result.items(): + print('- {}'.format(e_name)) + for e_mtr_name, e_mtr_vals in e_mtrs.items(): + print(' - {}'.format(e_mtr_name)) + print(' - {}'.format(e_mtr_vals)) + +print('') +print('Access complete dictionary:') +print(evals_result) From 122ec48a8948a1bf87c2aff45d805c82b56e9794 Mon Sep 17 00:00:00 2001 From: Johan Manders Date: Wed, 14 Oct 2015 13:40:20 +0200 Subject: [PATCH 07/12] Update evals_result.py --- demo/guide-python/evals_result.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/demo/guide-python/evals_result.py b/demo/guide-python/evals_result.py index e07ba8572..8449b9307 100644 --- 
a/demo/guide-python/evals_result.py +++ b/demo/guide-python/evals_result.py @@ -1,7 +1,8 @@ -import xgboost as xgb ## # This script demonstrate how to access the eval metrics in xgboost ## + +import xgboost as xgb dtrain = xgb.DMatrix('../data/agaricus.txt.train', silent=True) dtest = xgb.DMatrix('../data/agaricus.txt.test', silent=True) From f1e1cc28ff00ed3b54d6eb1f4a77290c8ee7f7b2 Mon Sep 17 00:00:00 2001 From: Johan Manders Date: Wed, 14 Oct 2015 13:43:14 +0200 Subject: [PATCH 08/12] Access xgboost eval metrics by using sklearn --- demo/guide-python/sklearn_evals_result.py | 43 +++++++++++++++++++++++ 1 file changed, 43 insertions(+) create mode 100644 demo/guide-python/sklearn_evals_result.py diff --git a/demo/guide-python/sklearn_evals_result.py b/demo/guide-python/sklearn_evals_result.py new file mode 100644 index 000000000..a72cdfc52 --- /dev/null +++ b/demo/guide-python/sklearn_evals_result.py @@ -0,0 +1,43 @@ +## +# This script demonstrate how to access the xgboost eval metrics by using sklearn +## + +import xgboost as xgb +import numpy as np +from sklearn.datasets import make_hastie_10_2 + +X, y = make_hastie_10_2(n_samples=2000, random_state=42) + +# Map labels from {-1, 1} to {0, 1} +labels, y = np.unique(y, return_inverse=True) + +X_train, X_test = X[:1600], X[1600:] +y_train, y_test = y[:1600], y[1600:] + +param_dist = {'objective':'binary:logistic', 'n_estimators':2} + +clf = xgb.XGBModel(**param_dist) +# Or you can use: clf = xgb.XGBClassifier(**param_dist) + +clf.fit(X_train, y_train, + eval_set=[(X_train, y_train), (X_test, y_test)], + eval_metric='logloss', + verbose=True) + +# Load evals result by calling the evals_result() function +evals_result = clf.evals_result() + +print('Access logloss metric directly from validation_0:') +print(evals_result['validation_0']['logloss']) + +print('') +print('Access metrics through a loop:') +for e_name, e_mtrs in evals_result.items(): + print('- {}'.format(e_name)) + for e_mtr_name, e_mtr_vals in 
e_mtrs.items(): + print(' - {}'.format(e_mtr_name)) + print(' - {}'.format(e_mtr_vals)) + +print('') +print('Access complete dict:') +print(evals_result) From edf4595bc1046c8b62f9fbf5b7a89dc6a6dc17e7 Mon Sep 17 00:00:00 2001 From: Johan Manders Date: Wed, 14 Oct 2015 13:45:59 +0200 Subject: [PATCH 09/12] Added evals result demos --- demo/guide-python/README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/demo/guide-python/README.md b/demo/guide-python/README.md index d26b8fcf2..d84095f2b 100644 --- a/demo/guide-python/README.md +++ b/demo/guide-python/README.md @@ -9,4 +9,6 @@ XGBoost Python Feature Walkthrough * [Predicting leaf indices](predict_leaf_indices.py) * [Sklearn Wrapper](sklearn_examples.py) * [Sklearn Parallel](sklearn_parallel.py) +* [Sklearn access evals result](sklearn_evals_result.py) +* [Access evals result](evals_result.py) * [External Memory](external_memory.py) From 82c2ba4c44feef7b8cf7b4ce4a6509f43ed21bfa Mon Sep 17 00:00:00 2001 From: Johan Manders Date: Wed, 14 Oct 2015 14:17:57 +0200 Subject: [PATCH 10/12] Removed trailing whitespaces and Change Error to XGBoostError --- python-package/xgboost/sklearn.py | 40 ++++++++++++++++--------------- 1 file changed, 21 insertions(+), 19 deletions(-) diff --git a/python-package/xgboost/sklearn.py b/python-package/xgboost/sklearn.py index 958866b49..bc4539745 100644 --- a/python-package/xgboost/sklearn.py +++ b/python-package/xgboost/sklearn.py @@ -203,11 +203,11 @@ class XGBModel(XGBModelBase): # pylint: disable=missing-docstring,invalid-name test_dmatrix = DMatrix(data, missing=self.missing) return self.booster().predict(test_dmatrix) - + def evals_result(self): """Return the evaluation results. - If eval_set is passed to the `fit` function, you can call evals_result() to + If eval_set is passed to the `fit` function, you can call evals_result() to get evaluation results for all passed eval_sets. 
When eval_metric is also passed to the `fit` function, the evals_result will contain the eval_metrics passed to the `fit` function @@ -215,27 +215,28 @@ class XGBModel(XGBModelBase): Returns ------- evals_result : dictionary - + Example ------- param_dist = {'objective':'binary:logistic', 'n_estimators':2} - + clf = xgb.XGBModel(**param_dist) clf.fit(X_train, y_train, - eval_set=[(X_train, y_train), (X_test, y_test)], + eval_set=[(X_train, y_train), (X_test, y_test)], eval_metric='logloss', verbose=True) - + evals_result = clf.evals_result() - - The variable evals_result will contain: - {'validation_0': {'logloss': ['0.604835', '0.531479']}, 'validation_1': {'logloss': ['0.41965', '0.17686']}} + + The variable evals_result will contain: + {'validation_0': {'logloss': ['0.604835', '0.531479']}, + 'validation_1': {'logloss': ['0.41965', '0.17686']}} """ if self.evals_result_: evals_result = self.evals_result_ else: - raise Error('No results.') + raise XGBoostError('No results.') return evals_result @@ -373,7 +374,7 @@ class XGBClassifier(XGBModel, XGBClassifierBase): def evals_result(self): """Return the evaluation results. - If eval_set is passed to the `fit` function, you can call evals_result() to + If eval_set is passed to the `fit` function, you can call evals_result() to get evaluation results for all passed eval_sets. 
When eval_metric is also passed to the `fit` function, the evals_result will contain the eval_metrics passed to the `fit` function @@ -381,27 +382,28 @@ class XGBClassifier(XGBModel, XGBClassifierBase): Returns ------- evals_result : dictionary - + Example ------- param_dist = {'objective':'binary:logistic', 'n_estimators':2} - + clf = xgb.XGBClassifier(**param_dist) clf.fit(X_train, y_train, - eval_set=[(X_train, y_train), (X_test, y_test)], + eval_set=[(X_train, y_train), (X_test, y_test)], eval_metric='logloss', verbose=True) - + evals_result = clf.evals_result() - - The variable evals_result will contain: - {'validation_0': {'logloss': ['0.604835', '0.531479']}, 'validation_1': {'logloss': ['0.41965', '0.17686']}} + + The variable evals_result will contain: + {'validation_0': {'logloss': ['0.604835', '0.531479']}, + 'validation_1': {'logloss': ['0.41965', '0.17686']}} """ if self.evals_result_: evals_result = self.evals_result_ else: - raise Error('No results.') + raise XGBoostError('No results.') return evals_result From 0f8f8e05b2330281a5c2cb9b0d27e4f719a492d2 Mon Sep 17 00:00:00 2001 From: Johan Manders Date: Wed, 14 Oct 2015 14:18:31 +0200 Subject: [PATCH 11/12] One line was too long --- python-package/xgboost/training.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python-package/xgboost/training.py b/python-package/xgboost/training.py index 4841803b4..1e7294d7b 100644 --- a/python-package/xgboost/training.py +++ b/python-package/xgboost/training.py @@ -41,7 +41,8 @@ def train(params, dtrain, num_boost_round=10, evals=(), obj=None, feval=None, This dictionary stores the evaluation results of all the items in watchlist. 
Example: with a watchlist containing [(dtest,'eval'), (dtrain,'train')] and and a paramater containing ('eval_metric', 'logloss') - Returns: {'train': {'logloss': ['0.48253', '0.35953']}, 'eval': {'logloss': ['0.480385', '0.357756']}} + Returns: {'train': {'logloss': ['0.48253', '0.35953']}, + 'eval': {'logloss': ['0.480385', '0.357756']}} verbose_eval : bool If `verbose_eval` then the evaluation metric on the validation set, if given, is printed at each boosting stage. @@ -320,4 +321,3 @@ def cv(params, dtrain, num_boost_round=10, nfold=3, metrics=(), results = np.array(results) return results - From 00387cb6459491e442a6c809fabe934e2645699f Mon Sep 17 00:00:00 2001 From: Johan Manders Date: Wed, 14 Oct 2015 14:26:18 +0200 Subject: [PATCH 12/12] Removed the last few trailing whitespaces --- python-package/xgboost/sklearn.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/python-package/xgboost/sklearn.py b/python-package/xgboost/sklearn.py index bc4539745..3bf747b58 100644 --- a/python-package/xgboost/sklearn.py +++ b/python-package/xgboost/sklearn.py @@ -237,7 +237,7 @@ class XGBModel(XGBModelBase): evals_result = self.evals_result_ else: raise XGBoostError('No results.') - + return evals_result @@ -370,7 +370,7 @@ class XGBClassifier(XGBModel, XGBClassifierBase): classone_probs = class_probs classzero_probs = 1.0 - classone_probs return np.vstack((classzero_probs, classone_probs)).transpose() - + def evals_result(self): """Return the evaluation results. @@ -404,7 +404,7 @@ class XGBClassifier(XGBModel, XGBClassifierBase): evals_result = self.evals_result_ else: raise XGBoostError('No results.') - + return evals_result class XGBRegressor(XGBModel, XGBRegressorBase):