Fix #3663: Allow sklearn API to use callbacks

* Fix lint
* Add Callback API to Python API doc

parent 5a8bbb39a1
commit 4b43810f51
@@ -53,3 +53,15 @@ Plotting API
 .. autofunction:: xgboost.plot_tree

 .. autofunction:: xgboost.to_graphviz
+
+.. _callback_api:
+
+Callback API
+------------
+.. autofunction:: xgboost.callback.print_evaluation
+
+.. autofunction:: xgboost.callback.record_evaluation
+
+.. autofunction:: xgboost.callback.reset_learning_rate
+
+.. autofunction:: xgboost.callback.early_stop
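The four callbacks listed above are all used the same way: each call returns a function that goes in a list passed to the ``callbacks`` argument of the training APIs. A minimal sketch (the data and parameter values here are illustrative, not from this commit):

.. code-block:: python

    import numpy as np
    import xgboost as xgb

    X, y = np.random.rand(100, 5), np.random.randint(2, size=100)
    dtrain = xgb.DMatrix(X, label=y)

    history = {}
    bst = xgb.train({'objective': 'binary:logistic'}, dtrain,
                    num_boost_round=20, evals=[(dtrain, 'train')],
                    callbacks=[
                        xgb.callback.print_evaluation(period=5),   # log every 5 rounds
                        xgb.callback.record_evaluation(history),   # fill `history` in place
                    ])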
@@ -32,7 +32,7 @@ def _fmt_metric(value, show_stdv=True):
 def print_evaluation(period=1, show_stdv=True):
     """Create a callback that prints the evaluation results.

-    We print the evaluation results every ``period`` iterations
+    We print the evaluation results every **period** iterations
     and on the first and the last iterations.

     Parameters
@@ -60,7 +60,7 @@ def print_evaluation(period=1, show_stdv=True):


 def record_evaluation(eval_result):
-    """Create a callback that records the evaluation history into eval_result.
+    """Create a callback that records the evaluation history into **eval_result**.

     Parameters
     ----------
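As a usage sketch (dictionary layout inferred from the ``evals_result`` examples elsewhere in this diff): ``record_evaluation`` fills the passed dictionary in place, keyed first by evaluation-set name and then by metric name.

.. code-block:: python

    import numpy as np
    import xgboost as xgb

    X, y = np.random.rand(100, 5), np.random.randint(2, size=100)
    dtrain = xgb.DMatrix(X, label=y)

    history = {}  # mutated during training
    xgb.train({'objective': 'binary:logistic', 'eval_metric': 'logloss'},
              dtrain, num_boost_round=10, evals=[(dtrain, 'train')],
              callbacks=[xgb.callback.record_evaluation(history)])
    # history now looks like {'train': {'logloss': [...]}}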
@@ -109,10 +109,11 @@ def reset_learning_rate(learning_rates):
     learning_rates: list or function
         List of learning rate for each boosting round
         or a customized function that calculates eta in terms of
-        current number of round and the total number of boosting round (e.g. yields
-        learning rate decay)
-        - list l: eta = l[boosting_round]
-        - function f: eta = f(boosting_round, num_boost_round)
+        current number of round and the total number of boosting round (e.g.
+        yields learning rate decay)
+
+        * list ``l``: ``eta = l[boosting_round]``
+        * function ``f``: ``eta = f(boosting_round, num_boost_round)``

     Returns
     -------
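To make the two forms concrete, here is a sketch of both (values illustrative): a list supplies one eta per round and should match ``num_boost_round``, while a function receives the current round and the total number of rounds.

.. code-block:: python

    import numpy as np
    import xgboost as xgb

    X, y = np.random.rand(100, 5), np.random.randint(2, size=100)
    dtrain = xgb.DMatrix(X, label=y)
    params = {'objective': 'binary:logistic'}

    # List form: eta = l[boosting_round]; length equals num_boost_round.
    rates = [0.3, 0.2, 0.1, 0.05, 0.01]
    xgb.train(params, dtrain, num_boost_round=5,
              callbacks=[xgb.callback.reset_learning_rate(rates)])

    # Function form: eta = f(boosting_round, num_boost_round), e.g. linear decay.
    def decay(current_round, num_boost_round):
        return 0.3 * (1.0 - float(current_round) / num_boost_round)

    xgb.train(params, dtrain, num_boost_round=5,
              callbacks=[xgb.callback.reset_learning_rate(decay)])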
@@ -150,14 +151,14 @@ def early_stop(stopping_rounds, maximize=False, verbose=True):
     """Create a callback that activates early stopping.

     Validation error needs to decrease at least
-    every <stopping_rounds> round(s) to continue training.
-    Requires at least one item in evals.
+    every **stopping_rounds** round(s) to continue training.
+    Requires at least one item in **evals**.
     If there's more than one, will use the last.
     Returns the model from the last iteration (not the best one).
     If early stopping occurs, the model will have three additional fields:
-    bst.best_score, bst.best_iteration and bst.best_ntree_limit.
-    (Use bst.best_ntree_limit to get the correct value if num_parallel_tree
-    and/or num_class appears in the parameters)
+    ``bst.best_score``, ``bst.best_iteration`` and ``bst.best_ntree_limit``.
+    (Use ``bst.best_ntree_limit`` to get the correct value if ``num_parallel_tree``
+    and/or ``num_class`` appears in the parameters)

     Parameters
     ----------
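A sketch of the behavior described above (data and thresholds illustrative); since ``best_ntree_limit`` is only set when stopping actually occurs, the prediction below falls back to ``ntree_limit=0`` (all trees) otherwise:

.. code-block:: python

    import numpy as np
    import xgboost as xgb

    X, y = np.random.rand(200, 5), np.random.randint(2, size=200)
    dtrain = xgb.DMatrix(X[:150], label=y[:150])
    dval = xgb.DMatrix(X[150:], label=y[150:])

    # Stop when the validation metric fails to improve for 10 rounds.
    bst = xgb.train({'objective': 'binary:logistic'}, dtrain,
                    num_boost_round=100, evals=[(dval, 'val')],
                    callbacks=[xgb.callback.early_stop(stopping_rounds=10)])

    preds = bst.predict(dval, ntree_limit=getattr(bst, 'best_ntree_limit', 0))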
@@ -1,5 +1,5 @@
 # coding: utf-8
-# pylint: disable=too-many-arguments, too-many-locals, invalid-name, fixme, E0012, R0912
+# pylint: disable=too-many-arguments, too-many-locals, invalid-name, fixme, E0012, R0912, C0302
 """Scikit-Learn Wrapper interface for XGBoost."""
 from __future__ import absolute_import

@@ -69,9 +69,9 @@ class XGBModel(XGBModelBase):
     booster: string
         Specify which booster to use: gbtree, gblinear or dart.
     nthread : int
-        Number of parallel threads used to run xgboost. (Deprecated, please use n_jobs)
+        Number of parallel threads used to run xgboost. (Deprecated, please use ``n_jobs``)
     n_jobs : int
-        Number of parallel threads used to run xgboost. (replaces nthread)
+        Number of parallel threads used to run xgboost. (replaces ``nthread``)
     gamma : float
         Minimum loss reduction required to make a further partition on a leaf node of the tree.
     min_child_weight : int
@@ -242,7 +242,7 @@ class XGBModel(XGBModelBase):

     def fit(self, X, y, sample_weight=None, eval_set=None, eval_metric=None,
             early_stopping_rounds=None, verbose=True, xgb_model=None,
-            sample_weight_eval_set=None):
+            sample_weight_eval_set=None, callbacks=None):
         # pylint: disable=missing-docstring,invalid-name,attribute-defined-outside-init
         """
         Fit the gradient boosting model
@@ -285,6 +285,14 @@ class XGBModel(XGBModelBase):
         xgb_model : str
             file name of stored xgb model or 'Booster' instance Xgb model to be
             loaded before training (allows training continuation).
+        callbacks : list of callback functions
+            List of callback functions that are applied at end of each iteration.
+            It is possible to use predefined callbacks by using :ref:`callback_api`.
+            Example:
+
+            .. code-block:: python
+
+                [xgb.callback.reset_learning_rate(custom_rates)]
         """
         if sample_weight is not None:
             trainDmatrix = DMatrix(X, label=y, weight=sample_weight,
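With this change, the sklearn wrapper accepts the same callback list as ``xgb.train``. A sketch using ``XGBRegressor`` (which inherits this ``fit``); ``custom_rates`` is the hypothetical schedule named in the docstring example:

.. code-block:: python

    import numpy as np
    import xgboost as xgb

    X, y = np.random.rand(100, 5), np.random.rand(100)
    custom_rates = [0.3 * 0.9 ** i for i in range(10)]  # one eta per tree

    model = xgb.XGBRegressor(n_estimators=10)
    model.fit(X, y, eval_set=[(X, y)], verbose=False,
              callbacks=[xgb.callback.reset_learning_rate(custom_rates)])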
@@ -325,7 +333,8 @@ class XGBModel(XGBModelBase):
                               self.n_estimators, evals=evals,
                               early_stopping_rounds=early_stopping_rounds,
                               evals_result=evals_result, obj=obj, feval=feval,
-                              verbose_eval=verbose, xgb_model=xgb_model)
+                              verbose_eval=verbose, xgb_model=xgb_model,
+                              callbacks=callbacks)

         if evals_result:
             for val in evals_result.items():
@@ -413,10 +422,10 @@ class XGBModel(XGBModelBase):
     def evals_result(self):
         """Return the evaluation results.

-        If ``eval_set`` is passed to the `fit` function, you can call ``evals_result()`` to
-        get evaluation results for all passed eval_sets. When ``eval_metric`` is also
-        passed to the ``fit`` function, the ``evals_result`` will contain the ``eval_metrics``
-        passed to the ``fit`` function
+        If **eval_set** is passed to the `fit` function, you can call
+        ``evals_result()`` to get evaluation results for all passed **eval_sets**.
+        When **eval_metric** is also passed to the `fit` function, the
+        **evals_result** will contain the **eval_metrics** passed to the `fit` function.

         Returns
         -------
@@ -438,9 +447,9 @@ class XGBModel(XGBModelBase):

             evals_result = clf.evals_result()

-        The variable evals_result will contain:
+        The variable **evals_result** will contain:

-        .. code-block:: none
+        .. code-block:: python

            {'validation_0': {'logloss': ['0.604835', '0.531479']},
             'validation_1': {'logloss': ['0.41965', '0.17686']}}
@@ -492,7 +501,7 @@ class XGBClassifier(XGBModel, XGBClassifierBase):

     def fit(self, X, y, sample_weight=None, eval_set=None, eval_metric=None,
             early_stopping_rounds=None, verbose=True, xgb_model=None,
-            sample_weight_eval_set=None):
+            sample_weight_eval_set=None, callbacks=None):
         # pylint: disable = attribute-defined-outside-init,arguments-differ
         """
         Fit gradient boosting classifier
@@ -535,6 +544,14 @@ class XGBClassifier(XGBModel, XGBClassifierBase):
         xgb_model : str
             file name of stored xgb model or 'Booster' instance Xgb model to be
             loaded before training (allows training continuation).
+        callbacks : list of callback functions
+            List of callback functions that are applied at end of each iteration.
+            It is possible to use predefined callbacks by using :ref:`callback_api`.
+            Example:
+
+            .. code-block:: python
+
+                [xgb.callback.reset_learning_rate(custom_rates)]
         """
         evals_result = {}
         self.classes_ = np.unique(y)
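The classifier takes the same argument; for instance, early stopping can now be driven through a callback instead of the ``early_stopping_rounds`` parameter (a sketch with random placeholder data):

.. code-block:: python

    import numpy as np
    import xgboost as xgb

    X, y = np.random.rand(200, 5), np.random.randint(2, size=200)

    clf = xgb.XGBClassifier(n_estimators=50)
    clf.fit(X[:150], y[:150], eval_set=[(X[150:], y[150:])], verbose=False,
            callbacks=[xgb.callback.early_stop(stopping_rounds=5, verbose=False)])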
@@ -592,7 +609,8 @@ class XGBClassifier(XGBModel, XGBClassifierBase):
                               evals=evals,
                               early_stopping_rounds=early_stopping_rounds,
                               evals_result=evals_result, obj=obj, feval=feval,
-                              verbose_eval=verbose, xgb_model=None)
+                              verbose_eval=verbose, xgb_model=None,
+                              callbacks=callbacks)

         self.objective = xgb_options["objective"]
         if evals_result:
@@ -705,10 +723,10 @@ class XGBClassifier(XGBModel, XGBClassifierBase):
     def evals_result(self):
         """Return the evaluation results.

-        If eval_set is passed to the `fit` function, you can call evals_result() to
-        get evaluation results for all passed eval_sets. When eval_metric is also
-        passed to the `fit` function, the evals_result will contain the eval_metrics
-        passed to the `fit` function
+        If **eval_set** is passed to the `fit` function, you can call
+        ``evals_result()`` to get evaluation results for all passed **eval_sets**.
+        When **eval_metric** is also passed to the `fit` function, the
+        **evals_result** will contain the **eval_metrics** passed to the `fit` function.

         Returns
         -------
@@ -730,9 +748,9 @@ class XGBClassifier(XGBModel, XGBClassifierBase):

             evals_result = clf.evals_result()

-        The variable ``evals_result`` will contain
+        The variable **evals_result** will contain

-        .. code-block:: none
+        .. code-block:: python

            {'validation_0': {'logloss': ['0.604835', '0.531479']},
             'validation_1': {'logloss': ['0.41965', '0.17686']}}
@@ -771,9 +789,9 @@ class XGBRanker(XGBModel):
     booster: string
         Specify which booster to use: gbtree, gblinear or dart.
     nthread : int
-        Number of parallel threads used to run xgboost. (Deprecated, please use n_jobs)
+        Number of parallel threads used to run xgboost. (Deprecated, please use ``n_jobs``)
     n_jobs : int
-        Number of parallel threads used to run xgboost. (replaces nthread)
+        Number of parallel threads used to run xgboost. (replaces ``nthread``)
     gamma : float
         Minimum loss reduction required to make a further partition on a leaf node of the tree.
     min_child_weight : int
@@ -816,8 +834,12 @@ class XGBRanker(XGBModel):
     ----
     A custom objective function is currently not supported by XGBRanker.

-    Group information is required for ranking tasks. Before fitting the model, your data need to
-    be sorted by group. When fitting the model, you need to provide an additional array that
+    Note
+    ----
+    Group information is required for ranking tasks.
+
+    Before fitting the model, your data need to be sorted by group. When
+    fitting the model, you need to provide an additional array that
     contains the size of each group.

     For example, if your original data look like:
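For example, three queries with 4, 3, and 2 documents respectively would be encoded like this (a sketch; features and labels are random placeholders):

.. code-block:: python

    import numpy as np
    import xgboost as xgb

    # Rows must be sorted so that documents of one query are contiguous.
    X = np.random.rand(9, 5)
    y = np.random.randint(3, size=9)  # graded relevance labels
    group = [4, 3, 2]                 # group sizes, summing to len(X)

    ranker = xgb.XGBRanker(n_estimators=10)
    ranker.fit(X, y, group)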
@@ -863,7 +885,7 @@ class XGBRanker(XGBModel):

     def fit(self, X, y, group, sample_weight=None, eval_set=None, sample_weight_eval_set=None,
             eval_group=None, eval_metric=None, early_stopping_rounds=None,
-            verbose=False, xgb_model=None):
+            verbose=False, xgb_model=None, callbacks=None):
         # pylint: disable = attribute-defined-outside-init,arguments-differ
         """
         Fit the gradient boosting model
@@ -911,6 +933,14 @@ class XGBRanker(XGBModel):
         xgb_model : str
             file name of stored xgb model or 'Booster' instance Xgb model to be
             loaded before training (allows training continuation).
+        callbacks : list of callback functions
+            List of callback functions that are applied at end of each iteration.
+            It is possible to use predefined callbacks by using :ref:`callback_api`.
+            Example:
+
+            .. code-block:: python
+
+                [xgb.callback.reset_learning_rate(custom_rates)]
         """
         # check if group information is provided
         if group is None:
@@ -963,7 +993,8 @@ class XGBRanker(XGBModel):
                               self.n_estimators,
                               early_stopping_rounds=early_stopping_rounds, evals=evals,
                               evals_result=evals_result, feval=feval,
-                              verbose_eval=verbose, xgb_model=xgb_model)
+                              verbose_eval=verbose, xgb_model=xgb_model,
+                              callbacks=callbacks)

         self.objective = params["objective"]

@@ -137,34 +137,35 @@ def train(params, dtrain, num_boost_round=10, evals=(), obj=None, feval=None,
         Whether to maximize feval.
     early_stopping_rounds: int
         Activates early stopping. Validation error needs to decrease at least
-        every <early_stopping_rounds> round(s) to continue training.
-        Requires at least one item in evals.
+        every **early_stopping_rounds** round(s) to continue training.
+        Requires at least one item in **evals**.
         If there's more than one, will use the last.
         Returns the model from the last iteration (not the best one).
         If early stopping occurs, the model will have three additional fields:
-        bst.best_score, bst.best_iteration and bst.best_ntree_limit.
-        (Use bst.best_ntree_limit to get the correct value if num_parallel_tree
-        and/or num_class appears in the parameters)
+        ``bst.best_score``, ``bst.best_iteration`` and ``bst.best_ntree_limit``.
+        (Use ``bst.best_ntree_limit`` to get the correct value if
+        ``num_parallel_tree`` and/or ``num_class`` appears in the parameters)
     evals_result: dict
         This dictionary stores the evaluation results of all the items in watchlist.

-        Example: with a watchlist containing [(dtest,'eval'), (dtrain,'train')] and
-        a parameter containing ('eval_metric': 'logloss'), the **evals_result**
-        returns
+        Example: with a watchlist containing
+        ``[(dtest,'eval'), (dtrain,'train')]`` and
+        a parameter containing ``('eval_metric': 'logloss')``,
+        the **evals_result** returns

-        .. code-block:: none
+        .. code-block:: python

            {'train': {'logloss': ['0.48253', '0.35953']},
            'eval': {'logloss': ['0.480385', '0.357756']}}

     verbose_eval : bool or int
-        Requires at least one item in evals.
+        Requires at least one item in **evals**.
         If **verbose_eval** is True then the evaluation metric on the validation set is
         printed at each boosting stage.
         If **verbose_eval** is an integer then the evaluation metric on the validation set
         is printed at every given **verbose_eval** boosting stage. The last boosting stage
         / the boosting stage found by using **early_stopping_rounds** is also printed.
-        Example: with ``verbose_eval=4`` and at least one item in evals, an evaluation metric
+        Example: with ``verbose_eval=4`` and at least one item in **evals**, an evaluation metric
         is printed every 4 boosting stages, instead of every boosting stage.
     learning_rates: list or function (deprecated - use callback API instead)
         List of learning rate for each boosting round
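Tying the pieces of this docstring together, a sketch that exercises ``early_stopping_rounds``, ``evals_result``, and an integer ``verbose_eval`` (data random, values illustrative):

.. code-block:: python

    import numpy as np
    import xgboost as xgb

    X, y = np.random.rand(200, 5), np.random.randint(2, size=200)
    dtrain = xgb.DMatrix(X[:150], label=y[:150])
    dtest = xgb.DMatrix(X[150:], label=y[150:])

    evals_result = {}
    bst = xgb.train({'objective': 'binary:logistic', 'eval_metric': 'logloss'},
                    dtrain, num_boost_round=50,
                    evals=[(dtest, 'eval'), (dtrain, 'train')],
                    early_stopping_rounds=10,
                    evals_result=evals_result,
                    verbose_eval=4)  # print every 4 boosting stages
    # evals_result maps each watchlist name to its per-round metric history.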
@@ -175,12 +176,17 @@ def train(params, dtrain, num_boost_round=10, evals=(), obj=None, feval=None,
         Xgb model to be loaded before training (allows training continuation).
     callbacks : list of callback functions
         List of callback functions that are applied at end of each iteration.
-        It is possible to use predefined callbacks by using xgb.callback module.
-        Example: [xgb.callback.reset_learning_rate(custom_rates)]
+        It is possible to use predefined callbacks by using
+        :ref:`Callback API <callback_api>`.
+        Example:
+
+        .. code-block:: python
+
+            [xgb.callback.reset_learning_rate(custom_rates)]

     Returns
     -------
-    booster : a trained booster model
+    Booster : a trained booster model
     """
     callbacks = [] if callbacks is None else callbacks

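Callbacks need not come from ``xgb.callback``; any function of the callback environment works. A sketch of a hand-written one (the ``iteration`` and ``evaluation_result_list`` attribute names follow the ``xgb.callback`` module of this era; an assumption worth checking against your version):

.. code-block:: python

    import numpy as np
    import xgboost as xgb

    X, y = np.random.rand(100, 5), np.random.randint(2, size=100)
    dtrain = xgb.DMatrix(X, label=y)

    def log_iteration(env):
        # env is the callback environment passed at the end of each iteration.
        print('round %d: %s' % (env.iteration, env.evaluation_result_list))

    xgb.train({'objective': 'binary:logistic'}, dtrain, num_boost_round=5,
              evals=[(dtrain, 'train')], callbacks=[log_iteration])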
@@ -334,7 +340,7 @@ def cv(params, dtrain, num_boost_round=10, nfold=3, stratified=False, folds=None
     folds : a KFold or StratifiedKFold instance or list of fold indices
         Sklearn KFolds or StratifiedKFolds object.
         Alternatively may explicitly pass sample indices for each fold.
-        For ``n`` folds, ``folds`` should be a length ``n`` list of tuples.
+        For ``n`` folds, **folds** should be a length ``n`` list of tuples.
         Each tuple is ``(in,out)`` where ``in`` is a list of indices to be used
         as the training samples for the ``n`` th fold and ``out`` is a list of
         indices to be used as the testing samples for the ``n`` th fold.
@@ -368,10 +374,11 @@ def cv(params, dtrain, num_boost_round=10, nfold=3, stratified=False, folds=None
         Seed used to generate the folds (passed to numpy.random.seed).
     callbacks : list of callback functions
         List of callback functions that are applied at end of each iteration.
-        It is possible to use predefined callbacks by using xgb.callback module.
+        It is possible to use predefined callbacks by using
+        :ref:`Callback API <callback_api>`.
         Example:

-        .. code-block:: none
+        .. code-block:: python

            [xgb.callback.reset_learning_rate(custom_rates)]
     shuffle : bool
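``cv`` accepts the same list; for example (a sketch, values illustrative), early stopping across folds can be attached as a callback:

.. code-block:: python

    import numpy as np
    import xgboost as xgb

    X, y = np.random.rand(200, 5), np.random.randint(2, size=200)
    dtrain = xgb.DMatrix(X, label=y)

    res = xgb.cv({'objective': 'binary:logistic'}, dtrain,
                 num_boost_round=30, nfold=3, shuffle=True,
                 callbacks=[xgb.callback.early_stop(stopping_rounds=5)])
    # res holds the per-round train/test metric history (a DataFrame when
    # pandas is available).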