Update Python API doc (#3619)
* Show inherited members of XGBRegressor in API doc, since XGBRegressor uses default methods from XGBModel
* Add table of contents to Python API doc
* Skip JVM doc download if not available
* Show inherited members for XGBRegressor
* Add docstring to XGBRegressor.predict()
* Fix rendering errors in Python docstrings
* Fix lint
This commit is contained in:
@@ -1212,9 +1212,10 @@ class Booster(object):
|
||||
def get_score(self, fmap='', importance_type='weight'):
|
||||
"""Get feature importance of each feature.
|
||||
Importance type can be defined as:
|
||||
'weight' - the number of times a feature is used to split the data across all trees.
|
||||
'gain' - the average gain of the feature when it is used in trees
|
||||
'cover' - the average coverage of the feature when it is used in trees
|
||||
|
||||
* 'weight': the number of times a feature is used to split the data across all trees.
|
||||
* 'gain': the average gain across all splits the feature is used in.
|
||||
* 'cover': the average coverage across all splits the feature is used in.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
@@ -1317,6 +1318,7 @@ class Booster(object):
|
||||
|
||||
def get_split_value_histogram(self, feature, fmap='', bins=None, as_pandas=True):
|
||||
"""Get split value histogram of a feature
|
||||
|
||||
Parameters
|
||||
----------
|
||||
feature: str
|
||||
@@ -1327,7 +1329,7 @@ class Booster(object):
|
||||
The maximum number of bins.
|
||||
Number of bins equals number of unique split values n_unique,
|
||||
if bins == None or bins > n_unique.
|
||||
as_pandas : bool, default True
|
||||
as_pandas: bool, default True
|
||||
Return pd.DataFrame when pandas is installed.
|
||||
If False or pandas is not installed, return numpy ndarray.
|
||||
|
||||
|
||||
@@ -28,10 +28,11 @@ def plot_importance(booster, ax=None, height=0.2,
|
||||
grid : bool, Turn the axes grids on or off. Default is True (On).
|
||||
importance_type : str, default "weight"
|
||||
How the importance is calculated: either "weight", "gain", or "cover"
|
||||
"weight" is the number of times a feature appears in a tree
|
||||
"gain" is the average gain of splits which use the feature
|
||||
"cover" is the average coverage of splits which use the feature
|
||||
where coverage is defined as the number of samples affected by the split
|
||||
|
||||
* "weight" is the number of times a feature appears in a tree
|
||||
* "gain" is the average gain of splits which use the feature
|
||||
* "cover" is the average coverage of splits which use the feature
|
||||
where coverage is defined as the number of samples affected by the split
|
||||
max_num_features : int, default None
|
||||
Maximum number of top features displayed on plot. If None, all features will be displayed.
|
||||
height : float, default 0.2
|
||||
|
||||
@@ -99,14 +99,16 @@ class XGBModel(XGBModelBase):
|
||||
missing : float, optional
|
||||
Value in the data which needs to be present as a missing value. If
|
||||
None, defaults to np.nan.
|
||||
**kwargs : dict, optional
|
||||
\*\*kwargs : dict, optional
|
||||
Keyword arguments for XGBoost Booster object. Full documentation of parameters can
|
||||
be found here: https://github.com/dmlc/xgboost/blob/master/doc/parameter.md.
|
||||
Attempting to set a parameter via the constructor args and **kwargs dict simultaneously
|
||||
be found here: https://github.com/dmlc/xgboost/blob/master/doc/parameter.rst.
|
||||
Attempting to set a parameter via the constructor args and \*\*kwargs dict simultaneously
|
||||
will result in a TypeError.
|
||||
Note:
|
||||
**kwargs is unsupported by Sklearn. We do not guarantee that parameters passed via
|
||||
this argument will interact properly with Sklearn.
|
||||
|
||||
.. note:: \*\*kwargs unsupported by scikit-learn
|
||||
|
||||
\*\*kwargs is unsupported by scikit-learn. We do not guarantee that parameters
|
||||
passed via this argument will interact properly with scikit-learn.
|
||||
|
||||
Note
|
||||
----
|
||||
@@ -237,7 +239,7 @@ class XGBModel(XGBModelBase):
|
||||
instance weights on the i-th validation set.
|
||||
eval_metric : str, callable, optional
|
||||
If a str, should be a built-in evaluation metric to use. See
|
||||
doc/parameter.md. If callable, a custom evaluation metric. The call
|
||||
doc/parameter.rst. If callable, a custom evaluation metric. The call
|
||||
signature is func(y_predicted, y_true) where y_true will be a
|
||||
DMatrix object such that you may need to call the get_label
|
||||
method. It must return a str, value pair where the str is a name
|
||||
@@ -314,6 +316,38 @@ class XGBModel(XGBModelBase):
|
||||
return self
|
||||
|
||||
def predict(self, data, output_margin=False, ntree_limit=0):
|
||||
"""
|
||||
Predict with `data`.
|
||||
|
||||
.. note:: This function is not thread safe.
|
||||
|
||||
For each booster object, predict can only be called from one thread.
|
||||
If you want to run prediction using multiple threads, call ``xgb.copy()`` to make copies
|
||||
of model object and then call ``predict()``.
|
||||
|
||||
.. note:: Using ``predict()`` with DART booster
|
||||
|
||||
If the booster object is DART type, ``predict()`` will perform dropouts, i.e. only
|
||||
some of the trees will be evaluated. This will produce incorrect results if ``data`` is
|
||||
not the training data. To obtain correct results on test sets, set ``ntree_limit`` to
|
||||
a nonzero value, e.g.
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
preds = bst.predict(dtest, ntree_limit=num_round)
|
||||
|
||||
Parameters
|
||||
----------
|
||||
data : DMatrix
|
||||
The dmatrix storing the input.
|
||||
output_margin : bool
|
||||
Whether to output the raw untransformed margin value.
|
||||
ntree_limit : int
|
||||
Limit number of trees in the prediction; defaults to 0 (use all trees).
|
||||
Returns
|
||||
-------
|
||||
prediction : numpy array
|
||||
"""
|
||||
# pylint: disable=missing-docstring,invalid-name
|
||||
test_dmatrix = DMatrix(data, missing=self.missing, nthread=self.n_jobs)
|
||||
return self.get_booster().predict(test_dmatrix,
|
||||
@@ -346,10 +380,10 @@ class XGBModel(XGBModelBase):
|
||||
def evals_result(self):
|
||||
"""Return the evaluation results.
|
||||
|
||||
If eval_set is passed to the `fit` function, you can call evals_result() to
|
||||
get evaluation results for all passed eval_sets. When eval_metric is also
|
||||
passed to the `fit` function, the evals_result will contain the eval_metrics
|
||||
passed to the `fit` function
|
||||
If ``eval_set`` is passed to the ``fit`` function, you can call ``evals_result()`` to
|
||||
get evaluation results for all passed eval_sets. When ``eval_metric`` is also
|
||||
passed to the ``fit`` function, the ``evals_result`` will contain the ``eval_metrics``
|
||||
passed to the ``fit`` function.
|
||||
|
||||
Returns
|
||||
-------
|
||||
@@ -357,20 +391,26 @@ class XGBModel(XGBModelBase):
|
||||
|
||||
Example
|
||||
-------
|
||||
param_dist = {'objective':'binary:logistic', 'n_estimators':2}
|
||||
|
||||
clf = xgb.XGBModel(**param_dist)
|
||||
.. code-block:: python
|
||||
|
||||
clf.fit(X_train, y_train,
|
||||
eval_set=[(X_train, y_train), (X_test, y_test)],
|
||||
eval_metric='logloss',
|
||||
verbose=True)
|
||||
param_dist = {'objective':'binary:logistic', 'n_estimators':2}
|
||||
|
||||
evals_result = clf.evals_result()
|
||||
clf = xgb.XGBModel(**param_dist)
|
||||
|
||||
clf.fit(X_train, y_train,
|
||||
eval_set=[(X_train, y_train), (X_test, y_test)],
|
||||
eval_metric='logloss',
|
||||
verbose=True)
|
||||
|
||||
evals_result = clf.evals_result()
|
||||
|
||||
The variable evals_result will contain:
|
||||
{'validation_0': {'logloss': ['0.604835', '0.531479']},
|
||||
'validation_1': {'logloss': ['0.41965', '0.17686']}}
|
||||
|
||||
.. code-block:: none
|
||||
|
||||
{'validation_0': {'logloss': ['0.604835', '0.531479']},
|
||||
'validation_1': {'logloss': ['0.41965', '0.17686']}}
|
||||
"""
|
||||
if self.evals_result_:
|
||||
evals_result = self.evals_result_
|
||||
@@ -382,9 +422,11 @@ class XGBModel(XGBModelBase):
|
||||
@property
|
||||
def feature_importances_(self):
|
||||
"""
|
||||
Feature importances property
|
||||
|
||||
Returns
|
||||
-------
|
||||
feature_importances_ : array of shape = [n_features]
|
||||
feature_importances_ : array of shape ``[n_features]``
|
||||
|
||||
"""
|
||||
b = self.get_booster()
|
||||
@@ -396,9 +438,8 @@ class XGBModel(XGBModelBase):
|
||||
|
||||
class XGBClassifier(XGBModel, XGBClassifierBase):
|
||||
# pylint: disable=missing-docstring,too-many-arguments,invalid-name
|
||||
__doc__ = """Implementation of the scikit-learn API for XGBoost classification.
|
||||
|
||||
""" + '\n'.join(XGBModel.__doc__.split('\n')[2:])
|
||||
__doc__ = "Implementation of the scikit-learn API for XGBoost classification.\n\n" \
|
||||
+ '\n'.join(XGBModel.__doc__.split('\n')[2:])
|
||||
|
||||
def __init__(self, max_depth=3, learning_rate=0.1,
|
||||
n_estimators=100, silent=True,
|
||||
@@ -439,7 +480,7 @@ class XGBClassifier(XGBModel, XGBClassifierBase):
|
||||
instance weights on the i-th validation set.
|
||||
eval_metric : str, callable, optional
|
||||
If a str, should be a built-in evaluation metric to use. See
|
||||
doc/parameter.md. If callable, a custom evaluation metric. The call
|
||||
doc/parameter.rst. If callable, a custom evaluation metric. The call
|
||||
signature is func(y_predicted, y_true) where y_true will be a
|
||||
DMatrix object such that you may need to call the get_label
|
||||
method. It must return a str, value pair where the str is a name
|
||||
@@ -567,10 +608,13 @@ class XGBClassifier(XGBModel, XGBClassifierBase):
|
||||
def predict_proba(self, data, ntree_limit=0):
|
||||
"""
|
||||
Predict the probability of each `data` example being of a given class.
|
||||
NOTE: This function is not thread safe.
|
||||
For each booster object, predict can only be called from one thread.
|
||||
If you want to run prediction using multiple thread, call xgb.copy() to make copies
|
||||
of model object and then call predict
|
||||
|
||||
.. note:: This function is not thread safe
|
||||
|
||||
For each booster object, predict can only be called from one thread.
|
||||
If you want to run prediction using multiple threads, call ``xgb.copy()`` to make copies
|
||||
of model object and then call ``predict()``.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
data : DMatrix
|
||||
@@ -606,20 +650,26 @@ class XGBClassifier(XGBModel, XGBClassifierBase):
|
||||
|
||||
Example
|
||||
-------
|
||||
param_dist = {'objective':'binary:logistic', 'n_estimators':2}
|
||||
|
||||
clf = xgb.XGBClassifier(**param_dist)
|
||||
.. code-block:: python
|
||||
|
||||
clf.fit(X_train, y_train,
|
||||
eval_set=[(X_train, y_train), (X_test, y_test)],
|
||||
eval_metric='logloss',
|
||||
verbose=True)
|
||||
param_dist = {'objective':'binary:logistic', 'n_estimators':2}
|
||||
|
||||
evals_result = clf.evals_result()
|
||||
clf = xgb.XGBClassifier(**param_dist)
|
||||
|
||||
The variable evals_result will contain:
|
||||
{'validation_0': {'logloss': ['0.604835', '0.531479']},
|
||||
'validation_1': {'logloss': ['0.41965', '0.17686']}}
|
||||
clf.fit(X_train, y_train,
|
||||
eval_set=[(X_train, y_train), (X_test, y_test)],
|
||||
eval_metric='logloss',
|
||||
verbose=True)
|
||||
|
||||
evals_result = clf.evals_result()
|
||||
|
||||
The variable ``evals_result`` will contain:
|
||||
|
||||
.. code-block:: none
|
||||
|
||||
{'validation_0': {'logloss': ['0.604835', '0.531479']},
|
||||
'validation_1': {'logloss': ['0.41965', '0.17686']}}
|
||||
"""
|
||||
if self.evals_result_:
|
||||
evals_result = self.evals_result_
|
||||
@@ -631,5 +681,5 @@ class XGBClassifier(XGBModel, XGBClassifierBase):
|
||||
|
||||
class XGBRegressor(XGBModel, XGBRegressorBase):
|
||||
# pylint: disable=missing-docstring
|
||||
__doc__ = """Implementation of the scikit-learn API for XGBoost regression.
|
||||
""" + '\n'.join(XGBModel.__doc__.split('\n')[2:])
|
||||
__doc__ = "Implementation of the scikit-learn API for XGBoost regression.\n\n"\
|
||||
+ '\n'.join(XGBModel.__doc__.split('\n')[2:])
|
||||
|
||||
@@ -147,18 +147,24 @@ def train(params, dtrain, num_boost_round=10, evals=(), obj=None, feval=None,
|
||||
and/or num_class appears in the parameters)
|
||||
evals_result: dict
|
||||
This dictionary stores the evaluation results of all the items in watchlist.
|
||||
|
||||
Example: with a watchlist containing [(dtest,'eval'), (dtrain,'train')] and
|
||||
a parameter containing ('eval_metric': 'logloss')
|
||||
Returns: {'train': {'logloss': ['0.48253', '0.35953']},
|
||||
'eval': {'logloss': ['0.480385', '0.357756']}}
|
||||
a parameter containing ('eval_metric': 'logloss'), the **evals_result**
|
||||
returns
|
||||
|
||||
.. code-block:: none
|
||||
|
||||
{'train': {'logloss': ['0.48253', '0.35953']},
|
||||
'eval': {'logloss': ['0.480385', '0.357756']}}
|
||||
|
||||
verbose_eval : bool or int
|
||||
Requires at least one item in evals.
|
||||
If `verbose_eval` is True then the evaluation metric on the validation set is
|
||||
If **verbose_eval** is True then the evaluation metric on the validation set is
|
||||
printed at each boosting stage.
|
||||
If `verbose_eval` is an integer then the evaluation metric on the validation set
|
||||
is printed at every given `verbose_eval` boosting stage. The last boosting stage
|
||||
/ the boosting stage found by using `early_stopping_rounds` is also printed.
|
||||
Example: with verbose_eval=4 and at least one item in evals, an evaluation metric
|
||||
If **verbose_eval** is an integer then the evaluation metric on the validation set
|
||||
is printed at every given **verbose_eval** boosting stage. The last boosting stage
|
||||
/ the boosting stage found by using **early_stopping_rounds** is also printed.
|
||||
Example: with ``verbose_eval=4`` and at least one item in evals, an evaluation metric
|
||||
is printed every 4 boosting stages, instead of every boosting stage.
|
||||
learning_rates: list or function (deprecated - use callback API instead)
|
||||
List of learning rate for each boosting round
|
||||
@@ -341,8 +347,12 @@ def cv(params, dtrain, num_boost_round=10, nfold=3, stratified=False, folds=None
|
||||
callbacks : list of callback functions
|
||||
List of callback functions that are applied at end of each iteration.
|
||||
It is possible to use predefined callbacks by using xgb.callback module.
|
||||
Example: [xgb.callback.reset_learning_rate(custom_rates)]
|
||||
shuffle : bool
|
||||
Example:
|
||||
|
||||
.. code-block:: none
|
||||
|
||||
[xgb.callback.reset_learning_rate(custom_rates)]
|
||||
shuffle : bool
|
||||
Shuffle data before creating folds.
|
||||
|
||||
Returns
|
||||
|
||||
Reference in New Issue
Block a user