From 953ed1a99b952fd9ee3529ffa93d682a148c5f82 Mon Sep 17 00:00:00 2001
From: Philip Cho <chohyu01@cs.washington.edu>
Date: Wed, 5 Sep 2018 12:15:23 -0700
Subject: [PATCH] Update Python API doc (#3619)

* Show inherited members of XGBRegressor in API doc, since XGBRegressor uses default methods from XGBModel

* Add table of contents to Python API doc

* Skip JVM doc download if not available

* Show inherited members for XGBRegressor

* Add docstring to XGBRegressor.predict()

* Fix rendering errors in Python docstrings

* Fix lint
---
 doc/conf.py                        |   8 +-
 doc/python/python_api.rst          |   6 ++
 python-package/xgboost/core.py     |  10 ++-
 python-package/xgboost/plotting.py |   9 +-
 python-package/xgboost/sklearn.py  | 132 ++++++++++++++++++++---------
 python-package/xgboost/training.py |  30 ++++---
 6 files changed, 134 insertions(+), 61 deletions(-)

diff --git a/doc/conf.py b/doc/conf.py
index 91fbd741f..c82fa155e 100644
--- a/doc/conf.py
+++ b/doc/conf.py
@@ -14,6 +14,7 @@
 from subprocess import call
 from sh.contrib import git
 import urllib.request
+from urllib.error import HTTPError
 from recommonmark.parser import CommonMarkParser
 import sys
 import re
@@ -24,8 +25,11 @@ import guzzle_sphinx_theme
 git_branch = [re.sub(r'origin/', '', x.lstrip(' ')) for x in str(git.branch('-r', '--contains', 'HEAD')).rstrip('\n').split('\n')]
 git_branch = [x for x in git_branch if 'HEAD' not in x]
 print('git_branch = {}'.format(git_branch[0]))
-filename, _ = urllib.request.urlretrieve('https://s3-us-west-2.amazonaws.com/xgboost-docs/{}.tar.bz2'.format(git_branch[0]))
-call('if [ -d tmp ]; then rm -rf tmp; fi; mkdir -p tmp/jvm; cd tmp/jvm; tar xvf {}'.format(filename), shell=True)
+try:
+  filename, _ = urllib.request.urlretrieve('https://s3-us-west-2.amazonaws.com/xgboost-docs/{}.tar.bz2'.format(git_branch[0]))
+  call('if [ -d tmp ]; then rm -rf tmp; fi; mkdir -p tmp/jvm; cd tmp/jvm; tar xvf {}'.format(filename), shell=True)
+except HTTPError:
+  print('JVM doc not found. Skipping...')
 
 # If extensions (or modules to document with autodoc) are in another directory,
 # add these directories to sys.path here. If the directory is relative to the
diff --git a/doc/python/python_api.rst b/doc/python/python_api.rst
index b430bba39..a55de0643 100644
--- a/doc/python/python_api.rst
+++ b/doc/python/python_api.rst
@@ -2,6 +2,10 @@ Python API Reference
 ====================
 This page gives the Python API reference of xgboost, please also refer to Python Package Introduction for more information about python package.
 
+.. contents::
+  :backlinks: none
+  :local:
+
 Core Data Structure
 -------------------
 .. automodule:: xgboost.core
@@ -29,9 +33,11 @@ Scikit-Learn API
 .. automodule:: xgboost.sklearn
 .. autoclass:: xgboost.XGBRegressor
     :members:
+    :inherited-members:
     :show-inheritance:
 .. autoclass:: xgboost.XGBClassifier
     :members:
+    :inherited-members:
     :show-inheritance:
 
 Plotting API
diff --git a/python-package/xgboost/core.py b/python-package/xgboost/core.py
index 83877c2d1..daf7bea44 100644
--- a/python-package/xgboost/core.py
+++ b/python-package/xgboost/core.py
@@ -1212,9 +1212,10 @@ class Booster(object):
     def get_score(self, fmap='', importance_type='weight'):
         """Get feature importance of each feature.
         Importance type can be defined as:
-            'weight' - the number of times a feature is used to split the data across all trees.
-            'gain' - the average gain of the feature when it is used in trees
-            'cover' - the average coverage of the feature when it is used in trees
+
+        * 'weight': the number of times a feature is used to split the data across all trees.
+        * 'gain': the average gain across all splits the feature is used in.
+        * 'cover': the average coverage across all splits the feature is used in.
 
         Parameters
         ----------
@@ -1317,6 +1318,7 @@ class Booster(object):
 
     def get_split_value_histogram(self, feature, fmap='', bins=None, as_pandas=True):
         """Get split value histogram of a feature
+
         Parameters
         ----------
         feature: str
@@ -1327,7 +1329,7 @@ class Booster(object):
             The maximum number of bins.
             Number of bins equals number of unique split values n_unique,
             if bins == None or bins > n_unique.
-        as_pandas : bool, default True
+        as_pandas: bool, default True
             Return pd.DataFrame when pandas is installed.
             If False or pandas is not installed, return numpy ndarray.
 
diff --git a/python-package/xgboost/plotting.py b/python-package/xgboost/plotting.py
index 59c657c55..99bc31675 100644
--- a/python-package/xgboost/plotting.py
+++ b/python-package/xgboost/plotting.py
@@ -28,10 +28,11 @@ def plot_importance(booster, ax=None, height=0.2,
     grid : bool, Turn the axes grids on or off.  Default is True (On).
     importance_type : str, default "weight"
         How the importance is calculated: either "weight", "gain", or "cover"
-        "weight" is the number of times a feature appears in a tree
-        "gain" is the average gain of splits which use the feature
-        "cover" is the average coverage of splits which use the feature
-            where coverage is defined as the number of samples affected by the split
+
+        * "weight" is the number of times a feature appears in a tree
+        * "gain" is the average gain of splits which use the feature
+        * "cover" is the average coverage of splits which use the feature
+          where coverage is defined as the number of samples affected by the split
     max_num_features : int, default None
         Maximum number of top features displayed on plot. If None, all features will be displayed.
     height : float, default 0.2
diff --git a/python-package/xgboost/sklearn.py b/python-package/xgboost/sklearn.py
index eeda07d54..173f8e510 100644
--- a/python-package/xgboost/sklearn.py
+++ b/python-package/xgboost/sklearn.py
@@ -99,14 +99,16 @@ class XGBModel(XGBModelBase):
     missing : float, optional
         Value in the data which needs to be present as a missing value. If
         None, defaults to np.nan.
-    **kwargs : dict, optional
+    \*\*kwargs : dict, optional
         Keyword arguments for XGBoost Booster object.  Full documentation of parameters can
-        be found here: https://github.com/dmlc/xgboost/blob/master/doc/parameter.md.
-        Attempting to set a parameter via the constructor args and **kwargs dict simultaneously
+        be found here: https://github.com/dmlc/xgboost/blob/master/doc/parameter.rst.
+        Attempting to set a parameter via the constructor args and \*\*kwargs dict simultaneously
         will result in a TypeError.
-        Note:
-            **kwargs is unsupported by Sklearn.  We do not guarantee that parameters passed via
-            this argument will interact properly with Sklearn.
+
+        .. note:: \*\*kwargs unsupported by scikit-learn
+
+            \*\*kwargs is unsupported by scikit-learn.  We do not guarantee that parameters
+            passed via this argument will interact properly with scikit-learn.
 
     Note
     ----
@@ -237,7 +239,7 @@ class XGBModel(XGBModelBase):
             instance weights on the i-th validation set.
         eval_metric : str, callable, optional
             If a str, should be a built-in evaluation metric to use. See
-            doc/parameter.md. If callable, a custom evaluation metric. The call
+            doc/parameter.rst. If callable, a custom evaluation metric. The call
             signature is func(y_predicted, y_true) where y_true will be a
             DMatrix object such that you may need to call the get_label
             method. It must return a str, value pair where the str is a name
@@ -314,6 +316,38 @@ class XGBModel(XGBModelBase):
         return self
 
     def predict(self, data, output_margin=False, ntree_limit=0):
+        """
+        Predict with `data`.
+
+        .. note:: This function is not thread safe.
+
+          For each booster object, predict can only be called from one thread.
+          If you want to run prediction using multiple threads, call ``xgb.copy()`` to make
+          copies of the model object and then call ``predict()``.
+
+        .. note:: Using ``predict()`` with DART booster
+
+          If the booster object is DART type, ``predict()`` will perform dropouts, i.e. only
+          some of the trees will be evaluated. This will produce incorrect results if ``data`` is
+          not the training data. To obtain correct results on test sets, set ``ntree_limit`` to
+          a nonzero value, e.g.
+
+          .. code-block:: python
+
+            preds = bst.predict(dtest, ntree_limit=num_round)
+
+        Parameters
+        ----------
+        data : DMatrix
+            The dmatrix storing the input.
+        output_margin : bool
+            Whether to output the raw untransformed margin value.
+        ntree_limit : int
+            Limit number of trees in the prediction; defaults to 0 (use all trees).
+        Returns
+        -------
+        prediction : numpy array
+        """
         # pylint: disable=missing-docstring,invalid-name
         test_dmatrix = DMatrix(data, missing=self.missing, nthread=self.n_jobs)
         return self.get_booster().predict(test_dmatrix,
@@ -346,10 +380,10 @@ class XGBModel(XGBModelBase):
     def evals_result(self):
         """Return the evaluation results.
 
-        If eval_set is passed to the `fit` function, you can call evals_result() to
-        get evaluation results for all passed eval_sets. When eval_metric is also
-        passed to the `fit` function, the evals_result will contain the eval_metrics
-        passed to the `fit` function
+        If ``eval_set`` is passed to the ``fit`` function, you can call ``evals_result()`` to
+        get evaluation results for all passed eval_sets. When ``eval_metric`` is also
+        passed to the ``fit`` function, the ``evals_result`` will contain the ``eval_metrics``
+        passed to the ``fit`` function.
 
         Returns
         -------
@@ -357,20 +391,26 @@ class XGBModel(XGBModelBase):
 
         Example
         -------
-        param_dist = {'objective':'binary:logistic', 'n_estimators':2}
 
-        clf = xgb.XGBModel(**param_dist)
+        .. code-block:: python
 
-        clf.fit(X_train, y_train,
-                eval_set=[(X_train, y_train), (X_test, y_test)],
-                eval_metric='logloss',
-                verbose=True)
+            param_dist = {'objective':'binary:logistic', 'n_estimators':2}
 
-        evals_result = clf.evals_result()
+            clf = xgb.XGBModel(**param_dist)
+
+            clf.fit(X_train, y_train,
+                    eval_set=[(X_train, y_train), (X_test, y_test)],
+                    eval_metric='logloss',
+                    verbose=True)
+
+            evals_result = clf.evals_result()
 
         The variable evals_result will contain:
-        {'validation_0': {'logloss': ['0.604835', '0.531479']},
-         'validation_1': {'logloss': ['0.41965', '0.17686']}}
+
+        .. code-block:: none
+
+            {'validation_0': {'logloss': ['0.604835', '0.531479']},
+             'validation_1': {'logloss': ['0.41965', '0.17686']}}
         """
         if self.evals_result_:
             evals_result = self.evals_result_
@@ -382,9 +422,11 @@ class XGBModel(XGBModelBase):
     @property
     def feature_importances_(self):
         """
+        Feature importances property
+
         Returns
         -------
-        feature_importances_ : array of shape = [n_features]
+        feature_importances_ : array of shape ``[n_features]``
 
         """
         b = self.get_booster()
@@ -396,9 +438,8 @@ class XGBModel(XGBModelBase):
 
 class XGBClassifier(XGBModel, XGBClassifierBase):
     # pylint: disable=missing-docstring,too-many-arguments,invalid-name
-    __doc__ = """Implementation of the scikit-learn API for XGBoost classification.
-
-    """ + '\n'.join(XGBModel.__doc__.split('\n')[2:])
+    __doc__ = "Implementation of the scikit-learn API for XGBoost classification.\n\n" \
+        + '\n'.join(XGBModel.__doc__.split('\n')[2:])
 
     def __init__(self, max_depth=3, learning_rate=0.1,
                  n_estimators=100, silent=True,
@@ -439,7 +480,7 @@ class XGBClassifier(XGBModel, XGBClassifierBase):
             instance weights on the i-th validation set.
         eval_metric : str, callable, optional
             If a str, should be a built-in evaluation metric to use. See
-            doc/parameter.md. If callable, a custom evaluation metric. The call
+            doc/parameter.rst. If callable, a custom evaluation metric. The call
             signature is func(y_predicted, y_true) where y_true will be a
             DMatrix object such that you may need to call the get_label
             method. It must return a str, value pair where the str is a name
@@ -567,10 +608,13 @@ class XGBClassifier(XGBModel, XGBClassifierBase):
     def predict_proba(self, data, ntree_limit=0):
         """
         Predict the probability of each `data` example being of a given class.
-        NOTE: This function is not thread safe.
-              For each booster object, predict can only be called from one thread.
-              If you want to run prediction using multiple thread, call xgb.copy() to make copies
-              of model object and then call predict
+
+        .. note:: This function is not thread safe
+
+            For each booster object, predict can only be called from one thread.
+            If you want to run prediction using multiple threads, call ``xgb.copy()`` to make
+            copies of the model object and then call ``predict()``.
+
         Parameters
         ----------
         data : DMatrix
@@ -606,20 +650,26 @@ class XGBClassifier(XGBModel, XGBClassifierBase):
 
         Example
         -------
-        param_dist = {'objective':'binary:logistic', 'n_estimators':2}
 
-        clf = xgb.XGBClassifier(**param_dist)
+        .. code-block:: python
 
-        clf.fit(X_train, y_train,
-                eval_set=[(X_train, y_train), (X_test, y_test)],
-                eval_metric='logloss',
-                verbose=True)
+            param_dist = {'objective':'binary:logistic', 'n_estimators':2}
 
-        evals_result = clf.evals_result()
+            clf = xgb.XGBClassifier(**param_dist)
 
-        The variable evals_result will contain:
-        {'validation_0': {'logloss': ['0.604835', '0.531479']},
-         'validation_1': {'logloss': ['0.41965', '0.17686']}}
+            clf.fit(X_train, y_train,
+                    eval_set=[(X_train, y_train), (X_test, y_test)],
+                    eval_metric='logloss',
+                    verbose=True)
+
+            evals_result = clf.evals_result()
+
+        The variable ``evals_result`` will contain:
+
+        .. code-block:: none
+
+            {'validation_0': {'logloss': ['0.604835', '0.531479']},
+             'validation_1': {'logloss': ['0.41965', '0.17686']}}
         """
         if self.evals_result_:
             evals_result = self.evals_result_
@@ -631,5 +681,5 @@ class XGBClassifier(XGBModel, XGBClassifierBase):
 
 class XGBRegressor(XGBModel, XGBRegressorBase):
     # pylint: disable=missing-docstring
-    __doc__ = """Implementation of the scikit-learn API for XGBoost regression.
-    """ + '\n'.join(XGBModel.__doc__.split('\n')[2:])
+    __doc__ = "Implementation of the scikit-learn API for XGBoost regression.\n\n"\
+        + '\n'.join(XGBModel.__doc__.split('\n')[2:])
diff --git a/python-package/xgboost/training.py b/python-package/xgboost/training.py
index 59dd88286..ac3d1a172 100644
--- a/python-package/xgboost/training.py
+++ b/python-package/xgboost/training.py
@@ -147,18 +147,24 @@ def train(params, dtrain, num_boost_round=10, evals=(), obj=None, feval=None,
         and/or num_class appears in the parameters)
     evals_result: dict
         This dictionary stores the evaluation results of all the items in watchlist.
+
         Example: with a watchlist containing [(dtest,'eval'), (dtrain,'train')] and
-        a parameter containing ('eval_metric': 'logloss')
-        Returns: {'train': {'logloss': ['0.48253', '0.35953']},
-                  'eval': {'logloss': ['0.480385', '0.357756']}}
+        a parameter containing ('eval_metric': 'logloss'), the **evals_result**
+        returns
+
+        .. code-block:: none
+
+            {'train': {'logloss': ['0.48253', '0.35953']},
+             'eval': {'logloss': ['0.480385', '0.357756']}}
+
     verbose_eval : bool or int
         Requires at least one item in evals.
-        If `verbose_eval` is True then the evaluation metric on the validation set is
+        If **verbose_eval** is True then the evaluation metric on the validation set is
         printed at each boosting stage.
-        If `verbose_eval` is an integer then the evaluation metric on the validation set
-        is printed at every given `verbose_eval` boosting stage. The last boosting stage
-        / the boosting stage found by using `early_stopping_rounds` is also printed.
-        Example: with verbose_eval=4 and at least one item in evals, an evaluation metric
+        If **verbose_eval** is an integer then the evaluation metric on the validation set
+        is printed at every given **verbose_eval** boosting stage. The last boosting stage
+        / the boosting stage found by using **early_stopping_rounds** is also printed.
+        Example: with ``verbose_eval=4`` and at least one item in evals, an evaluation metric
         is printed every 4 boosting stages, instead of every boosting stage.
     learning_rates: list or function (deprecated - use callback API instead)
         List of learning rate for each boosting round
@@ -341,8 +347,12 @@ def cv(params, dtrain, num_boost_round=10, nfold=3, stratified=False, folds=None
     callbacks : list of callback functions
         List of callback functions that are applied at end of each iteration.
         It is possible to use predefined callbacks by using xgb.callback module.
-        Example: [xgb.callback.reset_learning_rate(custom_rates)]
-     shuffle : bool
+        Example:
+
+        .. code-block:: none
+
+            [xgb.callback.reset_learning_rate(custom_rates)]
+    shuffle : bool
         Shuffle data before creating folds.
 
     Returns