[doc] Remove parameter type in Python doc strings. (#9005)

This commit is contained in:
Jiaming Yuan 2023-04-01 04:04:30 +08:00 committed by GitHub
parent 4caca2947d
commit 720a8c3273
6 changed files with 105 additions and 94 deletions

View File

@@ -2,6 +2,8 @@
 Quantile Regression
 ===================
+
+.. versionadded:: 2.0.0
 
 The script is inspired by this awesome example in sklearn:
 https://scikit-learn.org/stable/auto_examples/ensemble/plot_gradient_boosting_quantile.html

View File

@@ -360,7 +360,13 @@ Specify the learning task and the corresponding learning objective. The objectiv
 - ``reg:logistic``: logistic regression.
 - ``reg:pseudohubererror``: regression with Pseudo Huber loss, a twice differentiable alternative to absolute loss.
 - ``reg:absoluteerror``: Regression with L1 error. When tree model is used, leaf value is refreshed after tree construction. If used in distributed training, the leaf value is calculated as the mean value from all workers, which is not guaranteed to be optimal.
+
+  .. versionadded:: 1.7.0
+
 - ``reg:quantileerror``: Quantile loss, also known as ``pinball loss``. See later sections for its parameter and :ref:`sphx_glr_python_examples_quantile_regression.py` for a worked example.
+
+  .. versionadded:: 2.0.0
+
 - ``binary:logistic``: logistic regression for binary classification, output probability
 - ``binary:logitraw``: logistic regression for binary classification, output score before logistic transformation
 - ``binary:hinge``: hinge loss for binary classification. This makes predictions of 0 or 1, rather than producing probabilities.
@@ -467,6 +473,8 @@ Parameter for using Quantile Loss (``reg:quantileerror``)
 * ``quantile_alpha``: A scalar or a list of targeted quantiles.
+
+  .. versionadded:: 2.0.0
 
 Parameter for using AFT Survival Loss (``survival:aft``) and Negative Log Likelihood of AFT metric (``aft-nloglik``)
 ====================================================================================================================
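The quantile (pinball) loss named by ``reg:quantileerror`` has a simple closed form. A minimal pure-Python sketch of it, for illustration only (XGBoost's implementation also handles vectors of quantiles and gradients):

```python
def pinball_loss(y_true: float, y_pred: float, alpha: float) -> float:
    """Pinball loss for one observation at quantile level ``alpha``.

    Under-prediction is penalized by ``alpha`` and over-prediction by
    ``1 - alpha``, so minimizing the expected loss yields the
    ``alpha``-quantile of the target distribution.
    """
    diff = y_true - y_pred
    return alpha * diff if diff >= 0 else (alpha - 1) * diff


# At alpha = 0.5 the pinball loss is half the absolute error:
print(pinball_loss(3.0, 1.0, 0.5))   # under-prediction by 2 -> 1.0
print(pinball_loss(1.0, 3.0, 0.25))  # over-prediction by 2 -> 1.5
```

Passing a list to ``quantile_alpha`` trains one output per quantile, each minimizing this loss at its own level.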

View File

@@ -94,9 +94,9 @@ def from_cstr_to_pystr(data: CStrPptr, length: c_bst_ulong) -> List[str]:
     Parameters
     ----------
-    data : ctypes pointer
+    data :
         pointer to data
-    length : ctypes pointer
+    length :
         pointer to length of data
     """
     res = []
@@ -131,9 +131,9 @@ def _expect(expectations: Sequence[Type], got: Type) -> str:
     Parameters
     ----------
-    expectations: sequence
+    expectations :
         a list of expected values.
-    got:
+    got :
         actual input
 
     Returns
@@ -263,7 +263,7 @@ def _check_call(ret: int) -> None:
     Parameters
     ----------
-    ret : int
+    ret :
         return value from API calls
     """
     if ret != 0:
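The pattern in these hunks is the convention the commit adopts throughout: once a function carries Python type annotations, repeating the type after the parameter name in the numpydoc ``Parameters`` section is redundant, so only the bare name and colon remain. A small sketch of the style with a hypothetical function (not from the XGBoost code base):

```python
def scale(values: list, factor: float = 2.0) -> list:
    """Multiply every element of ``values`` by ``factor``.

    Parameters
    ----------
    values :
        Input numbers.  The type lives in the signature annotation,
        not in the docstring.
    factor :
        Scaling factor applied to each element.
    """
    return [v * factor for v in values]


# Documentation tools (e.g. Sphinx with numpydoc/napoleon extensions) can
# merge the annotations back into the rendered parameter list.
print(scale([1, 2, 3]))  # -> [2.0, 4.0, 6.0]
```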
@@ -271,10 +271,10 @@ def _check_call(ret: int) -> None:
 
 def build_info() -> dict:
-    """Build information of XGBoost. The returned value format is not stable. Also, please
-    note that build time dependency is not the same as runtime dependency. For instance,
-    it's possible to build XGBoost with older CUDA version but run it with the latest
-    one.
+    """Build information of XGBoost. The returned value format is not stable. Also,
+    please note that build time dependency is not the same as runtime dependency. For
+    instance, it's possible to build XGBoost with older CUDA version but run it with the
+    latest one.
 
     .. versionadded:: 1.6.0
@@ -658,28 +658,28 @@ class DMatrix:  # pylint: disable=too-many-instance-attributes,too-many-public-m
     data :
         Data source of DMatrix. See :ref:`py-data` for a list of supported input
         types.
-    label : array_like
+    label :
         Label of the training data.
-    weight : array_like
+    weight :
         Weight for each instance.
 
-        .. note:: For ranking task, weights are per-group.
-            In ranking task, one weight is assigned to each group (not each
-            data point). This is because we only care about the relative
-            ordering of data points within each group, so it doesn't make
-            sense to assign weights to individual data points.
+        .. note::
+            For ranking task, weights are per-group. In ranking task, one weight
+            is assigned to each group (not each data point). This is because we
+            only care about the relative ordering of data points within each group,
+            so it doesn't make sense to assign weights to individual data points.
-    base_margin: array_like
+    base_margin :
         Base margin used for boosting from existing model.
-    missing : float, optional
-        Value in the input data which needs to be present as a missing
-        value. If None, defaults to np.nan.
+    missing :
+        Value in the input data which needs to be present as a missing value. If
+        None, defaults to np.nan.
-    silent : boolean, optional
+    silent :
         Whether print messages during construction
-    feature_names : list, optional
+    feature_names :
         Set names for features.
-    feature_types : FeatureTypes
+    feature_types :
         Set types for features. When `enable_categorical` is set to `True`, string
         "c" represents categorical data type while "q" represents numerical feature
@@ -689,20 +689,20 @@ class DMatrix:  # pylint: disable=too-many-instance-attributes,too-many-public-m
         `.cat.codes` method. This is useful when users want to specify categorical
         features without having to construct a dataframe as input.
-    nthread : integer, optional
+    nthread :
         Number of threads to use for loading data when parallelization is
         applicable. If -1, uses maximum threads available on the system.
-    group : array_like
+    group :
         Group size for all ranking group.
-    qid : array_like
+    qid :
         Query ID for data samples, used for ranking.
-    label_lower_bound : array_like
+    label_lower_bound :
         Lower bound for survival training.
-    label_upper_bound : array_like
+    label_upper_bound :
         Upper bound for survival training.
-    feature_weights : array_like, optional
+    feature_weights :
         Set feature weights for column sampling.
-    enable_categorical: boolean, optional
+    enable_categorical :
 
         .. versionadded:: 1.3.0
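The per-group weighting described in the ``weight`` note above can be sketched in plain Python: given ranking group sizes (the ``group`` parameter) and one weight per group, the effective per-row weight is the group's weight repeated across its rows. This illustrates the semantics only, not XGBoost's internal code:

```python
def expand_group_weights(group_sizes: list, group_weights: list) -> list:
    """Repeat each group's weight across the rows belonging to that group.

    In ranking, ``group_sizes[i]`` consecutive rows form query group ``i``
    and share the single weight ``group_weights[i]``, because only the
    relative ordering within a group matters.
    """
    assert len(group_sizes) == len(group_weights)
    per_row = []
    for size, w in zip(group_sizes, group_weights):
        per_row.extend([w] * size)
    return per_row


# Two query groups: 3 documents weighted 1.0, then 2 documents weighted 0.5.
print(expand_group_weights([3, 2], [1.0, 0.5]))  # -> [1.0, 1.0, 1.0, 0.5, 0.5]
```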
@@ -1712,6 +1712,7 @@ class Booster:
         string.
 
         .. versionadded:: 1.0.0
+
         """
         json_string = ctypes.c_char_p()
         length = c_bst_ulong()
@@ -1744,8 +1745,8 @@ class Booster:
         Returns
         -------
-        booster: `Booster`
-            a copied booster model
+        booster :
+            A copied booster model
         """
         return copy.copy(self)
@@ -1754,12 +1755,12 @@ class Booster:
         Parameters
         ----------
-        key : str
+        key :
             The key to get attribute from.
 
         Returns
         -------
-        value : str
+        value :
             The attribute value of the key, returns None if attribute do not exist.
         """
         ret = ctypes.c_char_p()
@@ -1878,9 +1879,9 @@ class Booster:
         Parameters
         ----------
-        params: dict/list/str
+        params :
             list of key,value pairs, dict of key to value or simply str key
-        value: optional
+        value :
             value of the specified parameter, when params is str key
         """
         if isinstance(params, Mapping):
@@ -1903,11 +1904,11 @@ class Booster:
         Parameters
         ----------
-        dtrain : DMatrix
+        dtrain :
             Training data.
-        iteration : int
+        iteration :
             Current iteration number.
-        fobj : function
+        fobj :
             Customized objective function.
         """
@@ -2205,8 +2206,7 @@ class Booster:
         Parameters
         ----------
-        data : numpy.ndarray/scipy.sparse.csr_matrix/cupy.ndarray/
-            cudf.DataFrame/pd.DataFrame
+        data :
             The input data, must not be a view for numpy array. Set
             ``predictor`` to ``gpu_predictor`` for running prediction on CuPy
             array or CuDF DataFrame.
@@ -2390,7 +2390,7 @@ class Booster:
         Parameters
         ----------
-        fname : string or os.PathLike
+        fname :
             Output file name
         """
@@ -2494,13 +2494,13 @@ class Booster:
         Parameters
         ----------
-        fout : string or os.PathLike
+        fout :
             Output file name.
-        fmap : string or os.PathLike, optional
+        fmap :
             Name of the file containing feature map names.
-        with_stats : bool, optional
+        with_stats :
             Controls whether the split statistics are output.
-        dump_format : string, optional
+        dump_format :
             Format of model dump file. Can be 'text' or 'json'.
         """
         if isinstance(fout, (str, os.PathLike)):
@@ -2604,9 +2604,9 @@ class Booster:
         Parameters
         ----------
-        fmap:
+        fmap :
             The name of feature map file.
-        importance_type:
+        importance_type :
             One of the importance types defined above.
 
         Returns
@@ -2655,7 +2655,7 @@ class Booster:
         Parameters
         ----------
-        fmap: str or os.PathLike (optional)
+        fmap :
             The name of feature map file.
         """
         # pylint: disable=too-many-locals
@@ -2821,15 +2821,15 @@ class Booster:
         Parameters
         ----------
-        feature: str
+        feature :
             The name of the feature.
-        fmap: str or os.PathLike (optional)
+        fmap :
             The name of feature map file.
-        bin: int, default None
+        bin :
             The maximum number of bins.
             Number of bins equals number of unique split values n_unique,
             if bins == None or bins > n_unique.
-        as_pandas: bool, default True
+        as_pandas :
             Return pd.DataFrame when pandas is installed.
             If False or pandas is not installed, return numpy ndarray.

View File

@@ -1,10 +1,9 @@
 # pylint: disable=too-many-locals, too-many-arguments, invalid-name,
 # pylint: disable=too-many-branches
-# coding: utf-8
 """Plotting Library."""
 import json
 from io import BytesIO
-from typing import Any, Optional
+from typing import Any, Optional, Union
 
 import numpy as np
@@ -17,7 +16,7 @@ GraphvizSource = Any  # real type is graphviz.Source
 def plot_importance(
-    booster: Booster,
+    booster: Union[XGBModel, Booster, dict],
     ax: Optional[Axes] = None,
     height: float = 0.2,
     xlim: Optional[tuple] = None,
@@ -37,40 +36,42 @@ def plot_importance(
     Parameters
     ----------
-    booster : Booster, XGBModel or dict
+    booster :
         Booster or XGBModel instance, or dict taken by Booster.get_fscore()
-    ax : matplotlib Axes, default None
+    ax : matplotlib Axes
         Target axes instance. If None, new figure and axes will be created.
-    grid : bool, Turn the axes grids on or off. Default is True (On).
-    importance_type : str, default "weight"
+    grid :
+        Turn the axes grids on or off. Default is True (On).
+    importance_type :
         How the importance is calculated: either "weight", "gain", or "cover"
 
         * "weight" is the number of times a feature appears in a tree
         * "gain" is the average gain of splits which use the feature
        * "cover" is the average coverage of splits which use the feature
          where coverage is defined as the number of samples affected by the split
-    max_num_features : int, default None
-        Maximum number of top features displayed on plot. If None, all features will be displayed.
-    height : float, default 0.2
+    max_num_features :
+        Maximum number of top features displayed on plot. If None, all features will be
+        displayed.
+    height :
         Bar height, passed to ax.barh()
-    xlim : tuple, default None
+    xlim :
         Tuple passed to axes.xlim()
-    ylim : tuple, default None
+    ylim :
         Tuple passed to axes.ylim()
-    title : str, default "Feature importance"
+    title :
         Axes title. To disable, pass None.
-    xlabel : str, default "F score"
+    xlabel :
         X axis title label. To disable, pass None.
-    ylabel : str, default "Features"
+    ylabel :
         Y axis title label. To disable, pass None.
-    fmap: str or os.PathLike (optional)
+    fmap :
         The name of feature map file.
-    show_values : bool, default True
+    show_values :
         Show values on plot. To disable, pass False.
-    values_format : str, default "{v}"
-        Format string for values. "v" will be replaced by the value of the feature importance.
-        e.g. Pass "{v:.2f}" in order to limit the number of digits after the decimal point
-        to two, for each value printed on the graph.
+    values_format :
+        Format string for values. "v" will be replaced by the value of the feature
+        importance. e.g. Pass "{v:.2f}" in order to limit the number of digits after
+        the decimal point to two, for each value printed on the graph.
     kwargs :
         Other keywords passed to ax.barh()
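Since ``plot_importance`` now also accepts the raw dict returned by ``Booster.get_fscore()``, the core of the bar plot is just that mapping sorted by score and truncated to ``max_num_features``. A hedged pure-Python sketch of that selection step (illustrative only, not the library's code):

```python
def top_features(scores: dict, max_num_features: int = None) -> list:
    """Order a feature -> importance mapping and keep the highest entries.

    Mirrors what an importance bar plot displays: features sorted by score,
    optionally truncated to the top ``max_num_features``.
    """
    ordered = sorted(scores.items(), key=lambda kv: kv[1], reverse=True)
    if max_num_features is not None:
        ordered = ordered[:max_num_features]
    return ordered


fscore = {"f0": 10, "f1": 25, "f2": 3}
print(top_features(fscore, max_num_features=2))  # -> [('f1', 25), ('f0', 10)]
```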
@@ -146,7 +147,7 @@ def plot_importance(
 def to_graphviz(
-    booster: Booster,
+    booster: Union[Booster, XGBModel],
     fmap: PathLike = "",
     num_trees: int = 0,
     rankdir: Optional[str] = None,
@@ -162,19 +163,19 @@ def to_graphviz(
     Parameters
     ----------
-    booster : Booster, XGBModel
+    booster :
         Booster or XGBModel instance
-    fmap: str (optional)
+    fmap :
         The name of feature map file
-    num_trees : int, default 0
+    num_trees :
         Specify the ordinal number of target tree
-    rankdir : str, default "UT"
+    rankdir :
         Passed to graphviz via graph_attr
-    yes_color : str, default '#0000FF'
+    yes_color :
         Edge color when meets the node condition.
-    no_color : str, default '#FF0000'
+    no_color :
         Edge color when doesn't meet the node condition.
-    condition_node_params : dict, optional
+    condition_node_params :
         Condition node configuration for graphviz. Example:
 
         .. code-block:: python
@@ -183,7 +184,7 @@ def to_graphviz(
             'style': 'filled,rounded',
             'fillcolor': '#78bceb'}
 
-    leaf_node_params : dict, optional
+    leaf_node_params :
         Leaf node configuration for graphviz. Example:
 
         .. code-block:: python
@@ -192,7 +193,7 @@ def to_graphviz(
             'style': 'filled',
             'fillcolor': '#e48038'}
 
-    \\*\\*kwargs: dict, optional
+    kwargs :
         Other keywords passed to graphviz graph_attr, e.g. ``graph [ {key} = {value} ]``
 
     Returns

View File

@@ -1012,9 +1012,9 @@ class XGBModel(XGBModelBase):
         verbose :
             If `verbose` is True and an evaluation set is used, the evaluation metric
             measured on the validation set is printed to stdout at each boosting stage.
-            If `verbose` is an integer, the evaluation metric is printed at each `verbose`
-            boosting stage. The last boosting stage / the boosting stage found by using
-            `early_stopping_rounds` is also printed.
+            If `verbose` is an integer, the evaluation metric is printed at each
+            `verbose` boosting stage. The last boosting stage / the boosting stage found
+            by using `early_stopping_rounds` is also printed.
         xgb_model :
             file name of stored XGBoost model or 'Booster' instance XGBoost model to be
             loaded before training (allows training continuation).
@@ -1590,12 +1590,12 @@ class XGBClassifier(XGBModel, XGBClassifierMixIn, XGBClassifierBase):
         Parameters
         ----------
-        X : array_like
+        X :
             Feature matrix. See :ref:`py-data` for a list of supported types.
-        validate_features : bool
+        validate_features :
             When this is True, validate that the Booster's and data's feature_names are
             identical. Otherwise, it is assumed that the feature_names are the same.
-        base_margin : array_like
+        base_margin :
             Margin added to prediction.
         iteration_range :
             Specifies which layer of trees are used in prediction. For example, if a
@@ -1964,9 +1964,9 @@ class XGBRanker(XGBModel, XGBRankerMixIn):
         verbose :
             If `verbose` is True and an evaluation set is used, the evaluation metric
             measured on the validation set is printed to stdout at each boosting stage.
-            If `verbose` is an integer, the evaluation metric is printed at each `verbose`
-            boosting stage. The last boosting stage / the boosting stage found by using
-            `early_stopping_rounds` is also printed.
+            If `verbose` is an integer, the evaluation metric is printed at each
+            `verbose` boosting stage. The last boosting stage / the boosting stage found
+            by using `early_stopping_rounds` is also printed.
         xgb_model :
             file name of stored XGBoost model or 'Booster' instance XGBoost model to be
             loaded before training (allows training continuation).

View File

@@ -95,7 +95,7 @@ def train(
     feval :
         .. deprecated:: 1.6.0
             Use `custom_metric` instead.
-    maximize : bool
+    maximize :
         Whether to maximize feval.
     early_stopping_rounds :
         Activates early stopping. Validation metric needs to improve at least once in
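The ``early_stopping_rounds`` contract described above (training stops once the validation metric fails to improve for a window of consecutive rounds, with ``maximize`` choosing the direction of "improve") can be sketched in plain Python. This is an illustration of the semantics only, not the library's implementation:

```python
def rounds_until_stop(scores: list, early_stopping_rounds: int,
                      maximize: bool = False) -> int:
    """Return how many boosting rounds run before early stopping triggers.

    Training stops after ``early_stopping_rounds`` consecutive rounds in which
    the validation metric ``scores`` does not improve on its best value.
    """
    best = None
    best_round = -1
    for i, s in enumerate(scores):
        improved = best is None or (s > best if maximize else s < best)
        if improved:
            best, best_round = s, i
        elif i - best_round >= early_stopping_rounds:
            return i + 1  # rounds performed, counting the stopping round
    return len(scores)


# RMSE stops improving after round 3; with a patience of 2 we stop after 5 rounds.
print(rounds_until_stop([1.0, 0.8, 0.7, 0.7, 0.75], early_stopping_rounds=2))  # -> 5
```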