From 720a8c3273a844679e1edf11bd87297331f19d58 Mon Sep 17 00:00:00 2001
From: Jiaming Yuan
Date: Sat, 1 Apr 2023 04:04:30 +0800
Subject: [PATCH] [doc] Remove parameter type in Python doc strings. (#9005)

---
 demo/guide-python/quantile_regression.py |   2 +
 doc/parameter.rst                        |   8 ++
 python-package/xgboost/core.py           | 106 +++++++++++------
 python-package/xgboost/plotting.py       |  63 +++++++-------
 python-package/xgboost/sklearn.py        |  18 ++--
 python-package/xgboost/training.py       |   2 +-
 6 files changed, 105 insertions(+), 94 deletions(-)

diff --git a/demo/guide-python/quantile_regression.py b/demo/guide-python/quantile_regression.py
index d92115bf0..6d3e08df5 100644
--- a/demo/guide-python/quantile_regression.py
+++ b/demo/guide-python/quantile_regression.py
@@ -2,6 +2,8 @@
 Quantile Regression
 ===================
 
+.. versionadded:: 2.0.0
+
 The script is inspired by this awesome example in sklearn:
 https://scikit-learn.org/stable/auto_examples/ensemble/plot_gradient_boosting_quantile.html
 
diff --git a/doc/parameter.rst b/doc/parameter.rst
index e26ec83b2..c070e7018 100644
--- a/doc/parameter.rst
+++ b/doc/parameter.rst
@@ -360,7 +360,13 @@ Specify the learning task and the corresponding learning objective. The objectiv
   - ``reg:logistic``: logistic regression.
   - ``reg:pseudohubererror``: regression with Pseudo Huber loss, a twice differentiable alternative to absolute loss.
   - ``reg:absoluteerror``: Regression with L1 error. When tree model is used, leaf value is refreshed after tree construction. If used in distributed training, the leaf value is calculated as the mean value from all workers, which is not guaranteed to be optimal.
+
+    .. versionadded:: 1.7.0
+
   - ``reg:quantileerror``: Quantile loss, also known as ``pinball loss``. See later sections for its parameter and :ref:`sphx_glr_python_examples_quantile_regression.py` for a worked example.
+
+    .. versionadded:: 2.0.0
+
   - ``binary:logistic``: logistic regression for binary classification, output probability
   - ``binary:logitraw``: logistic regression for binary classification, output score before logistic transformation
   - ``binary:hinge``: hinge loss for binary classification. This makes predictions of 0 or 1, rather than producing probabilities.
@@ -467,6 +473,8 @@ Parameter for using Quantile Loss (``reg:quantileerror``)
 
 * ``quantile_alpha``: A scalar or a list of targeted quantiles.
 
+  .. versionadded:: 2.0.0
+
 Parameter for using AFT Survival Loss (``survival:aft``) and Negative Log Likelihood of AFT metric (``aft-nloglik``)
 ====================================================================================================================
 
diff --git a/python-package/xgboost/core.py b/python-package/xgboost/core.py
index 68346d900..3a27f5e18 100644
--- a/python-package/xgboost/core.py
+++ b/python-package/xgboost/core.py
@@ -94,9 +94,9 @@ def from_cstr_to_pystr(data: CStrPptr, length: c_bst_ulong) -> List[str]:
 
     Parameters
     ----------
-    data : ctypes pointer
+    data :
         pointer to data
-    length : ctypes pointer
+    length :
         pointer to length of data
 
     """
     res = []
@@ -131,9 +131,9 @@ def _expect(expectations: Sequence[Type], got: Type) -> str:
 
     Parameters
     ----------
-    expectations: sequence
+    expectations :
         a list of expected values.
-    got:
+    got :
         actual input
 
     Returns
     -------
@@ -263,7 +263,7 @@ def _check_call(ret: int) -> None:
 
     Parameters
     ----------
-    ret : int
+    ret :
         return value from API calls
 
     """
     if ret != 0:
@@ -271,10 +271,10 @@
 
 
 def build_info() -> dict:
-    """Build information of XGBoost. The returned value format is not stable. Also, please
-    note that build time dependency is not the same as runtime dependency. For instance,
-    it's possible to build XGBoost with older CUDA version but run it with the lastest
-    one.
+    """Build information of XGBoost. The returned value format is not stable. Also,
+    please note that build time dependency is not the same as runtime dependency. For
+    instance, it's possible to build XGBoost with older CUDA version but run it with the
+    latest one.
 
     .. versionadded:: 1.6.0
 
@@ -658,28 +658,28 @@ class DMatrix:  # pylint: disable=too-many-instance-attributes,too-many-public-m
     data :
         Data source of DMatrix. See :ref:`py-data` for a list of supported input
         types.
-    label : array_like
+    label :
         Label of the training data.
-    weight : array_like
+    weight :
         Weight for each instance.
 
-        .. note:: For ranking task, weights are per-group.
+        .. note::
 
-            In ranking task, one weight is assigned to each group (not each
-            data point). This is because we only care about the relative
-            ordering of data points within each group, so it doesn't make
-            sense to assign weights to individual data points.
+            For ranking task, weights are per-group. In ranking task, one weight
+            is assigned to each group (not each data point). This is because we
+            only care about the relative ordering of data points within each group,
+            so it doesn't make sense to assign weights to individual data points.
 
-    base_margin: array_like
+    base_margin :
         Base margin used for boosting from existing model.
-    missing : float, optional
-        Value in the input data which needs to be present as a missing
-        value. If None, defaults to np.nan.
-    silent : boolean, optional
+    missing :
+        Value in the input data which needs to be present as a missing value. If
+        None, defaults to np.nan.
+    silent :
         Whether to print messages during construction
-    feature_names : list, optional
+    feature_names :
         Set names for features.
-    feature_types : FeatureTypes
+    feature_types :
         Set types for features. When `enable_categorical` is set to `True`, string
         "c" represents categorical data type while "q" represents numerical feature
@@ -689,20 +689,20 @@ class DMatrix:  # pylint: disable=too-many-instance-attributes,too-many-public-m
         `.cat.codes` method. This is useful when users want to specify categorical
         features without having to construct a dataframe as input.
-    nthread : integer, optional
+    nthread :
         Number of threads to use for loading data when parallelization is
         applicable. If -1, uses maximum threads available on the system.
-    group : array_like
+    group :
         Group size for all ranking groups.
-    qid : array_like
+    qid :
         Query ID for data samples, used for ranking.
-    label_lower_bound : array_like
+    label_lower_bound :
         Lower bound for survival training.
-    label_upper_bound : array_like
+    label_upper_bound :
         Upper bound for survival training.
-    feature_weights : array_like, optional
+    feature_weights :
         Set feature weights for column sampling.
-    enable_categorical: boolean, optional
+    enable_categorical :
 
         .. versionadded:: 1.3.0
 
@@ -1712,6 +1712,7 @@ class Booster:
             string.
 
         .. versionadded:: 1.0.0
+
         """
         json_string = ctypes.c_char_p()
         length = c_bst_ulong()
@@ -1744,8 +1745,8 @@ class Booster:
 
         Returns
         -------
-        booster: `Booster`
-            a copied booster model
+        booster :
+            A copied booster model
         """
         return copy.copy(self)
@@ -1754,12 +1755,12 @@ class Booster:
 
         Parameters
         ----------
-        key : str
+        key :
             The key to get attribute from.
 
         Returns
         -------
-        value : str
+        value :
             The attribute value of the key, returns None if the attribute does not exist.
         """
         ret = ctypes.c_char_p()
@@ -1878,9 +1879,9 @@ class Booster:
 
         Parameters
         ----------
-        params: dict/list/str
+        params :
            list of key,value pairs, dict of key to value or simply str key
-        value: optional
+        value :
            value of the specified parameter, when params is str key
         """
         if isinstance(params, Mapping):
@@ -1903,11 +1904,11 @@ class Booster:
 
         Parameters
         ----------
-        dtrain : DMatrix
+        dtrain :
             Training data.
-        iteration : int
+        iteration :
             Current iteration number.
-        fobj : function
+        fobj :
             Customized objective function.
 
         """
@@ -2205,8 +2206,7 @@ class Booster:
 
         Parameters
         ----------
-        data : numpy.ndarray/scipy.sparse.csr_matrix/cupy.ndarray/
-            cudf.DataFrame/pd.DataFrame
+        data :
             The input data, must not be a view for numpy array. Set
             ``predictor`` to ``gpu_predictor`` for running prediction on CuPy
             array or CuDF DataFrame.
@@ -2390,7 +2390,7 @@ class Booster:
 
         Parameters
         ----------
-        fname : string or os.PathLike
+        fname :
             Output file name
 
         """
@@ -2494,13 +2494,13 @@ class Booster:
 
         Parameters
         ----------
-        fout : string or os.PathLike
+        fout :
             Output file name.
-        fmap : string or os.PathLike, optional
+        fmap :
             Name of the file containing feature map names.
-        with_stats : bool, optional
+        with_stats :
             Controls whether the split statistics are output.
-        dump_format : string, optional
+        dump_format :
             Format of model dump file. Can be 'text' or 'json'.
         """
         if isinstance(fout, (str, os.PathLike)):
@@ -2604,9 +2604,9 @@ class Booster:
 
         Parameters
         ----------
-        fmap:
+        fmap :
             The name of feature map file.
-        importance_type:
+        importance_type :
             One of the importance types defined above.
 
         Returns
@@ -2655,7 +2655,7 @@ class Booster:
 
         Parameters
         ----------
-        fmap: str or os.PathLike (optional)
+        fmap :
             The name of feature map file.
         """
         # pylint: disable=too-many-locals
@@ -2821,15 +2821,15 @@ class Booster:
 
         Parameters
         ----------
-        feature: str
+        feature :
             The name of the feature.
-        fmap: str or os.PathLike (optional)
+        fmap :
             The name of feature map file.
-        bin: int, default None
+        bin :
             The maximum number of bins.
             Number of bins equals number of unique split values n_unique,
             if bins == None or bins > n_unique.
-        as_pandas: bool, default True
+        as_pandas :
             Return pd.DataFrame when pandas is installed.
             If False or pandas is not installed, return numpy ndarray.
 
diff --git a/python-package/xgboost/plotting.py b/python-package/xgboost/plotting.py
index a364e1eb6..71058e8c9 100644
--- a/python-package/xgboost/plotting.py
+++ b/python-package/xgboost/plotting.py
@@ -1,10 +1,9 @@
 # pylint: disable=too-many-locals, too-many-arguments, invalid-name,
 # pylint: disable=too-many-branches
-# coding: utf-8
 """Plotting Library."""
 import json
 from io import BytesIO
-from typing import Any, Optional
+from typing import Any, Optional, Union
 
 import numpy as np
 
@@ -17,7 +16,7 @@ GraphvizSource = Any  # real type is graphviz.Source
 
 
 def plot_importance(
-    booster: Booster,
+    booster: Union[XGBModel, Booster, dict],
     ax: Optional[Axes] = None,
     height: float = 0.2,
     xlim: Optional[tuple] = None,
@@ -37,40 +36,42 @@ def plot_importance(
 
     Parameters
     ----------
-    booster : Booster, XGBModel or dict
+    booster :
         Booster or XGBModel instance, or dict taken by Booster.get_fscore()
-    ax : matplotlib Axes, default None
+    ax : matplotlib Axes
         Target axes instance. If None, new figure and axes will be created.
-    grid : bool, Turn the axes grids on or off. Default is True (On).
-    importance_type : str, default "weight"
+    grid :
+        Turn the axes grids on or off. Default is True (On).
+    importance_type :
         How the importance is calculated: either "weight", "gain", or "cover"
 
         * "weight" is the number of times a feature appears in a tree
         * "gain" is the average gain of splits which use the feature
         * "cover" is the average coverage of splits which use the feature
          where coverage is defined as the number of samples affected by the split
-    max_num_features : int, default None
-        Maximum number of top features displayed on plot. If None, all features will be displayed.
-    height : float, default 0.2
+    max_num_features :
+        Maximum number of top features displayed on plot. If None, all features will be
+        displayed.
+    height :
         Bar height, passed to ax.barh()
-    xlim : tuple, default None
+    xlim :
         Tuple passed to axes.xlim()
-    ylim : tuple, default None
+    ylim :
        Tuple passed to axes.ylim()
-    title : str, default "Feature importance"
+    title :
        Axes title. To disable, pass None.
-    xlabel : str, default "F score"
+    xlabel :
        X axis title label. To disable, pass None.
-    ylabel : str, default "Features"
+    ylabel :
        Y axis title label. To disable, pass None.
-    fmap: str or os.PathLike (optional)
+    fmap :
        The name of feature map file.
-    show_values : bool, default True
+    show_values :
        Show values on plot. To disable, pass False.
-    values_format : str, default "{v}"
-        Format string for values. "v" will be replaced by the value of the feature importance.
-        e.g. Pass "{v:.2f}" in order to limit the number of digits after the decimal point
-        to two, for each value printed on the graph.
+    values_format :
+        Format string for values. "v" will be replaced by the value of the feature
+        importance. e.g. Pass "{v:.2f}" in order to limit the number of digits after
+        the decimal point to two, for each value printed on the graph.
     kwargs :
        Other keywords passed to ax.barh()
 
@@ -146,7 +147,7 @@ def plot_importance(
 
 
 def to_graphviz(
-    booster: Booster,
+    booster: Union[Booster, XGBModel],
     fmap: PathLike = "",
     num_trees: int = 0,
     rankdir: Optional[str] = None,
@@ -162,19 +163,19 @@ def to_graphviz(
 
     Parameters
     ----------
-    booster : Booster, XGBModel
+    booster :
        Booster or XGBModel instance
-    fmap: str (optional)
+    fmap :
        The name of feature map file
-    num_trees : int, default 0
+    num_trees :
        Specify the ordinal number of target tree
-    rankdir : str, default "UT"
+    rankdir :
        Passed to graphviz via graph_attr
-    yes_color : str, default '#0000FF'
+    yes_color :
        Edge color when meets the node condition.
-    no_color : str, default '#FF0000'
+    no_color :
        Edge color when the node condition is not met.
-    condition_node_params : dict, optional
+    condition_node_params :
        Condition node configuration for graphviz. Example:
 
        .. code-block:: python

            {'shape': 'box',
             'style': 'filled,rounded',
             'fillcolor': '#78bceb'}
 
-    leaf_node_params : dict, optional
+    leaf_node_params :
        Leaf node configuration for graphviz. Example:
 
        .. code-block:: python
 
            {'shape': 'box',
             'style': 'filled',
             'fillcolor': '#e48038'}
 
-    \\*\\*kwargs: dict, optional
+    kwargs :
        Other keywords passed to graphviz graph_attr, e.g. ``graph [ {key} = {value} ]``
 
     Returns
diff --git a/python-package/xgboost/sklearn.py b/python-package/xgboost/sklearn.py
index fffc0eb9b..9b5949cdb 100644
--- a/python-package/xgboost/sklearn.py
+++ b/python-package/xgboost/sklearn.py
@@ -1012,9 +1012,9 @@ class XGBModel(XGBModelBase):
         verbose :
             If `verbose` is True and an evaluation set is used, the evaluation metric
             measured on the validation set is printed to stdout at each boosting stage.
-            If `verbose` is an integer, the evaluation metric is printed at each `verbose`
-            boosting stage. The last boosting stage / the boosting stage found by using
-            `early_stopping_rounds` is also printed.
+            If `verbose` is an integer, the evaluation metric is printed at each
+            `verbose` boosting stage. The last boosting stage / the boosting stage found
+            by using `early_stopping_rounds` is also printed.
         xgb_model : file name of stored XGBoost model or 'Booster' instance
             XGBoost model to be loaded before training (allows training continuation).
@@ -1590,12 +1590,12 @@ class XGBClassifier(XGBModel, XGBClassifierMixIn, XGBClassifierBase):
 
         Parameters
         ----------
-        X : array_like
+        X :
             Feature matrix. See :ref:`py-data` for a list of supported types.
-        validate_features : bool
+        validate_features :
             When this is True, validate that the Booster's and data's feature_names are
             identical. Otherwise, it is assumed that the feature_names are the same.
-        base_margin : array_like
+        base_margin :
             Margin added to prediction.
         iteration_range :
             Specifies which layer of trees are used in prediction. For example, if a
@@ -1964,9 +1964,9 @@ class XGBRanker(XGBModel, XGBRankerMixIn):
         verbose :
             If `verbose` is True and an evaluation set is used, the evaluation metric
             measured on the validation set is printed to stdout at each boosting stage.
-            If `verbose` is an integer, the evaluation metric is printed at each `verbose`
-            boosting stage. The last boosting stage / the boosting stage found by using
-            `early_stopping_rounds` is also printed.
+            If `verbose` is an integer, the evaluation metric is printed at each
+            `verbose` boosting stage. The last boosting stage / the boosting stage found
+            by using `early_stopping_rounds` is also printed.
         xgb_model : file name of stored XGBoost model or 'Booster' instance
             XGBoost model to be loaded before training (allows training continuation).
diff --git a/python-package/xgboost/training.py b/python-package/xgboost/training.py
index 5ef6eeaa2..a238e73c8 100644
--- a/python-package/xgboost/training.py
+++ b/python-package/xgboost/training.py
@@ -95,7 +95,7 @@ def train(
     feval :
         .. deprecated:: 1.6.0
             Use `custom_metric` instead.
-    maximize : bool
+    maximize :
         Whether to maximize feval.
     early_stopping_rounds :
         Activates early stopping. Validation metric needs to improve at least once in
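
A usage note for reviewers, separate from the patch files above: the quantile loss documented in ``doc/parameter.rst`` can be exercised with the short sketch below. This is a minimal illustration assuming XGBoost >= 2.0.0 (where ``reg:quantileerror`` is available); the toy data and every parameter value here are arbitrary choices for demonstration and are not taken from the patch or the official demo.

.. code-block:: python

    import numpy as np
    import xgboost as xgb

    # Toy data for illustration only: a noisy linear signal.
    rng = np.random.default_rng(seed=0)
    X = rng.uniform(0.0, 10.0, size=(512, 1))
    y = X.ravel() + rng.normal(scale=1.0, size=512)

    Xy = xgb.QuantileDMatrix(X, label=y)
    booster = xgb.train(
        {
            # Quantile ("pinball") loss, available from 2.0.0 onwards.
            "objective": "reg:quantileerror",
            # A scalar or a list of targeted quantiles.
            "quantile_alpha": [0.05, 0.5, 0.95],
            "tree_method": "hist",
        },
        Xy,
        num_boost_round=32,
    )

    # With a list of quantiles, predict() returns one column per quantile,
    # here an array of shape (512, 3).
    print(booster.predict(Xy).shape)

The trained booster, or the dict returned by ``booster.get_fscore()``, can then be handed to ``xgboost.plot_importance``, matching the widened ``booster`` annotation introduced in ``plotting.py`` above.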