Update document for multi output and categorical. (#7574)
* Group together categorical related parameters. * Update documents about multioutput and categorical.
This commit is contained in:
parent
dac9eb13bd
commit
b4ec1682c6
@ -7,7 +7,7 @@ weight is not used in following example. In this script, we implement the Square
|
|||||||
Error (SLE) objective and RMSLE metric as customized functions, then compare it with
|
Error (SLE) objective and RMSLE metric as customized functions, then compare it with
|
||||||
native implementation in XGBoost.
|
native implementation in XGBoost.
|
||||||
|
|
||||||
See doc/tutorials/custom_metric_obj.rst for a step by step walkthrough, with other
|
See :doc:`/tutorials/custom_metric_obj` for a step by step walkthrough, with other
|
||||||
details.
|
details.
|
||||||
|
|
||||||
The `SLE` objective reduces impact of outliers in training dataset, hence here we also
|
The `SLE` objective reduces impact of outliers in training dataset, hence here we also
|
||||||
|
|||||||
@ -5,6 +5,8 @@ A demo for multi-output regression
|
|||||||
The demo is adapted from scikit-learn:
|
The demo is adapted from scikit-learn:
|
||||||
|
|
||||||
https://scikit-learn.org/stable/auto_examples/ensemble/plot_random_forest_regression_multioutput.html#sphx-glr-auto-examples-ensemble-plot-random-forest-regression-multioutput-py
|
https://scikit-learn.org/stable/auto_examples/ensemble/plot_random_forest_regression_multioutput.html#sphx-glr-auto-examples-ensemble-plot-random-forest-regression-multioutput-py
|
||||||
|
|
||||||
|
See :doc:`/tutorials/multioutput` for more information.
|
||||||
"""
|
"""
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import xgboost as xgb
|
import xgboost as xgb
|
||||||
|
|||||||
@ -113,7 +113,7 @@ Miscellaneous
|
|||||||
*************
|
*************
|
||||||
|
|
||||||
By default, XGBoost assumes input categories are integers starting from 0 till the number
|
By default, XGBoost assumes input categories are integers starting from 0 till the number
|
||||||
of categories :math:`[0, n_categories)`. However, user might provide inputs with invalid
|
of categories :math:`[0, n\_categories)`. However, user might provide inputs with invalid
|
||||||
values due to mistakes or missing values. These can be negative values, integer values that
|
values due to mistakes or missing values. These can be negative values, integer values that
|
||||||
cannot be accurately represented by 32-bit floating point, or values that are larger than
|
cannot be accurately represented by 32-bit floating point, or values that are larger than
|
||||||
actual number of unique categories. During training this is validated but for prediction
|
actual number of unique categories. During training this is validated but for prediction
|
||||||
|
|||||||
@ -12,14 +12,15 @@ terminologies related to different multi-output models please refer to the `scik
|
|||||||
user guide <https://scikit-learn.org/stable/modules/multiclass.html>`_.
|
user guide <https://scikit-learn.org/stable/modules/multiclass.html>`_.
|
||||||
|
|
||||||
Internally, XGBoost builds one model for each target similar to sklearn meta estimators,
|
Internally, XGBoost builds one model for each target similar to sklearn meta estimators,
|
||||||
with the added benefit of reusing data and custom objective support. For a worked example
|
with the added benefit of reusing data and other integrated features like SHAP. For a
|
||||||
of regression, see :ref:`sphx_glr_python_examples_multioutput_regression.py`. For
|
worked example of regression, see
|
||||||
multi-label classification, the binary relevance strategy is used. Input ``y`` should be
|
:ref:`sphx_glr_python_examples_multioutput_regression.py`. For multi-label classification,
|
||||||
of shape ``(n_samples, n_classes)`` with each column having a value of 0 or 1 to specify
|
the binary relevance strategy is used. Input ``y`` should be of shape ``(n_samples,
|
||||||
whether the sample is labeled as positive for respective class. Given a sample with 3
|
n_classes)`` with each column having a value of 0 or 1 to specify whether the sample is
|
||||||
output classes and 2 labels, the corresponding `y` should be encoded as ``[1, 0, 1]`` with
|
labeled as positive for the respective class. Given a sample with 3 output classes and 2
|
||||||
the second class labeled as negative and the rest labeled as positive. At the moment
|
labels, the corresponding `y` should be encoded as ``[1, 0, 1]`` with the second class
|
||||||
XGBoost supports only dense matrix for labels.
|
labeled as negative and the rest labeled as positive. At the moment XGBoost supports only
|
||||||
|
dense matrix for labels.
|
||||||
|
|
||||||
.. code-block:: python
|
.. code-block:: python
|
||||||
|
|
||||||
|
|||||||
@ -197,6 +197,18 @@ __model_doc = f'''
|
|||||||
Experimental support for categorical data. Do not set to true unless you are
|
Experimental support for categorical data. Do not set to true unless you are
|
||||||
interested in development. Only valid when `gpu_hist` and dataframe are used.
|
interested in development. Only valid when `gpu_hist` and dataframe are used.
|
||||||
|
|
||||||
|
max_cat_to_onehot : Optional[int]
|
||||||
|
|
||||||
|
.. versionadded:: 1.6.0
|
||||||
|
|
||||||
|
.. note:: This parameter is experimental
|
||||||
|
|
||||||
|
A threshold for deciding whether XGBoost should use one-hot encoding based split
|
||||||
|
for categorical data. When the number of categories is less than the threshold,
|
||||||
|
one-hot encoding is chosen, otherwise the categories will be partitioned into
|
||||||
|
children nodes. Only relevant for regression and binary classification and
|
||||||
|
`approx` tree method.
|
||||||
|
|
||||||
eval_metric : Optional[Union[str, List[str], Callable]]
|
eval_metric : Optional[Union[str, List[str], Callable]]
|
||||||
|
|
||||||
.. versionadded:: 1.6.0
|
.. versionadded:: 1.6.0
|
||||||
@ -267,16 +279,6 @@ __model_doc = f'''
|
|||||||
callbacks = [xgb.callback.EarlyStopping(rounds=early_stopping_rounds,
|
callbacks = [xgb.callback.EarlyStopping(rounds=early_stopping_rounds,
|
||||||
save_best=True)]
|
save_best=True)]
|
||||||
|
|
||||||
max_cat_to_onehot : bool
|
|
||||||
|
|
||||||
.. versionadded:: 1.6.0
|
|
||||||
|
|
||||||
A threshold for deciding whether XGBoost should use one-hot encoding based split
|
|
||||||
for categorical data. When number of categories is lesser than the threshold then
|
|
||||||
one-hot encoding is chosen, otherwise the categories will be partitioned into
|
|
||||||
children nodes. Only relevant for regression and binary classification and
|
|
||||||
`approx` tree method.
|
|
||||||
|
|
||||||
kwargs : dict, optional
|
kwargs : dict, optional
|
||||||
Keyword arguments for XGBoost Booster object. Full documentation of parameters
|
Keyword arguments for XGBoost Booster object. Full documentation of parameters
|
||||||
can be found :doc:`here </parameter>`.
|
can be found :doc:`here </parameter>`.
|
||||||
@ -490,10 +492,10 @@ class XGBModel(XGBModelBase):
|
|||||||
validate_parameters: Optional[bool] = None,
|
validate_parameters: Optional[bool] = None,
|
||||||
predictor: Optional[str] = None,
|
predictor: Optional[str] = None,
|
||||||
enable_categorical: bool = False,
|
enable_categorical: bool = False,
|
||||||
|
max_cat_to_onehot: Optional[int] = None,
|
||||||
eval_metric: Optional[Union[str, List[str], Callable]] = None,
|
eval_metric: Optional[Union[str, List[str], Callable]] = None,
|
||||||
early_stopping_rounds: Optional[int] = None,
|
early_stopping_rounds: Optional[int] = None,
|
||||||
callbacks: Optional[List[TrainingCallback]] = None,
|
callbacks: Optional[List[TrainingCallback]] = None,
|
||||||
max_cat_to_onehot: Optional[int] = None,
|
|
||||||
**kwargs: Any
|
**kwargs: Any
|
||||||
) -> None:
|
) -> None:
|
||||||
if not SKLEARN_INSTALLED:
|
if not SKLEARN_INSTALLED:
|
||||||
@ -530,10 +532,10 @@ class XGBModel(XGBModelBase):
|
|||||||
self.validate_parameters = validate_parameters
|
self.validate_parameters = validate_parameters
|
||||||
self.predictor = predictor
|
self.predictor = predictor
|
||||||
self.enable_categorical = enable_categorical
|
self.enable_categorical = enable_categorical
|
||||||
|
self.max_cat_to_onehot = max_cat_to_onehot
|
||||||
self.eval_metric = eval_metric
|
self.eval_metric = eval_metric
|
||||||
self.early_stopping_rounds = early_stopping_rounds
|
self.early_stopping_rounds = early_stopping_rounds
|
||||||
self.callbacks = callbacks
|
self.callbacks = callbacks
|
||||||
self.max_cat_to_onehot = max_cat_to_onehot
|
|
||||||
if kwargs:
|
if kwargs:
|
||||||
self.kwargs = kwargs
|
self.kwargs = kwargs
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user