Update document for multi output and categorical. (#7574)
* Group together categorical related parameters.
* Update documents about multioutput and categorical.
This commit is contained in:
parent dac9eb13bd
commit b4ec1682c6
@@ -7,7 +7,7 @@ weight is not used in following example. In this script, we implement the Square
 Error (SLE) objective and RMSLE metric as customized functions, then compare it with
 native implementation in XGBoost.
 
-See doc/tutorials/custom_metric_obj.rst for a step by step walkthrough, with other
+See :doc:`/tutorials/custom_metric_obj` for a step by step walkthrough, with other
 details.
 
 The `SLE` objective reduces impact of outliers in training dataset, hence here we also
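A rough sketch of such a customized Squared Log Error objective, for orientation only; the demo script's exact implementation is not shown in this hunk, and the training call in the comment is an assumption about how it would be wired up:

.. code-block:: python

    import numpy as np
    import xgboost as xgb

    def squared_log(predt: np.ndarray, dtrain: xgb.DMatrix):
        """Gradient and hessian of 1/2 * (log1p(predt) - log1p(y))**2."""
        y = dtrain.get_label()
        predt[predt < -1] = -1 + 1e-6  # keep log1p(predt) well defined
        grad = (np.log1p(predt) - np.log1p(y)) / (predt + 1)
        hess = (-np.log1p(predt) + np.log1p(y) + 1) / np.power(predt + 1, 2)
        return grad, hess

    # booster = xgb.train({"tree_method": "hist"}, dtrain, num_boost_round=10, obj=squared_log)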
@@ -5,6 +5,8 @@ A demo for multi-output regression
 The demo is adopted from scikit-learn:
 
 https://scikit-learn.org/stable/auto_examples/ensemble/plot_random_forest_regression_multioutput.html#sphx-glr-auto-examples-ensemble-plot-random-forest-regression-multioutput-py
+
+See :doc:`/tutorials/multioutput` for more information.
 """
 import numpy as np
 import xgboost as xgb
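For the behaviour this demo relies on (one model fitted per target column), a minimal hedged sketch along the following lines should work; the synthetic data and parameter choices are illustrative and not taken from the demo script:

.. code-block:: python

    import numpy as np
    import xgboost as xgb

    rng = np.random.RandomState(1994)
    X = rng.randn(100, 10)
    # Two regression targets per sample -> y has shape (n_samples, n_targets).
    y = np.stack([2.0 * X[:, 0], X[:, 1] - X[:, 2]], axis=1)

    # XGBoost builds one model per target internally while reusing the input data.
    reg = xgb.XGBRegressor(tree_method="hist", n_estimators=32)
    reg.fit(X, y)
    pred = reg.predict(X)  # shape (100, 2), one column per target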
@@ -113,7 +113,7 @@ Miscellaneous
 *************
 
 By default, XGBoost assumes input categories are integers starting from 0 till the number
-of categories :math:`[0, n_categories)`. However, user might provide inputs with invalid
+of categories :math:`[0, n\_categories)`. However, user might provide inputs with invalid
 values due to mistakes or missing values. It can be negative value, integer values that
 can not be accurately represented by 32-bit floating point, or values that are larger than
 actual number of unique categories. During training this is validated but for prediction
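To make the expected encoding concrete, pandas category codes already satisfy this contract; a small illustration, assuming pandas is available:

.. code-block:: python

    import pandas as pd

    # Pandas encodes categories as consecutive integers starting at 0, which is
    # the [0, n_categories) encoding described above.
    s = pd.Series(["a", "c", "a", "b"], dtype="category")
    print(s.cat.codes.tolist())   # [0, 2, 0, 1]
    print(len(s.cat.categories))  # 3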
@@ -12,14 +12,15 @@ terminologies related to different multi-output models please refer to the `scik
 user guide <https://scikit-learn.org/stable/modules/multiclass.html>`_.
 
 Internally, XGBoost builds one model for each target similar to sklearn meta estimators,
-with the added benefit of reusing data and custom objective support. For a worked example
-of regression, see :ref:`sphx_glr_python_examples_multioutput_regression.py`. For
-multi-label classification, the binary relevance strategy is used. Input ``y`` should be
-of shape ``(n_samples, n_classes)`` with each column having a value of 0 or 1 to specify
-whether the sample is labeled as positive for respective class. Given a sample with 3
-output classes and 2 labels, the corresponding `y` should be encoded as ``[1, 0, 1]`` with
-the second class labeled as negative and the rest labeled as positive. At the moment
-XGBoost supports only dense matrix for labels.
+with the added benefit of reusing data and other integrated features like SHAP. For a
+worked example of regression, see
+:ref:`sphx_glr_python_examples_multioutput_regression.py`. For multi-label classification,
+the binary relevance strategy is used. Input ``y`` should be of shape ``(n_samples,
+n_classes)`` with each column having a value of 0 or 1 to specify whether the sample is
+labeled as positive for respective class. Given a sample with 3 output classes and 2
+labels, the corresponding `y` should be encoded as ``[1, 0, 1]`` with the second class
+labeled as negative and the rest labeled as positive. At the moment XGBoost supports only
+dense matrix for labels.
 
 .. code-block:: python
 
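A minimal multi-label sketch in the spirit of the paragraph above (separate from the tutorial's own example, whose body falls outside this hunk; the dense 0/1 label matrix and model settings are illustrative assumptions):

.. code-block:: python

    import numpy as np
    import xgboost as xgb

    rng = np.random.RandomState(0)
    X = rng.randn(200, 8)
    # Dense (n_samples, n_classes) label matrix; each column is 0 or 1.
    y = (rng.rand(200, 3) > 0.5).astype(np.int64)

    # Binary relevance: one binary classifier is fitted per label column.
    clf = xgb.XGBClassifier(tree_method="hist", n_estimators=16)
    clf.fit(X, y)
    labels = clf.predict(X)  # shape (200, 3), one 0/1 prediction per class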
@@ -197,6 +197,18 @@ __model_doc = f'''
 Experimental support for categorical data. Do not set to true unless you are
 interested in development. Only valid when `gpu_hist` and dataframe are used.
 
+max_cat_to_onehot : bool
+
+    .. versionadded:: 1.6.0
+
+    .. note:: This parameter is experimental
+
+    A threshold for deciding whether XGBoost should use one-hot encoding based split
+    for categorical data. When number of categories is lesser than the threshold then
+    one-hot encoding is chosen, otherwise the categories will be partitioned into
+    children nodes. Only relevant for regression and binary classification and
+    `approx` tree method.
+
 eval_metric : Optional[Union[str, List[str], Callable]]
 
     .. versionadded:: 1.6.0
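A hedged sketch of how the newly documented ``max_cat_to_onehot`` threshold might be used together with ``enable_categorical``; the data frame and parameter values below are illustrative assumptions, not taken from the commit:

.. code-block:: python

    import numpy as np
    import pandas as pd
    import xgboost as xgb

    rng = np.random.RandomState(7)
    # One categorical feature; the column dtype must be "category" for
    # enable_categorical to take effect.
    df = pd.DataFrame({
        "color": pd.Categorical(rng.choice(["red", "green", "blue"], size=100)),
        "x0": rng.randn(100),
    })
    y = rng.randn(100)

    # With only 3 categories and a threshold of 4, splits on "color" use
    # one-hot style splits; higher-cardinality features would be partitioned.
    reg = xgb.XGBRegressor(
        tree_method="approx",
        enable_categorical=True,
        max_cat_to_onehot=4,
        n_estimators=16,
    )
    reg.fit(df, y)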
@@ -267,16 +279,6 @@ __model_doc = f'''
 callbacks = [xgb.callback.EarlyStopping(rounds=early_stopping_rounds,
                                         save_best=True)]
 
-max_cat_to_onehot : bool
-
-    .. versionadded:: 1.6.0
-
-    A threshold for deciding whether XGBoost should use one-hot encoding based split
-    for categorical data. When number of categories is lesser than the threshold then
-    one-hot encoding is chosen, otherwise the categories will be partitioned into
-    children nodes. Only relevant for regression and binary classification and
-    `approx` tree method.
-
 kwargs : dict, optional
     Keyword arguments for XGBoost Booster object. Full documentation of parameters
     can be found :doc:`here </parameter>`.
@@ -490,10 +492,10 @@ class XGBModel(XGBModelBase):
         validate_parameters: Optional[bool] = None,
         predictor: Optional[str] = None,
         enable_categorical: bool = False,
+        max_cat_to_onehot: Optional[int] = None,
         eval_metric: Optional[Union[str, List[str], Callable]] = None,
         early_stopping_rounds: Optional[int] = None,
         callbacks: Optional[List[TrainingCallback]] = None,
-        max_cat_to_onehot: Optional[int] = None,
         **kwargs: Any
     ) -> None:
         if not SKLEARN_INSTALLED:
@@ -530,10 +532,10 @@ class XGBModel(XGBModelBase):
         self.validate_parameters = validate_parameters
         self.predictor = predictor
         self.enable_categorical = enable_categorical
+        self.max_cat_to_onehot = max_cat_to_onehot
         self.eval_metric = eval_metric
         self.early_stopping_rounds = early_stopping_rounds
         self.callbacks = callbacks
-        self.max_cat_to_onehot = max_cat_to_onehot
         if kwargs:
             self.kwargs = kwargs
 