From b4ec1682c6754805e9cc2b4da290779f520003ac Mon Sep 17 00:00:00 2001
From: Jiaming Yuan
Date: Wed, 19 Jan 2022 04:35:17 +0800
Subject: [PATCH] Update document for multi output and categorical. (#7574)

* Group together categorical related parameters.
* Update documents about multioutput and categorical.
---
 demo/guide-python/custom_rmsle.py           |  2 +-
 demo/guide-python/multioutput_regression.py |  2 ++
 doc/tutorials/categorical.rst               |  2 +-
 doc/tutorials/multioutput.rst               | 17 +++++++-------
 python-package/xgboost/sklearn.py           | 26 +++++++++++----------
 5 files changed, 27 insertions(+), 22 deletions(-)

diff --git a/demo/guide-python/custom_rmsle.py b/demo/guide-python/custom_rmsle.py
index 66fbd83a0..bc21f9022 100644
--- a/demo/guide-python/custom_rmsle.py
+++ b/demo/guide-python/custom_rmsle.py
@@ -7,7 +7,7 @@ weight is not used in following example.
 In this script, we implement the Squared Log Error (SLE) objective and RMSLE metric as
 customized functions, then compare it with native implementation in XGBoost.
 
-See doc/tutorials/custom_metric_obj.rst for a step by step walkthrough, with other
+See :doc:`/tutorials/custom_metric_obj` for a step by step walkthrough, with other
 details.
 
 The `SLE` objective reduces impact of outliers in training dataset, hence here we also
diff --git a/demo/guide-python/multioutput_regression.py b/demo/guide-python/multioutput_regression.py
index a0d0998e6..f3f62609c 100644
--- a/demo/guide-python/multioutput_regression.py
+++ b/demo/guide-python/multioutput_regression.py
@@ -5,6 +5,8 @@ A demo for multi-output regression
 The demo is adopted from scikit-learn:
 
 https://scikit-learn.org/stable/auto_examples/ensemble/plot_random_forest_regression_multioutput.html#sphx-glr-auto-examples-ensemble-plot-random-forest-regression-multioutput-py
+
+See :doc:`/tutorials/multioutput` for more information.
 """
 import numpy as np
 import xgboost as xgb
diff --git a/doc/tutorials/categorical.rst b/doc/tutorials/categorical.rst
index f302e5e47..c1d93fb45 100644
--- a/doc/tutorials/categorical.rst
+++ b/doc/tutorials/categorical.rst
@@ -113,7 +113,7 @@ Miscellaneous
 *************
 
 By default, XGBoost assumes input categories are integers starting from 0 till the number
-of categories :math:`[0, n_categories)`. However, user might provide inputs with invalid
+of categories :math:`[0, n\_categories)`. However, users might provide inputs with invalid
 values due to mistakes or missing values. It can be negative value, integer values that
 can not be accurately represented by 32-bit floating point, or values that are larger than
 actual number of unique categories. During training this is validated but for prediction
diff --git a/doc/tutorials/multioutput.rst b/doc/tutorials/multioutput.rst
index d9af9313e..0be27ced0 100644
--- a/doc/tutorials/multioutput.rst
+++ b/doc/tutorials/multioutput.rst
@@ -12,14 +12,15 @@ terminologies related to different multi-output models please refer to the `scikit-learn
 user guide <https://scikit-learn.org/stable/modules/multiclass.html>`_.
 
 Internally, XGBoost builds one model for each target similar to sklearn meta estimators,
-with the added benefit of reusing data and custom objective support. For a worked example
-of regression, see :ref:`sphx_glr_python_examples_multioutput_regression.py`. For
-multi-label classification, the binary relevance strategy is used. Input ``y`` should be
-of shape ``(n_samples, n_classes)`` with each column having a value of 0 or 1 to specify
-whether the sample is labeled as positive for respective class. Given a sample with 3
-output classes and 2 labels, the corresponding `y` should be encoded as ``[1, 0, 1]`` with
-the second class labeled as negative and the rest labeled as positive. At the moment
-XGBoost supports only dense matrix for labels.
+with the added benefit of reusing data and other integrated features like SHAP. For a
+worked example of regression, see
+:ref:`sphx_glr_python_examples_multioutput_regression.py`. For multi-label classification,
+the binary relevance strategy is used. Input ``y`` should be of shape ``(n_samples,
+n_classes)`` with each column having a value of 0 or 1 to specify whether the sample is
+labeled as positive for the respective class. Given a sample with 3 output classes and 2
+labels, the corresponding `y` should be encoded as ``[1, 0, 1]`` with the second class
+labeled as negative and the rest labeled as positive. At the moment XGBoost supports only
+dense matrices for labels.
 
 .. code-block:: python
 
diff --git a/python-package/xgboost/sklearn.py b/python-package/xgboost/sklearn.py
index 54970af6d..374958f75 100644
--- a/python-package/xgboost/sklearn.py
+++ b/python-package/xgboost/sklearn.py
@@ -197,6 +197,18 @@ __model_doc = f'''
         Experimental support for categorical data.  Do not set to true unless you are
         interested in development. Only valid when `gpu_hist` and dataframe are used.
 
+    max_cat_to_onehot : Optional[int]
+
+        .. versionadded:: 1.6.0
+
+        .. note:: This parameter is experimental
+
+        A threshold for deciding whether XGBoost should use one-hot encoding based
+        splits for categorical data. When the number of categories is less than the
+        threshold, one-hot encoding is chosen; otherwise the categories will be
+        partitioned into child nodes. Only relevant for regression and binary
+        classification with the `approx` tree method.
+
     eval_metric : Optional[Union[str, List[str], Callable]]
 
         .. versionadded:: 1.6.0
@@ -267,16 +279,6 @@ __model_doc = f'''
             callbacks = [xgb.callback.EarlyStopping(rounds=early_stopping_rounds,
                                                     save_best=True)]
 
-    max_cat_to_onehot : bool
-
-        .. versionadded:: 1.6.0
-
-        A threshold for deciding whether XGBoost should use one-hot encoding based split
-        for categorical data. When number of categories is lesser than the threshold then
-        one-hot encoding is chosen, otherwise the categories will be partitioned into
-        children nodes. Only relevant for regression and binary classification and
-        `approx` tree method.
-
     kwargs : dict, optional
         Keyword arguments for XGBoost Booster object.  Full documentation of parameters
         can be found :doc:`here </parameter>`.
@@ -490,10 +492,10 @@ class XGBModel(XGBModelBase):
         validate_parameters: Optional[bool] = None,
         predictor: Optional[str] = None,
         enable_categorical: bool = False,
+        max_cat_to_onehot: Optional[int] = None,
         eval_metric: Optional[Union[str, List[str], Callable]] = None,
         early_stopping_rounds: Optional[int] = None,
         callbacks: Optional[List[TrainingCallback]] = None,
-        max_cat_to_onehot: Optional[int] = None,
         **kwargs: Any
     ) -> None:
         if not SKLEARN_INSTALLED:
@@ -530,10 +532,10 @@ class XGBModel(XGBModelBase):
         self.validate_parameters = validate_parameters
         self.predictor = predictor
         self.enable_categorical = enable_categorical
+        self.max_cat_to_onehot = max_cat_to_onehot
         self.eval_metric = eval_metric
         self.early_stopping_rounds = early_stopping_rounds
         self.callbacks = callbacks
-        self.max_cat_to_onehot = max_cat_to_onehot
         if kwargs:
             self.kwargs = kwargs
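
For reviewers, here is a minimal sketch of the multi-label workflow that the
``doc/tutorials/multioutput.rst`` hunk above documents. It is not part of the patch; it
assumes XGBoost >= 1.6 with scikit-learn installed, and mirrors the tutorial's use of
``make_multilabel_classification``:

.. code-block:: python

    import xgboost as xgb
    from sklearn.datasets import make_multilabel_classification

    # Three non-exclusive classes with roughly two labels per sample; ``y`` is a
    # dense 0/1 matrix of shape (n_samples, 3), so a row such as [1, 0, 1] marks
    # the sample positive for the first and third classes.
    X, y = make_multilabel_classification(
        n_samples=32, n_classes=3, n_labels=2, random_state=0
    )

    # Binary relevance: one model is built per target, reusing the same data.
    clf = xgb.XGBClassifier(tree_method="hist")
    clf.fit(X, y)

    # Predictions come back with one 0/1 column per class.
    assert clf.predict(X).shape == (32, 3)

The relocated ``max_cat_to_onehot`` parameter is passed through the same estimator
interface, e.g. ``xgb.XGBRegressor(tree_method="approx", enable_categorical=True,
max_cat_to_onehot=4)`` with a categorical-dtype dataframe as input; the value ``4`` here
is only illustrative.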