[doc] Document Python inputs. (#8643)

This commit is contained in:
Jiaming Yuan
2023-01-10 15:39:32 +08:00
committed by GitHub
parent 4e12f3e1bc
commit 1b58d81315
4 changed files with 113 additions and 33 deletions

View File

@@ -619,11 +619,11 @@ class DataSplitMode(IntEnum):
class DMatrix: # pylint: disable=too-many-instance-attributes,too-many-public-methods
"""Data Matrix used in XGBoost.
DMatrix is an internal data structure that is used by XGBoost,
which is optimized for both memory efficiency and training speed.
You can construct DMatrix from multiple different sources of data.
"""
DMatrix is an internal data structure that is used by XGBoost, which is optimized
for both memory efficiency and training speed. You can construct DMatrix from
multiple different sources of data.
"""
@_deprecate_positional_args
def __init__(
self,
@@ -647,15 +647,9 @@ class DMatrix: # pylint: disable=too-many-instance-attributes,too-many-public-m
) -> None:
"""Parameters
----------
data : os.PathLike/string/numpy.array/scipy.sparse/pd.DataFrame/
dt.Frame/cudf.DataFrame/cupy.array/dlpack/arrow.Table
Data source of DMatrix.
When data is string or os.PathLike type, it represents the path libsvm
format txt file, csv file (by specifying uri parameter
'path_to_csv?format=csv'), or binary file that xgboost can read from.
data :
Data source of DMatrix. See :ref:`py-data` for a list of supported input
types.
label : array_like
Label of the training data.
weight : array_like

View File

@@ -939,7 +939,14 @@ class XGBModel(XGBModelBase):
Parameters
----------
X :
Feature matrix
Feature matrix. See :ref:`py-data` for a list of supported types.
When the ``tree_method`` is set to ``hist`` or ``gpu_hist``, internally, the
:py:class:`QuantileDMatrix` will be used instead of the :py:class:`DMatrix`
for conserving memory. However, this has performance implications when the
device of the input data does not match the device used by the algorithm.
For instance, if the input is a numpy array on CPU but ``gpu_hist`` is used
for training, then the data is first processed on the CPU and then
transferred to the GPU.
y :
Labels
sample_weight :
@@ -982,6 +989,7 @@ class XGBModel(XGBModelBase):
callbacks :
.. deprecated:: 1.6.0
Use `callbacks` in :py:meth:`__init__` or :py:meth:`set_params` instead.
"""
with config_context(verbosity=self.verbosity):
evals_result: TrainingCallback.EvalsLog = {}
@@ -1567,7 +1575,7 @@ class XGBClassifier(XGBModel, XGBClassifierBase):
Parameters
----------
X : array_like
Feature matrix.
Feature matrix. See :ref:`py-data` for a list of supported types.
ntree_limit : int
Deprecated, use `iteration_range` instead.
validate_features : bool
@@ -1846,7 +1854,14 @@ class XGBRanker(XGBModel, XGBRankerMixIn):
Parameters
----------
X :
Feature matrix
Feature matrix. See :ref:`py-data` for a list of supported types.
When the ``tree_method`` is set to ``hist`` or ``gpu_hist``, internally, the
:py:class:`QuantileDMatrix` will be used instead of the :py:class:`DMatrix`
for conserving memory. However, this has performance implications when the
device of the input data does not match the device used by the algorithm.
For instance, if the input is a numpy array on CPU but ``gpu_hist`` is used
for training, then the data is first processed on the CPU and then
transferred to the GPU.
y :
Labels
group :
@@ -1917,6 +1932,7 @@ class XGBRanker(XGBModel, XGBRankerMixIn):
callbacks :
.. deprecated:: 1.6.0
Use `callbacks` in :py:meth:`__init__` or :py:meth:`set_params` instead.
"""
# check if group information is provided
with config_context(verbosity=self.verbosity):