[doc] Document Python inputs. (#8643)

This commit is contained in:
Jiaming Yuan
2023-01-10 15:39:32 +08:00
committed by GitHub
parent 4e12f3e1bc
commit 1b58d81315
4 changed files with 113 additions and 33 deletions

View File

@@ -619,11 +619,11 @@ class DataSplitMode(IntEnum):
class DMatrix: # pylint: disable=too-many-instance-attributes,too-many-public-methods
"""Data Matrix used in XGBoost.
DMatrix is an internal data structure that is used by XGBoost,
which is optimized for both memory efficiency and training speed.
You can construct DMatrix from multiple different sources of data.
"""
DMatrix is an internal data structure that is used by XGBoost, which is optimized
for both memory efficiency and training speed. You can construct DMatrix from
multiple different sources of data.
"""
@_deprecate_positional_args
def __init__(
self,
@@ -647,15 +647,9 @@ class DMatrix: # pylint: disable=too-many-instance-attributes,too-many-public-m
) -> None:
"""Parameters
----------
data : os.PathLike/string/numpy.array/scipy.sparse/pd.DataFrame/
dt.Frame/cudf.DataFrame/cupy.array/dlpack/arrow.Table
Data source of DMatrix.
When data is string or os.PathLike type, it represents the path libsvm
format txt file, csv file (by specifying uri parameter
'path_to_csv?format=csv'), or binary file that xgboost can read from.
data :
Data source of DMatrix. See :ref:`py-data` for a list of supported input
types.
label : array_like
Label of the training data.
weight : array_like

View File

@@ -939,7 +939,14 @@ class XGBModel(XGBModelBase):
Parameters
----------
X :
Feature matrix
Feature matrix. See :ref:`py-data` for a list of supported types.
When the ``tree_method`` is set to ``hist`` or ``gpu_hist``, internally, the
:py:class:`QuantileDMatrix` will be used instead of the :py:class:`DMatrix`
for conserving memory. However, this has performance implications when the
device of the input data does not match the device used by the algorithm.
For instance, if the input is a numpy array on CPU but ``gpu_hist`` is used
for training, then the data is first processed on the CPU and then
transferred to the GPU.
y :
Labels
sample_weight :
@@ -982,6 +989,7 @@ class XGBModel(XGBModelBase):
callbacks :
.. deprecated:: 1.6.0
Use `callbacks` in :py:meth:`__init__` or :py:meth:`set_params` instead.
"""
with config_context(verbosity=self.verbosity):
evals_result: TrainingCallback.EvalsLog = {}
@@ -1567,7 +1575,7 @@ class XGBClassifier(XGBModel, XGBClassifierBase):
Parameters
----------
X : array_like
Feature matrix.
Feature matrix. See :ref:`py-data` for a list of supported types.
ntree_limit : int
Deprecated, use `iteration_range` instead.
validate_features : bool
@@ -1846,7 +1854,14 @@ class XGBRanker(XGBModel, XGBRankerMixIn):
Parameters
----------
X :
Feature matrix
Feature matrix. See :ref:`py-data` for a list of supported types.
When the ``tree_method`` is set to ``hist`` or ``gpu_hist``, internally, the
:py:class:`QuantileDMatrix` will be used instead of the :py:class:`DMatrix`
for conserving memory. However, this has performance implications when the
device of the input data does not match the device used by the algorithm.
For instance, if the input is a numpy array on CPU but ``gpu_hist`` is used
for training, then the data is first processed on the CPU and then
transferred to the GPU.
y :
Labels
group :
@@ -1917,6 +1932,7 @@ class XGBRanker(XGBModel, XGBRankerMixIn):
callbacks :
.. deprecated:: 1.6.0
Use `callbacks` in :py:meth:`__init__` or :py:meth:`set_params` instead.
"""
# check if group information is provided
with config_context(verbosity=self.verbosity):