From 175986b739bcdb07047d14d43d0075faeac37c2a Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Wed, 18 Jan 2023 07:52:18 +0800 Subject: [PATCH] [doc] Add missing document for pyspark ranker. [skip ci] (#8692) --- doc/python/python_api.rst | 10 ++++++++++ doc/tutorials/spark_estimator.rst | 6 +++--- python-package/xgboost/spark/__init__.py | 5 +++-- 3 files changed, 16 insertions(+), 5 deletions(-) diff --git a/doc/python/python_api.rst b/doc/python/python_api.rst index 03b431c77..b27542a8b 100644 --- a/doc/python/python_api.rst +++ b/doc/python/python_api.rst @@ -173,3 +173,13 @@ PySpark API :members: :inherited-members: :show-inheritance: + +.. autoclass:: xgboost.spark.SparkXGBRanker + :members: + :inherited-members: + :show-inheritance: + +.. autoclass:: xgboost.spark.SparkXGBRankerModel + :members: + :inherited-members: + :show-inheritance: diff --git a/doc/tutorials/spark_estimator.rst b/doc/tutorials/spark_estimator.rst index aae9f9ef6..02ddb60ea 100644 --- a/doc/tutorials/spark_estimator.rst +++ b/doc/tutorials/spark_estimator.rst @@ -45,7 +45,7 @@ such as ``weight_col``, ``validation_indicator_col``, ``use_gpu``, for details p The following code snippet shows how to train a spark xgboost regressor model, first we need to prepare a training dataset as a spark dataframe contains -"label" column and "features" column(s), the "features" column(s) must be ``pyspark.ml.linalg.Vector` +"label" column and "features" column(s), the "features" column(s) must be ``pyspark.ml.linalg.Vector`` type or spark array type or a list of feature column names. @@ -56,7 +56,7 @@ type or spark array type or a list of feature column names. The following code snippet shows how to predict test data using a spark xgboost regressor model, first we need to prepare a test dataset as a spark dataframe contains -"features" and "label" column, the "features" column must be ``pyspark.ml.linalg.Vector` +"features" and "label" column, the "features" column must be ``pyspark.ml.linalg.Vector`` type or spark array type. .. code-block:: python @@ -97,7 +97,7 @@ Aside from the PySpark and XGBoost modules, we also need the `cuDF `_ package for handling Spark dataframe. We recommend using either Conda or Virtualenv to manage python dependencies for PySpark jobs. Please refer to `How to Manage Python Dependencies in PySpark -`_ +`_ for more details on PySpark dependency management. In short, to create a Python environment that can be sent to a remote cluster using diff --git a/python-package/xgboost/spark/__init__.py b/python-package/xgboost/spark/__init__.py index 7c18eeba4..224f87d03 100644 --- a/python-package/xgboost/spark/__init__.py +++ b/python-package/xgboost/spark/__init__.py @@ -1,5 +1,4 @@ -"""PySpark XGBoost integration interface -""" +"""PySpark XGBoost integration interface""" try: import pyspark @@ -10,6 +9,7 @@ from .estimator import ( SparkXGBClassifier, SparkXGBClassifierModel, SparkXGBRanker, + SparkXGBRankerModel, SparkXGBRegressor, SparkXGBRegressorModel, ) @@ -20,4 +20,5 @@ __all__ = [ "SparkXGBRegressor", "SparkXGBRegressorModel", "SparkXGBRanker", + "SparkXGBRankerModel", ]