[pyspark] Fixing xgboost.spark python doc (#8200)
Signed-off-by: Weichen Xu <weichen.xu@databricks.com>
This commit is contained in:
parent
d03794ce7a
commit
651f0a8889
@ -141,7 +141,7 @@ class SparkXGBClassifier(_SparkXGBEstimator, HasProbabilityCol, HasRawPrediction
|
|||||||
|
|
||||||
SparkXGBClassifier doesn't support `validate_features` and `output_margin` param.
|
SparkXGBClassifier doesn't support `validate_features` and `output_margin` param.
|
||||||
|
|
||||||
SparkXGBRegressor doesn't support setting `nthread` xgboost param, instead, the `nthread`
|
SparkXGBClassifier doesn't support setting `nthread` xgboost param, instead, the `nthread`
|
||||||
param for each xgboost worker will be set equal to `spark.task.cpus` config value.
|
param for each xgboost worker will be set equal to `spark.task.cpus` config value.
|
||||||
|
|
||||||
|
|
||||||
@ -243,15 +243,15 @@ _set_pyspark_xgb_cls_param_attrs(SparkXGBClassifier, SparkXGBClassifierModel)
|
|||||||
|
|
||||||
class SparkXGBRanker(_SparkXGBEstimator):
|
class SparkXGBRanker(_SparkXGBEstimator):
|
||||||
"""SparkXGBRanker is a PySpark ML estimator. It implements the XGBoost
|
"""SparkXGBRanker is a PySpark ML estimator. It implements the XGBoost
|
||||||
classification algorithm based on XGBoost python library, and it can be used in
|
ranking algorithm based on XGBoost python library, and it can be used in
|
||||||
PySpark Pipeline and PySpark ML meta algorithms like
|
PySpark Pipeline and PySpark ML meta algorithms like
|
||||||
:py:class:`~pyspark.ml.tuning.CrossValidator`/
|
:py:class:`~pyspark.ml.tuning.CrossValidator`/
|
||||||
:py:class:`~pyspark.ml.tuning.TrainValidationSplit`/
|
:py:class:`~pyspark.ml.tuning.TrainValidationSplit`/
|
||||||
:py:class:`~pyspark.ml.classification.OneVsRest`
|
:py:class:`~pyspark.ml.classification.OneVsRest`
|
||||||
|
|
||||||
SparkXGBRanker automatically supports most of the parameters in
|
SparkXGBRanker automatically supports most of the parameters in
|
||||||
`xgboost.XGBClassifier` constructor and most of the parameters used in
|
`xgboost.XGBRanker` constructor and most of the parameters used in
|
||||||
:py:class:`xgboost.XGBClassifier` fit and predict method.
|
:py:class:`xgboost.XGBRanker` fit and predict method.
|
||||||
|
|
||||||
SparkXGBRanker doesn't support setting `gpu_id` but supports another param `use_gpu`,
|
SparkXGBRanker doesn't support setting `gpu_id` but supports another param `use_gpu`,
|
||||||
see doc below for more details.
|
see doc below for more details.
|
||||||
@ -273,21 +273,21 @@ class SparkXGBRanker(_SparkXGBEstimator):
|
|||||||
|
|
||||||
callbacks:
|
callbacks:
|
||||||
The export and import of the callback functions are at best effort. For
|
The export and import of the callback functions are at best effort. For
|
||||||
details, see :py:attr:`xgboost.spark.SparkXGBClassifier.callbacks` param doc.
|
details, see :py:attr:`xgboost.spark.SparkXGBRanker.callbacks` param doc.
|
||||||
validation_indicator_col:
|
validation_indicator_col:
|
||||||
For params related to `xgboost.XGBClassifier` training with
|
For params related to `xgboost.XGBRanker` training with
|
||||||
evaluation dataset's supervision,
|
evaluation dataset's supervision,
|
||||||
set :py:attr:`xgboost.spark.SparkXGBClassifier.validation_indicator_col`
|
set :py:attr:`xgboost.spark.SparkXGBRanker.validation_indicator_col`
|
||||||
parameter instead of setting the `eval_set` parameter in `xgboost.XGBClassifier`
|
parameter instead of setting the `eval_set` parameter in `xgboost.XGBRanker`
|
||||||
fit method.
|
fit method.
|
||||||
weight_col:
|
weight_col:
|
||||||
To specify the weight of the training and validation dataset, set
|
To specify the weight of the training and validation dataset, set
|
||||||
:py:attr:`xgboost.spark.SparkXGBClassifier.weight_col` parameter instead of setting
|
:py:attr:`xgboost.spark.SparkXGBRanker.weight_col` parameter instead of setting
|
||||||
`sample_weight` and `sample_weight_eval_set` parameter in `xgboost.XGBClassifier`
|
`sample_weight` and `sample_weight_eval_set` parameter in `xgboost.XGBRanker`
|
||||||
fit method.
|
fit method.
|
||||||
xgb_model:
|
xgb_model:
|
||||||
Set the value to be the instance returned by
|
Set the value to be the instance returned by
|
||||||
:func:`xgboost.spark.SparkXGBClassifierModel.get_booster`.
|
:func:`xgboost.spark.SparkXGBRankerModel.get_booster`.
|
||||||
num_workers:
|
num_workers:
|
||||||
Integer that specifies the number of XGBoost workers to use.
|
Integer that specifies the number of XGBoost workers to use.
|
||||||
Each XGBoost worker corresponds to one spark task.
|
Each XGBoost worker corresponds to one spark task.
|
||||||
@ -313,23 +313,33 @@ class SparkXGBRanker(_SparkXGBEstimator):
|
|||||||
Examples
|
Examples
|
||||||
--------
|
--------
|
||||||
|
|
||||||
>>> from xgboost.spark import SparkXGBClassifier
|
>>> from xgboost.spark import SparkXGBRanker
|
||||||
>>> from pyspark.ml.linalg import Vectors
|
>>> from pyspark.ml.linalg import Vectors
|
||||||
>>> df_train = spark.createDataFrame([
|
>>> ranker = SparkXGBRanker(qid_col="qid")
|
||||||
... (Vectors.dense(1.0, 2.0, 3.0), 0, False, 1.0),
|
>>> df_train = spark.createDataFrame(
|
||||||
... (Vectors.sparse(3, {1: 1.0, 2: 5.5}), 1, False, 2.0),
|
... [
|
||||||
... (Vectors.dense(4.0, 5.0, 6.0), 0, True, 1.0),
|
... (Vectors.dense(1.0, 2.0, 3.0), 0, 0),
|
||||||
... (Vectors.sparse(3, {1: 6.0, 2: 7.5}), 1, True, 2.0),
|
... (Vectors.dense(4.0, 5.0, 6.0), 1, 0),
|
||||||
... ], ["features", "label", "isVal", "weight"])
|
... (Vectors.dense(9.0, 4.0, 8.0), 2, 0),
|
||||||
>>> df_test = spark.createDataFrame([
|
... (Vectors.sparse(3, {1: 1.0, 2: 5.5}), 0, 1),
|
||||||
... (Vectors.dense(1.0, 2.0, 3.0), ),
|
... (Vectors.sparse(3, {1: 6.0, 2: 7.5}), 1, 1),
|
||||||
... ], ["features"])
|
... (Vectors.sparse(3, {1: 8.0, 2: 9.5}), 2, 1),
|
||||||
>>> xgb_classifier = SparkXGBClassifier(max_depth=5, missing=0.0,
|
... ],
|
||||||
... validation_indicator_col='isVal', weight_col='weight',
|
... ["features", "label", "qid"],
|
||||||
... early_stopping_rounds=1, eval_metric='logloss')
|
... )
|
||||||
>>> xgb_clf_model = xgb_classifier.fit(df_train)
|
>>> df_test = spark.createDataFrame(
|
||||||
>>> xgb_clf_model.transform(df_test).show()
|
... [
|
||||||
|
... (Vectors.dense(1.5, 2.0, 3.0), 0),
|
||||||
|
... (Vectors.dense(4.5, 5.0, 6.0), 0),
|
||||||
|
... (Vectors.dense(9.0, 4.5, 8.0), 0),
|
||||||
|
... (Vectors.sparse(3, {1: 1.0, 2: 6.0}), 1),
|
||||||
|
... (Vectors.sparse(3, {1: 6.0, 2: 7.0}), 1),
|
||||||
|
... (Vectors.sparse(3, {1: 8.0, 2: 10.5}), 1),
|
||||||
|
... ],
|
||||||
|
... ["features", "qid"],
|
||||||
|
... )
|
||||||
|
>>> model = ranker.fit(df_train)
|
||||||
|
>>> model.transform(df_test).show()
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, **kwargs):
|
def __init__(self, **kwargs):
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user