[pyspark] Handle the device parameter in pyspark. (#9390)

- Handle the new `device` parameter in PySpark.
- Deprecate the old `use_gpu` parameter.
This commit is contained in:
Jiaming Yuan
2023-07-18 08:47:03 +08:00
committed by GitHub
parent 2a0ff209ff
commit 6e18d3a290
10 changed files with 244 additions and 169 deletions

View File

@@ -154,7 +154,7 @@ def spark_diabetes_dataset_feature_cols(spark_session_with_gpu):
def test_sparkxgb_classifier_with_gpu(spark_iris_dataset):
from pyspark.ml.evaluation import MulticlassClassificationEvaluator
classifier = SparkXGBClassifier(use_gpu=True, num_workers=num_workers)
classifier = SparkXGBClassifier(device="cuda", num_workers=num_workers)
train_df, test_df = spark_iris_dataset
model = classifier.fit(train_df)
pred_result_df = model.transform(test_df)
@@ -169,7 +169,7 @@ def test_sparkxgb_classifier_feature_cols_with_gpu(spark_iris_dataset_feature_co
train_df, test_df, feature_names = spark_iris_dataset_feature_cols
classifier = SparkXGBClassifier(
features_col=feature_names, use_gpu=True, num_workers=num_workers
features_col=feature_names, device="cuda", num_workers=num_workers
)
model = classifier.fit(train_df)
@@ -185,7 +185,7 @@ def test_cv_sparkxgb_classifier_feature_cols_with_gpu(spark_iris_dataset_feature
train_df, test_df, feature_names = spark_iris_dataset_feature_cols
classifier = SparkXGBClassifier(
features_col=feature_names, use_gpu=True, num_workers=num_workers
features_col=feature_names, device="cuda", num_workers=num_workers
)
grid = ParamGridBuilder().addGrid(classifier.max_depth, [6, 8]).build()
evaluator = MulticlassClassificationEvaluator(metricName="f1")
@@ -197,11 +197,24 @@ def test_cv_sparkxgb_classifier_feature_cols_with_gpu(spark_iris_dataset_feature
f1 = evaluator.evaluate(pred_result_df)
assert f1 >= 0.97
clf = SparkXGBClassifier(
features_col=feature_names, use_gpu=True, num_workers=num_workers
)
grid = ParamGridBuilder().addGrid(clf.max_depth, [6, 8]).build()
evaluator = MulticlassClassificationEvaluator(metricName="f1")
cv = CrossValidator(
estimator=clf, evaluator=evaluator, estimatorParamMaps=grid, numFolds=3
)
cvModel = cv.fit(train_df)
pred_result_df = cvModel.transform(test_df)
f1 = evaluator.evaluate(pred_result_df)
assert f1 >= 0.97
def test_sparkxgb_regressor_with_gpu(spark_diabetes_dataset):
from pyspark.ml.evaluation import RegressionEvaluator
regressor = SparkXGBRegressor(use_gpu=True, num_workers=num_workers)
regressor = SparkXGBRegressor(device="cuda", num_workers=num_workers)
train_df, test_df = spark_diabetes_dataset
model = regressor.fit(train_df)
pred_result_df = model.transform(test_df)
@@ -215,7 +228,7 @@ def test_sparkxgb_regressor_feature_cols_with_gpu(spark_diabetes_dataset_feature
train_df, test_df, feature_names = spark_diabetes_dataset_feature_cols
regressor = SparkXGBRegressor(
features_col=feature_names, use_gpu=True, num_workers=num_workers
features_col=feature_names, device="cuda", num_workers=num_workers
)
model = regressor.fit(train_df)