Fix pyspark parameter. (#9460)

- Don't pass the `use_gpu` parameter to the learner.
- Fix the GPU `approx` tree method with PySpark.
2023-08-11 19:07:50 +08:00
parent 428f6cbbe2
commit bdc1a3c178
3 changed files with 16 additions and 9 deletions
--- a/tests/test_distributed/test_gpu_with_spark/test_gpu_spark.py
+++ b/tests/test_distributed/test_gpu_with_spark/test_gpu_spark.py
@@ -151,12 +151,18 @@ def spark_diabetes_dataset_feature_cols(spark_session_with_gpu):
    return train_df, test_df, data.feature_names


-def test_sparkxgb_classifier_with_gpu(spark_iris_dataset):
+@pytest.mark.parametrize("tree_method", ["hist", "approx"])
+def test_sparkxgb_classifier_with_gpu(tree_method: str, spark_iris_dataset) -> None:
    from pyspark.ml.evaluation import MulticlassClassificationEvaluator

-    classifier = SparkXGBClassifier(device="cuda", num_workers=num_workers)
+    classifier = SparkXGBClassifier(
+        device="cuda", num_workers=num_workers, tree_method=tree_method
+    )
    train_df, test_df = spark_iris_dataset
    model = classifier.fit(train_df)
+    config = json.loads(model.get_booster().save_config())
+    ctx = config["learner"]["generic_param"]
+    assert ctx["device"] == "cuda:0"
    pred_result_df = model.transform(test_df)
    evaluator = MulticlassClassificationEvaluator(metricName="f1")
    f1 = evaluator.evaluate(pred_result_df)
--- a/tests/test_distributed/test_with_spark/test_spark_local.py
+++ b/tests/test_distributed/test_with_spark/test_spark_local.py
@@ -456,7 +456,9 @@ def check_sub_dict_match(
            assert sub_dist[k] == whole_dict[k], f"check on {k} failed"


-def get_params_map(params_kv: dict, estimator: Type) -> dict:
+def get_params_map(
+    params_kv: dict, estimator: xgb.spark.core._SparkXGBEstimator
+) -> dict:
    return {getattr(estimator, k): v for k, v in params_kv.items()}


@@ -870,10 +872,10 @@ class TestPySparkLocal:

    def test_device_param(self, reg_data: RegData, clf_data: ClfData) -> None:
        clf = SparkXGBClassifier(device="cuda", tree_method="exact")
-        with pytest.raises(ValueError, match="not supported on GPU"):
+        with pytest.raises(ValueError, match="not supported for distributed"):
            clf.fit(clf_data.cls_df_train)
        regressor = SparkXGBRegressor(device="cuda", tree_method="exact")
-        with pytest.raises(ValueError, match="not supported on GPU"):
+        with pytest.raises(ValueError, match="not supported for distributed"):
            regressor.fit(reg_data.reg_df_train)

        reg = SparkXGBRegressor(device="cuda", tree_method="gpu_hist")