diff --git a/python-package/xgboost/spark/core.py b/python-package/xgboost/spark/core.py
index 2150e5055..a072e9961 100644
--- a/python-package/xgboost/spark/core.py
+++ b/python-package/xgboost/spark/core.py
@@ -115,6 +115,7 @@ _pyspark_specific_params = [
     "qid_col",
     "repartition_random_shuffle",
     "pred_contrib_col",
+    "use_gpu",
 ]
 
 _non_booster_params = ["missing", "n_estimators", "feature_types", "feature_weights"]
@@ -349,11 +350,9 @@ class _SparkXGBParams(
         )
 
         tree_method = self.getOrDefault(self.getParam("tree_method"))
-        if (
-            self.getOrDefault(self.use_gpu) or use_cuda(self.getOrDefault(self.device))
-        ) and not _can_use_qdm(tree_method):
+        if tree_method == "exact":
             raise ValueError(
-                f"The `{tree_method}` tree method is not supported on GPU."
+                "The `exact` tree method is not supported for distributed systems."
             )
 
         if self.getOrDefault(self.features_cols):
diff --git a/tests/test_distributed/test_gpu_with_spark/test_gpu_spark.py b/tests/test_distributed/test_gpu_with_spark/test_gpu_spark.py
index a962f778e..a954d9d6c 100644
--- a/tests/test_distributed/test_gpu_with_spark/test_gpu_spark.py
+++ b/tests/test_distributed/test_gpu_with_spark/test_gpu_spark.py
@@ -151,12 +151,18 @@ def spark_diabetes_dataset_feature_cols(spark_session_with_gpu):
     return train_df, test_df, data.feature_names
 
 
-def test_sparkxgb_classifier_with_gpu(spark_iris_dataset):
+@pytest.mark.parametrize("tree_method", ["hist", "approx"])
+def test_sparkxgb_classifier_with_gpu(tree_method: str, spark_iris_dataset) -> None:
     from pyspark.ml.evaluation import MulticlassClassificationEvaluator
 
-    classifier = SparkXGBClassifier(device="cuda", num_workers=num_workers)
+    classifier = SparkXGBClassifier(
+        device="cuda", num_workers=num_workers, tree_method=tree_method
+    )
     train_df, test_df = spark_iris_dataset
     model = classifier.fit(train_df)
+    config = json.loads(model.get_booster().save_config())
+    ctx = config["learner"]["generic_param"]
+    assert ctx["device"] == "cuda:0"
     pred_result_df = model.transform(test_df)
     evaluator = MulticlassClassificationEvaluator(metricName="f1")
     f1 = evaluator.evaluate(pred_result_df)
diff --git a/tests/test_distributed/test_with_spark/test_spark_local.py b/tests/test_distributed/test_with_spark/test_spark_local.py
index 50eafb0a1..e323a3606 100644
--- a/tests/test_distributed/test_with_spark/test_spark_local.py
+++ b/tests/test_distributed/test_with_spark/test_spark_local.py
@@ -456,7 +456,9 @@ def check_sub_dict_match(
     assert sub_dist[k] == whole_dict[k], f"check on {k} failed"
 
 
-def get_params_map(params_kv: dict, estimator: Type) -> dict:
+def get_params_map(
+    params_kv: dict, estimator: xgb.spark.core._SparkXGBEstimator
+) -> dict:
     return {getattr(estimator, k): v for k, v in params_kv.items()}
 
 
@@ -870,10 +872,10 @@ class TestPySparkLocal:
     def test_device_param(self, reg_data: RegData, clf_data: ClfData) -> None:
         clf = SparkXGBClassifier(device="cuda", tree_method="exact")
-        with pytest.raises(ValueError, match="not supported on GPU"):
+        with pytest.raises(ValueError, match="not supported for distributed"):
             clf.fit(clf_data.cls_df_train)
         regressor = SparkXGBRegressor(device="cuda", tree_method="exact")
-        with pytest.raises(ValueError, match="not supported on GPU"):
+        with pytest.raises(ValueError, match="not supported for distributed"):
             regressor.fit(reg_data.reg_df_train)
         reg = SparkXGBRegressor(device="cuda", tree_method="gpu_hist")