Fix pyspark parameter. (#9460)
- Don't pass the `use_gpu` parameter to the learner.
- Fix the GPU `approx` tree method with PySpark.
This commit is contained in:
parent
428f6cbbe2
commit
bdc1a3c178
@ -115,6 +115,7 @@ _pyspark_specific_params = [
|
|||||||
"qid_col",
|
"qid_col",
|
||||||
"repartition_random_shuffle",
|
"repartition_random_shuffle",
|
||||||
"pred_contrib_col",
|
"pred_contrib_col",
|
||||||
|
"use_gpu",
|
||||||
]
|
]
|
||||||
|
|
||||||
_non_booster_params = ["missing", "n_estimators", "feature_types", "feature_weights"]
|
_non_booster_params = ["missing", "n_estimators", "feature_types", "feature_weights"]
|
||||||
@ -349,11 +350,9 @@ class _SparkXGBParams(
|
|||||||
)
|
)
|
||||||
|
|
||||||
tree_method = self.getOrDefault(self.getParam("tree_method"))
|
tree_method = self.getOrDefault(self.getParam("tree_method"))
|
||||||
if (
|
if tree_method == "exact":
|
||||||
self.getOrDefault(self.use_gpu) or use_cuda(self.getOrDefault(self.device))
|
|
||||||
) and not _can_use_qdm(tree_method):
|
|
||||||
raise ValueError(
|
raise ValueError(
|
||||||
f"The `{tree_method}` tree method is not supported on GPU."
|
"The `exact` tree method is not supported for distributed systems."
|
||||||
)
|
)
|
||||||
|
|
||||||
if self.getOrDefault(self.features_cols):
|
if self.getOrDefault(self.features_cols):
|
||||||
|
|||||||
@ -151,12 +151,18 @@ def spark_diabetes_dataset_feature_cols(spark_session_with_gpu):
|
|||||||
return train_df, test_df, data.feature_names
|
return train_df, test_df, data.feature_names
|
||||||
|
|
||||||
|
|
||||||
def test_sparkxgb_classifier_with_gpu(spark_iris_dataset):
|
@pytest.mark.parametrize("tree_method", ["hist", "approx"])
|
||||||
|
def test_sparkxgb_classifier_with_gpu(tree_method: str, spark_iris_dataset) -> None:
|
||||||
from pyspark.ml.evaluation import MulticlassClassificationEvaluator
|
from pyspark.ml.evaluation import MulticlassClassificationEvaluator
|
||||||
|
|
||||||
classifier = SparkXGBClassifier(device="cuda", num_workers=num_workers)
|
classifier = SparkXGBClassifier(
|
||||||
|
device="cuda", num_workers=num_workers, tree_method=tree_method
|
||||||
|
)
|
||||||
train_df, test_df = spark_iris_dataset
|
train_df, test_df = spark_iris_dataset
|
||||||
model = classifier.fit(train_df)
|
model = classifier.fit(train_df)
|
||||||
|
config = json.loads(model.get_booster().save_config())
|
||||||
|
ctx = config["learner"]["generic_param"]
|
||||||
|
assert ctx["device"] == "cuda:0"
|
||||||
pred_result_df = model.transform(test_df)
|
pred_result_df = model.transform(test_df)
|
||||||
evaluator = MulticlassClassificationEvaluator(metricName="f1")
|
evaluator = MulticlassClassificationEvaluator(metricName="f1")
|
||||||
f1 = evaluator.evaluate(pred_result_df)
|
f1 = evaluator.evaluate(pred_result_df)
|
||||||
|
|||||||
@ -456,7 +456,9 @@ def check_sub_dict_match(
|
|||||||
assert sub_dist[k] == whole_dict[k], f"check on {k} failed"
|
assert sub_dist[k] == whole_dict[k], f"check on {k} failed"
|
||||||
|
|
||||||
|
|
||||||
def get_params_map(params_kv: dict, estimator: Type) -> dict:
|
def get_params_map(
|
||||||
|
params_kv: dict, estimator: xgb.spark.core._SparkXGBEstimator
|
||||||
|
) -> dict:
|
||||||
return {getattr(estimator, k): v for k, v in params_kv.items()}
|
return {getattr(estimator, k): v for k, v in params_kv.items()}
|
||||||
|
|
||||||
|
|
||||||
@ -870,10 +872,10 @@ class TestPySparkLocal:
|
|||||||
|
|
||||||
def test_device_param(self, reg_data: RegData, clf_data: ClfData) -> None:
|
def test_device_param(self, reg_data: RegData, clf_data: ClfData) -> None:
|
||||||
clf = SparkXGBClassifier(device="cuda", tree_method="exact")
|
clf = SparkXGBClassifier(device="cuda", tree_method="exact")
|
||||||
with pytest.raises(ValueError, match="not supported on GPU"):
|
with pytest.raises(ValueError, match="not supported for distributed"):
|
||||||
clf.fit(clf_data.cls_df_train)
|
clf.fit(clf_data.cls_df_train)
|
||||||
regressor = SparkXGBRegressor(device="cuda", tree_method="exact")
|
regressor = SparkXGBRegressor(device="cuda", tree_method="exact")
|
||||||
with pytest.raises(ValueError, match="not supported on GPU"):
|
with pytest.raises(ValueError, match="not supported for distributed"):
|
||||||
regressor.fit(reg_data.reg_df_train)
|
regressor.fit(reg_data.reg_df_train)
|
||||||
|
|
||||||
reg = SparkXGBRegressor(device="cuda", tree_method="gpu_hist")
|
reg = SparkXGBRegressor(device="cuda", tree_method="gpu_hist")
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user