From 10bb0a74efd8aef2ada0ecf46d41c09920f3d0ad Mon Sep 17 00:00:00 2001
From: Jiaming Yuan <jm.yuan@outlook.com>
Date: Sat, 14 Jan 2023 06:40:17 +0800
Subject: [PATCH] [backport] [CI] Skip pyspark sparse tests. (#8675) (#8678)

---
 tests/ci_build/conda_env/cpu_test.yml       |  5 +++--
 tests/python/test_spark/test_spark_local.py | 12 ++++++++++++
 2 files changed, 15 insertions(+), 2 deletions(-)

diff --git a/tests/ci_build/conda_env/cpu_test.yml b/tests/ci_build/conda_env/cpu_test.yml
index 98c7a5928..4d74c5832 100644
--- a/tests/ci_build/conda_env/cpu_test.yml
+++ b/tests/ci_build/conda_env/cpu_test.yml
@@ -36,7 +36,8 @@ dependencies:
 - cloudpickle
 - shap
 - modin
+# TODO: Replace the pyspark pin below with pyspark>=3.4 once 3.4 is released.
+# - https://ml-team-public-read.s3.us-west-2.amazonaws.com/pyspark-3.4.0.dev0.tar.gz
+- pyspark>=3.3.1
 - pip:
   - datatable
-  # TODO: Replace it with pyspark>=3.4 once 3.4 released.
-  - https://ml-team-public-read.s3.us-west-2.amazonaws.com/pyspark-3.4.0.dev0.tar.gz
diff --git a/tests/python/test_spark/test_spark_local.py b/tests/python/test_spark/test_spark_local.py
index 6757d2e34..574e96388 100644
--- a/tests/python/test_spark/test_spark_local.py
+++ b/tests/python/test_spark/test_spark_local.py
@@ -41,6 +41,16 @@ logging.getLogger("py4j").setLevel(logging.INFO)
 pytestmark = testing.timeout(60)
 
 
+def no_sparse_unwrap():  # kwargs for pytest.mark.skipif; skip if unwrap_udt is missing
+    try:
+        from pyspark.sql.functions import unwrap_udt  # availability probe only
+
+    except ImportError:
+        return {"reason": "PySpark<3.4", "condition": True}  # skip the test
+
+    return {"reason": "PySpark<3.4", "condition": False}  # run the test
+
+
 class XgboostLocalTest(SparkTestCase):
     def setUp(self):
         logging.getLogger().setLevel("INFO")
@@ -985,6 +995,7 @@ class XgboostLocalTest(SparkTestCase):
         model = classifier.fit(self.cls_df_train)
         model.transform(self.cls_df_test).collect()
 
+    @pytest.mark.skipif(**no_sparse_unwrap())
     def test_regressor_with_sparse_optim(self):
         regressor = SparkXGBRegressor(missing=0.0)
         model = regressor.fit(self.reg_df_sparse_train)
@@ -1001,6 +1012,7 @@ class XgboostLocalTest(SparkTestCase):
         for row1, row2 in zip(pred_result, pred_result2):
             self.assertTrue(np.isclose(row1.prediction, row2.prediction, atol=1e-3))
 
+    @pytest.mark.skipif(**no_sparse_unwrap())
     def test_classifier_with_sparse_optim(self):
         cls = SparkXGBClassifier(missing=0.0)
         model = cls.fit(self.cls_df_sparse_train)