Extract dask and spark tests into distributed tests. (#8395)

- Move test files.
- Run spark and dask separately to prevent conflicts.
- Gather common code into the testing module.
2022-10-28 16:24:32 +08:00
parent f73520bfff
commit cfd2a9f872
34 changed files with 405 additions and 337 deletions
--- a/tests/python-gpu/test_gpu_updaters.py
+++ b/tests/python-gpu/test_gpu_updaters.py
@@ -7,26 +7,16 @@ from hypothesis import assume, given, note, settings, strategies

 import xgboost as xgb
 from xgboost import testing as tm
+from xgboost.testing.params import (
+    hist_parameter_strategy,
+    cat_parameter_strategy,
+)

 sys.path.append("tests/python")
 import test_updaters as test_up

 pytestmark = tm.timeout(30)

-parameter_strategy = strategies.fixed_dictionaries({
-    'max_depth': strategies.integers(0, 11),
-    'max_leaves': strategies.integers(0, 256),
-    'max_bin': strategies.integers(2, 1024),
-    'grow_policy': strategies.sampled_from(['lossguide', 'depthwise']),
-    'min_child_weight': strategies.floats(0.5, 2.0),
-    'seed': strategies.integers(0, 10),
-    # We cannot enable subsampling as the training loss can increase
-    # 'subsample': strategies.floats(0.5, 1.0),
-    'colsample_bytree': strategies.floats(0.5, 1.0),
-    'colsample_bylevel': strategies.floats(0.5, 1.0),
-}).filter(lambda x: (x['max_depth'] > 0 or x['max_leaves'] > 0) and (
-    x['max_depth'] > 0 or x['grow_policy'] == 'lossguide'))
-

 def train_result(param, dmat: xgb.DMatrix, num_rounds: int) -> dict:
    result: xgb.callback.TrainingCallback.EvalsLog = {}
@@ -47,7 +37,7 @@ def train_result(param, dmat: xgb.DMatrix, num_rounds: int) -> dict:
 class TestGPUUpdaters:
    cputest = test_up.TestTreeMethod()

-    @given(parameter_strategy, strategies.integers(1, 20), tm.dataset_strategy)
+    @given(hist_parameter_strategy, strategies.integers(1, 20), tm.dataset_strategy)
    @settings(deadline=None, max_examples=50, print_blob=True)
    def test_gpu_hist(self, param, num_rounds, dataset):
        param["tree_method"] = "gpu_hist"
@@ -82,9 +72,8 @@ class TestGPUUpdaters:

    @given(
        tm.categorical_dataset_strategy,
-        test_up.exact_parameter_strategy,
-        test_up.hist_parameter_strategy,
-        test_up.cat_parameter_strategy,
+        hist_parameter_strategy,
+        cat_parameter_strategy,
        strategies.integers(4, 32),
    )
    @settings(deadline=None, max_examples=20, print_blob=True)
@@ -92,12 +81,10 @@ class TestGPUUpdaters:
    def test_categorical(
        self,
        dataset: tm.TestDataset,
-        exact_parameters: Dict[str, Any],
        hist_parameters: Dict[str, Any],
        cat_parameters: Dict[str, Any],
        n_rounds: int,
    ) -> None:
-        cat_parameters.update(exact_parameters)
        cat_parameters.update(hist_parameters)
        cat_parameters["tree_method"] = "gpu_hist"

@@ -105,8 +92,8 @@ class TestGPUUpdaters:
        tm.non_increasing(results["train"]["rmse"])

    @given(
-        test_up.hist_parameter_strategy,
-        test_up.cat_parameter_strategy,
+        hist_parameter_strategy,
+        cat_parameter_strategy,
    )
    @settings(deadline=None, max_examples=10, print_blob=True)
    def test_categorical_ames_housing(
@@ -149,8 +136,11 @@ class TestGPUUpdaters:
        self.cputest.run_invalid_category("gpu_hist")

    @pytest.mark.skipif(**tm.no_cupy())
-    @given(parameter_strategy, strategies.integers(1, 20),
-           tm.dataset_strategy)
+    @given(
+        hist_parameter_strategy,
+        strategies.integers(1, 20),
+        tm.dataset_strategy
+    )
    @settings(deadline=None, max_examples=20, print_blob=True)
    def test_gpu_hist_device_dmatrix(self, param, num_rounds, dataset):
        # We cannot handle empty dataset yet
@@ -161,8 +151,11 @@ class TestGPUUpdaters:
        note(result)
        assert tm.non_increasing(result['train'][dataset.metric], tolerance=1e-3)

-    @given(parameter_strategy, strategies.integers(1, 3),
-           tm.dataset_strategy)
+    @given(
+        hist_parameter_strategy,
+        strategies.integers(1, 3),
+        tm.dataset_strategy
+    )
    @settings(deadline=None, max_examples=10, print_blob=True)
    def test_external_memory(self, param, num_rounds, dataset):
        if dataset.name.endswith("-l1"):