Extract dask and spark test into distributed test. (#8395)

- Move test files.
- Run spark and dask separately to prevent conflicts.
- Gather common code into the testing module.
This commit is contained in:
Jiaming Yuan
2022-10-28 16:24:32 +08:00
committed by GitHub
parent f73520bfff
commit cfd2a9f872
34 changed files with 405 additions and 337 deletions

View File

@@ -7,26 +7,16 @@ from hypothesis import assume, given, note, settings, strategies
import xgboost as xgb
from xgboost import testing as tm
from xgboost.testing.params import (
hist_parameter_strategy,
cat_parameter_strategy,
)
sys.path.append("tests/python")
import test_updaters as test_up
pytestmark = tm.timeout(30)
parameter_strategy = strategies.fixed_dictionaries({
'max_depth': strategies.integers(0, 11),
'max_leaves': strategies.integers(0, 256),
'max_bin': strategies.integers(2, 1024),
'grow_policy': strategies.sampled_from(['lossguide', 'depthwise']),
'min_child_weight': strategies.floats(0.5, 2.0),
'seed': strategies.integers(0, 10),
# We cannot enable subsampling as the training loss can increase
# 'subsample': strategies.floats(0.5, 1.0),
'colsample_bytree': strategies.floats(0.5, 1.0),
'colsample_bylevel': strategies.floats(0.5, 1.0),
}).filter(lambda x: (x['max_depth'] > 0 or x['max_leaves'] > 0) and (
x['max_depth'] > 0 or x['grow_policy'] == 'lossguide'))
def train_result(param, dmat: xgb.DMatrix, num_rounds: int) -> dict:
result: xgb.callback.TrainingCallback.EvalsLog = {}
@@ -47,7 +37,7 @@ def train_result(param, dmat: xgb.DMatrix, num_rounds: int) -> dict:
class TestGPUUpdaters:
cputest = test_up.TestTreeMethod()
@given(parameter_strategy, strategies.integers(1, 20), tm.dataset_strategy)
@given(hist_parameter_strategy, strategies.integers(1, 20), tm.dataset_strategy)
@settings(deadline=None, max_examples=50, print_blob=True)
def test_gpu_hist(self, param, num_rounds, dataset):
param["tree_method"] = "gpu_hist"
@@ -82,9 +72,8 @@ class TestGPUUpdaters:
@given(
tm.categorical_dataset_strategy,
test_up.exact_parameter_strategy,
test_up.hist_parameter_strategy,
test_up.cat_parameter_strategy,
hist_parameter_strategy,
cat_parameter_strategy,
strategies.integers(4, 32),
)
@settings(deadline=None, max_examples=20, print_blob=True)
@@ -92,12 +81,10 @@ class TestGPUUpdaters:
def test_categorical(
self,
dataset: tm.TestDataset,
exact_parameters: Dict[str, Any],
hist_parameters: Dict[str, Any],
cat_parameters: Dict[str, Any],
n_rounds: int,
) -> None:
cat_parameters.update(exact_parameters)
cat_parameters.update(hist_parameters)
cat_parameters["tree_method"] = "gpu_hist"
@@ -105,8 +92,8 @@ class TestGPUUpdaters:
tm.non_increasing(results["train"]["rmse"])
@given(
test_up.hist_parameter_strategy,
test_up.cat_parameter_strategy,
hist_parameter_strategy,
cat_parameter_strategy,
)
@settings(deadline=None, max_examples=10, print_blob=True)
def test_categorical_ames_housing(
@@ -149,8 +136,11 @@ class TestGPUUpdaters:
self.cputest.run_invalid_category("gpu_hist")
@pytest.mark.skipif(**tm.no_cupy())
@given(parameter_strategy, strategies.integers(1, 20),
tm.dataset_strategy)
@given(
hist_parameter_strategy,
strategies.integers(1, 20),
tm.dataset_strategy
)
@settings(deadline=None, max_examples=20, print_blob=True)
def test_gpu_hist_device_dmatrix(self, param, num_rounds, dataset):
# We cannot handle empty dataset yet
@@ -161,8 +151,11 @@ class TestGPUUpdaters:
note(result)
assert tm.non_increasing(result['train'][dataset.metric], tolerance=1e-3)
@given(parameter_strategy, strategies.integers(1, 3),
tm.dataset_strategy)
@given(
hist_parameter_strategy,
strategies.integers(1, 3),
tm.dataset_strategy
)
@settings(deadline=None, max_examples=10, print_blob=True)
def test_external_memory(self, param, num_rounds, dataset):
if dataset.name.endswith("-l1"):