diff --git a/python-package/xgboost/testing/__init__.py b/python-package/xgboost/testing/__init__.py
index 7bf3cf45b..70e536101 100644
--- a/python-package/xgboost/testing/__init__.py
+++ b/python-package/xgboost/testing/__init__.py
@@ -603,26 +603,6 @@ sparse_datasets_strategy = strategies.sampled_from(
     ]
 )
 
-_unweighted_datasets_strategy = strategies.sampled_from(
-    [
-        TestDataset(
-            "calif_housing", get_california_housing, "reg:squarederror", "rmse"
-        ),
-        TestDataset(
-            "calif_housing-l1", get_california_housing, "reg:absoluteerror", "mae"
-        ),
-        TestDataset("cancer", get_cancer, "binary:logistic", "logloss"),
-        TestDataset("sparse", get_sparse, "reg:squarederror", "rmse"),
-        TestDataset("sparse-l1", get_sparse, "reg:absoluteerror", "mae"),
-        TestDataset(
-            "empty",
-            lambda: (np.empty((0, 100)), np.empty(0)),
-            "reg:squarederror",
-            "rmse",
-        ),
-    ]
-)
-
 
 def make_datasets_with_margin(
     unweighted_strategy: strategies.SearchStrategy,
@@ -664,7 +644,28 @@ def make_datasets_with_margin(
 
 # A strategy for drawing from a set of example datasets. May add random weights to the
 # dataset
-dataset_strategy = make_datasets_with_margin(_unweighted_datasets_strategy)()
+@memory.cache
+def make_dataset_strategy() -> Callable:
+    _unweighted_datasets_strategy = strategies.sampled_from(
+        [
+            TestDataset(
+                "calif_housing", get_california_housing, "reg:squarederror", "rmse"
+            ),
+            TestDataset(
+                "calif_housing-l1", get_california_housing, "reg:absoluteerror", "mae"
+            ),
+            TestDataset("cancer", get_cancer, "binary:logistic", "logloss"),
+            TestDataset("sparse", get_sparse, "reg:squarederror", "rmse"),
+            TestDataset("sparse-l1", get_sparse, "reg:absoluteerror", "mae"),
+            TestDataset(
+                "empty",
+                lambda: (np.empty((0, 100)), np.empty(0)),
+                "reg:squarederror",
+                "rmse",
+            ),
+        ]
+    )
+    return make_datasets_with_margin(_unweighted_datasets_strategy)()
 
 
 _unweighted_multi_datasets_strategy = strategies.sampled_from(
diff --git a/tests/python-gpu/test_gpu_linear.py b/tests/python-gpu/test_gpu_linear.py
index 40c5d4845..04f9be256 100644
--- a/tests/python-gpu/test_gpu_linear.py
+++ b/tests/python-gpu/test_gpu_linear.py
@@ -28,8 +28,7 @@ def train_result(param, dmat, num_rounds):
 
 
 class TestGPULinear:
-    @given(parameter_strategy, strategies.integers(10, 50),
-           tm.dataset_strategy)
+    @given(parameter_strategy, strategies.integers(10, 50), tm.make_dataset_strategy())
     @settings(deadline=None, max_examples=20, print_blob=True)
     def test_gpu_coordinate(self, param, num_rounds, dataset):
         assume(len(dataset.y) > 0)
@@ -45,7 +44,7 @@ class TestGPULinear:
     @given(
         parameter_strategy,
         strategies.integers(10, 50),
-        tm.dataset_strategy,
+        tm.make_dataset_strategy(),
         strategies.floats(1e-5, 0.8),
         strategies.floats(1e-5, 0.8)
     )
diff --git a/tests/python-gpu/test_gpu_prediction.py b/tests/python-gpu/test_gpu_prediction.py
index c4d9abba5..dba2e9aeb 100644
--- a/tests/python-gpu/test_gpu_prediction.py
+++ b/tests/python-gpu/test_gpu_prediction.py
@@ -265,8 +265,9 @@ class TestGPUPredict:
         base_margin = cudf.Series(rng.randn(rows))
         self.run_inplace_base_margin(booster, dtrain, X, base_margin)
 
-    @given(strategies.integers(1, 10),
-           tm.dataset_strategy, shap_parameter_strategy)
+    @given(
+        strategies.integers(1, 10), tm.make_dataset_strategy(), shap_parameter_strategy
+    )
     @settings(deadline=None, max_examples=20, print_blob=True)
     def test_shap(self, num_rounds, dataset, param):
         if dataset.name.endswith("-l1"):  # not supported by the exact tree method
@@ -281,8 +282,9 @@ class TestGPUPredict:
         assume(len(dataset.y) > 0)
         assert np.allclose(np.sum(shap, axis=len(shap.shape) - 1), margin, 1e-3, 1e-3)
 
-    @given(strategies.integers(1, 10),
-           tm.dataset_strategy, shap_parameter_strategy)
+    @given(
+        strategies.integers(1, 10), tm.make_dataset_strategy(), shap_parameter_strategy
+    )
     @settings(deadline=None, max_examples=10, print_blob=True)
     def test_shap_interactions(self, num_rounds, dataset, param):
         if dataset.name.endswith("-l1"):  # not supported by the exact tree method
@@ -335,7 +337,7 @@ class TestGPUPredict:
 
         np.testing.assert_equal(cpu_leaf, gpu_leaf)
 
-    @given(predict_parameter_strategy, tm.dataset_strategy)
+    @given(predict_parameter_strategy, tm.make_dataset_strategy())
     @settings(deadline=None, max_examples=20, print_blob=True)
     def test_predict_leaf_gbtree(self, param, dataset):
         # Unsupported for random forest
@@ -346,7 +348,7 @@ class TestGPUPredict:
         param['tree_method'] = 'gpu_hist'
         self.run_predict_leaf_booster(param, 10, dataset)
 
-    @given(predict_parameter_strategy, tm.dataset_strategy)
+    @given(predict_parameter_strategy, tm.make_dataset_strategy())
     @settings(deadline=None, max_examples=20, print_blob=True)
     def test_predict_leaf_dart(self, param: dict, dataset: tm.TestDataset) -> None:
         # Unsupported for random forest
diff --git a/tests/python-gpu/test_gpu_updaters.py b/tests/python-gpu/test_gpu_updaters.py
index 75e403dbe..8522f41d3 100644
--- a/tests/python-gpu/test_gpu_updaters.py
+++ b/tests/python-gpu/test_gpu_updaters.py
@@ -48,7 +48,9 @@ class TestGPUUpdatersMulti:
 class TestGPUUpdaters:
     cputest = test_up.TestTreeMethod()
 
-    @given(hist_parameter_strategy, strategies.integers(1, 20), tm.dataset_strategy)
+    @given(
+        hist_parameter_strategy, strategies.integers(1, 20), tm.make_dataset_strategy()
+    )
     @settings(deadline=None, max_examples=50, print_blob=True)
     def test_gpu_hist(self, param, num_rounds, dataset):
         param["tree_method"] = "gpu_hist"
@@ -150,7 +152,7 @@ class TestGPUUpdaters:
     @given(
         hist_parameter_strategy,
         strategies.integers(1, 20),
-        tm.dataset_strategy
+        tm.make_dataset_strategy(),
     )
     @settings(deadline=None, max_examples=20, print_blob=True)
     def test_gpu_hist_device_dmatrix(
@@ -171,7 +173,7 @@ class TestGPUUpdaters:
     @given(
         hist_parameter_strategy,
        strategies.integers(1, 3),
-        tm.dataset_strategy
+        tm.make_dataset_strategy(),
     )
     @settings(deadline=None, max_examples=10, print_blob=True)
     def test_external_memory(self, param, num_rounds, dataset):
@@ -213,7 +215,7 @@ class TestGPUUpdaters:
         np.testing.assert_allclose(predictions, 0.0, 1e-6)
 
     @pytest.mark.mgpu
-    @given(tm.dataset_strategy, strategies.integers(0, 10))
+    @given(tm.make_dataset_strategy(), strategies.integers(0, 10))
     @settings(deadline=None, max_examples=10, print_blob=True)
     def test_specified_gpu_id_gpu_update(self, dataset, gpu_id):
         param = {'tree_method': 'gpu_hist', 'gpu_id': gpu_id}
diff --git a/tests/python/test_linear.py b/tests/python/test_linear.py
index a7b0dccdb..0a198a036 100644
--- a/tests/python/test_linear.py
+++ b/tests/python/test_linear.py
@@ -28,8 +28,12 @@ def train_result(param, dmat, num_rounds):
 
 
 class TestLinear:
-    @given(parameter_strategy, strategies.integers(10, 50),
-           tm.dataset_strategy, coord_strategy)
+    @given(
+        parameter_strategy,
+        strategies.integers(10, 50),
+        tm.make_dataset_strategy(),
+        coord_strategy
+    )
     @settings(deadline=None, max_examples=20, print_blob=True)
     def test_coordinate(self, param, num_rounds, dataset, coord_param):
         param['updater'] = 'coord_descent'
@@ -45,7 +49,7 @@ class TestLinear:
     @given(
         parameter_strategy,
         strategies.integers(10, 50),
-        tm.dataset_strategy,
+        tm.make_dataset_strategy(),
         coord_strategy,
         strategies.floats(1e-5, 0.8),
         strategies.floats(1e-5, 0.8)
@@ -61,8 +65,9 @@ class TestLinear:
         note(result)
         assert tm.non_increasing([result[0], result[-1]])
 
-    @given(parameter_strategy, strategies.integers(10, 50),
-           tm.dataset_strategy)
+    @given(
+        parameter_strategy, strategies.integers(10, 50), tm.make_dataset_strategy()
+    )
     @settings(deadline=None, max_examples=20, print_blob=True)
     def test_shotgun(self, param, num_rounds, dataset):
         param['updater'] = 'shotgun'
@@ -77,9 +82,13 @@ class TestLinear:
         sampled_result = result
         assert tm.non_increasing(sampled_result)
 
-    @given(parameter_strategy, strategies.integers(10, 50),
-           tm.dataset_strategy, strategies.floats(1e-5, 1.0),
-           strategies.floats(1e-5, 1.0))
+    @given(
+        parameter_strategy,
+        strategies.integers(10, 50),
+        tm.make_dataset_strategy(),
+        strategies.floats(1e-5, 1.0),
+        strategies.floats(1e-5, 1.0)
+    )
     @settings(deadline=None, max_examples=20, print_blob=True)
     def test_shotgun_regularised(self, param, num_rounds, dataset, alpha, lambd):
         param['updater'] = 'shotgun'
diff --git a/tests/python/test_updaters.py b/tests/python/test_updaters.py
index 78097a4ea..0a9013eaa 100644
--- a/tests/python/test_updaters.py
+++ b/tests/python/test_updaters.py
@@ -87,8 +87,9 @@ class TestTreeMethod:
     USE_ONEHOT = np.iinfo(np.int32).max
     USE_PART = 1
 
-    @given(exact_parameter_strategy, strategies.integers(1, 20),
-           tm.dataset_strategy)
+    @given(
+        exact_parameter_strategy, strategies.integers(1, 20), tm.make_dataset_strategy()
+    )
     @settings(deadline=None, print_blob=True)
     def test_exact(self, param, num_rounds, dataset):
         if dataset.name.endswith("-l1"):
@@ -102,7 +103,7 @@ class TestTreeMethod:
         exact_parameter_strategy,
         hist_parameter_strategy,
         strategies.integers(1, 20),
-        tm.dataset_strategy,
+        tm.make_dataset_strategy(),
     )
     @settings(deadline=None, print_blob=True)
     def test_approx(self, param, hist_param, num_rounds, dataset):
@@ -141,7 +142,7 @@ class TestTreeMethod:
         exact_parameter_strategy,
         hist_parameter_strategy,
         strategies.integers(1, 20),
-        tm.dataset_strategy
+        tm.make_dataset_strategy()
     )
     @settings(deadline=None, print_blob=True)
     def test_hist(self, param: dict, hist_param: dict, num_rounds: int, dataset: tm.TestDataset) -> None:
diff --git a/tests/test_distributed/test_gpu_with_dask/test_gpu_with_dask.py b/tests/test_distributed/test_gpu_with_dask/test_gpu_with_dask.py
index 2e3b031c1..503169d2c 100644
--- a/tests/test_distributed/test_gpu_with_dask/test_gpu_with_dask.py
+++ b/tests/test_distributed/test_gpu_with_dask/test_gpu_with_dask.py
@@ -243,7 +243,7 @@ class TestDistributedGPU:
     @given(
         params=hist_parameter_strategy,
         num_rounds=strategies.integers(1, 20),
-        dataset=tm.dataset_strategy,
+        dataset=tm.make_dataset_strategy(),
         dmatrix_type=strategies.sampled_from(
             [dxgb.DaskDMatrix, dxgb.DaskQuantileDMatrix]
         ),
diff --git a/tests/test_distributed/test_with_dask/test_with_dask.py b/tests/test_distributed/test_with_dask/test_with_dask.py
index 5e9303a46..c119d3940 100644
--- a/tests/test_distributed/test_with_dask/test_with_dask.py
+++ b/tests/test_distributed/test_with_dask/test_with_dask.py
@@ -1458,9 +1458,10 @@ class TestWithDask:
         else:
             assert history[-1] < history[0]
 
-    @given(params=hist_parameter_strategy,
-           dataset=tm.dataset_strategy)
-    @settings(deadline=None, max_examples=10, suppress_health_check=suppress, print_blob=True)
+    @given(params=hist_parameter_strategy, dataset=tm.make_dataset_strategy())
+    @settings(
+        deadline=None, max_examples=10, suppress_health_check=suppress, print_blob=True
+    )
     def test_hist(
         self, params: Dict, dataset: tm.TestDataset, client: "Client"
     ) -> None:
@@ -1524,7 +1525,7 @@ class TestWithDask:
         rmse = result["history"]["Valid"]["rmse"][-1]
         assert rmse < 32.0
 
-    @given(params=hist_parameter_strategy, dataset=tm.dataset_strategy)
+    @given(params=hist_parameter_strategy, dataset=tm.make_dataset_strategy())
     @settings(
         deadline=None, max_examples=10, suppress_health_check=suppress, print_blob=True
     )
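Usage note (not part of the patch): every call site above replaces the module-level tm.dataset_strategy with a call to the new tm.make_dataset_strategy() factory, so the sampled-from strategy is built when the factory is invoked rather than when xgboost.testing is imported. Below is a minimal, hypothetical smoke test sketching that pattern; the test name and settings are illustrative, and only names visible in the patch (make_dataset_strategy, TestDataset, dataset.y, dataset.name) are assumed to exist.

from hypothesis import assume, given, settings

from xgboost import testing as tm


@given(tm.make_dataset_strategy())
@settings(deadline=None, max_examples=5, print_blob=True)
def test_dataset_strategy_smoke(dataset: tm.TestDataset) -> None:
    # Each draw is a TestDataset; the "empty" dataset has zero rows, so skip
    # it with assume(), as the existing tests do.
    assume(len(dataset.y) > 0)
    assert dataset.name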