diff --git a/python-package/xgboost/testing/__init__.py b/python-package/xgboost/testing/__init__.py
index 7bf3cf45b..70e536101 100644
--- a/python-package/xgboost/testing/__init__.py
+++ b/python-package/xgboost/testing/__init__.py
@@ -603,26 +603,6 @@ sparse_datasets_strategy = strategies.sampled_from(
     ]
 )
 
-_unweighted_datasets_strategy = strategies.sampled_from(
-    [
-        TestDataset(
-            "calif_housing", get_california_housing, "reg:squarederror", "rmse"
-        ),
-        TestDataset(
-            "calif_housing-l1", get_california_housing, "reg:absoluteerror", "mae"
-        ),
-        TestDataset("cancer", get_cancer, "binary:logistic", "logloss"),
-        TestDataset("sparse", get_sparse, "reg:squarederror", "rmse"),
-        TestDataset("sparse-l1", get_sparse, "reg:absoluteerror", "mae"),
-        TestDataset(
-            "empty",
-            lambda: (np.empty((0, 100)), np.empty(0)),
-            "reg:squarederror",
-            "rmse",
-        ),
-    ]
-)
-
 
 def make_datasets_with_margin(
     unweighted_strategy: strategies.SearchStrategy,
@@ -664,7 +644,28 @@ def make_datasets_with_margin(
 
 # A strategy for drawing from a set of example datasets. May add random weights to the
 # dataset
-dataset_strategy = make_datasets_with_margin(_unweighted_datasets_strategy)()
+@memory.cache
+def make_dataset_strategy() -> Callable:
+    _unweighted_datasets_strategy = strategies.sampled_from(
+        [
+            TestDataset(
+                "calif_housing", get_california_housing, "reg:squarederror", "rmse"
+            ),
+            TestDataset(
+                "calif_housing-l1", get_california_housing, "reg:absoluteerror", "mae"
+            ),
+            TestDataset("cancer", get_cancer, "binary:logistic", "logloss"),
+            TestDataset("sparse", get_sparse, "reg:squarederror", "rmse"),
+            TestDataset("sparse-l1", get_sparse, "reg:absoluteerror", "mae"),
+            TestDataset(
+                "empty",
+                lambda: (np.empty((0, 100)), np.empty(0)),
+                "reg:squarederror",
+                "rmse",
+            ),
+        ]
+    )
+    return make_datasets_with_margin(_unweighted_datasets_strategy)()
 
 
 _unweighted_multi_datasets_strategy = strategies.sampled_from(
diff --git a/tests/python-gpu/test_gpu_linear.py b/tests/python-gpu/test_gpu_linear.py
index 40c5d4845..04f9be256 100644
--- a/tests/python-gpu/test_gpu_linear.py
+++ b/tests/python-gpu/test_gpu_linear.py
@@ -28,8 +28,7 @@ def train_result(param, dmat, num_rounds):
 
 
 class TestGPULinear:
-    @given(parameter_strategy, strategies.integers(10, 50),
-           tm.dataset_strategy)
+    @given(parameter_strategy, strategies.integers(10, 50), tm.make_dataset_strategy())
     @settings(deadline=None, max_examples=20, print_blob=True)
     def test_gpu_coordinate(self, param, num_rounds, dataset):
         assume(len(dataset.y) > 0)
@@ -45,7 +44,7 @@ class TestGPULinear:
     @given(
         parameter_strategy,
         strategies.integers(10, 50),
-        tm.dataset_strategy,
+        tm.make_dataset_strategy(),
         strategies.floats(1e-5, 0.8),
         strategies.floats(1e-5, 0.8)
     )
diff --git a/tests/python-gpu/test_gpu_prediction.py b/tests/python-gpu/test_gpu_prediction.py
index c4d9abba5..dba2e9aeb 100644
--- a/tests/python-gpu/test_gpu_prediction.py
+++ b/tests/python-gpu/test_gpu_prediction.py
@@ -265,8 +265,9 @@ class TestGPUPredict:
         base_margin = cudf.Series(rng.randn(rows))
         self.run_inplace_base_margin(booster, dtrain, X, base_margin)
 
-    @given(strategies.integers(1, 10),
-           tm.dataset_strategy, shap_parameter_strategy)
+    @given(
+        strategies.integers(1, 10), tm.make_dataset_strategy(), shap_parameter_strategy
+    )
     @settings(deadline=None, max_examples=20, print_blob=True)
     def test_shap(self, num_rounds, dataset, param):
         if dataset.name.endswith("-l1"):  # not supported by the exact tree method
@@ -281,8 +282,9 @@ class TestGPUPredict:
         assume(len(dataset.y) > 0)
         assert np.allclose(np.sum(shap, axis=len(shap.shape) - 1), margin, 1e-3, 1e-3)
 
-    @given(strategies.integers(1, 10),
-           tm.dataset_strategy, shap_parameter_strategy)
+    @given(
+        strategies.integers(1, 10), tm.make_dataset_strategy(), shap_parameter_strategy
+    )
     @settings(deadline=None, max_examples=10, print_blob=True)
     def test_shap_interactions(self, num_rounds, dataset, param):
         if dataset.name.endswith("-l1"):  # not supported by the exact tree method
@@ -335,7 +337,7 @@ class TestGPUPredict:
 
         np.testing.assert_equal(cpu_leaf, gpu_leaf)
 
-    @given(predict_parameter_strategy, tm.dataset_strategy)
+    @given(predict_parameter_strategy, tm.make_dataset_strategy())
     @settings(deadline=None, max_examples=20, print_blob=True)
     def test_predict_leaf_gbtree(self, param, dataset):
         # Unsupported for random forest
@@ -346,7 +348,7 @@ class TestGPUPredict:
         param['tree_method'] = 'gpu_hist'
         self.run_predict_leaf_booster(param, 10, dataset)
 
-    @given(predict_parameter_strategy, tm.dataset_strategy)
+    @given(predict_parameter_strategy, tm.make_dataset_strategy())
     @settings(deadline=None, max_examples=20, print_blob=True)
     def test_predict_leaf_dart(self, param: dict, dataset: tm.TestDataset) -> None:
         # Unsupported for random forest
diff --git a/tests/python-gpu/test_gpu_updaters.py b/tests/python-gpu/test_gpu_updaters.py
index 75e403dbe..8522f41d3 100644
--- a/tests/python-gpu/test_gpu_updaters.py
+++ b/tests/python-gpu/test_gpu_updaters.py
@@ -48,7 +48,9 @@ class TestGPUUpdatersMulti:
 class TestGPUUpdaters:
     cputest = test_up.TestTreeMethod()
 
-    @given(hist_parameter_strategy, strategies.integers(1, 20), tm.dataset_strategy)
+    @given(
+        hist_parameter_strategy, strategies.integers(1, 20), tm.make_dataset_strategy()
+    )
     @settings(deadline=None, max_examples=50, print_blob=True)
     def test_gpu_hist(self, param, num_rounds, dataset):
         param["tree_method"] = "gpu_hist"
@@ -150,7 +152,7 @@ class TestGPUUpdaters:
     @given(
         hist_parameter_strategy,
         strategies.integers(1, 20),
-        tm.dataset_strategy
+        tm.make_dataset_strategy(),
     )
     @settings(deadline=None, max_examples=20, print_blob=True)
     def test_gpu_hist_device_dmatrix(
@@ -171,7 +173,7 @@ class TestGPUUpdaters:
     @given(
         hist_parameter_strategy,
        strategies.integers(1, 3),
-        tm.dataset_strategy
+        tm.make_dataset_strategy(),
     )
     @settings(deadline=None, max_examples=10, print_blob=True)
     def test_external_memory(self, param, num_rounds, dataset):
@@ -213,7 +215,7 @@ class TestGPUUpdaters:
         np.testing.assert_allclose(predictions, 0.0, 1e-6)
 
     @pytest.mark.mgpu
-    @given(tm.dataset_strategy, strategies.integers(0, 10))
+    @given(tm.make_dataset_strategy(), strategies.integers(0, 10))
     @settings(deadline=None, max_examples=10, print_blob=True)
     def test_specified_gpu_id_gpu_update(self, dataset, gpu_id):
         param = {'tree_method': 'gpu_hist', 'gpu_id': gpu_id}
diff --git a/tests/python/test_linear.py b/tests/python/test_linear.py
index a7b0dccdb..0a198a036 100644
--- a/tests/python/test_linear.py
+++ b/tests/python/test_linear.py
@@ -28,8 +28,12 @@ def train_result(param, dmat, num_rounds):
 
 
 class TestLinear:
-    @given(parameter_strategy, strategies.integers(10, 50),
-           tm.dataset_strategy, coord_strategy)
+    @given(
+        parameter_strategy,
+        strategies.integers(10, 50),
+        tm.make_dataset_strategy(),
+        coord_strategy
+    )
     @settings(deadline=None, max_examples=20, print_blob=True)
     def test_coordinate(self, param, num_rounds, dataset, coord_param):
         param['updater'] = 'coord_descent'
@@ -45,7 +49,7 @@ class TestLinear:
     @given(
         parameter_strategy,
         strategies.integers(10, 50),
-        tm.dataset_strategy,
+        tm.make_dataset_strategy(),
         coord_strategy,
         strategies.floats(1e-5, 0.8),
         strategies.floats(1e-5, 0.8)
@@ -61,8 +65,9 @@ class TestLinear:
         note(result)
         assert tm.non_increasing([result[0], result[-1]])
 
-    @given(parameter_strategy, strategies.integers(10, 50),
-           tm.dataset_strategy)
+    @given(
+        parameter_strategy, strategies.integers(10, 50), tm.make_dataset_strategy()
+    )
     @settings(deadline=None, max_examples=20, print_blob=True)
     def test_shotgun(self, param, num_rounds, dataset):
         param['updater'] = 'shotgun'
@@ -77,9 +82,13 @@ class TestLinear:
         sampled_result = result
         assert tm.non_increasing(sampled_result)
 
-    @given(parameter_strategy, strategies.integers(10, 50),
-           tm.dataset_strategy, strategies.floats(1e-5, 1.0),
-           strategies.floats(1e-5, 1.0))
+    @given(
+        parameter_strategy,
+        strategies.integers(10, 50),
+        tm.make_dataset_strategy(),
+        strategies.floats(1e-5, 1.0),
+        strategies.floats(1e-5, 1.0)
+    )
     @settings(deadline=None, max_examples=20, print_blob=True)
     def test_shotgun_regularised(self, param, num_rounds, dataset, alpha, lambd):
         param['updater'] = 'shotgun'
diff --git a/tests/python/test_updaters.py b/tests/python/test_updaters.py
index 78097a4ea..0a9013eaa 100644
--- a/tests/python/test_updaters.py
+++ b/tests/python/test_updaters.py
@@ -87,8 +87,9 @@ class TestTreeMethod:
     USE_ONEHOT = np.iinfo(np.int32).max
     USE_PART = 1
 
-    @given(exact_parameter_strategy, strategies.integers(1, 20),
-           tm.dataset_strategy)
+    @given(
+        exact_parameter_strategy, strategies.integers(1, 20), tm.make_dataset_strategy()
+    )
     @settings(deadline=None, print_blob=True)
     def test_exact(self, param, num_rounds, dataset):
         if dataset.name.endswith("-l1"):
@@ -102,7 +103,7 @@ class TestTreeMethod:
         exact_parameter_strategy,
         hist_parameter_strategy,
         strategies.integers(1, 20),
-        tm.dataset_strategy,
+        tm.make_dataset_strategy(),
     )
     @settings(deadline=None, print_blob=True)
     def test_approx(self, param, hist_param, num_rounds, dataset):
@@ -141,7 +142,7 @@ class TestTreeMethod:
         exact_parameter_strategy,
         hist_parameter_strategy,
         strategies.integers(1, 20),
-        tm.dataset_strategy
+        tm.make_dataset_strategy()
     )
     @settings(deadline=None, print_blob=True)
     def test_hist(self, param: dict, hist_param: dict, num_rounds: int, dataset: tm.TestDataset) -> None:
diff --git a/tests/test_distributed/test_gpu_with_dask/test_gpu_with_dask.py b/tests/test_distributed/test_gpu_with_dask/test_gpu_with_dask.py
index 2e3b031c1..503169d2c 100644
--- a/tests/test_distributed/test_gpu_with_dask/test_gpu_with_dask.py
+++ b/tests/test_distributed/test_gpu_with_dask/test_gpu_with_dask.py
@@ -243,7 +243,7 @@ class TestDistributedGPU:
     @given(
         params=hist_parameter_strategy,
         num_rounds=strategies.integers(1, 20),
-        dataset=tm.dataset_strategy,
+        dataset=tm.make_dataset_strategy(),
         dmatrix_type=strategies.sampled_from(
             [dxgb.DaskDMatrix, dxgb.DaskQuantileDMatrix]
         ),
diff --git a/tests/test_distributed/test_with_dask/test_with_dask.py b/tests/test_distributed/test_with_dask/test_with_dask.py
index 5e9303a46..c119d3940 100644
--- a/tests/test_distributed/test_with_dask/test_with_dask.py
+++ b/tests/test_distributed/test_with_dask/test_with_dask.py
@@ -1458,9 +1458,10 @@ class TestWithDask:
         else:
             assert history[-1] < history[0]
 
-    @given(params=hist_parameter_strategy,
-           dataset=tm.dataset_strategy)
-    @settings(deadline=None, max_examples=10, suppress_health_check=suppress, print_blob=True)
+    @given(params=hist_parameter_strategy, dataset=tm.make_dataset_strategy())
+    @settings(
+        deadline=None, max_examples=10, suppress_health_check=suppress, print_blob=True
+    )
     def test_hist(
         self, params: Dict, dataset: tm.TestDataset, client: "Client"
     ) -> None:
@@ -1524,7 +1525,7 @@ class TestWithDask:
         rmse = result["history"]["Valid"]["rmse"][-1]
         assert rmse < 32.0
 
-    @given(params=hist_parameter_strategy, dataset=tm.dataset_strategy)
+    @given(params=hist_parameter_strategy, dataset=tm.make_dataset_strategy())
     @settings(
         deadline=None, max_examples=10, suppress_health_check=suppress, print_blob=True
     )
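Usage note (not part of the patch): every call site above replaces the module-level tm.dataset_strategy with a call to the new tm.make_dataset_strategy() factory, so the sampled-from strategy is built when the factory is invoked rather than when xgboost.testing is imported. Below is a minimal, hypothetical smoke test sketching that pattern; the test name and settings are illustrative, and only names visible in the patch (make_dataset_strategy, TestDataset, dataset.y, dataset.name) are assumed to exist.

from hypothesis import assume, given, settings

from xgboost import testing as tm


@given(tm.make_dataset_strategy())
@settings(deadline=None, max_examples=5, print_blob=True)
def test_dataset_strategy_smoke(dataset: tm.TestDataset) -> None:
    # Each draw is a TestDataset; the "empty" dataset has zero rows, so skip
    # it with assume(), as the existing tests do.
    assume(len(dataset.y) > 0)
    assert dataset.name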