diff --git a/demo/guide-python/sklearn_examples.py b/demo/guide-python/sklearn_examples.py
index e8bcc676d..b234da175 100644
--- a/demo/guide-python/sklearn_examples.py
+++ b/demo/guide-python/sklearn_examples.py
@@ -50,8 +50,8 @@ for train_index, test_index in kf.split(X):
 print("Parameter optimization")
 xgb_model = xgb.XGBRegressor(n_jobs=1)
 clf = GridSearchCV(xgb_model,
-                   {'max_depth': [2, 4, 6],
-                    'n_estimators': [50, 100, 200]}, verbose=1, n_jobs=1)
+                   {'max_depth': [2, 4],
+                    'n_estimators': [50, 100]}, verbose=1, n_jobs=1, cv=3)
 clf.fit(X, y)
 print(clf.best_score_)
 print(clf.best_params_)
diff --git a/tests/ci_build/Dockerfile.gpu b/tests/ci_build/Dockerfile.gpu
index 5c7d19ee6..d149638ac 100644
--- a/tests/ci_build/Dockerfile.gpu
+++ b/tests/ci_build/Dockerfile.gpu
@@ -24,7 +24,7 @@ RUN \
     mamba create -n gpu_test -c rapidsai-nightly -c rapidsai -c nvidia -c conda-forge -c defaults \
         python=3.9 cudf=$RAPIDS_VERSION_ARG* rmm=$RAPIDS_VERSION_ARG* cudatoolkit=$CUDA_VERSION_ARG \
         dask dask-cuda=$RAPIDS_VERSION_ARG* dask-cudf=$RAPIDS_VERSION_ARG* cupy \
-        numpy pytest scipy scikit-learn pandas matplotlib wheel python-kubernetes urllib3 graphviz hypothesis \
+        numpy pytest pytest-timeout scipy scikit-learn pandas matplotlib wheel python-kubernetes urllib3 graphviz hypothesis \
         pyspark cloudpickle cuda-python=11.7.0 && \
     mamba clean --all && \
     conda run --no-capture-output -n gpu_test pip install buildkite-test-collector
diff --git a/tests/ci_build/conda_env/cpu_test.yml b/tests/ci_build/conda_env/cpu_test.yml
index e1e70d812..98c7a5928 100644
--- a/tests/ci_build/conda_env/cpu_test.yml
+++ b/tests/ci_build/conda_env/cpu_test.yml
@@ -22,6 +22,7 @@ dependencies:
 - sh
 - mock
 - pytest
+- pytest-timeout
 - pytest-cov
 - python-kubernetes
 - urllib3
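Note: pytest-timeout is added to both CI environments above because the rest of this patch depends on it; the plugin is what enforces the module-level `pytestmark = pytest.mark.timeout(...)` markers introduced in the test files below. A minimal sketch of the pattern, with hypothetical test names:

    import time

    import pytest

    # With the pytest-timeout plugin installed, a module-level marker applies
    # to every test in the file; any test running longer than the budget is
    # aborted and reported as a failure.
    pytestmark = pytest.mark.timeout(10)


    def test_fast() -> None:
        time.sleep(1)  # well inside the 10-second budget, so this passes


    @pytest.mark.timeout(60)  # a per-test marker can override the module default
    def test_needs_longer() -> None:
        time.sleep(2)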
diff --git a/tests/python-gpu/test_gpu_data_iterator.py b/tests/python-gpu/test_gpu_data_iterator.py
index 3a31c93b0..9753a51e0 100644
--- a/tests/python-gpu/test_gpu_data_iterator.py
+++ b/tests/python-gpu/test_gpu_data_iterator.py
@@ -5,10 +5,9 @@ import pytest
 import sys
 
 sys.path.append("tests/python")
-from test_data_iterator import SingleBatch, make_batches
 from test_data_iterator import test_single_batch as cpu_single_batch
 from test_data_iterator import run_data_iterator
-from testing import IteratorForTest, no_cupy
+from testing import no_cupy
 
 
 def test_gpu_single_batch() -> None:
@@ -21,16 +20,14 @@ def test_gpu_single_batch() -> None:
     strategies.integers(1, 7),
     strategies.integers(0, 8),
     strategies.booleans(),
+    strategies.booleans(),
 )
-@settings(deadline=None, print_blob=True)
+@settings(deadline=None, max_examples=10, print_blob=True)
 def test_gpu_data_iterator(
-    n_samples_per_batch: int, n_features: int, n_batches: int, subsample: bool
+    n_samples_per_batch: int, n_features: int, n_batches: int, subsample: bool, use_cupy: bool
 ) -> None:
     run_data_iterator(
-        n_samples_per_batch, n_features, n_batches, "gpu_hist", subsample, True
-    )
-    run_data_iterator(
-        n_samples_per_batch, n_features, n_batches, "gpu_hist", subsample, False
+        n_samples_per_batch, n_features, n_batches, "gpu_hist", subsample, use_cupy
     )
diff --git a/tests/python-gpu/test_gpu_linear.py b/tests/python-gpu/test_gpu_linear.py
index 7bad8b72f..4ac1d481e 100644
--- a/tests/python-gpu/test_gpu_linear.py
+++ b/tests/python-gpu/test_gpu_linear.py
@@ -6,6 +6,8 @@
 sys.path.append("tests/python")
 import testing as tm
 
+pytestmark = pytest.mark.timeout(10)
+
 parameter_strategy = strategies.fixed_dictionaries({
     'booster': strategies.just('gblinear'),
     'eta': strategies.floats(0.01, 0.25),
@@ -30,7 +32,7 @@ def train_result(param, dmat, num_rounds):
 
 class TestGPULinear:
     @given(parameter_strategy, strategies.integers(10, 50), tm.dataset_strategy)
-    @settings(deadline=None, print_blob=True)
+    @settings(deadline=None, max_examples=20, print_blob=True)
     def test_gpu_coordinate(self, param, num_rounds, dataset):
         assume(len(dataset.y) > 0)
         param['updater'] = 'gpu_coord_descent'
@@ -49,7 +51,7 @@ class TestGPULinear:
         strategies.floats(1e-5, 0.8),
         strategies.floats(1e-5, 0.8)
     )
-    @settings(deadline=None, print_blob=True)
+    @settings(deadline=None, max_examples=20, print_blob=True)
     def test_gpu_coordinate_regularised(self, param, num_rounds, dataset, alpha, lambd):
         assume(len(dataset.y) > 0)
         param['updater'] = 'gpu_coord_descent'
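The recurring `@settings(...)` edits in this patch pin `max_examples` below Hypothesis' default of 100 generated inputs per test, which together with the timeouts above is the main lever for bounding test runtime. A reduced sketch of the knobs being set (the property itself is a placeholder):

    from hypothesis import given, settings, strategies

    @given(strategies.integers(10, 50))
    @settings(deadline=None, max_examples=20, print_blob=True)
    def test_property(num_rounds: int) -> None:
        # Hypothesis now draws at most 20 examples; deadline=None disables the
        # per-example time limit (training time varies too much for it), and
        # print_blob=True prints a reproduction blob when an example fails.
        assert 10 <= num_rounds <= 50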
diff --git a/tests/python-gpu/test_gpu_pickling.py b/tests/python-gpu/test_gpu_pickling.py
index d368c1ceb..fa414b569 100644
--- a/tests/python-gpu/test_gpu_pickling.py
+++ b/tests/python-gpu/test_gpu_pickling.py
@@ -15,6 +15,8 @@ import testing as tm
 
 model_path = './model.pkl'
 
+pytestmark = pytest.mark.timeout(30)
+
 def build_dataset():
     N = 10
     x = np.linspace(0, N*N, N*N)
@@ -65,6 +67,7 @@ class TestPickling:
         assert status == 0
         os.remove(model_path)
 
+    # TODO: This test is too slow
     @pytest.mark.skipif(**tm.no_sklearn())
     def test_pickling(self):
         x, y = build_dataset()
diff --git a/tests/python-gpu/test_gpu_prediction.py b/tests/python-gpu/test_gpu_prediction.py
index 8976113ca..7a60a634f 100644
--- a/tests/python-gpu/test_gpu_prediction.py
+++ b/tests/python-gpu/test_gpu_prediction.py
@@ -32,6 +32,7 @@ predict_parameter_strategy = strategies.fixed_dictionaries({
     'num_parallel_tree': strategies.sampled_from([1, 4]),
 })
 
+pytestmark = pytest.mark.timeout(20)
 
 class TestGPUPredict:
     def test_predict(self):
@@ -264,7 +265,7 @@ class TestGPUPredict:
     @given(strategies.integers(1, 10),
            tm.dataset_strategy, shap_parameter_strategy)
-    @settings(deadline=None, print_blob=True)
+    @settings(deadline=None, max_examples=20, print_blob=True)
     def test_shap(self, num_rounds, dataset, param):
         if dataset.name.endswith("-l1"):  # not supported by the exact tree method
             return
@@ -280,7 +281,7 @@ class TestGPUPredict:
     @given(strategies.integers(1, 10),
            tm.dataset_strategy, shap_parameter_strategy)
-    @settings(deadline=None, max_examples=20, print_blob=True)
+    @settings(deadline=None, max_examples=10, print_blob=True)
     def test_shap_interactions(self, num_rounds, dataset, param):
         if dataset.name.endswith("-l1"):  # not supported by the exact tree method
             return
@@ -333,14 +334,14 @@ class TestGPUPredict:
         np.testing.assert_equal(cpu_leaf, gpu_leaf)
 
     @given(predict_parameter_strategy, tm.dataset_strategy)
-    @settings(deadline=None, print_blob=True)
+    @settings(deadline=None, max_examples=20, print_blob=True)
     def test_predict_leaf_gbtree(self, param, dataset):
         param['booster'] = 'gbtree'
         param['tree_method'] = 'gpu_hist'
         self.run_predict_leaf_booster(param, 10, dataset)
 
     @given(predict_parameter_strategy, tm.dataset_strategy)
-    @settings(deadline=None, print_blob=True)
+    @settings(deadline=None, max_examples=20, print_blob=True)
     def test_predict_leaf_dart(self, param, dataset):
         param['booster'] = 'dart'
         param['tree_method'] = 'gpu_hist'
@@ -351,7 +352,7 @@
     @given(df=data_frames([column('x0', elements=strategies.integers(min_value=0, max_value=3)),
                            column('x1', elements=strategies.integers(min_value=0, max_value=5))],
                           index=range_indexes(min_size=20, max_size=50)))
-    @settings(deadline=None, print_blob=True)
+    @settings(deadline=None, max_examples=20, print_blob=True)
     def test_predict_categorical_split(self, df):
         from sklearn.metrics import mean_squared_error
diff --git a/tests/python-gpu/test_gpu_ranking.py b/tests/python-gpu/test_gpu_ranking.py
index e95fb78b1..d3f1afc24 100644
--- a/tests/python-gpu/test_gpu_ranking.py
+++ b/tests/python-gpu/test_gpu_ranking.py
@@ -6,10 +6,12 @@ import shutil
 import urllib.request
 import zipfile
 import sys
+import pytest
 
 sys.path.append("tests/python")
 import testing as tm  # noqa
 
+pytestmark = pytest.mark.timeout(10)
 
 class TestRanking:
     @classmethod
@@ -96,7 +98,7 @@ class TestRanking:
         # specify validations set to watch performance
         watchlist = [(cls.dtest, 'eval'), (cls.dtrain, 'train')]
 
-        num_trees = 2500
+        num_trees = 100
         check_metric_improvement_rounds = 10
 
         evals_result = {}
diff --git a/tests/python-gpu/test_gpu_spark/test_gpu_spark.py b/tests/python-gpu/test_gpu_spark/test_gpu_spark.py
index ce5b9d8c8..6836718fe 100644
--- a/tests/python-gpu/test_gpu_spark/test_gpu_spark.py
+++ b/tests/python-gpu/test_gpu_spark/test_gpu_spark.py
@@ -7,7 +7,7 @@ import sklearn
 sys.path.append("tests/python")
 import testing as tm
 
-if tm.no_dask()["condition"]:
+if tm.no_spark()["condition"]:
     pytest.skip(msg=tm.no_spark()["reason"], allow_module_level=True)
 if sys.platform.startswith("win"):
     pytest.skip("Skipping PySpark tests on Windows", allow_module_level=True)
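The one-line fix above corrects a copy-paste error: the module consulted `tm.no_dask()` to decide whether to skip on missing Spark. A sketch of the module-level skip pattern in isolation, with a hypothetical stand-in for the `testing` helper:

    import pytest

    def no_spark() -> dict:
        # Hypothetical equivalent of testing.no_spark(): report whether the
        # optional dependency is importable.
        try:
            import pyspark  # noqa: F401
            return {"condition": False, "reason": "pyspark is available"}
        except ImportError:
            return {"condition": True, "reason": "pyspark is not installed"}

    if no_spark()["condition"]:
        # allow_module_level=True lets pytest.skip() abort collection of the
        # whole module instead of raising an error inside a test.
        pytest.skip(no_spark()["reason"], allow_module_level=True)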
diff --git a/tests/python-gpu/test_gpu_updaters.py b/tests/python-gpu/test_gpu_updaters.py
index 7f29a92e6..e190e7205 100644
--- a/tests/python-gpu/test_gpu_updaters.py
+++ b/tests/python-gpu/test_gpu_updaters.py
@@ -1,7 +1,6 @@
 from typing import Dict, Any
 import numpy as np
 import sys
-import gc
 import pytest
 import xgboost as xgb
 from hypothesis import given, strategies, assume, settings, note
@@ -10,6 +9,7 @@
 sys.path.append("tests/python")
 import testing as tm
 import test_updaters as test_up
 
+pytestmark = pytest.mark.timeout(30)
 
 parameter_strategy = strategies.fixed_dictionaries({
     'max_depth': strategies.integers(0, 11),
@@ -46,7 +46,7 @@ class TestGPUUpdaters:
     cputest = test_up.TestTreeMethod()
 
     @given(parameter_strategy, strategies.integers(1, 20), tm.dataset_strategy)
-    @settings(deadline=None, print_blob=True)
+    @settings(deadline=None, max_examples=50, print_blob=True)
     def test_gpu_hist(self, param, num_rounds, dataset):
         param["tree_method"] = "gpu_hist"
         param = dataset.set_params(param)
@@ -73,7 +73,7 @@ class TestGPUUpdaters:
     @given(strategies.integers(10, 400), strategies.integers(3, 8),
            strategies.integers(1, 2), strategies.integers(4, 7))
-    @settings(deadline=None, print_blob=True)
+    @settings(deadline=None, max_examples=20, print_blob=True)
     @pytest.mark.skipif(**tm.no_pandas())
     def test_categorical_ohe(self, rows, cols, rounds, cats):
         self.cputest.run_categorical_ohe(rows, cols, rounds, cats, "gpu_hist")
@@ -85,7 +85,7 @@ class TestGPUUpdaters:
         test_up.cat_parameter_strategy,
         strategies.integers(4, 32),
     )
-    @settings(deadline=None, print_blob=True)
+    @settings(deadline=None, max_examples=20, print_blob=True)
     @pytest.mark.skipif(**tm.no_pandas())
     def test_categorical(
         self,
@@ -106,7 +106,7 @@ class TestGPUUpdaters:
         test_up.hist_parameter_strategy,
         test_up.cat_parameter_strategy,
     )
-    @settings(deadline=None, print_blob=True)
+    @settings(deadline=None, max_examples=10, print_blob=True)
     def test_categorical_ames_housing(
         self,
         hist_parameters: Dict[str, Any],
@@ -125,7 +125,7 @@ class TestGPUUpdaters:
         strategies.integers(3, 8),
         strategies.integers(4, 7)
     )
-    @settings(deadline=None, print_blob=True)
+    @settings(deadline=None, max_examples=20, print_blob=True)
     @pytest.mark.skipif(**tm.no_pandas())
     def test_categorical_missing(self, rows, cols, cats):
         self.cputest.run_categorical_missing(rows, cols, cats, "gpu_hist")
@@ -149,7 +149,7 @@ class TestGPUUpdaters:
     @pytest.mark.skipif(**tm.no_cupy())
     @given(parameter_strategy, strategies.integers(1, 20),
            tm.dataset_strategy)
-    @settings(deadline=None, print_blob=True)
+    @settings(deadline=None, max_examples=20, print_blob=True)
     def test_gpu_hist_device_dmatrix(self, param, num_rounds, dataset):
         # We cannot handle empty dataset yet
         assume(len(dataset.y) > 0)
@@ -159,9 +159,9 @@ class TestGPUUpdaters:
         note(result)
         assert tm.non_increasing(result['train'][dataset.metric], tolerance=1e-3)
 
-    @given(parameter_strategy, strategies.integers(1, 20),
+    @given(parameter_strategy, strategies.integers(1, 3),
            tm.dataset_strategy)
-    @settings(deadline=None, print_blob=True)
+    @settings(deadline=None, max_examples=10, print_blob=True)
     def test_external_memory(self, param, num_rounds, dataset):
         if dataset.name.endswith("-l1"):
             return
@@ -172,7 +172,6 @@ class TestGPUUpdaters:
         m = dataset.get_external_dmat()
         external_result = train_result(param, m, num_rounds)
         del m
-        gc.collect()
         assert tm.non_increasing(external_result['train'][dataset.metric])
 
     def test_empty_dmatrix_prediction(self):
diff --git a/tests/python-gpu/test_gpu_with_sklearn.py b/tests/python-gpu/test_gpu_with_sklearn.py
index 87d5a651d..227c2a874 100644
--- a/tests/python-gpu/test_gpu_with_sklearn.py
+++ b/tests/python-gpu/test_gpu_with_sklearn.py
@@ -61,7 +61,7 @@ def test_boost_from_prediction_gpu_hist():
 
 
 def test_num_parallel_tree():
-    twskl.run_calif_housing_rf_regression("gpu_hist")
+    twskl.run_housing_rf_regression("gpu_hist")
 
 
 @pytest.mark.skipif(**tm.no_pandas())
diff --git a/tests/python/test_data_iterator.py b/tests/python/test_data_iterator.py
index f4d424b83..71e0e2f26 100644
--- a/tests/python/test_data_iterator.py
+++ b/tests/python/test_data_iterator.py
@@ -6,6 +6,7 @@ import pytest
 from hypothesis import given, strategies, settings
 from scipy.sparse import csr_matrix
 
+pytestmark = pytest.mark.timeout(30)
 
 def test_single_batch(tree_method: str = "approx") -> None:
     from sklearn.datasets import load_breast_cancer
@@ -134,7 +135,7 @@ def run_data_iterator(
     strategies.integers(0, 13),
     strategies.booleans(),
 )
-@settings(deadline=None, print_blob=True)
+@settings(deadline=None, max_examples=10, print_blob=True)
 def test_data_iterator(
     n_samples_per_batch: int,
     n_features: int,
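Several of the tests above drive training through `run_data_iterator`, i.e. through XGBoost's batch-iterator input path. For orientation, a self-contained sketch of the `DataIter` protocol those tests exercise (names and data here are made up, not taken from the test suite):

    import numpy as np
    import xgboost as xgb

    class Batches(xgb.DataIter):
        """Feed a fixed list of (X, y) batches to XGBoost."""

        def __init__(self, batches):
            self.batches = batches
            self.i = 0
            super().__init__(cache_prefix="cache")

        def next(self, input_data) -> int:
            if self.i == len(self.batches):
                return 0  # no batches left
            X, y = self.batches[self.i]
            input_data(data=X, label=y)
            self.i += 1
            return 1  # more batches remain

        def reset(self) -> None:
            self.i = 0

    rng = np.random.RandomState(1994)
    it = Batches([(rng.randn(32, 4), rng.randn(32)) for _ in range(3)])
    Xy = xgb.DMatrix(it)  # DMatrix constructed from the iterator
    xgb.train({"tree_method": "hist"}, Xy, num_boost_round=2)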
diff --git a/tests/python/test_demos.py b/tests/python/test_demos.py
index 4c1f1fb38..6dc678446 100644
--- a/tests/python/test_demos.py
+++ b/tests/python/test_demos.py
@@ -4,6 +4,7 @@ import pytest
 import testing as tm
 import sys
 
+pytestmark = pytest.mark.timeout(30)
 
 ROOT_DIR = tm.PROJECT_ROOT
 DEMO_DIR = os.path.join(ROOT_DIR, 'demo')
diff --git a/tests/python/test_linear.py b/tests/python/test_linear.py
index e7574cd03..e1e7fbdf9 100644
--- a/tests/python/test_linear.py
+++ b/tests/python/test_linear.py
@@ -1,7 +1,10 @@
 import testing as tm
+import pytest
 from hypothesis import strategies, given, settings, note
 import xgboost as xgb
 
+pytestmark = pytest.mark.timeout(10)
+
 parameter_strategy = strategies.fixed_dictionaries({
     'booster': strategies.just('gblinear'),
     'eta': strategies.floats(0.01, 0.25),
@@ -26,7 +29,7 @@ def train_result(param, dmat, num_rounds):
 
 class TestLinear:
     @given(parameter_strategy, strategies.integers(10, 50),
            tm.dataset_strategy, coord_strategy)
-    @settings(deadline=None, print_blob=True)
+    @settings(deadline=None, max_examples=20, print_blob=True)
     def test_coordinate(self, param, num_rounds, dataset, coord_param):
         param['updater'] = 'coord_descent'
         param.update(coord_param)
@@ -46,7 +49,7 @@ class TestLinear:
         strategies.floats(1e-5, 0.8),
         strategies.floats(1e-5, 0.8)
     )
-    @settings(deadline=None, print_blob=True)
+    @settings(deadline=None, max_examples=20, print_blob=True)
     def test_coordinate_regularised(self, param, num_rounds, dataset, coord_param, alpha, lambd):
         param['updater'] = 'coord_descent'
         param['alpha'] = alpha
@@ -59,7 +62,7 @@ class TestLinear:
 
     @given(parameter_strategy, strategies.integers(10, 50),
            tm.dataset_strategy)
-    @settings(deadline=None, print_blob=True)
+    @settings(deadline=None, max_examples=20, print_blob=True)
     def test_shotgun(self, param, num_rounds, dataset):
         param['updater'] = 'shotgun'
         param = dataset.set_params(param)
@@ -76,7 +79,7 @@ class TestLinear:
     @given(parameter_strategy, strategies.integers(10, 50),
            tm.dataset_strategy, strategies.floats(1e-5, 1.0),
            strategies.floats(1e-5, 1.0))
-    @settings(deadline=None, print_blob=True)
+    @settings(deadline=None, max_examples=20, print_blob=True)
     def test_shotgun_regularised(self, param, num_rounds, dataset, alpha, lambd):
         param['updater'] = 'shotgun'
         param['alpha'] = alpha
diff --git a/tests/python/test_openmp.py b/tests/python/test_openmp.py
index 8af6ca4b0..847c0c9e2 100644
--- a/tests/python/test_openmp.py
+++ b/tests/python/test_openmp.py
@@ -8,6 +8,7 @@ import pytest
 import testing as tm
 
+pytestmark = pytest.mark.timeout(10)
 
 class TestOMP:
     def test_omp(self):
@@ -49,14 +50,15 @@ class TestOMP:
 
         print('test approx ...')
         param['tree_method'] = 'approx'
+        n_trials = 10
 
         param['nthread'] = 1
-        auc_1, pred_1 = consist_test('approx_thread_1', 100)
+        auc_1, pred_1 = consist_test('approx_thread_1', n_trials)
 
         param['nthread'] = 2
-        auc_2, pred_2 = consist_test('approx_thread_2', 100)
+        auc_2, pred_2 = consist_test('approx_thread_2', n_trials)
 
         param['nthread'] = 3
-        auc_3, pred_3 = consist_test('approx_thread_3', 100)
+        auc_3, pred_3 = consist_test('approx_thread_3', n_trials)
 
         assert auc_1 == auc_2 == auc_3
         assert np.array_equal(auc_1, auc_2)
@@ -66,13 +68,13 @@ class TestOMP:
         param['tree_method'] = 'hist'
 
         param['nthread'] = 1
-        auc_1, pred_1 = consist_test('hist_thread_1', 100)
+        auc_1, pred_1 = consist_test('hist_thread_1', n_trials)
 
         param['nthread'] = 2
-        auc_2, pred_2 = consist_test('hist_thread_2', 100)
+        auc_2, pred_2 = consist_test('hist_thread_2', n_trials)
 
         param['nthread'] = 3
-        auc_3, pred_3 = consist_test('hist_thread_3', 100)
+        auc_3, pred_3 = consist_test('hist_thread_3', n_trials)
 
         assert auc_1 == auc_2 == auc_3
         assert np.array_equal(auc_1, auc_2)
diff --git a/tests/python/test_spark/test_spark_local.py b/tests/python/test_spark/test_spark_local.py
index 3894bed4b..d530a572b 100644
--- a/tests/python/test_spark/test_spark_local.py
+++ b/tests/python/test_spark/test_spark_local.py
@@ -16,10 +16,7 @@
 if sys.platform.startswith("win") or sys.platform.startswith("darwin"):
     pytest.skip("Skipping PySpark tests on Windows", allow_module_level=True)
 
 from pyspark.ml import Pipeline, PipelineModel
-from pyspark.ml.evaluation import (
-    BinaryClassificationEvaluator,
-    MulticlassClassificationEvaluator,
-)
+from pyspark.ml.evaluation import BinaryClassificationEvaluator
 from pyspark.ml.feature import VectorAssembler
 from pyspark.ml.functions import vector_to_array
 from pyspark.ml.linalg import Vectors
@@ -40,6 +37,8 @@
 from .utils import SparkTestCase
 
 logging.getLogger("py4j").setLevel(logging.INFO)
 
+pytestmark = pytest.mark.timeout(60)
+
 
 class XgboostLocalTest(SparkTestCase):
     def setUp(self):
@@ -711,17 +710,10 @@ class XgboostLocalTest(SparkTestCase):
             estimatorParamMaps=paramMaps,
             evaluator=BinaryClassificationEvaluator(),
             seed=1,
+            numFolds=2,
         )
         cvBinModel = cvBin.fit(self.cls_df_train_large)
         cvBinModel.transform(self.cls_df_test)
-        cvMulti = CrossValidator(
-            estimator=xgb_classifer,
-            estimatorParamMaps=paramMaps,
-            evaluator=MulticlassClassificationEvaluator(),
-            seed=1,
-        )
-        cvMultiModel = cvMulti.fit(self.multi_cls_df_train_large)
-        cvMultiModel.transform(self.multi_cls_df_test)
 
     def test_callbacks(self):
         from xgboost.callback import LearningRateScheduler
@@ -889,35 +881,6 @@ class XgboostLocalTest(SparkTestCase):
         )
 
     def test_classifier_with_weight_eval(self):
-        # with weight
-        classifier_with_weight = SparkXGBClassifier(weight_col="weight")
-        model_with_weight = classifier_with_weight.fit(
-            self.cls_df_train_with_eval_weight
-        )
-        pred_result_with_weight = model_with_weight.transform(
-            self.cls_df_test_with_eval_weight
-        ).collect()
-        for row in pred_result_with_weight:
-            self.assertTrue(
-                np.allclose(row.probability, row.expected_prob_with_weight, atol=1e-3)
-            )
-        # with eval
-        classifier_with_eval = SparkXGBClassifier(**self.cls_params_with_eval)
-        model_with_eval = classifier_with_eval.fit(self.cls_df_train_with_eval_weight)
-        self.assertTrue(
-            np.isclose(
-                model_with_eval._xgb_sklearn_model.best_score,
-                self.cls_with_eval_best_score,
-                atol=1e-3,
-            )
-        )
-        pred_result_with_eval = model_with_eval.transform(
-            self.cls_df_test_with_eval_weight
-        ).collect()
-        for row in pred_result_with_eval:
-            self.assertTrue(
-                np.allclose(row.probability, row.expected_prob_with_eval, atol=1e-3)
-            )
         # with weight and eval
         # Added scale_pos_weight because in 1.4.2, the original answer returns 0.5 which
         # doesn't really indicate this working correctly.
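For the CrossValidator hunk above: pyspark's `CrossValidator` defaults to `numFolds=3`, and total training cost is roughly numFolds times the size of the parameter grid, so `numFolds=2` drops a third of the fits. The knob in isolation (estimator and parameter maps are placeholders; fitting still requires a SparkSession and a DataFrame):

    from pyspark.ml.evaluation import BinaryClassificationEvaluator
    from pyspark.ml.tuning import CrossValidator

    def build_cv(estimator, param_maps):
        return CrossValidator(
            estimator=estimator,
            estimatorParamMaps=param_maps,
            evaluator=BinaryClassificationEvaluator(),
            seed=1,
            numFolds=2,  # default is 3; each fold fits len(param_maps) models
        )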
diff --git a/tests/python/test_with_dask.py b/tests/python/test_with_dask.py
index bdd432a75..3b8df9996 100644
--- a/tests/python/test_with_dask.py
+++ b/tests/python/test_with_dask.py
@@ -44,6 +44,8 @@ from xgboost.dask import DaskDMatrix
 
 dask.config.set({"distributed.scheduler.allowed-failures": False})
 
+pytestmark = pytest.mark.timeout(30)
+
 if hasattr(HealthCheck, 'function_scoped_fixture'):
     suppress = [HealthCheck.function_scoped_fixture]
 else:
@@ -381,7 +383,7 @@ def test_categorical(client: "Client") -> None:
 
 def test_dask_predict_shape_infer(client: "Client") -> None:
-    X, y = make_classification(n_samples=1000, n_informative=5, n_classes=3)
+    X, y = make_classification(n_samples=kRows, n_informative=5, n_classes=3)
     X_ = dd.from_array(X, chunksize=100)
     y_ = dd.from_array(y, chunksize=100)
     dtrain = xgb.dask.DaskDMatrix(client, data=X_, label=y_)
@@ -522,8 +524,8 @@ def test_boost_from_prediction(tree_method: str, client: "Client") -> None:
 
 def test_inplace_predict(client: "Client") -> None:
-    from sklearn.datasets import fetch_california_housing
-    X_, y_ = fetch_california_housing(return_X_y=True)
+    from sklearn.datasets import load_diabetes
+    X_, y_ = load_diabetes(return_X_y=True)
     X, y = dd.from_array(X_, chunksize=32), dd.from_array(y_, chunksize=32)
     reg = xgb.dask.DaskXGBRegressor(n_estimators=4).fit(X, y)
     booster = reg.get_booster()
@@ -841,7 +843,7 @@ def run_empty_dmatrix_cls(client: "Client", parameters: dict) -> None:
 
 def run_empty_dmatrix_auc(client: "Client", tree_method: str, n_workers: int) -> None:
     from sklearn import datasets
     n_samples = 100
-    n_features = 97
+    n_features = 7
     rng = np.random.RandomState(1994)
 
     make_classification = partial(
@@ -894,9 +896,9 @@
 
 def test_empty_dmatrix_auc() -> None:
-    with LocalCluster(n_workers=8, dashboard_address=":0") as cluster:
+    with LocalCluster(n_workers=4, dashboard_address=":0") as cluster:
         with Client(cluster) as client:
-            run_empty_dmatrix_auc(client, "hist", 8)
+            run_empty_dmatrix_auc(client, "hist", 4)
 
 
 def run_auc(client: "Client", tree_method: str) -> None:
@@ -1033,7 +1035,7 @@ async def run_dask_classifier_asyncio(scheduler_address: str) -> None:
 
 def test_with_asyncio() -> None:
-    with LocalCluster(dashboard_address=":0") as cluster:
+    with LocalCluster(n_workers=2, dashboard_address=":0") as cluster:
         with Client(cluster) as client:
             address = client.scheduler.address
             output = asyncio.run(run_from_dask_array_asyncio(address))
@@ -1420,11 +1422,11 @@ class TestWithDask:
 
     @given(params=hist_parameter_strategy,
            dataset=tm.dataset_strategy)
-    @settings(deadline=None, suppress_health_check=suppress, print_blob=True)
+    @settings(deadline=None, max_examples=10, suppress_health_check=suppress, print_blob=True)
     def test_hist(
         self, params: Dict, dataset: tm.TestDataset, client: "Client"
     ) -> None:
-        num_rounds = 30
+        num_rounds = 10
         self.run_updater_test(client, params, num_rounds, dataset, 'hist')
 
     def test_quantile_dmatrix(self, client: Client) -> None:
@@ -1465,11 +1467,11 @@ class TestWithDask:
 
     @given(params=exact_parameter_strategy,
            dataset=tm.dataset_strategy)
-    @settings(deadline=None, suppress_health_check=suppress, print_blob=True)
+    @settings(deadline=None, max_examples=10, suppress_health_check=suppress, print_blob=True)
     def test_approx(
         self, client: "Client", params: Dict, dataset: tm.TestDataset
     ) -> None:
-        num_rounds = 30
+        num_rounds = 10
         self.run_updater_test(client, params, num_rounds, dataset, 'approx')
 
     def run_quantile(self, name: str) -> None:
@@ -1773,16 +1775,16 @@ class TestWithDask:
         assert np.allclose(np.sum(shap, axis=len(shap.shape) - 1), margin, 1e-5, 1e-5)
 
     def test_shap(self, client: "Client") -> None:
-        from sklearn.datasets import fetch_california_housing, load_digits
-        X, y = fetch_california_housing(return_X_y=True)
+        from sklearn.datasets import load_diabetes, load_iris
+        X, y = load_diabetes(return_X_y=True)
         params: Dict[str, Any] = {'objective': 'reg:squarederror'}
         self.run_shap(X, y, params, client)
 
-        X, y = load_digits(return_X_y=True)
-        params = {'objective': 'multi:softmax', 'num_class': 10}
+        X, y = load_iris(return_X_y=True)
+        params = {'objective': 'multi:softmax', 'num_class': 3}
         self.run_shap(X, y, params, client)
-        params = {'objective': 'multi:softprob', 'num_class': 10}
+        params = {'objective': 'multi:softprob', 'num_class': 3}
         self.run_shap(X, y, params, client)
         self.run_shap_cls_sklearn(X, y, client)
@@ -1818,8 +1820,8 @@ class TestWithDask:
                                    1e-5, 1e-5)
 
     def test_shap_interactions(self, client: "Client") -> None:
-        from sklearn.datasets import fetch_california_housing
-        X, y = fetch_california_housing(return_X_y=True)
+        from sklearn.datasets import load_diabetes
+        X, y = load_diabetes(return_X_y=True)
         params = {'objective': 'reg:squarederror'}
         self.run_shap_interactions(X, y, params, client)
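The Dask edits above shrink worker counts, datasets, and round counts without changing what is asserted. The LocalCluster pattern they tune, shown with an arbitrary payload:

    from dask.distributed import Client, LocalCluster

    def main() -> None:
        # n_workers bounds the processes spawned for the test, and
        # dashboard_address=":0" picks a free port so concurrent CI jobs
        # do not collide on the default dashboard port.
        with LocalCluster(n_workers=2, dashboard_address=":0") as cluster:
            with Client(cluster) as client:
                futures = client.map(lambda x: x + 1, range(10))
                assert sum(client.gather(futures)) == 55

    if __name__ == "__main__":
        main()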
diff --git a/tests/python/test_with_modin.py b/tests/python/test_with_modin.py
index 0ed59d3f8..4932d1c1f 100644
--- a/tests/python/test_with_modin.py
+++ b/tests/python/test_with_modin.py
@@ -14,10 +14,6 @@
 except ImportError:
     pytestmark = pytest.mark.skipif(**tm.no_modin())
 
-dpath = 'demo/data/'
-rng = np.random.RandomState(1994)
-
-
 class TestModin:
     @pytest.mark.xfail
     def test_modin(self):
diff --git a/tests/python/test_with_sklearn.py b/tests/python/test_with_sklearn.py
index 7edc392f0..62c51f5b7 100644
--- a/tests/python/test_with_sklearn.py
+++ b/tests/python/test_with_sklearn.py
@@ -12,7 +12,7 @@
 import json
 
 rng = np.random.RandomState(1994)
 
-pytestmark = pytest.mark.skipif(**tm.no_sklearn())
+pytestmark = [pytest.mark.skipif(**tm.no_sklearn()), pytest.mark.timeout(30)]
 
 from sklearn.utils.estimator_checks import parametrize_with_checks
@@ -328,10 +328,10 @@ def test_select_feature():
 
 
 def test_num_parallel_tree():
-    from sklearn.datasets import fetch_california_housing
+    from sklearn.datasets import load_diabetes
 
     reg = xgb.XGBRegressor(n_estimators=4, num_parallel_tree=4, tree_method="hist")
-    X, y = fetch_california_housing(return_X_y=True)
+    X, y = load_diabetes(return_X_y=True)
     bst = reg.fit(X=X, y=y)
     dump = bst.get_booster().get_dump(dump_format="json")
     assert len(dump) == 16
@@ -352,7 +352,7 @@
 )
 
 
-def test_calif_housing_regression():
+def test_regression():
     from sklearn.metrics import mean_squared_error
     from sklearn.datasets import fetch_california_housing
     from sklearn.model_selection import KFold
@@ -381,7 +381,7 @@
     xgb_model.feature_names_in_
 
 
-def run_calif_housing_rf_regression(tree_method):
+def run_housing_rf_regression(tree_method):
     from sklearn.metrics import mean_squared_error
     from sklearn.datasets import fetch_california_housing
     from sklearn.model_selection import KFold
@@ -401,8 +401,8 @@
     rfreg.fit(X, y, early_stopping_rounds=10)
 
 
-def test_calif_housing_rf_regression():
-    run_calif_housing_rf_regression("hist")
+def test_rf_regression():
+    run_housing_rf_regression("hist")
 
 
 def test_parameter_tuning():
@@ -411,9 +411,9 @@
     X, y = fetch_california_housing(return_X_y=True)
     xgb_model = xgb.XGBRegressor(learning_rate=0.1)
-    clf = GridSearchCV(xgb_model, {'max_depth': [2, 4, 6],
-                                   'n_estimators': [50, 100, 200]},
-                       cv=3, verbose=1)
+    clf = GridSearchCV(xgb_model, {'max_depth': [2, 4],
+                                   'n_estimators': [50, 200]},
+                       cv=2, verbose=1)
     clf.fit(X, y)
     assert clf.best_score_ < 0.7
     assert clf.best_params_ == {'n_estimators': 200, 'max_depth': 4}
@@ -840,13 +840,13 @@ def test_save_load_model():
 
 
 def test_RFECV():
-    from sklearn.datasets import fetch_california_housing
+    from sklearn.datasets import load_diabetes
     from sklearn.datasets import load_breast_cancer
     from sklearn.datasets import load_iris
     from sklearn.feature_selection import RFECV
 
     # Regression
-    X, y = fetch_california_housing(return_X_y=True)
+    X, y = load_diabetes(return_X_y=True)
     bst = xgb.XGBRegressor(booster='gblinear', learning_rate=0.1,
                            n_estimators=10,
                            objective='reg:squarederror',
@@ -861,7 +861,7 @@
                            n_estimators=10,
                            objective='binary:logistic',
                            random_state=0, verbosity=0)
-    rfecv = RFECV(estimator=bst, step=1, cv=3, scoring='roc_auc')
+    rfecv = RFECV(estimator=bst, step=0.5, cv=3, scoring='roc_auc')
     rfecv.fit(X, y)
 
     # Multi-class classification
@@ -872,7 +872,7 @@
                            objective='multi:softprob', random_state=0,
                            reg_alpha=0.001, reg_lambda=0.01,
                            scale_pos_weight=0.5, verbosity=0)
-    rfecv = RFECV(estimator=bst, step=1, cv=3, scoring='neg_log_loss')
+    rfecv = RFECV(estimator=bst, step=0.5, cv=3, scoring='neg_log_loss')
     rfecv.fit(X, y)
 
     X[0:4, :] = np.nan  # verify scikit_learn doesn't throw with nan
@@ -881,7 +881,7 @@
     rfecv.fit(X, y)
 
     cls = xgb.XGBClassifier()
-    rfecv = RFECV(estimator=cls, step=1, cv=3,
+    rfecv = RFECV(estimator=cls, step=0.5, cv=3,
                   scoring='neg_mean_squared_error')
     rfecv.fit(X, y)
@@ -1155,7 +1155,7 @@ def run_boost_from_prediction_multi_clasas(
 
 @pytest.mark.parametrize("tree_method", ["hist", "approx", "exact"])
 def test_boost_from_prediction(tree_method):
-    from sklearn.datasets import load_breast_cancer, load_digits, make_regression
+    from sklearn.datasets import load_breast_cancer, load_iris, make_regression
     import pandas as pd
 
     X, y = load_breast_cancer(return_X_y=True)
 
     run_boost_from_prediction_binary(tree_method, X, y, None)
     run_boost_from_prediction_binary(tree_method, X, y, pd.DataFrame)
 
-    X, y = load_digits(return_X_y=True)
+    X, y = load_iris(return_X_y=True)
     run_boost_from_prediction_multi_clasas(xgb.XGBClassifier, tree_method, X, y, None)
     run_boost_from_prediction_multi_clasas(