diff --git a/python-package/xgboost/sklearn.py b/python-package/xgboost/sklearn.py index 5a873ab88..751b5a1c4 100644 --- a/python-package/xgboost/sklearn.py +++ b/python-package/xgboost/sklearn.py @@ -65,7 +65,7 @@ def _check_rf_callback( ) -_SklObjective = Optional[ +SklObjective = Optional[ Union[str, Callable[[np.ndarray, np.ndarray], Tuple[np.ndarray, np.ndarray]]] ] @@ -144,7 +144,7 @@ __model_doc = f""" Boosting learning rate (xgb's "eta") verbosity : Optional[int] The degree of verbosity. Valid values are 0 (silent) - 3 (debug). - objective : {_SklObjective} + objective : {SklObjective} Specify the learning task and the corresponding learning objective or a custom objective function to be used (see note below). booster: Optional[str] @@ -546,7 +546,7 @@ class XGBModel(XGBModelBase): learning_rate: Optional[float] = None, n_estimators: int = 100, verbosity: Optional[int] = None, - objective: _SklObjective = None, + objective: SklObjective = None, booster: Optional[str] = None, tree_method: Optional[str] = None, n_jobs: Optional[int] = None, @@ -1409,7 +1409,7 @@ class XGBClassifier(XGBModel, XGBClassifierBase): def __init__( self, *, - objective: _SklObjective = "binary:logistic", + objective: SklObjective = "binary:logistic", use_label_encoder: Optional[bool] = None, **kwargs: Any, ) -> None: @@ -1712,7 +1712,7 @@ class XGBRegressor(XGBModel, XGBRegressorBase): # pylint: disable=missing-docstring @_deprecate_positional_args def __init__( - self, *, objective: _SklObjective = "reg:squarederror", **kwargs: Any + self, *, objective: SklObjective = "reg:squarederror", **kwargs: Any ) -> None: super().__init__(objective=objective, **kwargs) diff --git a/python-package/xgboost/testing.py b/python-package/xgboost/testing.py deleted file mode 100644 index 598ddac5b..000000000 --- a/python-package/xgboost/testing.py +++ /dev/null @@ -1,64 +0,0 @@ -"""Utilities for defining Python tests.""" - -import socket -from platform import system -from typing import Any, TypedDict - -PytestSkip = TypedDict("PytestSkip", {"condition": bool, "reason": str}) - - -def has_ipv6() -> bool: - """Check whether IPv6 is enabled on this host.""" - # connection error in macos, still need some fixes. - if system() not in ("Linux", "Windows"): - return False - - if socket.has_ipv6: - try: - with socket.socket( - socket.AF_INET6, socket.SOCK_STREAM - ) as server, socket.socket(socket.AF_INET6, socket.SOCK_STREAM) as client: - server.bind(("::1", 0)) - port = server.getsockname()[1] - server.listen() - - client.connect(("::1", port)) - conn, _ = server.accept() - - client.sendall("abc".encode()) - msg = conn.recv(3).decode() - # if the code can be executed to this point, the message should be - # correct. - assert msg == "abc" - return True - except OSError: - pass - return False - - -def skip_ipv6() -> PytestSkip: - """PyTest skip mark for IPv6.""" - return {"condition": not has_ipv6(), "reason": "IPv6 is required to be enabled."} - - -def timeout(sec: int, *args: Any, enable: bool = True, **kwargs: Any) -> Any: - """Make a pytest mark for the `pytest-timeout` package. - - Parameters - ---------- - sec : - Timeout seconds. - enable : - Control whether timeout should be applied, used for debugging. - - Returns - ------- - pytest.mark.timeout - """ - import pytest # pylint: disable=import-error - - # This is disabled for now due to regression caused by conflicts between federated - # learning build and the CI container environment. 
- if enable: - return pytest.mark.timeout(sec, *args, **kwargs) - return pytest.mark.timeout(None, *args, **kwargs) diff --git a/tests/python/testing.py b/python-package/xgboost/testing/__init__.py similarity index 64% rename from tests/python/testing.py rename to python-package/xgboost/testing/__init__.py index 63d33de97..99ff72423 100644 --- a/tests/python/testing.py +++ b/python-package/xgboost/testing/__init__.py @@ -1,192 +1,190 @@ -from concurrent.futures import ThreadPoolExecutor -import os +"""Utilities for defining Python tests. The module is private and subject to frequent +change without notice. + +""" +# pylint: disable=invalid-name,missing-function-docstring,import-error +import gc +import importlib.util import multiprocessing -from typing import Tuple, Union, List, Sequence, Callable +import os +import platform +import socket +import sys import urllib import zipfile -import sys -from typing import Optional, Dict, Any +from concurrent.futures import ThreadPoolExecutor from contextlib import contextmanager from io import StringIO -from xgboost.compat import SKLEARN_INSTALLED, PANDAS_INSTALLED -import pytest -import gc -import xgboost as xgb -from xgboost.core import ArrayLike -import numpy as np -from scipy import sparse -import platform +from platform import system +from typing import ( + Any, + Callable, + Dict, + Generator, + List, + Optional, + Sequence, + Set, + Tuple, + TypedDict, + Union, +) -hypothesis = pytest.importorskip('hypothesis') -sklearn = pytest.importorskip('sklearn') +import numpy as np +import pytest +from scipy import sparse +from xgboost.core import ArrayLike +from xgboost.sklearn import SklObjective + +import xgboost as xgb + +hypothesis = pytest.importorskip("hypothesis") + +# pylint:disable=wrong-import-position,wrong-import-order from hypothesis import strategies from hypothesis.extra.numpy import arrays -from joblib import Memory -from sklearn import datasets -try: - import cupy as cp -except ImportError: - cp = None +joblib = pytest.importorskip("joblib") +datasets = pytest.importorskip("sklearn.datasets") -memory = Memory('./cachedir', verbose=0) +Memory = joblib.Memory + +memory = Memory("./cachedir", verbose=0) + +PytestSkip = TypedDict("PytestSkip", {"condition": bool, "reason": str}) -def no_ubjson(): - reason = "ubjson is not intsalled." - try: - import ubjson # noqa - return {"condition": False, "reason": reason} - except ImportError: - return {"condition": True, "reason": reason} +def has_ipv6() -> bool: + """Check whether IPv6 is enabled on this host.""" + # connection error in macos, still need some fixes. + if system() not in ("Linux", "Windows"): + return False + + if socket.has_ipv6: + try: + with socket.socket( + socket.AF_INET6, socket.SOCK_STREAM + ) as server, socket.socket(socket.AF_INET6, socket.SOCK_STREAM) as client: + server.bind(("::1", 0)) + port = server.getsockname()[1] + server.listen() + + client.connect(("::1", port)) + conn, _ = server.accept() + + client.sendall("abc".encode()) + msg = conn.recv(3).decode() + # if the code can be executed to this point, the message should be + # correct. 
+ assert msg == "abc" + return True + except OSError: + pass + return False -def no_sklearn(): - return {'condition': not SKLEARN_INSTALLED, - 'reason': 'Scikit-Learn is not installed'} +def no_mod(name: str) -> PytestSkip: + spec = importlib.util.find_spec(name) + return {"condition": spec is None, "reason": f"{name} is not installed."} -def no_dask(): - try: - import pkg_resources - - pkg_resources.get_distribution("dask") - DASK_INSTALLED = True - except pkg_resources.DistributionNotFound: - DASK_INSTALLED = False - return {"condition": not DASK_INSTALLED, "reason": "Dask is not installed"} +def no_ipv6() -> PytestSkip: + """PyTest skip mark for IPv6.""" + return {"condition": not has_ipv6(), "reason": "IPv6 is required to be enabled."} -def no_spark(): - try: - import pyspark # noqa - SPARK_INSTALLED = True - except ImportError: - SPARK_INSTALLED = False - return {"condition": not SPARK_INSTALLED, "reason": "Spark is not installed"} +def no_ubjson() -> PytestSkip: + return no_mod("ubjson") -def no_pandas(): - return {'condition': not PANDAS_INSTALLED, - 'reason': 'Pandas is not installed.'} +def no_sklearn() -> PytestSkip: + return no_mod("sklearn") -def no_arrow(): - reason = "pyarrow is not installed" - try: - import pyarrow # noqa - return {"condition": False, "reason": reason} - except ImportError: - return {"condition": True, "reason": reason} +def no_dask() -> PytestSkip: + return no_mod("dask") -def no_modin(): - reason = 'Modin is not installed.' - try: - import modin.pandas as _ # noqa - return {'condition': False, 'reason': reason} - except ImportError: - return {'condition': True, 'reason': reason} +def no_spark() -> PytestSkip: + return no_mod("pyspark") -def no_dt(): - import importlib.util - spec = importlib.util.find_spec('datatable') - return {'condition': spec is None, - 'reason': 'Datatable is not installed.'} +def no_pandas() -> PytestSkip: + return no_mod("pandas") -def no_matplotlib(): - reason = 'Matplotlib is not installed.' +def no_arrow() -> PytestSkip: + return no_mod("pyarrow") + + +def no_modin() -> PytestSkip: + return no_mod("modin") + + +def no_dt() -> PytestSkip: + return no_mod("datatable") + + +def no_matplotlib() -> PytestSkip: + reason = "Matplotlib is not installed." try: import matplotlib.pyplot as _ # noqa - return {'condition': False, - 'reason': reason} + + return {"condition": False, "reason": reason} except ImportError: - return {'condition': True, - 'reason': reason} + return {"condition": True, "reason": reason} -def no_dask_cuda(): - reason = 'dask_cuda is not installed.' - try: - import dask_cuda as _ # noqa - return {'condition': False, 'reason': reason} - except ImportError: - return {'condition': True, 'reason': reason} +def no_dask_cuda() -> PytestSkip: + return no_mod("dask_cuda") -def no_cudf(): - try: - import cudf # noqa - CUDF_INSTALLED = True - except ImportError: - CUDF_INSTALLED = False - - return {'condition': not CUDF_INSTALLED, - 'reason': 'CUDF is not installed'} +def no_cudf() -> PytestSkip: + return no_mod("cudf") -def no_cupy(): - reason = 'cupy is not installed.' - try: - import cupy as _ # noqa - return {'condition': False, 'reason': reason} - except ImportError: - return {'condition': True, 'reason': reason} +def no_cupy() -> PytestSkip: + return no_mod("cupy") -def no_dask_cudf(): - reason = 'dask_cudf is not installed.' 
- try: - import dask_cudf as _ # noqa - return {'condition': False, 'reason': reason} - except ImportError: - return {'condition': True, 'reason': reason} +def no_dask_cudf() -> PytestSkip: + return no_mod("dask_cudf") -def no_json_schema(): - reason = 'jsonschema is not installed' - try: - import jsonschema # noqa - return {'condition': False, 'reason': reason} - except ImportError: - return {'condition': True, 'reason': reason} +def no_json_schema() -> PytestSkip: + return no_mod("jsonschema") -def no_graphviz(): - reason = 'graphviz is not installed' - try: - import graphviz # noqa - return {'condition': False, 'reason': reason} - except ImportError: - return {'condition': True, 'reason': reason} +def no_graphviz() -> PytestSkip: + return no_mod("graphviz") -def no_multiple(*args): +def no_multiple(*args: Any) -> PytestSkip: condition = False - reason = '' + reason = "" for arg in args: - condition = (condition or arg['condition']) - if arg['condition']: - reason = arg['reason'] + condition = condition or arg["condition"] + if arg["condition"]: + reason = arg["reason"] break - return {'condition': condition, 'reason': reason} + return {"condition": condition, "reason": reason} -def skip_s390x(): +def skip_s390x() -> PytestSkip: condition = platform.machine() == "s390x" reason = "Known to fail on s390x" return {"condition": condition, "reason": reason} class IteratorForTest(xgb.core.DataIter): + """Iterator for testing streaming DMatrix. (external memory, quantile)""" + def __init__( self, X: Sequence, y: Sequence, w: Optional[Sequence], - cache: Optional[str] = "./" + cache: Optional[str] = "./", ) -> None: assert len(X) == len(y) self.X = X @@ -242,7 +240,7 @@ def make_batches( rng = cupy.random.RandomState(1994) else: rng = np.random.RandomState(1994) - for i in range(n_batches): + for _ in range(n_batches): _X = rng.randn(n_samples_per_batch, n_features) _y = rng.randn(n_samples_per_batch) _w = rng.uniform(low=0, high=1, size=n_samples_per_batch) @@ -259,7 +257,7 @@ def make_batches_sparse( y = [] w = [] rng = np.random.RandomState(1994) - for i in range(n_batches): + for _ in range(n_batches): _X = sparse.random( n_samples_per_batch, n_features, @@ -276,8 +274,9 @@ def make_batches_sparse( return X, y, w -# Contains a dataset in numpy format as well as the relevant objective and metric class TestDataset: + """Contains a dataset in numpy format as well as the relevant objective and metric.""" + def __init__( self, name: str, get_dataset: Callable, objective: str, metric: str ) -> None: @@ -289,18 +288,24 @@ class TestDataset: self.margin: Optional[np.ndarray] = None def set_params(self, params_in: Dict[str, Any]) -> Dict[str, Any]: - params_in['objective'] = self.objective - params_in['eval_metric'] = self.metric + params_in["objective"] = self.objective + params_in["eval_metric"] = self.metric if self.objective == "multi:softmax": params_in["num_class"] = int(np.max(self.y) + 1) return params_in def get_dmat(self) -> xgb.DMatrix: return xgb.DMatrix( - self.X, self.y, self.w, base_margin=self.margin, enable_categorical=True + self.X, + self.y, + weight=self.w, + base_margin=self.margin, + enable_categorical=True, ) def get_device_dmat(self) -> xgb.DeviceQuantileDMatrix: + import cupy as cp + w = None if self.w is None else cp.array(self.w) X = cp.array(self.X, dtype=np.float32) y = cp.array(self.y, dtype=np.float32) @@ -318,9 +323,9 @@ class TestDataset: beg = i * per_batch end = min((i + 1) * per_batch, n_samples) assert end != beg - X = self.X[beg: end, ...] 
- y = self.y[beg: end] - w = self.w[beg: end] if self.w is not None else None + X = self.X[beg:end, ...] + y = self.y[beg:end] + w = self.w[beg:end] if self.w is not None else None predictor.append(X) response.append(y) if w is not None: @@ -334,25 +339,24 @@ class TestDataset: @memory.cache -def get_california_housing(): +def get_california_housing() -> Tuple[np.ndarray, np.ndarray]: data = datasets.fetch_california_housing() return data.data, data.target @memory.cache -def get_digits(): +def get_digits() -> Tuple[np.ndarray, np.ndarray]: data = datasets.load_digits() return data.data, data.target @memory.cache -def get_cancer(): - data = datasets.load_breast_cancer() - return data.data, data.target +def get_cancer() -> Tuple[np.ndarray, np.ndarray]: + return datasets.load_breast_cancer(return_X_y=True) @memory.cache -def get_sparse(): +def get_sparse() -> Tuple[np.ndarray, np.ndarray]: rng = np.random.RandomState(199) n = 2000 sparsity = 0.75 @@ -366,7 +370,7 @@ def get_sparse(): @memory.cache -def get_ames_housing(): +def get_ames_housing() -> Tuple[np.ndarray, np.ndarray]: """ Number of samples: 1460 Number of features: 20 @@ -374,22 +378,23 @@ def get_ames_housing(): Number of numerical features: 10 """ from sklearn.datasets import fetch_openml + X, y = fetch_openml(data_id=42165, as_frame=True, return_X_y=True) - categorical_columns_subset: list[str] = [ - "BldgType", # 5 cats, no nan - "GarageFinish", # 3 cats, nan - "LotConfig", # 5 cats, no nan - "Functional", # 7 cats, no nan - "MasVnrType", # 4 cats, nan - "HouseStyle", # 8 cats, no nan - "FireplaceQu", # 5 cats, nan - "ExterCond", # 5 cats, no nan - "ExterQual", # 4 cats, no nan - "PoolQC", # 3 cats, nan + categorical_columns_subset: List[str] = [ + "BldgType", # 5 cats, no nan + "GarageFinish", # 3 cats, nan + "LotConfig", # 5 cats, no nan + "Functional", # 7 cats, no nan + "MasVnrType", # 4 cats, nan + "HouseStyle", # 8 cats, no nan + "FireplaceQu", # 5 cats, nan + "ExterCond", # 5 cats, no nan + "ExterQual", # 4 cats, no nan + "PoolQC", # 3 cats, nan ] - numerical_columns_subset: list[str] = [ + numerical_columns_subset: List[str] = [ "3SsnPorch", "Fireplaces", "BsmtHalfBath", @@ -408,32 +413,70 @@ def get_ames_housing(): @memory.cache -def get_mq2008(dpath): +def get_mq2008( + dpath: str, +) -> Tuple[ + sparse.csr_matrix, + np.ndarray, + np.ndarray, + sparse.csr_matrix, + np.ndarray, + np.ndarray, + sparse.csr_matrix, + np.ndarray, + np.ndarray, +]: from sklearn.datasets import load_svmlight_files - src = 'https://s3-us-west-2.amazonaws.com/xgboost-examples/MQ2008.zip' - target = dpath + '/MQ2008.zip' + src = "https://s3-us-west-2.amazonaws.com/xgboost-examples/MQ2008.zip" + target = dpath + "/MQ2008.zip" if not os.path.exists(target): urllib.request.urlretrieve(url=src, filename=target) - with zipfile.ZipFile(target, 'r') as f: + with zipfile.ZipFile(target, "r") as f: f.extractall(path=dpath) - (x_train, y_train, qid_train, x_test, y_test, qid_test, - x_valid, y_valid, qid_valid) = load_svmlight_files( - (dpath + "MQ2008/Fold1/train.txt", - dpath + "MQ2008/Fold1/test.txt", - dpath + "MQ2008/Fold1/vali.txt"), - query_id=True, zero_based=False) + ( + x_train, + y_train, + qid_train, + x_test, + y_test, + qid_test, + x_valid, + y_valid, + qid_valid, + ) = load_svmlight_files( + ( + dpath + "MQ2008/Fold1/train.txt", + dpath + "MQ2008/Fold1/test.txt", + dpath + "MQ2008/Fold1/vali.txt", + ), + query_id=True, + zero_based=False, + ) - return (x_train, y_train, qid_train, x_test, y_test, qid_test, - x_valid, y_valid, qid_valid) + 
return ( + x_train, + y_train, + qid_train, + x_test, + y_test, + qid_test, + x_valid, + y_valid, + qid_valid, + ) @memory.cache def make_categorical( - n_samples: int, n_features: int, n_categories: int, onehot: bool, sparsity=0.0, -): + n_samples: int, + n_features: int, + n_categories: int, + onehot: bool, + sparsity: float = 0.0, +) -> Tuple[ArrayLike, np.ndarray]: import pandas as pd rng = np.random.RandomState(1994) @@ -457,7 +500,9 @@ def make_categorical( if sparsity > 0.0: for i in range(n_features): - index = rng.randint(low=0, high=n_samples-1, size=int(n_samples * sparsity)) + index = rng.randint( + low=0, high=n_samples - 1, size=int(n_samples * sparsity) + ) df.iloc[index, i] = np.NaN assert n_categories == np.unique(df.dtypes[i].categories).size @@ -466,9 +511,9 @@ def make_categorical( return df, label -def _cat_sampled_from(): +def _cat_sampled_from() -> strategies.SearchStrategy: @strategies.composite - def _make_cat(draw): + def _make_cat(draw: Callable) -> Tuple[int, int, int, float]: n_samples = draw(strategies.integers(2, 512)) n_features = draw(strategies.integers(1, 4)) n_cats = draw(strategies.integers(1, 128)) @@ -483,7 +528,7 @@ def _cat_sampled_from(): ) return n_samples, n_features, n_cats, sparsity - def _build(args): + def _build(args: Tuple[int, int, int, float]) -> TestDataset: n_samples = args[0] n_features = args[1] n_cats = args[2] @@ -495,12 +540,13 @@ def _cat_sampled_from(): "rmse", ) - return _make_cat().map(_build) + return _make_cat().map(_build) # pylint: disable=no-member -categorical_dataset_strategy = _cat_sampled_from() +categorical_dataset_strategy: strategies.SearchStrategy = _cat_sampled_from() +# pylint: disable=too-many-locals @memory.cache def make_sparse_regression( n_samples: int, n_features: int, sparsity: float, as_dense: bool @@ -530,8 +576,7 @@ def make_sparse_regression( # Use multi-thread to speed up the generation, convenient if you use this function # for benchmarking. 
-    n_threads = multiprocessing.cpu_count()
-    n_threads = min(n_threads, n_features)
+    n_threads = min(multiprocessing.cpu_count(), n_features)
 
     def random_csc(t_id: int) -> sparse.csc_matrix:
         rng = np.random.default_rng(1994 * t_id)
@@ -653,7 +698,7 @@ _unweighted_datasets_strategy = strategies.sampled_from(
 
 
 @strategies.composite
-def _dataset_weight_margin(draw):
+def _dataset_weight_margin(draw: Callable) -> TestDataset:
     data: TestDataset = draw(_unweighted_datasets_strategy)
     if draw(strategies.booleans()):
         data.w = draw(
@@ -673,6 +718,7 @@ def _dataset_weight_margin(draw):
                 elements=strategies.floats(0.5, 1.0),
             )
         )
+        assert data.margin is not None
         if num_class != 1:
             data.margin = data.margin.reshape(data.y.shape[0], num_class)
 
@@ -684,24 +730,24 @@ def _dataset_weight_margin(draw):
 dataset_strategy = _dataset_weight_margin()
 
 
-def non_increasing(L, tolerance=1e-4):
+def non_increasing(L: Sequence[float], tolerance: float = 1e-4) -> bool:
     return all((y - x) < tolerance for x, y in zip(L, L[1:]))
 
 
-def eval_error_metric(predt, dtrain: xgb.DMatrix):
+def eval_error_metric(predt: np.ndarray, dtrain: xgb.DMatrix) -> Tuple[str, np.float64]:
     """Evaluation metric for xgb.train"""
     label = dtrain.get_label()
     r = np.zeros(predt.shape)
     gt = predt > 0.5
     if predt.size == 0:
-        return "CustomErr", 0
+        return "CustomErr", np.float64(0.0)
     r[gt] = 1 - label[gt]
     le = predt <= 0.5
     r[le] = label[le]
-    return 'CustomErr', np.sum(r)
+    return "CustomErr", np.sum(r)
 
 
-def eval_error_metric_skl(y_true: np.ndarray, y_score: np.ndarray) -> float:
+def eval_error_metric_skl(y_true: np.ndarray, y_score: np.ndarray) -> np.float64:
     """Evaluation metric that looks like metrics provided by sklearn."""
     r = np.zeros(y_score.shape)
     gt = y_score > 0.5
@@ -717,13 +763,15 @@ def root_mean_square(y_true: np.ndarray, y_score: np.ndarray) -> float:
     return rmse
 
 
-def softmax(x):
+def softmax(x: np.ndarray) -> np.ndarray:
     e = np.exp(x)
     return e / np.sum(e)
 
 
-def softprob_obj(classes):
-    def objective(labels, predt):
+def softprob_obj(classes: int) -> SklObjective:
+    def objective(
+        labels: np.ndarray, predt: np.ndarray
+    ) -> Tuple[np.ndarray, np.ndarray]:
         rows = labels.shape[0]
         grad = np.zeros((rows, classes), dtype=float)
         hess = np.zeros((rows, classes), dtype=float)
@@ -746,29 +794,33 @@ def softprob_obj(classes):
 
 
 class DirectoryExcursion:
-    def __init__(self, path: os.PathLike, cleanup=False):
-        '''Change directory. Change back and optionally cleaning up the directory when exit.
+    """Change directory. Change back and optionally clean up the directory on
+    exit.
- ''' + """ + + def __init__(self, path: os.PathLike, cleanup: bool = False): self.path = path self.curdir = os.path.normpath(os.path.abspath(os.path.curdir)) self.cleanup = cleanup - self.files = {} + self.files: Set[str] = set() - def __enter__(self): + def __enter__(self) -> None: os.chdir(self.path) if self.cleanup: self.files = { os.path.join(root, f) - for root, subdir, files in os.walk(self.path) for f in files + for root, subdir, files in os.walk(os.path.expanduser(self.path)) + for f in files } - def __exit__(self, *args): + def __exit__(self, *args: Any) -> None: os.chdir(self.curdir) if self.cleanup: files = { os.path.join(root, f) - for root, subdir, files in os.walk(self.path) for f in files + for root, subdir, files in os.walk(os.path.expanduser(self.path)) + for f in files } diff = files.difference(self.files) for f in diff: @@ -776,7 +828,7 @@ class DirectoryExcursion: @contextmanager -def captured_output(): +def captured_output() -> Generator[Tuple[StringIO, StringIO], None, None]: """Reassign stdout temporarily in order to test printed statements Taken from: https://stackoverflow.com/questions/4219717/how-to-assert-output-with-nosetest-unittest-in-python @@ -793,14 +845,46 @@ def captured_output(): sys.stdout, sys.stderr = old_out, old_err -try: - # Python 3.7+ - from contextlib import nullcontext as noop_context -except ImportError: - # Python 3.6 - from contextlib import suppress as noop_context +def timeout(sec: int, *args: Any, enable: bool = True, **kwargs: Any) -> Any: + """Make a pytest mark for the `pytest-timeout` package. + + Parameters + ---------- + sec : + Timeout seconds. + enable : + Control whether timeout should be applied, used for debugging. + + Returns + ------- + pytest.mark.timeout + """ + + if enable: + return pytest.mark.timeout(sec, *args, **kwargs) + return pytest.mark.timeout(None, *args, **kwargs) -CURDIR = os.path.normpath(os.path.abspath(os.path.dirname(__file__))) -PROJECT_ROOT = os.path.normpath( - os.path.join(CURDIR, os.path.pardir, os.path.pardir)) +def demo_dir(path: str) -> str: + """Look for the demo directory based on the test file name.""" + path = normpath(os.path.dirname(path)) + while True: + subdirs = [f.path for f in os.scandir(path) if f.is_dir()] + subdirs = [os.path.basename(d) for d in subdirs] + if "demo" in subdirs: + return os.path.join(path, "demo") + new_path = normpath(os.path.join(path, os.path.pardir)) + assert new_path != path + path = new_path + + +def normpath(path: str) -> str: + return os.path.normpath(os.path.abspath(path)) + + +def data_dir(path: str) -> str: + return os.path.join(demo_dir(path), "data") + + +def project_root(path: str) -> str: + return normpath(os.path.join(demo_dir(path), os.path.pardir)) diff --git a/tests/ci_build/lint_python.py b/tests/ci_build/lint_python.py index 84ad02f46..973d5f072 100644 --- a/tests/ci_build/lint_python.py +++ b/tests/ci_build/lint_python.py @@ -121,12 +121,14 @@ if __name__ == "__main__": "python-package/xgboost/sklearn.py", "python-package/xgboost/spark", "python-package/xgboost/federated.py", - "python-package/xgboost/testing.py", + "python-package/xgboost/testing", # tests "tests/python/test_config.py", + "tests/python/test_data_iterator.py", "tests/python/test_spark/", "tests/python/test_quantile_dmatrix.py", "tests/python-gpu/test_gpu_spark/", + "tests/python-gpu/test_gpu_data_iterator.py", "tests/ci_build/lint_python.py", # demo "demo/guide-python/cat_in_the_dat.py", diff --git a/tests/python-gpu/conftest.py b/tests/python-gpu/conftest.py index 
52299e13e..317622a34 100644 --- a/tests/python-gpu/conftest.py +++ b/tests/python-gpu/conftest.py @@ -1,9 +1,7 @@ -import sys import pytest -import logging -sys.path.append("tests/python") -import testing as tm # noqa +from xgboost import testing as tm # noqa + def has_rmm(): try: @@ -34,8 +32,8 @@ def local_cuda_client(request, pytestconfig): kwargs['rmm_pool_size'] = '2GB' if tm.no_dask_cuda()['condition']: raise ImportError('The local_cuda_cluster fixture requires dask_cuda package') - from dask_cuda import LocalCUDACluster from dask.distributed import Client + from dask_cuda import LocalCUDACluster yield Client(LocalCUDACluster(**kwargs)) def pytest_addoption(parser): diff --git a/tests/python-gpu/load_pickle.py b/tests/python-gpu/load_pickle.py index 45f33bb16..f12dde360 100644 --- a/tests/python-gpu/load_pickle.py +++ b/tests/python-gpu/load_pickle.py @@ -1,16 +1,14 @@ '''Loading a pickled model generated by test_pickling.py, only used by `test_gpu_with_dask.py`''' -import os -import numpy as np -import xgboost as xgb import json +import os + +import numpy as np import pytest -import sys +from test_gpu_pickling import build_dataset, load_pickle, model_path -from test_gpu_pickling import build_dataset, model_path, load_pickle - -sys.path.append("tests/python") -import testing as tm +import xgboost as xgb +from xgboost import testing as tm class TestLoadPickle: diff --git a/tests/python-gpu/test_device_quantile_dmatrix.py b/tests/python-gpu/test_device_quantile_dmatrix.py index 2c3a7a760..385f1b83e 100644 --- a/tests/python-gpu/test_device_quantile_dmatrix.py +++ b/tests/python-gpu/test_device_quantile_dmatrix.py @@ -5,10 +5,10 @@ import pytest from hypothesis import given, settings, strategies import xgboost as xgb +from xgboost import testing as tm sys.path.append("tests/python") import test_quantile_dmatrix as tqd -import testing as tm class TestDeviceQuantileDMatrix: diff --git a/tests/python-gpu/test_from_cudf.py b/tests/python-gpu/test_from_cudf.py index f924fc348..f22e69b22 100644 --- a/tests/python-gpu/test_from_cudf.py +++ b/tests/python-gpu/test_from_cudf.py @@ -2,11 +2,12 @@ import json import sys import numpy as np -import xgboost as xgb import pytest +import xgboost as xgb +from xgboost import testing as tm + sys.path.append("tests/python") -import testing as tm from test_dmatrix import set_base_margin_info @@ -85,8 +86,8 @@ def _test_from_cudf(DMatrixT): def _test_cudf_training(DMatrixT): - from cudf import DataFrame as df import pandas as pd + from cudf import DataFrame as df np.random.seed(1) X = pd.DataFrame(np.random.randn(50, 10)) y = pd.DataFrame(np.random.randn(50)) @@ -109,8 +110,8 @@ def _test_cudf_training(DMatrixT): def _test_cudf_metainfo(DMatrixT): - from cudf import DataFrame as df import pandas as pd + from cudf import DataFrame as df n = 100 X = np.random.random((n, 2)) dmat_cudf = DMatrixT(df.from_pandas(pd.DataFrame(X))) @@ -247,9 +248,9 @@ Arrow specification.''' @pytest.mark.skipif(**tm.no_sklearn()) @pytest.mark.skipif(**tm.no_pandas()) def test_cudf_training_with_sklearn(): + import pandas as pd from cudf import DataFrame as df from cudf import Series as ss - import pandas as pd np.random.seed(1) X = pd.DataFrame(np.random.randn(50, 10)) y = pd.DataFrame((np.random.randn(50) > 0).astype(np.int8)) diff --git a/tests/python-gpu/test_from_cupy.py b/tests/python-gpu/test_from_cupy.py index 77fa694e5..841ab7d34 100644 --- a/tests/python-gpu/test_from_cupy.py +++ b/tests/python-gpu/test_from_cupy.py @@ -1,12 +1,15 @@ -import numpy as np -import xgboost as 
xgb import sys + +import numpy as np import pytest +import xgboost as xgb + sys.path.append("tests/python") -import testing as tm from test_dmatrix import set_base_margin_info +from xgboost import testing as tm + def dmatrix_from_cupy(input_type, DMatrixT, missing=np.NAN): '''Test constructing DMatrix from cupy''' diff --git a/tests/python-gpu/test_gpu_basic_models.py b/tests/python-gpu/test_gpu_basic_models.py index 9e955eac2..83d1a2557 100644 --- a/tests/python-gpu/test_gpu_basic_models.py +++ b/tests/python-gpu/test_gpu_basic_models.py @@ -1,13 +1,18 @@ -import sys import os +import sys + import numpy as np -import xgboost as xgb import pytest + +import xgboost as xgb +from xgboost import testing as tm + sys.path.append("tests/python") +import test_basic_models as test_bm + # Don't import the test class, otherwise they will run twice. import test_callback as test_cb # noqa -import test_basic_models as test_bm -import testing as tm + rng = np.random.RandomState(1994) diff --git a/tests/python-gpu/test_gpu_data_iterator.py b/tests/python-gpu/test_gpu_data_iterator.py index 9753a51e0..23e495bcc 100644 --- a/tests/python-gpu/test_gpu_data_iterator.py +++ b/tests/python-gpu/test_gpu_data_iterator.py @@ -1,13 +1,12 @@ -import numpy as np -import xgboost as xgb -from hypothesis import given, strategies, settings -import pytest import sys +import pytest +from hypothesis import given, settings, strategies +from xgboost.testing import no_cupy + sys.path.append("tests/python") -from test_data_iterator import test_single_batch as cpu_single_batch from test_data_iterator import run_data_iterator -from testing import no_cupy +from test_data_iterator import test_single_batch as cpu_single_batch def test_gpu_single_batch() -> None: @@ -24,7 +23,11 @@ def test_gpu_single_batch() -> None: ) @settings(deadline=None, max_examples=10, print_blob=True) def test_gpu_data_iterator( - n_samples_per_batch: int, n_features: int, n_batches: int, subsample: bool, use_cupy: bool + n_samples_per_batch: int, + n_features: int, + n_batches: int, + subsample: bool, + use_cupy: bool, ) -> None: run_data_iterator( n_samples_per_batch, n_features, n_batches, "gpu_hist", subsample, use_cupy diff --git a/tests/python-gpu/test_gpu_demos.py b/tests/python-gpu/test_gpu_demos.py index 54909da48..ef181a67e 100644 --- a/tests/python-gpu/test_gpu_demos.py +++ b/tests/python-gpu/test_gpu_demos.py @@ -1,10 +1,13 @@ import os import subprocess import sys + import pytest + +from xgboost import testing as tm + sys.path.append("tests/python") -import testing as tm -import test_demos as td # noqa +import test_demos as td # noqa @pytest.mark.skipif(**tm.no_cupy()) @@ -31,6 +34,6 @@ def test_categorical_demo(): @pytest.mark.skipif(**tm.no_cupy()) @pytest.mark.mgpu def test_dask_training(): - script = os.path.join(tm.PROJECT_ROOT, 'demo', 'dask', 'gpu_training.py') + script = os.path.join(tm.demo_dir(__file__), 'dask', 'gpu_training.py') cmd = ['python', script] - subprocess.check_call(cmd) \ No newline at end of file + subprocess.check_call(cmd) diff --git a/tests/python-gpu/test_gpu_eval_metrics.py b/tests/python-gpu/test_gpu_eval_metrics.py index 1282e115a..cb4d8eb6c 100644 --- a/tests/python-gpu/test_gpu_eval_metrics.py +++ b/tests/python-gpu/test_gpu_eval_metrics.py @@ -1,7 +1,9 @@ import sys -import xgboost + import pytest +import xgboost + sys.path.append("tests/python") import test_eval_metrics as test_em # noqa diff --git a/tests/python-gpu/test_gpu_interaction_constraints.py b/tests/python-gpu/test_gpu_interaction_constraints.py 
index 885cf5bf9..ee85cf075 100644 --- a/tests/python-gpu/test_gpu_interaction_constraints.py +++ b/tests/python-gpu/test_gpu_interaction_constraints.py @@ -1,8 +1,11 @@ -import numpy as np import sys + +import numpy as np + sys.path.append("tests/python") # Don't import the test class, otherwise they will run twice. import test_interaction_constraints as test_ic # noqa + rng = np.random.RandomState(1994) diff --git a/tests/python-gpu/test_gpu_linear.py b/tests/python-gpu/test_gpu_linear.py index 5cd63e514..40c5d4845 100644 --- a/tests/python-gpu/test_gpu_linear.py +++ b/tests/python-gpu/test_gpu_linear.py @@ -1,15 +1,10 @@ -import sys - import pytest from hypothesis import assume, given, note, settings, strategies import xgboost as xgb -from xgboost import testing +from xgboost import testing as tm -sys.path.append("tests/python") -import testing as tm - -pytestmark = testing.timeout(10) +pytestmark = tm.timeout(10) parameter_strategy = strategies.fixed_dictionaries({ 'booster': strategies.just('gblinear'), diff --git a/tests/python-gpu/test_gpu_pickling.py b/tests/python-gpu/test_gpu_pickling.py index 4b321bece..7c452926e 100644 --- a/tests/python-gpu/test_gpu_pickling.py +++ b/tests/python-gpu/test_gpu_pickling.py @@ -3,20 +3,17 @@ import json import os import pickle import subprocess -import sys import numpy as np import pytest import xgboost as xgb -from xgboost import XGBClassifier, testing - -sys.path.append("tests/python") -import testing as tm +from xgboost import XGBClassifier +from xgboost import testing as tm model_path = './model.pkl' -pytestmark = testing.timeout(30) +pytestmark = tm.timeout(30) def build_dataset(): diff --git a/tests/python-gpu/test_gpu_plotting.py b/tests/python-gpu/test_gpu_plotting.py index f12f895a0..22b3b41fc 100644 --- a/tests/python-gpu/test_gpu_plotting.py +++ b/tests/python-gpu/test_gpu_plotting.py @@ -1,10 +1,11 @@ import sys + import pytest -sys.path.append("tests/python") -import testing as tm -import test_plotting as tp +from xgboost import testing as tm +sys.path.append("tests/python") +import test_plotting as tp pytestmark = pytest.mark.skipif(**tm.no_multiple(tm.no_matplotlib(), tm.no_graphviz())) diff --git a/tests/python-gpu/test_gpu_prediction.py b/tests/python-gpu/test_gpu_prediction.py index 3dedb0637..63154e775 100644 --- a/tests/python-gpu/test_gpu_prediction.py +++ b/tests/python-gpu/test_gpu_prediction.py @@ -6,7 +6,7 @@ from hypothesis import assume, given, settings, strategies from xgboost.compat import PANDAS_INSTALLED import xgboost as xgb -from xgboost import testing +from xgboost import testing as tm if PANDAS_INSTALLED: from hypothesis.extra.pandas import column, data_frames, range_indexes @@ -16,7 +16,6 @@ else: column, data_frames, range_indexes = noop, noop, noop sys.path.append("tests/python") -import testing as tm from test_predict import run_predict_leaf # noqa from test_predict import run_threaded_predict # noqa @@ -33,7 +32,7 @@ predict_parameter_strategy = strategies.fixed_dictionaries({ 'num_parallel_tree': strategies.sampled_from([1, 4]), }) -pytestmark = testing.timeout(20) +pytestmark = tm.timeout(20) class TestGPUPredict: @@ -227,8 +226,8 @@ class TestGPUPredict: @pytest.mark.skipif(**tm.no_cupy()) @pytest.mark.skipif(**tm.no_cudf()) def test_inplace_predict_cudf(self): - import cupy as cp import cudf + import cupy as cp import pandas as pd rows = 1000 cols = 10 @@ -379,8 +378,8 @@ class TestGPUPredict: @pytest.mark.skipif(**tm.no_cupy()) @pytest.mark.parametrize("n_classes", [2, 3]) def test_predict_dart(self, 
n_classes): - from sklearn.datasets import make_classification import cupy as cp + from sklearn.datasets import make_classification n_samples = 1000 X_, y_ = make_classification( n_samples=n_samples, n_informative=5, n_classes=n_classes diff --git a/tests/python-gpu/test_gpu_ranking.py b/tests/python-gpu/test_gpu_ranking.py index 059d9325a..d86c1aa14 100644 --- a/tests/python-gpu/test_gpu_ranking.py +++ b/tests/python-gpu/test_gpu_ranking.py @@ -1,20 +1,15 @@ import itertools import os import shutil -import sys import urllib.request import zipfile import numpy as np import xgboost -from xgboost import testing +from xgboost import testing as tm -sys.path.append("tests/python") - -import testing as tm # noqa - -pytestmark = testing.timeout(10) +pytestmark = tm.timeout(10) class TestRanking: @@ -24,8 +19,9 @@ class TestRanking: Download and setup the test fixtures """ from sklearn.datasets import load_svmlight_files + # download the test data - cls.dpath = os.path.join(tm.PROJECT_ROOT, "demo/rank/") + cls.dpath = os.path.join(tm.demo_dir(__file__), "rank/") src = 'https://s3-us-west-2.amazonaws.com/xgboost-examples/MQ2008.zip' target = os.path.join(cls.dpath, "MQ2008.zip") diff --git a/tests/python-gpu/test_gpu_spark/test_data.py b/tests/python-gpu/test_gpu_spark/test_data.py index 523973250..b529fa6ab 100644 --- a/tests/python-gpu/test_gpu_spark/test_data.py +++ b/tests/python-gpu/test_gpu_spark/test_data.py @@ -1,13 +1,8 @@ import sys -from typing import List -import numpy as np -import pandas as pd import pytest -sys.path.append("tests/python") - -import testing as tm +from xgboost import testing as tm if tm.no_spark()["condition"]: pytest.skip(msg=tm.no_spark()["reason"], allow_module_level=True) @@ -15,6 +10,7 @@ if sys.platform.startswith("win") or sys.platform.startswith("darwin"): pytest.skip("Skipping PySpark tests on Windows", allow_module_level=True) +sys.path.append("tests/python") from test_spark.test_data import run_dmatrix_ctor diff --git a/tests/python-gpu/test_gpu_spark/test_gpu_spark.py b/tests/python-gpu/test_gpu_spark/test_gpu_spark.py index bcae96dc5..b8f529218 100644 --- a/tests/python-gpu/test_gpu_spark/test_gpu_spark.py +++ b/tests/python-gpu/test_gpu_spark/test_gpu_spark.py @@ -6,8 +6,7 @@ import sys import pytest import sklearn -sys.path.append("tests/python") -import testing as tm +from xgboost import testing as tm if tm.no_spark()["condition"]: pytest.skip(msg=tm.no_spark()["reason"], allow_module_level=True) diff --git a/tests/python-gpu/test_gpu_training_continuation.py b/tests/python-gpu/test_gpu_training_continuation.py index 7fa17d4be..6a908af27 100644 --- a/tests/python-gpu/test_gpu_training_continuation.py +++ b/tests/python-gpu/test_gpu_training_continuation.py @@ -1,7 +1,9 @@ -import numpy as np -import xgboost as xgb import json +import numpy as np + +import xgboost as xgb + rng = np.random.RandomState(1994) diff --git a/tests/python-gpu/test_gpu_updaters.py b/tests/python-gpu/test_gpu_updaters.py index e86152327..10fbe3d35 100644 --- a/tests/python-gpu/test_gpu_updaters.py +++ b/tests/python-gpu/test_gpu_updaters.py @@ -6,13 +6,12 @@ import pytest from hypothesis import assume, given, note, settings, strategies import xgboost as xgb -from xgboost import testing +from xgboost import testing as tm sys.path.append("tests/python") import test_updaters as test_up -import testing as tm -pytestmark = testing.timeout(30) +pytestmark = tm.timeout(30) parameter_strategy = strategies.fixed_dictionaries({ 'max_depth': strategies.integers(0, 11), diff --git 
a/tests/python-gpu/test_gpu_with_dask/test_gpu_with_dask.py b/tests/python-gpu/test_gpu_with_dask/test_gpu_with_dask.py index 09787a439..356845a01 100644 --- a/tests/python-gpu/test_gpu_with_dask/test_gpu_with_dask.py +++ b/tests/python-gpu/test_gpu_with_dask/test_gpu_with_dask.py @@ -1,52 +1,54 @@ """Copyright 2019-2022 XGBoost contributors""" -import sys -import os -from typing import Type, TypeVar, Any, Dict, List, Union -import pytest -import numpy as np import asyncio -import xgboost +import os import subprocess +import sys from collections import OrderedDict from inspect import signature -from hypothesis import given, strategies, settings, note +from typing import Any, Dict, Type, TypeVar, Union + +import numpy as np +import pytest +from hypothesis import given, note, settings, strategies from hypothesis._settings import duration from test_gpu_updaters import parameter_strategy +import xgboost +from xgboost import testing as tm + if sys.platform.startswith("win"): pytest.skip("Skipping dask tests on Windows", allow_module_level=True) sys.path.append("tests/python") -import testing as tm # noqa if tm.no_dask_cuda()["condition"]: pytest.skip(tm.no_dask_cuda()["reason"], allow_module_level=True) -from test_with_dask import run_empty_dmatrix_reg # noqa -from test_with_dask import run_empty_dmatrix_auc # noqa +from test_with_dask import _get_client_workers # noqa +from test_with_dask import generate_array # noqa +from test_with_dask import make_categorical # noqa from test_with_dask import run_auc # noqa from test_with_dask import run_boost_from_prediction # noqa from test_with_dask import run_boost_from_prediction_multi_class # noqa -from test_with_dask import run_dask_classifier # noqa -from test_with_dask import run_empty_dmatrix_cls # noqa -from test_with_dask import _get_client_workers # noqa -from test_with_dask import generate_array # noqa -from test_with_dask import kCols as random_cols # noqa -from test_with_dask import suppress # noqa -from test_with_dask import run_tree_stats # noqa from test_with_dask import run_categorical # noqa -from test_with_dask import make_categorical # noqa - +from test_with_dask import run_dask_classifier # noqa +from test_with_dask import run_empty_dmatrix_auc # noqa +from test_with_dask import run_empty_dmatrix_cls # noqa +from test_with_dask import run_empty_dmatrix_reg # noqa +from test_with_dask import run_tree_stats # noqa +from test_with_dask import suppress # noqa +from test_with_dask import kCols as random_cols # noqa try: - import dask.dataframe as dd - from xgboost import dask as dxgb - import xgboost as xgb - from dask.distributed import Client - from dask import array as da - from dask_cuda import LocalCUDACluster, utils import cudf + import dask.dataframe as dd + from dask import array as da + from dask.distributed import Client + from dask_cuda import LocalCUDACluster, utils + + import xgboost as xgb + from xgboost import dask as dxgb except ImportError: pass @@ -334,9 +336,9 @@ class TestDistributedGPU: @pytest.mark.skipif(**tm.no_dask_cudf()) def test_empty_partition(self, local_cuda_client: Client) -> None: - import dask_cudf import cudf import cupy + import dask_cudf mult = 100 df = cudf.DataFrame( diff --git a/tests/python-gpu/test_gpu_with_sklearn.py b/tests/python-gpu/test_gpu_with_sklearn.py index 227c2a874..8ecb4bdc7 100644 --- a/tests/python-gpu/test_gpu_with_sklearn.py +++ b/tests/python-gpu/test_gpu_with_sklearn.py @@ -1,13 +1,15 @@ import json -import xgboost as xgb -import pytest -import tempfile -import sys -import numpy 
as np import os +import sys +import tempfile + +import numpy as np +import pytest + +import xgboost as xgb +from xgboost import testing as tm sys.path.append("tests/python") -import testing as tm # noqa import test_with_sklearn as twskl # noqa pytestmark = pytest.mark.skipif(**tm.no_sklearn()) @@ -38,9 +40,9 @@ def test_gpu_binary_classification(): @pytest.mark.skipif(**tm.no_cupy()) @pytest.mark.skipif(**tm.no_cudf()) def test_boost_from_prediction_gpu_hist(): - from sklearn.datasets import load_breast_cancer, load_digits - import cupy as cp import cudf + import cupy as cp + from sklearn.datasets import load_breast_cancer, load_digits tree_method = "gpu_hist" X, y = load_breast_cancer(return_X_y=True) @@ -68,12 +70,12 @@ def test_num_parallel_tree(): @pytest.mark.skipif(**tm.no_cudf()) @pytest.mark.skipif(**tm.no_sklearn()) def test_categorical(): - import pandas as pd import cudf import cupy as cp + import pandas as pd from sklearn.datasets import load_svmlight_file - data_dir = os.path.join(tm.PROJECT_ROOT, "demo", "data") + data_dir = tm.data_dir(__file__) X, y = load_svmlight_file(os.path.join(data_dir, "agaricus.txt.train")) clf = xgb.XGBClassifier( tree_method="gpu_hist", @@ -123,9 +125,9 @@ def test_categorical(): @pytest.mark.skipif(**tm.no_cupy()) @pytest.mark.skipif(**tm.no_cudf()) def test_classififer(): - from sklearn.datasets import load_digits - import cupy as cp import cudf + import cupy as cp + from sklearn.datasets import load_digits X, y = load_digits(return_X_y=True) y *= 10 diff --git a/tests/python-gpu/test_large_input.py b/tests/python-gpu/test_large_input.py index 4c8e06a6f..310dd6f10 100644 --- a/tests/python-gpu/test_large_input.py +++ b/tests/python-gpu/test_large_input.py @@ -1,23 +1,23 @@ -import numpy as np -import xgboost as xgb -import cupy as cp -import time -import pytest - - -# Test for integer overflow or out of memory exceptions -def test_large_input(): - available_bytes, _ = cp.cuda.runtime.memGetInfo() - # 15 GB - required_bytes = 1.5e+10 - if available_bytes < required_bytes: - pytest.skip("Not enough memory on this device") - n = 1000 - m = ((1 << 31) + n - 1) // n - assert (np.log2(m * n) > 31) - X = cp.ones((m, n), dtype=np.float32) - y = cp.ones(m) - dmat = xgb.DeviceQuantileDMatrix(X, y) - booster = xgb.train({"tree_method": "gpu_hist", "max_depth": 1}, dmat, 1) - del y - booster.inplace_predict(X) +import cupy as cp +import numpy as np +import pytest + +import xgboost as xgb + + +# Test for integer overflow or out of memory exceptions +def test_large_input(): + available_bytes, _ = cp.cuda.runtime.memGetInfo() + # 15 GB + required_bytes = 1.5e+10 + if available_bytes < required_bytes: + pytest.skip("Not enough memory on this device") + n = 1000 + m = ((1 << 31) + n - 1) // n + assert (np.log2(m * n) > 31) + X = cp.ones((m, n), dtype=np.float32) + y = cp.ones(m) + dmat = xgb.DeviceQuantileDMatrix(X, y) + booster = xgb.train({"tree_method": "gpu_hist", "max_depth": 1}, dmat, 1) + del y + booster.inplace_predict(X) diff --git a/tests/python-gpu/test_monotonic_constraints.py b/tests/python-gpu/test_monotonic_constraints.py index fdecf0306..3bf4f0deb 100644 --- a/tests/python-gpu/test_monotonic_constraints.py +++ b/tests/python-gpu/test_monotonic_constraints.py @@ -1,11 +1,12 @@ import sys -import numpy as np +import numpy as np import pytest import xgboost as xgb +from xgboost import testing as tm + sys.path.append("tests/python") -import testing as tm import test_monotone_constraints as tmc rng = np.random.RandomState(1994) diff --git 
a/tests/python/generate_models.py b/tests/python/generate_models.py index 7b881355e..2a2444e8f 100644 --- a/tests/python/generate_models.py +++ b/tests/python/generate_models.py @@ -1,7 +1,9 @@ -import xgboost -import numpy as np import os +import numpy as np + +import xgboost + kRounds = 2 kRows = 1000 kCols = 4 diff --git a/tests/python/test_basic.py b/tests/python/test_basic.py index e155ab047..fab2a6eca 100644 --- a/tests/python/test_basic.py +++ b/tests/python/test_basic.py @@ -1,12 +1,13 @@ -# -*- coding: utf-8 -*- -import numpy as np -import os -import xgboost as xgb -import pytest import json -from pathlib import Path +import os import tempfile -import testing as tm +from pathlib import Path + +import numpy as np +import pytest + +import xgboost as xgb +from xgboost import testing as tm dpath = 'demo/data/' rng = np.random.RandomState(1994) diff --git a/tests/python/test_basic_models.py b/tests/python/test_basic_models.py index 82d0096cf..06f666da1 100644 --- a/tests/python/test_basic_models.py +++ b/tests/python/test_basic_models.py @@ -1,13 +1,15 @@ -import numpy as np -import xgboost as xgb -import os import json -import testing as tm -import pytest import locale +import os import tempfile -dpath = os.path.join(tm.PROJECT_ROOT, 'demo/data/') +import numpy as np +import pytest + +import xgboost as xgb +from xgboost import testing as tm + +dpath = tm.data_dir(__file__) rng = np.random.RandomState(1994) @@ -36,8 +38,8 @@ class TestModels: param = {'verbosity': 0, 'objective': 'binary:logistic', 'booster': 'gblinear', 'alpha': 0.0001, 'lambda': 1, 'nthread': 1} - dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train') - dtest = xgb.DMatrix(dpath + 'agaricus.txt.test') + dtrain = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.train")) + dtest = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.test")) watchlist = [(dtest, 'eval'), (dtrain, 'train')] num_round = 4 bst = xgb.train(param, dtrain, num_round, watchlist) @@ -49,8 +51,8 @@ class TestModels: assert err < 0.2 def test_dart(self): - dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train') - dtest = xgb.DMatrix(dpath + 'agaricus.txt.test') + dtrain = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.train")) + dtest = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.test")) param = {'max_depth': 5, 'objective': 'binary:logistic', 'eval_metric': 'logloss', 'booster': 'dart', 'verbosity': 1} # specify validations set to watch performance @@ -116,7 +118,7 @@ class TestModels: def test_boost_from_prediction(self): # Re-construct dtrain here to avoid modification - margined = xgb.DMatrix(dpath + 'agaricus.txt.train') + margined = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.train")) bst = xgb.train({'tree_method': 'hist'}, margined, 1) predt_0 = bst.predict(margined, output_margin=True) margined.set_base_margin(predt_0) @@ -124,13 +126,13 @@ class TestModels: predt_1 = bst.predict(margined) assert np.any(np.abs(predt_1 - predt_0) > 1e-6) - dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train') + dtrain = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.train")) bst = xgb.train({'tree_method': 'hist'}, dtrain, 2) predt_2 = bst.predict(dtrain) assert np.all(np.abs(predt_2 - predt_1) < 1e-6) def test_boost_from_existing_model(self): - X = xgb.DMatrix(dpath + 'agaricus.txt.train') + X = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.train")) booster = xgb.train({'tree_method': 'hist'}, X, num_boost_round=4) assert booster.num_boosted_rounds() == 4 booster = xgb.train({'tree_method': 'hist'}, X, num_boost_round=4, @@ -150,8 +152,8 @@ class TestModels: 'objective': 
'reg:logistic', "tree_method": tree_method } - dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train') - dtest = xgb.DMatrix(dpath + 'agaricus.txt.test') + dtrain = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.train")) + dtest = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.test")) watchlist = [(dtest, 'eval'), (dtrain, 'train')] num_round = 10 @@ -197,8 +199,8 @@ class TestModels: self.run_custom_objective() def test_multi_eval_metric(self): - dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train') - dtest = xgb.DMatrix(dpath + 'agaricus.txt.test') + dtrain = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.train")) + dtest = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.test")) watchlist = [(dtest, 'eval'), (dtrain, 'train')] param = {'max_depth': 2, 'eta': 0.2, 'verbosity': 1, 'objective': 'binary:logistic'} @@ -220,7 +222,7 @@ class TestModels: param['scale_pos_weight'] = ratio return (dtrain, dtest, param) - dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train') + dtrain = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.train")) xgb.cv(param, dtrain, num_round, nfold=5, metrics={'auc'}, seed=0, fpreproc=fpreproc) @@ -228,7 +230,7 @@ class TestModels: param = {'max_depth': 2, 'eta': 1, 'verbosity': 0, 'objective': 'binary:logistic'} num_round = 2 - dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train') + dtrain = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.train")) xgb.cv(param, dtrain, num_round, nfold=5, metrics={'error'}, seed=0, show_stdv=False) @@ -346,7 +348,7 @@ class TestModels: os.remove(model_path) try: - dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train') + dtrain = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.train")) xgb.train({'objective': 'foo'}, dtrain, num_boost_round=1) except ValueError as e: e_str = str(e) diff --git a/tests/python/test_callback.py b/tests/python/test_callback.py index dcd898ac0..3e972345b 100644 --- a/tests/python/test_callback.py +++ b/tests/python/test_callback.py @@ -1,9 +1,12 @@ -from typing import Union -import xgboost as xgb -import pytest import os -import testing as tm import tempfile +from contextlib import nullcontext +from typing import Union + +import pytest + +import xgboost as xgb +from xgboost import testing as tm # We use the dataset for tests. 
pytestmark = pytest.mark.skipif(**tm.no_sklearn()) @@ -271,13 +274,14 @@ class TestCallbacks: """Test learning rate scheduler, used by both CPU and GPU tests.""" scheduler = xgb.callback.LearningRateScheduler - dpath = os.path.join(tm.PROJECT_ROOT, 'demo/data/') - dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train') - dtest = xgb.DMatrix(dpath + 'agaricus.txt.test') + dpath = tm.data_dir(__file__) + dtrain = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.train")) + dtest = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.test")) + watchlist = [(dtest, 'eval'), (dtrain, 'train')] num_round = 4 - warning_check = tm.noop_context() + warning_check = nullcontext() # learning_rates as a list # init eta with 0 to check whether learning_rates work diff --git a/tests/python/test_cli.py b/tests/python/test_cli.py index aef9bc586..69e8df83d 100644 --- a/tests/python/test_cli.py +++ b/tests/python/test_cli.py @@ -1,11 +1,13 @@ -import os -import tempfile -import platform -import xgboost -import subprocess -import numpy import json -import testing as tm +import os +import platform +import subprocess +import tempfile + +import numpy + +import xgboost +from xgboost import testing as tm class TestCLI: @@ -29,7 +31,7 @@ data = {data_path} eval[test] = {data_path} ''' - PROJECT_ROOT = tm.PROJECT_ROOT + PROJECT_ROOT = tm.project_root(__file__) def get_exe(self): if platform.system() == 'Windows': diff --git a/tests/python/test_data_iterator.py b/tests/python/test_data_iterator.py index 0416bd8a4..cf81288e8 100644 --- a/tests/python/test_data_iterator.py +++ b/tests/python/test_data_iterator.py @@ -1,14 +1,16 @@ +from typing import Dict, List + import numpy as np import pytest from hypothesis import given, settings, strategies from scipy.sparse import csr_matrix -from testing import IteratorForTest, make_batches, non_increasing from xgboost.data import SingleBatchInternalIter as SingleBatch +from xgboost.testing import IteratorForTest, make_batches, non_increasing import xgboost as xgb -from xgboost import testing +from xgboost import testing as tm -pytestmark = testing.timeout(30) +pytestmark = tm.timeout(30) def test_single_batch(tree_method: str = "approx") -> None: @@ -83,7 +85,7 @@ def run_data_iterator( if tree_method == "gpu_hist": parameters["sampling_method"] = "gradient_based" - results_from_it: xgb.callback.EvaluationMonitor.EvalsLog = {} + results_from_it: Dict[str, Dict[str, List[float]]] = {} from_it = xgb.train( parameters, Xy, @@ -106,7 +108,7 @@ def run_data_iterator( assert Xy.num_row() == n_samples_per_batch * n_batches assert Xy.num_col() == n_features - results_from_arrays: xgb.callback.EvaluationMonitor.EvalsLog = {} + results_from_arrays: Dict[str, Dict[str, List[float]]] = {} from_arrays = xgb.train( parameters, Xy, diff --git a/tests/python/test_demos.py b/tests/python/test_demos.py index 63e44c0b0..97a462ff9 100644 --- a/tests/python/test_demos.py +++ b/tests/python/test_demos.py @@ -3,14 +3,12 @@ import subprocess import sys import pytest -import testing as tm -from xgboost import testing +from xgboost import testing as tm -pytestmark = testing.timeout(30) +pytestmark = tm.timeout(30) -ROOT_DIR = tm.PROJECT_ROOT -DEMO_DIR = os.path.join(ROOT_DIR, 'demo') +DEMO_DIR = tm.demo_dir(__file__) PYTHON_DEMO_DIR = os.path.join(DEMO_DIR, 'guide-python') CLI_DEMO_DIR = os.path.join(DEMO_DIR, 'CLI') @@ -156,7 +154,7 @@ def test_cli_regression_demo(): cmd = ['python', script, 'machine.txt', '1'] subprocess.check_call(cmd, cwd=reg_dir) - exe = os.path.join(tm.PROJECT_ROOT, 'xgboost') + exe = 
os.path.join(DEMO_DIR, os.path.pardir, 'xgboost') conf = os.path.join(reg_dir, 'machine.conf') subprocess.check_call([exe, conf], cwd=reg_dir) diff --git a/tests/python/test_dmatrix.py b/tests/python/test_dmatrix.py index b7933eac4..def369027 100644 --- a/tests/python/test_dmatrix.py +++ b/tests/python/test_dmatrix.py @@ -4,11 +4,11 @@ import tempfile import numpy as np import pytest import scipy.sparse -import testing as tm from hypothesis import given, settings, strategies from scipy.sparse import csr_matrix, rand import xgboost as xgb +from xgboost import testing as tm rng = np.random.RandomState(1) diff --git a/tests/python/test_dt.py b/tests/python/test_dt.py index b62b1317b..eee874b16 100644 --- a/tests/python/test_dt.py +++ b/tests/python/test_dt.py @@ -1,9 +1,8 @@ -# -*- coding: utf-8 -*- -import pytest import numpy as np +import pytest -import testing as tm import xgboost as xgb +from xgboost import testing as tm try: import datatable as dt diff --git a/tests/python/test_early_stopping.py b/tests/python/test_early_stopping.py index 29f8fb4b0..ab1aebc77 100644 --- a/tests/python/test_early_stopping.py +++ b/tests/python/test_early_stopping.py @@ -1,8 +1,9 @@ -import xgboost as xgb -import testing as tm import numpy as np import pytest +import xgboost as xgb +from xgboost import testing as tm + rng = np.random.RandomState(1994) diff --git a/tests/python/test_eval_metrics.py b/tests/python/test_eval_metrics.py index 72263e3d5..24e3817ce 100644 --- a/tests/python/test_eval_metrics.py +++ b/tests/python/test_eval_metrics.py @@ -1,8 +1,9 @@ -import xgboost as xgb -import testing as tm import numpy as np import pytest +import xgboost as xgb +from xgboost import testing as tm + rng = np.random.RandomState(1337) @@ -254,8 +255,8 @@ class TestEvalMetrics: self.run_roc_auc_multi("hist", n_samples, weighted) def run_pr_auc_binary(self, tree_method): - from sklearn.metrics import precision_recall_curve, auc from sklearn.datasets import make_classification + from sklearn.metrics import auc, precision_recall_curve X, y = make_classification(128, 4, n_classes=2, random_state=1994) clf = xgb.XGBClassifier(tree_method=tree_method, n_estimators=1) clf.fit(X, y, eval_metric="aucpr", eval_set=[(X, y)]) diff --git a/tests/python/test_interaction_constraints.py b/tests/python/test_interaction_constraints.py index 18d416501..96d2ba7dc 100644 --- a/tests/python/test_interaction_constraints.py +++ b/tests/python/test_interaction_constraints.py @@ -1,9 +1,9 @@ -# -*- coding: utf-8 -*- import numpy as np -import xgboost -import testing as tm import pytest +import xgboost +from xgboost import testing as tm + dpath = 'demo/data/' rng = np.random.RandomState(1994) diff --git a/tests/python/test_linear.py b/tests/python/test_linear.py index 78e604635..b3dbf35f1 100644 --- a/tests/python/test_linear.py +++ b/tests/python/test_linear.py @@ -1,10 +1,9 @@ -import testing as tm from hypothesis import given, note, settings, strategies import xgboost as xgb -from xgboost import testing +from xgboost import testing as tm -pytestmark = testing.timeout(10) +pytestmark = tm.timeout(10) parameter_strategy = strategies.fixed_dictionaries({ diff --git a/tests/python/test_model_compatibility.py b/tests/python/test_model_compatibility.py index 88549e1f2..a46715e42 100644 --- a/tests/python/test_model_compatibility.py +++ b/tests/python/test_model_compatibility.py @@ -1,12 +1,14 @@ -import xgboost -import os -import generate_models as gm -import testing as tm -import json -import zipfile -import pytest import copy +import json 
diff --git a/tests/python/test_demos.py b/tests/python/test_demos.py
index 63e44c0b0..97a462ff9 100644
--- a/tests/python/test_demos.py
+++ b/tests/python/test_demos.py
@@ -3,14 +3,12 @@ import subprocess
 import sys
 
 import pytest
-import testing as tm
 
-from xgboost import testing
+from xgboost import testing as tm
 
-pytestmark = testing.timeout(30)
+pytestmark = tm.timeout(30)
 
-ROOT_DIR = tm.PROJECT_ROOT
-DEMO_DIR = os.path.join(ROOT_DIR, 'demo')
+DEMO_DIR = tm.demo_dir(__file__)
 PYTHON_DEMO_DIR = os.path.join(DEMO_DIR, 'guide-python')
 CLI_DEMO_DIR = os.path.join(DEMO_DIR, 'CLI')
 
@@ -156,7 +154,7 @@ def test_cli_regression_demo():
     cmd = ['python', script, 'machine.txt', '1']
     subprocess.check_call(cmd, cwd=reg_dir)
 
-    exe = os.path.join(tm.PROJECT_ROOT, 'xgboost')
+    exe = os.path.join(DEMO_DIR, os.path.pardir, 'xgboost')
     conf = os.path.join(reg_dir, 'machine.conf')
     subprocess.check_call([exe, conf], cwd=reg_dir)
diff --git a/tests/python/test_dmatrix.py b/tests/python/test_dmatrix.py
index b7933eac4..def369027 100644
--- a/tests/python/test_dmatrix.py
+++ b/tests/python/test_dmatrix.py
@@ -4,11 +4,11 @@ import tempfile
 import numpy as np
 import pytest
 import scipy.sparse
-import testing as tm
 from hypothesis import given, settings, strategies
 from scipy.sparse import csr_matrix, rand
 
 import xgboost as xgb
+from xgboost import testing as tm
 
 rng = np.random.RandomState(1)
 
diff --git a/tests/python/test_dt.py b/tests/python/test_dt.py
index b62b1317b..eee874b16 100644
--- a/tests/python/test_dt.py
+++ b/tests/python/test_dt.py
@@ -1,9 +1,8 @@
-# -*- coding: utf-8 -*-
-import pytest
 import numpy as np
+import pytest
 
-import testing as tm
 import xgboost as xgb
+from xgboost import testing as tm
 
 try:
     import datatable as dt
diff --git a/tests/python/test_early_stopping.py b/tests/python/test_early_stopping.py
index 29f8fb4b0..ab1aebc77 100644
--- a/tests/python/test_early_stopping.py
+++ b/tests/python/test_early_stopping.py
@@ -1,8 +1,9 @@
-import xgboost as xgb
-import testing as tm
 import numpy as np
 import pytest
 
+import xgboost as xgb
+from xgboost import testing as tm
+
 rng = np.random.RandomState(1994)
 
diff --git a/tests/python/test_eval_metrics.py b/tests/python/test_eval_metrics.py
index 72263e3d5..24e3817ce 100644
--- a/tests/python/test_eval_metrics.py
+++ b/tests/python/test_eval_metrics.py
@@ -1,8 +1,9 @@
-import xgboost as xgb
-import testing as tm
 import numpy as np
 import pytest
 
+import xgboost as xgb
+from xgboost import testing as tm
+
 rng = np.random.RandomState(1337)
 
@@ -254,8 +255,8 @@ class TestEvalMetrics:
         self.run_roc_auc_multi("hist", n_samples, weighted)
 
     def run_pr_auc_binary(self, tree_method):
-        from sklearn.metrics import precision_recall_curve, auc
         from sklearn.datasets import make_classification
+        from sklearn.metrics import auc, precision_recall_curve
         X, y = make_classification(128, 4, n_classes=2, random_state=1994)
         clf = xgb.XGBClassifier(tree_method=tree_method, n_estimators=1)
         clf.fit(X, y, eval_metric="aucpr", eval_set=[(X, y)])
diff --git a/tests/python/test_interaction_constraints.py b/tests/python/test_interaction_constraints.py
index 18d416501..96d2ba7dc 100644
--- a/tests/python/test_interaction_constraints.py
+++ b/tests/python/test_interaction_constraints.py
@@ -1,9 +1,9 @@
-# -*- coding: utf-8 -*-
 import numpy as np
-import xgboost
-import testing as tm
 import pytest
 
+import xgboost
+from xgboost import testing as tm
+
 dpath = 'demo/data/'
 rng = np.random.RandomState(1994)
 
diff --git a/tests/python/test_linear.py b/tests/python/test_linear.py
index 78e604635..b3dbf35f1 100644
--- a/tests/python/test_linear.py
+++ b/tests/python/test_linear.py
@@ -1,10 +1,9 @@
-import testing as tm
 from hypothesis import given, note, settings, strategies
 
 import xgboost as xgb
-from xgboost import testing
+from xgboost import testing as tm
 
-pytestmark = testing.timeout(10)
+pytestmark = tm.timeout(10)
 
 
 parameter_strategy = strategies.fixed_dictionaries({
diff --git a/tests/python/test_model_compatibility.py b/tests/python/test_model_compatibility.py
index 88549e1f2..a46715e42 100644
--- a/tests/python/test_model_compatibility.py
+++ b/tests/python/test_model_compatibility.py
@@ -1,12 +1,14 @@
-import xgboost
-import os
-import generate_models as gm
-import testing as tm
-import json
-import zipfile
-import pytest
+import copy
+import json
+import os
 import urllib.request
+import zipfile
+
+import generate_models as gm
+import pytest
+
+import xgboost
+from xgboost import testing as tm
 
 
 def run_model_param_check(config):
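`tm.PROJECT_ROOT` is replaced throughout by the `tm.project_root(__file__)`, `tm.demo_dir(__file__)`, and `tm.data_dir(__file__)` helpers, which derive paths from the calling test file instead of a module-level constant. Their definitions are not part of this section; a plausible sketch, assuming callers sit two directories below the repository root (as `tests/python` does):

```python
import os


def normpath(path: str) -> str:
    # Canonicalize a path so dirname() walks work regardless of the cwd.
    return os.path.normpath(os.path.abspath(path))


def project_root(path: str) -> str:
    # tests/python/test_foo.py -> <root>/tests/python -> <root>
    return os.path.normpath(
        os.path.join(os.path.dirname(normpath(path)), os.path.pardir, os.path.pardir)
    )


def demo_dir(path: str) -> str:
    return os.path.join(project_root(path), "demo")


def data_dir(path: str) -> str:
    return os.path.join(demo_dir(path), "data")
```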
diff --git a/tests/python/test_monotone_constraints.py b/tests/python/test_monotone_constraints.py
index ae2c2917d..4dbfaa60d 100644
--- a/tests/python/test_monotone_constraints.py
+++ b/tests/python/test_monotone_constraints.py
@@ -1,8 +1,9 @@
 import numpy as np
-import xgboost as xgb
-import testing as tm
 import pytest
 
+import xgboost as xgb
+from xgboost import testing as tm
+
 dpath = 'demo/data/'
 
diff --git a/tests/python/test_openmp.py b/tests/python/test_openmp.py
index 950d15d86..c53363736 100644
--- a/tests/python/test_openmp.py
+++ b/tests/python/test_openmp.py
@@ -4,12 +4,11 @@ import tempfile
 
 import numpy as np
 import pytest
-import testing as tm
 
 import xgboost as xgb
-from xgboost import testing
+from xgboost import testing as tm
 
-pytestmark = testing.timeout(10)
+pytestmark = tm.timeout(10)
 
 
 class TestOMP:
@@ -86,7 +85,7 @@ class TestOMP:
     def test_with_omp_thread_limit(self):
        args = [
            "python", os.path.join(
-                tm.PROJECT_ROOT, "tests", "python", "with_omp_limit.py"
+                os.path.dirname(tm.normpath(__file__)), "with_omp_limit.py"
            )
        ]
        results = []
diff --git a/tests/python/test_parse_tree.py b/tests/python/test_parse_tree.py
index 4957b93bf..885c0f1e2 100644
--- a/tests/python/test_parse_tree.py
+++ b/tests/python/test_parse_tree.py
@@ -1,8 +1,8 @@
-import xgboost as xgb
 import numpy as np
 import pytest
 
-import testing as tm
+import xgboost as xgb
+from xgboost import testing as tm
 
 pytestmark = pytest.mark.skipif(**tm.no_pandas())
 
diff --git a/tests/python/test_pickling.py b/tests/python/test_pickling.py
index 37bbc6c13..161a5fd4e 100644
--- a/tests/python/test_pickling.py
+++ b/tests/python/test_pickling.py
@@ -1,9 +1,10 @@
-import pickle
-import numpy as np
-import xgboost as xgb
-import os
 import json
+import os
+import pickle
+
+import numpy as np
+
+import xgboost as xgb
 
 kRows = 100
 kCols = 10
diff --git a/tests/python/test_plotting.py b/tests/python/test_plotting.py
index 0167fb62d..dc45cd254 100644
--- a/tests/python/test_plotting.py
+++ b/tests/python/test_plotting.py
@@ -1,15 +1,16 @@
 import json
-import numpy as np
-import xgboost as xgb
-import testing as tm
 
+import numpy as np
 import pytest
 
+import xgboost as xgb
+from xgboost import testing as tm
+
 try:
     import matplotlib
     matplotlib.use('Agg')
-    from matplotlib.axes import Axes
     from graphviz import Source
+    from matplotlib.axes import Axes
 except ImportError:
     pass
 
diff --git a/tests/python/test_predict.py b/tests/python/test_predict.py
index f4ea944e8..daf916198 100644
--- a/tests/python/test_predict.py
+++ b/tests/python/test_predict.py
@@ -1,12 +1,13 @@
 '''Tests for running inplace prediction.'''
 from concurrent.futures import ThreadPoolExecutor
 
-import numpy as np
-from scipy import sparse
-import pytest
-import pandas as pd
-import testing as tm
+import numpy as np
+import pandas as pd
+import pytest
+from scipy import sparse
+
 import xgboost as xgb
+from xgboost import testing as tm
 
 
 def run_threaded_predict(X, rows, predict_func):
diff --git a/tests/python/test_quantile_dmatrix.py b/tests/python/test_quantile_dmatrix.py
index 65ccfa4e5..56b2a7d90 100644
--- a/tests/python/test_quantile_dmatrix.py
+++ b/tests/python/test_quantile_dmatrix.py
@@ -4,7 +4,7 @@ import numpy as np
 import pytest
 from hypothesis import given, settings, strategies
 from scipy import sparse
-from testing import (
+from xgboost.testing import (
     IteratorForTest,
     make_batches,
     make_batches_sparse,
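`test_with_omp_thread_limit` above now locates `with_omp_limit.py` relative to the test file itself. One way the subprocess might be driven (a hypothetical harness; the real test body and its environment handling lie outside this diff):

```python
import os
import subprocess
import sys
import tempfile


def run_with_omp_limit(script: str, limit: int) -> str:
    # Run the helper in a child process with the OpenMP thread cap set,
    # then return whatever the script writes to its output file so the
    # caller can compare results across limits.
    with tempfile.TemporaryDirectory() as tmpdir:
        out = os.path.join(tmpdir, "result.txt")
        env = dict(os.environ, OMP_THREAD_LIMIT=str(limit))
        subprocess.check_call([sys.executable, script, out], env=env)
        with open(out) as fd:
            return fd.read()
```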
diff --git a/tests/python/test_ranking.py b/tests/python/test_ranking.py
index 98bca122f..da2411983 100644
--- a/tests/python/test_ranking.py
+++ b/tests/python/test_ranking.py
@@ -1,13 +1,15 @@
-import numpy as np
-from scipy.sparse import csr_matrix
-import testing as tm
-import xgboost
-import os
 import itertools
+import os
 import shutil
 import urllib.request
 import zipfile
 
+import numpy as np
+from scipy.sparse import csr_matrix
+
+import xgboost
+from xgboost import testing as tm
+
 
 def test_ranking_with_unweighted_data():
     Xrow = np.array([1, 2, 6, 8, 11, 14, 16, 17])
diff --git a/tests/python/test_shap.py b/tests/python/test_shap.py
index 54a95c8c2..4d861ad6e 100644
--- a/tests/python/test_shap.py
+++ b/tests/python/test_shap.py
@@ -1,11 +1,12 @@
-# -*- coding: utf-8 -*-
-import numpy as np
-import xgboost as xgb
 import itertools
 import re
+
+import numpy as np
 import scipy
 import scipy.special
 
+import xgboost as xgb
+
 dpath = 'demo/data/'
 rng = np.random.RandomState(1994)
 
diff --git a/tests/python/test_spark/test_data.py b/tests/python/test_spark/test_data.py
index cf9063b71..20b31998e 100644
--- a/tests/python/test_spark/test_data.py
+++ b/tests/python/test_spark/test_data.py
@@ -4,7 +4,8 @@ from typing import List
 import numpy as np
 import pandas as pd
 import pytest
-import testing as tm
+
+from xgboost import testing as tm
 
 if tm.no_spark()["condition"]:
     pytest.skip(msg=tm.no_spark()["reason"], allow_module_level=True)
diff --git a/tests/python/test_spark/test_spark_local.py b/tests/python/test_spark/test_spark_local.py
index 03981d955..758c5c87b 100644
--- a/tests/python/test_spark/test_spark_local.py
+++ b/tests/python/test_spark/test_spark_local.py
@@ -6,10 +6,9 @@ import uuid
 
 import numpy as np
 import pytest
-import testing as tm
 
 import xgboost as xgb
-from xgboost import testing
+from xgboost import testing as tm
 
 if tm.no_spark()["condition"]:
     pytest.skip(msg=tm.no_spark()["reason"], allow_module_level=True)
@@ -38,7 +37,7 @@ from .utils import SparkTestCase
 
 logging.getLogger("py4j").setLevel(logging.INFO)
 
-pytestmark = testing.timeout(60)
+pytestmark = tm.timeout(60)
 
 
 class XgboostLocalTest(SparkTestCase):
diff --git a/tests/python/test_spark/test_spark_local_cluster.py b/tests/python/test_spark/test_spark_local_cluster.py
index 9276e08f3..3f375644f 100644
--- a/tests/python/test_spark/test_spark_local_cluster.py
+++ b/tests/python/test_spark/test_spark_local_cluster.py
@@ -6,7 +6,8 @@ import uuid
 
 import numpy as np
 import pytest
-import testing as tm
+
+from xgboost import testing as tm
 
 if tm.no_spark()["condition"]:
     pytest.skip(msg=tm.no_spark()["reason"], allow_module_level=True)
diff --git a/tests/python/test_spark/utils.py b/tests/python/test_spark/utils.py
index 23968fbcc..0ed9f8521 100644
--- a/tests/python/test_spark/utils.py
+++ b/tests/python/test_spark/utils.py
@@ -6,9 +6,10 @@ import tempfile
 import unittest
 
 import pytest
-import testing as tm
 from six import StringIO
 
+from xgboost import testing as tm
+
 if tm.no_spark()["condition"]:
     pytest.skip(msg=tm.no_spark()["reason"], allow_module_level=True)
 if sys.platform.startswith("win") or sys.platform.startswith("darwin"):
diff --git a/tests/python/test_survival.py b/tests/python/test_survival.py
index 1fb931545..7a297c191 100644
--- a/tests/python/test_survival.py
+++ b/tests/python/test_survival.py
@@ -1,11 +1,13 @@
-import testing as tm
-import pytest
-import numpy as np
-import xgboost as xgb
 import json
 import os
 
-dpath = os.path.join(tm.PROJECT_ROOT, 'demo', 'data')
+import numpy as np
+import pytest
+
+import xgboost as xgb
+from xgboost import testing as tm
+
+dpath = tm.data_dir(__file__)
 
 
 def test_aft_survival_toy_data():
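The Spark modules above gate themselves at import time: `tm.no_spark()` follows the same convention as `tm.no_pandas()` and `tm.no_dask()`, returning a dict whose `condition`/`reason` keys unpack directly into `pytest.mark.skipif`. A sketch of the pattern, with a hypothetical `no_spark` implementation:

```python
import importlib.util

import pytest


def no_spark() -> dict:
    # Hypothetical implementation: skip when pyspark cannot be imported.
    return {
        "condition": importlib.util.find_spec("pyspark") is None,
        "reason": "PySpark is not installed.",
    }


# Module-level gate, as used in the test_spark modules:
if no_spark()["condition"]:
    pytest.skip(msg=no_spark()["reason"], allow_module_level=True)


# Or per test, via keyword unpacking into skipif(condition=..., reason=...):
@pytest.mark.skipif(**no_spark())
def test_spark_feature() -> None:
    ...
```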
diff --git a/tests/python/test_tracker.py b/tests/python/test_tracker.py
index 67543a968..11d3be36f 100644
--- a/tests/python/test_tracker.py
+++ b/tests/python/test_tracker.py
@@ -3,10 +3,10 @@ import sys
 
 import numpy as np
 import pytest
-import testing as tm
 
 import xgboost as xgb
-from xgboost import RabitTracker, testing
+from xgboost import RabitTracker
+from xgboost import testing as tm
 
 if sys.platform.startswith("win"):
     pytest.skip("Skipping dask tests on Windows", allow_module_level=True)
@@ -61,7 +61,7 @@ def test_rabit_ops():
         run_rabit_ops(client, n_workers)
 
 
-@pytest.mark.skipif(**testing.skip_ipv6())
+@pytest.mark.skipif(**tm.no_ipv6())
 @pytest.mark.skipif(**tm.no_dask())
 def test_rabit_ops_ipv6():
     import dask
diff --git a/tests/python/test_training_continuation.py b/tests/python/test_training_continuation.py
index 31a408170..258af760c 100644
--- a/tests/python/test_training_continuation.py
+++ b/tests/python/test_training_continuation.py
@@ -1,10 +1,11 @@
-import xgboost as xgb
-import testing as tm
-import numpy as np
-import pytest
 import os
 import tempfile
 
+import numpy as np
+import pytest
+
+import xgboost as xgb
+from xgboost import testing as tm
 
 rng = np.random.RandomState(1337)
 
diff --git a/tests/python/test_tree_regularization.py b/tests/python/test_tree_regularization.py
index 92fa9fb51..ae8e539a0 100644
--- a/tests/python/test_tree_regularization.py
+++ b/tests/python/test_tree_regularization.py
@@ -1,8 +1,8 @@
 import numpy as np
-import xgboost as xgb
-
 from numpy.testing import assert_approx_equal
 
+import xgboost as xgb
+
 train_data = xgb.DMatrix(np.array([[1]]), label=np.array([1]))
 
diff --git a/tests/python/test_updaters.py b/tests/python/test_updaters.py
index e28f17386..e8e43e8e7 100644
--- a/tests/python/test_updaters.py
+++ b/tests/python/test_updaters.py
@@ -1,11 +1,13 @@
 import json
 from string import ascii_lowercase
-from typing import Dict, Any
-import testing as tm
-import pytest
-import xgboost as xgb
+from typing import Any, Dict
+
 import numpy as np
-from hypothesis import given, strategies, settings, note
+import pytest
+from hypothesis import given, note, settings, strategies
+
+import xgboost as xgb
+from xgboost import testing as tm
 
 exact_parameter_strategy = strategies.fixed_dictionaries({
     'nthread': strategies.integers(1, 4),
diff --git a/tests/python/test_with_arrow.py b/tests/python/test_with_arrow.py
index ad2448294..8b7bce9eb 100644
--- a/tests/python/test_with_arrow.py
+++ b/tests/python/test_with_arrow.py
@@ -1,14 +1,16 @@
-import unittest
-import pytest
-import numpy as np
-import testing as tm
-import xgboost as xgb
 import os
+import unittest
+
+import numpy as np
+import pytest
+
+import xgboost as xgb
+from xgboost import testing as tm
 
 try:
+    import pandas as pd
     import pyarrow as pa
     import pyarrow.csv as pc
-    import pandas as pd
 except ImportError:
     pass
 
@@ -73,7 +75,7 @@ class TestArrowTable(unittest.TestCase):
         np.testing.assert_allclose(preds1, preds2)
 
     def test_arrow_survival(self):
-        data = os.path.join(tm.PROJECT_ROOT, "demo", "data", "veterans_lung_cancer.csv")
+        data = os.path.join(tm.data_dir(__file__), "veterans_lung_cancer.csv")
         table = pc.read_csv(data)
         y_lower_bound = table["Survival_label_lower_bound"]
         y_upper_bound = table["Survival_label_upper_bound"]
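`test_updaters.py` above keeps its Hypothesis search space built with `strategies.fixed_dictionaries` while the imports are regrouped. For reference, that strategy draws one value per key and hands the whole dict to the test; a miniature, self-contained version of the pattern:

```python
from hypothesis import given, strategies

# Toy analogue of the exact_parameter_strategy above.
param_strategy = strategies.fixed_dictionaries({
    "nthread": strategies.integers(1, 4),
    "max_depth": strategies.integers(1, 11),
})


@given(param_strategy)
def test_parameter_shapes(params: dict) -> None:
    # strategies.integers bounds are inclusive on both ends.
    assert 1 <= params["nthread"] <= 4
    assert 1 <= params["max_depth"] <= 11
```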
diff --git a/tests/python/test_with_dask.py b/tests/python/test_with_dask.py
index c06232e99..d54aba6ca 100644
--- a/tests/python/test_with_dask.py
+++ b/tests/python/test_with_dask.py
@@ -20,7 +20,6 @@ import numpy as np
 import pytest
 import scipy
 import sklearn
-import testing as tm
 from hypothesis import HealthCheck, given, note, settings
 from sklearn.datasets import make_classification, make_regression
 from test_predict import verify_leaf_output
@@ -29,7 +28,7 @@ from test_with_sklearn import run_data_initialization, run_feature_weights
 from xgboost.data import _is_cudf_df
 
 import xgboost as xgb
-from xgboost import testing
+from xgboost import testing as tm
 
 if sys.platform.startswith("win"):
     pytest.skip("Skipping dask tests on Windows", allow_module_level=True)
@@ -45,7 +44,7 @@ from xgboost.dask import DaskDMatrix
 
 dask.config.set({"distributed.scheduler.allowed-failures": False})
 
-pytestmark = testing.timeout(30)
+pytestmark = tm.timeout(30)
 
 if hasattr(HealthCheck, 'function_scoped_fixture'):
     suppress = [HealthCheck.function_scoped_fixture]
@@ -1116,8 +1115,9 @@ def test_predict_with_meta(client: "Client") -> None:
 
 
 def run_aft_survival(client: "Client", dmatrix_t: Type) -> None:
-    df = dd.read_csv(os.path.join(tm.PROJECT_ROOT, 'demo', 'data',
-                                  'veterans_lung_cancer.csv'))
+    df = dd.read_csv(
+        os.path.join(tm.data_dir(__file__), "veterans_lung_cancer.csv")
+    )
     y_lower_bound = df['Survival_label_lower_bound']
     y_upper_bound = df['Survival_label_upper_bound']
     X = df.drop(['Survival_label_lower_bound',
diff --git a/tests/python/test_with_modin.py b/tests/python/test_with_modin.py
index 4932d1c1f..3f1f9cf97 100644
--- a/tests/python/test_with_modin.py
+++ b/tests/python/test_with_modin.py
@@ -1,10 +1,10 @@
-# -*- coding: utf-8 -*-
 import numpy as np
-import xgboost as xgb
-import testing as tm
 import pytest
 from test_dmatrix import set_base_margin_info
 
+import xgboost as xgb
+from xgboost import testing as tm
+
 try:
     import modin.pandas as md
 except ImportError:
diff --git a/tests/python/test_with_pandas.py b/tests/python/test_with_pandas.py
index e4289c1cd..209e5cf6f 100644
--- a/tests/python/test_with_pandas.py
+++ b/tests/python/test_with_pandas.py
@@ -1,11 +1,13 @@
 import os
 import tempfile
+
 import numpy as np
-import xgboost as xgb
-import testing as tm
 import pytest
 from test_dmatrix import set_base_margin_info
 
+import xgboost as xgb
+from xgboost import testing as tm
+
 try:
     import pandas as pd
 except ImportError:
diff --git a/tests/python/test_with_shap.py b/tests/python/test_with_shap.py
index 1e03e0700..3103e1b7e 100644
--- a/tests/python/test_with_shap.py
+++ b/tests/python/test_with_shap.py
@@ -1,7 +1,8 @@
 import numpy as np
-import xgboost as xgb
 import pytest
 
+import xgboost as xgb
+
 try:
     import shap
 except ImportError:
diff --git a/tests/python/test_with_sklearn.py b/tests/python/test_with_sklearn.py
index 17114d2dd..0a2c8fabc 100644
--- a/tests/python/test_with_sklearn.py
+++ b/tests/python/test_with_sklearn.py
@@ -8,14 +8,13 @@ from typing import Callable, Optional
 
 import numpy as np
 import pytest
-import testing as tm
 from sklearn.utils.estimator_checks import parametrize_with_checks
 
 import xgboost as xgb
-from xgboost import testing
+from xgboost import testing as tm
 
 rng = np.random.RandomState(1994)
 
-pytestmark = [pytest.mark.skipif(**tm.no_sklearn()), testing.timeout(30)]
+pytestmark = [pytest.mark.skipif(**tm.no_sklearn()), tm.timeout(30)]
 
 
 def test_binary_classification():
@@ -155,11 +154,10 @@ def test_ranking():
 
 
 def test_stacking_regression():
-    from sklearn.model_selection import train_test_split
     from sklearn.datasets import load_diabetes
+    from sklearn.ensemble import RandomForestRegressor, StackingRegressor
     from sklearn.linear_model import RidgeCV
-    from sklearn.ensemble import RandomForestRegressor
-    from sklearn.ensemble import StackingRegressor
+    from sklearn.model_selection import train_test_split
 
     X, y = load_diabetes(return_X_y=True)
     estimators = [
@@ -177,13 +175,13 @@
 
 
 def test_stacking_classification():
-    from sklearn.model_selection import train_test_split
     from sklearn.datasets import load_iris
-    from sklearn.svm import LinearSVC
-    from sklearn.linear_model import LogisticRegression
-    from sklearn.preprocessing import StandardScaler
-    from sklearn.pipeline import make_pipeline
     from sklearn.ensemble import StackingClassifier
+    from sklearn.linear_model import LogisticRegression
+    from sklearn.model_selection import train_test_split
+    from sklearn.pipeline import make_pipeline
+    from sklearn.preprocessing import StandardScaler
+    from sklearn.svm import LinearSVC
 
     X, y = load_iris(return_X_y=True)
     estimators = [
@@ -354,8 +352,8 @@ def test_num_parallel_tree():
 
 
 def test_regression():
-    from sklearn.metrics import mean_squared_error
     from sklearn.datasets import fetch_california_housing
+    from sklearn.metrics import mean_squared_error
     from sklearn.model_selection import KFold
 
     X, y = fetch_california_housing(return_X_y=True)
@@ -383,8 +381,8 @@ def test_regression():
 
 
 def run_housing_rf_regression(tree_method):
-    from sklearn.metrics import mean_squared_error
     from sklearn.datasets import fetch_california_housing
+    from sklearn.metrics import mean_squared_error
     from sklearn.model_selection import KFold
 
     X, y = fetch_california_housing(return_X_y=True)
@@ -407,8 +405,8 @@ def test_rf_regression():
 
 
 def test_parameter_tuning():
-    from sklearn.model_selection import GridSearchCV
     from sklearn.datasets import fetch_california_housing
+    from sklearn.model_selection import GridSearchCV
 
     X, y = fetch_california_housing(return_X_y=True)
     xgb_model = xgb.XGBRegressor(learning_rate=0.1)
@@ -421,8 +419,8 @@ def test_parameter_tuning():
 
 
 def test_regression_with_custom_objective():
-    from sklearn.metrics import mean_squared_error
     from sklearn.datasets import fetch_california_housing
+    from sklearn.metrics import mean_squared_error
     from sklearn.model_selection import KFold
 
     def objective_ls(y_true, y_pred):
@@ -539,8 +537,8 @@ def test_sklearn_plotting():
 
     import matplotlib
     matplotlib.use('Agg')
-    from matplotlib.axes import Axes
     from graphviz import Source
+    from matplotlib.axes import Axes
 
     ax = xgb.plot_importance(classifier)
     assert isinstance(ax, Axes)
@@ -666,8 +664,8 @@ def test_kwargs_error():
 
 
 def test_kwargs_grid_search():
-    from sklearn.model_selection import GridSearchCV
     from sklearn import datasets
+    from sklearn.model_selection import GridSearchCV
 
     params = {'tree_method': 'hist'}
     clf = xgb.XGBClassifier(n_estimators=1, learning_rate=1.0, **params)
@@ -841,9 +839,7 @@ def test_save_load_model():
 
 
 def test_RFECV():
-    from sklearn.datasets import load_diabetes
-    from sklearn.datasets import load_breast_cancer
-    from sklearn.datasets import load_iris
+    from sklearn.datasets import load_breast_cancer, load_diabetes, load_iris
     from sklearn.feature_selection import RFECV
 
     # Regression
@@ -1046,8 +1042,9 @@ def run_feature_weights(X, y, fw, tree_method, model=xgb.XGBRegressor):
     with open(model_path) as fd:
         model = json.load(fd)
 
-    parser_path = os.path.join(tm.PROJECT_ROOT, 'demo', 'json-model',
-                               'json_parser.py')
+    parser_path = os.path.join(
+        tm.demo_dir(__file__), "json-model", "json_parser.py"
+    )
     spec = importlib.util.spec_from_file_location("JsonParser", parser_path)
     foo = importlib.util.module_from_spec(spec)
@@ -1162,8 +1159,8 @@ def run_boost_from_prediction_multi_clasas(
 
 @pytest.mark.parametrize("tree_method", ["hist", "approx", "exact"])
 def test_boost_from_prediction(tree_method):
-    from sklearn.datasets import load_breast_cancer, load_iris, make_regression
     import pandas as pd
+    from sklearn.datasets import load_breast_cancer, load_iris, make_regression
 
     X, y = load_breast_cancer(return_X_y=True)
 
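`run_feature_weights` above loads `demo/json-model/json_parser.py` straight from its path rather than via `sys.path`. The standard-library recipe it relies on, shown standalone (the module name `"JsonParser"` is arbitrary):

```python
import importlib.util
from types import ModuleType


def load_module_from_path(name: str, path: str) -> ModuleType:
    # Import a Python file that is not on sys.path.
    spec = importlib.util.spec_from_file_location(name, path)
    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)
    return module
```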
diff --git a/tests/python/with_omp_limit.py b/tests/python/with_omp_limit.py
index 950ec0364..856914e96 100644
--- a/tests/python/with_omp_limit.py
+++ b/tests/python/with_omp_limit.py
@@ -1,7 +1,9 @@
-import xgboost as xgb
+import sys
+
 from sklearn.datasets import make_classification
 from sklearn.metrics import roc_auc_score
-import sys
+
+import xgboost as xgb
 
 
 def run_omp(output_path: str):
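Only the import block of `with_omp_limit.py` appears in the hunk above. Given those imports, a plausible body for `run_omp` (the real implementation lies outside this diff): train a small classifier and write its ROC AUC to `output_path`, so the parent test can check that results agree across OpenMP thread limits.

```python
import sys

from sklearn.datasets import make_classification
from sklearn.metrics import roc_auc_score

import xgboost as xgb


def run_omp(output_path: str) -> None:
    # Hypothetical sketch: the score should be identical no matter what
    # OMP_THREAD_LIMIT the parent process imposes on this script.
    X, y = make_classification(n_samples=256, n_features=8, random_state=7)
    dtrain = xgb.DMatrix(X, label=y)
    booster = xgb.train(
        {"objective": "binary:logistic"}, dtrain, num_boost_round=4
    )
    score = roc_auc_score(y, booster.predict(dtrain))
    with open(output_path, "w") as fd:
        fd.write(str(score))


if __name__ == "__main__":
    run_omp(sys.argv[1])
```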