Move Python testing utilities into xgboost module. (#8379)

- Add typehints.
- Fixes for pylint.

Co-authored-by: Hyunsu Philip Cho <chohyu01@cs.washington.edu>

parent 7e53189e7c
commit cf70864fa3
@@ -65,7 +65,7 @@ def _check_rf_callback(
    )


-_SklObjective = Optional[
+SklObjective = Optional[
    Union[str, Callable[[np.ndarray, np.ndarray], Tuple[np.ndarray, np.ndarray]]]
]

@@ -144,7 +144,7 @@ __model_doc = f"""
        Boosting learning rate (xgb's "eta")
    verbosity : Optional[int]
        The degree of verbosity. Valid values are 0 (silent) - 3 (debug).
-    objective : {_SklObjective}
+    objective : {SklObjective}
        Specify the learning task and the corresponding learning objective or
        a custom objective function to be used (see note below).
    booster: Optional[str]
@@ -546,7 +546,7 @@ class XGBModel(XGBModelBase):
        learning_rate: Optional[float] = None,
        n_estimators: int = 100,
        verbosity: Optional[int] = None,
-        objective: _SklObjective = None,
+        objective: SklObjective = None,
        booster: Optional[str] = None,
        tree_method: Optional[str] = None,
        n_jobs: Optional[int] = None,
@@ -1409,7 +1409,7 @@ class XGBClassifier(XGBModel, XGBClassifierBase):
    def __init__(
        self,
        *,
-        objective: _SklObjective = "binary:logistic",
+        objective: SklObjective = "binary:logistic",
        use_label_encoder: Optional[bool] = None,
        **kwargs: Any,
    ) -> None:
@@ -1712,7 +1712,7 @@ class XGBRegressor(XGBModel, XGBRegressorBase):
    # pylint: disable=missing-docstring
    @_deprecate_positional_args
    def __init__(
-        self, *, objective: _SklObjective = "reg:squarederror", **kwargs: Any
+        self, *, objective: SklObjective = "reg:squarederror", **kwargs: Any
    ) -> None:
        super().__init__(objective=objective, **kwargs)
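The hunks above make the `SklObjective` alias public (dropping the leading underscore) so that the new `xgboost.testing` module can import it. A minimal sketch of a custom objective matching the alias's callable form; the function name and gradient formula are illustrative, not part of the diff:

from typing import Tuple

import numpy as np
import xgboost as xgb


def squared_error(labels: np.ndarray, predt: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
    # SklObjective callables receive (labels, predictions) and return
    # per-sample gradient and hessian arrays.
    grad = predt - labels
    hess = np.ones_like(predt)
    return grad, hess


# `objective` accepts either a string such as "reg:squarederror" or the callable above.
reg = xgb.XGBRegressor(objective=squared_error, n_estimators=8)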
@@ -1,64 +0,0 @@
-"""Utilities for defining Python tests."""
-
-import socket
-from platform import system
-from typing import Any, TypedDict
-
-PytestSkip = TypedDict("PytestSkip", {"condition": bool, "reason": str})
-
-
-def has_ipv6() -> bool:
-    """Check whether IPv6 is enabled on this host."""
-    # connection error in macos, still need some fixes.
-    if system() not in ("Linux", "Windows"):
-        return False
-
-    if socket.has_ipv6:
-        try:
-            with socket.socket(
-                socket.AF_INET6, socket.SOCK_STREAM
-            ) as server, socket.socket(socket.AF_INET6, socket.SOCK_STREAM) as client:
-                server.bind(("::1", 0))
-                port = server.getsockname()[1]
-                server.listen()
-
-                client.connect(("::1", port))
-                conn, _ = server.accept()
-
-                client.sendall("abc".encode())
-                msg = conn.recv(3).decode()
-                # if the code can be executed to this point, the message should be
-                # correct.
-                assert msg == "abc"
-            return True
-        except OSError:
-            pass
-    return False
-
-
-def skip_ipv6() -> PytestSkip:
-    """PyTest skip mark for IPv6."""
-    return {"condition": not has_ipv6(), "reason": "IPv6 is required to be enabled."}
-
-
-def timeout(sec: int, *args: Any, enable: bool = True, **kwargs: Any) -> Any:
-    """Make a pytest mark for the `pytest-timeout` package.
-
-    Parameters
-    ----------
-    sec :
-        Timeout seconds.
-    enable :
-        Control whether timeout should be applied, used for debugging.
-
-    Returns
-    -------
-    pytest.mark.timeout
-    """
-    import pytest  # pylint: disable=import-error
-
-    # This is disabled for now due to regression caused by conflicts between federated
-    # learning build and the CI container environment.
-    if enable:
-        return pytest.mark.timeout(sec, *args, **kwargs)
-    return pytest.mark.timeout(None, *args, **kwargs)
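The file deleted above is re-created inside the new `xgboost.testing` module below, with `skip_ipv6` renamed to `no_ipv6`. For reference, a sketch of how these helpers are consumed; the test body is illustrative:

import pytest

from xgboost import testing as tm


# A PytestSkip TypedDict unpacks directly into pytest.mark.skipif,
# and timeout() yields a regular pytest mark.
@pytest.mark.skipif(**tm.no_ipv6())
@tm.timeout(5)
def test_ipv6_available() -> None:
    assert tm.has_ipv6()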
@@ -1,192 +1,190 @@
-from concurrent.futures import ThreadPoolExecutor
-import os
+"""Utilities for defining Python tests. The module is private and subject to frequent
+change without notice.
+
+"""
+# pylint: disable=invalid-name,missing-function-docstring,import-error
+import gc
+import importlib.util
+import multiprocessing
-from typing import Tuple, Union, List, Sequence, Callable
+import os
+import platform
+import socket
+import sys
+import urllib
+import zipfile
-import sys
-from typing import Optional, Dict, Any
+from concurrent.futures import ThreadPoolExecutor
from contextlib import contextmanager
from io import StringIO
-from xgboost.compat import SKLEARN_INSTALLED, PANDAS_INSTALLED
-import pytest
-import gc
-import xgboost as xgb
-from xgboost.core import ArrayLike
-import numpy as np
-from scipy import sparse
-import platform
+from platform import system
+from typing import (
+    Any,
+    Callable,
+    Dict,
+    Generator,
+    List,
+    Optional,
+    Sequence,
+    Set,
+    Tuple,
+    TypedDict,
+    Union,
+)

-hypothesis = pytest.importorskip('hypothesis')
-sklearn = pytest.importorskip('sklearn')
+import numpy as np
+import pytest
+from scipy import sparse
+from xgboost.core import ArrayLike
+from xgboost.sklearn import SklObjective
+
+import xgboost as xgb
+
+hypothesis = pytest.importorskip("hypothesis")
+
+# pylint:disable=wrong-import-position,wrong-import-order
from hypothesis import strategies
from hypothesis.extra.numpy import arrays
-from joblib import Memory
-from sklearn import datasets
+
+joblib = pytest.importorskip("joblib")
+datasets = pytest.importorskip("sklearn.datasets")
+
+Memory = joblib.Memory
+
+memory = Memory("./cachedir", verbose=0)
+
+PytestSkip = TypedDict("PytestSkip", {"condition": bool, "reason": str})
+def has_ipv6() -> bool:
+    """Check whether IPv6 is enabled on this host."""
+    # connection error in macos, still need some fixes.
+    if system() not in ("Linux", "Windows"):
+        return False
+
+    if socket.has_ipv6:
+        try:
-            import cupy as cp
-        except ImportError:
-            cp = None
+            with socket.socket(
+                socket.AF_INET6, socket.SOCK_STREAM
+            ) as server, socket.socket(socket.AF_INET6, socket.SOCK_STREAM) as client:
+                server.bind(("::1", 0))
+                port = server.getsockname()[1]
+                server.listen()
+
-memory = Memory('./cachedir', verbose=0)
+                client.connect(("::1", port))
+                conn, _ = server.accept()
+
+                client.sendall("abc".encode())
+                msg = conn.recv(3).decode()
+                # if the code can be executed to this point, the message should be
+                # correct.
+                assert msg == "abc"
+            return True
+        except OSError:
+            pass
+    return False


-def no_ubjson():
-    reason = "ubjson is not intsalled."
-    try:
-        import ubjson  # noqa
-        return {"condition": False, "reason": reason}
-    except ImportError:
-        return {"condition": True, "reason": reason}
+def no_mod(name: str) -> PytestSkip:
+    spec = importlib.util.find_spec(name)
+    return {"condition": spec is None, "reason": f"{name} is not installed."}
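`no_mod` is the key simplification in this move: each `no_*` helper below collapses into a single `importlib.util.find_spec` probe instead of a bespoke try/except import. A short sketch of the resulting call pattern (the test body is illustrative):

import pytest

from xgboost import testing as tm


@pytest.mark.skipif(**tm.no_cupy())
def test_requires_cupy() -> None:
    import cupy as cp  # safe: the mark skips this test when cupy is absent

    assert int(cp.ones(3).sum()) == 3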
-def no_sklearn():
-    return {'condition': not SKLEARN_INSTALLED,
-            'reason': 'Scikit-Learn is not installed'}
+def no_ipv6() -> PytestSkip:
+    """PyTest skip mark for IPv6."""
+    return {"condition": not has_ipv6(), "reason": "IPv6 is required to be enabled."}


-def no_dask():
-    try:
-        import pkg_resources
-
-        pkg_resources.get_distribution("dask")
-        DASK_INSTALLED = True
-    except pkg_resources.DistributionNotFound:
-        DASK_INSTALLED = False
-    return {"condition": not DASK_INSTALLED, "reason": "Dask is not installed"}
+def no_ubjson() -> PytestSkip:
+    return no_mod("ubjson")


-def no_spark():
-    try:
-        import pyspark  # noqa
-        SPARK_INSTALLED = True
-    except ImportError:
-        SPARK_INSTALLED = False
-    return {"condition": not SPARK_INSTALLED, "reason": "Spark is not installed"}
+def no_sklearn() -> PytestSkip:
+    return no_mod("sklearn")


-def no_pandas():
-    return {'condition': not PANDAS_INSTALLED,
-            'reason': 'Pandas is not installed.'}
+def no_dask() -> PytestSkip:
+    return no_mod("dask")


-def no_arrow():
-    reason = "pyarrow is not installed"
-    try:
-        import pyarrow  # noqa
-        return {"condition": False, "reason": reason}
-    except ImportError:
-        return {"condition": True, "reason": reason}
+def no_spark() -> PytestSkip:
+    return no_mod("pyspark")


-def no_modin():
-    reason = 'Modin is not installed.'
-    try:
-        import modin.pandas as _  # noqa
-        return {'condition': False, 'reason': reason}
-    except ImportError:
-        return {'condition': True, 'reason': reason}
+def no_pandas() -> PytestSkip:
+    return no_mod("pandas")


-def no_dt():
-    import importlib.util
-    spec = importlib.util.find_spec('datatable')
-    return {'condition': spec is None,
-            'reason': 'Datatable is not installed.'}
+def no_arrow() -> PytestSkip:
+    return no_mod("pyarrow")


-def no_matplotlib():
-    reason = 'Matplotlib is not installed.'
+def no_modin() -> PytestSkip:
+    return no_mod("modin")


+def no_dt() -> PytestSkip:
+    return no_mod("datatable")


+def no_matplotlib() -> PytestSkip:
+    reason = "Matplotlib is not installed."
    try:
        import matplotlib.pyplot as _  # noqa
-        return {'condition': False,
-                'reason': reason}
-
+        return {"condition": False, "reason": reason}
    except ImportError:
-        return {'condition': True,
-                'reason': reason}
+        return {"condition": True, "reason": reason}


-def no_dask_cuda():
-    reason = 'dask_cuda is not installed.'
-    try:
-        import dask_cuda as _  # noqa
-        return {'condition': False, 'reason': reason}
-    except ImportError:
-        return {'condition': True, 'reason': reason}
+def no_dask_cuda() -> PytestSkip:
+    return no_mod("dask_cuda")


-def no_cudf():
-    try:
-        import cudf  # noqa
-        CUDF_INSTALLED = True
-    except ImportError:
-        CUDF_INSTALLED = False
-
-    return {'condition': not CUDF_INSTALLED,
-            'reason': 'CUDF is not installed'}
+def no_cudf() -> PytestSkip:
+    return no_mod("cudf")


-def no_cupy():
-    reason = 'cupy is not installed.'
-    try:
-        import cupy as _  # noqa
-        return {'condition': False, 'reason': reason}
-    except ImportError:
-        return {'condition': True, 'reason': reason}
+def no_cupy() -> PytestSkip:
+    return no_mod("cupy")


-def no_dask_cudf():
-    reason = 'dask_cudf is not installed.'
-    try:
-        import dask_cudf as _  # noqa
-        return {'condition': False, 'reason': reason}
-    except ImportError:
-        return {'condition': True, 'reason': reason}
+def no_dask_cudf() -> PytestSkip:
+    return no_mod("dask_cudf")


-def no_json_schema():
-    reason = 'jsonschema is not installed'
-    try:
-        import jsonschema  # noqa
-        return {'condition': False, 'reason': reason}
-    except ImportError:
-        return {'condition': True, 'reason': reason}
+def no_json_schema() -> PytestSkip:
+    return no_mod("jsonschema")


-def no_graphviz():
-    reason = 'graphviz is not installed'
-    try:
-        import graphviz  # noqa
-        return {'condition': False, 'reason': reason}
-    except ImportError:
-        return {'condition': True, 'reason': reason}
+def no_graphviz() -> PytestSkip:
+    return no_mod("graphviz")


-def no_multiple(*args):
+def no_multiple(*args: Any) -> PytestSkip:
    condition = False
-    reason = ''
+    reason = ""
    for arg in args:
-        condition = (condition or arg['condition'])
-        if arg['condition']:
-            reason = arg['reason']
+        condition = condition or arg["condition"]
+        if arg["condition"]:
+            reason = arg["reason"]
            break
-    return {'condition': condition, 'reason': reason}
+    return {"condition": condition, "reason": reason}


-def skip_s390x():
+def skip_s390x() -> PytestSkip:
    condition = platform.machine() == "s390x"
    reason = "Known to fail on s390x"
    return {"condition": condition, "reason": reason}
class IteratorForTest(xgb.core.DataIter):
    """Iterator for testing streaming DMatrix. (external memory, quantile)"""

    def __init__(
        self,
        X: Sequence,
        y: Sequence,
        w: Optional[Sequence],
-        cache: Optional[str] = "./"
+        cache: Optional[str] = "./",
    ) -> None:
        assert len(X) == len(y)
        self.X = X
@@ -242,7 +240,7 @@ def make_batches(
        rng = cupy.random.RandomState(1994)
    else:
        rng = np.random.RandomState(1994)
-    for i in range(n_batches):
+    for _ in range(n_batches):
        _X = rng.randn(n_samples_per_batch, n_features)
        _y = rng.randn(n_samples_per_batch)
        _w = rng.uniform(low=0, high=1, size=n_samples_per_batch)
@@ -259,7 +257,7 @@ def make_batches_sparse(
    y = []
    w = []
    rng = np.random.RandomState(1994)
-    for i in range(n_batches):
+    for _ in range(n_batches):
        _X = sparse.random(
            n_samples_per_batch,
            n_features,
@@ -276,8 +274,9 @@ def make_batches_sparse(
    return X, y, w
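A sketch of how `make_batches` and `IteratorForTest` combine; the `use_cupy` keyword is assumed from the cupy/numpy branch above, and building the `DMatrix` from a `DataIter` follows the external-memory interface:

import xgboost as xgb
from xgboost import testing as tm

# Three batches of dense data; use_cupy=False keeps everything on the CPU
# (assumption: make_batches exposes this flag, per the branch shown above).
X, y, w = tm.make_batches(
    n_samples_per_batch=32, n_features=4, n_batches=3, use_cupy=False
)
it = tm.IteratorForTest(X, y, w)
Xy = xgb.DMatrix(it)  # streams the iterator into an external-memory DMatrix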
-# Contains a dataset in numpy format as well as the relevant objective and metric
class TestDataset:
+    """Contains a dataset in numpy format as well as the relevant objective and metric."""

    def __init__(
        self, name: str, get_dataset: Callable, objective: str, metric: str
    ) -> None:
@@ -289,18 +288,24 @@ class TestDataset:
        self.margin: Optional[np.ndarray] = None

    def set_params(self, params_in: Dict[str, Any]) -> Dict[str, Any]:
-        params_in['objective'] = self.objective
-        params_in['eval_metric'] = self.metric
+        params_in["objective"] = self.objective
+        params_in["eval_metric"] = self.metric
        if self.objective == "multi:softmax":
            params_in["num_class"] = int(np.max(self.y) + 1)
        return params_in

    def get_dmat(self) -> xgb.DMatrix:
        return xgb.DMatrix(
-            self.X, self.y, self.w, base_margin=self.margin, enable_categorical=True
+            self.X,
+            self.y,
+            weight=self.w,
+            base_margin=self.margin,
+            enable_categorical=True,
        )

    def get_device_dmat(self) -> xgb.DeviceQuantileDMatrix:
        import cupy as cp

        w = None if self.w is None else cp.array(self.w)
        X = cp.array(self.X, dtype=np.float32)
        y = cp.array(self.y, dtype=np.float32)
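A usage sketch for `TestDataset`, assuming (as the dataset getters below suggest) that `get_dataset` returns an `(X, y)` pair; names and parameter values are illustrative:

import numpy as np

import xgboost as xgb
from xgboost import testing as tm

rng = np.random.RandomState(1994)
data = tm.TestDataset(
    "toy-reg", lambda: (rng.randn(64, 4), rng.randn(64)), "reg:squarederror", "rmse"
)
params = data.set_params({"tree_method": "hist"})  # injects objective/eval_metric
booster = xgb.train(params, data.get_dmat(), num_boost_round=4)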
@@ -334,25 +339,24 @@ class TestDataset:


@memory.cache
-def get_california_housing():
+def get_california_housing() -> Tuple[np.ndarray, np.ndarray]:
    data = datasets.fetch_california_housing()
    return data.data, data.target


@memory.cache
-def get_digits():
+def get_digits() -> Tuple[np.ndarray, np.ndarray]:
    data = datasets.load_digits()
    return data.data, data.target


@memory.cache
-def get_cancer():
-    data = datasets.load_breast_cancer()
-    return data.data, data.target
+def get_cancer() -> Tuple[np.ndarray, np.ndarray]:
+    return datasets.load_breast_cancer(return_X_y=True)


@memory.cache
-def get_sparse():
+def get_sparse() -> Tuple[np.ndarray, np.ndarray]:
    rng = np.random.RandomState(199)
    n = 2000
    sparsity = 0.75
@@ -366,7 +370,7 @@ def get_sparse():


@memory.cache
-def get_ames_housing():
+def get_ames_housing() -> Tuple[np.ndarray, np.ndarray]:
    """
    Number of samples: 1460
    Number of features: 20
@@ -374,9 +378,10 @@ def get_ames_housing():
    Number of numerical features: 10
    """
+    from sklearn.datasets import fetch_openml

    X, y = fetch_openml(data_id=42165, as_frame=True, return_X_y=True)

-    categorical_columns_subset: list[str] = [
+    categorical_columns_subset: List[str] = [
        "BldgType",  # 5 cats, no nan
        "GarageFinish",  # 3 cats, nan
        "LotConfig",  # 5 cats, no nan
@@ -389,7 +394,7 @@ def get_ames_housing():
        "PoolQC",  # 3 cats, nan
    ]

-    numerical_columns_subset: list[str] = [
+    numerical_columns_subset: List[str] = [
        "3SsnPorch",
        "Fireplaces",
        "BsmtHalfBath",
@@ -408,32 +413,70 @@ def get_ames_housing():


@memory.cache
-def get_mq2008(dpath):
+def get_mq2008(
+    dpath: str,
+) -> Tuple[
+    sparse.csr_matrix,
+    np.ndarray,
+    np.ndarray,
+    sparse.csr_matrix,
+    np.ndarray,
+    np.ndarray,
+    sparse.csr_matrix,
+    np.ndarray,
+    np.ndarray,
+]:
    from sklearn.datasets import load_svmlight_files

-    src = 'https://s3-us-west-2.amazonaws.com/xgboost-examples/MQ2008.zip'
-    target = dpath + '/MQ2008.zip'
+    src = "https://s3-us-west-2.amazonaws.com/xgboost-examples/MQ2008.zip"
+    target = dpath + "/MQ2008.zip"
    if not os.path.exists(target):
        urllib.request.urlretrieve(url=src, filename=target)

-    with zipfile.ZipFile(target, 'r') as f:
+    with zipfile.ZipFile(target, "r") as f:
        f.extractall(path=dpath)

-    (x_train, y_train, qid_train, x_test, y_test, qid_test,
-     x_valid, y_valid, qid_valid) = load_svmlight_files(
-        (dpath + "MQ2008/Fold1/train.txt",
+    (
+        x_train,
+        y_train,
+        qid_train,
+        x_test,
+        y_test,
+        qid_test,
+        x_valid,
+        y_valid,
+        qid_valid,
+    ) = load_svmlight_files(
+        (
+            dpath + "MQ2008/Fold1/train.txt",
            dpath + "MQ2008/Fold1/test.txt",
-         dpath + "MQ2008/Fold1/vali.txt"),
-        query_id=True, zero_based=False)
+            dpath + "MQ2008/Fold1/vali.txt",
+        ),
+        query_id=True,
+        zero_based=False,
+    )

-    return (x_train, y_train, qid_train, x_test, y_test, qid_test,
-            x_valid, y_valid, qid_valid)
+    return (
+        x_train,
+        y_train,
+        qid_train,
+        x_test,
+        y_test,
+        qid_test,
+        x_valid,
+        y_valid,
+        qid_valid,
+    )
@memory.cache
def make_categorical(
-    n_samples: int, n_features: int, n_categories: int, onehot: bool, sparsity=0.0,
-):
+    n_samples: int,
+    n_features: int,
+    n_categories: int,
+    onehot: bool,
+    sparsity: float = 0.0,
+) -> Tuple[ArrayLike, np.ndarray]:
    import pandas as pd

    rng = np.random.RandomState(1994)
@@ -457,7 +500,9 @@ def make_categorical(

    if sparsity > 0.0:
        for i in range(n_features):
-            index = rng.randint(low=0, high=n_samples-1, size=int(n_samples * sparsity))
+            index = rng.randint(
+                low=0, high=n_samples - 1, size=int(n_samples * sparsity)
+            )
            df.iloc[index, i] = np.NaN
            assert n_categories == np.unique(df.dtypes[i].categories).size

@@ -466,9 +511,9 @@ def make_categorical(
    return df, label
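A sketch of calling `make_categorical` (requires pandas; values are illustrative). With `onehot=False` the frame keeps pandas categorical dtypes, which is why `enable_categorical=True` is needed on the `DMatrix`:

import xgboost as xgb
from xgboost import testing as tm

# 10% of entries per feature are replaced with NaN via the sparsity branch above.
X, y = tm.make_categorical(
    n_samples=128, n_features=4, n_categories=8, onehot=False, sparsity=0.1
)
Xy = xgb.DMatrix(X, y, enable_categorical=True)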
-def _cat_sampled_from():
+def _cat_sampled_from() -> strategies.SearchStrategy:
    @strategies.composite
-    def _make_cat(draw):
+    def _make_cat(draw: Callable) -> Tuple[int, int, int, float]:
        n_samples = draw(strategies.integers(2, 512))
        n_features = draw(strategies.integers(1, 4))
        n_cats = draw(strategies.integers(1, 128))
@@ -483,7 +528,7 @@ def _cat_sampled_from():
        )
        return n_samples, n_features, n_cats, sparsity

-    def _build(args):
+    def _build(args: Tuple[int, int, int, float]) -> TestDataset:
        n_samples = args[0]
        n_features = args[1]
        n_cats = args[2]
@@ -495,12 +540,13 @@ def _cat_sampled_from():
            "rmse",
        )

-    return _make_cat().map(_build)
+    return _make_cat().map(_build)  # pylint: disable=no-member


-categorical_dataset_strategy = _cat_sampled_from()
+categorical_dataset_strategy: strategies.SearchStrategy = _cat_sampled_from()
+# pylint: disable=too-many-locals
@memory.cache
def make_sparse_regression(
    n_samples: int, n_features: int, sparsity: float, as_dense: bool
@@ -530,8 +576,7 @@ def make_sparse_regression(

    # Use multi-thread to speed up the generation, convenient if you use this function
    # for benchmarking.
-    n_threads = multiprocessing.cpu_count()
-    n_threads = min(n_threads, n_features)
+    n_threads = min(multiprocessing.cpu_count(), n_features)

    def random_csc(t_id: int) -> sparse.csc_matrix:
        rng = np.random.default_rng(1994 * t_id)
@@ -653,7 +698,7 @@ _unweighted_datasets_strategy = strategies.sampled_from(


@strategies.composite
-def _dataset_weight_margin(draw):
+def _dataset_weight_margin(draw: Callable) -> TestDataset:
    data: TestDataset = draw(_unweighted_datasets_strategy)
    if draw(strategies.booleans()):
        data.w = draw(
@@ -673,6 +718,7 @@ def _dataset_weight_margin(draw):
                elements=strategies.floats(0.5, 1.0),
            )
        )
+        assert data.margin is not None
        if num_class != 1:
            data.margin = data.margin.reshape(data.y.shape[0], num_class)

@@ -684,24 +730,24 @@ def _dataset_weight_margin(draw):
dataset_strategy = _dataset_weight_margin()
-def non_increasing(L, tolerance=1e-4):
+def non_increasing(L: Sequence[float], tolerance: float = 1e-4) -> bool:
    return all((y - x) < tolerance for x, y in zip(L, L[1:]))


-def eval_error_metric(predt, dtrain: xgb.DMatrix):
+def eval_error_metric(predt: np.ndarray, dtrain: xgb.DMatrix) -> Tuple[str, np.float64]:
+    """Evaluation metric for xgb.train"""
    label = dtrain.get_label()
    r = np.zeros(predt.shape)
    gt = predt > 0.5
    if predt.size == 0:
-        return "CustomErr", 0
+        return "CustomErr", np.float64(0.0)
    r[gt] = 1 - label[gt]
    le = predt <= 0.5
    r[le] = label[le]
-    return 'CustomErr', np.sum(r)
+    return "CustomErr", np.sum(r)
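A sketch of wiring `eval_error_metric` into training through `xgb.train`'s `custom_metric` argument; the DMatrix here is illustrative:

import numpy as np

import xgboost as xgb
from xgboost import testing as tm

rng = np.random.RandomState(1994)
Xy = xgb.DMatrix(rng.randn(128, 4), (rng.randn(128) > 0).astype(np.float32))
booster = xgb.train(
    {"objective": "binary:logistic"},
    Xy,
    num_boost_round=4,
    evals=[(Xy, "Train")],
    custom_metric=tm.eval_error_metric,  # receives (predt, dtrain), returns (name, value)
)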
-def eval_error_metric_skl(y_true: np.ndarray, y_score: np.ndarray) -> float:
+def eval_error_metric_skl(y_true: np.ndarray, y_score: np.ndarray) -> np.float64:
    """Evaluation metric that looks like metrics provided by sklearn."""
    r = np.zeros(y_score.shape)
    gt = y_score > 0.5
@@ -717,13 +763,15 @@ def root_mean_square(y_true: np.ndarray, y_score: np.ndarray) -> float:
    return rmse


-def softmax(x):
+def softmax(x: np.ndarray) -> np.ndarray:
    e = np.exp(x)
    return e / np.sum(e)


-def softprob_obj(classes):
-    def objective(labels, predt):
+def softprob_obj(classes: int) -> SklObjective:
+    def objective(
+        labels: np.ndarray, predt: np.ndarray
+    ) -> Tuple[np.ndarray, np.ndarray]:
        rows = labels.shape[0]
        grad = np.zeros((rows, classes), dtype=float)
        hess = np.zeros((rows, classes), dtype=float)
@@ -746,29 +794,33 @@ def softprob_obj(classes):
class DirectoryExcursion:
-    def __init__(self, path: os.PathLike, cleanup=False):
-        '''Change directory. Change back and optionally cleaning up the directory when exit.
+    """Change directory. Change back and optionally cleaning up the directory when
+    exit.

-        '''
+    """
+
+    def __init__(self, path: os.PathLike, cleanup: bool = False):
        self.path = path
        self.curdir = os.path.normpath(os.path.abspath(os.path.curdir))
        self.cleanup = cleanup
-        self.files = {}
+        self.files: Set[str] = set()

-    def __enter__(self):
+    def __enter__(self) -> None:
        os.chdir(self.path)
        if self.cleanup:
            self.files = {
                os.path.join(root, f)
-                for root, subdir, files in os.walk(self.path) for f in files
+                for root, subdir, files in os.walk(os.path.expanduser(self.path))
+                for f in files
            }

-    def __exit__(self, *args):
+    def __exit__(self, *args: Any) -> None:
        os.chdir(self.curdir)
        if self.cleanup:
            files = {
                os.path.join(root, f)
-                for root, subdir, files in os.walk(self.path) for f in files
+                for root, subdir, files in os.walk(os.path.expanduser(self.path))
+                for f in files
            }
            diff = files.difference(self.files)
            for f in diff:
@@ -776,7 +828,7 @@ class DirectoryExcursion:
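A usage sketch for `DirectoryExcursion`; the scratch file name is illustrative. With `cleanup=True`, files created inside the block (the set difference computed in `__exit__`) are cleaned up on exit:

import os

from xgboost import testing as tm

with tm.DirectoryExcursion(os.path.join(os.sep, "tmp"), cleanup=True):
    with open("scratch.txt", "w") as fd:  # created relative to /tmp
        fd.write("temporary")
# Back in the original working directory; scratch.txt was left behind in the
# excursion and is removed by the cleanup pass.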
@contextmanager
-def captured_output():
+def captured_output() -> Generator[Tuple[StringIO, StringIO], None, None]:
    """Reassign stdout temporarily in order to test printed statements
    Taken from:
    https://stackoverflow.com/questions/4219717/how-to-assert-output-with-nosetest-unittest-in-python
@@ -793,14 +845,46 @@ def captured_output():
    sys.stdout, sys.stderr = old_out, old_err
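A sketch of `captured_output` in a test; it yields the two `StringIO` buffers that temporarily replace `sys.stdout` and `sys.stderr`:

from xgboost import testing as tm

with tm.captured_output() as (out, err):
    print("evaluation log line")
assert out.getvalue().strip() == "evaluation log line"
assert err.getvalue() == ""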
-try:
-    # Python 3.7+
-    from contextlib import nullcontext as noop_context
-except ImportError:
-    # Python 3.6
-    from contextlib import suppress as noop_context
+def timeout(sec: int, *args: Any, enable: bool = True, **kwargs: Any) -> Any:
+    """Make a pytest mark for the `pytest-timeout` package.
+
+    Parameters
+    ----------
+    sec :
+        Timeout seconds.
+    enable :
+        Control whether timeout should be applied, used for debugging.
+
+    Returns
+    -------
+    pytest.mark.timeout
+    """
+
+    if enable:
+        return pytest.mark.timeout(sec, *args, **kwargs)
+    return pytest.mark.timeout(None, *args, **kwargs)
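The relocated `timeout` helper keeps the same shape as the deleted copy, minus the local `pytest` import (the module now imports `pytest` at the top). Typical usage, as seen throughout the test hunks below:

from xgboost import testing as tm

# Apply a 30-second limit to every test in the module; pass enable=False
# while debugging to lift the limit without touching each mark.
pytestmark = tm.timeout(30)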
-CURDIR = os.path.normpath(os.path.abspath(os.path.dirname(__file__)))
-PROJECT_ROOT = os.path.normpath(
-    os.path.join(CURDIR, os.path.pardir, os.path.pardir))
+def demo_dir(path: str) -> str:
+    """Look for the demo directory based on the test file name."""
+    path = normpath(os.path.dirname(path))
+    while True:
+        subdirs = [f.path for f in os.scandir(path) if f.is_dir()]
+        subdirs = [os.path.basename(d) for d in subdirs]
+        if "demo" in subdirs:
+            return os.path.join(path, "demo")
+        new_path = normpath(os.path.join(path, os.path.pardir))
+        assert new_path != path
+        path = new_path
+
+
+def normpath(path: str) -> str:
+    return os.path.normpath(os.path.abspath(path))
+
+
+def data_dir(path: str) -> str:
+    return os.path.join(demo_dir(path), "data")
+
+
+def project_root(path: str) -> str:
+    return normpath(os.path.join(demo_dir(path), os.path.pardir))
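These path helpers replace the old module-level `PROJECT_ROOT` constant: because `xgboost.testing` no longer lives inside the repository's test tree, paths are resolved by walking up from the calling test file instead. A usage sketch:

from xgboost import testing as tm

# __file__ is the calling test module; demo_dir walks up until it finds "demo".
demo = tm.demo_dir(__file__)           # .../demo
dpath = tm.data_dir(__file__)          # .../demo/data
root = tm.project_root(__file__)       # parent of the demo directory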
@@ -121,12 +121,14 @@ if __name__ == "__main__":
            "python-package/xgboost/sklearn.py",
            "python-package/xgboost/spark",
            "python-package/xgboost/federated.py",
-            "python-package/xgboost/testing.py",
+            "python-package/xgboost/testing",
            # tests
            "tests/python/test_config.py",
            "tests/python/test_data_iterator.py",
            "tests/python/test_spark/",
            "tests/python/test_quantile_dmatrix.py",
            "tests/python-gpu/test_gpu_spark/",
            "tests/python-gpu/test_gpu_data_iterator.py",
            "tests/ci_build/lint_python.py",
            # demo
            "demo/guide-python/cat_in_the_dat.py",
@@ -1,9 +1,7 @@
import sys
import pytest
import logging

sys.path.append("tests/python")
-import testing as tm  # noqa
+from xgboost import testing as tm  # noqa


def has_rmm():
    try:
@@ -34,8 +32,8 @@ def local_cuda_client(request, pytestconfig):
        kwargs['rmm_pool_size'] = '2GB'
    if tm.no_dask_cuda()['condition']:
        raise ImportError('The local_cuda_cluster fixture requires dask_cuda package')
-    from dask_cuda import LocalCUDACluster
    from dask.distributed import Client
+    from dask_cuda import LocalCUDACluster
    yield Client(LocalCUDACluster(**kwargs))

def pytest_addoption(parser):
@@ -1,16 +1,14 @@
'''Loading a pickled model generated by test_pickling.py, only used by
`test_gpu_with_dask.py`'''
-import os
-import numpy as np
-import xgboost as xgb
import json
+import os

+import numpy as np
import pytest
-import sys
+from test_gpu_pickling import build_dataset, load_pickle, model_path

-from test_gpu_pickling import build_dataset, model_path, load_pickle

-sys.path.append("tests/python")
-import testing as tm
+import xgboost as xgb
+from xgboost import testing as tm


class TestLoadPickle:
@@ -5,10 +5,10 @@ import pytest
from hypothesis import given, settings, strategies

import xgboost as xgb
+from xgboost import testing as tm

sys.path.append("tests/python")
import test_quantile_dmatrix as tqd
-import testing as tm


class TestDeviceQuantileDMatrix:
@@ -2,11 +2,12 @@ import json
import sys

import numpy as np
-import xgboost as xgb
import pytest

+import xgboost as xgb
+from xgboost import testing as tm

sys.path.append("tests/python")
-import testing as tm
from test_dmatrix import set_base_margin_info

@@ -85,8 +86,8 @@ def _test_from_cudf(DMatrixT):


def _test_cudf_training(DMatrixT):
-    from cudf import DataFrame as df
    import pandas as pd
+    from cudf import DataFrame as df
    np.random.seed(1)
    X = pd.DataFrame(np.random.randn(50, 10))
    y = pd.DataFrame(np.random.randn(50))
@@ -109,8 +110,8 @@ def _test_cudf_training(DMatrixT):


def _test_cudf_metainfo(DMatrixT):
-    from cudf import DataFrame as df
    import pandas as pd
+    from cudf import DataFrame as df
    n = 100
    X = np.random.random((n, 2))
    dmat_cudf = DMatrixT(df.from_pandas(pd.DataFrame(X)))
@@ -247,9 +248,9 @@ Arrow specification.'''
@pytest.mark.skipif(**tm.no_sklearn())
@pytest.mark.skipif(**tm.no_pandas())
def test_cudf_training_with_sklearn():
+    import pandas as pd
    from cudf import DataFrame as df
    from cudf import Series as ss
-    import pandas as pd
    np.random.seed(1)
    X = pd.DataFrame(np.random.randn(50, 10))
    y = pd.DataFrame((np.random.randn(50) > 0).astype(np.int8))
@@ -1,12 +1,15 @@
-import numpy as np
-import xgboost as xgb
import sys

+import numpy as np
import pytest

+import xgboost as xgb

sys.path.append("tests/python")
-import testing as tm
from test_dmatrix import set_base_margin_info

+from xgboost import testing as tm


def dmatrix_from_cupy(input_type, DMatrixT, missing=np.NAN):
    '''Test constructing DMatrix from cupy'''
@@ -1,13 +1,18 @@
-import sys
+import os
+import sys

import numpy as np
-import xgboost as xgb
import pytest

+import xgboost as xgb
+from xgboost import testing as tm

sys.path.append("tests/python")
+import test_basic_models as test_bm

# Don't import the test class, otherwise they will run twice.
import test_callback as test_cb  # noqa
-import test_basic_models as test_bm
-import testing as tm

rng = np.random.RandomState(1994)


@@ -1,13 +1,12 @@
-import numpy as np
-import xgboost as xgb
-from hypothesis import given, strategies, settings
-import pytest
import sys

+import pytest
+from hypothesis import given, settings, strategies
+from xgboost.testing import no_cupy

sys.path.append("tests/python")
-from test_data_iterator import test_single_batch as cpu_single_batch
from test_data_iterator import run_data_iterator
-from testing import no_cupy
+from test_data_iterator import test_single_batch as cpu_single_batch


def test_gpu_single_batch() -> None:
@@ -24,7 +23,11 @@ def test_gpu_single_batch() -> None:
)
@settings(deadline=None, max_examples=10, print_blob=True)
def test_gpu_data_iterator(
-    n_samples_per_batch: int, n_features: int, n_batches: int, subsample: bool, use_cupy: bool
+    n_samples_per_batch: int,
+    n_features: int,
+    n_batches: int,
+    subsample: bool,
+    use_cupy: bool,
) -> None:
    run_data_iterator(
        n_samples_per_batch, n_features, n_batches, "gpu_hist", subsample, use_cupy
@@ -1,9 +1,12 @@
import os
import subprocess
import sys

import pytest

+from xgboost import testing as tm

sys.path.append("tests/python")
-import testing as tm
import test_demos as td  # noqa


@@ -31,6 +34,6 @@ def test_categorical_demo():
@pytest.mark.skipif(**tm.no_cupy())
@pytest.mark.mgpu
def test_dask_training():
-    script = os.path.join(tm.PROJECT_ROOT, 'demo', 'dask', 'gpu_training.py')
+    script = os.path.join(tm.demo_dir(__file__), 'dask', 'gpu_training.py')
    cmd = ['python', script]
    subprocess.check_call(cmd)
@@ -1,7 +1,9 @@
import sys
-import xgboost

import pytest

+import xgboost

sys.path.append("tests/python")
import test_eval_metrics as test_em  # noqa


@@ -1,8 +1,11 @@
-import numpy as np
import sys

+import numpy as np

sys.path.append("tests/python")
# Don't import the test class, otherwise they will run twice.
import test_interaction_constraints as test_ic  # noqa

rng = np.random.RandomState(1994)
@@ -1,15 +1,10 @@
import sys

import pytest
from hypothesis import assume, given, note, settings, strategies

import xgboost as xgb
-from xgboost import testing
+from xgboost import testing as tm

sys.path.append("tests/python")
-import testing as tm

-pytestmark = testing.timeout(10)
+pytestmark = tm.timeout(10)

parameter_strategy = strategies.fixed_dictionaries({
    'booster': strategies.just('gblinear'),
@@ -3,20 +3,17 @@ import json
import os
import pickle
import subprocess
import sys

import numpy as np
import pytest

import xgboost as xgb
-from xgboost import XGBClassifier, testing
-
-sys.path.append("tests/python")
-import testing as tm
+from xgboost import XGBClassifier
+from xgboost import testing as tm

model_path = './model.pkl'

-pytestmark = testing.timeout(30)
+pytestmark = tm.timeout(30)


def build_dataset():
@@ -1,10 +1,11 @@
import sys

import pytest

-sys.path.append("tests/python")
-import testing as tm
-import test_plotting as tp
+from xgboost import testing as tm
+
+sys.path.append("tests/python")
+import test_plotting as tp

pytestmark = pytest.mark.skipif(**tm.no_multiple(tm.no_matplotlib(), tm.no_graphviz()))
@@ -6,7 +6,7 @@ from hypothesis import assume, given, settings, strategies
from xgboost.compat import PANDAS_INSTALLED

import xgboost as xgb
-from xgboost import testing
+from xgboost import testing as tm

if PANDAS_INSTALLED:
    from hypothesis.extra.pandas import column, data_frames, range_indexes
@@ -16,7 +16,6 @@ else:
    column, data_frames, range_indexes = noop, noop, noop

sys.path.append("tests/python")
-import testing as tm
from test_predict import run_predict_leaf  # noqa
from test_predict import run_threaded_predict  # noqa

@@ -33,7 +32,7 @@ predict_parameter_strategy = strategies.fixed_dictionaries({
    'num_parallel_tree': strategies.sampled_from([1, 4]),
})

-pytestmark = testing.timeout(20)
+pytestmark = tm.timeout(20)


class TestGPUPredict:
@@ -227,8 +226,8 @@ class TestGPUPredict:
    @pytest.mark.skipif(**tm.no_cupy())
    @pytest.mark.skipif(**tm.no_cudf())
    def test_inplace_predict_cudf(self):
-        import cupy as cp
        import cudf
+        import cupy as cp
        import pandas as pd
        rows = 1000
        cols = 10
@@ -379,8 +378,8 @@ class TestGPUPredict:
    @pytest.mark.skipif(**tm.no_cupy())
    @pytest.mark.parametrize("n_classes", [2, 3])
    def test_predict_dart(self, n_classes):
-        from sklearn.datasets import make_classification
        import cupy as cp
+        from sklearn.datasets import make_classification
        n_samples = 1000
        X_, y_ = make_classification(
            n_samples=n_samples, n_informative=5, n_classes=n_classes
@@ -1,20 +1,15 @@
import itertools
import os
import shutil
import sys
import urllib.request
import zipfile

import numpy as np

import xgboost
-from xgboost import testing
+from xgboost import testing as tm

sys.path.append("tests/python")

-import testing as tm  # noqa

-pytestmark = testing.timeout(10)
+pytestmark = tm.timeout(10)


class TestRanking:
@@ -24,8 +19,9 @@ class TestRanking:
        Download and setup the test fixtures
        """
+        from sklearn.datasets import load_svmlight_files
+
        # download the test data
-        cls.dpath = os.path.join(tm.PROJECT_ROOT, "demo/rank/")
+        cls.dpath = os.path.join(tm.demo_dir(__file__), "rank/")
        src = 'https://s3-us-west-2.amazonaws.com/xgboost-examples/MQ2008.zip'
        target = os.path.join(cls.dpath, "MQ2008.zip")
@@ -1,13 +1,8 @@
import sys
-from typing import List
-
-import numpy as np
-import pandas as pd
import pytest

-sys.path.append("tests/python")
-
-import testing as tm
+from xgboost import testing as tm

if tm.no_spark()["condition"]:
    pytest.skip(msg=tm.no_spark()["reason"], allow_module_level=True)
@@ -15,6 +10,7 @@ if sys.platform.startswith("win") or sys.platform.startswith("darwin"):
    pytest.skip("Skipping PySpark tests on Windows", allow_module_level=True)


+sys.path.append("tests/python")
from test_spark.test_data import run_dmatrix_ctor


@@ -6,8 +6,7 @@ import sys
import pytest
import sklearn

-sys.path.append("tests/python")
-import testing as tm
+from xgboost import testing as tm

if tm.no_spark()["condition"]:
    pytest.skip(msg=tm.no_spark()["reason"], allow_module_level=True)
@@ -1,7 +1,9 @@
-import numpy as np
-import xgboost as xgb
import json

+import numpy as np

+import xgboost as xgb

rng = np.random.RandomState(1994)


@@ -6,13 +6,12 @@ import pytest
from hypothesis import assume, given, note, settings, strategies

import xgboost as xgb
-from xgboost import testing
+from xgboost import testing as tm

sys.path.append("tests/python")
import test_updaters as test_up
-import testing as tm

-pytestmark = testing.timeout(30)
+pytestmark = tm.timeout(30)

parameter_strategy = strategies.fixed_dictionaries({
    'max_depth': strategies.integers(0, 11),
@@ -1,52 +1,54 @@
"""Copyright 2019-2022 XGBoost contributors"""
-import sys
-import os
-from typing import Type, TypeVar, Any, Dict, List, Union
-import pytest
-import numpy as np
import asyncio
-import xgboost
+import os
import subprocess
+import sys
from collections import OrderedDict
from inspect import signature
-from hypothesis import given, strategies, settings, note
+from typing import Any, Dict, Type, TypeVar, Union

+import numpy as np
+import pytest
+from hypothesis import given, note, settings, strategies
from hypothesis._settings import duration
from test_gpu_updaters import parameter_strategy

+import xgboost
+from xgboost import testing as tm

if sys.platform.startswith("win"):
    pytest.skip("Skipping dask tests on Windows", allow_module_level=True)

sys.path.append("tests/python")
-import testing as tm  # noqa

if tm.no_dask_cuda()["condition"]:
    pytest.skip(tm.no_dask_cuda()["reason"], allow_module_level=True)


-from test_with_dask import run_empty_dmatrix_reg  # noqa
-from test_with_dask import run_empty_dmatrix_auc  # noqa
-from test_with_dask import _get_client_workers  # noqa
-from test_with_dask import generate_array  # noqa
-from test_with_dask import make_categorical  # noqa
from test_with_dask import run_auc  # noqa
from test_with_dask import run_boost_from_prediction  # noqa
from test_with_dask import run_boost_from_prediction_multi_class  # noqa
-from test_with_dask import run_dask_classifier  # noqa
-from test_with_dask import run_empty_dmatrix_cls  # noqa
+from test_with_dask import _get_client_workers  # noqa
+from test_with_dask import generate_array  # noqa
-from test_with_dask import kCols as random_cols  # noqa
-from test_with_dask import suppress  # noqa
-from test_with_dask import run_tree_stats  # noqa
from test_with_dask import run_categorical  # noqa
+from test_with_dask import make_categorical  # noqa
+from test_with_dask import run_dask_classifier  # noqa
+from test_with_dask import run_empty_dmatrix_auc  # noqa
+from test_with_dask import run_empty_dmatrix_cls  # noqa
+from test_with_dask import run_empty_dmatrix_reg  # noqa
+from test_with_dask import run_tree_stats  # noqa
+from test_with_dask import suppress  # noqa
+from test_with_dask import kCols as random_cols  # noqa

try:
-    import dask.dataframe as dd
-    from xgboost import dask as dxgb
-    import xgboost as xgb
-    from dask.distributed import Client
-    from dask import array as da
-    from dask_cuda import LocalCUDACluster, utils
+    import cudf
+    import dask.dataframe as dd
+    from dask import array as da
+    from dask.distributed import Client
+    from dask_cuda import LocalCUDACluster, utils
+
+    import xgboost as xgb
+    from xgboost import dask as dxgb
except ImportError:
    pass

@@ -334,9 +336,9 @@ class TestDistributedGPU:

    @pytest.mark.skipif(**tm.no_dask_cudf())
    def test_empty_partition(self, local_cuda_client: Client) -> None:
-        import dask_cudf
        import cudf
        import cupy
+        import dask_cudf

        mult = 100
        df = cudf.DataFrame(
@@ -1,13 +1,15 @@
import json
-import xgboost as xgb
-import pytest
-import tempfile
-import sys
-import numpy as np
import os
+import sys
+import tempfile

+import numpy as np
+import pytest

+import xgboost as xgb
+from xgboost import testing as tm

sys.path.append("tests/python")
-import testing as tm  # noqa
import test_with_sklearn as twskl  # noqa

pytestmark = pytest.mark.skipif(**tm.no_sklearn())
@@ -38,9 +40,9 @@ def test_gpu_binary_classification():
@pytest.mark.skipif(**tm.no_cupy())
@pytest.mark.skipif(**tm.no_cudf())
def test_boost_from_prediction_gpu_hist():
-    from sklearn.datasets import load_breast_cancer, load_digits
-    import cupy as cp
    import cudf
+    import cupy as cp
+    from sklearn.datasets import load_breast_cancer, load_digits

    tree_method = "gpu_hist"
    X, y = load_breast_cancer(return_X_y=True)
@@ -68,12 +70,12 @@ def test_num_parallel_tree():
@pytest.mark.skipif(**tm.no_cudf())
@pytest.mark.skipif(**tm.no_sklearn())
def test_categorical():
-    import pandas as pd
    import cudf
    import cupy as cp
+    import pandas as pd
    from sklearn.datasets import load_svmlight_file

-    data_dir = os.path.join(tm.PROJECT_ROOT, "demo", "data")
+    data_dir = tm.data_dir(__file__)
    X, y = load_svmlight_file(os.path.join(data_dir, "agaricus.txt.train"))
    clf = xgb.XGBClassifier(
        tree_method="gpu_hist",
@@ -123,9 +125,9 @@ def test_categorical():
@pytest.mark.skipif(**tm.no_cupy())
@pytest.mark.skipif(**tm.no_cudf())
def test_classififer():
-    from sklearn.datasets import load_digits
-    import cupy as cp
    import cudf
+    import cupy as cp
+    from sklearn.datasets import load_digits

    X, y = load_digits(return_X_y=True)
    y *= 10
@@ -1,9 +1,9 @@
-import numpy as np
-import xgboost as xgb
import cupy as cp
import time
+import numpy as np
import pytest

+import xgboost as xgb


# Test for integer overflow or out of memory exceptions
def test_large_input():
@@ -1,11 +1,12 @@
import sys
-import numpy as np

+import numpy as np
import pytest

import xgboost as xgb
+from xgboost import testing as tm

sys.path.append("tests/python")
-import testing as tm
import test_monotone_constraints as tmc

rng = np.random.RandomState(1994)

@@ -1,7 +1,9 @@
-import xgboost
-import numpy as np
import os

+import numpy as np

+import xgboost

kRounds = 2
kRows = 1000
kCols = 4
@@ -1,12 +1,13 @@
# -*- coding: utf-8 -*-
-import numpy as np
-import os
-import xgboost as xgb
-import pytest
import json
-from pathlib import Path
+import os
import tempfile
-import testing as tm
+from pathlib import Path

+import numpy as np
+import pytest

+import xgboost as xgb
+from xgboost import testing as tm

dpath = 'demo/data/'
rng = np.random.RandomState(1994)
@@ -1,13 +1,15 @@
-import numpy as np
-import xgboost as xgb
-import os
import json
-import testing as tm
-import pytest
import locale
+import os
import tempfile

-dpath = os.path.join(tm.PROJECT_ROOT, 'demo/data/')
+import numpy as np
+import pytest
+
+import xgboost as xgb
+from xgboost import testing as tm
+
+dpath = tm.data_dir(__file__)

rng = np.random.RandomState(1994)

@@ -36,8 +38,8 @@ class TestModels:
        param = {'verbosity': 0, 'objective': 'binary:logistic',
                 'booster': 'gblinear', 'alpha': 0.0001, 'lambda': 1,
                 'nthread': 1}
-        dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train')
-        dtest = xgb.DMatrix(dpath + 'agaricus.txt.test')
+        dtrain = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.train"))
+        dtest = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.test"))
        watchlist = [(dtest, 'eval'), (dtrain, 'train')]
        num_round = 4
        bst = xgb.train(param, dtrain, num_round, watchlist)
@@ -49,8 +51,8 @@ class TestModels:
        assert err < 0.2

    def test_dart(self):
-        dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train')
-        dtest = xgb.DMatrix(dpath + 'agaricus.txt.test')
+        dtrain = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.train"))
+        dtest = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.test"))
        param = {'max_depth': 5, 'objective': 'binary:logistic',
                 'eval_metric': 'logloss', 'booster': 'dart', 'verbosity': 1}
        # specify validations set to watch performance
@@ -116,7 +118,7 @@ class TestModels:

    def test_boost_from_prediction(self):
        # Re-construct dtrain here to avoid modification
-        margined = xgb.DMatrix(dpath + 'agaricus.txt.train')
+        margined = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.train"))
        bst = xgb.train({'tree_method': 'hist'}, margined, 1)
        predt_0 = bst.predict(margined, output_margin=True)
        margined.set_base_margin(predt_0)
@@ -124,13 +126,13 @@ class TestModels:
        predt_1 = bst.predict(margined)

        assert np.any(np.abs(predt_1 - predt_0) > 1e-6)
-        dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train')
+        dtrain = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.train"))
        bst = xgb.train({'tree_method': 'hist'}, dtrain, 2)
        predt_2 = bst.predict(dtrain)
        assert np.all(np.abs(predt_2 - predt_1) < 1e-6)

    def test_boost_from_existing_model(self):
-        X = xgb.DMatrix(dpath + 'agaricus.txt.train')
+        X = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.train"))
        booster = xgb.train({'tree_method': 'hist'}, X, num_boost_round=4)
        assert booster.num_boosted_rounds() == 4
        booster = xgb.train({'tree_method': 'hist'}, X, num_boost_round=4,
@@ -150,8 +152,8 @@ class TestModels:
            'objective': 'reg:logistic',
            "tree_method": tree_method
        }
-        dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train')
-        dtest = xgb.DMatrix(dpath + 'agaricus.txt.test')
+        dtrain = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.train"))
+        dtest = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.test"))
        watchlist = [(dtest, 'eval'), (dtrain, 'train')]
        num_round = 10

@@ -197,8 +199,8 @@ class TestModels:
        self.run_custom_objective()

    def test_multi_eval_metric(self):
-        dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train')
-        dtest = xgb.DMatrix(dpath + 'agaricus.txt.test')
+        dtrain = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.train"))
+        dtest = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.test"))
        watchlist = [(dtest, 'eval'), (dtrain, 'train')]
        param = {'max_depth': 2, 'eta': 0.2, 'verbosity': 1,
                 'objective': 'binary:logistic'}
@@ -220,7 +222,7 @@ class TestModels:
            param['scale_pos_weight'] = ratio
            return (dtrain, dtest, param)

-        dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train')
+        dtrain = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.train"))
        xgb.cv(param, dtrain, num_round, nfold=5,
               metrics={'auc'}, seed=0, fpreproc=fpreproc)

@@ -228,7 +230,7 @@ class TestModels:
        param = {'max_depth': 2, 'eta': 1, 'verbosity': 0,
                 'objective': 'binary:logistic'}
        num_round = 2
-        dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train')
+        dtrain = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.train"))
        xgb.cv(param, dtrain, num_round, nfold=5,
               metrics={'error'}, seed=0, show_stdv=False)

@@ -346,7 +348,7 @@ class TestModels:
        os.remove(model_path)

        try:
-            dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train')
+            dtrain = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.train"))
            xgb.train({'objective': 'foo'}, dtrain, num_boost_round=1)
        except ValueError as e:
            e_str = str(e)
@@ -1,9 +1,12 @@
-from typing import Union
-import xgboost as xgb
-import pytest
import os
-import testing as tm
import tempfile
+from contextlib import nullcontext
+from typing import Union

+import pytest

+import xgboost as xgb
+from xgboost import testing as tm

# We use the dataset for tests.
pytestmark = pytest.mark.skipif(**tm.no_sklearn())
@@ -271,13 +274,14 @@ class TestCallbacks:
        """Test learning rate scheduler, used by both CPU and GPU tests."""
        scheduler = xgb.callback.LearningRateScheduler

-        dpath = os.path.join(tm.PROJECT_ROOT, 'demo/data/')
-        dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train')
-        dtest = xgb.DMatrix(dpath + 'agaricus.txt.test')
+        dpath = tm.data_dir(__file__)
+        dtrain = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.train"))
+        dtest = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.test"))

        watchlist = [(dtest, 'eval'), (dtrain, 'train')]
        num_round = 4

-        warning_check = tm.noop_context()
+        warning_check = nullcontext()

        # learning_rates as a list
        # init eta with 0 to check whether learning_rates work
@@ -1,11 +1,13 @@
-import os
-import tempfile
-import platform
-import xgboost
-import subprocess
-import numpy
import json
-import testing as tm
+import os
+import platform
+import subprocess
+import tempfile

+import numpy

+import xgboost
+from xgboost import testing as tm


class TestCLI:
@@ -29,7 +31,7 @@ data = {data_path}
eval[test] = {data_path}
'''

-    PROJECT_ROOT = tm.PROJECT_ROOT
+    PROJECT_ROOT = tm.project_root(__file__)

    def get_exe(self):
        if platform.system() == 'Windows':
@@ -1,14 +1,16 @@
+from typing import Dict, List

import numpy as np
import pytest
from hypothesis import given, settings, strategies
from scipy.sparse import csr_matrix
-from testing import IteratorForTest, make_batches, non_increasing
from xgboost.data import SingleBatchInternalIter as SingleBatch
+from xgboost.testing import IteratorForTest, make_batches, non_increasing

import xgboost as xgb
-from xgboost import testing
+from xgboost import testing as tm

-pytestmark = testing.timeout(30)
+pytestmark = tm.timeout(30)


def test_single_batch(tree_method: str = "approx") -> None:
@@ -83,7 +85,7 @@ def run_data_iterator(
    if tree_method == "gpu_hist":
        parameters["sampling_method"] = "gradient_based"

-    results_from_it: xgb.callback.EvaluationMonitor.EvalsLog = {}
+    results_from_it: Dict[str, Dict[str, List[float]]] = {}
    from_it = xgb.train(
        parameters,
        Xy,
@@ -106,7 +108,7 @@ def run_data_iterator(
    assert Xy.num_row() == n_samples_per_batch * n_batches
    assert Xy.num_col() == n_features

-    results_from_arrays: xgb.callback.EvaluationMonitor.EvalsLog = {}
+    results_from_arrays: Dict[str, Dict[str, List[float]]] = {}
    from_arrays = xgb.train(
        parameters,
        Xy,
@@ -3,14 +3,12 @@ import subprocess
import sys

import pytest
-import testing as tm

-from xgboost import testing
+from xgboost import testing as tm

-pytestmark = testing.timeout(30)
+pytestmark = tm.timeout(30)

-ROOT_DIR = tm.PROJECT_ROOT
-DEMO_DIR = os.path.join(ROOT_DIR, 'demo')
+DEMO_DIR = tm.demo_dir(__file__)
PYTHON_DEMO_DIR = os.path.join(DEMO_DIR, 'guide-python')
CLI_DEMO_DIR = os.path.join(DEMO_DIR, 'CLI')

@@ -156,7 +154,7 @@ def test_cli_regression_demo():
    cmd = ['python', script, 'machine.txt', '1']
    subprocess.check_call(cmd, cwd=reg_dir)

-    exe = os.path.join(tm.PROJECT_ROOT, 'xgboost')
+    exe = os.path.join(DEMO_DIR, os.path.pardir, 'xgboost')
    conf = os.path.join(reg_dir, 'machine.conf')
    subprocess.check_call([exe, conf], cwd=reg_dir)
@ -4,11 +4,11 @@ import tempfile
|
||||
import numpy as np
|
||||
import pytest
|
||||
import scipy.sparse
|
||||
import testing as tm
|
||||
from hypothesis import given, settings, strategies
|
||||
from scipy.sparse import csr_matrix, rand
|
||||
|
||||
import xgboost as xgb
|
||||
from xgboost import testing as tm
|
||||
|
||||
rng = np.random.RandomState(1)
|
||||
|
||||
|
||||
@ -1,9 +1,8 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
import pytest
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import testing as tm
|
||||
import xgboost as xgb
|
||||
from xgboost import testing as tm
|
||||
|
||||
try:
|
||||
import datatable as dt
|
||||
|
||||
@ -1,8 +1,9 @@
|
||||
import xgboost as xgb
|
||||
import testing as tm
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import xgboost as xgb
|
||||
from xgboost import testing as tm
|
||||
|
||||
rng = np.random.RandomState(1994)
|
||||
|
||||
|
||||
|
||||
@ -1,8 +1,9 @@
|
||||
import xgboost as xgb
|
||||
import testing as tm
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import xgboost as xgb
|
||||
from xgboost import testing as tm
|
||||
|
||||
rng = np.random.RandomState(1337)
|
||||
|
||||
|
||||
@ -254,8 +255,8 @@ class TestEvalMetrics:
|
||||
self.run_roc_auc_multi("hist", n_samples, weighted)
|
||||
|
||||
def run_pr_auc_binary(self, tree_method):
|
||||
from sklearn.metrics import precision_recall_curve, auc
|
||||
from sklearn.datasets import make_classification
|
||||
from sklearn.metrics import auc, precision_recall_curve
|
||||
X, y = make_classification(128, 4, n_classes=2, random_state=1994)
|
||||
clf = xgb.XGBClassifier(tree_method=tree_method, n_estimators=1)
|
||||
clf.fit(X, y, eval_metric="aucpr", eval_set=[(X, y)])
|
||||
|
||||
@ -1,9 +1,9 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
import numpy as np
|
||||
import xgboost
|
||||
import testing as tm
|
||||
import pytest
|
||||
|
||||
import xgboost
|
||||
from xgboost import testing as tm
|
||||
|
||||
dpath = 'demo/data/'
|
||||
rng = np.random.RandomState(1994)
|
||||
|
||||
|
||||
@ -1,10 +1,9 @@
|
||||
import testing as tm
|
||||
from hypothesis import given, note, settings, strategies
|
||||
|
||||
import xgboost as xgb
|
||||
from xgboost import testing
|
||||
from xgboost import testing as tm
|
||||
|
||||
pytestmark = testing.timeout(10)
|
||||
pytestmark = tm.timeout(10)
|
||||
|
||||
|
||||
parameter_strategy = strategies.fixed_dictionaries({
|
||||
|
||||
@ -1,12 +1,14 @@
|
||||
import xgboost
|
||||
import os
|
||||
import generate_models as gm
|
||||
import testing as tm
|
||||
import json
|
||||
import zipfile
|
||||
import pytest
|
||||
import copy
|
||||
import json
|
||||
import os
|
||||
import urllib.request
|
||||
import zipfile
|
||||
|
||||
import generate_models as gm
|
||||
import pytest
|
||||
|
||||
import xgboost
|
||||
from xgboost import testing as tm
|
||||
|
||||
|
||||
def run_model_param_check(config):
|
||||
|
||||
@ -1,8 +1,9 @@
|
||||
import numpy as np
|
||||
import xgboost as xgb
|
||||
import testing as tm
|
||||
import pytest
|
||||
|
||||
import xgboost as xgb
|
||||
from xgboost import testing as tm
|
||||
|
||||
dpath = 'demo/data/'
|
||||
|
||||
|
||||
|
||||
@ -4,12 +4,11 @@ import tempfile
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
import testing as tm
|
||||
|
||||
import xgboost as xgb
|
||||
from xgboost import testing
|
||||
from xgboost import testing as tm
|
||||
|
||||
pytestmark = testing.timeout(10)
|
||||
pytestmark = tm.timeout(10)
|
||||
|
||||
|
||||
class TestOMP:
|
||||
@ -86,7 +85,7 @@ class TestOMP:
|
||||
def test_with_omp_thread_limit(self):
|
||||
args = [
|
||||
"python", os.path.join(
|
||||
tm.PROJECT_ROOT, "tests", "python", "with_omp_limit.py"
|
||||
os.path.dirname(tm.normpath(__file__)), "with_omp_limit.py"
|
||||
)
|
||||
]
|
||||
results = []
|
||||
|
||||
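
Instead of anchoring on the repository root, the rewritten test locates with_omp_limit.py next to the test file itself. Assuming tm.normpath just resolves a file path to a normalized absolute form (plain os.path semantics), the lookup amounts to:

import os

def normpath(path: str) -> str:
    # Assumed helper: absolute, normalized path, so that dirname()
    # reliably yields the directory containing the test file.
    return os.path.normpath(os.path.abspath(path))

script = os.path.join(os.path.dirname(normpath(__file__)), "with_omp_limit.py")
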
@ -1,8 +1,8 @@
import xgboost as xgb
import numpy as np
import pytest
import testing as tm

import xgboost as xgb
from xgboost import testing as tm

pytestmark = pytest.mark.skipif(**tm.no_pandas())


@ -1,9 +1,10 @@
import pickle
import numpy as np
import xgboost as xgb
import os
import json
import os
import pickle

import numpy as np

import xgboost as xgb

kRows = 100
kCols = 10

@ -1,15 +1,16 @@
import json
import numpy as np
import xgboost as xgb
import testing as tm

import numpy as np
import pytest

import xgboost as xgb
from xgboost import testing as tm

try:
    import matplotlib
    matplotlib.use('Agg')
    from matplotlib.axes import Axes
    from graphviz import Source
    from matplotlib.axes import Axes
except ImportError:
    pass


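
The reordered imports above serve the plotting checks: xgb.plot_importance draws onto a matplotlib Axes, which is what the assertions in the test_sklearn_plotting hunk later in this commit verify. A minimal usage sketch with synthetic data (the data and parameters here are illustrative):

import matplotlib

matplotlib.use("Agg")  # headless backend, as in the tests

import numpy as np
import xgboost as xgb
from matplotlib.axes import Axes

X = np.random.rand(64, 4)
y = np.random.randint(0, 2, size=64)
bst = xgb.train(
    {"objective": "binary:logistic"}, xgb.DMatrix(X, label=y), num_boost_round=2
)
ax = xgb.plot_importance(bst)
assert isinstance(ax, Axes)
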
@ -1,12 +1,13 @@
'''Tests for running inplace prediction.'''
from concurrent.futures import ThreadPoolExecutor
import numpy as np
from scipy import sparse
import pytest
import pandas as pd

import testing as tm
import numpy as np
import pandas as pd
import pytest
from scipy import sparse

import xgboost as xgb
from xgboost import testing as tm


def run_threaded_predict(X, rows, predict_func):

@ -4,7 +4,7 @@ import numpy as np
import pytest
from hypothesis import given, settings, strategies
from scipy import sparse
from testing import (
from xgboost.testing import (
    IteratorForTest,
    make_batches,
    make_batches_sparse,

@ -1,13 +1,15 @@
import numpy as np
from scipy.sparse import csr_matrix
import testing as tm
import xgboost
import os
import itertools
import os
import shutil
import urllib.request
import zipfile

import numpy as np
from scipy.sparse import csr_matrix

import xgboost
from xgboost import testing as tm


def test_ranking_with_unweighted_data():
    Xrow = np.array([1, 2, 6, 8, 11, 14, 16, 17])

@ -1,11 +1,12 @@
# -*- coding: utf-8 -*-
import numpy as np
import xgboost as xgb
import itertools
import re

import numpy as np
import scipy
import scipy.special

import xgboost as xgb

dpath = 'demo/data/'
rng = np.random.RandomState(1994)


@ -4,7 +4,8 @@ from typing import List
import numpy as np
import pandas as pd
import pytest
import testing as tm

from xgboost import testing as tm

if tm.no_spark()["condition"]:
    pytest.skip(msg=tm.no_spark()["reason"], allow_module_level=True)

@ -6,10 +6,9 @@ import uuid

import numpy as np
import pytest
import testing as tm

import xgboost as xgb
from xgboost import testing
from xgboost import testing as tm

if tm.no_spark()["condition"]:
    pytest.skip(msg=tm.no_spark()["reason"], allow_module_level=True)
@ -38,7 +37,7 @@ from .utils import SparkTestCase

logging.getLogger("py4j").setLevel(logging.INFO)

pytestmark = testing.timeout(60)
pytestmark = tm.timeout(60)


class XgboostLocalTest(SparkTestCase):

@ -6,7 +6,8 @@ import uuid

import numpy as np
import pytest
import testing as tm

from xgboost import testing as tm

if tm.no_spark()["condition"]:
    pytest.skip(msg=tm.no_spark()["reason"], allow_module_level=True)

@ -6,9 +6,10 @@ import tempfile
import unittest

import pytest
import testing as tm
from six import StringIO

from xgboost import testing as tm

if tm.no_spark()["condition"]:
    pytest.skip(msg=tm.no_spark()["reason"], allow_module_level=True)
if sys.platform.startswith("win") or sys.platform.startswith("darwin"):

@ -1,11 +1,13 @@
import testing as tm
import pytest
import numpy as np
import xgboost as xgb
import json
import os

dpath = os.path.join(tm.PROJECT_ROOT, 'demo', 'data')
import numpy as np
import pytest

import xgboost as xgb
from xgboost import testing as tm

dpath = tm.data_dir(__file__)


def test_aft_survival_toy_data():

@ -3,10 +3,10 @@ import sys

import numpy as np
import pytest
import testing as tm

import xgboost as xgb
from xgboost import RabitTracker, testing
from xgboost import RabitTracker
from xgboost import testing as tm

if sys.platform.startswith("win"):
    pytest.skip("Skipping dask tests on Windows", allow_module_level=True)
@ -61,7 +61,7 @@ def test_rabit_ops():
    run_rabit_ops(client, n_workers)


@pytest.mark.skipif(**testing.skip_ipv6())
@pytest.mark.skipif(**tm.no_ipv6())
@pytest.mark.skipif(**tm.no_dask())
def test_rabit_ops_ipv6():
    import dask

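
testing.skip_ipv6() is renamed to tm.no_ipv6() here; judging by the **-unpacking into pytest.mark.skipif, it returns the same condition/reason mapping as the other skip helpers. A sketch under that assumption (the probe below is a simplification; the real helper may exercise an actual IPv6 loopback connection):

import socket

def no_ipv6():
    # Skip when the interpreter reports no IPv6 support (assumed semantics).
    return {"condition": not socket.has_ipv6, "reason": "test requires IPv6"}
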
@ -1,10 +1,11 @@
import xgboost as xgb
import testing as tm
import numpy as np
import pytest
import os
import tempfile

import numpy as np
import pytest

import xgboost as xgb
from xgboost import testing as tm

rng = np.random.RandomState(1337)


@ -1,8 +1,8 @@
import numpy as np
import xgboost as xgb

from numpy.testing import assert_approx_equal

import xgboost as xgb

train_data = xgb.DMatrix(np.array([[1]]), label=np.array([1]))



@ -1,11 +1,13 @@
import json
from string import ascii_lowercase
from typing import Dict, Any
import testing as tm
import pytest
import xgboost as xgb
from typing import Any, Dict

import numpy as np
from hypothesis import given, strategies, settings, note
import pytest
from hypothesis import given, note, settings, strategies

import xgboost as xgb
from xgboost import testing as tm

exact_parameter_strategy = strategies.fixed_dictionaries({
    'nthread': strategies.integers(1, 4),

@ -1,14 +1,16 @@
import unittest
import pytest
import numpy as np
import testing as tm
import xgboost as xgb
import os
import unittest

import numpy as np
import pytest

import xgboost as xgb
from xgboost import testing as tm

try:
    import pandas as pd
    import pyarrow as pa
    import pyarrow.csv as pc
    import pandas as pd
except ImportError:
    pass

@ -73,7 +75,7 @@ class TestArrowTable(unittest.TestCase):
        np.testing.assert_allclose(preds1, preds2)

    def test_arrow_survival(self):
        data = os.path.join(tm.PROJECT_ROOT, "demo", "data", "veterans_lung_cancer.csv")
        data = os.path.join(tm.data_dir(__file__), "veterans_lung_cancer.csv")
        table = pc.read_csv(data)
        y_lower_bound = table["Survival_label_lower_bound"]
        y_upper_bound = table["Survival_label_upper_bound"]

@ -20,7 +20,6 @@ import numpy as np
import pytest
import scipy
import sklearn
import testing as tm
from hypothesis import HealthCheck, given, note, settings
from sklearn.datasets import make_classification, make_regression
from test_predict import verify_leaf_output
@ -29,7 +28,7 @@ from test_with_sklearn import run_data_initialization, run_feature_weights
from xgboost.data import _is_cudf_df

import xgboost as xgb
from xgboost import testing
from xgboost import testing as tm

if sys.platform.startswith("win"):
    pytest.skip("Skipping dask tests on Windows", allow_module_level=True)
@ -45,7 +44,7 @@ from xgboost.dask import DaskDMatrix

dask.config.set({"distributed.scheduler.allowed-failures": False})

pytestmark = testing.timeout(30)
pytestmark = tm.timeout(30)

if hasattr(HealthCheck, 'function_scoped_fixture'):
    suppress = [HealthCheck.function_scoped_fixture]
@ -1116,8 +1115,9 @@ def test_predict_with_meta(client: "Client") -> None:


def run_aft_survival(client: "Client", dmatrix_t: Type) -> None:
    df = dd.read_csv(os.path.join(tm.PROJECT_ROOT, 'demo', 'data',
                                  'veterans_lung_cancer.csv'))
    df = dd.read_csv(
        os.path.join(tm.data_dir(__file__), "veterans_lung_cancer.csv")
    )
    y_lower_bound = df['Survival_label_lower_bound']
    y_upper_bound = df['Survival_label_upper_bound']
    X = df.drop(['Survival_label_lower_bound',

@ -1,10 +1,10 @@
# -*- coding: utf-8 -*-
import numpy as np
import xgboost as xgb
import testing as tm
import pytest
from test_dmatrix import set_base_margin_info

import xgboost as xgb
from xgboost import testing as tm

try:
    import modin.pandas as md
except ImportError:

@ -1,11 +1,13 @@
import os
import tempfile

import numpy as np
import xgboost as xgb
import testing as tm
import pytest
from test_dmatrix import set_base_margin_info

import xgboost as xgb
from xgboost import testing as tm

try:
    import pandas as pd
except ImportError:

@ -1,7 +1,8 @@
import numpy as np
import xgboost as xgb
import pytest

import xgboost as xgb

try:
    import shap
except ImportError:

@ -8,14 +8,13 @@ from typing import Callable, Optional

import numpy as np
import pytest
import testing as tm
from sklearn.utils.estimator_checks import parametrize_with_checks

import xgboost as xgb
from xgboost import testing
from xgboost import testing as tm

rng = np.random.RandomState(1994)
pytestmark = [pytest.mark.skipif(**tm.no_sklearn()), testing.timeout(30)]
pytestmark = [pytest.mark.skipif(**tm.no_sklearn()), tm.timeout(30)]


def test_binary_classification():
@ -155,11 +154,10 @@ def test_ranking():


def test_stacking_regression():
    from sklearn.model_selection import train_test_split
    from sklearn.datasets import load_diabetes
    from sklearn.ensemble import RandomForestRegressor, StackingRegressor
    from sklearn.linear_model import RidgeCV
    from sklearn.ensemble import RandomForestRegressor
    from sklearn.ensemble import StackingRegressor
    from sklearn.model_selection import train_test_split

    X, y = load_diabetes(return_X_y=True)
    estimators = [
@ -177,13 +175,13 @@ def test_stacking_regression():


def test_stacking_classification():
    from sklearn.model_selection import train_test_split
    from sklearn.datasets import load_iris
    from sklearn.svm import LinearSVC
    from sklearn.linear_model import LogisticRegression
    from sklearn.preprocessing import StandardScaler
    from sklearn.pipeline import make_pipeline
    from sklearn.ensemble import StackingClassifier
    from sklearn.linear_model import LogisticRegression
    from sklearn.model_selection import train_test_split
    from sklearn.pipeline import make_pipeline
    from sklearn.preprocessing import StandardScaler
    from sklearn.svm import LinearSVC

    X, y = load_iris(return_X_y=True)
    estimators = [
@ -354,8 +352,8 @@ def test_num_parallel_tree():


def test_regression():
    from sklearn.metrics import mean_squared_error
    from sklearn.datasets import fetch_california_housing
    from sklearn.metrics import mean_squared_error
    from sklearn.model_selection import KFold

    X, y = fetch_california_housing(return_X_y=True)
@ -383,8 +381,8 @@ def test_regression():


def run_housing_rf_regression(tree_method):
    from sklearn.metrics import mean_squared_error
    from sklearn.datasets import fetch_california_housing
    from sklearn.metrics import mean_squared_error
    from sklearn.model_selection import KFold

    X, y = fetch_california_housing(return_X_y=True)
@ -407,8 +405,8 @@ def test_rf_regression():


def test_parameter_tuning():
    from sklearn.model_selection import GridSearchCV
    from sklearn.datasets import fetch_california_housing
    from sklearn.model_selection import GridSearchCV

    X, y = fetch_california_housing(return_X_y=True)
    xgb_model = xgb.XGBRegressor(learning_rate=0.1)
@ -421,8 +419,8 @@ def test_parameter_tuning():


def test_regression_with_custom_objective():
    from sklearn.metrics import mean_squared_error
    from sklearn.datasets import fetch_california_housing
    from sklearn.metrics import mean_squared_error
    from sklearn.model_selection import KFold

    def objective_ls(y_true, y_pred):
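
test_regression_with_custom_objective exercises the estimator's support for callable objectives: a function taking the true labels and the current predictions and returning the per-row gradient and hessian pair. For squared error that is:

import numpy as np

def objective_ls(y_true: np.ndarray, y_pred: np.ndarray):
    # Gradient and hessian of 0.5 * (y_pred - y_true) ** 2.
    grad = y_pred - y_true
    hess = np.ones_like(y_pred)
    return grad, hess

# e.g. xgb.XGBRegressor(objective=objective_ls).fit(X, y)
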
@ -539,8 +537,8 @@ def test_sklearn_plotting():
    import matplotlib
    matplotlib.use('Agg')

    from matplotlib.axes import Axes
    from graphviz import Source
    from matplotlib.axes import Axes

    ax = xgb.plot_importance(classifier)
    assert isinstance(ax, Axes)
@ -666,8 +664,8 @@ def test_kwargs_error():


def test_kwargs_grid_search():
    from sklearn.model_selection import GridSearchCV
    from sklearn import datasets
    from sklearn.model_selection import GridSearchCV

    params = {'tree_method': 'hist'}
    clf = xgb.XGBClassifier(n_estimators=1, learning_rate=1.0, **params)
@ -841,9 +839,7 @@ def test_save_load_model():


def test_RFECV():
    from sklearn.datasets import load_diabetes
    from sklearn.datasets import load_breast_cancer
    from sklearn.datasets import load_iris
    from sklearn.datasets import load_breast_cancer, load_diabetes, load_iris
    from sklearn.feature_selection import RFECV

    # Regression
@ -1046,8 +1042,9 @@ def run_feature_weights(X, y, fw, tree_method, model=xgb.XGBRegressor):
    with open(model_path) as fd:
        model = json.load(fd)

    parser_path = os.path.join(tm.PROJECT_ROOT, 'demo', 'json-model',
                               'json_parser.py')
    parser_path = os.path.join(
        tm.demo_dir(__file__), "json-model", "json_parser.py"
    )
    spec = importlib.util.spec_from_file_location("JsonParser",
                                                  parser_path)
    foo = importlib.util.module_from_spec(spec)
@ -1162,8 +1159,8 @@ def run_boost_from_prediction_multi_clasas(

@pytest.mark.parametrize("tree_method", ["hist", "approx", "exact"])
def test_boost_from_prediction(tree_method):
    from sklearn.datasets import load_breast_cancer, load_iris, make_regression
    import pandas as pd
    from sklearn.datasets import load_breast_cancer, load_iris, make_regression

    X, y = load_breast_cancer(return_X_y=True)


@ -1,7 +1,9 @@
import xgboost as xgb
import sys

from sklearn.datasets import make_classification
from sklearn.metrics import roc_auc_score
import sys

import xgboost as xgb


def run_omp(output_path: str):