Move Python testing utilities into xgboost module. (#8379)

- Add typehints. - Fixes for pylint. Co-authored-by: Hyunsu Philip Cho <chohyu01@cs.washington.edu>
2022-10-26 16:56:11 +08:00
parent 7e53189e7c
commit cf70864fa3
66 changed files with 652 additions and 595 deletions
--- a/python-package/xgboost/sklearn.py
+++ b/python-package/xgboost/sklearn.py
@@ -65,7 +65,7 @@ def _check_rf_callback(
        )
-_SklObjective = Optional[
+SklObjective = Optional[
    Union[str, Callable[[np.ndarray, np.ndarray], Tuple[np.ndarray, np.ndarray]]]
 ]
@@ -144,7 +144,7 @@ __model_doc = f"""
        Boosting learning rate (xgb's "eta")
    verbosity : Optional[int]
        The degree of verbosity. Valid values are 0 (silent) - 3 (debug).
-    objective : {_SklObjective}
+    objective : {SklObjective}
        Specify the learning task and the corresponding learning objective or
        a custom objective function to be used (see note below).
    booster: Optional[str]
@@ -546,7 +546,7 @@ class XGBModel(XGBModelBase):
        learning_rate: Optional[float] = None,
        n_estimators: int = 100,
        verbosity: Optional[int] = None,
-        objective: _SklObjective = None,
+        objective: SklObjective = None,
        booster: Optional[str] = None,
        tree_method: Optional[str] = None,
        n_jobs: Optional[int] = None,
@@ -1409,7 +1409,7 @@ class XGBClassifier(XGBModel, XGBClassifierBase):
    def __init__(
        self,
        *,
-        objective: _SklObjective = "binary:logistic",
+        objective: SklObjective = "binary:logistic",
        use_label_encoder: Optional[bool] = None,
        **kwargs: Any,
    ) -> None:
@@ -1712,7 +1712,7 @@ class XGBRegressor(XGBModel, XGBRegressorBase):
    # pylint: disable=missing-docstring
    @_deprecate_positional_args
    def __init__(
-        self, *, objective: _SklObjective = "reg:squarederror", **kwargs: Any
+        self, *, objective: SklObjective = "reg:squarederror", **kwargs: Any
    ) -> None:
        super().__init__(objective=objective, **kwargs)
--- a/python-package/xgboost/testing.py
+++ b/python-package/xgboost/testing.py
@@ -1,64 +0,0 @@
 """Utilities for defining Python tests."""
 import socket
 from platform import system
 from typing import Any, TypedDict
 PytestSkip = TypedDict("PytestSkip", {"condition": bool, "reason": str})
 def has_ipv6() -> bool:
    """Check whether IPv6 is enabled on this host."""
    # connection error in macos, still need some fixes.
    if system() not in ("Linux", "Windows"):
        return False
    if socket.has_ipv6:
        try:
            with socket.socket(
                socket.AF_INET6, socket.SOCK_STREAM
            ) as server, socket.socket(socket.AF_INET6, socket.SOCK_STREAM) as client:
                server.bind(("::1", 0))
                port = server.getsockname()[1]
                server.listen()
                client.connect(("::1", port))
                conn, _ = server.accept()
                client.sendall("abc".encode())
                msg = conn.recv(3).decode()
                # if the code can be executed to this point, the message should be
                # correct.
                assert msg == "abc"
            return True
        except OSError:
            pass
    return False
 def skip_ipv6() -> PytestSkip:
    """PyTest skip mark for IPv6."""
    return {"condition": not has_ipv6(), "reason": "IPv6 is required to be enabled."}
 def timeout(sec: int, *args: Any, enable: bool = True, **kwargs: Any) -> Any:
    """Make a pytest mark for the `pytest-timeout` package.
    Parameters
    ----------
    sec :
        Timeout seconds.
    enable :
        Control whether timeout should be applied, used for debugging.
    Returns
    -------
    pytest.mark.timeout
    """
    import pytest  # pylint: disable=import-error
    # This is disabled for now due to regression caused by conflicts between federated
    # learning build and the CI container environment.
    if enable:
        return pytest.mark.timeout(sec, *args, **kwargs)
    return pytest.mark.timeout(None, *args, **kwargs)
--- a/python-package/xgboost/testing/init.py
+++ b/python-package/xgboost/testing/init.py
@@ -1,192 +1,190 @@
-from concurrent.futures import ThreadPoolExecutor
+"""Utilities for defining Python tests. The module is private and subject to frequent
-import os
+change without notice.
 """
 # pylint: disable=invalid-name,missing-function-docstring,import-error
 import gc
 import importlib.util
 import multiprocessing
-from typing import Tuple, Union, List, Sequence, Callable
+import os
 import platform
 import socket
 import sys
 import urllib
 import zipfile
-import sys
+from concurrent.futures import ThreadPoolExecutor
 from typing import Optional, Dict, Any
 from contextlib import contextmanager
 from io import StringIO
-from xgboost.compat import SKLEARN_INSTALLED, PANDAS_INSTALLED
+from platform import system
-import pytest
+from typing import (
-import gc
+    Any,
-import xgboost as xgb
+    Callable,
-from xgboost.core import ArrayLike
+    Dict,
-import numpy as np
+    Generator,
-from scipy import sparse
+    List,
-import platform
+    Optional,
    Sequence,
    Set,
    Tuple,
    TypedDict,
    Union,
 )
-hypothesis = pytest.importorskip('hypothesis')
+import numpy as np
-sklearn = pytest.importorskip('sklearn')
+import pytest
 from scipy import sparse
 from xgboost.core import ArrayLike
 from xgboost.sklearn import SklObjective
 import xgboost as xgb
 hypothesis = pytest.importorskip("hypothesis")
 # pylint:disable=wrong-import-position,wrong-import-order
 from hypothesis import strategies
 from hypothesis.extra.numpy import arrays
 from joblib import Memory
 from sklearn import datasets
-try:
+joblib = pytest.importorskip("joblib")
-    import cupy as cp
+datasets = pytest.importorskip("sklearn.datasets")
 except ImportError:
    cp = None
-memory = Memory('./cachedir', verbose=0)
+Memory = joblib.Memory
 memory = Memory("./cachedir", verbose=0)
 PytestSkip = TypedDict("PytestSkip", {"condition": bool, "reason": str})
-def no_ubjson():
+def has_ipv6() -> bool:
-    reason = "ubjson is not intsalled."
+    """Check whether IPv6 is enabled on this host."""
-    try:
+    # connection error in macos, still need some fixes.
-        import ubjson           # noqa
+    if system() not in ("Linux", "Windows"):
-        return {"condition": False, "reason": reason}
+        return False
-    except ImportError:
+
-        return {"condition": True, "reason": reason}
+    if socket.has_ipv6:
        try:
            with socket.socket(
                socket.AF_INET6, socket.SOCK_STREAM
            ) as server, socket.socket(socket.AF_INET6, socket.SOCK_STREAM) as client:
                server.bind(("::1", 0))
                port = server.getsockname()[1]
                server.listen()
                client.connect(("::1", port))
                conn, _ = server.accept()
                client.sendall("abc".encode())
                msg = conn.recv(3).decode()
                # if the code can be executed to this point, the message should be
                # correct.
                assert msg == "abc"
            return True
        except OSError:
            pass
    return False
-def no_sklearn():
+def no_mod(name: str) -> PytestSkip:
-    return {'condition': not SKLEARN_INSTALLED,
+    spec = importlib.util.find_spec(name)
-            'reason': 'Scikit-Learn is not installed'}
+    return {"condition": spec is None, "reason": f"{name} is not installed."}
-def no_dask():
+def no_ipv6() -> PytestSkip:
-    try:
+    """PyTest skip mark for IPv6."""
-        import pkg_resources
+    return {"condition": not has_ipv6(), "reason": "IPv6 is required to be enabled."}
        pkg_resources.get_distribution("dask")
        DASK_INSTALLED = True
    except pkg_resources.DistributionNotFound:
        DASK_INSTALLED = False
    return {"condition": not DASK_INSTALLED, "reason": "Dask is not installed"}
-def no_spark():
+def no_ubjson() -> PytestSkip:
-    try:
+    return no_mod("ubjson")
        import pyspark          # noqa
        SPARK_INSTALLED = True
    except ImportError:
        SPARK_INSTALLED = False
    return {"condition": not SPARK_INSTALLED, "reason": "Spark is not installed"}
-def no_pandas():
+def no_sklearn() -> PytestSkip:
-    return {'condition': not PANDAS_INSTALLED,
+    return no_mod("sklearn")
            'reason': 'Pandas is not installed.'}
-def no_arrow():
+def no_dask() -> PytestSkip:
-    reason = "pyarrow is not installed"
+    return no_mod("dask")
    try:
        import pyarrow  # noqa
        return {"condition": False, "reason": reason}
    except ImportError:
        return {"condition": True, "reason": reason}
-def no_modin():
+def no_spark() -> PytestSkip:
-    reason = 'Modin is not installed.'
+    return no_mod("pyspark")
    try:
        import modin.pandas as _  # noqa
        return {'condition': False, 'reason': reason}
    except ImportError:
        return {'condition': True, 'reason': reason}
-def no_dt():
+def no_pandas() -> PytestSkip:
-    import importlib.util
+    return no_mod("pandas")
    spec = importlib.util.find_spec('datatable')
    return {'condition': spec is None,
            'reason': 'Datatable is not installed.'}
-def no_matplotlib():
+def no_arrow() -> PytestSkip:
-    reason = 'Matplotlib is not installed.'
+    return no_mod("pyarrow")
 def no_modin() -> PytestSkip:
    return no_mod("modin")
 def no_dt() -> PytestSkip:
    return no_mod("datatable")
 def no_matplotlib() -> PytestSkip:
    reason = "Matplotlib is not installed."
    try:
        import matplotlib.pyplot as _  # noqa
-        return {'condition': False,
+
-                'reason': reason}
+        return {"condition": False, "reason": reason}
    except ImportError:
-        return {'condition': True,
+        return {"condition": True, "reason": reason}
                'reason': reason}
-def no_dask_cuda():
+def no_dask_cuda() -> PytestSkip:
-    reason = 'dask_cuda is not installed.'
+    return no_mod("dask_cuda")
    try:
        import dask_cuda as _  # noqa
        return {'condition': False, 'reason': reason}
    except ImportError:
        return {'condition': True, 'reason': reason}
-def no_cudf():
+def no_cudf() -> PytestSkip:
-    try:
+    return no_mod("cudf")
        import cudf  # noqa
        CUDF_INSTALLED = True
    except ImportError:
        CUDF_INSTALLED = False
    return {'condition': not CUDF_INSTALLED,
            'reason': 'CUDF is not installed'}
-def no_cupy():
+def no_cupy() -> PytestSkip:
-    reason = 'cupy is not installed.'
+    return no_mod("cupy")
    try:
        import cupy as _  # noqa
        return {'condition': False, 'reason': reason}
    except ImportError:
        return {'condition': True, 'reason': reason}
-def no_dask_cudf():
+def no_dask_cudf() -> PytestSkip:
-    reason = 'dask_cudf is not installed.'
+    return no_mod("dask_cudf")
    try:
        import dask_cudf as _  # noqa
        return {'condition': False, 'reason': reason}
    except ImportError:
        return {'condition': True, 'reason': reason}
-def no_json_schema():
+def no_json_schema() -> PytestSkip:
-    reason = 'jsonschema is not installed'
+    return no_mod("jsonschema")
    try:
        import jsonschema  # noqa
        return {'condition': False, 'reason': reason}
    except ImportError:
        return {'condition': True, 'reason': reason}
-def no_graphviz():
+def no_graphviz() -> PytestSkip:
-    reason = 'graphviz is not installed'
+    return no_mod("graphviz")
    try:
        import graphviz  # noqa
        return {'condition': False, 'reason': reason}
    except ImportError:
        return {'condition': True, 'reason': reason}
-def no_multiple(*args):
+def no_multiple(*args: Any) -> PytestSkip:
    condition = False
-    reason = ''
+    reason = ""
    for arg in args:
-        condition = (condition or arg['condition'])
+        condition = condition or arg["condition"]
-        if arg['condition']:
+        if arg["condition"]:
-            reason = arg['reason']
+            reason = arg["reason"]
            break
-    return {'condition': condition, 'reason': reason}
+    return {"condition": condition, "reason": reason}
-def skip_s390x():
+def skip_s390x() -> PytestSkip:
    condition = platform.machine() == "s390x"
    reason = "Known to fail on s390x"
    return {"condition": condition, "reason": reason}
 class IteratorForTest(xgb.core.DataIter):
    """Iterator for testing streaming DMatrix. (external memory, quantile)"""
    def __init__(
        self,
        X: Sequence,
        y: Sequence,
        w: Optional[Sequence],
-        cache: Optional[str] = "./"
+        cache: Optional[str] = "./",
    ) -> None:
        assert len(X) == len(y)
        self.X = X
@@ -242,7 +240,7 @@ def make_batches(
        rng = cupy.random.RandomState(1994)
    else:
        rng = np.random.RandomState(1994)
-    for i in range(n_batches):
+    for _ in range(n_batches):
        _X = rng.randn(n_samples_per_batch, n_features)
        _y = rng.randn(n_samples_per_batch)
        _w = rng.uniform(low=0, high=1, size=n_samples_per_batch)
@@ -259,7 +257,7 @@ def make_batches_sparse(
    y = []
    w = []
    rng = np.random.RandomState(1994)
-    for i in range(n_batches):
+    for _ in range(n_batches):
        _X = sparse.random(
            n_samples_per_batch,
            n_features,
@@ -276,8 +274,9 @@ def make_batches_sparse(
    return X, y, w
 # Contains a dataset in numpy format as well as the relevant objective and metric
 class TestDataset:
    """Contains a dataset in numpy format as well as the relevant objective and metric."""
    def __init__(
        self, name: str, get_dataset: Callable, objective: str, metric: str
    ) -> None:
@@ -289,18 +288,24 @@ class TestDataset:
        self.margin: Optional[np.ndarray] = None
    def set_params(self, params_in: Dict[str, Any]) -> Dict[str, Any]:
-        params_in['objective'] = self.objective
+        params_in["objective"] = self.objective
-        params_in['eval_metric'] = self.metric
+        params_in["eval_metric"] = self.metric
        if self.objective == "multi:softmax":
            params_in["num_class"] = int(np.max(self.y) + 1)
        return params_in
    def get_dmat(self) -> xgb.DMatrix:
        return xgb.DMatrix(
-            self.X, self.y, self.w, base_margin=self.margin, enable_categorical=True
+            self.X,
            self.y,
            weight=self.w,
            base_margin=self.margin,
            enable_categorical=True,
        )
    def get_device_dmat(self) -> xgb.DeviceQuantileDMatrix:
        import cupy as cp
        w = None if self.w is None else cp.array(self.w)
        X = cp.array(self.X, dtype=np.float32)
        y = cp.array(self.y, dtype=np.float32)
@@ -318,9 +323,9 @@ class TestDataset:
            beg = i * per_batch
            end = min((i + 1) * per_batch, n_samples)
            assert end != beg
-            X = self.X[beg: end, ...]
+            X = self.X[beg:end, ...]
-            y = self.y[beg: end]
+            y = self.y[beg:end]
-            w = self.w[beg: end] if self.w is not None else None
+            w = self.w[beg:end] if self.w is not None else None
            predictor.append(X)
            response.append(y)
            if w is not None:
@@ -334,25 +339,24 @@ class TestDataset:
@memory.cache
-def get_california_housing():
+def get_california_housing() -> Tuple[np.ndarray, np.ndarray]:
    data = datasets.fetch_california_housing()
    return data.data, data.target
@memory.cache
-def get_digits():
+def get_digits() -> Tuple[np.ndarray, np.ndarray]:
    data = datasets.load_digits()
    return data.data, data.target
@memory.cache
-def get_cancer():
+def get_cancer() -> Tuple[np.ndarray, np.ndarray]:
-    data = datasets.load_breast_cancer()
+    return datasets.load_breast_cancer(return_X_y=True)
    return data.data, data.target
@memory.cache
-def get_sparse():
+def get_sparse() -> Tuple[np.ndarray, np.ndarray]:
    rng = np.random.RandomState(199)
    n = 2000
    sparsity = 0.75
@@ -366,7 +370,7 @@ def get_sparse():
@memory.cache
-def get_ames_housing():
+def get_ames_housing() -> Tuple[np.ndarray, np.ndarray]:
    """
    Number of samples: 1460
    Number of features: 20
@@ -374,22 +378,23 @@ def get_ames_housing():
    Number of numerical features: 10
    """
    from sklearn.datasets import fetch_openml
    X, y = fetch_openml(data_id=42165, as_frame=True, return_X_y=True)
-    categorical_columns_subset: list[str] = [
+    categorical_columns_subset: List[str] = [
-        "BldgType",             # 5 cats, no nan
+        "BldgType",  # 5 cats, no nan
-        "GarageFinish",         # 3 cats, nan
+        "GarageFinish",  # 3 cats, nan
-        "LotConfig",            # 5 cats, no nan
+        "LotConfig",  # 5 cats, no nan
-        "Functional",           # 7 cats, no nan
+        "Functional",  # 7 cats, no nan
-        "MasVnrType",           # 4 cats, nan
+        "MasVnrType",  # 4 cats, nan
-        "HouseStyle",           # 8 cats, no nan
+        "HouseStyle",  # 8 cats, no nan
-        "FireplaceQu",          # 5 cats, nan
+        "FireplaceQu",  # 5 cats, nan
-        "ExterCond",            # 5 cats, no nan
+        "ExterCond",  # 5 cats, no nan
-        "ExterQual",            # 4 cats, no nan
+        "ExterQual",  # 4 cats, no nan
-        "PoolQC",               # 3 cats, nan
+        "PoolQC",  # 3 cats, nan
    ]
-    numerical_columns_subset: list[str] = [
+    numerical_columns_subset: List[str] = [
        "3SsnPorch",
        "Fireplaces",
        "BsmtHalfBath",
@@ -408,32 +413,70 @@ def get_ames_housing():
@memory.cache
-def get_mq2008(dpath):
+def get_mq2008(
    dpath: str,
 ) -> Tuple[
    sparse.csr_matrix,
    np.ndarray,
    np.ndarray,
    sparse.csr_matrix,
    np.ndarray,
    np.ndarray,
    sparse.csr_matrix,
    np.ndarray,
    np.ndarray,
 ]:
    from sklearn.datasets import load_svmlight_files
-    src = 'https://s3-us-west-2.amazonaws.com/xgboost-examples/MQ2008.zip'
+    src = "https://s3-us-west-2.amazonaws.com/xgboost-examples/MQ2008.zip"
-    target = dpath + '/MQ2008.zip'
+    target = dpath + "/MQ2008.zip"
    if not os.path.exists(target):
        urllib.request.urlretrieve(url=src, filename=target)
-    with zipfile.ZipFile(target, 'r') as f:
+    with zipfile.ZipFile(target, "r") as f:
        f.extractall(path=dpath)
-    (x_train, y_train, qid_train, x_test, y_test, qid_test,
+    (
-     x_valid, y_valid, qid_valid) = load_svmlight_files(
+        x_train,
-         (dpath + "MQ2008/Fold1/train.txt",
+        y_train,
-          dpath + "MQ2008/Fold1/test.txt",
+        qid_train,
-          dpath + "MQ2008/Fold1/vali.txt"),
+        x_test,
-         query_id=True, zero_based=False)
+        y_test,
        qid_test,
        x_valid,
        y_valid,
        qid_valid,
    ) = load_svmlight_files(
        (
            dpath + "MQ2008/Fold1/train.txt",
            dpath + "MQ2008/Fold1/test.txt",
            dpath + "MQ2008/Fold1/vali.txt",
        ),
        query_id=True,
        zero_based=False,
    )
-    return (x_train, y_train, qid_train, x_test, y_test, qid_test,
+    return (
-            x_valid, y_valid, qid_valid)
+        x_train,
        y_train,
        qid_train,
        x_test,
        y_test,
        qid_test,
        x_valid,
        y_valid,
        qid_valid,
    )
@memory.cache
 def make_categorical(
-    n_samples: int, n_features: int, n_categories: int, onehot: bool, sparsity=0.0,
+    n_samples: int,
-):
+    n_features: int,
    n_categories: int,
    onehot: bool,
    sparsity: float = 0.0,
 ) -> Tuple[ArrayLike, np.ndarray]:
    import pandas as pd
    rng = np.random.RandomState(1994)
@@ -457,7 +500,9 @@ def make_categorical(
    if sparsity > 0.0:
        for i in range(n_features):
-            index = rng.randint(low=0, high=n_samples-1, size=int(n_samples * sparsity))
+            index = rng.randint(
                low=0, high=n_samples - 1, size=int(n_samples * sparsity)
            )
            df.iloc[index, i] = np.NaN
            assert n_categories == np.unique(df.dtypes[i].categories).size
@@ -466,9 +511,9 @@ def make_categorical(
    return df, label
-def _cat_sampled_from():
+def _cat_sampled_from() -> strategies.SearchStrategy:
    @strategies.composite
-    def _make_cat(draw):
+    def _make_cat(draw: Callable) -> Tuple[int, int, int, float]:
        n_samples = draw(strategies.integers(2, 512))
        n_features = draw(strategies.integers(1, 4))
        n_cats = draw(strategies.integers(1, 128))
@@ -483,7 +528,7 @@ def _cat_sampled_from():
        )
        return n_samples, n_features, n_cats, sparsity
-    def _build(args):
+    def _build(args: Tuple[int, int, int, float]) -> TestDataset:
        n_samples = args[0]
        n_features = args[1]
        n_cats = args[2]
@@ -495,12 +540,13 @@ def _cat_sampled_from():
            "rmse",
        )
-    return _make_cat().map(_build)
+    return _make_cat().map(_build)  # pylint: disable=no-member
-categorical_dataset_strategy = _cat_sampled_from()
+categorical_dataset_strategy: strategies.SearchStrategy = _cat_sampled_from()
 # pylint: disable=too-many-locals
@memory.cache
 def make_sparse_regression(
    n_samples: int, n_features: int, sparsity: float, as_dense: bool
@@ -530,8 +576,7 @@ def make_sparse_regression(
    # Use multi-thread to speed up the generation, convenient if you use this function
    # for benchmarking.
-    n_threads = multiprocessing.cpu_count()
+    n_threads = min(multiprocessing.cpu_count(), n_features)
    n_threads = min(n_threads, n_features)
    def random_csc(t_id: int) -> sparse.csc_matrix:
        rng = np.random.default_rng(1994 * t_id)
@@ -653,7 +698,7 @@ _unweighted_datasets_strategy = strategies.sampled_from(
@strategies.composite
-def _dataset_weight_margin(draw):
+def _dataset_weight_margin(draw: Callable) -> TestDataset:
    data: TestDataset = draw(_unweighted_datasets_strategy)
    if draw(strategies.booleans()):
        data.w = draw(
@@ -673,6 +718,7 @@ def _dataset_weight_margin(draw):
                elements=strategies.floats(0.5, 1.0),
            )
        )
        assert data.margin is not None
        if num_class != 1:
            data.margin = data.margin.reshape(data.y.shape[0], num_class)
@@ -684,24 +730,24 @@ def _dataset_weight_margin(draw):
 dataset_strategy = _dataset_weight_margin()
-def non_increasing(L, tolerance=1e-4):
+def non_increasing(L: Sequence[float], tolerance: float = 1e-4) -> bool:
    return all((y - x) < tolerance for x, y in zip(L, L[1:]))
-def eval_error_metric(predt, dtrain: xgb.DMatrix):
+def eval_error_metric(predt: np.ndarray, dtrain: xgb.DMatrix) -> Tuple[str, np.float64]:
    """Evaluation metric for xgb.train"""
    label = dtrain.get_label()
    r = np.zeros(predt.shape)
    gt = predt > 0.5
    if predt.size == 0:
-        return "CustomErr", 0
+        return "CustomErr", np.float64(0.0)
    r[gt] = 1 - label[gt]
    le = predt <= 0.5
    r[le] = label[le]
-    return 'CustomErr', np.sum(r)
+    return "CustomErr", np.sum(r)
-def eval_error_metric_skl(y_true: np.ndarray, y_score: np.ndarray) -> float:
+def eval_error_metric_skl(y_true: np.ndarray, y_score: np.ndarray) -> np.float64:
    """Evaluation metric that looks like metrics provided by sklearn."""
    r = np.zeros(y_score.shape)
    gt = y_score > 0.5
@@ -717,13 +763,15 @@ def root_mean_square(y_true: np.ndarray, y_score: np.ndarray) -> float:
    return rmse
-def softmax(x):
+def softmax(x: np.ndarray) -> np.ndarray:
    e = np.exp(x)
    return e / np.sum(e)
-def softprob_obj(classes):
+def softprob_obj(classes: int) -> SklObjective:
-    def objective(labels, predt):
+    def objective(
        labels: np.ndarray, predt: np.ndarray
    ) -> Tuple[np.ndarray, np.ndarray]:
        rows = labels.shape[0]
        grad = np.zeros((rows, classes), dtype=float)
        hess = np.zeros((rows, classes), dtype=float)
@@ -746,29 +794,33 @@ def softprob_obj(classes):
 class DirectoryExcursion:
-    def __init__(self, path: os.PathLike, cleanup=False):
+    """Change directory.  Change back and optionally cleaning up the directory when
-        '''Change directory.  Change back and optionally cleaning up the directory when exit.
+    exit.
-        '''
+    """
    def __init__(self, path: os.PathLike, cleanup: bool = False):
        self.path = path
        self.curdir = os.path.normpath(os.path.abspath(os.path.curdir))
        self.cleanup = cleanup
-        self.files = {}
+        self.files: Set[str] = set()
-    def __enter__(self):
+    def __enter__(self) -> None:
        os.chdir(self.path)
        if self.cleanup:
            self.files = {
                os.path.join(root, f)
-                for root, subdir, files in os.walk(self.path) for f in files
+                for root, subdir, files in os.walk(os.path.expanduser(self.path))
                for f in files
            }
-    def __exit__(self, *args):
+    def __exit__(self, *args: Any) -> None:
        os.chdir(self.curdir)
        if self.cleanup:
            files = {
                os.path.join(root, f)
-                for root, subdir, files in os.walk(self.path) for f in files
+                for root, subdir, files in os.walk(os.path.expanduser(self.path))
                for f in files
            }
            diff = files.difference(self.files)
            for f in diff:
@@ -776,7 +828,7 @@ class DirectoryExcursion:
@contextmanager
-def captured_output():
+def captured_output() -> Generator[Tuple[StringIO, StringIO], None, None]:
    """Reassign stdout temporarily in order to test printed statements
    Taken from:
    https://stackoverflow.com/questions/4219717/how-to-assert-output-with-nosetest-unittest-in-python
@@ -793,14 +845,46 @@ def captured_output():
        sys.stdout, sys.stderr = old_out, old_err
-try:
+def timeout(sec: int, *args: Any, enable: bool = True, **kwargs: Any) -> Any:
-    # Python 3.7+
+    """Make a pytest mark for the `pytest-timeout` package.
-    from contextlib import nullcontext as noop_context
+
-except ImportError:
+    Parameters
-    # Python 3.6
+    ----------
-    from contextlib import suppress as noop_context
+    sec :
        Timeout seconds.
    enable :
        Control whether timeout should be applied, used for debugging.
    Returns
    -------
    pytest.mark.timeout
    """
    if enable:
        return pytest.mark.timeout(sec, *args, **kwargs)
    return pytest.mark.timeout(None, *args, **kwargs)
-CURDIR = os.path.normpath(os.path.abspath(os.path.dirname(__file__)))
+def demo_dir(path: str) -> str:
-PROJECT_ROOT = os.path.normpath(
+    """Look for the demo directory based on the test file name."""
-    os.path.join(CURDIR, os.path.pardir, os.path.pardir))
+    path = normpath(os.path.dirname(path))
    while True:
        subdirs = [f.path for f in os.scandir(path) if f.is_dir()]
        subdirs = [os.path.basename(d) for d in subdirs]
        if "demo" in subdirs:
            return os.path.join(path, "demo")
        new_path = normpath(os.path.join(path, os.path.pardir))
        assert new_path != path
        path = new_path
 def normpath(path: str) -> str:
    return os.path.normpath(os.path.abspath(path))
 def data_dir(path: str) -> str:
    return os.path.join(demo_dir(path), "data")
 def project_root(path: str) -> str:
    return normpath(os.path.join(demo_dir(path), os.path.pardir))
--- a/tests/ci_build/lint_python.py
+++ b/tests/ci_build/lint_python.py
@@ -121,12 +121,14 @@ if __name__ == "__main__":
                "python-package/xgboost/sklearn.py",
                "python-package/xgboost/spark",
                "python-package/xgboost/federated.py",
-                "python-package/xgboost/testing.py",
+                "python-package/xgboost/testing",
                # tests
                "tests/python/test_config.py",
                "tests/python/test_data_iterator.py",
                "tests/python/test_spark/",
                "tests/python/test_quantile_dmatrix.py",
                "tests/python-gpu/test_gpu_spark/",
                "tests/python-gpu/test_gpu_data_iterator.py",
                "tests/ci_build/lint_python.py",
                # demo
                "demo/guide-python/cat_in_the_dat.py",
--- a/tests/python-gpu/conftest.py
+++ b/tests/python-gpu/conftest.py
@@ -1,9 +1,7 @@
 import sys
 import pytest
 import logging
-sys.path.append("tests/python")
+from xgboost import testing as tm  # noqa
-import testing as tm                          # noqa
+
 def has_rmm():
    try:
@@ -34,8 +32,8 @@ def local_cuda_client(request, pytestconfig):
        kwargs['rmm_pool_size'] = '2GB'
    if tm.no_dask_cuda()['condition']:
        raise ImportError('The local_cuda_cluster fixture requires dask_cuda package')
    from dask_cuda import LocalCUDACluster
    from dask.distributed import Client
    from dask_cuda import LocalCUDACluster
    yield Client(LocalCUDACluster(**kwargs))
 def pytest_addoption(parser):
--- a/tests/python-gpu/load_pickle.py
+++ b/tests/python-gpu/load_pickle.py
@@ -1,16 +1,14 @@
 '''Loading a pickled model generated by test_pickling.py, only used by
 `test_gpu_with_dask.py`'''
 import os
 import numpy as np
 import xgboost as xgb
 import json
 import os
 import numpy as np
 import pytest
-import sys
+from test_gpu_pickling import build_dataset, load_pickle, model_path
-from test_gpu_pickling import build_dataset, model_path, load_pickle
+import xgboost as xgb
-
+from xgboost import testing as tm
 sys.path.append("tests/python")
 import testing as tm
 class TestLoadPickle:
--- a/tests/python-gpu/test_device_quantile_dmatrix.py
+++ b/tests/python-gpu/test_device_quantile_dmatrix.py
@@ -5,10 +5,10 @@ import pytest
 from hypothesis import given, settings, strategies
 import xgboost as xgb
 from xgboost import testing as tm
 sys.path.append("tests/python")
 import test_quantile_dmatrix as tqd
 import testing as tm
 class TestDeviceQuantileDMatrix:
--- a/tests/python-gpu/test_from_cudf.py
+++ b/tests/python-gpu/test_from_cudf.py
@@ -2,11 +2,12 @@ import json
 import sys
 import numpy as np
 import xgboost as xgb
 import pytest
 import xgboost as xgb
 from xgboost import testing as tm
 sys.path.append("tests/python")
 import testing as tm
 from test_dmatrix import set_base_margin_info
@@ -85,8 +86,8 @@ def _test_from_cudf(DMatrixT):
 def _test_cudf_training(DMatrixT):
    from cudf import DataFrame as df
    import pandas as pd
    from cudf import DataFrame as df
    np.random.seed(1)
    X = pd.DataFrame(np.random.randn(50, 10))
    y = pd.DataFrame(np.random.randn(50))
@@ -109,8 +110,8 @@ def _test_cudf_training(DMatrixT):
 def _test_cudf_metainfo(DMatrixT):
    from cudf import DataFrame as df
    import pandas as pd
    from cudf import DataFrame as df
    n = 100
    X = np.random.random((n, 2))
    dmat_cudf = DMatrixT(df.from_pandas(pd.DataFrame(X)))
@@ -247,9 +248,9 @@ Arrow specification.'''
@pytest.mark.skipif(**tm.no_sklearn())
@pytest.mark.skipif(**tm.no_pandas())
 def test_cudf_training_with_sklearn():
    import pandas as pd
    from cudf import DataFrame as df
    from cudf import Series as ss
    import pandas as pd
    np.random.seed(1)
    X = pd.DataFrame(np.random.randn(50, 10))
    y = pd.DataFrame((np.random.randn(50) > 0).astype(np.int8))
--- a/tests/python-gpu/test_from_cupy.py
+++ b/tests/python-gpu/test_from_cupy.py
@@ -1,12 +1,15 @@
 import numpy as np
 import xgboost as xgb
 import sys
 import numpy as np
 import pytest
 import xgboost as xgb
 sys.path.append("tests/python")
 import testing as tm
 from test_dmatrix import set_base_margin_info
 from xgboost import testing as tm
 def dmatrix_from_cupy(input_type, DMatrixT, missing=np.NAN):
    '''Test constructing DMatrix from cupy'''
--- a/tests/python-gpu/test_gpu_basic_models.py
+++ b/tests/python-gpu/test_gpu_basic_models.py
@@ -1,13 +1,18 @@
 import sys
 import os
 import sys
 import numpy as np
 import xgboost as xgb
 import pytest
 import xgboost as xgb
 from xgboost import testing as tm
 sys.path.append("tests/python")
 import test_basic_models as test_bm
 # Don't import the test class, otherwise they will run twice.
 import test_callback as test_cb  # noqa
-import test_basic_models as test_bm
+
 import testing as tm
 rng = np.random.RandomState(1994)
--- a/tests/python-gpu/test_gpu_data_iterator.py
+++ b/tests/python-gpu/test_gpu_data_iterator.py
@@ -1,13 +1,12 @@
 import numpy as np
 import xgboost as xgb
 from hypothesis import given, strategies, settings
 import pytest
 import sys
 import pytest
 from hypothesis import given, settings, strategies
 from xgboost.testing import no_cupy
 sys.path.append("tests/python")
 from test_data_iterator import test_single_batch as cpu_single_batch
 from test_data_iterator import run_data_iterator
-from testing import no_cupy
+from test_data_iterator import test_single_batch as cpu_single_batch
 def test_gpu_single_batch() -> None:
@@ -24,7 +23,11 @@ def test_gpu_single_batch() -> None:
 )
@settings(deadline=None, max_examples=10, print_blob=True)
 def test_gpu_data_iterator(
-    n_samples_per_batch: int, n_features: int, n_batches: int, subsample: bool, use_cupy: bool
+    n_samples_per_batch: int,
    n_features: int,
    n_batches: int,
    subsample: bool,
    use_cupy: bool,
 ) -> None:
    run_data_iterator(
        n_samples_per_batch, n_features, n_batches, "gpu_hist", subsample, use_cupy
--- a/tests/python-gpu/test_gpu_demos.py
+++ b/tests/python-gpu/test_gpu_demos.py
@@ -1,10 +1,13 @@
 import os
 import subprocess
 import sys
 import pytest
 from xgboost import testing as tm
 sys.path.append("tests/python")
-import testing as tm
+import test_demos as td  # noqa
 import test_demos as td         # noqa
@pytest.mark.skipif(**tm.no_cupy())
@@ -31,6 +34,6 @@ def test_categorical_demo():
@pytest.mark.skipif(**tm.no_cupy())
@pytest.mark.mgpu
 def test_dask_training():
-    script = os.path.join(tm.PROJECT_ROOT, 'demo', 'dask', 'gpu_training.py')
+    script = os.path.join(tm.demo_dir(__file__), 'dask', 'gpu_training.py')
    cmd = ['python', script]
    subprocess.check_call(cmd)
--- a/tests/python-gpu/test_gpu_eval_metrics.py
+++ b/tests/python-gpu/test_gpu_eval_metrics.py
@@ -1,7 +1,9 @@
 import sys
-import xgboost
+
 import pytest
 import xgboost
 sys.path.append("tests/python")
 import test_eval_metrics as test_em  # noqa
--- a/tests/python-gpu/test_gpu_interaction_constraints.py
+++ b/tests/python-gpu/test_gpu_interaction_constraints.py
@@ -1,8 +1,11 @@
 import numpy as np
 import sys
 import numpy as np
 sys.path.append("tests/python")
 # Don't import the test class, otherwise they will run twice.
 import test_interaction_constraints as test_ic  # noqa
 rng = np.random.RandomState(1994)
--- a/tests/python-gpu/test_gpu_linear.py
+++ b/tests/python-gpu/test_gpu_linear.py
@@ -1,15 +1,10 @@
 import sys
 import pytest
 from hypothesis import assume, given, note, settings, strategies
 import xgboost as xgb
-from xgboost import testing
+from xgboost import testing as tm
-sys.path.append("tests/python")
+pytestmark = tm.timeout(10)
 import testing as tm
 pytestmark = testing.timeout(10)
 parameter_strategy = strategies.fixed_dictionaries({
    'booster': strategies.just('gblinear'),
--- a/tests/python-gpu/test_gpu_pickling.py
+++ b/tests/python-gpu/test_gpu_pickling.py
@@ -3,20 +3,17 @@ import json
 import os
 import pickle
 import subprocess
 import sys
 import numpy as np
 import pytest
 import xgboost as xgb
-from xgboost import XGBClassifier, testing
+from xgboost import XGBClassifier
-
+from xgboost import testing as tm
 sys.path.append("tests/python")
 import testing as tm
 model_path = './model.pkl'
-pytestmark = testing.timeout(30)
+pytestmark = tm.timeout(30)
 def build_dataset():
--- a/tests/python-gpu/test_gpu_plotting.py
+++ b/tests/python-gpu/test_gpu_plotting.py
@@ -1,10 +1,11 @@
 import sys
 import pytest
-sys.path.append("tests/python")
+from xgboost import testing as tm
 import testing as tm
 import test_plotting as tp
 sys.path.append("tests/python")
 import test_plotting as tp
 pytestmark = pytest.mark.skipif(**tm.no_multiple(tm.no_matplotlib(), tm.no_graphviz()))
--- a/tests/python-gpu/test_gpu_prediction.py
+++ b/tests/python-gpu/test_gpu_prediction.py
@@ -6,7 +6,7 @@ from hypothesis import assume, given, settings, strategies
 from xgboost.compat import PANDAS_INSTALLED
 import xgboost as xgb
-from xgboost import testing
+from xgboost import testing as tm
 if PANDAS_INSTALLED:
    from hypothesis.extra.pandas import column, data_frames, range_indexes
@@ -16,7 +16,6 @@ else:
    column, data_frames, range_indexes = noop, noop, noop
 sys.path.append("tests/python")
 import testing as tm
 from test_predict import run_predict_leaf  # noqa
 from test_predict import run_threaded_predict  # noqa
@@ -33,7 +32,7 @@ predict_parameter_strategy = strategies.fixed_dictionaries({
    'num_parallel_tree': strategies.sampled_from([1, 4]),
 })
-pytestmark = testing.timeout(20)
+pytestmark = tm.timeout(20)
 class TestGPUPredict:
@@ -227,8 +226,8 @@ class TestGPUPredict:
    @pytest.mark.skipif(**tm.no_cupy())
    @pytest.mark.skipif(**tm.no_cudf())
    def test_inplace_predict_cudf(self):
        import cupy as cp
        import cudf
        import cupy as cp
        import pandas as pd
        rows = 1000
        cols = 10
@@ -379,8 +378,8 @@ class TestGPUPredict:
    @pytest.mark.skipif(**tm.no_cupy())
    @pytest.mark.parametrize("n_classes", [2, 3])
    def test_predict_dart(self, n_classes):
        from sklearn.datasets import make_classification
        import cupy as cp
        from sklearn.datasets import make_classification
        n_samples = 1000
        X_, y_ = make_classification(
            n_samples=n_samples, n_informative=5, n_classes=n_classes
--- a/tests/python-gpu/test_gpu_ranking.py
+++ b/tests/python-gpu/test_gpu_ranking.py
@@ -1,20 +1,15 @@
 import itertools
 import os
 import shutil
 import sys
 import urllib.request
 import zipfile
 import numpy as np
 import xgboost
-from xgboost import testing
+from xgboost import testing as tm
-sys.path.append("tests/python")
+pytestmark = tm.timeout(10)
 import testing as tm  # noqa
 pytestmark = testing.timeout(10)
 class TestRanking:
@@ -24,8 +19,9 @@ class TestRanking:
        Download and setup the test fixtures
        """
        from sklearn.datasets import load_svmlight_files
        # download the test data
-        cls.dpath = os.path.join(tm.PROJECT_ROOT, "demo/rank/")
+        cls.dpath = os.path.join(tm.demo_dir(__file__), "rank/")
        src = 'https://s3-us-west-2.amazonaws.com/xgboost-examples/MQ2008.zip'
        target = os.path.join(cls.dpath, "MQ2008.zip")
--- a/tests/python-gpu/test_gpu_spark/test_data.py
+++ b/tests/python-gpu/test_gpu_spark/test_data.py
@@ -1,13 +1,8 @@
 import sys
 from typing import List
 import numpy as np
 import pandas as pd
 import pytest
-sys.path.append("tests/python")
+from xgboost import testing as tm
 import testing as tm
 if tm.no_spark()["condition"]:
    pytest.skip(msg=tm.no_spark()["reason"], allow_module_level=True)
@@ -15,6 +10,7 @@ if sys.platform.startswith("win") or sys.platform.startswith("darwin"):
    pytest.skip("Skipping PySpark tests on Windows", allow_module_level=True)
 sys.path.append("tests/python")
 from test_spark.test_data import run_dmatrix_ctor
--- a/tests/python-gpu/test_gpu_spark/test_gpu_spark.py
+++ b/tests/python-gpu/test_gpu_spark/test_gpu_spark.py
@@ -6,8 +6,7 @@ import sys
 import pytest
 import sklearn
-sys.path.append("tests/python")
+from xgboost import testing as tm
 import testing as tm
 if tm.no_spark()["condition"]:
    pytest.skip(msg=tm.no_spark()["reason"], allow_module_level=True)
--- a/tests/python-gpu/test_gpu_training_continuation.py
+++ b/tests/python-gpu/test_gpu_training_continuation.py
@@ -1,7 +1,9 @@
 import numpy as np
 import xgboost as xgb
 import json
 import numpy as np
 import xgboost as xgb
 rng = np.random.RandomState(1994)
--- a/tests/python-gpu/test_gpu_updaters.py
+++ b/tests/python-gpu/test_gpu_updaters.py
@@ -6,13 +6,12 @@ import pytest
 from hypothesis import assume, given, note, settings, strategies
 import xgboost as xgb
-from xgboost import testing
+from xgboost import testing as tm
 sys.path.append("tests/python")
 import test_updaters as test_up
 import testing as tm
-pytestmark = testing.timeout(30)
+pytestmark = tm.timeout(30)
 parameter_strategy = strategies.fixed_dictionaries({
    'max_depth': strategies.integers(0, 11),
--- a/tests/python-gpu/test_gpu_with_dask/test_gpu_with_dask.py
+++ b/tests/python-gpu/test_gpu_with_dask/test_gpu_with_dask.py
@@ -1,52 +1,54 @@
 """Copyright 2019-2022 XGBoost contributors"""
 import sys
 import os
 from typing import Type, TypeVar, Any, Dict, List, Union
 import pytest
 import numpy as np
 import asyncio
-import xgboost
+import os
 import subprocess
 import sys
 from collections import OrderedDict
 from inspect import signature
-from hypothesis import given, strategies, settings, note
+from typing import Any, Dict, Type, TypeVar, Union
 import numpy as np
 import pytest
 from hypothesis import given, note, settings, strategies
 from hypothesis._settings import duration
 from test_gpu_updaters import parameter_strategy
 import xgboost
 from xgboost import testing as tm
 if sys.platform.startswith("win"):
    pytest.skip("Skipping dask tests on Windows", allow_module_level=True)
 sys.path.append("tests/python")
 import testing as tm  # noqa
 if tm.no_dask_cuda()["condition"]:
    pytest.skip(tm.no_dask_cuda()["reason"], allow_module_level=True)
-from test_with_dask import run_empty_dmatrix_reg  # noqa
+from test_with_dask import _get_client_workers  # noqa
-from test_with_dask import run_empty_dmatrix_auc  # noqa
+from test_with_dask import generate_array  # noqa
 from test_with_dask import make_categorical  # noqa
 from test_with_dask import run_auc  # noqa
 from test_with_dask import run_boost_from_prediction  # noqa
 from test_with_dask import run_boost_from_prediction_multi_class  # noqa
 from test_with_dask import run_dask_classifier  # noqa
 from test_with_dask import run_empty_dmatrix_cls  # noqa
 from test_with_dask import _get_client_workers  # noqa
 from test_with_dask import generate_array  # noqa
 from test_with_dask import kCols as random_cols  # noqa
 from test_with_dask import suppress  # noqa
 from test_with_dask import run_tree_stats  # noqa
 from test_with_dask import run_categorical  # noqa
-from test_with_dask import make_categorical  # noqa
+from test_with_dask import run_dask_classifier  # noqa
-
+from test_with_dask import run_empty_dmatrix_auc  # noqa
 from test_with_dask import run_empty_dmatrix_cls  # noqa
 from test_with_dask import run_empty_dmatrix_reg  # noqa
 from test_with_dask import run_tree_stats  # noqa
 from test_with_dask import suppress  # noqa
 from test_with_dask import kCols as random_cols  # noqa
 try:
    import dask.dataframe as dd
    from xgboost import dask as dxgb
    import xgboost as xgb
    from dask.distributed import Client
    from dask import array as da
    from dask_cuda import LocalCUDACluster, utils
    import cudf
    import dask.dataframe as dd
    from dask import array as da
    from dask.distributed import Client
    from dask_cuda import LocalCUDACluster, utils
    import xgboost as xgb
    from xgboost import dask as dxgb
 except ImportError:
    pass
@@ -334,9 +336,9 @@ class TestDistributedGPU:
    @pytest.mark.skipif(**tm.no_dask_cudf())
    def test_empty_partition(self, local_cuda_client: Client) -> None:
        import dask_cudf
        import cudf
        import cupy
        import dask_cudf
        mult = 100
        df = cudf.DataFrame(
--- a/tests/python-gpu/test_gpu_with_sklearn.py
+++ b/tests/python-gpu/test_gpu_with_sklearn.py
@@ -1,13 +1,15 @@
 import json
 import xgboost as xgb
 import pytest
 import tempfile
 import sys
 import numpy as np
 import os
 import sys
 import tempfile
 import numpy as np
 import pytest
 import xgboost as xgb
 from xgboost import testing as tm
 sys.path.append("tests/python")
 import testing as tm               # noqa
 import test_with_sklearn as twskl  # noqa
 pytestmark = pytest.mark.skipif(**tm.no_sklearn())
@@ -38,9 +40,9 @@ def test_gpu_binary_classification():
@pytest.mark.skipif(**tm.no_cupy())
@pytest.mark.skipif(**tm.no_cudf())
 def test_boost_from_prediction_gpu_hist():
    from sklearn.datasets import load_breast_cancer, load_digits
    import cupy as cp
    import cudf
    import cupy as cp
    from sklearn.datasets import load_breast_cancer, load_digits
    tree_method = "gpu_hist"
    X, y = load_breast_cancer(return_X_y=True)
@@ -68,12 +70,12 @@ def test_num_parallel_tree():
@pytest.mark.skipif(**tm.no_cudf())
@pytest.mark.skipif(**tm.no_sklearn())
 def test_categorical():
    import pandas as pd
    import cudf
    import cupy as cp
    import pandas as pd
    from sklearn.datasets import load_svmlight_file
-    data_dir = os.path.join(tm.PROJECT_ROOT, "demo", "data")
+    data_dir = tm.data_dir(__file__)
    X, y = load_svmlight_file(os.path.join(data_dir, "agaricus.txt.train"))
    clf = xgb.XGBClassifier(
        tree_method="gpu_hist",
@@ -123,9 +125,9 @@ def test_categorical():
@pytest.mark.skipif(**tm.no_cupy())
@pytest.mark.skipif(**tm.no_cudf())
 def test_classififer():
    from sklearn.datasets import load_digits
    import cupy as cp
    import cudf
    import cupy as cp
    from sklearn.datasets import load_digits
    X, y = load_digits(return_X_y=True)
    y *= 10
--- a/tests/python-gpu/test_large_input.py
+++ b/tests/python-gpu/test_large_input.py
@@ -1,9 +1,9 @@
 import numpy as np
 import xgboost as xgb
 import cupy as cp
-import time
+import numpy as np
 import pytest
 import xgboost as xgb
 # Test for integer overflow or out of memory exceptions
 def test_large_input():
--- a/tests/python-gpu/test_monotonic_constraints.py
+++ b/tests/python-gpu/test_monotonic_constraints.py
@@ -1,11 +1,12 @@
 import sys
 import numpy as np
 import numpy as np
 import pytest
 import xgboost as xgb
 from xgboost import testing as tm
 sys.path.append("tests/python")
 import testing as tm
 import test_monotone_constraints as tmc
 rng = np.random.RandomState(1994)
--- a/tests/python/generate_models.py
+++ b/tests/python/generate_models.py
@@ -1,7 +1,9 @@
 import xgboost
 import numpy as np
 import os
 import numpy as np
 import xgboost
 kRounds = 2
 kRows = 1000
 kCols = 4
--- a/tests/python/test_basic.py
+++ b/tests/python/test_basic.py
@@ -1,12 +1,13 @@
 # -*- coding: utf-8 -*-
 import numpy as np
 import os
 import xgboost as xgb
 import pytest
 import json
-from pathlib import Path
+import os
 import tempfile
-import testing as tm
+from pathlib import Path
 import numpy as np
 import pytest
 import xgboost as xgb
 from xgboost import testing as tm
 dpath = 'demo/data/'
 rng = np.random.RandomState(1994)
--- a/tests/python/test_basic_models.py
+++ b/tests/python/test_basic_models.py
@@ -1,13 +1,15 @@
 import numpy as np
 import xgboost as xgb
 import os
 import json
 import testing as tm
 import pytest
 import locale
 import os
 import tempfile
-dpath = os.path.join(tm.PROJECT_ROOT, 'demo/data/')
+import numpy as np
 import pytest
 import xgboost as xgb
 from xgboost import testing as tm
 dpath = tm.data_dir(__file__)
 rng = np.random.RandomState(1994)
@@ -36,8 +38,8 @@ class TestModels:
        param = {'verbosity': 0, 'objective': 'binary:logistic',
                 'booster': 'gblinear', 'alpha': 0.0001, 'lambda': 1,
                 'nthread': 1}
-        dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train')
+        dtrain = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.train"))
-        dtest = xgb.DMatrix(dpath + 'agaricus.txt.test')
+        dtest = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.test"))
        watchlist = [(dtest, 'eval'), (dtrain, 'train')]
        num_round = 4
        bst = xgb.train(param, dtrain, num_round, watchlist)
@@ -49,8 +51,8 @@ class TestModels:
        assert err < 0.2
    def test_dart(self):
-        dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train')
+        dtrain = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.train"))
-        dtest = xgb.DMatrix(dpath + 'agaricus.txt.test')
+        dtest = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.test"))
        param = {'max_depth': 5, 'objective': 'binary:logistic',
                 'eval_metric': 'logloss', 'booster': 'dart', 'verbosity': 1}
        # specify validations set to watch performance
@@ -116,7 +118,7 @@ class TestModels:
    def test_boost_from_prediction(self):
        # Re-construct dtrain here to avoid modification
-        margined = xgb.DMatrix(dpath + 'agaricus.txt.train')
+        margined = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.train"))
        bst = xgb.train({'tree_method': 'hist'}, margined, 1)
        predt_0 = bst.predict(margined, output_margin=True)
        margined.set_base_margin(predt_0)
@@ -124,13 +126,13 @@ class TestModels:
        predt_1 = bst.predict(margined)
        assert np.any(np.abs(predt_1 - predt_0) > 1e-6)
-        dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train')
+        dtrain = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.train"))
        bst = xgb.train({'tree_method': 'hist'}, dtrain, 2)
        predt_2 = bst.predict(dtrain)
        assert np.all(np.abs(predt_2 - predt_1) < 1e-6)
    def test_boost_from_existing_model(self):
-        X = xgb.DMatrix(dpath + 'agaricus.txt.train')
+        X = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.train"))
        booster = xgb.train({'tree_method': 'hist'}, X, num_boost_round=4)
        assert booster.num_boosted_rounds() == 4
        booster = xgb.train({'tree_method': 'hist'}, X, num_boost_round=4,
@@ -150,8 +152,8 @@ class TestModels:
            'objective': 'reg:logistic',
            "tree_method": tree_method
        }
-        dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train')
+        dtrain = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.train"))
-        dtest = xgb.DMatrix(dpath + 'agaricus.txt.test')
+        dtest = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.test"))
        watchlist = [(dtest, 'eval'), (dtrain, 'train')]
        num_round = 10
@@ -197,8 +199,8 @@ class TestModels:
        self.run_custom_objective()
    def test_multi_eval_metric(self):
-        dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train')
+        dtrain = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.train"))
-        dtest = xgb.DMatrix(dpath + 'agaricus.txt.test')
+        dtest = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.test"))
        watchlist = [(dtest, 'eval'), (dtrain, 'train')]
        param = {'max_depth': 2, 'eta': 0.2, 'verbosity': 1,
                 'objective': 'binary:logistic'}
@@ -220,7 +222,7 @@ class TestModels:
            param['scale_pos_weight'] = ratio
            return (dtrain, dtest, param)
-        dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train')
+        dtrain = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.train"))
        xgb.cv(param, dtrain, num_round, nfold=5,
               metrics={'auc'}, seed=0, fpreproc=fpreproc)
@@ -228,7 +230,7 @@ class TestModels:
        param = {'max_depth': 2, 'eta': 1, 'verbosity': 0,
                 'objective': 'binary:logistic'}
        num_round = 2
-        dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train')
+        dtrain = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.train"))
        xgb.cv(param, dtrain, num_round, nfold=5,
               metrics={'error'}, seed=0, show_stdv=False)
@@ -346,7 +348,7 @@ class TestModels:
        os.remove(model_path)
        try:
-            dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train')
+            dtrain = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.train"))
            xgb.train({'objective': 'foo'}, dtrain, num_boost_round=1)
        except ValueError as e:
            e_str = str(e)
--- a/tests/python/test_callback.py
+++ b/tests/python/test_callback.py
@@ -1,9 +1,12 @@
 from typing import Union
 import xgboost as xgb
 import pytest
 import os
 import testing as tm
 import tempfile
 from contextlib import nullcontext
 from typing import Union
 import pytest
 import xgboost as xgb
 from xgboost import testing as tm
 # We use the dataset for tests.
 pytestmark = pytest.mark.skipif(**tm.no_sklearn())
@@ -271,13 +274,14 @@ class TestCallbacks:
        """Test learning rate scheduler, used by both CPU and GPU tests."""
        scheduler = xgb.callback.LearningRateScheduler
-        dpath = os.path.join(tm.PROJECT_ROOT, 'demo/data/')
+        dpath = tm.data_dir(__file__)
-        dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train')
+        dtrain = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.train"))
-        dtest = xgb.DMatrix(dpath + 'agaricus.txt.test')
+        dtest = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.test"))
        watchlist = [(dtest, 'eval'), (dtrain, 'train')]
        num_round = 4
-        warning_check = tm.noop_context()
+        warning_check = nullcontext()
        # learning_rates as a list
        # init eta with 0 to check whether learning_rates work
--- a/tests/python/test_cli.py
+++ b/tests/python/test_cli.py
@@ -1,11 +1,13 @@
 import os
 import tempfile
 import platform
 import xgboost
 import subprocess
 import numpy
 import json
-import testing as tm
+import os
 import platform
 import subprocess
 import tempfile
 import numpy
 import xgboost
 from xgboost import testing as tm
 class TestCLI:
@@ -29,7 +31,7 @@ data = {data_path}
 eval[test] = {data_path}
 '''
-    PROJECT_ROOT = tm.PROJECT_ROOT
+    PROJECT_ROOT = tm.project_root(__file__)
    def get_exe(self):
        if platform.system() == 'Windows':
--- a/tests/python/test_data_iterator.py
+++ b/tests/python/test_data_iterator.py
@@ -1,14 +1,16 @@
 from typing import Dict, List
 import numpy as np
 import pytest
 from hypothesis import given, settings, strategies
 from scipy.sparse import csr_matrix
 from testing import IteratorForTest, make_batches, non_increasing
 from xgboost.data import SingleBatchInternalIter as SingleBatch
 from xgboost.testing import IteratorForTest, make_batches, non_increasing
 import xgboost as xgb
-from xgboost import testing
+from xgboost import testing as tm
-pytestmark = testing.timeout(30)
+pytestmark = tm.timeout(30)
 def test_single_batch(tree_method: str = "approx") -> None:
@@ -83,7 +85,7 @@ def run_data_iterator(
    if tree_method == "gpu_hist":
        parameters["sampling_method"] = "gradient_based"
-    results_from_it: xgb.callback.EvaluationMonitor.EvalsLog = {}
+    results_from_it: Dict[str, Dict[str, List[float]]] = {}
    from_it = xgb.train(
        parameters,
        Xy,
@@ -106,7 +108,7 @@ def run_data_iterator(
    assert Xy.num_row() == n_samples_per_batch * n_batches
    assert Xy.num_col() == n_features
-    results_from_arrays: xgb.callback.EvaluationMonitor.EvalsLog = {}
+    results_from_arrays: Dict[str, Dict[str, List[float]]] = {}
    from_arrays = xgb.train(
        parameters,
        Xy,
--- a/tests/python/test_demos.py
+++ b/tests/python/test_demos.py
@@ -3,14 +3,12 @@ import subprocess
 import sys
 import pytest
 import testing as tm
-from xgboost import testing
+from xgboost import testing as tm
-pytestmark = testing.timeout(30)
+pytestmark = tm.timeout(30)
-ROOT_DIR = tm.PROJECT_ROOT
+DEMO_DIR = tm.demo_dir(__file__)
 DEMO_DIR = os.path.join(ROOT_DIR, 'demo')
 PYTHON_DEMO_DIR = os.path.join(DEMO_DIR, 'guide-python')
 CLI_DEMO_DIR = os.path.join(DEMO_DIR, 'CLI')
@@ -156,7 +154,7 @@ def test_cli_regression_demo():
    cmd = ['python', script, 'machine.txt', '1']
    subprocess.check_call(cmd, cwd=reg_dir)
-    exe = os.path.join(tm.PROJECT_ROOT, 'xgboost')
+    exe = os.path.join(DEMO_DIR, os.path.pardir, 'xgboost')
    conf = os.path.join(reg_dir, 'machine.conf')
    subprocess.check_call([exe, conf], cwd=reg_dir)
--- a/tests/python/test_dmatrix.py
+++ b/tests/python/test_dmatrix.py
@@ -4,11 +4,11 @@ import tempfile
 import numpy as np
 import pytest
 import scipy.sparse
 import testing as tm
 from hypothesis import given, settings, strategies
 from scipy.sparse import csr_matrix, rand
 import xgboost as xgb
 from xgboost import testing as tm
 rng = np.random.RandomState(1)
--- a/tests/python/test_dt.py
+++ b/tests/python/test_dt.py
@@ -1,9 +1,8 @@
 # -*- coding: utf-8 -*-
 import pytest
 import numpy as np
 import pytest
 import testing as tm
 import xgboost as xgb
 from xgboost import testing as tm
 try:
    import datatable as dt
--- a/tests/python/test_early_stopping.py
+++ b/tests/python/test_early_stopping.py
@@ -1,8 +1,9 @@
 import xgboost as xgb
 import testing as tm
 import numpy as np
 import pytest
 import xgboost as xgb
 from xgboost import testing as tm
 rng = np.random.RandomState(1994)
--- a/tests/python/test_eval_metrics.py
+++ b/tests/python/test_eval_metrics.py
@@ -1,8 +1,9 @@
 import xgboost as xgb
 import testing as tm
 import numpy as np
 import pytest
 import xgboost as xgb
 from xgboost import testing as tm
 rng = np.random.RandomState(1337)
@@ -254,8 +255,8 @@ class TestEvalMetrics:
        self.run_roc_auc_multi("hist", n_samples, weighted)
    def run_pr_auc_binary(self, tree_method):
        from sklearn.metrics import precision_recall_curve, auc
        from sklearn.datasets import make_classification
        from sklearn.metrics import auc, precision_recall_curve
        X, y = make_classification(128, 4, n_classes=2, random_state=1994)
        clf = xgb.XGBClassifier(tree_method=tree_method, n_estimators=1)
        clf.fit(X, y, eval_metric="aucpr", eval_set=[(X, y)])
--- a/tests/python/test_interaction_constraints.py
+++ b/tests/python/test_interaction_constraints.py
@@ -1,9 +1,9 @@
 # -*- coding: utf-8 -*-
 import numpy as np
 import xgboost
 import testing as tm
 import pytest
 import xgboost
 from xgboost import testing as tm
 dpath = 'demo/data/'
 rng = np.random.RandomState(1994)
--- a/tests/python/test_linear.py
+++ b/tests/python/test_linear.py
@@ -1,10 +1,9 @@
 import testing as tm
 from hypothesis import given, note, settings, strategies
 import xgboost as xgb
-from xgboost import testing
+from xgboost import testing as tm
-pytestmark = testing.timeout(10)
+pytestmark = tm.timeout(10)
 parameter_strategy = strategies.fixed_dictionaries({
--- a/tests/python/test_model_compatibility.py
+++ b/tests/python/test_model_compatibility.py
@@ -1,12 +1,14 @@
 import xgboost
 import os
 import generate_models as gm
 import testing as tm
 import json
 import zipfile
 import pytest
 import copy
 import json
 import os
 import urllib.request
 import zipfile
 import generate_models as gm
 import pytest
 import xgboost
 from xgboost import testing as tm
 def run_model_param_check(config):
--- a/tests/python/test_monotone_constraints.py
+++ b/tests/python/test_monotone_constraints.py
@@ -1,8 +1,9 @@
 import numpy as np
 import xgboost as xgb
 import testing as tm
 import pytest
 import xgboost as xgb
 from xgboost import testing as tm
 dpath = 'demo/data/'
--- a/tests/python/test_openmp.py
+++ b/tests/python/test_openmp.py
@@ -4,12 +4,11 @@ import tempfile
 import numpy as np
 import pytest
 import testing as tm
 import xgboost as xgb
-from xgboost import testing
+from xgboost import testing as tm
-pytestmark = testing.timeout(10)
+pytestmark = tm.timeout(10)
 class TestOMP:
@@ -86,7 +85,7 @@ class TestOMP:
    def test_with_omp_thread_limit(self):
        args = [
            "python", os.path.join(
-                tm.PROJECT_ROOT, "tests", "python", "with_omp_limit.py"
+                os.path.dirname(tm.normpath(__file__)), "with_omp_limit.py"
            )
        ]
        results = []
--- a/tests/python/test_parse_tree.py
+++ b/tests/python/test_parse_tree.py
@@ -1,8 +1,8 @@
 import xgboost as xgb
 import numpy as np
 import pytest
 import testing as tm
 import xgboost as xgb
 from xgboost import testing as tm
 pytestmark = pytest.mark.skipif(**tm.no_pandas())
--- a/tests/python/test_pickling.py
+++ b/tests/python/test_pickling.py
@@ -1,9 +1,10 @@
 import pickle
 import numpy as np
 import xgboost as xgb
 import os
 import json
 import os
 import pickle
 import numpy as np
 import xgboost as xgb
 kRows = 100
 kCols = 10
--- a/tests/python/test_plotting.py
+++ b/tests/python/test_plotting.py
@@ -1,15 +1,16 @@
 import json
 import numpy as np
 import xgboost as xgb
 import testing as tm
 import numpy as np
 import pytest
 import xgboost as xgb
 from xgboost import testing as tm
 try:
    import matplotlib
    matplotlib.use('Agg')
    from matplotlib.axes import Axes
    from graphviz import Source
    from matplotlib.axes import Axes
 except ImportError:
    pass
--- a/tests/python/test_predict.py
+++ b/tests/python/test_predict.py
@@ -1,12 +1,13 @@
 '''Tests for running inplace prediction.'''
 from concurrent.futures import ThreadPoolExecutor
 import numpy as np
 from scipy import sparse
 import pytest
 import pandas as pd
-import testing as tm
+import numpy as np
 import pandas as pd
 import pytest
 from scipy import sparse
 import xgboost as xgb
 from xgboost import testing as tm
 def run_threaded_predict(X, rows, predict_func):
--- a/tests/python/test_quantile_dmatrix.py
+++ b/tests/python/test_quantile_dmatrix.py
@@ -4,7 +4,7 @@ import numpy as np
 import pytest
 from hypothesis import given, settings, strategies
 from scipy import sparse
-from testing import (
+from xgboost.testing import (
    IteratorForTest,
    make_batches,
    make_batches_sparse,
--- a/tests/python/test_ranking.py
+++ b/tests/python/test_ranking.py
@@ -1,13 +1,15 @@
 import numpy as np
 from scipy.sparse import csr_matrix
 import testing as tm
 import xgboost
 import os
 import itertools
 import os
 import shutil
 import urllib.request
 import zipfile
 import numpy as np
 from scipy.sparse import csr_matrix
 import xgboost
 from xgboost import testing as tm
 def test_ranking_with_unweighted_data():
    Xrow = np.array([1, 2, 6, 8, 11, 14, 16, 17])
--- a/tests/python/test_shap.py
+++ b/tests/python/test_shap.py
@@ -1,11 +1,12 @@
 # -*- coding: utf-8 -*-
 import numpy as np
 import xgboost as xgb
 import itertools
 import re
 import numpy as np
 import scipy
 import scipy.special
 import xgboost as xgb
 dpath = 'demo/data/'
 rng = np.random.RandomState(1994)
--- a/tests/python/test_spark/test_data.py
+++ b/tests/python/test_spark/test_data.py
@@ -4,7 +4,8 @@ from typing import List
 import numpy as np
 import pandas as pd
 import pytest
-import testing as tm
+
 from xgboost import testing as tm
 if tm.no_spark()["condition"]:
    pytest.skip(msg=tm.no_spark()["reason"], allow_module_level=True)
--- a/tests/python/test_spark/test_spark_local.py
+++ b/tests/python/test_spark/test_spark_local.py
@@ -6,10 +6,9 @@ import uuid
 import numpy as np
 import pytest
 import testing as tm
 import xgboost as xgb
-from xgboost import testing
+from xgboost import testing as tm
 if tm.no_spark()["condition"]:
    pytest.skip(msg=tm.no_spark()["reason"], allow_module_level=True)
@@ -38,7 +37,7 @@ from .utils import SparkTestCase
 logging.getLogger("py4j").setLevel(logging.INFO)
-pytestmark = testing.timeout(60)
+pytestmark = tm.timeout(60)
 class XgboostLocalTest(SparkTestCase):
--- a/tests/python/test_spark/test_spark_local_cluster.py
+++ b/tests/python/test_spark/test_spark_local_cluster.py
@@ -6,7 +6,8 @@ import uuid
 import numpy as np
 import pytest
-import testing as tm
+
 from xgboost import testing as tm
 if tm.no_spark()["condition"]:
    pytest.skip(msg=tm.no_spark()["reason"], allow_module_level=True)
--- a/tests/python/test_spark/utils.py
+++ b/tests/python/test_spark/utils.py
@@ -6,9 +6,10 @@ import tempfile
 import unittest
 import pytest
 import testing as tm
 from six import StringIO
 from xgboost import testing as tm
 if tm.no_spark()["condition"]:
    pytest.skip(msg=tm.no_spark()["reason"], allow_module_level=True)
 if sys.platform.startswith("win") or sys.platform.startswith("darwin"):
--- a/tests/python/test_survival.py
+++ b/tests/python/test_survival.py
@@ -1,11 +1,13 @@
 import testing as tm
 import pytest
 import numpy as np
 import xgboost as xgb
 import json
 import os
-dpath = os.path.join(tm.PROJECT_ROOT, 'demo', 'data')
+import numpy as np
 import pytest
 import xgboost as xgb
 from xgboost import testing as tm
 dpath = tm.data_dir(__file__)
 def test_aft_survival_toy_data():
--- a/tests/python/test_tracker.py
+++ b/tests/python/test_tracker.py
@@ -3,10 +3,10 @@ import sys
 import numpy as np
 import pytest
 import testing as tm
 import xgboost as xgb
-from xgboost import RabitTracker, testing
+from xgboost import RabitTracker
 from xgboost import testing as tm
 if sys.platform.startswith("win"):
    pytest.skip("Skipping dask tests on Windows", allow_module_level=True)
@@ -61,7 +61,7 @@ def test_rabit_ops():
            run_rabit_ops(client, n_workers)
-@pytest.mark.skipif(**testing.skip_ipv6())
+@pytest.mark.skipif(**tm.no_ipv6())
@pytest.mark.skipif(**tm.no_dask())
 def test_rabit_ops_ipv6():
    import dask
--- a/tests/python/test_training_continuation.py
+++ b/tests/python/test_training_continuation.py
@@ -1,10 +1,11 @@
 import xgboost as xgb
 import testing as tm
 import numpy as np
 import pytest
 import os
 import tempfile
 import numpy as np
 import pytest
 import xgboost as xgb
 from xgboost import testing as tm
 rng = np.random.RandomState(1337)
--- a/tests/python/test_tree_regularization.py
+++ b/tests/python/test_tree_regularization.py
@@ -1,8 +1,8 @@
 import numpy as np
 import xgboost as xgb
 from numpy.testing import assert_approx_equal
 import xgboost as xgb
 train_data = xgb.DMatrix(np.array([[1]]), label=np.array([1]))
--- a/tests/python/test_updaters.py
+++ b/tests/python/test_updaters.py
@@ -1,11 +1,13 @@
 import json
 from string import ascii_lowercase
-from typing import Dict, Any
+from typing import Any, Dict
-import testing as tm
+
 import pytest
 import xgboost as xgb
 import numpy as np
-from hypothesis import given, strategies, settings, note
+import pytest
 from hypothesis import given, note, settings, strategies
 import xgboost as xgb
 from xgboost import testing as tm
 exact_parameter_strategy = strategies.fixed_dictionaries({
    'nthread': strategies.integers(1, 4),
--- a/tests/python/test_with_arrow.py
+++ b/tests/python/test_with_arrow.py
@@ -1,14 +1,16 @@
 import unittest
 import pytest
 import numpy as np
 import testing as tm
 import xgboost as xgb
 import os
 import unittest
 import numpy as np
 import pytest
 import xgboost as xgb
 from xgboost import testing as tm
 try:
    import pandas as pd
    import pyarrow as pa
    import pyarrow.csv as pc
    import pandas as pd
 except ImportError:
    pass
@@ -73,7 +75,7 @@ class TestArrowTable(unittest.TestCase):
        np.testing.assert_allclose(preds1, preds2)
    def test_arrow_survival(self):
-        data = os.path.join(tm.PROJECT_ROOT, "demo", "data", "veterans_lung_cancer.csv")
+        data = os.path.join(tm.data_dir(__file__), "veterans_lung_cancer.csv")
        table = pc.read_csv(data)
        y_lower_bound = table["Survival_label_lower_bound"]
        y_upper_bound = table["Survival_label_upper_bound"]
--- a/tests/python/test_with_dask.py
+++ b/tests/python/test_with_dask.py
@@ -20,7 +20,6 @@ import numpy as np
 import pytest
 import scipy
 import sklearn
 import testing as tm
 from hypothesis import HealthCheck, given, note, settings
 from sklearn.datasets import make_classification, make_regression
 from test_predict import verify_leaf_output
@@ -29,7 +28,7 @@ from test_with_sklearn import run_data_initialization, run_feature_weights
 from xgboost.data import _is_cudf_df
 import xgboost as xgb
-from xgboost import testing
+from xgboost import testing as tm
 if sys.platform.startswith("win"):
    pytest.skip("Skipping dask tests on Windows", allow_module_level=True)
@@ -45,7 +44,7 @@ from xgboost.dask import DaskDMatrix
 dask.config.set({"distributed.scheduler.allowed-failures": False})
-pytestmark = testing.timeout(30)
+pytestmark = tm.timeout(30)
 if hasattr(HealthCheck, 'function_scoped_fixture'):
    suppress = [HealthCheck.function_scoped_fixture]
@@ -1116,8 +1115,9 @@ def test_predict_with_meta(client: "Client") -> None:
 def run_aft_survival(client: "Client", dmatrix_t: Type) -> None:
-    df = dd.read_csv(os.path.join(tm.PROJECT_ROOT, 'demo', 'data',
+    df = dd.read_csv(
-                                  'veterans_lung_cancer.csv'))
+        os.path.join(tm.data_dir(__file__), "veterans_lung_cancer.csv")
    )
    y_lower_bound = df['Survival_label_lower_bound']
    y_upper_bound = df['Survival_label_upper_bound']
    X = df.drop(['Survival_label_lower_bound',
--- a/tests/python/test_with_modin.py
+++ b/tests/python/test_with_modin.py
@@ -1,10 +1,10 @@
 # -*- coding: utf-8 -*-
 import numpy as np
 import xgboost as xgb
 import testing as tm
 import pytest
 from test_dmatrix import set_base_margin_info
 import xgboost as xgb
 from xgboost import testing as tm
 try:
    import modin.pandas as md
 except ImportError:
--- a/tests/python/test_with_pandas.py
+++ b/tests/python/test_with_pandas.py
@@ -1,11 +1,13 @@
 import os
 import tempfile
 import numpy as np
 import xgboost as xgb
 import testing as tm
 import pytest
 from test_dmatrix import set_base_margin_info
 import xgboost as xgb
 from xgboost import testing as tm
 try:
    import pandas as pd
 except ImportError:
--- a/tests/python/test_with_shap.py
+++ b/tests/python/test_with_shap.py
@@ -1,7 +1,8 @@
 import numpy as np
 import xgboost as xgb
 import pytest
 import xgboost as xgb
 try:
    import shap
 except ImportError:
--- a/tests/python/test_with_sklearn.py
+++ b/tests/python/test_with_sklearn.py
@@ -8,14 +8,13 @@ from typing import Callable, Optional
 import numpy as np
 import pytest
 import testing as tm
 from sklearn.utils.estimator_checks import parametrize_with_checks
 import xgboost as xgb
-from xgboost import testing
+from xgboost import testing as tm
 rng = np.random.RandomState(1994)
-pytestmark = [pytest.mark.skipif(**tm.no_sklearn()), testing.timeout(30)]
+pytestmark = [pytest.mark.skipif(**tm.no_sklearn()), tm.timeout(30)]
 def test_binary_classification():
@@ -155,11 +154,10 @@ def test_ranking():
 def test_stacking_regression():
    from sklearn.model_selection import train_test_split
    from sklearn.datasets import load_diabetes
    from sklearn.ensemble import RandomForestRegressor, StackingRegressor
    from sklearn.linear_model import RidgeCV
-    from sklearn.ensemble import RandomForestRegressor
+    from sklearn.model_selection import train_test_split
    from sklearn.ensemble import StackingRegressor
    X, y = load_diabetes(return_X_y=True)
    estimators = [
@@ -177,13 +175,13 @@ def test_stacking_regression():
 def test_stacking_classification():
    from sklearn.model_selection import train_test_split
    from sklearn.datasets import load_iris
    from sklearn.svm import LinearSVC
    from sklearn.linear_model import LogisticRegression
    from sklearn.preprocessing import StandardScaler
    from sklearn.pipeline import make_pipeline
    from sklearn.ensemble import StackingClassifier
    from sklearn.linear_model import LogisticRegression
    from sklearn.model_selection import train_test_split
    from sklearn.pipeline import make_pipeline
    from sklearn.preprocessing import StandardScaler
    from sklearn.svm import LinearSVC
    X, y = load_iris(return_X_y=True)
    estimators = [
@@ -354,8 +352,8 @@ def test_num_parallel_tree():
 def test_regression():
    from sklearn.metrics import mean_squared_error
    from sklearn.datasets import fetch_california_housing
    from sklearn.metrics import mean_squared_error
    from sklearn.model_selection import KFold
    X, y = fetch_california_housing(return_X_y=True)
@@ -383,8 +381,8 @@ def test_regression():
 def run_housing_rf_regression(tree_method):
    from sklearn.metrics import mean_squared_error
    from sklearn.datasets import fetch_california_housing
    from sklearn.metrics import mean_squared_error
    from sklearn.model_selection import KFold
    X, y = fetch_california_housing(return_X_y=True)
@@ -407,8 +405,8 @@ def test_rf_regression():
 def test_parameter_tuning():
    from sklearn.model_selection import GridSearchCV
    from sklearn.datasets import fetch_california_housing
    from sklearn.model_selection import GridSearchCV
    X, y = fetch_california_housing(return_X_y=True)
    xgb_model = xgb.XGBRegressor(learning_rate=0.1)
@@ -421,8 +419,8 @@ def test_parameter_tuning():
 def test_regression_with_custom_objective():
    from sklearn.metrics import mean_squared_error
    from sklearn.datasets import fetch_california_housing
    from sklearn.metrics import mean_squared_error
    from sklearn.model_selection import KFold
    def objective_ls(y_true, y_pred):
@@ -539,8 +537,8 @@ def test_sklearn_plotting():
    import matplotlib
    matplotlib.use('Agg')
    from matplotlib.axes import Axes
    from graphviz import Source
    from matplotlib.axes import Axes
    ax = xgb.plot_importance(classifier)
    assert isinstance(ax, Axes)
@@ -666,8 +664,8 @@ def test_kwargs_error():
 def test_kwargs_grid_search():
    from sklearn.model_selection import GridSearchCV
    from sklearn import datasets
    from sklearn.model_selection import GridSearchCV
    params = {'tree_method': 'hist'}
    clf = xgb.XGBClassifier(n_estimators=1, learning_rate=1.0, **params)
@@ -841,9 +839,7 @@ def test_save_load_model():
 def test_RFECV():
-    from sklearn.datasets import load_diabetes
+    from sklearn.datasets import load_breast_cancer, load_diabetes, load_iris
    from sklearn.datasets import load_breast_cancer
    from sklearn.datasets import load_iris
    from sklearn.feature_selection import RFECV
    # Regression
@@ -1046,8 +1042,9 @@ def run_feature_weights(X, y, fw, tree_method, model=xgb.XGBRegressor):
        with open(model_path) as fd:
            model = json.load(fd)
-        parser_path = os.path.join(tm.PROJECT_ROOT, 'demo', 'json-model',
+        parser_path = os.path.join(
-                                   'json_parser.py')
+            tm.demo_dir(__file__), "json-model", "json_parser.py"
        )
        spec = importlib.util.spec_from_file_location("JsonParser",
                                                      parser_path)
        foo = importlib.util.module_from_spec(spec)
@@ -1162,8 +1159,8 @@ def run_boost_from_prediction_multi_clasas(
@pytest.mark.parametrize("tree_method", ["hist", "approx", "exact"])
 def test_boost_from_prediction(tree_method):
    from sklearn.datasets import load_breast_cancer, load_iris, make_regression
    import pandas as pd
    from sklearn.datasets import load_breast_cancer, load_iris, make_regression
    X, y = load_breast_cancer(return_X_y=True)
--- a/tests/python/with_omp_limit.py
+++ b/tests/python/with_omp_limit.py
@@ -1,7 +1,9 @@
-import xgboost as xgb
+import sys
 from sklearn.datasets import make_classification
 from sklearn.metrics import roc_auc_score
-import sys
+
 import xgboost as xgb
 def run_omp(output_path: str):