Move Python testing utilities into xgboost module. (#8379)

- Add type hints.
- Fixes for pylint.

Co-authored-by: Hyunsu Philip Cho <chohyu01@cs.washington.edu>
2022-10-26 16:56:11 +08:00
parent 7e53189e7c
commit cf70864fa3
66 changed files with 652 additions and 595 deletions
--- a/tests/python/generate_models.py
+++ b/tests/python/generate_models.py
@@ -1,7 +1,9 @@
-import xgboost
-import numpy as np
 import os

+import numpy as np
+
+import xgboost
+
 kRounds = 2
 kRows = 1000
 kCols = 4
--- a/tests/python/test_basic.py
+++ b/tests/python/test_basic.py
@@ -1,12 +1,13 @@
-# -*- coding: utf-8 -*-
-import numpy as np
-import os
-import xgboost as xgb
-import pytest
 import json
-from pathlib import Path
+import os
 import tempfile
-import testing as tm
+from pathlib import Path
+
+import numpy as np
+import pytest
+
+import xgboost as xgb
+from xgboost import testing as tm

 dpath = 'demo/data/'
 rng = np.random.RandomState(1994)
--- a/tests/python/test_basic_models.py
+++ b/tests/python/test_basic_models.py
@@ -1,13 +1,15 @@
-import numpy as np
-import xgboost as xgb
-import os
 import json
-import testing as tm
-import pytest
 import locale
+import os
 import tempfile

-dpath = os.path.join(tm.PROJECT_ROOT, 'demo/data/')
+import numpy as np
+import pytest
+
+import xgboost as xgb
+from xgboost import testing as tm
+
+dpath = tm.data_dir(__file__)

 rng = np.random.RandomState(1994)

@@ -36,8 +38,8 @@ class TestModels:
        param = {'verbosity': 0, 'objective': 'binary:logistic',
                 'booster': 'gblinear', 'alpha': 0.0001, 'lambda': 1,
                 'nthread': 1}
-        dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train')
-        dtest = xgb.DMatrix(dpath + 'agaricus.txt.test')
+        dtrain = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.train"))
+        dtest = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.test"))
        watchlist = [(dtest, 'eval'), (dtrain, 'train')]
        num_round = 4
        bst = xgb.train(param, dtrain, num_round, watchlist)
@@ -49,8 +51,8 @@ class TestModels:
        assert err < 0.2

    def test_dart(self):
-        dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train')
-        dtest = xgb.DMatrix(dpath + 'agaricus.txt.test')
+        dtrain = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.train"))
+        dtest = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.test"))
        param = {'max_depth': 5, 'objective': 'binary:logistic',
                 'eval_metric': 'logloss', 'booster': 'dart', 'verbosity': 1}
        # specify validations set to watch performance
@@ -116,7 +118,7 @@ class TestModels:

    def test_boost_from_prediction(self):
        # Re-construct dtrain here to avoid modification
-        margined = xgb.DMatrix(dpath + 'agaricus.txt.train')
+        margined = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.train"))
        bst = xgb.train({'tree_method': 'hist'}, margined, 1)
        predt_0 = bst.predict(margined, output_margin=True)
        margined.set_base_margin(predt_0)
@@ -124,13 +126,13 @@ class TestModels:
        predt_1 = bst.predict(margined)

        assert np.any(np.abs(predt_1 - predt_0) > 1e-6)
-        dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train')
+        dtrain = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.train"))
        bst = xgb.train({'tree_method': 'hist'}, dtrain, 2)
        predt_2 = bst.predict(dtrain)
        assert np.all(np.abs(predt_2 - predt_1) < 1e-6)

    def test_boost_from_existing_model(self):
-        X = xgb.DMatrix(dpath + 'agaricus.txt.train')
+        X = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.train"))
        booster = xgb.train({'tree_method': 'hist'}, X, num_boost_round=4)
        assert booster.num_boosted_rounds() == 4
        booster = xgb.train({'tree_method': 'hist'}, X, num_boost_round=4,
@@ -150,8 +152,8 @@ class TestModels:
            'objective': 'reg:logistic',
            "tree_method": tree_method
        }
-        dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train')
-        dtest = xgb.DMatrix(dpath + 'agaricus.txt.test')
+        dtrain = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.train"))
+        dtest = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.test"))
        watchlist = [(dtest, 'eval'), (dtrain, 'train')]
        num_round = 10

@@ -197,8 +199,8 @@ class TestModels:
        self.run_custom_objective()

    def test_multi_eval_metric(self):
-        dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train')
-        dtest = xgb.DMatrix(dpath + 'agaricus.txt.test')
+        dtrain = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.train"))
+        dtest = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.test"))
        watchlist = [(dtest, 'eval'), (dtrain, 'train')]
        param = {'max_depth': 2, 'eta': 0.2, 'verbosity': 1,
                 'objective': 'binary:logistic'}
@@ -220,7 +222,7 @@ class TestModels:
            param['scale_pos_weight'] = ratio
            return (dtrain, dtest, param)

-        dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train')
+        dtrain = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.train"))
        xgb.cv(param, dtrain, num_round, nfold=5,
               metrics={'auc'}, seed=0, fpreproc=fpreproc)

@@ -228,7 +230,7 @@ class TestModels:
        param = {'max_depth': 2, 'eta': 1, 'verbosity': 0,
                 'objective': 'binary:logistic'}
        num_round = 2
-        dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train')
+        dtrain = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.train"))
        xgb.cv(param, dtrain, num_round, nfold=5,
               metrics={'error'}, seed=0, show_stdv=False)

@@ -346,7 +348,7 @@ class TestModels:
        os.remove(model_path)

        try:
-            dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train')
+            dtrain = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.train"))
            xgb.train({'objective': 'foo'}, dtrain, num_boost_round=1)
        except ValueError as e:
            e_str = str(e)
--- a/tests/python/test_callback.py
+++ b/tests/python/test_callback.py
@@ -1,9 +1,12 @@
-from typing import Union
-import xgboost as xgb
-import pytest
 import os
-import testing as tm
 import tempfile
+from contextlib import nullcontext
+from typing import Union
+
+import pytest
+
+import xgboost as xgb
+from xgboost import testing as tm

 # We use the dataset for tests.
 pytestmark = pytest.mark.skipif(**tm.no_sklearn())
@@ -271,13 +274,14 @@ class TestCallbacks:
        """Test learning rate scheduler, used by both CPU and GPU tests."""
        scheduler = xgb.callback.LearningRateScheduler

-        dpath = os.path.join(tm.PROJECT_ROOT, 'demo/data/')
-        dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train')
-        dtest = xgb.DMatrix(dpath + 'agaricus.txt.test')
+        dpath = tm.data_dir(__file__)
+        dtrain = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.train"))
+        dtest = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.test"))
+
        watchlist = [(dtest, 'eval'), (dtrain, 'train')]
        num_round = 4

-        warning_check = tm.noop_context()
+        warning_check = nullcontext()

        # learning_rates as a list
        # init eta with 0 to check whether learning_rates work
--- a/tests/python/test_cli.py
+++ b/tests/python/test_cli.py
@@ -1,11 +1,13 @@
-import os
-import tempfile
-import platform
-import xgboost
-import subprocess
-import numpy
 import json
-import testing as tm
+import os
+import platform
+import subprocess
+import tempfile
+
+import numpy
+
+import xgboost
+from xgboost import testing as tm


 class TestCLI:
@@ -29,7 +31,7 @@ data = {data_path}
 eval[test] = {data_path}
 '''

-    PROJECT_ROOT = tm.PROJECT_ROOT
+    PROJECT_ROOT = tm.project_root(__file__)

    def get_exe(self):
        if platform.system() == 'Windows':
--- a/tests/python/test_data_iterator.py
+++ b/tests/python/test_data_iterator.py
@@ -1,14 +1,16 @@
+from typing import Dict, List
+
 import numpy as np
 import pytest
 from hypothesis import given, settings, strategies
 from scipy.sparse import csr_matrix
-from testing import IteratorForTest, make_batches, non_increasing
 from xgboost.data import SingleBatchInternalIter as SingleBatch
+from xgboost.testing import IteratorForTest, make_batches, non_increasing

 import xgboost as xgb
-from xgboost import testing
+from xgboost import testing as tm

-pytestmark = testing.timeout(30)
+pytestmark = tm.timeout(30)


 def test_single_batch(tree_method: str = "approx") -> None:
@@ -83,7 +85,7 @@ def run_data_iterator(
    if tree_method == "gpu_hist":
        parameters["sampling_method"] = "gradient_based"

-    results_from_it: xgb.callback.EvaluationMonitor.EvalsLog = {}
+    results_from_it: Dict[str, Dict[str, List[float]]] = {}
    from_it = xgb.train(
        parameters,
        Xy,
@@ -106,7 +108,7 @@ def run_data_iterator(
    assert Xy.num_row() == n_samples_per_batch * n_batches
    assert Xy.num_col() == n_features

-    results_from_arrays: xgb.callback.EvaluationMonitor.EvalsLog = {}
+    results_from_arrays: Dict[str, Dict[str, List[float]]] = {}
    from_arrays = xgb.train(
        parameters,
        Xy,
--- a/tests/python/test_demos.py
+++ b/tests/python/test_demos.py
@@ -3,14 +3,12 @@ import subprocess
 import sys

 import pytest
-import testing as tm

-from xgboost import testing
+from xgboost import testing as tm

-pytestmark = testing.timeout(30)
+pytestmark = tm.timeout(30)

-ROOT_DIR = tm.PROJECT_ROOT
-DEMO_DIR = os.path.join(ROOT_DIR, 'demo')
+DEMO_DIR = tm.demo_dir(__file__)
 PYTHON_DEMO_DIR = os.path.join(DEMO_DIR, 'guide-python')
 CLI_DEMO_DIR = os.path.join(DEMO_DIR, 'CLI')

@@ -156,7 +154,7 @@ def test_cli_regression_demo():
    cmd = ['python', script, 'machine.txt', '1']
    subprocess.check_call(cmd, cwd=reg_dir)

-    exe = os.path.join(tm.PROJECT_ROOT, 'xgboost')
+    exe = os.path.join(DEMO_DIR, os.path.pardir, 'xgboost')
    conf = os.path.join(reg_dir, 'machine.conf')
    subprocess.check_call([exe, conf], cwd=reg_dir)

--- a/tests/python/test_dmatrix.py
+++ b/tests/python/test_dmatrix.py
@@ -4,11 +4,11 @@ import tempfile
 import numpy as np
 import pytest
 import scipy.sparse
-import testing as tm
 from hypothesis import given, settings, strategies
 from scipy.sparse import csr_matrix, rand

 import xgboost as xgb
+from xgboost import testing as tm

 rng = np.random.RandomState(1)

--- a/tests/python/test_dt.py
+++ b/tests/python/test_dt.py
@@ -1,9 +1,8 @@
-# -*- coding: utf-8 -*-
-import pytest
 import numpy as np
+import pytest

-import testing as tm
 import xgboost as xgb
+from xgboost import testing as tm

 try:
    import datatable as dt
--- a/tests/python/test_early_stopping.py
+++ b/tests/python/test_early_stopping.py
@@ -1,8 +1,9 @@
-import xgboost as xgb
-import testing as tm
 import numpy as np
 import pytest

+import xgboost as xgb
+from xgboost import testing as tm
+
 rng = np.random.RandomState(1994)


--- a/tests/python/test_eval_metrics.py
+++ b/tests/python/test_eval_metrics.py
@@ -1,8 +1,9 @@
-import xgboost as xgb
-import testing as tm
 import numpy as np
 import pytest

+import xgboost as xgb
+from xgboost import testing as tm
+
 rng = np.random.RandomState(1337)


@@ -254,8 +255,8 @@ class TestEvalMetrics:
        self.run_roc_auc_multi("hist", n_samples, weighted)

    def run_pr_auc_binary(self, tree_method):
-        from sklearn.metrics import precision_recall_curve, auc
        from sklearn.datasets import make_classification
+        from sklearn.metrics import auc, precision_recall_curve
        X, y = make_classification(128, 4, n_classes=2, random_state=1994)
        clf = xgb.XGBClassifier(tree_method=tree_method, n_estimators=1)
        clf.fit(X, y, eval_metric="aucpr", eval_set=[(X, y)])
--- a/tests/python/test_interaction_constraints.py
+++ b/tests/python/test_interaction_constraints.py
@@ -1,9 +1,9 @@
-# -*- coding: utf-8 -*-
 import numpy as np
-import xgboost
-import testing as tm
 import pytest

+import xgboost
+from xgboost import testing as tm
+
 dpath = 'demo/data/'
 rng = np.random.RandomState(1994)

--- a/tests/python/test_linear.py
+++ b/tests/python/test_linear.py
@@ -1,10 +1,9 @@
-import testing as tm
 from hypothesis import given, note, settings, strategies

 import xgboost as xgb
-from xgboost import testing
+from xgboost import testing as tm

-pytestmark = testing.timeout(10)
+pytestmark = tm.timeout(10)


 parameter_strategy = strategies.fixed_dictionaries({
--- a/tests/python/test_model_compatibility.py
+++ b/tests/python/test_model_compatibility.py
@@ -1,12 +1,14 @@
-import xgboost
-import os
-import generate_models as gm
-import testing as tm
-import json
-import zipfile
-import pytest
 import copy
+import json
+import os
 import urllib.request
+import zipfile
+
+import generate_models as gm
+import pytest
+
+import xgboost
+from xgboost import testing as tm


 def run_model_param_check(config):
--- a/tests/python/test_monotone_constraints.py
+++ b/tests/python/test_monotone_constraints.py
@@ -1,8 +1,9 @@
 import numpy as np
-import xgboost as xgb
-import testing as tm
 import pytest

+import xgboost as xgb
+from xgboost import testing as tm
+
 dpath = 'demo/data/'


--- a/tests/python/test_openmp.py
+++ b/tests/python/test_openmp.py
@@ -4,12 +4,11 @@ import tempfile

 import numpy as np
 import pytest
-import testing as tm

 import xgboost as xgb
-from xgboost import testing
+from xgboost import testing as tm

-pytestmark = testing.timeout(10)
+pytestmark = tm.timeout(10)


 class TestOMP:
@@ -86,7 +85,7 @@ class TestOMP:
    def test_with_omp_thread_limit(self):
        args = [
            "python", os.path.join(
-                tm.PROJECT_ROOT, "tests", "python", "with_omp_limit.py"
+                os.path.dirname(tm.normpath(__file__)), "with_omp_limit.py"
            )
        ]
        results = []
--- a/tests/python/test_parse_tree.py
+++ b/tests/python/test_parse_tree.py
@@ -1,8 +1,8 @@
-import xgboost as xgb
 import numpy as np
 import pytest
-import testing as tm

+import xgboost as xgb
+from xgboost import testing as tm

 pytestmark = pytest.mark.skipif(**tm.no_pandas())

--- a/tests/python/test_pickling.py
+++ b/tests/python/test_pickling.py
@@ -1,9 +1,10 @@
-import pickle
-import numpy as np
-import xgboost as xgb
-import os
 import json
+import os
+import pickle

+import numpy as np
+
+import xgboost as xgb

 kRows = 100
 kCols = 10
--- a/tests/python/test_plotting.py
+++ b/tests/python/test_plotting.py
@@ -1,15 +1,16 @@
 import json
-import numpy as np
-import xgboost as xgb
-import testing as tm

+import numpy as np
 import pytest

+import xgboost as xgb
+from xgboost import testing as tm
+
 try:
    import matplotlib
    matplotlib.use('Agg')
-    from matplotlib.axes import Axes
    from graphviz import Source
+    from matplotlib.axes import Axes
 except ImportError:
    pass

--- a/tests/python/test_predict.py
+++ b/tests/python/test_predict.py
@@ -1,12 +1,13 @@
 '''Tests for running inplace prediction.'''
 from concurrent.futures import ThreadPoolExecutor
-import numpy as np
-from scipy import sparse
-import pytest
-import pandas as pd

-import testing as tm
+import numpy as np
+import pandas as pd
+import pytest
+from scipy import sparse
+
 import xgboost as xgb
+from xgboost import testing as tm


 def run_threaded_predict(X, rows, predict_func):
--- a/tests/python/test_quantile_dmatrix.py
+++ b/tests/python/test_quantile_dmatrix.py
@@ -4,7 +4,7 @@ import numpy as np
 import pytest
 from hypothesis import given, settings, strategies
 from scipy import sparse
-from testing import (
+from xgboost.testing import (
    IteratorForTest,
    make_batches,
    make_batches_sparse,
--- a/tests/python/test_ranking.py
+++ b/tests/python/test_ranking.py
@@ -1,13 +1,15 @@
-import numpy as np
-from scipy.sparse import csr_matrix
-import testing as tm
-import xgboost
-import os
 import itertools
+import os
 import shutil
 import urllib.request
 import zipfile

+import numpy as np
+from scipy.sparse import csr_matrix
+
+import xgboost
+from xgboost import testing as tm
+

 def test_ranking_with_unweighted_data():
    Xrow = np.array([1, 2, 6, 8, 11, 14, 16, 17])
--- a/tests/python/test_shap.py
+++ b/tests/python/test_shap.py
@@ -1,11 +1,12 @@
-# -*- coding: utf-8 -*-
-import numpy as np
-import xgboost as xgb
 import itertools
 import re
+
+import numpy as np
 import scipy
 import scipy.special

+import xgboost as xgb
+
 dpath = 'demo/data/'
 rng = np.random.RandomState(1994)

--- a/tests/python/test_spark/test_data.py
+++ b/tests/python/test_spark/test_data.py
@@ -4,7 +4,8 @@ from typing import List
 import numpy as np
 import pandas as pd
 import pytest
-import testing as tm
+
+from xgboost import testing as tm

 if tm.no_spark()["condition"]:
    pytest.skip(msg=tm.no_spark()["reason"], allow_module_level=True)
--- a/tests/python/test_spark/test_spark_local.py
+++ b/tests/python/test_spark/test_spark_local.py
@@ -6,10 +6,9 @@ import uuid

 import numpy as np
 import pytest
-import testing as tm

 import xgboost as xgb
-from xgboost import testing
+from xgboost import testing as tm

 if tm.no_spark()["condition"]:
    pytest.skip(msg=tm.no_spark()["reason"], allow_module_level=True)
@@ -38,7 +37,7 @@ from .utils import SparkTestCase

 logging.getLogger("py4j").setLevel(logging.INFO)

-pytestmark = testing.timeout(60)
+pytestmark = tm.timeout(60)


 class XgboostLocalTest(SparkTestCase):
--- a/tests/python/test_spark/test_spark_local_cluster.py
+++ b/tests/python/test_spark/test_spark_local_cluster.py
@@ -6,7 +6,8 @@ import uuid

 import numpy as np
 import pytest
-import testing as tm
+
+from xgboost import testing as tm

 if tm.no_spark()["condition"]:
    pytest.skip(msg=tm.no_spark()["reason"], allow_module_level=True)
--- a/tests/python/test_spark/utils.py
+++ b/tests/python/test_spark/utils.py
@@ -6,9 +6,10 @@ import tempfile
 import unittest

 import pytest
-import testing as tm
 from six import StringIO

+from xgboost import testing as tm
+
 if tm.no_spark()["condition"]:
    pytest.skip(msg=tm.no_spark()["reason"], allow_module_level=True)
 if sys.platform.startswith("win") or sys.platform.startswith("darwin"):
--- a/tests/python/test_survival.py
+++ b/tests/python/test_survival.py
@@ -1,11 +1,13 @@
-import testing as tm
-import pytest
-import numpy as np
-import xgboost as xgb
 import json
 import os

-dpath = os.path.join(tm.PROJECT_ROOT, 'demo', 'data')
+import numpy as np
+import pytest
+
+import xgboost as xgb
+from xgboost import testing as tm
+
+dpath = tm.data_dir(__file__)


 def test_aft_survival_toy_data():
--- a/tests/python/test_tracker.py
+++ b/tests/python/test_tracker.py
@@ -3,10 +3,10 @@ import sys

 import numpy as np
 import pytest
-import testing as tm

 import xgboost as xgb
-from xgboost import RabitTracker, testing
+from xgboost import RabitTracker
+from xgboost import testing as tm

 if sys.platform.startswith("win"):
    pytest.skip("Skipping dask tests on Windows", allow_module_level=True)
@@ -61,7 +61,7 @@ def test_rabit_ops():
            run_rabit_ops(client, n_workers)


-@pytest.mark.skipif(**testing.skip_ipv6())
+@pytest.mark.skipif(**tm.no_ipv6())
 @pytest.mark.skipif(**tm.no_dask())
 def test_rabit_ops_ipv6():
    import dask
--- a/tests/python/test_training_continuation.py
+++ b/tests/python/test_training_continuation.py
@@ -1,10 +1,11 @@
-import xgboost as xgb
-import testing as tm
-import numpy as np
-import pytest
 import os
 import tempfile

+import numpy as np
+import pytest
+
+import xgboost as xgb
+from xgboost import testing as tm

 rng = np.random.RandomState(1337)

--- a/tests/python/test_tree_regularization.py
+++ b/tests/python/test_tree_regularization.py
@@ -1,8 +1,8 @@
 import numpy as np
-import xgboost as xgb
-
 from numpy.testing import assert_approx_equal

+import xgboost as xgb
+
 train_data = xgb.DMatrix(np.array([[1]]), label=np.array([1]))


--- a/tests/python/test_updaters.py
+++ b/tests/python/test_updaters.py
@@ -1,11 +1,13 @@
 import json
 from string import ascii_lowercase
-from typing import Dict, Any
-import testing as tm
-import pytest
-import xgboost as xgb
+from typing import Any, Dict
+
 import numpy as np
-from hypothesis import given, strategies, settings, note
+import pytest
+from hypothesis import given, note, settings, strategies
+
+import xgboost as xgb
+from xgboost import testing as tm

 exact_parameter_strategy = strategies.fixed_dictionaries({
    'nthread': strategies.integers(1, 4),
--- a/tests/python/test_with_arrow.py
+++ b/tests/python/test_with_arrow.py
@@ -1,14 +1,16 @@
-import unittest
-import pytest
-import numpy as np
-import testing as tm
-import xgboost as xgb
 import os
+import unittest
+
+import numpy as np
+import pytest
+
+import xgboost as xgb
+from xgboost import testing as tm

 try:
+    import pandas as pd
    import pyarrow as pa
    import pyarrow.csv as pc
-    import pandas as pd
 except ImportError:
    pass

@@ -73,7 +75,7 @@ class TestArrowTable(unittest.TestCase):
        np.testing.assert_allclose(preds1, preds2)

    def test_arrow_survival(self):
-        data = os.path.join(tm.PROJECT_ROOT, "demo", "data", "veterans_lung_cancer.csv")
+        data = os.path.join(tm.data_dir(__file__), "veterans_lung_cancer.csv")
        table = pc.read_csv(data)
        y_lower_bound = table["Survival_label_lower_bound"]
        y_upper_bound = table["Survival_label_upper_bound"]
--- a/tests/python/test_with_dask.py
+++ b/tests/python/test_with_dask.py
@@ -20,7 +20,6 @@ import numpy as np
 import pytest
 import scipy
 import sklearn
-import testing as tm
 from hypothesis import HealthCheck, given, note, settings
 from sklearn.datasets import make_classification, make_regression
 from test_predict import verify_leaf_output
@@ -29,7 +28,7 @@ from test_with_sklearn import run_data_initialization, run_feature_weights
 from xgboost.data import _is_cudf_df

 import xgboost as xgb
-from xgboost import testing
+from xgboost import testing as tm

 if sys.platform.startswith("win"):
    pytest.skip("Skipping dask tests on Windows", allow_module_level=True)
@@ -45,7 +44,7 @@ from xgboost.dask import DaskDMatrix

 dask.config.set({"distributed.scheduler.allowed-failures": False})

-pytestmark = testing.timeout(30)
+pytestmark = tm.timeout(30)

 if hasattr(HealthCheck, 'function_scoped_fixture'):
    suppress = [HealthCheck.function_scoped_fixture]
@@ -1116,8 +1115,9 @@ def test_predict_with_meta(client: "Client") -> None:


 def run_aft_survival(client: "Client", dmatrix_t: Type) -> None:
-    df = dd.read_csv(os.path.join(tm.PROJECT_ROOT, 'demo', 'data',
-                                  'veterans_lung_cancer.csv'))
+    df = dd.read_csv(
+        os.path.join(tm.data_dir(__file__), "veterans_lung_cancer.csv")
+    )
    y_lower_bound = df['Survival_label_lower_bound']
    y_upper_bound = df['Survival_label_upper_bound']
    X = df.drop(['Survival_label_lower_bound',
--- a/tests/python/test_with_modin.py
+++ b/tests/python/test_with_modin.py
@@ -1,10 +1,10 @@
-# -*- coding: utf-8 -*-
 import numpy as np
-import xgboost as xgb
-import testing as tm
 import pytest
 from test_dmatrix import set_base_margin_info

+import xgboost as xgb
+from xgboost import testing as tm
+
 try:
    import modin.pandas as md
 except ImportError:
--- a/tests/python/test_with_pandas.py
+++ b/tests/python/test_with_pandas.py
@@ -1,11 +1,13 @@
 import os
 import tempfile
+
 import numpy as np
-import xgboost as xgb
-import testing as tm
 import pytest
 from test_dmatrix import set_base_margin_info

+import xgboost as xgb
+from xgboost import testing as tm
+
 try:
    import pandas as pd
 except ImportError:
--- a/tests/python/test_with_shap.py
+++ b/tests/python/test_with_shap.py
@@ -1,7 +1,8 @@
 import numpy as np
-import xgboost as xgb
 import pytest

+import xgboost as xgb
+
 try:
    import shap
 except ImportError:
--- a/tests/python/test_with_sklearn.py
+++ b/tests/python/test_with_sklearn.py
@@ -8,14 +8,13 @@ from typing import Callable, Optional

 import numpy as np
 import pytest
-import testing as tm
 from sklearn.utils.estimator_checks import parametrize_with_checks

 import xgboost as xgb
-from xgboost import testing
+from xgboost import testing as tm

 rng = np.random.RandomState(1994)
-pytestmark = [pytest.mark.skipif(**tm.no_sklearn()), testing.timeout(30)]
+pytestmark = [pytest.mark.skipif(**tm.no_sklearn()), tm.timeout(30)]


 def test_binary_classification():
@@ -155,11 +154,10 @@ def test_ranking():


 def test_stacking_regression():
-    from sklearn.model_selection import train_test_split
    from sklearn.datasets import load_diabetes
+    from sklearn.ensemble import RandomForestRegressor, StackingRegressor
    from sklearn.linear_model import RidgeCV
-    from sklearn.ensemble import RandomForestRegressor
-    from sklearn.ensemble import StackingRegressor
+    from sklearn.model_selection import train_test_split

    X, y = load_diabetes(return_X_y=True)
    estimators = [
@@ -177,13 +175,13 @@ def test_stacking_regression():


 def test_stacking_classification():
-    from sklearn.model_selection import train_test_split
    from sklearn.datasets import load_iris
-    from sklearn.svm import LinearSVC
-    from sklearn.linear_model import LogisticRegression
-    from sklearn.preprocessing import StandardScaler
-    from sklearn.pipeline import make_pipeline
    from sklearn.ensemble import StackingClassifier
+    from sklearn.linear_model import LogisticRegression
+    from sklearn.model_selection import train_test_split
+    from sklearn.pipeline import make_pipeline
+    from sklearn.preprocessing import StandardScaler
+    from sklearn.svm import LinearSVC

    X, y = load_iris(return_X_y=True)
    estimators = [
@@ -354,8 +352,8 @@ def test_num_parallel_tree():


 def test_regression():
-    from sklearn.metrics import mean_squared_error
    from sklearn.datasets import fetch_california_housing
+    from sklearn.metrics import mean_squared_error
    from sklearn.model_selection import KFold

    X, y = fetch_california_housing(return_X_y=True)
@@ -383,8 +381,8 @@ def test_regression():


 def run_housing_rf_regression(tree_method):
-    from sklearn.metrics import mean_squared_error
    from sklearn.datasets import fetch_california_housing
+    from sklearn.metrics import mean_squared_error
    from sklearn.model_selection import KFold

    X, y = fetch_california_housing(return_X_y=True)
@@ -407,8 +405,8 @@ def test_rf_regression():


 def test_parameter_tuning():
-    from sklearn.model_selection import GridSearchCV
    from sklearn.datasets import fetch_california_housing
+    from sklearn.model_selection import GridSearchCV

    X, y = fetch_california_housing(return_X_y=True)
    xgb_model = xgb.XGBRegressor(learning_rate=0.1)
@@ -421,8 +419,8 @@ def test_parameter_tuning():


 def test_regression_with_custom_objective():
-    from sklearn.metrics import mean_squared_error
    from sklearn.datasets import fetch_california_housing
+    from sklearn.metrics import mean_squared_error
    from sklearn.model_selection import KFold

    def objective_ls(y_true, y_pred):
@@ -539,8 +537,8 @@ def test_sklearn_plotting():
    import matplotlib
    matplotlib.use('Agg')

-    from matplotlib.axes import Axes
    from graphviz import Source
+    from matplotlib.axes import Axes

    ax = xgb.plot_importance(classifier)
    assert isinstance(ax, Axes)
@@ -666,8 +664,8 @@ def test_kwargs_error():


 def test_kwargs_grid_search():
-    from sklearn.model_selection import GridSearchCV
    from sklearn import datasets
+    from sklearn.model_selection import GridSearchCV

    params = {'tree_method': 'hist'}
    clf = xgb.XGBClassifier(n_estimators=1, learning_rate=1.0, **params)
@@ -841,9 +839,7 @@ def test_save_load_model():


 def test_RFECV():
-    from sklearn.datasets import load_diabetes
-    from sklearn.datasets import load_breast_cancer
-    from sklearn.datasets import load_iris
+    from sklearn.datasets import load_breast_cancer, load_diabetes, load_iris
    from sklearn.feature_selection import RFECV

    # Regression
@@ -1046,8 +1042,9 @@ def run_feature_weights(X, y, fw, tree_method, model=xgb.XGBRegressor):
        with open(model_path) as fd:
            model = json.load(fd)

-        parser_path = os.path.join(tm.PROJECT_ROOT, 'demo', 'json-model',
-                                   'json_parser.py')
+        parser_path = os.path.join(
+            tm.demo_dir(__file__), "json-model", "json_parser.py"
+        )
        spec = importlib.util.spec_from_file_location("JsonParser",
                                                      parser_path)
        foo = importlib.util.module_from_spec(spec)
@@ -1162,8 +1159,8 @@ def run_boost_from_prediction_multi_clasas(

 @pytest.mark.parametrize("tree_method", ["hist", "approx", "exact"])
 def test_boost_from_prediction(tree_method):
-    from sklearn.datasets import load_breast_cancer, load_iris, make_regression
    import pandas as pd
+    from sklearn.datasets import load_breast_cancer, load_iris, make_regression

    X, y = load_breast_cancer(return_X_y=True)

--- a/tests/python/testing.py
+++ b/tests/python/testing.py
@@ -1,806 +0,0 @@
-from concurrent.futures import ThreadPoolExecutor
-import os
-import multiprocessing
-from typing import Tuple, Union, List, Sequence, Callable
-import urllib
-import zipfile
-import sys
-from typing import Optional, Dict, Any
-from contextlib import contextmanager
-from io import StringIO
-from xgboost.compat import SKLEARN_INSTALLED, PANDAS_INSTALLED
-import pytest
-import gc
-import xgboost as xgb
-from xgboost.core import ArrayLike
-import numpy as np
-from scipy import sparse
-import platform
-
-hypothesis = pytest.importorskip('hypothesis')
-sklearn = pytest.importorskip('sklearn')
-from hypothesis import strategies
-from hypothesis.extra.numpy import arrays
-from joblib import Memory
-from sklearn import datasets
-
-try:
-    import cupy as cp
-except ImportError:
-    cp = None
-
-memory = Memory('./cachedir', verbose=0)
-
-
-def no_ubjson():
-    reason = "ubjson is not intsalled."
-    try:
-        import ubjson           # noqa
-        return {"condition": False, "reason": reason}
-    except ImportError:
-        return {"condition": True, "reason": reason}
-
-
-def no_sklearn():
-    return {'condition': not SKLEARN_INSTALLED,
-            'reason': 'Scikit-Learn is not installed'}
-
-
-def no_dask():
-    try:
-        import pkg_resources
-
-        pkg_resources.get_distribution("dask")
-        DASK_INSTALLED = True
-    except pkg_resources.DistributionNotFound:
-        DASK_INSTALLED = False
-    return {"condition": not DASK_INSTALLED, "reason": "Dask is not installed"}
-
-
-def no_spark():
-    try:
-        import pyspark          # noqa
-        SPARK_INSTALLED = True
-    except ImportError:
-        SPARK_INSTALLED = False
-    return {"condition": not SPARK_INSTALLED, "reason": "Spark is not installed"}
-
-
-def no_pandas():
-    return {'condition': not PANDAS_INSTALLED,
-            'reason': 'Pandas is not installed.'}
-
-
-def no_arrow():
-    reason = "pyarrow is not installed"
-    try:
-        import pyarrow  # noqa
-        return {"condition": False, "reason": reason}
-    except ImportError:
-        return {"condition": True, "reason": reason}
-
-
-def no_modin():
-    reason = 'Modin is not installed.'
-    try:
-        import modin.pandas as _  # noqa
-        return {'condition': False, 'reason': reason}
-    except ImportError:
-        return {'condition': True, 'reason': reason}
-
-
-def no_dt():
-    import importlib.util
-    spec = importlib.util.find_spec('datatable')
-    return {'condition': spec is None,
-            'reason': 'Datatable is not installed.'}
-
-
-def no_matplotlib():
-    reason = 'Matplotlib is not installed.'
-    try:
-        import matplotlib.pyplot as _  # noqa
-        return {'condition': False,
-                'reason': reason}
-    except ImportError:
-        return {'condition': True,
-                'reason': reason}
-
-
-def no_dask_cuda():
-    reason = 'dask_cuda is not installed.'
-    try:
-        import dask_cuda as _  # noqa
-        return {'condition': False, 'reason': reason}
-    except ImportError:
-        return {'condition': True, 'reason': reason}
-
-
-def no_cudf():
-    try:
-        import cudf  # noqa
-        CUDF_INSTALLED = True
-    except ImportError:
-        CUDF_INSTALLED = False
-
-    return {'condition': not CUDF_INSTALLED,
-            'reason': 'CUDF is not installed'}
-
-
-def no_cupy():
-    reason = 'cupy is not installed.'
-    try:
-        import cupy as _  # noqa
-        return {'condition': False, 'reason': reason}
-    except ImportError:
-        return {'condition': True, 'reason': reason}
-
-
-def no_dask_cudf():
-    reason = 'dask_cudf is not installed.'
-    try:
-        import dask_cudf as _  # noqa
-        return {'condition': False, 'reason': reason}
-    except ImportError:
-        return {'condition': True, 'reason': reason}
-
-
-def no_json_schema():
-    reason = 'jsonschema is not installed'
-    try:
-        import jsonschema  # noqa
-        return {'condition': False, 'reason': reason}
-    except ImportError:
-        return {'condition': True, 'reason': reason}
-
-
-def no_graphviz():
-    reason = 'graphviz is not installed'
-    try:
-        import graphviz  # noqa
-        return {'condition': False, 'reason': reason}
-    except ImportError:
-        return {'condition': True, 'reason': reason}
-
-
-def no_multiple(*args):
-    condition = False
-    reason = ''
-    for arg in args:
-        condition = (condition or arg['condition'])
-        if arg['condition']:
-            reason = arg['reason']
-            break
-    return {'condition': condition, 'reason': reason}
-
-
-def skip_s390x():
-    condition = platform.machine() == "s390x"
-    reason = "Known to fail on s390x"
-    return {"condition": condition, "reason": reason}
-
-
-class IteratorForTest(xgb.core.DataIter):
-    def __init__(
-        self,
-        X: Sequence,
-        y: Sequence,
-        w: Optional[Sequence],
-        cache: Optional[str] = "./"
-    ) -> None:
-        assert len(X) == len(y)
-        self.X = X
-        self.y = y
-        self.w = w
-        self.it = 0
-        super().__init__(cache)
-
-    def next(self, input_data: Callable) -> int:
-        if self.it == len(self.X):
-            return 0
-
-        with pytest.raises(TypeError, match="keyword args"):
-            input_data(self.X[self.it], self.y[self.it], None)
-
-        # Use copy to make sure the iterator doesn't hold a reference to the data.
-        input_data(
-            data=self.X[self.it].copy(),
-            label=self.y[self.it].copy(),
-            weight=self.w[self.it].copy() if self.w else None,
-        )
-        gc.collect()  # clear up the copy, see if XGBoost access freed memory.
-        self.it += 1
-        return 1
-
-    def reset(self) -> None:
-        self.it = 0
-
-    def as_arrays(
-        self,
-    ) -> Tuple[Union[np.ndarray, sparse.csr_matrix], ArrayLike, ArrayLike]:
-        if isinstance(self.X[0], sparse.csr_matrix):
-            X = sparse.vstack(self.X, format="csr")
-        else:
-            X = np.concatenate(self.X, axis=0)
-        y = np.concatenate(self.y, axis=0)
-        if self.w:
-            w = np.concatenate(self.w, axis=0)
-        else:
-            w = None
-        return X, y, w
-
-
-def make_batches(
-    n_samples_per_batch: int, n_features: int, n_batches: int, use_cupy: bool = False
-) -> Tuple[List[np.ndarray], List[np.ndarray], List[np.ndarray]]:
-    X = []
-    y = []
-    w = []
-    if use_cupy:
-        import cupy
-
-        rng = cupy.random.RandomState(1994)
-    else:
-        rng = np.random.RandomState(1994)
-    for i in range(n_batches):
-        _X = rng.randn(n_samples_per_batch, n_features)
-        _y = rng.randn(n_samples_per_batch)
-        _w = rng.uniform(low=0, high=1, size=n_samples_per_batch)
-        X.append(_X)
-        y.append(_y)
-        w.append(_w)
-    return X, y, w
-
-
-def make_batches_sparse(
-    n_samples_per_batch: int, n_features: int, n_batches: int, sparsity: float
-) -> Tuple[List[sparse.csr_matrix], List[np.ndarray], List[np.ndarray]]:
-    X = []
-    y = []
-    w = []
-    rng = np.random.RandomState(1994)
-    for i in range(n_batches):
-        _X = sparse.random(
-            n_samples_per_batch,
-            n_features,
-            1.0 - sparsity,
-            format="csr",
-            dtype=np.float32,
-            random_state=rng,
-        )
-        _y = rng.randn(n_samples_per_batch)
-        _w = rng.uniform(low=0, high=1, size=n_samples_per_batch)
-        X.append(_X)
-        y.append(_y)
-        w.append(_w)
-    return X, y, w
-
-
-# Contains a dataset in numpy format as well as the relevant objective and metric
-class TestDataset:
-    def __init__(
-        self, name: str, get_dataset: Callable, objective: str, metric: str
-    ) -> None:
-        self.name = name
-        self.objective = objective
-        self.metric = metric
-        self.X, self.y = get_dataset()
-        self.w: Optional[np.ndarray] = None
-        self.margin: Optional[np.ndarray] = None
-
-    def set_params(self, params_in: Dict[str, Any]) -> Dict[str, Any]:
-        params_in['objective'] = self.objective
-        params_in['eval_metric'] = self.metric
-        if self.objective == "multi:softmax":
-            params_in["num_class"] = int(np.max(self.y) + 1)
-        return params_in
-
-    def get_dmat(self) -> xgb.DMatrix:
-        return xgb.DMatrix(
-            self.X, self.y, self.w, base_margin=self.margin, enable_categorical=True
-        )
-
-    def get_device_dmat(self) -> xgb.DeviceQuantileDMatrix:
-        w = None if self.w is None else cp.array(self.w)
-        X = cp.array(self.X, dtype=np.float32)
-        y = cp.array(self.y, dtype=np.float32)
-        return xgb.DeviceQuantileDMatrix(X, y, w, base_margin=self.margin)
-
-    def get_external_dmat(self) -> xgb.DMatrix:
-        n_samples = self.X.shape[0]
-        n_batches = 10
-        per_batch = n_samples // n_batches + 1
-
-        predictor = []
-        response = []
-        weight = []
-        for i in range(n_batches):
-            beg = i * per_batch
-            end = min((i + 1) * per_batch, n_samples)
-            assert end != beg
-            X = self.X[beg: end, ...]
-            y = self.y[beg: end]
-            w = self.w[beg: end] if self.w is not None else None
-            predictor.append(X)
-            response.append(y)
-            if w is not None:
-                weight.append(w)
-
-        it = IteratorForTest(predictor, response, weight if weight else None)
-        return xgb.DMatrix(it)
-
-    def __repr__(self) -> str:
-        return self.name
-
-
-@memory.cache
-def get_california_housing():
-    data = datasets.fetch_california_housing()
-    return data.data, data.target
-
-
-@memory.cache
-def get_digits():
-    data = datasets.load_digits()
-    return data.data, data.target
-
-
-@memory.cache
-def get_cancer():
-    data = datasets.load_breast_cancer()
-    return data.data, data.target
-
-
-@memory.cache
-def get_sparse():
-    rng = np.random.RandomState(199)
-    n = 2000
-    sparsity = 0.75
-    X, y = datasets.make_regression(n, random_state=rng)
-    flag = rng.binomial(1, sparsity, X.shape)
-    for i in range(X.shape[0]):
-        for j in range(X.shape[1]):
-            if flag[i, j]:
-                X[i, j] = np.nan
-    return X, y
-
-
-@memory.cache
-def get_ames_housing():
-    """
-    Number of samples: 1460
-    Number of features: 20
-    Number of categorical features: 10
-    Number of numerical features: 10
-    """
-    from sklearn.datasets import fetch_openml
-    X, y = fetch_openml(data_id=42165, as_frame=True, return_X_y=True)
-
-    categorical_columns_subset: list[str] = [
-        "BldgType",             # 5 cats, no nan
-        "GarageFinish",         # 3 cats, nan
-        "LotConfig",            # 5 cats, no nan
-        "Functional",           # 7 cats, no nan
-        "MasVnrType",           # 4 cats, nan
-        "HouseStyle",           # 8 cats, no nan
-        "FireplaceQu",          # 5 cats, nan
-        "ExterCond",            # 5 cats, no nan
-        "ExterQual",            # 4 cats, no nan
-        "PoolQC",               # 3 cats, nan
-    ]
-
-    numerical_columns_subset: list[str] = [
-        "3SsnPorch",
-        "Fireplaces",
-        "BsmtHalfBath",
-        "HalfBath",
-        "GarageCars",
-        "TotRmsAbvGrd",
-        "BsmtFinSF1",
-        "BsmtFinSF2",
-        "GrLivArea",
-        "ScreenPorch",
-    ]
-
-    X = X[categorical_columns_subset + numerical_columns_subset]
-    X[categorical_columns_subset] = X[categorical_columns_subset].astype("category")
-    return X, y
-
-
-@memory.cache
-def get_mq2008(dpath):
-    from sklearn.datasets import load_svmlight_files
-
-    src = 'https://s3-us-west-2.amazonaws.com/xgboost-examples/MQ2008.zip'
-    target = dpath + '/MQ2008.zip'
-    if not os.path.exists(target):
-        urllib.request.urlretrieve(url=src, filename=target)
-
-    with zipfile.ZipFile(target, 'r') as f:
-        f.extractall(path=dpath)
-
-    (x_train, y_train, qid_train, x_test, y_test, qid_test,
-     x_valid, y_valid, qid_valid) = load_svmlight_files(
-         (dpath + "MQ2008/Fold1/train.txt",
-          dpath + "MQ2008/Fold1/test.txt",
-          dpath + "MQ2008/Fold1/vali.txt"),
-         query_id=True, zero_based=False)
-
-    return (x_train, y_train, qid_train, x_test, y_test, qid_test,
-            x_valid, y_valid, qid_valid)
-
-
-@memory.cache
-def make_categorical(
-    n_samples: int, n_features: int, n_categories: int, onehot: bool, sparsity=0.0,
-):
-    import pandas as pd
-
-    rng = np.random.RandomState(1994)
-
-    pd_dict = {}
-    for i in range(n_features + 1):
-        c = rng.randint(low=0, high=n_categories, size=n_samples)
-        pd_dict[str(i)] = pd.Series(c, dtype=np.int64)
-
-    df = pd.DataFrame(pd_dict)
-    label = df.iloc[:, 0]
-    df = df.iloc[:, 1:]
-    for i in range(0, n_features):
-        label += df.iloc[:, i]
-    label += 1
-
-    df = df.astype("category")
-    categories = np.arange(0, n_categories)
-    for col in df.columns:
-        df[col] = df[col].cat.set_categories(categories)
-
-    if sparsity > 0.0:
-        for i in range(n_features):
-            index = rng.randint(low=0, high=n_samples-1, size=int(n_samples * sparsity))
-            df.iloc[index, i] = np.NaN
-            assert n_categories == np.unique(df.dtypes[i].categories).size
-
-    if onehot:
-        return pd.get_dummies(df), label
-    return df, label
-
-
-def _cat_sampled_from():
-    @strategies.composite
-    def _make_cat(draw):
-        n_samples = draw(strategies.integers(2, 512))
-        n_features = draw(strategies.integers(1, 4))
-        n_cats = draw(strategies.integers(1, 128))
-        sparsity = draw(
-            strategies.floats(
-                min_value=0,
-                max_value=1,
-                allow_nan=False,
-                allow_infinity=False,
-                allow_subnormal=False,
-            )
-        )
-        return n_samples, n_features, n_cats, sparsity
-
-    def _build(args):
-        n_samples = args[0]
-        n_features = args[1]
-        n_cats = args[2]
-        sparsity = args[3]
-        return TestDataset(
-            f"{n_samples}x{n_features}-{n_cats}-{sparsity}",
-            lambda: make_categorical(n_samples, n_features, n_cats, False, sparsity),
-            "reg:squarederror",
-            "rmse",
-        )
-
-    return _make_cat().map(_build)
-
-
-categorical_dataset_strategy = _cat_sampled_from()
-
-
-@memory.cache
-def make_sparse_regression(
-    n_samples: int, n_features: int, sparsity: float, as_dense: bool
-) -> Tuple[Union[sparse.csr_matrix], np.ndarray]:
-    """Make sparse matrix.
-
-    Parameters
-    ----------
-
-    as_dense:
-
-      Return the matrix as np.ndarray with missing values filled by NaN
-
-    """
-    if not hasattr(np.random, "default_rng"):
-        # old version of numpy on s390x
-        rng = np.random.RandomState(1994)
-        X = sparse.random(
-            m=n_samples,
-            n=n_features,
-            density=1.0 - sparsity,
-            random_state=rng,
-            format="csr",
-        )
-        y = rng.normal(loc=0.0, scale=1.0, size=n_samples)
-        return X, y
-
-    # Use multi-thread to speed up the generation, convenient if you use this function
-    # for benchmarking.
-    n_threads = multiprocessing.cpu_count()
-    n_threads = min(n_threads, n_features)
-
-    def random_csc(t_id: int) -> sparse.csc_matrix:
-        rng = np.random.default_rng(1994 * t_id)
-        thread_size = n_features // n_threads
-        if t_id == n_threads - 1:
-            n_features_tloc = n_features - t_id * thread_size
-        else:
-            n_features_tloc = thread_size
-
-        X = sparse.random(
-            m=n_samples,
-            n=n_features_tloc,
-            density=1.0 - sparsity,
-            random_state=rng,
-        ).tocsc()
-        y = np.zeros((n_samples, 1))
-
-        for i in range(X.shape[1]):
-            size = X.indptr[i + 1] - X.indptr[i]
-            if size != 0:
-                y += X[:, i].toarray() * rng.random((n_samples, 1)) * 0.2
-
-        return X, y
-
-    futures = []
-    with ThreadPoolExecutor(max_workers=n_threads) as executor:
-        for i in range(n_threads):
-            futures.append(executor.submit(random_csc, i))
-
-    X_results = []
-    y_results = []
-    for f in futures:
-        X, y = f.result()
-        X_results.append(X)
-        y_results.append(y)
-
-    assert len(y_results) == n_threads
-
-    csr: sparse.csr_matrix = sparse.hstack(X_results, format="csr")
-    y = np.asarray(y_results)
-    y = y.reshape((y.shape[0], y.shape[1])).T
-    y = np.sum(y, axis=1)
-
-    assert csr.shape[0] == n_samples
-    assert csr.shape[1] == n_features
-    assert y.shape[0] == n_samples
-
-    if as_dense:
-        arr = csr.toarray()
-        assert arr.shape[0] == n_samples
-        assert arr.shape[1] == n_features
-        arr[arr == 0] = np.nan
-        return arr, y
-
-    return csr, y
-
-
-sparse_datasets_strategy = strategies.sampled_from(
-    [
-        TestDataset(
-            "1e5x8-0.95-csr",
-            lambda: make_sparse_regression(int(1e5), 8, 0.95, False),
-            "reg:squarederror",
-            "rmse",
-        ),
-        TestDataset(
-            "1e5x8-0.5-csr",
-            lambda: make_sparse_regression(int(1e5), 8, 0.5, False),
-            "reg:squarederror",
-            "rmse",
-        ),
-        TestDataset(
-            "1e5x8-0.5-dense",
-            lambda: make_sparse_regression(int(1e5), 8, 0.5, True),
-            "reg:squarederror",
-            "rmse",
-        ),
-        TestDataset(
-            "1e5x8-0.05-csr",
-            lambda: make_sparse_regression(int(1e5), 8, 0.05, False),
-            "reg:squarederror",
-            "rmse",
-        ),
-        TestDataset(
-            "1e5x8-0.05-dense",
-            lambda: make_sparse_regression(int(1e5), 8, 0.05, True),
-            "reg:squarederror",
-            "rmse",
-        ),
-    ]
-)
-
-_unweighted_datasets_strategy = strategies.sampled_from(
-    [
-        TestDataset(
-            "calif_housing", get_california_housing, "reg:squarederror", "rmse"
-        ),
-        TestDataset(
-            "calif_housing-l1", get_california_housing, "reg:absoluteerror", "mae"
-        ),
-        TestDataset("digits", get_digits, "multi:softmax", "mlogloss"),
-        TestDataset("cancer", get_cancer, "binary:logistic", "logloss"),
-        TestDataset(
-            "mtreg",
-            lambda: datasets.make_regression(n_samples=128, n_targets=3),
-            "reg:squarederror",
-            "rmse",
-        ),
-        TestDataset("sparse", get_sparse, "reg:squarederror", "rmse"),
-        TestDataset("sparse-l1", get_sparse, "reg:absoluteerror", "mae"),
-        TestDataset(
-            "empty",
-            lambda: (np.empty((0, 100)), np.empty(0)),
-            "reg:squarederror",
-            "rmse",
-        ),
-    ]
-)
-
-
-@strategies.composite
-def _dataset_weight_margin(draw):
-    data: TestDataset = draw(_unweighted_datasets_strategy)
-    if draw(strategies.booleans()):
-        data.w = draw(
-            arrays(np.float64, (len(data.y)), elements=strategies.floats(0.1, 2.0))
-        )
-    if draw(strategies.booleans()):
-        num_class = 1
-        if data.objective == "multi:softmax":
-            num_class = int(np.max(data.y) + 1)
-        elif data.name == "mtreg":
-            num_class = data.y.shape[1]
-
-        data.margin = draw(
-            arrays(
-                np.float64,
-                (data.y.shape[0] * num_class),
-                elements=strategies.floats(0.5, 1.0),
-            )
-        )
-        if num_class != 1:
-            data.margin = data.margin.reshape(data.y.shape[0], num_class)
-
-    return data
-
-
-# A strategy for drawing from a set of example datasets
-# May add random weights to the dataset
-dataset_strategy = _dataset_weight_margin()
-
-
-def non_increasing(L, tolerance=1e-4):
-    return all((y - x) < tolerance for x, y in zip(L, L[1:]))
-
-
-def eval_error_metric(predt, dtrain: xgb.DMatrix):
-    """Evaluation metric for xgb.train"""
-    label = dtrain.get_label()
-    r = np.zeros(predt.shape)
-    gt = predt > 0.5
-    if predt.size == 0:
-        return "CustomErr", 0
-    r[gt] = 1 - label[gt]
-    le = predt <= 0.5
-    r[le] = label[le]
-    return 'CustomErr', np.sum(r)
-
-
-def eval_error_metric_skl(y_true: np.ndarray, y_score: np.ndarray) -> float:
-    """Evaluation metric that looks like metrics provided by sklearn."""
-    r = np.zeros(y_score.shape)
-    gt = y_score > 0.5
-    r[gt] = 1 - y_true[gt]
-    le = y_score <= 0.5
-    r[le] = y_true[le]
-    return np.sum(r)
-
-
-def root_mean_square(y_true: np.ndarray, y_score: np.ndarray) -> float:
-    err = y_score - y_true
-    rmse = np.sqrt(np.dot(err, err) / y_score.size)
-    return rmse
-
-
-def softmax(x):
-    e = np.exp(x)
-    return e / np.sum(e)
-
-
-def softprob_obj(classes):
-    def objective(labels, predt):
-        rows = labels.shape[0]
-        grad = np.zeros((rows, classes), dtype=float)
-        hess = np.zeros((rows, classes), dtype=float)
-        eps = 1e-6
-        for r in range(predt.shape[0]):
-            target = labels[r]
-            p = softmax(predt[r, :])
-            for c in range(predt.shape[1]):
-                assert target >= 0 or target <= classes
-                g = p[c] - 1.0 if c == target else p[c]
-                h = max((2.0 * p[c] * (1.0 - p[c])).item(), eps)
-                grad[r, c] = g
-                hess[r, c] = h
-
-        grad = grad.reshape((rows * classes, 1))
-        hess = hess.reshape((rows * classes, 1))
-        return grad, hess
-
-    return objective
-
-
-class DirectoryExcursion:
-    def __init__(self, path: os.PathLike, cleanup=False):
-        '''Change directory.  Change back and optionally cleaning up the directory when exit.
-
-        '''
-        self.path = path
-        self.curdir = os.path.normpath(os.path.abspath(os.path.curdir))
-        self.cleanup = cleanup
-        self.files = {}
-
-    def __enter__(self):
-        os.chdir(self.path)
-        if self.cleanup:
-            self.files = {
-                os.path.join(root, f)
-                for root, subdir, files in os.walk(self.path) for f in files
-            }
-
-    def __exit__(self, *args):
-        os.chdir(self.curdir)
-        if self.cleanup:
-            files = {
-                os.path.join(root, f)
-                for root, subdir, files in os.walk(self.path) for f in files
-            }
-            diff = files.difference(self.files)
-            for f in diff:
-                os.remove(f)
-
-
-@contextmanager
-def captured_output():
-    """Reassign stdout temporarily in order to test printed statements
-    Taken from:
-    https://stackoverflow.com/questions/4219717/how-to-assert-output-with-nosetest-unittest-in-python
-
-    Also works for pytest.
-
-    """
-    new_out, new_err = StringIO(), StringIO()
-    old_out, old_err = sys.stdout, sys.stderr
-    try:
-        sys.stdout, sys.stderr = new_out, new_err
-        yield sys.stdout, sys.stderr
-    finally:
-        sys.stdout, sys.stderr = old_out, old_err
-
-
-try:
-    # Python 3.7+
-    from contextlib import nullcontext as noop_context
-except ImportError:
-    # Python 3.6
-    from contextlib import suppress as noop_context
-
-
-CURDIR = os.path.normpath(os.path.abspath(os.path.dirname(__file__)))
-PROJECT_ROOT = os.path.normpath(
-    os.path.join(CURDIR, os.path.pardir, os.path.pardir))
--- a/tests/python/with_omp_limit.py
+++ b/tests/python/with_omp_limit.py
@@ -1,7 +1,9 @@
-import xgboost as xgb
+import sys
+
 from sklearn.datasets import make_classification
 from sklearn.metrics import roc_auc_score
-import sys
+
+import xgboost as xgb


 def run_omp(output_path: str):