Move Python testing utilities into xgboost module. (#8379)

- Add type hints.
- Fixes for pylint.

Co-authored-by: Hyunsu Philip Cho <chohyu01@cs.washington.edu>
2022-10-26 16:56:11 +08:00
parent 7e53189e7c
commit cf70864fa3
66 changed files with 652 additions and 595 deletions
--- a/tests/python-gpu/conftest.py
+++ b/tests/python-gpu/conftest.py
@@ -1,9 +1,7 @@
-import sys
 import pytest
-import logging

-sys.path.append("tests/python")
-import testing as tm                          # noqa
+from xgboost import testing as tm  # noqa
+

 def has_rmm():
    try:
@@ -34,8 +32,8 @@ def local_cuda_client(request, pytestconfig):
        kwargs['rmm_pool_size'] = '2GB'
    if tm.no_dask_cuda()['condition']:
        raise ImportError('The local_cuda_cluster fixture requires dask_cuda package')
-    from dask_cuda import LocalCUDACluster
    from dask.distributed import Client
+    from dask_cuda import LocalCUDACluster
    yield Client(LocalCUDACluster(**kwargs))

 def pytest_addoption(parser):
--- a/tests/python-gpu/load_pickle.py
+++ b/tests/python-gpu/load_pickle.py
@@ -1,16 +1,14 @@
 '''Loading a pickled model generated by test_pickling.py, only used by
 `test_gpu_with_dask.py`'''
-import os
-import numpy as np
-import xgboost as xgb
 import json
+import os
+
+import numpy as np
 import pytest
-import sys
+from test_gpu_pickling import build_dataset, load_pickle, model_path

-from test_gpu_pickling import build_dataset, model_path, load_pickle
-
-sys.path.append("tests/python")
-import testing as tm
+import xgboost as xgb
+from xgboost import testing as tm


 class TestLoadPickle:
--- a/tests/python-gpu/test_device_quantile_dmatrix.py
+++ b/tests/python-gpu/test_device_quantile_dmatrix.py
@@ -5,10 +5,10 @@ import pytest
 from hypothesis import given, settings, strategies

 import xgboost as xgb
+from xgboost import testing as tm

 sys.path.append("tests/python")
 import test_quantile_dmatrix as tqd
-import testing as tm


 class TestDeviceQuantileDMatrix:
--- a/tests/python-gpu/test_from_cudf.py
+++ b/tests/python-gpu/test_from_cudf.py
@@ -2,11 +2,12 @@ import json
 import sys

 import numpy as np
-import xgboost as xgb
 import pytest

+import xgboost as xgb
+from xgboost import testing as tm
+
 sys.path.append("tests/python")
-import testing as tm
 from test_dmatrix import set_base_margin_info


@@ -85,8 +86,8 @@ def _test_from_cudf(DMatrixT):


 def _test_cudf_training(DMatrixT):
-    from cudf import DataFrame as df
    import pandas as pd
+    from cudf import DataFrame as df
    np.random.seed(1)
    X = pd.DataFrame(np.random.randn(50, 10))
    y = pd.DataFrame(np.random.randn(50))
@@ -109,8 +110,8 @@ def _test_cudf_training(DMatrixT):


 def _test_cudf_metainfo(DMatrixT):
-    from cudf import DataFrame as df
    import pandas as pd
+    from cudf import DataFrame as df
    n = 100
    X = np.random.random((n, 2))
    dmat_cudf = DMatrixT(df.from_pandas(pd.DataFrame(X)))
@@ -247,9 +248,9 @@ Arrow specification.'''
@pytest.mark.skipif(**tm.no_sklearn())
@pytest.mark.skipif(**tm.no_pandas())
 def test_cudf_training_with_sklearn():
+    import pandas as pd
    from cudf import DataFrame as df
    from cudf import Series as ss
-    import pandas as pd
    np.random.seed(1)
    X = pd.DataFrame(np.random.randn(50, 10))
    y = pd.DataFrame((np.random.randn(50) > 0).astype(np.int8))
--- a/tests/python-gpu/test_from_cupy.py
+++ b/tests/python-gpu/test_from_cupy.py
@@ -1,12 +1,15 @@
-import numpy as np
-import xgboost as xgb
 import sys
+
+import numpy as np
 import pytest

+import xgboost as xgb
+
 sys.path.append("tests/python")
-import testing as tm
 from test_dmatrix import set_base_margin_info

+from xgboost import testing as tm
+

 def dmatrix_from_cupy(input_type, DMatrixT, missing=np.NAN):
    '''Test constructing DMatrix from cupy'''
--- a/tests/python-gpu/test_gpu_basic_models.py
+++ b/tests/python-gpu/test_gpu_basic_models.py
@@ -1,13 +1,18 @@
-import sys
 import os
+import sys
+
 import numpy as np
-import xgboost as xgb
 import pytest
+
+import xgboost as xgb
+from xgboost import testing as tm
+
 sys.path.append("tests/python")
+import test_basic_models as test_bm
+
 # Don't import the test class, otherwise they will run twice.
 import test_callback as test_cb  # noqa
-import test_basic_models as test_bm
-import testing as tm
+
 rng = np.random.RandomState(1994)


--- a/tests/python-gpu/test_gpu_data_iterator.py
+++ b/tests/python-gpu/test_gpu_data_iterator.py
@@ -1,13 +1,12 @@
-import numpy as np
-import xgboost as xgb
-from hypothesis import given, strategies, settings
-import pytest
 import sys

+import pytest
+from hypothesis import given, settings, strategies
+from xgboost.testing import no_cupy
+
 sys.path.append("tests/python")
-from test_data_iterator import test_single_batch as cpu_single_batch
 from test_data_iterator import run_data_iterator
-from testing import no_cupy
+from test_data_iterator import test_single_batch as cpu_single_batch


 def test_gpu_single_batch() -> None:
@@ -24,7 +23,11 @@ def test_gpu_single_batch() -> None:
 )
@settings(deadline=None, max_examples=10, print_blob=True)
 def test_gpu_data_iterator(
-    n_samples_per_batch: int, n_features: int, n_batches: int, subsample: bool, use_cupy: bool
+    n_samples_per_batch: int,
+    n_features: int,
+    n_batches: int,
+    subsample: bool,
+    use_cupy: bool,
 ) -> None:
    run_data_iterator(
        n_samples_per_batch, n_features, n_batches, "gpu_hist", subsample, use_cupy
--- a/tests/python-gpu/test_gpu_demos.py
+++ b/tests/python-gpu/test_gpu_demos.py
@@ -1,10 +1,13 @@
 import os
 import subprocess
 import sys
+
 import pytest
+
+from xgboost import testing as tm
+
 sys.path.append("tests/python")
-import testing as tm
-import test_demos as td         # noqa
+import test_demos as td  # noqa


@pytest.mark.skipif(**tm.no_cupy())
@@ -31,6 +34,6 @@ def test_categorical_demo():
@pytest.mark.skipif(**tm.no_cupy())
@pytest.mark.mgpu
 def test_dask_training():
-    script = os.path.join(tm.PROJECT_ROOT, 'demo', 'dask', 'gpu_training.py')
+    script = os.path.join(tm.demo_dir(__file__), 'dask', 'gpu_training.py')
    cmd = ['python', script]
-    subprocess.check_call(cmd)
+    subprocess.check_call(cmd)
--- a/tests/python-gpu/test_gpu_eval_metrics.py
+++ b/tests/python-gpu/test_gpu_eval_metrics.py
@@ -1,7 +1,9 @@
 import sys
-import xgboost
+
 import pytest

+import xgboost
+
 sys.path.append("tests/python")
 import test_eval_metrics as test_em  # noqa

--- a/tests/python-gpu/test_gpu_interaction_constraints.py
+++ b/tests/python-gpu/test_gpu_interaction_constraints.py
@@ -1,8 +1,11 @@
-import numpy as np
 import sys
+
+import numpy as np
+
 sys.path.append("tests/python")
 # Don't import the test class, otherwise they will run twice.
 import test_interaction_constraints as test_ic  # noqa
+
 rng = np.random.RandomState(1994)


--- a/tests/python-gpu/test_gpu_linear.py
+++ b/tests/python-gpu/test_gpu_linear.py
@@ -1,15 +1,10 @@
-import sys
-
 import pytest
 from hypothesis import assume, given, note, settings, strategies

 import xgboost as xgb
-from xgboost import testing
+from xgboost import testing as tm

-sys.path.append("tests/python")
-import testing as tm
-
-pytestmark = testing.timeout(10)
+pytestmark = tm.timeout(10)

 parameter_strategy = strategies.fixed_dictionaries({
    'booster': strategies.just('gblinear'),
--- a/tests/python-gpu/test_gpu_pickling.py
+++ b/tests/python-gpu/test_gpu_pickling.py
@@ -3,20 +3,17 @@ import json
 import os
 import pickle
 import subprocess
-import sys

 import numpy as np
 import pytest

 import xgboost as xgb
-from xgboost import XGBClassifier, testing
-
-sys.path.append("tests/python")
-import testing as tm
+from xgboost import XGBClassifier
+from xgboost import testing as tm

 model_path = './model.pkl'

-pytestmark = testing.timeout(30)
+pytestmark = tm.timeout(30)


 def build_dataset():
--- a/tests/python-gpu/test_gpu_plotting.py
+++ b/tests/python-gpu/test_gpu_plotting.py
@@ -1,10 +1,11 @@
 import sys
+
 import pytest

-sys.path.append("tests/python")
-import testing as tm
-import test_plotting as tp
+from xgboost import testing as tm

+sys.path.append("tests/python")
+import test_plotting as tp

 pytestmark = pytest.mark.skipif(**tm.no_multiple(tm.no_matplotlib(), tm.no_graphviz()))

--- a/tests/python-gpu/test_gpu_prediction.py
+++ b/tests/python-gpu/test_gpu_prediction.py
@@ -6,7 +6,7 @@ from hypothesis import assume, given, settings, strategies
 from xgboost.compat import PANDAS_INSTALLED

 import xgboost as xgb
-from xgboost import testing
+from xgboost import testing as tm

 if PANDAS_INSTALLED:
    from hypothesis.extra.pandas import column, data_frames, range_indexes
@@ -16,7 +16,6 @@ else:
    column, data_frames, range_indexes = noop, noop, noop

 sys.path.append("tests/python")
-import testing as tm
 from test_predict import run_predict_leaf  # noqa
 from test_predict import run_threaded_predict  # noqa

@@ -33,7 +32,7 @@ predict_parameter_strategy = strategies.fixed_dictionaries({
    'num_parallel_tree': strategies.sampled_from([1, 4]),
 })

-pytestmark = testing.timeout(20)
+pytestmark = tm.timeout(20)


 class TestGPUPredict:
@@ -227,8 +226,8 @@ class TestGPUPredict:
    @pytest.mark.skipif(**tm.no_cupy())
    @pytest.mark.skipif(**tm.no_cudf())
    def test_inplace_predict_cudf(self):
-        import cupy as cp
        import cudf
+        import cupy as cp
        import pandas as pd
        rows = 1000
        cols = 10
@@ -379,8 +378,8 @@ class TestGPUPredict:
    @pytest.mark.skipif(**tm.no_cupy())
    @pytest.mark.parametrize("n_classes", [2, 3])
    def test_predict_dart(self, n_classes):
-        from sklearn.datasets import make_classification
        import cupy as cp
+        from sklearn.datasets import make_classification
        n_samples = 1000
        X_, y_ = make_classification(
            n_samples=n_samples, n_informative=5, n_classes=n_classes
--- a/tests/python-gpu/test_gpu_ranking.py
+++ b/tests/python-gpu/test_gpu_ranking.py
@@ -1,20 +1,15 @@
 import itertools
 import os
 import shutil
-import sys
 import urllib.request
 import zipfile

 import numpy as np

 import xgboost
-from xgboost import testing
+from xgboost import testing as tm

-sys.path.append("tests/python")
-
-import testing as tm  # noqa
-
-pytestmark = testing.timeout(10)
+pytestmark = tm.timeout(10)


 class TestRanking:
@@ -24,8 +19,9 @@ class TestRanking:
        Download and setup the test fixtures
        """
        from sklearn.datasets import load_svmlight_files
+
        # download the test data
-        cls.dpath = os.path.join(tm.PROJECT_ROOT, "demo/rank/")
+        cls.dpath = os.path.join(tm.demo_dir(__file__), "rank/")
        src = 'https://s3-us-west-2.amazonaws.com/xgboost-examples/MQ2008.zip'
        target = os.path.join(cls.dpath, "MQ2008.zip")

--- a/tests/python-gpu/test_gpu_spark/test_data.py
+++ b/tests/python-gpu/test_gpu_spark/test_data.py
@@ -1,13 +1,8 @@
 import sys
-from typing import List

-import numpy as np
-import pandas as pd
 import pytest

-sys.path.append("tests/python")
-
-import testing as tm
+from xgboost import testing as tm

 if tm.no_spark()["condition"]:
    pytest.skip(msg=tm.no_spark()["reason"], allow_module_level=True)
@@ -15,6 +10,7 @@ if sys.platform.startswith("win") or sys.platform.startswith("darwin"):
    pytest.skip("Skipping PySpark tests on Windows", allow_module_level=True)


+sys.path.append("tests/python")
 from test_spark.test_data import run_dmatrix_ctor


--- a/tests/python-gpu/test_gpu_spark/test_gpu_spark.py
+++ b/tests/python-gpu/test_gpu_spark/test_gpu_spark.py
@@ -6,8 +6,7 @@ import sys
 import pytest
 import sklearn

-sys.path.append("tests/python")
-import testing as tm
+from xgboost import testing as tm

 if tm.no_spark()["condition"]:
    pytest.skip(msg=tm.no_spark()["reason"], allow_module_level=True)
--- a/tests/python-gpu/test_gpu_training_continuation.py
+++ b/tests/python-gpu/test_gpu_training_continuation.py
@@ -1,7 +1,9 @@
-import numpy as np
-import xgboost as xgb
 import json

+import numpy as np
+
+import xgboost as xgb
+
 rng = np.random.RandomState(1994)


--- a/tests/python-gpu/test_gpu_updaters.py
+++ b/tests/python-gpu/test_gpu_updaters.py
@@ -6,13 +6,12 @@ import pytest
 from hypothesis import assume, given, note, settings, strategies

 import xgboost as xgb
-from xgboost import testing
+from xgboost import testing as tm

 sys.path.append("tests/python")
 import test_updaters as test_up
-import testing as tm

-pytestmark = testing.timeout(30)
+pytestmark = tm.timeout(30)

 parameter_strategy = strategies.fixed_dictionaries({
    'max_depth': strategies.integers(0, 11),
--- a/tests/python-gpu/test_gpu_with_dask/test_gpu_with_dask.py
+++ b/tests/python-gpu/test_gpu_with_dask/test_gpu_with_dask.py
@@ -1,52 +1,54 @@
 """Copyright 2019-2022 XGBoost contributors"""
-import sys
-import os
-from typing import Type, TypeVar, Any, Dict, List, Union
-import pytest
-import numpy as np
 import asyncio
-import xgboost
+import os
 import subprocess
+import sys
 from collections import OrderedDict
 from inspect import signature
-from hypothesis import given, strategies, settings, note
+from typing import Any, Dict, Type, TypeVar, Union
+
+import numpy as np
+import pytest
+from hypothesis import given, note, settings, strategies
 from hypothesis._settings import duration
 from test_gpu_updaters import parameter_strategy

+import xgboost
+from xgboost import testing as tm
+
 if sys.platform.startswith("win"):
    pytest.skip("Skipping dask tests on Windows", allow_module_level=True)

 sys.path.append("tests/python")
-import testing as tm  # noqa

 if tm.no_dask_cuda()["condition"]:
    pytest.skip(tm.no_dask_cuda()["reason"], allow_module_level=True)


-from test_with_dask import run_empty_dmatrix_reg  # noqa
-from test_with_dask import run_empty_dmatrix_auc  # noqa
+from test_with_dask import _get_client_workers  # noqa
+from test_with_dask import generate_array  # noqa
+from test_with_dask import make_categorical  # noqa
 from test_with_dask import run_auc  # noqa
 from test_with_dask import run_boost_from_prediction  # noqa
 from test_with_dask import run_boost_from_prediction_multi_class  # noqa
-from test_with_dask import run_dask_classifier  # noqa
-from test_with_dask import run_empty_dmatrix_cls  # noqa
-from test_with_dask import _get_client_workers  # noqa
-from test_with_dask import generate_array  # noqa
-from test_with_dask import kCols as random_cols  # noqa
-from test_with_dask import suppress  # noqa
-from test_with_dask import run_tree_stats  # noqa
 from test_with_dask import run_categorical  # noqa
-from test_with_dask import make_categorical  # noqa
-
+from test_with_dask import run_dask_classifier  # noqa
+from test_with_dask import run_empty_dmatrix_auc  # noqa
+from test_with_dask import run_empty_dmatrix_cls  # noqa
+from test_with_dask import run_empty_dmatrix_reg  # noqa
+from test_with_dask import run_tree_stats  # noqa
+from test_with_dask import suppress  # noqa
+from test_with_dask import kCols as random_cols  # noqa

 try:
-    import dask.dataframe as dd
-    from xgboost import dask as dxgb
-    import xgboost as xgb
-    from dask.distributed import Client
-    from dask import array as da
-    from dask_cuda import LocalCUDACluster, utils
    import cudf
+    import dask.dataframe as dd
+    from dask import array as da
+    from dask.distributed import Client
+    from dask_cuda import LocalCUDACluster, utils
+
+    import xgboost as xgb
+    from xgboost import dask as dxgb
 except ImportError:
    pass

@@ -334,9 +336,9 @@ class TestDistributedGPU:

    @pytest.mark.skipif(**tm.no_dask_cudf())
    def test_empty_partition(self, local_cuda_client: Client) -> None:
-        import dask_cudf
        import cudf
        import cupy
+        import dask_cudf

        mult = 100
        df = cudf.DataFrame(
--- a/tests/python-gpu/test_gpu_with_sklearn.py
+++ b/tests/python-gpu/test_gpu_with_sklearn.py
@@ -1,13 +1,15 @@
 import json
-import xgboost as xgb
-import pytest
-import tempfile
-import sys
-import numpy as np
 import os
+import sys
+import tempfile
+
+import numpy as np
+import pytest
+
+import xgboost as xgb
+from xgboost import testing as tm

 sys.path.append("tests/python")
-import testing as tm               # noqa
 import test_with_sklearn as twskl  # noqa

 pytestmark = pytest.mark.skipif(**tm.no_sklearn())
@@ -38,9 +40,9 @@ def test_gpu_binary_classification():
@pytest.mark.skipif(**tm.no_cupy())
@pytest.mark.skipif(**tm.no_cudf())
 def test_boost_from_prediction_gpu_hist():
-    from sklearn.datasets import load_breast_cancer, load_digits
-    import cupy as cp
    import cudf
+    import cupy as cp
+    from sklearn.datasets import load_breast_cancer, load_digits

    tree_method = "gpu_hist"
    X, y = load_breast_cancer(return_X_y=True)
@@ -68,12 +70,12 @@ def test_num_parallel_tree():
@pytest.mark.skipif(**tm.no_cudf())
@pytest.mark.skipif(**tm.no_sklearn())
 def test_categorical():
-    import pandas as pd
    import cudf
    import cupy as cp
+    import pandas as pd
    from sklearn.datasets import load_svmlight_file

-    data_dir = os.path.join(tm.PROJECT_ROOT, "demo", "data")
+    data_dir = tm.data_dir(__file__)
    X, y = load_svmlight_file(os.path.join(data_dir, "agaricus.txt.train"))
    clf = xgb.XGBClassifier(
        tree_method="gpu_hist",
@@ -123,9 +125,9 @@ def test_categorical():
@pytest.mark.skipif(**tm.no_cupy())
@pytest.mark.skipif(**tm.no_cudf())
 def test_classififer():
-    from sklearn.datasets import load_digits
-    import cupy as cp
    import cudf
+    import cupy as cp
+    from sklearn.datasets import load_digits

    X, y = load_digits(return_X_y=True)
    y *= 10
--- a/tests/python-gpu/test_large_input.py
+++ b/tests/python-gpu/test_large_input.py
@@ -1,23 +1,23 @@
-import numpy as np
-import xgboost as xgb
-import cupy as cp
-import time
-import pytest
-
-
-# Test for integer overflow or out of memory exceptions
-def test_large_input():
-    available_bytes, _ = cp.cuda.runtime.memGetInfo()
-    # 15 GB
-    required_bytes = 1.5e+10
-    if available_bytes < required_bytes:
-        pytest.skip("Not enough memory on this device")
-    n = 1000
-    m = ((1 << 31) + n - 1) // n
-    assert (np.log2(m * n) > 31)
-    X = cp.ones((m, n), dtype=np.float32)
-    y = cp.ones(m)
-    dmat = xgb.DeviceQuantileDMatrix(X, y)
-    booster = xgb.train({"tree_method": "gpu_hist", "max_depth": 1}, dmat, 1)
-    del y
-    booster.inplace_predict(X)
+import cupy as cp
+import numpy as np
+import pytest
+
+import xgboost as xgb
+
+
+# Test for integer overflow or out of memory exceptions
+def test_large_input():
+    available_bytes, _ = cp.cuda.runtime.memGetInfo()
+    # 15 GB
+    required_bytes = 1.5e+10
+    if available_bytes < required_bytes:
+        pytest.skip("Not enough memory on this device")
+    n = 1000
+    m = ((1 << 31) + n - 1) // n
+    assert (np.log2(m * n) > 31)
+    X = cp.ones((m, n), dtype=np.float32)
+    y = cp.ones(m)
+    dmat = xgb.DeviceQuantileDMatrix(X, y)
+    booster = xgb.train({"tree_method": "gpu_hist", "max_depth": 1}, dmat, 1)
+    del y
+    booster.inplace_predict(X)
--- a/tests/python-gpu/test_monotonic_constraints.py
+++ b/tests/python-gpu/test_monotonic_constraints.py
@@ -1,11 +1,12 @@
 import sys
-import numpy as np

+import numpy as np
 import pytest

 import xgboost as xgb
+from xgboost import testing as tm
+
 sys.path.append("tests/python")
-import testing as tm
 import test_monotone_constraints as tmc

 rng = np.random.RandomState(1994)