Move Python testing utilities into xgboost module. (#8379)

- Add type hints.
- Fixes for pylint.

Co-authored-by: Hyunsu Philip Cho <chohyu01@cs.washington.edu>
This commit is contained in:
Jiaming Yuan
2022-10-26 16:56:11 +08:00
committed by GitHub
parent 7e53189e7c
commit cf70864fa3
66 changed files with 652 additions and 595 deletions

View File

@@ -1,9 +1,7 @@
import sys
import pytest
import logging
sys.path.append("tests/python")
import testing as tm # noqa
from xgboost import testing as tm # noqa
def has_rmm():
try:
@@ -34,8 +32,8 @@ def local_cuda_client(request, pytestconfig):
kwargs['rmm_pool_size'] = '2GB'
if tm.no_dask_cuda()['condition']:
raise ImportError('The local_cuda_cluster fixture requires dask_cuda package')
from dask_cuda import LocalCUDACluster
from dask.distributed import Client
from dask_cuda import LocalCUDACluster
yield Client(LocalCUDACluster(**kwargs))
def pytest_addoption(parser):

View File

@@ -1,16 +1,14 @@
'''Loading a pickled model generated by test_pickling.py, only used by
`test_gpu_with_dask.py`'''
import os
import numpy as np
import xgboost as xgb
import json
import os
import numpy as np
import pytest
import sys
from test_gpu_pickling import build_dataset, load_pickle, model_path
from test_gpu_pickling import build_dataset, model_path, load_pickle
sys.path.append("tests/python")
import testing as tm
import xgboost as xgb
from xgboost import testing as tm
class TestLoadPickle:

View File

@@ -5,10 +5,10 @@ import pytest
from hypothesis import given, settings, strategies
import xgboost as xgb
from xgboost import testing as tm
sys.path.append("tests/python")
import test_quantile_dmatrix as tqd
import testing as tm
class TestDeviceQuantileDMatrix:

View File

@@ -2,11 +2,12 @@ import json
import sys
import numpy as np
import xgboost as xgb
import pytest
import xgboost as xgb
from xgboost import testing as tm
sys.path.append("tests/python")
import testing as tm
from test_dmatrix import set_base_margin_info
@@ -85,8 +86,8 @@ def _test_from_cudf(DMatrixT):
def _test_cudf_training(DMatrixT):
from cudf import DataFrame as df
import pandas as pd
from cudf import DataFrame as df
np.random.seed(1)
X = pd.DataFrame(np.random.randn(50, 10))
y = pd.DataFrame(np.random.randn(50))
@@ -109,8 +110,8 @@ def _test_cudf_training(DMatrixT):
def _test_cudf_metainfo(DMatrixT):
from cudf import DataFrame as df
import pandas as pd
from cudf import DataFrame as df
n = 100
X = np.random.random((n, 2))
dmat_cudf = DMatrixT(df.from_pandas(pd.DataFrame(X)))
@@ -247,9 +248,9 @@ Arrow specification.'''
@pytest.mark.skipif(**tm.no_sklearn())
@pytest.mark.skipif(**tm.no_pandas())
def test_cudf_training_with_sklearn():
import pandas as pd
from cudf import DataFrame as df
from cudf import Series as ss
import pandas as pd
np.random.seed(1)
X = pd.DataFrame(np.random.randn(50, 10))
y = pd.DataFrame((np.random.randn(50) > 0).astype(np.int8))

View File

@@ -1,12 +1,15 @@
import numpy as np
import xgboost as xgb
import sys
import numpy as np
import pytest
import xgboost as xgb
sys.path.append("tests/python")
import testing as tm
from test_dmatrix import set_base_margin_info
from xgboost import testing as tm
def dmatrix_from_cupy(input_type, DMatrixT, missing=np.NAN):
'''Test constructing DMatrix from cupy'''

View File

@@ -1,13 +1,18 @@
import sys
import os
import sys
import numpy as np
import xgboost as xgb
import pytest
import xgboost as xgb
from xgboost import testing as tm
sys.path.append("tests/python")
import test_basic_models as test_bm
# Don't import the test class, otherwise they will run twice.
import test_callback as test_cb # noqa
import test_basic_models as test_bm
import testing as tm
rng = np.random.RandomState(1994)

View File

@@ -1,13 +1,12 @@
import numpy as np
import xgboost as xgb
from hypothesis import given, strategies, settings
import pytest
import sys
import pytest
from hypothesis import given, settings, strategies
from xgboost.testing import no_cupy
sys.path.append("tests/python")
from test_data_iterator import test_single_batch as cpu_single_batch
from test_data_iterator import run_data_iterator
from testing import no_cupy
from test_data_iterator import test_single_batch as cpu_single_batch
def test_gpu_single_batch() -> None:
@@ -24,7 +23,11 @@ def test_gpu_single_batch() -> None:
)
@settings(deadline=None, max_examples=10, print_blob=True)
def test_gpu_data_iterator(
n_samples_per_batch: int, n_features: int, n_batches: int, subsample: bool, use_cupy: bool
n_samples_per_batch: int,
n_features: int,
n_batches: int,
subsample: bool,
use_cupy: bool,
) -> None:
run_data_iterator(
n_samples_per_batch, n_features, n_batches, "gpu_hist", subsample, use_cupy

View File

@@ -1,10 +1,13 @@
import os
import subprocess
import sys
import pytest
from xgboost import testing as tm
sys.path.append("tests/python")
import testing as tm
import test_demos as td # noqa
import test_demos as td # noqa
@pytest.mark.skipif(**tm.no_cupy())
@@ -31,6 +34,6 @@ def test_categorical_demo():
@pytest.mark.skipif(**tm.no_cupy())
@pytest.mark.mgpu
def test_dask_training():
script = os.path.join(tm.PROJECT_ROOT, 'demo', 'dask', 'gpu_training.py')
script = os.path.join(tm.demo_dir(__file__), 'dask', 'gpu_training.py')
cmd = ['python', script]
subprocess.check_call(cmd)
subprocess.check_call(cmd)

View File

@@ -1,7 +1,9 @@
import sys
import xgboost
import pytest
import xgboost
sys.path.append("tests/python")
import test_eval_metrics as test_em # noqa

View File

@@ -1,8 +1,11 @@
import numpy as np
import sys
import numpy as np
sys.path.append("tests/python")
# Don't import the test class, otherwise they will run twice.
import test_interaction_constraints as test_ic # noqa
rng = np.random.RandomState(1994)

View File

@@ -1,15 +1,10 @@
import sys
import pytest
from hypothesis import assume, given, note, settings, strategies
import xgboost as xgb
from xgboost import testing
from xgboost import testing as tm
sys.path.append("tests/python")
import testing as tm
pytestmark = testing.timeout(10)
pytestmark = tm.timeout(10)
parameter_strategy = strategies.fixed_dictionaries({
'booster': strategies.just('gblinear'),

View File

@@ -3,20 +3,17 @@ import json
import os
import pickle
import subprocess
import sys
import numpy as np
import pytest
import xgboost as xgb
from xgboost import XGBClassifier, testing
sys.path.append("tests/python")
import testing as tm
from xgboost import XGBClassifier
from xgboost import testing as tm
model_path = './model.pkl'
pytestmark = testing.timeout(30)
pytestmark = tm.timeout(30)
def build_dataset():

View File

@@ -1,10 +1,11 @@
import sys
import pytest
sys.path.append("tests/python")
import testing as tm
import test_plotting as tp
from xgboost import testing as tm
sys.path.append("tests/python")
import test_plotting as tp
pytestmark = pytest.mark.skipif(**tm.no_multiple(tm.no_matplotlib(), tm.no_graphviz()))

View File

@@ -6,7 +6,7 @@ from hypothesis import assume, given, settings, strategies
from xgboost.compat import PANDAS_INSTALLED
import xgboost as xgb
from xgboost import testing
from xgboost import testing as tm
if PANDAS_INSTALLED:
from hypothesis.extra.pandas import column, data_frames, range_indexes
@@ -16,7 +16,6 @@ else:
column, data_frames, range_indexes = noop, noop, noop
sys.path.append("tests/python")
import testing as tm
from test_predict import run_predict_leaf # noqa
from test_predict import run_threaded_predict # noqa
@@ -33,7 +32,7 @@ predict_parameter_strategy = strategies.fixed_dictionaries({
'num_parallel_tree': strategies.sampled_from([1, 4]),
})
pytestmark = testing.timeout(20)
pytestmark = tm.timeout(20)
class TestGPUPredict:
@@ -227,8 +226,8 @@ class TestGPUPredict:
@pytest.mark.skipif(**tm.no_cupy())
@pytest.mark.skipif(**tm.no_cudf())
def test_inplace_predict_cudf(self):
import cupy as cp
import cudf
import cupy as cp
import pandas as pd
rows = 1000
cols = 10
@@ -379,8 +378,8 @@ class TestGPUPredict:
@pytest.mark.skipif(**tm.no_cupy())
@pytest.mark.parametrize("n_classes", [2, 3])
def test_predict_dart(self, n_classes):
from sklearn.datasets import make_classification
import cupy as cp
from sklearn.datasets import make_classification
n_samples = 1000
X_, y_ = make_classification(
n_samples=n_samples, n_informative=5, n_classes=n_classes

View File

@@ -1,20 +1,15 @@
import itertools
import os
import shutil
import sys
import urllib.request
import zipfile
import numpy as np
import xgboost
from xgboost import testing
from xgboost import testing as tm
sys.path.append("tests/python")
import testing as tm # noqa
pytestmark = testing.timeout(10)
pytestmark = tm.timeout(10)
class TestRanking:
@@ -24,8 +19,9 @@ class TestRanking:
Download and setup the test fixtures
"""
from sklearn.datasets import load_svmlight_files
# download the test data
cls.dpath = os.path.join(tm.PROJECT_ROOT, "demo/rank/")
cls.dpath = os.path.join(tm.demo_dir(__file__), "rank/")
src = 'https://s3-us-west-2.amazonaws.com/xgboost-examples/MQ2008.zip'
target = os.path.join(cls.dpath, "MQ2008.zip")

View File

@@ -1,13 +1,8 @@
import sys
from typing import List
import numpy as np
import pandas as pd
import pytest
sys.path.append("tests/python")
import testing as tm
from xgboost import testing as tm
if tm.no_spark()["condition"]:
pytest.skip(msg=tm.no_spark()["reason"], allow_module_level=True)
@@ -15,6 +10,7 @@ if sys.platform.startswith("win") or sys.platform.startswith("darwin"):
pytest.skip("Skipping PySpark tests on Windows", allow_module_level=True)
sys.path.append("tests/python")
from test_spark.test_data import run_dmatrix_ctor

View File

@@ -6,8 +6,7 @@ import sys
import pytest
import sklearn
sys.path.append("tests/python")
import testing as tm
from xgboost import testing as tm
if tm.no_spark()["condition"]:
pytest.skip(msg=tm.no_spark()["reason"], allow_module_level=True)

View File

@@ -1,7 +1,9 @@
import numpy as np
import xgboost as xgb
import json
import numpy as np
import xgboost as xgb
rng = np.random.RandomState(1994)

View File

@@ -6,13 +6,12 @@ import pytest
from hypothesis import assume, given, note, settings, strategies
import xgboost as xgb
from xgboost import testing
from xgboost import testing as tm
sys.path.append("tests/python")
import test_updaters as test_up
import testing as tm
pytestmark = testing.timeout(30)
pytestmark = tm.timeout(30)
parameter_strategy = strategies.fixed_dictionaries({
'max_depth': strategies.integers(0, 11),

View File

@@ -1,52 +1,54 @@
"""Copyright 2019-2022 XGBoost contributors"""
import sys
import os
from typing import Type, TypeVar, Any, Dict, List, Union
import pytest
import numpy as np
import asyncio
import xgboost
import os
import subprocess
import sys
from collections import OrderedDict
from inspect import signature
from hypothesis import given, strategies, settings, note
from typing import Any, Dict, Type, TypeVar, Union
import numpy as np
import pytest
from hypothesis import given, note, settings, strategies
from hypothesis._settings import duration
from test_gpu_updaters import parameter_strategy
import xgboost
from xgboost import testing as tm
if sys.platform.startswith("win"):
pytest.skip("Skipping dask tests on Windows", allow_module_level=True)
sys.path.append("tests/python")
import testing as tm # noqa
if tm.no_dask_cuda()["condition"]:
pytest.skip(tm.no_dask_cuda()["reason"], allow_module_level=True)
from test_with_dask import run_empty_dmatrix_reg # noqa
from test_with_dask import run_empty_dmatrix_auc # noqa
from test_with_dask import _get_client_workers # noqa
from test_with_dask import generate_array # noqa
from test_with_dask import make_categorical # noqa
from test_with_dask import run_auc # noqa
from test_with_dask import run_boost_from_prediction # noqa
from test_with_dask import run_boost_from_prediction_multi_class # noqa
from test_with_dask import run_dask_classifier # noqa
from test_with_dask import run_empty_dmatrix_cls # noqa
from test_with_dask import _get_client_workers # noqa
from test_with_dask import generate_array # noqa
from test_with_dask import kCols as random_cols # noqa
from test_with_dask import suppress # noqa
from test_with_dask import run_tree_stats # noqa
from test_with_dask import run_categorical # noqa
from test_with_dask import make_categorical # noqa
from test_with_dask import run_dask_classifier # noqa
from test_with_dask import run_empty_dmatrix_auc # noqa
from test_with_dask import run_empty_dmatrix_cls # noqa
from test_with_dask import run_empty_dmatrix_reg # noqa
from test_with_dask import run_tree_stats # noqa
from test_with_dask import suppress # noqa
from test_with_dask import kCols as random_cols # noqa
try:
import dask.dataframe as dd
from xgboost import dask as dxgb
import xgboost as xgb
from dask.distributed import Client
from dask import array as da
from dask_cuda import LocalCUDACluster, utils
import cudf
import dask.dataframe as dd
from dask import array as da
from dask.distributed import Client
from dask_cuda import LocalCUDACluster, utils
import xgboost as xgb
from xgboost import dask as dxgb
except ImportError:
pass
@@ -334,9 +336,9 @@ class TestDistributedGPU:
@pytest.mark.skipif(**tm.no_dask_cudf())
def test_empty_partition(self, local_cuda_client: Client) -> None:
import dask_cudf
import cudf
import cupy
import dask_cudf
mult = 100
df = cudf.DataFrame(

View File

@@ -1,13 +1,15 @@
import json
import xgboost as xgb
import pytest
import tempfile
import sys
import numpy as np
import os
import sys
import tempfile
import numpy as np
import pytest
import xgboost as xgb
from xgboost import testing as tm
sys.path.append("tests/python")
import testing as tm # noqa
import test_with_sklearn as twskl # noqa
pytestmark = pytest.mark.skipif(**tm.no_sklearn())
@@ -38,9 +40,9 @@ def test_gpu_binary_classification():
@pytest.mark.skipif(**tm.no_cupy())
@pytest.mark.skipif(**tm.no_cudf())
def test_boost_from_prediction_gpu_hist():
from sklearn.datasets import load_breast_cancer, load_digits
import cupy as cp
import cudf
import cupy as cp
from sklearn.datasets import load_breast_cancer, load_digits
tree_method = "gpu_hist"
X, y = load_breast_cancer(return_X_y=True)
@@ -68,12 +70,12 @@ def test_num_parallel_tree():
@pytest.mark.skipif(**tm.no_cudf())
@pytest.mark.skipif(**tm.no_sklearn())
def test_categorical():
import pandas as pd
import cudf
import cupy as cp
import pandas as pd
from sklearn.datasets import load_svmlight_file
data_dir = os.path.join(tm.PROJECT_ROOT, "demo", "data")
data_dir = tm.data_dir(__file__)
X, y = load_svmlight_file(os.path.join(data_dir, "agaricus.txt.train"))
clf = xgb.XGBClassifier(
tree_method="gpu_hist",
@@ -123,9 +125,9 @@ def test_categorical():
@pytest.mark.skipif(**tm.no_cupy())
@pytest.mark.skipif(**tm.no_cudf())
def test_classififer():
from sklearn.datasets import load_digits
import cupy as cp
import cudf
import cupy as cp
from sklearn.datasets import load_digits
X, y = load_digits(return_X_y=True)
y *= 10

View File

@@ -1,23 +1,23 @@
import numpy as np
import xgboost as xgb
import cupy as cp
import time
import pytest
# Test for integer overflow or out of memory exceptions
def test_large_input():
available_bytes, _ = cp.cuda.runtime.memGetInfo()
# 15 GB
required_bytes = 1.5e+10
if available_bytes < required_bytes:
pytest.skip("Not enough memory on this device")
n = 1000
m = ((1 << 31) + n - 1) // n
assert (np.log2(m * n) > 31)
X = cp.ones((m, n), dtype=np.float32)
y = cp.ones(m)
dmat = xgb.DeviceQuantileDMatrix(X, y)
booster = xgb.train({"tree_method": "gpu_hist", "max_depth": 1}, dmat, 1)
del y
booster.inplace_predict(X)
import cupy as cp
import numpy as np
import pytest
import xgboost as xgb
# Test for integer overflow or out of memory exceptions
def test_large_input():
available_bytes, _ = cp.cuda.runtime.memGetInfo()
# 15 GB
required_bytes = 1.5e+10
if available_bytes < required_bytes:
pytest.skip("Not enough memory on this device")
n = 1000
m = ((1 << 31) + n - 1) // n
assert (np.log2(m * n) > 31)
X = cp.ones((m, n), dtype=np.float32)
y = cp.ones(m)
dmat = xgb.DeviceQuantileDMatrix(X, y)
booster = xgb.train({"tree_method": "gpu_hist", "max_depth": 1}, dmat, 1)
del y
booster.inplace_predict(X)

View File

@@ -1,11 +1,12 @@
import sys
import numpy as np
import numpy as np
import pytest
import xgboost as xgb
from xgboost import testing as tm
sys.path.append("tests/python")
import testing as tm
import test_monotone_constraints as tmc
rng = np.random.RandomState(1994)