diff --git a/python-package/xgboost/sklearn.py b/python-package/xgboost/sklearn.py index 5a873ab88..751b5a1c4 100644 --- a/python-package/xgboost/sklearn.py +++ b/python-package/xgboost/sklearn.py @@ -65,7 +65,7 @@ def _check_rf_callback( ) -_SklObjective = Optional[ +SklObjective = Optional[ Union[str, Callable[[np.ndarray, np.ndarray], Tuple[np.ndarray, np.ndarray]]] ] @@ -144,7 +144,7 @@ __model_doc = f""" Boosting learning rate (xgb's "eta") verbosity : Optional[int] The degree of verbosity. Valid values are 0 (silent) - 3 (debug). - objective : {_SklObjective} + objective : {SklObjective} Specify the learning task and the corresponding learning objective or a custom objective function to be used (see note below). booster: Optional[str] @@ -546,7 +546,7 @@ class XGBModel(XGBModelBase): learning_rate: Optional[float] = None, n_estimators: int = 100, verbosity: Optional[int] = None, - objective: _SklObjective = None, + objective: SklObjective = None, booster: Optional[str] = None, tree_method: Optional[str] = None, n_jobs: Optional[int] = None, @@ -1409,7 +1409,7 @@ class XGBClassifier(XGBModel, XGBClassifierBase): def __init__( self, *, - objective: _SklObjective = "binary:logistic", + objective: SklObjective = "binary:logistic", use_label_encoder: Optional[bool] = None, **kwargs: Any, ) -> None: @@ -1712,7 +1712,7 @@ class XGBRegressor(XGBModel, XGBRegressorBase): # pylint: disable=missing-docstring @_deprecate_positional_args def __init__( - self, *, objective: _SklObjective = "reg:squarederror", **kwargs: Any + self, *, objective: SklObjective = "reg:squarederror", **kwargs: Any ) -> None: super().__init__(objective=objective, **kwargs) diff --git a/python-package/xgboost/testing.py b/python-package/xgboost/testing.py deleted file mode 100644 index 598ddac5b..000000000 --- a/python-package/xgboost/testing.py +++ /dev/null @@ -1,64 +0,0 @@ -"""Utilities for defining Python tests.""" - -import socket -from platform import system -from typing import Any, TypedDict - -PytestSkip = TypedDict("PytestSkip", {"condition": bool, "reason": str}) - - -def has_ipv6() -> bool: - """Check whether IPv6 is enabled on this host.""" - # connection error in macos, still need some fixes. - if system() not in ("Linux", "Windows"): - return False - - if socket.has_ipv6: - try: - with socket.socket( - socket.AF_INET6, socket.SOCK_STREAM - ) as server, socket.socket(socket.AF_INET6, socket.SOCK_STREAM) as client: - server.bind(("::1", 0)) - port = server.getsockname()[1] - server.listen() - - client.connect(("::1", port)) - conn, _ = server.accept() - - client.sendall("abc".encode()) - msg = conn.recv(3).decode() - # if the code can be executed to this point, the message should be - # correct. - assert msg == "abc" - return True - except OSError: - pass - return False - - -def skip_ipv6() -> PytestSkip: - """PyTest skip mark for IPv6.""" - return {"condition": not has_ipv6(), "reason": "IPv6 is required to be enabled."} - - -def timeout(sec: int, *args: Any, enable: bool = True, **kwargs: Any) -> Any: - """Make a pytest mark for the `pytest-timeout` package. - - Parameters - ---------- - sec : - Timeout seconds. - enable : - Control whether timeout should be applied, used for debugging. - - Returns - ------- - pytest.mark.timeout - """ - import pytest # pylint: disable=import-error - - # This is disabled for now due to regression caused by conflicts between federated - # learning build and the CI container environment. 
- if enable: - return pytest.mark.timeout(sec, *args, **kwargs) - return pytest.mark.timeout(None, *args, **kwargs) diff --git a/tests/python/testing.py b/python-package/xgboost/testing/__init__.py similarity index 64% rename from tests/python/testing.py rename to python-package/xgboost/testing/__init__.py index 63d33de97..99ff72423 100644 --- a/tests/python/testing.py +++ b/python-package/xgboost/testing/__init__.py @@ -1,192 +1,190 @@ -from concurrent.futures import ThreadPoolExecutor -import os +"""Utilities for defining Python tests. The module is private and subject to frequent +change without notice. + +""" +# pylint: disable=invalid-name,missing-function-docstring,import-error +import gc +import importlib.util import multiprocessing -from typing import Tuple, Union, List, Sequence, Callable +import os +import platform +import socket +import sys import urllib import zipfile -import sys -from typing import Optional, Dict, Any +from concurrent.futures import ThreadPoolExecutor from contextlib import contextmanager from io import StringIO -from xgboost.compat import SKLEARN_INSTALLED, PANDAS_INSTALLED -import pytest -import gc -import xgboost as xgb -from xgboost.core import ArrayLike -import numpy as np -from scipy import sparse -import platform +from platform import system +from typing import ( + Any, + Callable, + Dict, + Generator, + List, + Optional, + Sequence, + Set, + Tuple, + TypedDict, + Union, +) -hypothesis = pytest.importorskip('hypothesis') -sklearn = pytest.importorskip('sklearn') +import numpy as np +import pytest +from scipy import sparse +from xgboost.core import ArrayLike +from xgboost.sklearn import SklObjective + +import xgboost as xgb + +hypothesis = pytest.importorskip("hypothesis") + +# pylint:disable=wrong-import-position,wrong-import-order from hypothesis import strategies from hypothesis.extra.numpy import arrays -from joblib import Memory -from sklearn import datasets -try: - import cupy as cp -except ImportError: - cp = None +joblib = pytest.importorskip("joblib") +datasets = pytest.importorskip("sklearn.datasets") -memory = Memory('./cachedir', verbose=0) +Memory = joblib.Memory + +memory = Memory("./cachedir", verbose=0) + +PytestSkip = TypedDict("PytestSkip", {"condition": bool, "reason": str}) -def no_ubjson(): - reason = "ubjson is not intsalled." - try: - import ubjson # noqa - return {"condition": False, "reason": reason} - except ImportError: - return {"condition": True, "reason": reason} +def has_ipv6() -> bool: + """Check whether IPv6 is enabled on this host.""" + # connection error in macos, still need some fixes. + if system() not in ("Linux", "Windows"): + return False + + if socket.has_ipv6: + try: + with socket.socket( + socket.AF_INET6, socket.SOCK_STREAM + ) as server, socket.socket(socket.AF_INET6, socket.SOCK_STREAM) as client: + server.bind(("::1", 0)) + port = server.getsockname()[1] + server.listen() + + client.connect(("::1", port)) + conn, _ = server.accept() + + client.sendall("abc".encode()) + msg = conn.recv(3).decode() + # if the code can be executed to this point, the message should be + # correct. 
+ assert msg == "abc" + return True + except OSError: + pass + return False -def no_sklearn(): - return {'condition': not SKLEARN_INSTALLED, - 'reason': 'Scikit-Learn is not installed'} +def no_mod(name: str) -> PytestSkip: + spec = importlib.util.find_spec(name) + return {"condition": spec is None, "reason": f"{name} is not installed."} -def no_dask(): - try: - import pkg_resources - - pkg_resources.get_distribution("dask") - DASK_INSTALLED = True - except pkg_resources.DistributionNotFound: - DASK_INSTALLED = False - return {"condition": not DASK_INSTALLED, "reason": "Dask is not installed"} +def no_ipv6() -> PytestSkip: + """PyTest skip mark for IPv6.""" + return {"condition": not has_ipv6(), "reason": "IPv6 is required to be enabled."} -def no_spark(): - try: - import pyspark # noqa - SPARK_INSTALLED = True - except ImportError: - SPARK_INSTALLED = False - return {"condition": not SPARK_INSTALLED, "reason": "Spark is not installed"} +def no_ubjson() -> PytestSkip: + return no_mod("ubjson") -def no_pandas(): - return {'condition': not PANDAS_INSTALLED, - 'reason': 'Pandas is not installed.'} +def no_sklearn() -> PytestSkip: + return no_mod("sklearn") -def no_arrow(): - reason = "pyarrow is not installed" - try: - import pyarrow # noqa - return {"condition": False, "reason": reason} - except ImportError: - return {"condition": True, "reason": reason} +def no_dask() -> PytestSkip: + return no_mod("dask") -def no_modin(): - reason = 'Modin is not installed.' - try: - import modin.pandas as _ # noqa - return {'condition': False, 'reason': reason} - except ImportError: - return {'condition': True, 'reason': reason} +def no_spark() -> PytestSkip: + return no_mod("pyspark") -def no_dt(): - import importlib.util - spec = importlib.util.find_spec('datatable') - return {'condition': spec is None, - 'reason': 'Datatable is not installed.'} +def no_pandas() -> PytestSkip: + return no_mod("pandas") -def no_matplotlib(): - reason = 'Matplotlib is not installed.' +def no_arrow() -> PytestSkip: + return no_mod("pyarrow") + + +def no_modin() -> PytestSkip: + return no_mod("modin") + + +def no_dt() -> PytestSkip: + return no_mod("datatable") + + +def no_matplotlib() -> PytestSkip: + reason = "Matplotlib is not installed." try: import matplotlib.pyplot as _ # noqa - return {'condition': False, - 'reason': reason} + + return {"condition": False, "reason": reason} except ImportError: - return {'condition': True, - 'reason': reason} + return {"condition": True, "reason": reason} -def no_dask_cuda(): - reason = 'dask_cuda is not installed.' - try: - import dask_cuda as _ # noqa - return {'condition': False, 'reason': reason} - except ImportError: - return {'condition': True, 'reason': reason} +def no_dask_cuda() -> PytestSkip: + return no_mod("dask_cuda") -def no_cudf(): - try: - import cudf # noqa - CUDF_INSTALLED = True - except ImportError: - CUDF_INSTALLED = False - - return {'condition': not CUDF_INSTALLED, - 'reason': 'CUDF is not installed'} +def no_cudf() -> PytestSkip: + return no_mod("cudf") -def no_cupy(): - reason = 'cupy is not installed.' - try: - import cupy as _ # noqa - return {'condition': False, 'reason': reason} - except ImportError: - return {'condition': True, 'reason': reason} +def no_cupy() -> PytestSkip: + return no_mod("cupy") -def no_dask_cudf(): - reason = 'dask_cudf is not installed.' 
- try: - import dask_cudf as _ # noqa - return {'condition': False, 'reason': reason} - except ImportError: - return {'condition': True, 'reason': reason} +def no_dask_cudf() -> PytestSkip: + return no_mod("dask_cudf") -def no_json_schema(): - reason = 'jsonschema is not installed' - try: - import jsonschema # noqa - return {'condition': False, 'reason': reason} - except ImportError: - return {'condition': True, 'reason': reason} +def no_json_schema() -> PytestSkip: + return no_mod("jsonschema") -def no_graphviz(): - reason = 'graphviz is not installed' - try: - import graphviz # noqa - return {'condition': False, 'reason': reason} - except ImportError: - return {'condition': True, 'reason': reason} +def no_graphviz() -> PytestSkip: + return no_mod("graphviz") -def no_multiple(*args): +def no_multiple(*args: Any) -> PytestSkip: condition = False - reason = '' + reason = "" for arg in args: - condition = (condition or arg['condition']) - if arg['condition']: - reason = arg['reason'] + condition = condition or arg["condition"] + if arg["condition"]: + reason = arg["reason"] break - return {'condition': condition, 'reason': reason} + return {"condition": condition, "reason": reason} -def skip_s390x(): +def skip_s390x() -> PytestSkip: condition = platform.machine() == "s390x" reason = "Known to fail on s390x" return {"condition": condition, "reason": reason} class IteratorForTest(xgb.core.DataIter): + """Iterator for testing streaming DMatrix. (external memory, quantile)""" + def __init__( self, X: Sequence, y: Sequence, w: Optional[Sequence], - cache: Optional[str] = "./" + cache: Optional[str] = "./", ) -> None: assert len(X) == len(y) self.X = X @@ -242,7 +240,7 @@ def make_batches( rng = cupy.random.RandomState(1994) else: rng = np.random.RandomState(1994) - for i in range(n_batches): + for _ in range(n_batches): _X = rng.randn(n_samples_per_batch, n_features) _y = rng.randn(n_samples_per_batch) _w = rng.uniform(low=0, high=1, size=n_samples_per_batch) @@ -259,7 +257,7 @@ def make_batches_sparse( y = [] w = [] rng = np.random.RandomState(1994) - for i in range(n_batches): + for _ in range(n_batches): _X = sparse.random( n_samples_per_batch, n_features, @@ -276,8 +274,9 @@ def make_batches_sparse( return X, y, w -# Contains a dataset in numpy format as well as the relevant objective and metric class TestDataset: + """Contains a dataset in numpy format as well as the relevant objective and metric.""" + def __init__( self, name: str, get_dataset: Callable, objective: str, metric: str ) -> None: @@ -289,18 +288,24 @@ class TestDataset: self.margin: Optional[np.ndarray] = None def set_params(self, params_in: Dict[str, Any]) -> Dict[str, Any]: - params_in['objective'] = self.objective - params_in['eval_metric'] = self.metric + params_in["objective"] = self.objective + params_in["eval_metric"] = self.metric if self.objective == "multi:softmax": params_in["num_class"] = int(np.max(self.y) + 1) return params_in def get_dmat(self) -> xgb.DMatrix: return xgb.DMatrix( - self.X, self.y, self.w, base_margin=self.margin, enable_categorical=True + self.X, + self.y, + weight=self.w, + base_margin=self.margin, + enable_categorical=True, ) def get_device_dmat(self) -> xgb.DeviceQuantileDMatrix: + import cupy as cp + w = None if self.w is None else cp.array(self.w) X = cp.array(self.X, dtype=np.float32) y = cp.array(self.y, dtype=np.float32) @@ -318,9 +323,9 @@ class TestDataset: beg = i * per_batch end = min((i + 1) * per_batch, n_samples) assert end != beg - X = self.X[beg: end, ...] 
- y = self.y[beg: end] - w = self.w[beg: end] if self.w is not None else None + X = self.X[beg:end, ...] + y = self.y[beg:end] + w = self.w[beg:end] if self.w is not None else None predictor.append(X) response.append(y) if w is not None: @@ -334,25 +339,24 @@ class TestDataset: @memory.cache -def get_california_housing(): +def get_california_housing() -> Tuple[np.ndarray, np.ndarray]: data = datasets.fetch_california_housing() return data.data, data.target @memory.cache -def get_digits(): +def get_digits() -> Tuple[np.ndarray, np.ndarray]: data = datasets.load_digits() return data.data, data.target @memory.cache -def get_cancer(): - data = datasets.load_breast_cancer() - return data.data, data.target +def get_cancer() -> Tuple[np.ndarray, np.ndarray]: + return datasets.load_breast_cancer(return_X_y=True) @memory.cache -def get_sparse(): +def get_sparse() -> Tuple[np.ndarray, np.ndarray]: rng = np.random.RandomState(199) n = 2000 sparsity = 0.75 @@ -366,7 +370,7 @@ def get_sparse(): @memory.cache -def get_ames_housing(): +def get_ames_housing() -> Tuple[np.ndarray, np.ndarray]: """ Number of samples: 1460 Number of features: 20 @@ -374,22 +378,23 @@ def get_ames_housing(): Number of numerical features: 10 """ from sklearn.datasets import fetch_openml + X, y = fetch_openml(data_id=42165, as_frame=True, return_X_y=True) - categorical_columns_subset: list[str] = [ - "BldgType", # 5 cats, no nan - "GarageFinish", # 3 cats, nan - "LotConfig", # 5 cats, no nan - "Functional", # 7 cats, no nan - "MasVnrType", # 4 cats, nan - "HouseStyle", # 8 cats, no nan - "FireplaceQu", # 5 cats, nan - "ExterCond", # 5 cats, no nan - "ExterQual", # 4 cats, no nan - "PoolQC", # 3 cats, nan + categorical_columns_subset: List[str] = [ + "BldgType", # 5 cats, no nan + "GarageFinish", # 3 cats, nan + "LotConfig", # 5 cats, no nan + "Functional", # 7 cats, no nan + "MasVnrType", # 4 cats, nan + "HouseStyle", # 8 cats, no nan + "FireplaceQu", # 5 cats, nan + "ExterCond", # 5 cats, no nan + "ExterQual", # 4 cats, no nan + "PoolQC", # 3 cats, nan ] - numerical_columns_subset: list[str] = [ + numerical_columns_subset: List[str] = [ "3SsnPorch", "Fireplaces", "BsmtHalfBath", @@ -408,32 +413,70 @@ def get_ames_housing(): @memory.cache -def get_mq2008(dpath): +def get_mq2008( + dpath: str, +) -> Tuple[ + sparse.csr_matrix, + np.ndarray, + np.ndarray, + sparse.csr_matrix, + np.ndarray, + np.ndarray, + sparse.csr_matrix, + np.ndarray, + np.ndarray, +]: from sklearn.datasets import load_svmlight_files - src = 'https://s3-us-west-2.amazonaws.com/xgboost-examples/MQ2008.zip' - target = dpath + '/MQ2008.zip' + src = "https://s3-us-west-2.amazonaws.com/xgboost-examples/MQ2008.zip" + target = dpath + "/MQ2008.zip" if not os.path.exists(target): urllib.request.urlretrieve(url=src, filename=target) - with zipfile.ZipFile(target, 'r') as f: + with zipfile.ZipFile(target, "r") as f: f.extractall(path=dpath) - (x_train, y_train, qid_train, x_test, y_test, qid_test, - x_valid, y_valid, qid_valid) = load_svmlight_files( - (dpath + "MQ2008/Fold1/train.txt", - dpath + "MQ2008/Fold1/test.txt", - dpath + "MQ2008/Fold1/vali.txt"), - query_id=True, zero_based=False) + ( + x_train, + y_train, + qid_train, + x_test, + y_test, + qid_test, + x_valid, + y_valid, + qid_valid, + ) = load_svmlight_files( + ( + dpath + "MQ2008/Fold1/train.txt", + dpath + "MQ2008/Fold1/test.txt", + dpath + "MQ2008/Fold1/vali.txt", + ), + query_id=True, + zero_based=False, + ) - return (x_train, y_train, qid_train, x_test, y_test, qid_test, - x_valid, y_valid, qid_valid) + 
return ( + x_train, + y_train, + qid_train, + x_test, + y_test, + qid_test, + x_valid, + y_valid, + qid_valid, + ) @memory.cache def make_categorical( - n_samples: int, n_features: int, n_categories: int, onehot: bool, sparsity=0.0, -): + n_samples: int, + n_features: int, + n_categories: int, + onehot: bool, + sparsity: float = 0.0, +) -> Tuple[ArrayLike, np.ndarray]: import pandas as pd rng = np.random.RandomState(1994) @@ -457,7 +500,9 @@ def make_categorical( if sparsity > 0.0: for i in range(n_features): - index = rng.randint(low=0, high=n_samples-1, size=int(n_samples * sparsity)) + index = rng.randint( + low=0, high=n_samples - 1, size=int(n_samples * sparsity) + ) df.iloc[index, i] = np.NaN assert n_categories == np.unique(df.dtypes[i].categories).size @@ -466,9 +511,9 @@ def make_categorical( return df, label -def _cat_sampled_from(): +def _cat_sampled_from() -> strategies.SearchStrategy: @strategies.composite - def _make_cat(draw): + def _make_cat(draw: Callable) -> Tuple[int, int, int, float]: n_samples = draw(strategies.integers(2, 512)) n_features = draw(strategies.integers(1, 4)) n_cats = draw(strategies.integers(1, 128)) @@ -483,7 +528,7 @@ def _cat_sampled_from(): ) return n_samples, n_features, n_cats, sparsity - def _build(args): + def _build(args: Tuple[int, int, int, float]) -> TestDataset: n_samples = args[0] n_features = args[1] n_cats = args[2] @@ -495,12 +540,13 @@ def _cat_sampled_from(): "rmse", ) - return _make_cat().map(_build) + return _make_cat().map(_build) # pylint: disable=no-member -categorical_dataset_strategy = _cat_sampled_from() +categorical_dataset_strategy: strategies.SearchStrategy = _cat_sampled_from() +# pylint: disable=too-many-locals @memory.cache def make_sparse_regression( n_samples: int, n_features: int, sparsity: float, as_dense: bool @@ -530,8 +576,7 @@ def make_sparse_regression( # Use multi-thread to speed up the generation, convenient if you use this function # for benchmarking. 
-    n_threads = multiprocessing.cpu_count()
-    n_threads = min(n_threads, n_features)
+    n_threads = min(multiprocessing.cpu_count(), n_features)
 
     def random_csc(t_id: int) -> sparse.csc_matrix:
         rng = np.random.default_rng(1994 * t_id)
@@ -653,7 +698,7 @@ _unweighted_datasets_strategy = strategies.sampled_from(
 
 
 @strategies.composite
-def _dataset_weight_margin(draw):
+def _dataset_weight_margin(draw: Callable) -> TestDataset:
     data: TestDataset = draw(_unweighted_datasets_strategy)
     if draw(strategies.booleans()):
         data.w = draw(
@@ -673,6 +718,7 @@ def _dataset_weight_margin(draw):
                 elements=strategies.floats(0.5, 1.0),
             )
         )
+        assert data.margin is not None
         if num_class != 1:
             data.margin = data.margin.reshape(data.y.shape[0], num_class)
 
@@ -684,24 +730,24 @@ def _dataset_weight_margin(draw):
 dataset_strategy = _dataset_weight_margin()
 
 
-def non_increasing(L, tolerance=1e-4):
+def non_increasing(L: Sequence[float], tolerance: float = 1e-4) -> bool:
     return all((y - x) < tolerance for x, y in zip(L, L[1:]))
 
 
-def eval_error_metric(predt, dtrain: xgb.DMatrix):
+def eval_error_metric(predt: np.ndarray, dtrain: xgb.DMatrix) -> Tuple[str, np.float64]:
     """Evaluation metric for xgb.train"""
     label = dtrain.get_label()
     r = np.zeros(predt.shape)
     gt = predt > 0.5
     if predt.size == 0:
-        return "CustomErr", 0
+        return "CustomErr", np.float64(0.0)
     r[gt] = 1 - label[gt]
     le = predt <= 0.5
     r[le] = label[le]
-    return 'CustomErr', np.sum(r)
+    return "CustomErr", np.sum(r)
 
 
-def eval_error_metric_skl(y_true: np.ndarray, y_score: np.ndarray) -> float:
+def eval_error_metric_skl(y_true: np.ndarray, y_score: np.ndarray) -> np.float64:
     """Evaluation metric that looks like metrics provided by sklearn."""
     r = np.zeros(y_score.shape)
     gt = y_score > 0.5
@@ -717,13 +763,15 @@ def root_mean_square(y_true: np.ndarray, y_score: np.ndarray) -> float:
     return rmse
 
 
-def softmax(x):
+def softmax(x: np.ndarray) -> np.ndarray:
     e = np.exp(x)
     return e / np.sum(e)
 
 
-def softprob_obj(classes):
-    def objective(labels, predt):
+def softprob_obj(classes: int) -> SklObjective:
+    def objective(
+        labels: np.ndarray, predt: np.ndarray
+    ) -> Tuple[np.ndarray, np.ndarray]:
         rows = labels.shape[0]
         grad = np.zeros((rows, classes), dtype=float)
         hess = np.zeros((rows, classes), dtype=float)
@@ -746,29 +794,33 @@ def softprob_obj(classes):
 
 
 class DirectoryExcursion:
-    def __init__(self, path: os.PathLike, cleanup=False):
-        '''Change directory. Change back and optionally cleaning up the directory when exit.
+    """Change directory. Change back and optionally clean up the directory on
+    exit.
- ''' + """ + + def __init__(self, path: os.PathLike, cleanup: bool = False): self.path = path self.curdir = os.path.normpath(os.path.abspath(os.path.curdir)) self.cleanup = cleanup - self.files = {} + self.files: Set[str] = set() - def __enter__(self): + def __enter__(self) -> None: os.chdir(self.path) if self.cleanup: self.files = { os.path.join(root, f) - for root, subdir, files in os.walk(self.path) for f in files + for root, subdir, files in os.walk(os.path.expanduser(self.path)) + for f in files } - def __exit__(self, *args): + def __exit__(self, *args: Any) -> None: os.chdir(self.curdir) if self.cleanup: files = { os.path.join(root, f) - for root, subdir, files in os.walk(self.path) for f in files + for root, subdir, files in os.walk(os.path.expanduser(self.path)) + for f in files } diff = files.difference(self.files) for f in diff: @@ -776,7 +828,7 @@ class DirectoryExcursion: @contextmanager -def captured_output(): +def captured_output() -> Generator[Tuple[StringIO, StringIO], None, None]: """Reassign stdout temporarily in order to test printed statements Taken from: https://stackoverflow.com/questions/4219717/how-to-assert-output-with-nosetest-unittest-in-python @@ -793,14 +845,46 @@ def captured_output(): sys.stdout, sys.stderr = old_out, old_err -try: - # Python 3.7+ - from contextlib import nullcontext as noop_context -except ImportError: - # Python 3.6 - from contextlib import suppress as noop_context +def timeout(sec: int, *args: Any, enable: bool = True, **kwargs: Any) -> Any: + """Make a pytest mark for the `pytest-timeout` package. + + Parameters + ---------- + sec : + Timeout seconds. + enable : + Control whether timeout should be applied, used for debugging. + + Returns + ------- + pytest.mark.timeout + """ + + if enable: + return pytest.mark.timeout(sec, *args, **kwargs) + return pytest.mark.timeout(None, *args, **kwargs) -CURDIR = os.path.normpath(os.path.abspath(os.path.dirname(__file__))) -PROJECT_ROOT = os.path.normpath( - os.path.join(CURDIR, os.path.pardir, os.path.pardir)) +def demo_dir(path: str) -> str: + """Look for the demo directory based on the test file name.""" + path = normpath(os.path.dirname(path)) + while True: + subdirs = [f.path for f in os.scandir(path) if f.is_dir()] + subdirs = [os.path.basename(d) for d in subdirs] + if "demo" in subdirs: + return os.path.join(path, "demo") + new_path = normpath(os.path.join(path, os.path.pardir)) + assert new_path != path + path = new_path + + +def normpath(path: str) -> str: + return os.path.normpath(os.path.abspath(path)) + + +def data_dir(path: str) -> str: + return os.path.join(demo_dir(path), "data") + + +def project_root(path: str) -> str: + return normpath(os.path.join(demo_dir(path), os.path.pardir)) diff --git a/tests/ci_build/lint_python.py b/tests/ci_build/lint_python.py index 84ad02f46..973d5f072 100644 --- a/tests/ci_build/lint_python.py +++ b/tests/ci_build/lint_python.py @@ -121,12 +121,14 @@ if __name__ == "__main__": "python-package/xgboost/sklearn.py", "python-package/xgboost/spark", "python-package/xgboost/federated.py", - "python-package/xgboost/testing.py", + "python-package/xgboost/testing", # tests "tests/python/test_config.py", + "tests/python/test_data_iterator.py", "tests/python/test_spark/", "tests/python/test_quantile_dmatrix.py", "tests/python-gpu/test_gpu_spark/", + "tests/python-gpu/test_gpu_data_iterator.py", "tests/ci_build/lint_python.py", # demo "demo/guide-python/cat_in_the_dat.py", diff --git a/tests/python-gpu/conftest.py b/tests/python-gpu/conftest.py index 
52299e13e..317622a34 100644 --- a/tests/python-gpu/conftest.py +++ b/tests/python-gpu/conftest.py @@ -1,9 +1,7 @@ -import sys import pytest -import logging -sys.path.append("tests/python") -import testing as tm # noqa +from xgboost import testing as tm # noqa + def has_rmm(): try: @@ -34,8 +32,8 @@ def local_cuda_client(request, pytestconfig): kwargs['rmm_pool_size'] = '2GB' if tm.no_dask_cuda()['condition']: raise ImportError('The local_cuda_cluster fixture requires dask_cuda package') - from dask_cuda import LocalCUDACluster from dask.distributed import Client + from dask_cuda import LocalCUDACluster yield Client(LocalCUDACluster(**kwargs)) def pytest_addoption(parser): diff --git a/tests/python-gpu/load_pickle.py b/tests/python-gpu/load_pickle.py index 45f33bb16..f12dde360 100644 --- a/tests/python-gpu/load_pickle.py +++ b/tests/python-gpu/load_pickle.py @@ -1,16 +1,14 @@ '''Loading a pickled model generated by test_pickling.py, only used by `test_gpu_with_dask.py`''' -import os -import numpy as np -import xgboost as xgb import json +import os + +import numpy as np import pytest -import sys +from test_gpu_pickling import build_dataset, load_pickle, model_path -from test_gpu_pickling import build_dataset, model_path, load_pickle - -sys.path.append("tests/python") -import testing as tm +import xgboost as xgb +from xgboost import testing as tm class TestLoadPickle: diff --git a/tests/python-gpu/test_device_quantile_dmatrix.py b/tests/python-gpu/test_device_quantile_dmatrix.py index 2c3a7a760..385f1b83e 100644 --- a/tests/python-gpu/test_device_quantile_dmatrix.py +++ b/tests/python-gpu/test_device_quantile_dmatrix.py @@ -5,10 +5,10 @@ import pytest from hypothesis import given, settings, strategies import xgboost as xgb +from xgboost import testing as tm sys.path.append("tests/python") import test_quantile_dmatrix as tqd -import testing as tm class TestDeviceQuantileDMatrix: diff --git a/tests/python-gpu/test_from_cudf.py b/tests/python-gpu/test_from_cudf.py index f924fc348..f22e69b22 100644 --- a/tests/python-gpu/test_from_cudf.py +++ b/tests/python-gpu/test_from_cudf.py @@ -2,11 +2,12 @@ import json import sys import numpy as np -import xgboost as xgb import pytest +import xgboost as xgb +from xgboost import testing as tm + sys.path.append("tests/python") -import testing as tm from test_dmatrix import set_base_margin_info @@ -85,8 +86,8 @@ def _test_from_cudf(DMatrixT): def _test_cudf_training(DMatrixT): - from cudf import DataFrame as df import pandas as pd + from cudf import DataFrame as df np.random.seed(1) X = pd.DataFrame(np.random.randn(50, 10)) y = pd.DataFrame(np.random.randn(50)) @@ -109,8 +110,8 @@ def _test_cudf_training(DMatrixT): def _test_cudf_metainfo(DMatrixT): - from cudf import DataFrame as df import pandas as pd + from cudf import DataFrame as df n = 100 X = np.random.random((n, 2)) dmat_cudf = DMatrixT(df.from_pandas(pd.DataFrame(X))) @@ -247,9 +248,9 @@ Arrow specification.''' @pytest.mark.skipif(**tm.no_sklearn()) @pytest.mark.skipif(**tm.no_pandas()) def test_cudf_training_with_sklearn(): + import pandas as pd from cudf import DataFrame as df from cudf import Series as ss - import pandas as pd np.random.seed(1) X = pd.DataFrame(np.random.randn(50, 10)) y = pd.DataFrame((np.random.randn(50) > 0).astype(np.int8)) diff --git a/tests/python-gpu/test_from_cupy.py b/tests/python-gpu/test_from_cupy.py index 77fa694e5..841ab7d34 100644 --- a/tests/python-gpu/test_from_cupy.py +++ b/tests/python-gpu/test_from_cupy.py @@ -1,12 +1,15 @@ -import numpy as np -import xgboost as 
xgb import sys + +import numpy as np import pytest +import xgboost as xgb + sys.path.append("tests/python") -import testing as tm from test_dmatrix import set_base_margin_info +from xgboost import testing as tm + def dmatrix_from_cupy(input_type, DMatrixT, missing=np.NAN): '''Test constructing DMatrix from cupy''' diff --git a/tests/python-gpu/test_gpu_basic_models.py b/tests/python-gpu/test_gpu_basic_models.py index 9e955eac2..83d1a2557 100644 --- a/tests/python-gpu/test_gpu_basic_models.py +++ b/tests/python-gpu/test_gpu_basic_models.py @@ -1,13 +1,18 @@ -import sys import os +import sys + import numpy as np -import xgboost as xgb import pytest + +import xgboost as xgb +from xgboost import testing as tm + sys.path.append("tests/python") +import test_basic_models as test_bm + # Don't import the test class, otherwise they will run twice. import test_callback as test_cb # noqa -import test_basic_models as test_bm -import testing as tm + rng = np.random.RandomState(1994) diff --git a/tests/python-gpu/test_gpu_data_iterator.py b/tests/python-gpu/test_gpu_data_iterator.py index 9753a51e0..23e495bcc 100644 --- a/tests/python-gpu/test_gpu_data_iterator.py +++ b/tests/python-gpu/test_gpu_data_iterator.py @@ -1,13 +1,12 @@ -import numpy as np -import xgboost as xgb -from hypothesis import given, strategies, settings -import pytest import sys +import pytest +from hypothesis import given, settings, strategies +from xgboost.testing import no_cupy + sys.path.append("tests/python") -from test_data_iterator import test_single_batch as cpu_single_batch from test_data_iterator import run_data_iterator -from testing import no_cupy +from test_data_iterator import test_single_batch as cpu_single_batch def test_gpu_single_batch() -> None: @@ -24,7 +23,11 @@ def test_gpu_single_batch() -> None: ) @settings(deadline=None, max_examples=10, print_blob=True) def test_gpu_data_iterator( - n_samples_per_batch: int, n_features: int, n_batches: int, subsample: bool, use_cupy: bool + n_samples_per_batch: int, + n_features: int, + n_batches: int, + subsample: bool, + use_cupy: bool, ) -> None: run_data_iterator( n_samples_per_batch, n_features, n_batches, "gpu_hist", subsample, use_cupy diff --git a/tests/python-gpu/test_gpu_demos.py b/tests/python-gpu/test_gpu_demos.py index 54909da48..ef181a67e 100644 --- a/tests/python-gpu/test_gpu_demos.py +++ b/tests/python-gpu/test_gpu_demos.py @@ -1,10 +1,13 @@ import os import subprocess import sys + import pytest + +from xgboost import testing as tm + sys.path.append("tests/python") -import testing as tm -import test_demos as td # noqa +import test_demos as td # noqa @pytest.mark.skipif(**tm.no_cupy()) @@ -31,6 +34,6 @@ def test_categorical_demo(): @pytest.mark.skipif(**tm.no_cupy()) @pytest.mark.mgpu def test_dask_training(): - script = os.path.join(tm.PROJECT_ROOT, 'demo', 'dask', 'gpu_training.py') + script = os.path.join(tm.demo_dir(__file__), 'dask', 'gpu_training.py') cmd = ['python', script] - subprocess.check_call(cmd) \ No newline at end of file + subprocess.check_call(cmd) diff --git a/tests/python-gpu/test_gpu_eval_metrics.py b/tests/python-gpu/test_gpu_eval_metrics.py index 1282e115a..cb4d8eb6c 100644 --- a/tests/python-gpu/test_gpu_eval_metrics.py +++ b/tests/python-gpu/test_gpu_eval_metrics.py @@ -1,7 +1,9 @@ import sys -import xgboost + import pytest +import xgboost + sys.path.append("tests/python") import test_eval_metrics as test_em # noqa diff --git a/tests/python-gpu/test_gpu_interaction_constraints.py b/tests/python-gpu/test_gpu_interaction_constraints.py 
index 885cf5bf9..ee85cf075 100644 --- a/tests/python-gpu/test_gpu_interaction_constraints.py +++ b/tests/python-gpu/test_gpu_interaction_constraints.py @@ -1,8 +1,11 @@ -import numpy as np import sys + +import numpy as np + sys.path.append("tests/python") # Don't import the test class, otherwise they will run twice. import test_interaction_constraints as test_ic # noqa + rng = np.random.RandomState(1994) diff --git a/tests/python-gpu/test_gpu_linear.py b/tests/python-gpu/test_gpu_linear.py index 5cd63e514..40c5d4845 100644 --- a/tests/python-gpu/test_gpu_linear.py +++ b/tests/python-gpu/test_gpu_linear.py @@ -1,15 +1,10 @@ -import sys - import pytest from hypothesis import assume, given, note, settings, strategies import xgboost as xgb -from xgboost import testing +from xgboost import testing as tm -sys.path.append("tests/python") -import testing as tm - -pytestmark = testing.timeout(10) +pytestmark = tm.timeout(10) parameter_strategy = strategies.fixed_dictionaries({ 'booster': strategies.just('gblinear'), diff --git a/tests/python-gpu/test_gpu_pickling.py b/tests/python-gpu/test_gpu_pickling.py index 4b321bece..7c452926e 100644 --- a/tests/python-gpu/test_gpu_pickling.py +++ b/tests/python-gpu/test_gpu_pickling.py @@ -3,20 +3,17 @@ import json import os import pickle import subprocess -import sys import numpy as np import pytest import xgboost as xgb -from xgboost import XGBClassifier, testing - -sys.path.append("tests/python") -import testing as tm +from xgboost import XGBClassifier +from xgboost import testing as tm model_path = './model.pkl' -pytestmark = testing.timeout(30) +pytestmark = tm.timeout(30) def build_dataset(): diff --git a/tests/python-gpu/test_gpu_plotting.py b/tests/python-gpu/test_gpu_plotting.py index f12f895a0..22b3b41fc 100644 --- a/tests/python-gpu/test_gpu_plotting.py +++ b/tests/python-gpu/test_gpu_plotting.py @@ -1,10 +1,11 @@ import sys + import pytest -sys.path.append("tests/python") -import testing as tm -import test_plotting as tp +from xgboost import testing as tm +sys.path.append("tests/python") +import test_plotting as tp pytestmark = pytest.mark.skipif(**tm.no_multiple(tm.no_matplotlib(), tm.no_graphviz())) diff --git a/tests/python-gpu/test_gpu_prediction.py b/tests/python-gpu/test_gpu_prediction.py index 3dedb0637..63154e775 100644 --- a/tests/python-gpu/test_gpu_prediction.py +++ b/tests/python-gpu/test_gpu_prediction.py @@ -6,7 +6,7 @@ from hypothesis import assume, given, settings, strategies from xgboost.compat import PANDAS_INSTALLED import xgboost as xgb -from xgboost import testing +from xgboost import testing as tm if PANDAS_INSTALLED: from hypothesis.extra.pandas import column, data_frames, range_indexes @@ -16,7 +16,6 @@ else: column, data_frames, range_indexes = noop, noop, noop sys.path.append("tests/python") -import testing as tm from test_predict import run_predict_leaf # noqa from test_predict import run_threaded_predict # noqa @@ -33,7 +32,7 @@ predict_parameter_strategy = strategies.fixed_dictionaries({ 'num_parallel_tree': strategies.sampled_from([1, 4]), }) -pytestmark = testing.timeout(20) +pytestmark = tm.timeout(20) class TestGPUPredict: @@ -227,8 +226,8 @@ class TestGPUPredict: @pytest.mark.skipif(**tm.no_cupy()) @pytest.mark.skipif(**tm.no_cudf()) def test_inplace_predict_cudf(self): - import cupy as cp import cudf + import cupy as cp import pandas as pd rows = 1000 cols = 10 @@ -379,8 +378,8 @@ class TestGPUPredict: @pytest.mark.skipif(**tm.no_cupy()) @pytest.mark.parametrize("n_classes", [2, 3]) def test_predict_dart(self, 
n_classes): - from sklearn.datasets import make_classification import cupy as cp + from sklearn.datasets import make_classification n_samples = 1000 X_, y_ = make_classification( n_samples=n_samples, n_informative=5, n_classes=n_classes diff --git a/tests/python-gpu/test_gpu_ranking.py b/tests/python-gpu/test_gpu_ranking.py index 059d9325a..d86c1aa14 100644 --- a/tests/python-gpu/test_gpu_ranking.py +++ b/tests/python-gpu/test_gpu_ranking.py @@ -1,20 +1,15 @@ import itertools import os import shutil -import sys import urllib.request import zipfile import numpy as np import xgboost -from xgboost import testing +from xgboost import testing as tm -sys.path.append("tests/python") - -import testing as tm # noqa - -pytestmark = testing.timeout(10) +pytestmark = tm.timeout(10) class TestRanking: @@ -24,8 +19,9 @@ class TestRanking: Download and setup the test fixtures """ from sklearn.datasets import load_svmlight_files + # download the test data - cls.dpath = os.path.join(tm.PROJECT_ROOT, "demo/rank/") + cls.dpath = os.path.join(tm.demo_dir(__file__), "rank/") src = 'https://s3-us-west-2.amazonaws.com/xgboost-examples/MQ2008.zip' target = os.path.join(cls.dpath, "MQ2008.zip") diff --git a/tests/python-gpu/test_gpu_spark/test_data.py b/tests/python-gpu/test_gpu_spark/test_data.py index 523973250..b529fa6ab 100644 --- a/tests/python-gpu/test_gpu_spark/test_data.py +++ b/tests/python-gpu/test_gpu_spark/test_data.py @@ -1,13 +1,8 @@ import sys -from typing import List -import numpy as np -import pandas as pd import pytest -sys.path.append("tests/python") - -import testing as tm +from xgboost import testing as tm if tm.no_spark()["condition"]: pytest.skip(msg=tm.no_spark()["reason"], allow_module_level=True) @@ -15,6 +10,7 @@ if sys.platform.startswith("win") or sys.platform.startswith("darwin"): pytest.skip("Skipping PySpark tests on Windows", allow_module_level=True) +sys.path.append("tests/python") from test_spark.test_data import run_dmatrix_ctor diff --git a/tests/python-gpu/test_gpu_spark/test_gpu_spark.py b/tests/python-gpu/test_gpu_spark/test_gpu_spark.py index bcae96dc5..b8f529218 100644 --- a/tests/python-gpu/test_gpu_spark/test_gpu_spark.py +++ b/tests/python-gpu/test_gpu_spark/test_gpu_spark.py @@ -6,8 +6,7 @@ import sys import pytest import sklearn -sys.path.append("tests/python") -import testing as tm +from xgboost import testing as tm if tm.no_spark()["condition"]: pytest.skip(msg=tm.no_spark()["reason"], allow_module_level=True) diff --git a/tests/python-gpu/test_gpu_training_continuation.py b/tests/python-gpu/test_gpu_training_continuation.py index 7fa17d4be..6a908af27 100644 --- a/tests/python-gpu/test_gpu_training_continuation.py +++ b/tests/python-gpu/test_gpu_training_continuation.py @@ -1,7 +1,9 @@ -import numpy as np -import xgboost as xgb import json +import numpy as np + +import xgboost as xgb + rng = np.random.RandomState(1994) diff --git a/tests/python-gpu/test_gpu_updaters.py b/tests/python-gpu/test_gpu_updaters.py index e86152327..10fbe3d35 100644 --- a/tests/python-gpu/test_gpu_updaters.py +++ b/tests/python-gpu/test_gpu_updaters.py @@ -6,13 +6,12 @@ import pytest from hypothesis import assume, given, note, settings, strategies import xgboost as xgb -from xgboost import testing +from xgboost import testing as tm sys.path.append("tests/python") import test_updaters as test_up -import testing as tm -pytestmark = testing.timeout(30) +pytestmark = tm.timeout(30) parameter_strategy = strategies.fixed_dictionaries({ 'max_depth': strategies.integers(0, 11), diff --git 
a/tests/python-gpu/test_gpu_with_dask/test_gpu_with_dask.py b/tests/python-gpu/test_gpu_with_dask/test_gpu_with_dask.py index 09787a439..356845a01 100644 --- a/tests/python-gpu/test_gpu_with_dask/test_gpu_with_dask.py +++ b/tests/python-gpu/test_gpu_with_dask/test_gpu_with_dask.py @@ -1,52 +1,54 @@ """Copyright 2019-2022 XGBoost contributors""" -import sys -import os -from typing import Type, TypeVar, Any, Dict, List, Union -import pytest -import numpy as np import asyncio -import xgboost +import os import subprocess +import sys from collections import OrderedDict from inspect import signature -from hypothesis import given, strategies, settings, note +from typing import Any, Dict, Type, TypeVar, Union + +import numpy as np +import pytest +from hypothesis import given, note, settings, strategies from hypothesis._settings import duration from test_gpu_updaters import parameter_strategy +import xgboost +from xgboost import testing as tm + if sys.platform.startswith("win"): pytest.skip("Skipping dask tests on Windows", allow_module_level=True) sys.path.append("tests/python") -import testing as tm # noqa if tm.no_dask_cuda()["condition"]: pytest.skip(tm.no_dask_cuda()["reason"], allow_module_level=True) -from test_with_dask import run_empty_dmatrix_reg # noqa -from test_with_dask import run_empty_dmatrix_auc # noqa +from test_with_dask import _get_client_workers # noqa +from test_with_dask import generate_array # noqa +from test_with_dask import make_categorical # noqa from test_with_dask import run_auc # noqa from test_with_dask import run_boost_from_prediction # noqa from test_with_dask import run_boost_from_prediction_multi_class # noqa -from test_with_dask import run_dask_classifier # noqa -from test_with_dask import run_empty_dmatrix_cls # noqa -from test_with_dask import _get_client_workers # noqa -from test_with_dask import generate_array # noqa -from test_with_dask import kCols as random_cols # noqa -from test_with_dask import suppress # noqa -from test_with_dask import run_tree_stats # noqa from test_with_dask import run_categorical # noqa -from test_with_dask import make_categorical # noqa - +from test_with_dask import run_dask_classifier # noqa +from test_with_dask import run_empty_dmatrix_auc # noqa +from test_with_dask import run_empty_dmatrix_cls # noqa +from test_with_dask import run_empty_dmatrix_reg # noqa +from test_with_dask import run_tree_stats # noqa +from test_with_dask import suppress # noqa +from test_with_dask import kCols as random_cols # noqa try: - import dask.dataframe as dd - from xgboost import dask as dxgb - import xgboost as xgb - from dask.distributed import Client - from dask import array as da - from dask_cuda import LocalCUDACluster, utils import cudf + import dask.dataframe as dd + from dask import array as da + from dask.distributed import Client + from dask_cuda import LocalCUDACluster, utils + + import xgboost as xgb + from xgboost import dask as dxgb except ImportError: pass @@ -334,9 +336,9 @@ class TestDistributedGPU: @pytest.mark.skipif(**tm.no_dask_cudf()) def test_empty_partition(self, local_cuda_client: Client) -> None: - import dask_cudf import cudf import cupy + import dask_cudf mult = 100 df = cudf.DataFrame( diff --git a/tests/python-gpu/test_gpu_with_sklearn.py b/tests/python-gpu/test_gpu_with_sklearn.py index 227c2a874..8ecb4bdc7 100644 --- a/tests/python-gpu/test_gpu_with_sklearn.py +++ b/tests/python-gpu/test_gpu_with_sklearn.py @@ -1,13 +1,15 @@ import json -import xgboost as xgb -import pytest -import tempfile -import sys -import numpy 
as np import os +import sys +import tempfile + +import numpy as np +import pytest + +import xgboost as xgb +from xgboost import testing as tm sys.path.append("tests/python") -import testing as tm # noqa import test_with_sklearn as twskl # noqa pytestmark = pytest.mark.skipif(**tm.no_sklearn()) @@ -38,9 +40,9 @@ def test_gpu_binary_classification(): @pytest.mark.skipif(**tm.no_cupy()) @pytest.mark.skipif(**tm.no_cudf()) def test_boost_from_prediction_gpu_hist(): - from sklearn.datasets import load_breast_cancer, load_digits - import cupy as cp import cudf + import cupy as cp + from sklearn.datasets import load_breast_cancer, load_digits tree_method = "gpu_hist" X, y = load_breast_cancer(return_X_y=True) @@ -68,12 +70,12 @@ def test_num_parallel_tree(): @pytest.mark.skipif(**tm.no_cudf()) @pytest.mark.skipif(**tm.no_sklearn()) def test_categorical(): - import pandas as pd import cudf import cupy as cp + import pandas as pd from sklearn.datasets import load_svmlight_file - data_dir = os.path.join(tm.PROJECT_ROOT, "demo", "data") + data_dir = tm.data_dir(__file__) X, y = load_svmlight_file(os.path.join(data_dir, "agaricus.txt.train")) clf = xgb.XGBClassifier( tree_method="gpu_hist", @@ -123,9 +125,9 @@ def test_categorical(): @pytest.mark.skipif(**tm.no_cupy()) @pytest.mark.skipif(**tm.no_cudf()) def test_classififer(): - from sklearn.datasets import load_digits - import cupy as cp import cudf + import cupy as cp + from sklearn.datasets import load_digits X, y = load_digits(return_X_y=True) y *= 10 diff --git a/tests/python-gpu/test_large_input.py b/tests/python-gpu/test_large_input.py index 4c8e06a6f..310dd6f10 100644 --- a/tests/python-gpu/test_large_input.py +++ b/tests/python-gpu/test_large_input.py @@ -1,23 +1,23 @@ -import numpy as np -import xgboost as xgb -import cupy as cp -import time -import pytest - - -# Test for integer overflow or out of memory exceptions -def test_large_input(): - available_bytes, _ = cp.cuda.runtime.memGetInfo() - # 15 GB - required_bytes = 1.5e+10 - if available_bytes < required_bytes: - pytest.skip("Not enough memory on this device") - n = 1000 - m = ((1 << 31) + n - 1) // n - assert (np.log2(m * n) > 31) - X = cp.ones((m, n), dtype=np.float32) - y = cp.ones(m) - dmat = xgb.DeviceQuantileDMatrix(X, y) - booster = xgb.train({"tree_method": "gpu_hist", "max_depth": 1}, dmat, 1) - del y - booster.inplace_predict(X) +import cupy as cp +import numpy as np +import pytest + +import xgboost as xgb + + +# Test for integer overflow or out of memory exceptions +def test_large_input(): + available_bytes, _ = cp.cuda.runtime.memGetInfo() + # 15 GB + required_bytes = 1.5e+10 + if available_bytes < required_bytes: + pytest.skip("Not enough memory on this device") + n = 1000 + m = ((1 << 31) + n - 1) // n + assert (np.log2(m * n) > 31) + X = cp.ones((m, n), dtype=np.float32) + y = cp.ones(m) + dmat = xgb.DeviceQuantileDMatrix(X, y) + booster = xgb.train({"tree_method": "gpu_hist", "max_depth": 1}, dmat, 1) + del y + booster.inplace_predict(X) diff --git a/tests/python-gpu/test_monotonic_constraints.py b/tests/python-gpu/test_monotonic_constraints.py index fdecf0306..3bf4f0deb 100644 --- a/tests/python-gpu/test_monotonic_constraints.py +++ b/tests/python-gpu/test_monotonic_constraints.py @@ -1,11 +1,12 @@ import sys -import numpy as np +import numpy as np import pytest import xgboost as xgb +from xgboost import testing as tm + sys.path.append("tests/python") -import testing as tm import test_monotone_constraints as tmc rng = np.random.RandomState(1994) diff --git 
a/tests/python/generate_models.py b/tests/python/generate_models.py index 7b881355e..2a2444e8f 100644 --- a/tests/python/generate_models.py +++ b/tests/python/generate_models.py @@ -1,7 +1,9 @@ -import xgboost -import numpy as np import os +import numpy as np + +import xgboost + kRounds = 2 kRows = 1000 kCols = 4 diff --git a/tests/python/test_basic.py b/tests/python/test_basic.py index e155ab047..fab2a6eca 100644 --- a/tests/python/test_basic.py +++ b/tests/python/test_basic.py @@ -1,12 +1,13 @@ -# -*- coding: utf-8 -*- -import numpy as np -import os -import xgboost as xgb -import pytest import json -from pathlib import Path +import os import tempfile -import testing as tm +from pathlib import Path + +import numpy as np +import pytest + +import xgboost as xgb +from xgboost import testing as tm dpath = 'demo/data/' rng = np.random.RandomState(1994) diff --git a/tests/python/test_basic_models.py b/tests/python/test_basic_models.py index 82d0096cf..06f666da1 100644 --- a/tests/python/test_basic_models.py +++ b/tests/python/test_basic_models.py @@ -1,13 +1,15 @@ -import numpy as np -import xgboost as xgb -import os import json -import testing as tm -import pytest import locale +import os import tempfile -dpath = os.path.join(tm.PROJECT_ROOT, 'demo/data/') +import numpy as np +import pytest + +import xgboost as xgb +from xgboost import testing as tm + +dpath = tm.data_dir(__file__) rng = np.random.RandomState(1994) @@ -36,8 +38,8 @@ class TestModels: param = {'verbosity': 0, 'objective': 'binary:logistic', 'booster': 'gblinear', 'alpha': 0.0001, 'lambda': 1, 'nthread': 1} - dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train') - dtest = xgb.DMatrix(dpath + 'agaricus.txt.test') + dtrain = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.train")) + dtest = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.test")) watchlist = [(dtest, 'eval'), (dtrain, 'train')] num_round = 4 bst = xgb.train(param, dtrain, num_round, watchlist) @@ -49,8 +51,8 @@ class TestModels: assert err < 0.2 def test_dart(self): - dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train') - dtest = xgb.DMatrix(dpath + 'agaricus.txt.test') + dtrain = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.train")) + dtest = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.test")) param = {'max_depth': 5, 'objective': 'binary:logistic', 'eval_metric': 'logloss', 'booster': 'dart', 'verbosity': 1} # specify validations set to watch performance @@ -116,7 +118,7 @@ class TestModels: def test_boost_from_prediction(self): # Re-construct dtrain here to avoid modification - margined = xgb.DMatrix(dpath + 'agaricus.txt.train') + margined = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.train")) bst = xgb.train({'tree_method': 'hist'}, margined, 1) predt_0 = bst.predict(margined, output_margin=True) margined.set_base_margin(predt_0) @@ -124,13 +126,13 @@ class TestModels: predt_1 = bst.predict(margined) assert np.any(np.abs(predt_1 - predt_0) > 1e-6) - dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train') + dtrain = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.train")) bst = xgb.train({'tree_method': 'hist'}, dtrain, 2) predt_2 = bst.predict(dtrain) assert np.all(np.abs(predt_2 - predt_1) < 1e-6) def test_boost_from_existing_model(self): - X = xgb.DMatrix(dpath + 'agaricus.txt.train') + X = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.train")) booster = xgb.train({'tree_method': 'hist'}, X, num_boost_round=4) assert booster.num_boosted_rounds() == 4 booster = xgb.train({'tree_method': 'hist'}, X, num_boost_round=4, @@ -150,8 +152,8 @@ class TestModels: 'objective': 
'reg:logistic', "tree_method": tree_method } - dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train') - dtest = xgb.DMatrix(dpath + 'agaricus.txt.test') + dtrain = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.train")) + dtest = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.test")) watchlist = [(dtest, 'eval'), (dtrain, 'train')] num_round = 10 @@ -197,8 +199,8 @@ class TestModels: self.run_custom_objective() def test_multi_eval_metric(self): - dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train') - dtest = xgb.DMatrix(dpath + 'agaricus.txt.test') + dtrain = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.train")) + dtest = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.test")) watchlist = [(dtest, 'eval'), (dtrain, 'train')] param = {'max_depth': 2, 'eta': 0.2, 'verbosity': 1, 'objective': 'binary:logistic'} @@ -220,7 +222,7 @@ class TestModels: param['scale_pos_weight'] = ratio return (dtrain, dtest, param) - dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train') + dtrain = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.train")) xgb.cv(param, dtrain, num_round, nfold=5, metrics={'auc'}, seed=0, fpreproc=fpreproc) @@ -228,7 +230,7 @@ class TestModels: param = {'max_depth': 2, 'eta': 1, 'verbosity': 0, 'objective': 'binary:logistic'} num_round = 2 - dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train') + dtrain = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.train")) xgb.cv(param, dtrain, num_round, nfold=5, metrics={'error'}, seed=0, show_stdv=False) @@ -346,7 +348,7 @@ class TestModels: os.remove(model_path) try: - dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train') + dtrain = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.train")) xgb.train({'objective': 'foo'}, dtrain, num_boost_round=1) except ValueError as e: e_str = str(e) diff --git a/tests/python/test_callback.py b/tests/python/test_callback.py index dcd898ac0..3e972345b 100644 --- a/tests/python/test_callback.py +++ b/tests/python/test_callback.py @@ -1,9 +1,12 @@ -from typing import Union -import xgboost as xgb -import pytest import os -import testing as tm import tempfile +from contextlib import nullcontext +from typing import Union + +import pytest + +import xgboost as xgb +from xgboost import testing as tm # We use the dataset for tests. 
pytestmark = pytest.mark.skipif(**tm.no_sklearn()) @@ -271,13 +274,14 @@ class TestCallbacks: """Test learning rate scheduler, used by both CPU and GPU tests.""" scheduler = xgb.callback.LearningRateScheduler - dpath = os.path.join(tm.PROJECT_ROOT, 'demo/data/') - dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train') - dtest = xgb.DMatrix(dpath + 'agaricus.txt.test') + dpath = tm.data_dir(__file__) + dtrain = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.train")) + dtest = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.test")) + watchlist = [(dtest, 'eval'), (dtrain, 'train')] num_round = 4 - warning_check = tm.noop_context() + warning_check = nullcontext() # learning_rates as a list # init eta with 0 to check whether learning_rates work diff --git a/tests/python/test_cli.py b/tests/python/test_cli.py index aef9bc586..69e8df83d 100644 --- a/tests/python/test_cli.py +++ b/tests/python/test_cli.py @@ -1,11 +1,13 @@ -import os -import tempfile -import platform -import xgboost -import subprocess -import numpy import json -import testing as tm +import os +import platform +import subprocess +import tempfile + +import numpy + +import xgboost +from xgboost import testing as tm class TestCLI: @@ -29,7 +31,7 @@ data = {data_path} eval[test] = {data_path} ''' - PROJECT_ROOT = tm.PROJECT_ROOT + PROJECT_ROOT = tm.project_root(__file__) def get_exe(self): if platform.system() == 'Windows': diff --git a/tests/python/test_data_iterator.py b/tests/python/test_data_iterator.py index 0416bd8a4..cf81288e8 100644 --- a/tests/python/test_data_iterator.py +++ b/tests/python/test_data_iterator.py @@ -1,14 +1,16 @@ +from typing import Dict, List + import numpy as np import pytest from hypothesis import given, settings, strategies from scipy.sparse import csr_matrix -from testing import IteratorForTest, make_batches, non_increasing from xgboost.data import SingleBatchInternalIter as SingleBatch +from xgboost.testing import IteratorForTest, make_batches, non_increasing import xgboost as xgb -from xgboost import testing +from xgboost import testing as tm -pytestmark = testing.timeout(30) +pytestmark = tm.timeout(30) def test_single_batch(tree_method: str = "approx") -> None: @@ -83,7 +85,7 @@ def run_data_iterator( if tree_method == "gpu_hist": parameters["sampling_method"] = "gradient_based" - results_from_it: xgb.callback.EvaluationMonitor.EvalsLog = {} + results_from_it: Dict[str, Dict[str, List[float]]] = {} from_it = xgb.train( parameters, Xy, @@ -106,7 +108,7 @@ def run_data_iterator( assert Xy.num_row() == n_samples_per_batch * n_batches assert Xy.num_col() == n_features - results_from_arrays: xgb.callback.EvaluationMonitor.EvalsLog = {} + results_from_arrays: Dict[str, Dict[str, List[float]]] = {} from_arrays = xgb.train( parameters, Xy, diff --git a/tests/python/test_demos.py b/tests/python/test_demos.py index 63e44c0b0..97a462ff9 100644 --- a/tests/python/test_demos.py +++ b/tests/python/test_demos.py @@ -3,14 +3,12 @@ import subprocess import sys import pytest -import testing as tm -from xgboost import testing +from xgboost import testing as tm -pytestmark = testing.timeout(30) +pytestmark = tm.timeout(30) -ROOT_DIR = tm.PROJECT_ROOT -DEMO_DIR = os.path.join(ROOT_DIR, 'demo') +DEMO_DIR = tm.demo_dir(__file__) PYTHON_DEMO_DIR = os.path.join(DEMO_DIR, 'guide-python') CLI_DEMO_DIR = os.path.join(DEMO_DIR, 'CLI') @@ -156,7 +154,7 @@ def test_cli_regression_demo(): cmd = ['python', script, 'machine.txt', '1'] subprocess.check_call(cmd, cwd=reg_dir) - exe = os.path.join(tm.PROJECT_ROOT, 'xgboost') + exe = 
os.path.join(DEMO_DIR, os.path.pardir, 'xgboost') conf = os.path.join(reg_dir, 'machine.conf') subprocess.check_call([exe, conf], cwd=reg_dir) diff --git a/tests/python/test_dmatrix.py b/tests/python/test_dmatrix.py index b7933eac4..def369027 100644 --- a/tests/python/test_dmatrix.py +++ b/tests/python/test_dmatrix.py @@ -4,11 +4,11 @@ import tempfile import numpy as np import pytest import scipy.sparse -import testing as tm from hypothesis import given, settings, strategies from scipy.sparse import csr_matrix, rand import xgboost as xgb +from xgboost import testing as tm rng = np.random.RandomState(1) diff --git a/tests/python/test_dt.py b/tests/python/test_dt.py index b62b1317b..eee874b16 100644 --- a/tests/python/test_dt.py +++ b/tests/python/test_dt.py @@ -1,9 +1,8 @@ -# -*- coding: utf-8 -*- -import pytest import numpy as np +import pytest -import testing as tm import xgboost as xgb +from xgboost import testing as tm try: import datatable as dt diff --git a/tests/python/test_early_stopping.py b/tests/python/test_early_stopping.py index 29f8fb4b0..ab1aebc77 100644 --- a/tests/python/test_early_stopping.py +++ b/tests/python/test_early_stopping.py @@ -1,8 +1,9 @@ -import xgboost as xgb -import testing as tm import numpy as np import pytest +import xgboost as xgb +from xgboost import testing as tm + rng = np.random.RandomState(1994) diff --git a/tests/python/test_eval_metrics.py b/tests/python/test_eval_metrics.py index 72263e3d5..24e3817ce 100644 --- a/tests/python/test_eval_metrics.py +++ b/tests/python/test_eval_metrics.py @@ -1,8 +1,9 @@ -import xgboost as xgb -import testing as tm import numpy as np import pytest +import xgboost as xgb +from xgboost import testing as tm + rng = np.random.RandomState(1337) @@ -254,8 +255,8 @@ class TestEvalMetrics: self.run_roc_auc_multi("hist", n_samples, weighted) def run_pr_auc_binary(self, tree_method): - from sklearn.metrics import precision_recall_curve, auc from sklearn.datasets import make_classification + from sklearn.metrics import auc, precision_recall_curve X, y = make_classification(128, 4, n_classes=2, random_state=1994) clf = xgb.XGBClassifier(tree_method=tree_method, n_estimators=1) clf.fit(X, y, eval_metric="aucpr", eval_set=[(X, y)]) diff --git a/tests/python/test_interaction_constraints.py b/tests/python/test_interaction_constraints.py index 18d416501..96d2ba7dc 100644 --- a/tests/python/test_interaction_constraints.py +++ b/tests/python/test_interaction_constraints.py @@ -1,9 +1,9 @@ -# -*- coding: utf-8 -*- import numpy as np -import xgboost -import testing as tm import pytest +import xgboost +from xgboost import testing as tm + dpath = 'demo/data/' rng = np.random.RandomState(1994) diff --git a/tests/python/test_linear.py b/tests/python/test_linear.py index 78e604635..b3dbf35f1 100644 --- a/tests/python/test_linear.py +++ b/tests/python/test_linear.py @@ -1,10 +1,9 @@ -import testing as tm from hypothesis import given, note, settings, strategies import xgboost as xgb -from xgboost import testing +from xgboost import testing as tm -pytestmark = testing.timeout(10) +pytestmark = tm.timeout(10) parameter_strategy = strategies.fixed_dictionaries({ diff --git a/tests/python/test_model_compatibility.py b/tests/python/test_model_compatibility.py index 88549e1f2..a46715e42 100644 --- a/tests/python/test_model_compatibility.py +++ b/tests/python/test_model_compatibility.py @@ -1,12 +1,14 @@ -import xgboost -import os -import generate_models as gm -import testing as tm -import json -import zipfile -import pytest import copy +import json 
diff --git a/tests/python/test_demos.py b/tests/python/test_demos.py
index 63e44c0b0..97a462ff9 100644
--- a/tests/python/test_demos.py
+++ b/tests/python/test_demos.py
@@ -3,14 +3,12 @@ import subprocess
 import sys
 
 import pytest
-import testing as tm
 
-from xgboost import testing
+from xgboost import testing as tm
 
-pytestmark = testing.timeout(30)
+pytestmark = tm.timeout(30)
 
-ROOT_DIR = tm.PROJECT_ROOT
-DEMO_DIR = os.path.join(ROOT_DIR, 'demo')
+DEMO_DIR = tm.demo_dir(__file__)
 PYTHON_DEMO_DIR = os.path.join(DEMO_DIR, 'guide-python')
 CLI_DEMO_DIR = os.path.join(DEMO_DIR, 'CLI')
 
@@ -156,7 +154,7 @@ def test_cli_regression_demo():
     cmd = ['python', script, 'machine.txt', '1']
     subprocess.check_call(cmd, cwd=reg_dir)
 
-    exe = os.path.join(tm.PROJECT_ROOT, 'xgboost')
+    exe = os.path.join(DEMO_DIR, os.path.pardir, 'xgboost')
     conf = os.path.join(reg_dir, 'machine.conf')
     subprocess.check_call([exe, conf], cwd=reg_dir)
diff --git a/tests/python/test_dmatrix.py b/tests/python/test_dmatrix.py
index b7933eac4..def369027 100644
--- a/tests/python/test_dmatrix.py
+++ b/tests/python/test_dmatrix.py
@@ -4,11 +4,11 @@ import tempfile
 import numpy as np
 import pytest
 import scipy.sparse
-import testing as tm
 from hypothesis import given, settings, strategies
 from scipy.sparse import csr_matrix, rand
 
 import xgboost as xgb
+from xgboost import testing as tm
 
 rng = np.random.RandomState(1)
 
diff --git a/tests/python/test_dt.py b/tests/python/test_dt.py
index b62b1317b..eee874b16 100644
--- a/tests/python/test_dt.py
+++ b/tests/python/test_dt.py
@@ -1,9 +1,8 @@
-# -*- coding: utf-8 -*-
-import pytest
 import numpy as np
+import pytest
 
-import testing as tm
 import xgboost as xgb
+from xgboost import testing as tm
 
 try:
     import datatable as dt
diff --git a/tests/python/test_early_stopping.py b/tests/python/test_early_stopping.py
index 29f8fb4b0..ab1aebc77 100644
--- a/tests/python/test_early_stopping.py
+++ b/tests/python/test_early_stopping.py
@@ -1,8 +1,9 @@
-import xgboost as xgb
-import testing as tm
 import numpy as np
 import pytest
 
+import xgboost as xgb
+from xgboost import testing as tm
+
 rng = np.random.RandomState(1994)
 
diff --git a/tests/python/test_eval_metrics.py b/tests/python/test_eval_metrics.py
index 72263e3d5..24e3817ce 100644
--- a/tests/python/test_eval_metrics.py
+++ b/tests/python/test_eval_metrics.py
@@ -1,8 +1,9 @@
-import xgboost as xgb
-import testing as tm
 import numpy as np
 import pytest
 
+import xgboost as xgb
+from xgboost import testing as tm
+
 rng = np.random.RandomState(1337)
 
@@ -254,8 +255,8 @@ class TestEvalMetrics:
         self.run_roc_auc_multi("hist", n_samples, weighted)
 
     def run_pr_auc_binary(self, tree_method):
-        from sklearn.metrics import precision_recall_curve, auc
         from sklearn.datasets import make_classification
+        from sklearn.metrics import auc, precision_recall_curve
         X, y = make_classification(128, 4, n_classes=2, random_state=1994)
         clf = xgb.XGBClassifier(tree_method=tree_method, n_estimators=1)
         clf.fit(X, y, eval_metric="aucpr", eval_set=[(X, y)])
diff --git a/tests/python/test_interaction_constraints.py b/tests/python/test_interaction_constraints.py
index 18d416501..96d2ba7dc 100644
--- a/tests/python/test_interaction_constraints.py
+++ b/tests/python/test_interaction_constraints.py
@@ -1,9 +1,9 @@
-# -*- coding: utf-8 -*-
 import numpy as np
-import xgboost
-import testing as tm
 import pytest
 
+import xgboost
+from xgboost import testing as tm
+
 dpath = 'demo/data/'
 rng = np.random.RandomState(1994)
 
diff --git a/tests/python/test_linear.py b/tests/python/test_linear.py
index 78e604635..b3dbf35f1 100644
--- a/tests/python/test_linear.py
+++ b/tests/python/test_linear.py
@@ -1,10 +1,9 @@
-import testing as tm
 from hypothesis import given, note, settings, strategies
 
 import xgboost as xgb
-from xgboost import testing
+from xgboost import testing as tm
 
-pytestmark = testing.timeout(10)
+pytestmark = tm.timeout(10)
 
 
 parameter_strategy = strategies.fixed_dictionaries({
diff --git a/tests/python/test_model_compatibility.py b/tests/python/test_model_compatibility.py
index 88549e1f2..a46715e42 100644
--- a/tests/python/test_model_compatibility.py
+++ b/tests/python/test_model_compatibility.py
@@ -1,12 +1,14 @@
-import xgboost
-import os
-import generate_models as gm
-import testing as tm
-import json
-import zipfile
-import pytest
+import copy
+import json
+import os
 import urllib.request
+import zipfile
+
+import generate_models as gm
+import pytest
+
+import xgboost
+from xgboost import testing as tm
 
 
 def run_model_param_check(config):
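`tm.PROJECT_ROOT` is replaced throughout by the `tm.project_root(__file__)`, `tm.demo_dir(__file__)`, and `tm.data_dir(__file__)` helpers, which derive paths from the calling test file instead of a module-level constant. Their definitions are not part of this section; a plausible sketch, assuming callers sit two directories below the repository root (as `tests/python` does):

```python
import os


def normpath(path: str) -> str:
    # Canonicalize a path so dirname() walks work regardless of the cwd.
    return os.path.normpath(os.path.abspath(path))


def project_root(path: str) -> str:
    # tests/python/test_foo.py -> <root>/tests/python -> <root>
    return os.path.normpath(
        os.path.join(os.path.dirname(normpath(path)), os.path.pardir, os.path.pardir)
    )


def demo_dir(path: str) -> str:
    return os.path.join(project_root(path), "demo")


def data_dir(path: str) -> str:
    return os.path.join(demo_dir(path), "data")
```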
diff --git a/tests/python/test_monotone_constraints.py b/tests/python/test_monotone_constraints.py
index ae2c2917d..4dbfaa60d 100644
--- a/tests/python/test_monotone_constraints.py
+++ b/tests/python/test_monotone_constraints.py
@@ -1,8 +1,9 @@
 import numpy as np
-import xgboost as xgb
-import testing as tm
 import pytest
 
+import xgboost as xgb
+from xgboost import testing as tm
+
 dpath = 'demo/data/'
 
diff --git a/tests/python/test_openmp.py b/tests/python/test_openmp.py
index 950d15d86..c53363736 100644
--- a/tests/python/test_openmp.py
+++ b/tests/python/test_openmp.py
@@ -4,12 +4,11 @@ import tempfile
 
 import numpy as np
 import pytest
-import testing as tm
 
 import xgboost as xgb
-from xgboost import testing
+from xgboost import testing as tm
 
-pytestmark = testing.timeout(10)
+pytestmark = tm.timeout(10)
 
 
 class TestOMP:
@@ -86,7 +85,7 @@ class TestOMP:
     def test_with_omp_thread_limit(self):
        args = [
            "python", os.path.join(
-                tm.PROJECT_ROOT, "tests", "python", "with_omp_limit.py"
+                os.path.dirname(tm.normpath(__file__)), "with_omp_limit.py"
            )
        ]
        results = []
diff --git a/tests/python/test_parse_tree.py b/tests/python/test_parse_tree.py
index 4957b93bf..885c0f1e2 100644
--- a/tests/python/test_parse_tree.py
+++ b/tests/python/test_parse_tree.py
@@ -1,8 +1,8 @@
-import xgboost as xgb
 import numpy as np
 import pytest
 
-import testing as tm
+import xgboost as xgb
+from xgboost import testing as tm
 
 pytestmark = pytest.mark.skipif(**tm.no_pandas())
 
diff --git a/tests/python/test_pickling.py b/tests/python/test_pickling.py
index 37bbc6c13..161a5fd4e 100644
--- a/tests/python/test_pickling.py
+++ b/tests/python/test_pickling.py
@@ -1,9 +1,10 @@
-import pickle
-import numpy as np
-import xgboost as xgb
-import os
 import json
+import os
+import pickle
+
+import numpy as np
+
+import xgboost as xgb
 
 kRows = 100
 kCols = 10
diff --git a/tests/python/test_plotting.py b/tests/python/test_plotting.py
index 0167fb62d..dc45cd254 100644
--- a/tests/python/test_plotting.py
+++ b/tests/python/test_plotting.py
@@ -1,15 +1,16 @@
 import json
-import numpy as np
-import xgboost as xgb
-import testing as tm
 
+import numpy as np
 import pytest
 
+import xgboost as xgb
+from xgboost import testing as tm
+
 try:
     import matplotlib
     matplotlib.use('Agg')
-    from matplotlib.axes import Axes
     from graphviz import Source
+    from matplotlib.axes import Axes
 except ImportError:
     pass
 
diff --git a/tests/python/test_predict.py b/tests/python/test_predict.py
index f4ea944e8..daf916198 100644
--- a/tests/python/test_predict.py
+++ b/tests/python/test_predict.py
@@ -1,12 +1,13 @@
 '''Tests for running inplace prediction.'''
 from concurrent.futures import ThreadPoolExecutor
 
-import numpy as np
-from scipy import sparse
-import pytest
-import pandas as pd
-import testing as tm
+import numpy as np
+import pandas as pd
+import pytest
+from scipy import sparse
+
 import xgboost as xgb
+from xgboost import testing as tm
 
 
 def run_threaded_predict(X, rows, predict_func):
diff --git a/tests/python/test_quantile_dmatrix.py b/tests/python/test_quantile_dmatrix.py
index 65ccfa4e5..56b2a7d90 100644
--- a/tests/python/test_quantile_dmatrix.py
+++ b/tests/python/test_quantile_dmatrix.py
@@ -4,7 +4,7 @@ import numpy as np
 import pytest
 from hypothesis import given, settings, strategies
 from scipy import sparse
-from testing import (
+from xgboost.testing import (
     IteratorForTest,
     make_batches,
     make_batches_sparse,
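`test_with_omp_thread_limit` above now locates `with_omp_limit.py` relative to the test file itself. One way the subprocess might be driven (a hypothetical harness; the real test body and its environment handling lie outside this diff):

```python
import os
import subprocess
import sys
import tempfile


def run_with_omp_limit(script: str, limit: int) -> str:
    # Run the helper in a child process with the OpenMP thread cap set,
    # then return whatever the script writes to its output file so the
    # caller can compare results across limits.
    with tempfile.TemporaryDirectory() as tmpdir:
        out = os.path.join(tmpdir, "result.txt")
        env = dict(os.environ, OMP_THREAD_LIMIT=str(limit))
        subprocess.check_call([sys.executable, script, out], env=env)
        with open(out) as fd:
            return fd.read()
```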
diff --git a/tests/python/test_ranking.py b/tests/python/test_ranking.py
index 98bca122f..da2411983 100644
--- a/tests/python/test_ranking.py
+++ b/tests/python/test_ranking.py
@@ -1,13 +1,15 @@
-import numpy as np
-from scipy.sparse import csr_matrix
-import testing as tm
-import xgboost
-import os
 import itertools
+import os
 import shutil
 import urllib.request
 import zipfile
 
+import numpy as np
+from scipy.sparse import csr_matrix
+
+import xgboost
+from xgboost import testing as tm
+
 
 def test_ranking_with_unweighted_data():
     Xrow = np.array([1, 2, 6, 8, 11, 14, 16, 17])
diff --git a/tests/python/test_shap.py b/tests/python/test_shap.py
index 54a95c8c2..4d861ad6e 100644
--- a/tests/python/test_shap.py
+++ b/tests/python/test_shap.py
@@ -1,11 +1,12 @@
-# -*- coding: utf-8 -*-
-import numpy as np
-import xgboost as xgb
 import itertools
 import re
+
+import numpy as np
 import scipy
 import scipy.special
 
+import xgboost as xgb
+
 dpath = 'demo/data/'
 rng = np.random.RandomState(1994)
 
diff --git a/tests/python/test_spark/test_data.py b/tests/python/test_spark/test_data.py
index cf9063b71..20b31998e 100644
--- a/tests/python/test_spark/test_data.py
+++ b/tests/python/test_spark/test_data.py
@@ -4,7 +4,8 @@ from typing import List
 import numpy as np
 import pandas as pd
 import pytest
-import testing as tm
+
+from xgboost import testing as tm
 
 if tm.no_spark()["condition"]:
     pytest.skip(msg=tm.no_spark()["reason"], allow_module_level=True)
diff --git a/tests/python/test_spark/test_spark_local.py b/tests/python/test_spark/test_spark_local.py
index 03981d955..758c5c87b 100644
--- a/tests/python/test_spark/test_spark_local.py
+++ b/tests/python/test_spark/test_spark_local.py
@@ -6,10 +6,9 @@ import uuid
 
 import numpy as np
 import pytest
-import testing as tm
 
 import xgboost as xgb
-from xgboost import testing
+from xgboost import testing as tm
 
 if tm.no_spark()["condition"]:
     pytest.skip(msg=tm.no_spark()["reason"], allow_module_level=True)
@@ -38,7 +37,7 @@ from .utils import SparkTestCase
 
 logging.getLogger("py4j").setLevel(logging.INFO)
 
-pytestmark = testing.timeout(60)
+pytestmark = tm.timeout(60)
 
 
 class XgboostLocalTest(SparkTestCase):
diff --git a/tests/python/test_spark/test_spark_local_cluster.py b/tests/python/test_spark/test_spark_local_cluster.py
index 9276e08f3..3f375644f 100644
--- a/tests/python/test_spark/test_spark_local_cluster.py
+++ b/tests/python/test_spark/test_spark_local_cluster.py
@@ -6,7 +6,8 @@ import uuid
 
 import numpy as np
 import pytest
-import testing as tm
+
+from xgboost import testing as tm
 
 if tm.no_spark()["condition"]:
     pytest.skip(msg=tm.no_spark()["reason"], allow_module_level=True)
diff --git a/tests/python/test_spark/utils.py b/tests/python/test_spark/utils.py
index 23968fbcc..0ed9f8521 100644
--- a/tests/python/test_spark/utils.py
+++ b/tests/python/test_spark/utils.py
@@ -6,9 +6,10 @@ import tempfile
 import unittest
 
 import pytest
-import testing as tm
 from six import StringIO
 
+from xgboost import testing as tm
+
 if tm.no_spark()["condition"]:
     pytest.skip(msg=tm.no_spark()["reason"], allow_module_level=True)
 if sys.platform.startswith("win") or sys.platform.startswith("darwin"):
diff --git a/tests/python/test_survival.py b/tests/python/test_survival.py
index 1fb931545..7a297c191 100644
--- a/tests/python/test_survival.py
+++ b/tests/python/test_survival.py
@@ -1,11 +1,13 @@
-import testing as tm
-import pytest
-import numpy as np
-import xgboost as xgb
 import json
 import os
 
-dpath = os.path.join(tm.PROJECT_ROOT, 'demo', 'data')
+import numpy as np
+import pytest
+
+import xgboost as xgb
+from xgboost import testing as tm
+
+dpath = tm.data_dir(__file__)
 
 
 def test_aft_survival_toy_data():
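The Spark modules above gate themselves at import time: `tm.no_spark()` follows the same convention as `tm.no_pandas()` and `tm.no_dask()`, returning a dict whose `condition`/`reason` keys unpack directly into `pytest.mark.skipif`. A sketch of the pattern, with a hypothetical `no_spark` implementation:

```python
import importlib.util

import pytest


def no_spark() -> dict:
    # Hypothetical implementation: skip when pyspark cannot be imported.
    return {
        "condition": importlib.util.find_spec("pyspark") is None,
        "reason": "PySpark is not installed.",
    }


# Module-level gate, as used in the test_spark modules:
if no_spark()["condition"]:
    pytest.skip(msg=no_spark()["reason"], allow_module_level=True)


# Or per test, via keyword unpacking into skipif(condition=..., reason=...):
@pytest.mark.skipif(**no_spark())
def test_spark_feature() -> None:
    ...
```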
diff --git a/tests/python/test_tracker.py b/tests/python/test_tracker.py
index 67543a968..11d3be36f 100644
--- a/tests/python/test_tracker.py
+++ b/tests/python/test_tracker.py
@@ -3,10 +3,10 @@ import sys
 
 import numpy as np
 import pytest
-import testing as tm
 
 import xgboost as xgb
-from xgboost import RabitTracker, testing
+from xgboost import RabitTracker
+from xgboost import testing as tm
 
 if sys.platform.startswith("win"):
     pytest.skip("Skipping dask tests on Windows", allow_module_level=True)
@@ -61,7 +61,7 @@ def test_rabit_ops():
         run_rabit_ops(client, n_workers)
 
 
-@pytest.mark.skipif(**testing.skip_ipv6())
+@pytest.mark.skipif(**tm.no_ipv6())
 @pytest.mark.skipif(**tm.no_dask())
 def test_rabit_ops_ipv6():
     import dask
diff --git a/tests/python/test_training_continuation.py b/tests/python/test_training_continuation.py
index 31a408170..258af760c 100644
--- a/tests/python/test_training_continuation.py
+++ b/tests/python/test_training_continuation.py
@@ -1,10 +1,11 @@
-import xgboost as xgb
-import testing as tm
-import numpy as np
-import pytest
 import os
 import tempfile
 
+import numpy as np
+import pytest
+
+import xgboost as xgb
+from xgboost import testing as tm
 
 rng = np.random.RandomState(1337)
 
diff --git a/tests/python/test_tree_regularization.py b/tests/python/test_tree_regularization.py
index 92fa9fb51..ae8e539a0 100644
--- a/tests/python/test_tree_regularization.py
+++ b/tests/python/test_tree_regularization.py
@@ -1,8 +1,8 @@
 import numpy as np
-import xgboost as xgb
-
 from numpy.testing import assert_approx_equal
 
+import xgboost as xgb
+
 train_data = xgb.DMatrix(np.array([[1]]), label=np.array([1]))
 
diff --git a/tests/python/test_updaters.py b/tests/python/test_updaters.py
index e28f17386..e8e43e8e7 100644
--- a/tests/python/test_updaters.py
+++ b/tests/python/test_updaters.py
@@ -1,11 +1,13 @@
 import json
 from string import ascii_lowercase
-from typing import Dict, Any
-import testing as tm
-import pytest
-import xgboost as xgb
+from typing import Any, Dict
+
 import numpy as np
-from hypothesis import given, strategies, settings, note
+import pytest
+from hypothesis import given, note, settings, strategies
+
+import xgboost as xgb
+from xgboost import testing as tm
 
 exact_parameter_strategy = strategies.fixed_dictionaries({
     'nthread': strategies.integers(1, 4),
diff --git a/tests/python/test_with_arrow.py b/tests/python/test_with_arrow.py
index ad2448294..8b7bce9eb 100644
--- a/tests/python/test_with_arrow.py
+++ b/tests/python/test_with_arrow.py
@@ -1,14 +1,16 @@
-import unittest
-import pytest
-import numpy as np
-import testing as tm
-import xgboost as xgb
 import os
+import unittest
+
+import numpy as np
+import pytest
+
+import xgboost as xgb
+from xgboost import testing as tm
 
 try:
+    import pandas as pd
     import pyarrow as pa
     import pyarrow.csv as pc
-    import pandas as pd
 except ImportError:
     pass
 
@@ -73,7 +75,7 @@ class TestArrowTable(unittest.TestCase):
         np.testing.assert_allclose(preds1, preds2)
 
     def test_arrow_survival(self):
-        data = os.path.join(tm.PROJECT_ROOT, "demo", "data", "veterans_lung_cancer.csv")
+        data = os.path.join(tm.data_dir(__file__), "veterans_lung_cancer.csv")
         table = pc.read_csv(data)
         y_lower_bound = table["Survival_label_lower_bound"]
         y_upper_bound = table["Survival_label_upper_bound"]
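`test_updaters.py` above keeps its Hypothesis search space built with `strategies.fixed_dictionaries` while the imports are regrouped. For reference, that strategy draws one value per key and hands the whole dict to the test; a miniature, self-contained version of the pattern:

```python
from hypothesis import given, strategies

# Toy analogue of the exact_parameter_strategy above.
param_strategy = strategies.fixed_dictionaries({
    "nthread": strategies.integers(1, 4),
    "max_depth": strategies.integers(1, 11),
})


@given(param_strategy)
def test_parameter_shapes(params: dict) -> None:
    # strategies.integers bounds are inclusive on both ends.
    assert 1 <= params["nthread"] <= 4
    assert 1 <= params["max_depth"] <= 11
```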
diff --git a/tests/python/test_with_dask.py b/tests/python/test_with_dask.py
index c06232e99..d54aba6ca 100644
--- a/tests/python/test_with_dask.py
+++ b/tests/python/test_with_dask.py
@@ -20,7 +20,6 @@ import numpy as np
 import pytest
 import scipy
 import sklearn
-import testing as tm
 from hypothesis import HealthCheck, given, note, settings
 from sklearn.datasets import make_classification, make_regression
 from test_predict import verify_leaf_output
@@ -29,7 +28,7 @@ from test_with_sklearn import run_data_initialization, run_feature_weights
 from xgboost.data import _is_cudf_df
 
 import xgboost as xgb
-from xgboost import testing
+from xgboost import testing as tm
 
 if sys.platform.startswith("win"):
     pytest.skip("Skipping dask tests on Windows", allow_module_level=True)
@@ -45,7 +44,7 @@ from xgboost.dask import DaskDMatrix
 
 dask.config.set({"distributed.scheduler.allowed-failures": False})
 
-pytestmark = testing.timeout(30)
+pytestmark = tm.timeout(30)
 
 if hasattr(HealthCheck, 'function_scoped_fixture'):
     suppress = [HealthCheck.function_scoped_fixture]
@@ -1116,8 +1115,9 @@ def test_predict_with_meta(client: "Client") -> None:
 
 
 def run_aft_survival(client: "Client", dmatrix_t: Type) -> None:
-    df = dd.read_csv(os.path.join(tm.PROJECT_ROOT, 'demo', 'data',
-                                  'veterans_lung_cancer.csv'))
+    df = dd.read_csv(
+        os.path.join(tm.data_dir(__file__), "veterans_lung_cancer.csv")
+    )
     y_lower_bound = df['Survival_label_lower_bound']
     y_upper_bound = df['Survival_label_upper_bound']
     X = df.drop(['Survival_label_lower_bound',
diff --git a/tests/python/test_with_modin.py b/tests/python/test_with_modin.py
index 4932d1c1f..3f1f9cf97 100644
--- a/tests/python/test_with_modin.py
+++ b/tests/python/test_with_modin.py
@@ -1,10 +1,10 @@
-# -*- coding: utf-8 -*-
 import numpy as np
-import xgboost as xgb
-import testing as tm
 import pytest
 from test_dmatrix import set_base_margin_info
 
+import xgboost as xgb
+from xgboost import testing as tm
+
 try:
     import modin.pandas as md
 except ImportError:
diff --git a/tests/python/test_with_pandas.py b/tests/python/test_with_pandas.py
index e4289c1cd..209e5cf6f 100644
--- a/tests/python/test_with_pandas.py
+++ b/tests/python/test_with_pandas.py
@@ -1,11 +1,13 @@
 import os
 import tempfile
+
 import numpy as np
-import xgboost as xgb
-import testing as tm
 import pytest
 from test_dmatrix import set_base_margin_info
 
+import xgboost as xgb
+from xgboost import testing as tm
+
 try:
     import pandas as pd
 except ImportError:
diff --git a/tests/python/test_with_shap.py b/tests/python/test_with_shap.py
index 1e03e0700..3103e1b7e 100644
--- a/tests/python/test_with_shap.py
+++ b/tests/python/test_with_shap.py
@@ -1,7 +1,8 @@
 import numpy as np
-import xgboost as xgb
 import pytest
 
+import xgboost as xgb
+
 try:
     import shap
 except ImportError:
diff --git a/tests/python/test_with_sklearn.py b/tests/python/test_with_sklearn.py
index 17114d2dd..0a2c8fabc 100644
--- a/tests/python/test_with_sklearn.py
+++ b/tests/python/test_with_sklearn.py
@@ -8,14 +8,13 @@ from typing import Callable, Optional
 
 import numpy as np
 import pytest
-import testing as tm
 from sklearn.utils.estimator_checks import parametrize_with_checks
 
 import xgboost as xgb
-from xgboost import testing
+from xgboost import testing as tm
 
 rng = np.random.RandomState(1994)
 
-pytestmark = [pytest.mark.skipif(**tm.no_sklearn()), testing.timeout(30)]
+pytestmark = [pytest.mark.skipif(**tm.no_sklearn()), tm.timeout(30)]
 
 
 def test_binary_classification():
@@ -155,11 +154,10 @@ def test_ranking():
 
 
 def test_stacking_regression():
-    from sklearn.model_selection import train_test_split
     from sklearn.datasets import load_diabetes
+    from sklearn.ensemble import RandomForestRegressor, StackingRegressor
     from sklearn.linear_model import RidgeCV
-    from sklearn.ensemble import RandomForestRegressor
-    from sklearn.ensemble import StackingRegressor
+    from sklearn.model_selection import train_test_split
 
     X, y = load_diabetes(return_X_y=True)
     estimators = [
@@ -177,13 +175,13 @@
 
 
 def test_stacking_classification():
-    from sklearn.model_selection import train_test_split
     from sklearn.datasets import load_iris
-    from sklearn.svm import LinearSVC
-    from sklearn.linear_model import LogisticRegression
-    from sklearn.preprocessing import StandardScaler
-    from sklearn.pipeline import make_pipeline
     from sklearn.ensemble import StackingClassifier
+    from sklearn.linear_model import LogisticRegression
+    from sklearn.model_selection import train_test_split
+    from sklearn.pipeline import make_pipeline
+    from sklearn.preprocessing import StandardScaler
+    from sklearn.svm import LinearSVC
 
     X, y = load_iris(return_X_y=True)
     estimators = [
@@ -354,8 +352,8 @@ def test_num_parallel_tree():
 
 
 def test_regression():
-    from sklearn.metrics import mean_squared_error
     from sklearn.datasets import fetch_california_housing
+    from sklearn.metrics import mean_squared_error
     from sklearn.model_selection import KFold
 
     X, y = fetch_california_housing(return_X_y=True)
@@ -383,8 +381,8 @@ def test_regression():
 
 
 def run_housing_rf_regression(tree_method):
-    from sklearn.metrics import mean_squared_error
     from sklearn.datasets import fetch_california_housing
+    from sklearn.metrics import mean_squared_error
     from sklearn.model_selection import KFold
 
     X, y = fetch_california_housing(return_X_y=True)
@@ -407,8 +405,8 @@ def test_rf_regression():
 
 
 def test_parameter_tuning():
-    from sklearn.model_selection import GridSearchCV
     from sklearn.datasets import fetch_california_housing
+    from sklearn.model_selection import GridSearchCV
 
     X, y = fetch_california_housing(return_X_y=True)
     xgb_model = xgb.XGBRegressor(learning_rate=0.1)
@@ -421,8 +419,8 @@ def test_parameter_tuning():
 
 
 def test_regression_with_custom_objective():
-    from sklearn.metrics import mean_squared_error
     from sklearn.datasets import fetch_california_housing
+    from sklearn.metrics import mean_squared_error
     from sklearn.model_selection import KFold
 
     def objective_ls(y_true, y_pred):
@@ -539,8 +537,8 @@ def test_sklearn_plotting():
 
     import matplotlib
     matplotlib.use('Agg')
-    from matplotlib.axes import Axes
     from graphviz import Source
+    from matplotlib.axes import Axes
 
     ax = xgb.plot_importance(classifier)
     assert isinstance(ax, Axes)
@@ -666,8 +664,8 @@ def test_kwargs_error():
 
 
 def test_kwargs_grid_search():
-    from sklearn.model_selection import GridSearchCV
     from sklearn import datasets
+    from sklearn.model_selection import GridSearchCV
 
     params = {'tree_method': 'hist'}
     clf = xgb.XGBClassifier(n_estimators=1, learning_rate=1.0, **params)
@@ -841,9 +839,7 @@ def test_save_load_model():
 
 
 def test_RFECV():
-    from sklearn.datasets import load_diabetes
-    from sklearn.datasets import load_breast_cancer
-    from sklearn.datasets import load_iris
+    from sklearn.datasets import load_breast_cancer, load_diabetes, load_iris
     from sklearn.feature_selection import RFECV
 
     # Regression
@@ -1046,8 +1042,9 @@ def run_feature_weights(X, y, fw, tree_method, model=xgb.XGBRegressor):
     with open(model_path) as fd:
         model = json.load(fd)
 
-    parser_path = os.path.join(tm.PROJECT_ROOT, 'demo', 'json-model',
-                               'json_parser.py')
+    parser_path = os.path.join(
+        tm.demo_dir(__file__), "json-model", "json_parser.py"
+    )
     spec = importlib.util.spec_from_file_location("JsonParser", parser_path)
     foo = importlib.util.module_from_spec(spec)
@@ -1162,8 +1159,8 @@ def run_boost_from_prediction_multi_clasas(
 
 @pytest.mark.parametrize("tree_method", ["hist", "approx", "exact"])
 def test_boost_from_prediction(tree_method):
-    from sklearn.datasets import load_breast_cancer, load_iris, make_regression
     import pandas as pd
+    from sklearn.datasets import load_breast_cancer, load_iris, make_regression
 
     X, y = load_breast_cancer(return_X_y=True)
 
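`run_feature_weights` above loads `demo/json-model/json_parser.py` straight from its path rather than via `sys.path`. The standard-library recipe it relies on, shown standalone (the module name `"JsonParser"` is arbitrary):

```python
import importlib.util
from types import ModuleType


def load_module_from_path(name: str, path: str) -> ModuleType:
    # Import a Python file that is not on sys.path.
    spec = importlib.util.spec_from_file_location(name, path)
    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)
    return module
```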
diff --git a/tests/python/with_omp_limit.py b/tests/python/with_omp_limit.py
index 950ec0364..856914e96 100644
--- a/tests/python/with_omp_limit.py
+++ b/tests/python/with_omp_limit.py
@@ -1,7 +1,9 @@
-import xgboost as xgb
+import sys
+
 from sklearn.datasets import make_classification
 from sklearn.metrics import roc_auc_score
-import sys
+
+import xgboost as xgb
 
 
 def run_omp(output_path: str):
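Only the import block of `with_omp_limit.py` appears in the hunk above. Given those imports, a plausible body for `run_omp` (the real implementation lies outside this diff): train a small classifier and write its ROC AUC to `output_path`, so the parent test can check that results agree across OpenMP thread limits.

```python
import sys

from sklearn.datasets import make_classification
from sklearn.metrics import roc_auc_score

import xgboost as xgb


def run_omp(output_path: str) -> None:
    # Hypothetical sketch: the score should be identical no matter what
    # OMP_THREAD_LIMIT the parent process imposes on this script.
    X, y = make_classification(n_samples=256, n_features=8, random_state=7)
    dtrain = xgb.DMatrix(X, label=y)
    booster = xgb.train(
        {"objective": "binary:logistic"}, dtrain, num_boost_round=4
    )
    score = roc_auc_score(y, booster.predict(dtrain))
    with open(output_path, "w") as fd:
        fd.write(str(score))


if __name__ == "__main__":
    run_omp(sys.argv[1])
```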