Move Python testing utilities into xgboost module. (#8379)

- Add typehints.
- Fixes for pylint.

Co-authored-by: Hyunsu Philip Cho <chohyu01@cs.washington.edu>

parent 7e53189e7c
commit cf70864fa3
@@ -65,7 +65,7 @@ def _check_rf_callback(
    )


-_SklObjective = Optional[
+SklObjective = Optional[
    Union[str, Callable[[np.ndarray, np.ndarray], Tuple[np.ndarray, np.ndarray]]]
]

@@ -144,7 +144,7 @@ __model_doc = f"""
        Boosting learning rate (xgb's "eta")
    verbosity : Optional[int]
        The degree of verbosity. Valid values are 0 (silent) - 3 (debug).
-    objective : {_SklObjective}
+    objective : {SklObjective}
        Specify the learning task and the corresponding learning objective or
        a custom objective function to be used (see note below).
    booster: Optional[str]
@@ -546,7 +546,7 @@ class XGBModel(XGBModelBase):
        learning_rate: Optional[float] = None,
        n_estimators: int = 100,
        verbosity: Optional[int] = None,
-        objective: _SklObjective = None,
+        objective: SklObjective = None,
        booster: Optional[str] = None,
        tree_method: Optional[str] = None,
        n_jobs: Optional[int] = None,
@@ -1409,7 +1409,7 @@ class XGBClassifier(XGBModel, XGBClassifierBase):
    def __init__(
        self,
        *,
-        objective: _SklObjective = "binary:logistic",
+        objective: SklObjective = "binary:logistic",
        use_label_encoder: Optional[bool] = None,
        **kwargs: Any,
    ) -> None:
@@ -1712,7 +1712,7 @@ class XGBRegressor(XGBModel, XGBRegressorBase):
    # pylint: disable=missing-docstring
    @_deprecate_positional_args
    def __init__(
-        self, *, objective: _SklObjective = "reg:squarederror", **kwargs: Any
+        self, *, objective: SklObjective = "reg:squarederror", **kwargs: Any
    ) -> None:
        super().__init__(objective=objective, **kwargs)
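The hunks above make the `SklObjective` alias public (dropping the leading underscore) so that the new `xgboost.testing` module can import it. A minimal sketch of a custom objective matching the alias's callable form; the function name and gradient formula are illustrative, not part of the diff:

from typing import Tuple

import numpy as np
import xgboost as xgb


def squared_error(labels: np.ndarray, predt: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
    # SklObjective callables receive (labels, predictions) and return
    # per-sample gradient and hessian arrays.
    grad = predt - labels
    hess = np.ones_like(predt)
    return grad, hess


# `objective` accepts either a string such as "reg:squarederror" or the callable above.
reg = xgb.XGBRegressor(objective=squared_error, n_estimators=8)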
@@ -1,64 +0,0 @@
-"""Utilities for defining Python tests."""
-
-import socket
-from platform import system
-from typing import Any, TypedDict
-
-PytestSkip = TypedDict("PytestSkip", {"condition": bool, "reason": str})
-
-
-def has_ipv6() -> bool:
-    """Check whether IPv6 is enabled on this host."""
-    # connection error in macos, still need some fixes.
-    if system() not in ("Linux", "Windows"):
-        return False
-
-    if socket.has_ipv6:
-        try:
-            with socket.socket(
-                socket.AF_INET6, socket.SOCK_STREAM
-            ) as server, socket.socket(socket.AF_INET6, socket.SOCK_STREAM) as client:
-                server.bind(("::1", 0))
-                port = server.getsockname()[1]
-                server.listen()
-
-                client.connect(("::1", port))
-                conn, _ = server.accept()
-
-                client.sendall("abc".encode())
-                msg = conn.recv(3).decode()
-                # if the code can be executed to this point, the message should be
-                # correct.
-                assert msg == "abc"
-            return True
-        except OSError:
-            pass
-    return False
-
-
-def skip_ipv6() -> PytestSkip:
-    """PyTest skip mark for IPv6."""
-    return {"condition": not has_ipv6(), "reason": "IPv6 is required to be enabled."}
-
-
-def timeout(sec: int, *args: Any, enable: bool = True, **kwargs: Any) -> Any:
-    """Make a pytest mark for the `pytest-timeout` package.
-
-    Parameters
-    ----------
-    sec :
-        Timeout seconds.
-    enable :
-        Control whether timeout should be applied, used for debugging.
-
-    Returns
-    -------
-    pytest.mark.timeout
-    """
-    import pytest  # pylint: disable=import-error
-
-    # This is disabled for now due to regression caused by conflicts between federated
-    # learning build and the CI container environment.
-    if enable:
-        return pytest.mark.timeout(sec, *args, **kwargs)
-    return pytest.mark.timeout(None, *args, **kwargs)
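The file deleted above is re-created inside the new `xgboost.testing` module below, with `skip_ipv6` renamed to `no_ipv6`. For reference, a sketch of how these helpers are consumed; the test body is illustrative:

import pytest

from xgboost import testing as tm


# A PytestSkip TypedDict unpacks directly into pytest.mark.skipif,
# and timeout() yields a regular pytest mark.
@pytest.mark.skipif(**tm.no_ipv6())
@tm.timeout(5)
def test_ipv6_available() -> None:
    assert tm.has_ipv6()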
@@ -1,192 +1,190 @@
-from concurrent.futures import ThreadPoolExecutor
-import os
+"""Utilities for defining Python tests. The module is private and subject to frequent
+change without notice.
+
+"""
+# pylint: disable=invalid-name,missing-function-docstring,import-error
+import gc
+import importlib.util
+import multiprocessing
-from typing import Tuple, Union, List, Sequence, Callable
+import os
+import platform
+import socket
+import sys
+import urllib
+import zipfile
-import sys
-from typing import Optional, Dict, Any
+from concurrent.futures import ThreadPoolExecutor
from contextlib import contextmanager
from io import StringIO
-from xgboost.compat import SKLEARN_INSTALLED, PANDAS_INSTALLED
-import pytest
-import gc
-import xgboost as xgb
-from xgboost.core import ArrayLike
-import numpy as np
-from scipy import sparse
-import platform
+from platform import system
+from typing import (
+    Any,
+    Callable,
+    Dict,
+    Generator,
+    List,
+    Optional,
+    Sequence,
+    Set,
+    Tuple,
+    TypedDict,
+    Union,
+)

-hypothesis = pytest.importorskip('hypothesis')
-sklearn = pytest.importorskip('sklearn')
+import numpy as np
+import pytest
+from scipy import sparse
+from xgboost.core import ArrayLike
+from xgboost.sklearn import SklObjective
+
+import xgboost as xgb
+
+hypothesis = pytest.importorskip("hypothesis")
+
+# pylint:disable=wrong-import-position,wrong-import-order
from hypothesis import strategies
from hypothesis.extra.numpy import arrays
-from joblib import Memory
-from sklearn import datasets
+
+joblib = pytest.importorskip("joblib")
+datasets = pytest.importorskip("sklearn.datasets")
+
+Memory = joblib.Memory
+
+memory = Memory("./cachedir", verbose=0)
+
+PytestSkip = TypedDict("PytestSkip", {"condition": bool, "reason": str})
+def has_ipv6() -> bool:
+    """Check whether IPv6 is enabled on this host."""
+    # connection error in macos, still need some fixes.
+    if system() not in ("Linux", "Windows"):
+        return False
+
+    if socket.has_ipv6:
+        try:
-            import cupy as cp
-        except ImportError:
-            cp = None
+            with socket.socket(
+                socket.AF_INET6, socket.SOCK_STREAM
+            ) as server, socket.socket(socket.AF_INET6, socket.SOCK_STREAM) as client:
+                server.bind(("::1", 0))
+                port = server.getsockname()[1]
+                server.listen()
+
-memory = Memory('./cachedir', verbose=0)
+                client.connect(("::1", port))
+                conn, _ = server.accept()
+
+                client.sendall("abc".encode())
+                msg = conn.recv(3).decode()
+                # if the code can be executed to this point, the message should be
+                # correct.
+                assert msg == "abc"
+            return True
+        except OSError:
+            pass
+    return False


-def no_ubjson():
-    reason = "ubjson is not intsalled."
-    try:
-        import ubjson  # noqa
-        return {"condition": False, "reason": reason}
-    except ImportError:
-        return {"condition": True, "reason": reason}
+def no_mod(name: str) -> PytestSkip:
+    spec = importlib.util.find_spec(name)
+    return {"condition": spec is None, "reason": f"{name} is not installed."}
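`no_mod` is the key simplification in this move: each `no_*` helper below collapses into a single `importlib.util.find_spec` probe instead of a bespoke try/except import. A short sketch of the resulting call pattern (the test body is illustrative):

import pytest

from xgboost import testing as tm


@pytest.mark.skipif(**tm.no_cupy())
def test_requires_cupy() -> None:
    import cupy as cp  # safe: the mark skips this test when cupy is absent

    assert int(cp.ones(3).sum()) == 3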
-def no_sklearn():
-    return {'condition': not SKLEARN_INSTALLED,
-            'reason': 'Scikit-Learn is not installed'}
+def no_ipv6() -> PytestSkip:
+    """PyTest skip mark for IPv6."""
+    return {"condition": not has_ipv6(), "reason": "IPv6 is required to be enabled."}


-def no_dask():
-    try:
-        import pkg_resources
-
-        pkg_resources.get_distribution("dask")
-        DASK_INSTALLED = True
-    except pkg_resources.DistributionNotFound:
-        DASK_INSTALLED = False
-    return {"condition": not DASK_INSTALLED, "reason": "Dask is not installed"}
+def no_ubjson() -> PytestSkip:
+    return no_mod("ubjson")


-def no_spark():
-    try:
-        import pyspark  # noqa
-        SPARK_INSTALLED = True
-    except ImportError:
-        SPARK_INSTALLED = False
-    return {"condition": not SPARK_INSTALLED, "reason": "Spark is not installed"}
+def no_sklearn() -> PytestSkip:
+    return no_mod("sklearn")


-def no_pandas():
-    return {'condition': not PANDAS_INSTALLED,
-            'reason': 'Pandas is not installed.'}
+def no_dask() -> PytestSkip:
+    return no_mod("dask")


-def no_arrow():
-    reason = "pyarrow is not installed"
-    try:
-        import pyarrow  # noqa
-        return {"condition": False, "reason": reason}
-    except ImportError:
-        return {"condition": True, "reason": reason}
+def no_spark() -> PytestSkip:
+    return no_mod("pyspark")


-def no_modin():
-    reason = 'Modin is not installed.'
-    try:
-        import modin.pandas as _  # noqa
-        return {'condition': False, 'reason': reason}
-    except ImportError:
-        return {'condition': True, 'reason': reason}
+def no_pandas() -> PytestSkip:
+    return no_mod("pandas")


-def no_dt():
-    import importlib.util
-    spec = importlib.util.find_spec('datatable')
-    return {'condition': spec is None,
-            'reason': 'Datatable is not installed.'}
+def no_arrow() -> PytestSkip:
+    return no_mod("pyarrow")


-def no_matplotlib():
-    reason = 'Matplotlib is not installed.'
+def no_modin() -> PytestSkip:
+    return no_mod("modin")


+def no_dt() -> PytestSkip:
+    return no_mod("datatable")


+def no_matplotlib() -> PytestSkip:
+    reason = "Matplotlib is not installed."
    try:
        import matplotlib.pyplot as _  # noqa
-        return {'condition': False,
-                'reason': reason}
-
+        return {"condition": False, "reason": reason}
    except ImportError:
-        return {'condition': True,
-                'reason': reason}
+        return {"condition": True, "reason": reason}


-def no_dask_cuda():
-    reason = 'dask_cuda is not installed.'
-    try:
-        import dask_cuda as _  # noqa
-        return {'condition': False, 'reason': reason}
-    except ImportError:
-        return {'condition': True, 'reason': reason}
+def no_dask_cuda() -> PytestSkip:
+    return no_mod("dask_cuda")


-def no_cudf():
-    try:
-        import cudf  # noqa
-        CUDF_INSTALLED = True
-    except ImportError:
-        CUDF_INSTALLED = False
-
-    return {'condition': not CUDF_INSTALLED,
-            'reason': 'CUDF is not installed'}
+def no_cudf() -> PytestSkip:
+    return no_mod("cudf")


-def no_cupy():
-    reason = 'cupy is not installed.'
-    try:
-        import cupy as _  # noqa
-        return {'condition': False, 'reason': reason}
-    except ImportError:
-        return {'condition': True, 'reason': reason}
+def no_cupy() -> PytestSkip:
+    return no_mod("cupy")


-def no_dask_cudf():
-    reason = 'dask_cudf is not installed.'
-    try:
-        import dask_cudf as _  # noqa
-        return {'condition': False, 'reason': reason}
-    except ImportError:
-        return {'condition': True, 'reason': reason}
+def no_dask_cudf() -> PytestSkip:
+    return no_mod("dask_cudf")


-def no_json_schema():
-    reason = 'jsonschema is not installed'
-    try:
-        import jsonschema  # noqa
-        return {'condition': False, 'reason': reason}
-    except ImportError:
-        return {'condition': True, 'reason': reason}
+def no_json_schema() -> PytestSkip:
+    return no_mod("jsonschema")


-def no_graphviz():
-    reason = 'graphviz is not installed'
-    try:
-        import graphviz  # noqa
-        return {'condition': False, 'reason': reason}
-    except ImportError:
-        return {'condition': True, 'reason': reason}
+def no_graphviz() -> PytestSkip:
+    return no_mod("graphviz")


-def no_multiple(*args):
+def no_multiple(*args: Any) -> PytestSkip:
    condition = False
-    reason = ''
+    reason = ""
    for arg in args:
-        condition = (condition or arg['condition'])
-        if arg['condition']:
-            reason = arg['reason']
+        condition = condition or arg["condition"]
+        if arg["condition"]:
+            reason = arg["reason"]
            break
-    return {'condition': condition, 'reason': reason}
+    return {"condition": condition, "reason": reason}


-def skip_s390x():
+def skip_s390x() -> PytestSkip:
    condition = platform.machine() == "s390x"
    reason = "Known to fail on s390x"
    return {"condition": condition, "reason": reason}
class IteratorForTest(xgb.core.DataIter):
    """Iterator for testing streaming DMatrix. (external memory, quantile)"""

    def __init__(
        self,
        X: Sequence,
        y: Sequence,
        w: Optional[Sequence],
-        cache: Optional[str] = "./"
+        cache: Optional[str] = "./",
    ) -> None:
        assert len(X) == len(y)
        self.X = X
@@ -242,7 +240,7 @@ def make_batches(
        rng = cupy.random.RandomState(1994)
    else:
        rng = np.random.RandomState(1994)
-    for i in range(n_batches):
+    for _ in range(n_batches):
        _X = rng.randn(n_samples_per_batch, n_features)
        _y = rng.randn(n_samples_per_batch)
        _w = rng.uniform(low=0, high=1, size=n_samples_per_batch)
@@ -259,7 +257,7 @@ def make_batches_sparse(
    y = []
    w = []
    rng = np.random.RandomState(1994)
-    for i in range(n_batches):
+    for _ in range(n_batches):
        _X = sparse.random(
            n_samples_per_batch,
            n_features,
@@ -276,8 +274,9 @@ def make_batches_sparse(
    return X, y, w
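A sketch of how `make_batches` and `IteratorForTest` combine; the `use_cupy` keyword is assumed from the cupy/numpy branch above, and building the `DMatrix` from a `DataIter` follows the external-memory interface:

import xgboost as xgb
from xgboost import testing as tm

# Three batches of dense data; use_cupy=False keeps everything on the CPU
# (assumption: make_batches exposes this flag, per the branch shown above).
X, y, w = tm.make_batches(
    n_samples_per_batch=32, n_features=4, n_batches=3, use_cupy=False
)
it = tm.IteratorForTest(X, y, w)
Xy = xgb.DMatrix(it)  # streams the iterator into an external-memory DMatrix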
-# Contains a dataset in numpy format as well as the relevant objective and metric
class TestDataset:
+    """Contains a dataset in numpy format as well as the relevant objective and metric."""

    def __init__(
        self, name: str, get_dataset: Callable, objective: str, metric: str
    ) -> None:
@@ -289,18 +288,24 @@ class TestDataset:
        self.margin: Optional[np.ndarray] = None

    def set_params(self, params_in: Dict[str, Any]) -> Dict[str, Any]:
-        params_in['objective'] = self.objective
-        params_in['eval_metric'] = self.metric
+        params_in["objective"] = self.objective
+        params_in["eval_metric"] = self.metric
        if self.objective == "multi:softmax":
            params_in["num_class"] = int(np.max(self.y) + 1)
        return params_in

    def get_dmat(self) -> xgb.DMatrix:
        return xgb.DMatrix(
-            self.X, self.y, self.w, base_margin=self.margin, enable_categorical=True
+            self.X,
+            self.y,
+            weight=self.w,
+            base_margin=self.margin,
+            enable_categorical=True,
        )

    def get_device_dmat(self) -> xgb.DeviceQuantileDMatrix:
        import cupy as cp

        w = None if self.w is None else cp.array(self.w)
        X = cp.array(self.X, dtype=np.float32)
        y = cp.array(self.y, dtype=np.float32)
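A usage sketch for `TestDataset`, assuming (as the dataset getters below suggest) that `get_dataset` returns an `(X, y)` pair; names and parameter values are illustrative:

import numpy as np

import xgboost as xgb
from xgboost import testing as tm

rng = np.random.RandomState(1994)
data = tm.TestDataset(
    "toy-reg", lambda: (rng.randn(64, 4), rng.randn(64)), "reg:squarederror", "rmse"
)
params = data.set_params({"tree_method": "hist"})  # injects objective/eval_metric
booster = xgb.train(params, data.get_dmat(), num_boost_round=4)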
@@ -334,25 +339,24 @@ class TestDataset:


@memory.cache
-def get_california_housing():
+def get_california_housing() -> Tuple[np.ndarray, np.ndarray]:
    data = datasets.fetch_california_housing()
    return data.data, data.target


@memory.cache
-def get_digits():
+def get_digits() -> Tuple[np.ndarray, np.ndarray]:
    data = datasets.load_digits()
    return data.data, data.target


@memory.cache
-def get_cancer():
-    data = datasets.load_breast_cancer()
-    return data.data, data.target
+def get_cancer() -> Tuple[np.ndarray, np.ndarray]:
+    return datasets.load_breast_cancer(return_X_y=True)


@memory.cache
-def get_sparse():
+def get_sparse() -> Tuple[np.ndarray, np.ndarray]:
    rng = np.random.RandomState(199)
    n = 2000
    sparsity = 0.75
@@ -366,7 +370,7 @@ def get_sparse():


@memory.cache
-def get_ames_housing():
+def get_ames_housing() -> Tuple[np.ndarray, np.ndarray]:
    """
    Number of samples: 1460
    Number of features: 20
@@ -374,9 +378,10 @@ def get_ames_housing():
    Number of numerical features: 10
    """
+    from sklearn.datasets import fetch_openml

    X, y = fetch_openml(data_id=42165, as_frame=True, return_X_y=True)

-    categorical_columns_subset: list[str] = [
+    categorical_columns_subset: List[str] = [
        "BldgType",  # 5 cats, no nan
        "GarageFinish",  # 3 cats, nan
        "LotConfig",  # 5 cats, no nan
@@ -389,7 +394,7 @@ def get_ames_housing():
        "PoolQC",  # 3 cats, nan
    ]

-    numerical_columns_subset: list[str] = [
+    numerical_columns_subset: List[str] = [
        "3SsnPorch",
        "Fireplaces",
        "BsmtHalfBath",
@@ -408,32 +413,70 @@ def get_ames_housing():


@memory.cache
-def get_mq2008(dpath):
+def get_mq2008(
+    dpath: str,
+) -> Tuple[
+    sparse.csr_matrix,
+    np.ndarray,
+    np.ndarray,
+    sparse.csr_matrix,
+    np.ndarray,
+    np.ndarray,
+    sparse.csr_matrix,
+    np.ndarray,
+    np.ndarray,
+]:
    from sklearn.datasets import load_svmlight_files

-    src = 'https://s3-us-west-2.amazonaws.com/xgboost-examples/MQ2008.zip'
-    target = dpath + '/MQ2008.zip'
+    src = "https://s3-us-west-2.amazonaws.com/xgboost-examples/MQ2008.zip"
+    target = dpath + "/MQ2008.zip"
    if not os.path.exists(target):
        urllib.request.urlretrieve(url=src, filename=target)

-    with zipfile.ZipFile(target, 'r') as f:
+    with zipfile.ZipFile(target, "r") as f:
        f.extractall(path=dpath)

-    (x_train, y_train, qid_train, x_test, y_test, qid_test,
-     x_valid, y_valid, qid_valid) = load_svmlight_files(
-        (dpath + "MQ2008/Fold1/train.txt",
+    (
+        x_train,
+        y_train,
+        qid_train,
+        x_test,
+        y_test,
+        qid_test,
+        x_valid,
+        y_valid,
+        qid_valid,
+    ) = load_svmlight_files(
+        (
+            dpath + "MQ2008/Fold1/train.txt",
            dpath + "MQ2008/Fold1/test.txt",
-         dpath + "MQ2008/Fold1/vali.txt"),
-        query_id=True, zero_based=False)
+            dpath + "MQ2008/Fold1/vali.txt",
+        ),
+        query_id=True,
+        zero_based=False,
+    )

-    return (x_train, y_train, qid_train, x_test, y_test, qid_test,
-            x_valid, y_valid, qid_valid)
+    return (
+        x_train,
+        y_train,
+        qid_train,
+        x_test,
+        y_test,
+        qid_test,
+        x_valid,
+        y_valid,
+        qid_valid,
+    )
@memory.cache
def make_categorical(
-    n_samples: int, n_features: int, n_categories: int, onehot: bool, sparsity=0.0,
-):
+    n_samples: int,
+    n_features: int,
+    n_categories: int,
+    onehot: bool,
+    sparsity: float = 0.0,
+) -> Tuple[ArrayLike, np.ndarray]:
    import pandas as pd

    rng = np.random.RandomState(1994)
@@ -457,7 +500,9 @@ def make_categorical(

    if sparsity > 0.0:
        for i in range(n_features):
-            index = rng.randint(low=0, high=n_samples-1, size=int(n_samples * sparsity))
+            index = rng.randint(
+                low=0, high=n_samples - 1, size=int(n_samples * sparsity)
+            )
            df.iloc[index, i] = np.NaN
            assert n_categories == np.unique(df.dtypes[i].categories).size

@@ -466,9 +511,9 @@ def make_categorical(
    return df, label
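A sketch of calling `make_categorical` (requires pandas; values are illustrative). With `onehot=False` the frame keeps pandas categorical dtypes, which is why `enable_categorical=True` is needed on the `DMatrix`:

import xgboost as xgb
from xgboost import testing as tm

# 10% of entries per feature are replaced with NaN via the sparsity branch above.
X, y = tm.make_categorical(
    n_samples=128, n_features=4, n_categories=8, onehot=False, sparsity=0.1
)
Xy = xgb.DMatrix(X, y, enable_categorical=True)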
-def _cat_sampled_from():
+def _cat_sampled_from() -> strategies.SearchStrategy:
    @strategies.composite
-    def _make_cat(draw):
+    def _make_cat(draw: Callable) -> Tuple[int, int, int, float]:
        n_samples = draw(strategies.integers(2, 512))
        n_features = draw(strategies.integers(1, 4))
        n_cats = draw(strategies.integers(1, 128))
@@ -483,7 +528,7 @@ def _cat_sampled_from():
        )
        return n_samples, n_features, n_cats, sparsity

-    def _build(args):
+    def _build(args: Tuple[int, int, int, float]) -> TestDataset:
        n_samples = args[0]
        n_features = args[1]
        n_cats = args[2]
@@ -495,12 +540,13 @@ def _cat_sampled_from():
            "rmse",
        )

-    return _make_cat().map(_build)
+    return _make_cat().map(_build)  # pylint: disable=no-member


-categorical_dataset_strategy = _cat_sampled_from()
+categorical_dataset_strategy: strategies.SearchStrategy = _cat_sampled_from()
+# pylint: disable=too-many-locals
@memory.cache
def make_sparse_regression(
    n_samples: int, n_features: int, sparsity: float, as_dense: bool
@@ -530,8 +576,7 @@ def make_sparse_regression(

    # Use multi-thread to speed up the generation, convenient if you use this function
    # for benchmarking.
-    n_threads = multiprocessing.cpu_count()
-    n_threads = min(n_threads, n_features)
+    n_threads = min(multiprocessing.cpu_count(), n_features)

    def random_csc(t_id: int) -> sparse.csc_matrix:
        rng = np.random.default_rng(1994 * t_id)
@@ -653,7 +698,7 @@ _unweighted_datasets_strategy = strategies.sampled_from(


@strategies.composite
-def _dataset_weight_margin(draw):
+def _dataset_weight_margin(draw: Callable) -> TestDataset:
    data: TestDataset = draw(_unweighted_datasets_strategy)
    if draw(strategies.booleans()):
        data.w = draw(
@@ -673,6 +718,7 @@ def _dataset_weight_margin(draw):
                elements=strategies.floats(0.5, 1.0),
            )
        )
+        assert data.margin is not None
        if num_class != 1:
            data.margin = data.margin.reshape(data.y.shape[0], num_class)

@@ -684,24 +730,24 @@ def _dataset_weight_margin(draw):
dataset_strategy = _dataset_weight_margin()
-def non_increasing(L, tolerance=1e-4):
+def non_increasing(L: Sequence[float], tolerance: float = 1e-4) -> bool:
    return all((y - x) < tolerance for x, y in zip(L, L[1:]))


-def eval_error_metric(predt, dtrain: xgb.DMatrix):
+def eval_error_metric(predt: np.ndarray, dtrain: xgb.DMatrix) -> Tuple[str, np.float64]:
+    """Evaluation metric for xgb.train"""
    label = dtrain.get_label()
    r = np.zeros(predt.shape)
    gt = predt > 0.5
    if predt.size == 0:
-        return "CustomErr", 0
+        return "CustomErr", np.float64(0.0)
    r[gt] = 1 - label[gt]
    le = predt <= 0.5
    r[le] = label[le]
-    return 'CustomErr', np.sum(r)
+    return "CustomErr", np.sum(r)
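A sketch of wiring `eval_error_metric` into training through `xgb.train`'s `custom_metric` argument; the DMatrix here is illustrative:

import numpy as np

import xgboost as xgb
from xgboost import testing as tm

rng = np.random.RandomState(1994)
Xy = xgb.DMatrix(rng.randn(128, 4), (rng.randn(128) > 0).astype(np.float32))
booster = xgb.train(
    {"objective": "binary:logistic"},
    Xy,
    num_boost_round=4,
    evals=[(Xy, "Train")],
    custom_metric=tm.eval_error_metric,  # receives (predt, dtrain), returns (name, value)
)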
-def eval_error_metric_skl(y_true: np.ndarray, y_score: np.ndarray) -> float:
+def eval_error_metric_skl(y_true: np.ndarray, y_score: np.ndarray) -> np.float64:
    """Evaluation metric that looks like metrics provided by sklearn."""
    r = np.zeros(y_score.shape)
    gt = y_score > 0.5
@@ -717,13 +763,15 @@ def root_mean_square(y_true: np.ndarray, y_score: np.ndarray) -> float:
    return rmse


-def softmax(x):
+def softmax(x: np.ndarray) -> np.ndarray:
    e = np.exp(x)
    return e / np.sum(e)


-def softprob_obj(classes):
-    def objective(labels, predt):
+def softprob_obj(classes: int) -> SklObjective:
+    def objective(
+        labels: np.ndarray, predt: np.ndarray
+    ) -> Tuple[np.ndarray, np.ndarray]:
        rows = labels.shape[0]
        grad = np.zeros((rows, classes), dtype=float)
        hess = np.zeros((rows, classes), dtype=float)
@@ -746,29 +794,33 @@ def softprob_obj(classes):
class DirectoryExcursion:
-    def __init__(self, path: os.PathLike, cleanup=False):
-        '''Change directory. Change back and optionally cleaning up the directory when exit.
+    """Change directory. Change back and optionally cleaning up the directory when
+    exit.

-        '''
+    """
+
+    def __init__(self, path: os.PathLike, cleanup: bool = False):
        self.path = path
        self.curdir = os.path.normpath(os.path.abspath(os.path.curdir))
        self.cleanup = cleanup
-        self.files = {}
+        self.files: Set[str] = set()

-    def __enter__(self):
+    def __enter__(self) -> None:
        os.chdir(self.path)
        if self.cleanup:
            self.files = {
                os.path.join(root, f)
-                for root, subdir, files in os.walk(self.path) for f in files
+                for root, subdir, files in os.walk(os.path.expanduser(self.path))
+                for f in files
            }

-    def __exit__(self, *args):
+    def __exit__(self, *args: Any) -> None:
        os.chdir(self.curdir)
        if self.cleanup:
            files = {
                os.path.join(root, f)
-                for root, subdir, files in os.walk(self.path) for f in files
+                for root, subdir, files in os.walk(os.path.expanduser(self.path))
+                for f in files
            }
            diff = files.difference(self.files)
            for f in diff:
@@ -776,7 +828,7 @@ class DirectoryExcursion:
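A usage sketch for `DirectoryExcursion`; the scratch file name is illustrative. With `cleanup=True`, files created inside the block (the set difference computed in `__exit__`) are cleaned up on exit:

import os

from xgboost import testing as tm

with tm.DirectoryExcursion(os.path.join(os.sep, "tmp"), cleanup=True):
    with open("scratch.txt", "w") as fd:  # created relative to /tmp
        fd.write("temporary")
# Back in the original working directory; scratch.txt was left behind in the
# excursion and is removed by the cleanup pass.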
@contextmanager
-def captured_output():
+def captured_output() -> Generator[Tuple[StringIO, StringIO], None, None]:
    """Reassign stdout temporarily in order to test printed statements
    Taken from:
    https://stackoverflow.com/questions/4219717/how-to-assert-output-with-nosetest-unittest-in-python
@@ -793,14 +845,46 @@ def captured_output():
    sys.stdout, sys.stderr = old_out, old_err
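A sketch of `captured_output` in a test; it yields the two `StringIO` buffers that temporarily replace `sys.stdout` and `sys.stderr`:

from xgboost import testing as tm

with tm.captured_output() as (out, err):
    print("evaluation log line")
assert out.getvalue().strip() == "evaluation log line"
assert err.getvalue() == ""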
-try:
-    # Python 3.7+
-    from contextlib import nullcontext as noop_context
-except ImportError:
-    # Python 3.6
-    from contextlib import suppress as noop_context
+def timeout(sec: int, *args: Any, enable: bool = True, **kwargs: Any) -> Any:
+    """Make a pytest mark for the `pytest-timeout` package.
+
+    Parameters
+    ----------
+    sec :
+        Timeout seconds.
+    enable :
+        Control whether timeout should be applied, used for debugging.
+
+    Returns
+    -------
+    pytest.mark.timeout
+    """
+
+    if enable:
+        return pytest.mark.timeout(sec, *args, **kwargs)
+    return pytest.mark.timeout(None, *args, **kwargs)
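The relocated `timeout` helper keeps the same shape as the deleted copy, minus the local `pytest` import (the module now imports `pytest` at the top). Typical usage, as seen throughout the test hunks below:

from xgboost import testing as tm

# Apply a 30-second limit to every test in the module; pass enable=False
# while debugging to lift the limit without touching each mark.
pytestmark = tm.timeout(30)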
-CURDIR = os.path.normpath(os.path.abspath(os.path.dirname(__file__)))
-PROJECT_ROOT = os.path.normpath(
-    os.path.join(CURDIR, os.path.pardir, os.path.pardir))
+def demo_dir(path: str) -> str:
+    """Look for the demo directory based on the test file name."""
+    path = normpath(os.path.dirname(path))
+    while True:
+        subdirs = [f.path for f in os.scandir(path) if f.is_dir()]
+        subdirs = [os.path.basename(d) for d in subdirs]
+        if "demo" in subdirs:
+            return os.path.join(path, "demo")
+        new_path = normpath(os.path.join(path, os.path.pardir))
+        assert new_path != path
+        path = new_path
+
+
+def normpath(path: str) -> str:
+    return os.path.normpath(os.path.abspath(path))
+
+
+def data_dir(path: str) -> str:
+    return os.path.join(demo_dir(path), "data")
+
+
+def project_root(path: str) -> str:
+    return normpath(os.path.join(demo_dir(path), os.path.pardir))
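These path helpers replace the old module-level `PROJECT_ROOT` constant: because `xgboost.testing` no longer lives inside the repository's test tree, paths are resolved by walking up from the calling test file instead. A usage sketch:

from xgboost import testing as tm

# __file__ is the calling test module; demo_dir walks up until it finds "demo".
demo = tm.demo_dir(__file__)           # .../demo
dpath = tm.data_dir(__file__)          # .../demo/data
root = tm.project_root(__file__)       # parent of the demo directory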
@@ -121,12 +121,14 @@ if __name__ == "__main__":
            "python-package/xgboost/sklearn.py",
            "python-package/xgboost/spark",
            "python-package/xgboost/federated.py",
-            "python-package/xgboost/testing.py",
+            "python-package/xgboost/testing",
            # tests
            "tests/python/test_config.py",
            "tests/python/test_data_iterator.py",
            "tests/python/test_spark/",
            "tests/python/test_quantile_dmatrix.py",
            "tests/python-gpu/test_gpu_spark/",
            "tests/python-gpu/test_gpu_data_iterator.py",
            "tests/ci_build/lint_python.py",
            # demo
            "demo/guide-python/cat_in_the_dat.py",
@@ -1,9 +1,7 @@
import sys
import pytest
import logging

sys.path.append("tests/python")
-import testing as tm  # noqa
+from xgboost import testing as tm  # noqa


def has_rmm():
    try:
@@ -34,8 +32,8 @@ def local_cuda_client(request, pytestconfig):
        kwargs['rmm_pool_size'] = '2GB'
    if tm.no_dask_cuda()['condition']:
        raise ImportError('The local_cuda_cluster fixture requires dask_cuda package')
-    from dask_cuda import LocalCUDACluster
    from dask.distributed import Client
+    from dask_cuda import LocalCUDACluster
    yield Client(LocalCUDACluster(**kwargs))

def pytest_addoption(parser):
@@ -1,16 +1,14 @@
'''Loading a pickled model generated by test_pickling.py, only used by
`test_gpu_with_dask.py`'''
-import os
-import numpy as np
-import xgboost as xgb
import json
+import os

+import numpy as np
import pytest
-import sys
+from test_gpu_pickling import build_dataset, load_pickle, model_path

-from test_gpu_pickling import build_dataset, model_path, load_pickle

-sys.path.append("tests/python")
-import testing as tm
+import xgboost as xgb
+from xgboost import testing as tm


class TestLoadPickle:
@@ -5,10 +5,10 @@ import pytest
from hypothesis import given, settings, strategies

import xgboost as xgb
+from xgboost import testing as tm

sys.path.append("tests/python")
import test_quantile_dmatrix as tqd
-import testing as tm


class TestDeviceQuantileDMatrix:
@@ -2,11 +2,12 @@ import json
import sys

import numpy as np
-import xgboost as xgb
import pytest

+import xgboost as xgb
+from xgboost import testing as tm

sys.path.append("tests/python")
-import testing as tm
from test_dmatrix import set_base_margin_info

@@ -85,8 +86,8 @@ def _test_from_cudf(DMatrixT):


def _test_cudf_training(DMatrixT):
-    from cudf import DataFrame as df
    import pandas as pd
+    from cudf import DataFrame as df
    np.random.seed(1)
    X = pd.DataFrame(np.random.randn(50, 10))
    y = pd.DataFrame(np.random.randn(50))
@@ -109,8 +110,8 @@ def _test_cudf_training(DMatrixT):


def _test_cudf_metainfo(DMatrixT):
-    from cudf import DataFrame as df
    import pandas as pd
+    from cudf import DataFrame as df
    n = 100
    X = np.random.random((n, 2))
    dmat_cudf = DMatrixT(df.from_pandas(pd.DataFrame(X)))
@@ -247,9 +248,9 @@ Arrow specification.'''
@pytest.mark.skipif(**tm.no_sklearn())
@pytest.mark.skipif(**tm.no_pandas())
def test_cudf_training_with_sklearn():
+    import pandas as pd
    from cudf import DataFrame as df
    from cudf import Series as ss
-    import pandas as pd
    np.random.seed(1)
    X = pd.DataFrame(np.random.randn(50, 10))
    y = pd.DataFrame((np.random.randn(50) > 0).astype(np.int8))
@@ -1,12 +1,15 @@
-import numpy as np
-import xgboost as xgb
import sys

+import numpy as np
import pytest

+import xgboost as xgb

sys.path.append("tests/python")
-import testing as tm
from test_dmatrix import set_base_margin_info

+from xgboost import testing as tm


def dmatrix_from_cupy(input_type, DMatrixT, missing=np.NAN):
    '''Test constructing DMatrix from cupy'''
@@ -1,13 +1,18 @@
-import sys
+import os
+import sys

import numpy as np
-import xgboost as xgb
import pytest

+import xgboost as xgb
+from xgboost import testing as tm

sys.path.append("tests/python")
+import test_basic_models as test_bm

# Don't import the test class, otherwise they will run twice.
import test_callback as test_cb  # noqa
-import test_basic_models as test_bm
-import testing as tm

rng = np.random.RandomState(1994)


@@ -1,13 +1,12 @@
-import numpy as np
-import xgboost as xgb
-from hypothesis import given, strategies, settings
-import pytest
import sys

+import pytest
+from hypothesis import given, settings, strategies
+from xgboost.testing import no_cupy

sys.path.append("tests/python")
-from test_data_iterator import test_single_batch as cpu_single_batch
from test_data_iterator import run_data_iterator
-from testing import no_cupy
+from test_data_iterator import test_single_batch as cpu_single_batch


def test_gpu_single_batch() -> None:
@@ -24,7 +23,11 @@ def test_gpu_single_batch() -> None:
)
@settings(deadline=None, max_examples=10, print_blob=True)
def test_gpu_data_iterator(
-    n_samples_per_batch: int, n_features: int, n_batches: int, subsample: bool, use_cupy: bool
+    n_samples_per_batch: int,
+    n_features: int,
+    n_batches: int,
+    subsample: bool,
+    use_cupy: bool,
) -> None:
    run_data_iterator(
        n_samples_per_batch, n_features, n_batches, "gpu_hist", subsample, use_cupy
@@ -1,9 +1,12 @@
import os
import subprocess
import sys

import pytest

+from xgboost import testing as tm

sys.path.append("tests/python")
-import testing as tm
import test_demos as td  # noqa


@@ -31,6 +34,6 @@ def test_categorical_demo():
@pytest.mark.skipif(**tm.no_cupy())
@pytest.mark.mgpu
def test_dask_training():
-    script = os.path.join(tm.PROJECT_ROOT, 'demo', 'dask', 'gpu_training.py')
+    script = os.path.join(tm.demo_dir(__file__), 'dask', 'gpu_training.py')
    cmd = ['python', script]
    subprocess.check_call(cmd)
@@ -1,7 +1,9 @@
import sys
-import xgboost

import pytest

+import xgboost

sys.path.append("tests/python")
import test_eval_metrics as test_em  # noqa


@@ -1,8 +1,11 @@
-import numpy as np
import sys

+import numpy as np

sys.path.append("tests/python")
# Don't import the test class, otherwise they will run twice.
import test_interaction_constraints as test_ic  # noqa

rng = np.random.RandomState(1994)
@@ -1,15 +1,10 @@
import sys

import pytest
from hypothesis import assume, given, note, settings, strategies

import xgboost as xgb
-from xgboost import testing
+from xgboost import testing as tm

sys.path.append("tests/python")
-import testing as tm

-pytestmark = testing.timeout(10)
+pytestmark = tm.timeout(10)

parameter_strategy = strategies.fixed_dictionaries({
    'booster': strategies.just('gblinear'),
@@ -3,20 +3,17 @@ import json
import os
import pickle
import subprocess
import sys

import numpy as np
import pytest

import xgboost as xgb
-from xgboost import XGBClassifier, testing
-
-sys.path.append("tests/python")
-import testing as tm
+from xgboost import XGBClassifier
+from xgboost import testing as tm

model_path = './model.pkl'

-pytestmark = testing.timeout(30)
+pytestmark = tm.timeout(30)


def build_dataset():
@@ -1,10 +1,11 @@
import sys

import pytest

-sys.path.append("tests/python")
-import testing as tm
-import test_plotting as tp
+from xgboost import testing as tm
+
+sys.path.append("tests/python")
+import test_plotting as tp

pytestmark = pytest.mark.skipif(**tm.no_multiple(tm.no_matplotlib(), tm.no_graphviz()))
@@ -6,7 +6,7 @@ from hypothesis import assume, given, settings, strategies
from xgboost.compat import PANDAS_INSTALLED

import xgboost as xgb
-from xgboost import testing
+from xgboost import testing as tm

if PANDAS_INSTALLED:
    from hypothesis.extra.pandas import column, data_frames, range_indexes
@@ -16,7 +16,6 @@ else:
    column, data_frames, range_indexes = noop, noop, noop

sys.path.append("tests/python")
-import testing as tm
from test_predict import run_predict_leaf  # noqa
from test_predict import run_threaded_predict  # noqa

@@ -33,7 +32,7 @@ predict_parameter_strategy = strategies.fixed_dictionaries({
    'num_parallel_tree': strategies.sampled_from([1, 4]),
})

-pytestmark = testing.timeout(20)
+pytestmark = tm.timeout(20)


class TestGPUPredict:
@@ -227,8 +226,8 @@ class TestGPUPredict:
    @pytest.mark.skipif(**tm.no_cupy())
    @pytest.mark.skipif(**tm.no_cudf())
    def test_inplace_predict_cudf(self):
-        import cupy as cp
        import cudf
+        import cupy as cp
        import pandas as pd
        rows = 1000
        cols = 10
@@ -379,8 +378,8 @@ class TestGPUPredict:
    @pytest.mark.skipif(**tm.no_cupy())
    @pytest.mark.parametrize("n_classes", [2, 3])
    def test_predict_dart(self, n_classes):
-        from sklearn.datasets import make_classification
        import cupy as cp
+        from sklearn.datasets import make_classification
        n_samples = 1000
        X_, y_ = make_classification(
            n_samples=n_samples, n_informative=5, n_classes=n_classes
@@ -1,20 +1,15 @@
import itertools
import os
import shutil
import sys
import urllib.request
import zipfile

import numpy as np

import xgboost
-from xgboost import testing
+from xgboost import testing as tm

sys.path.append("tests/python")

-import testing as tm  # noqa

-pytestmark = testing.timeout(10)
+pytestmark = tm.timeout(10)


class TestRanking:
@@ -24,8 +19,9 @@ class TestRanking:
        Download and setup the test fixtures
        """
+        from sklearn.datasets import load_svmlight_files
+
        # download the test data
-        cls.dpath = os.path.join(tm.PROJECT_ROOT, "demo/rank/")
+        cls.dpath = os.path.join(tm.demo_dir(__file__), "rank/")
        src = 'https://s3-us-west-2.amazonaws.com/xgboost-examples/MQ2008.zip'
        target = os.path.join(cls.dpath, "MQ2008.zip")
@@ -1,13 +1,8 @@
import sys
-from typing import List
-
-import numpy as np
-import pandas as pd
import pytest

-sys.path.append("tests/python")
-
-import testing as tm
+from xgboost import testing as tm

if tm.no_spark()["condition"]:
    pytest.skip(msg=tm.no_spark()["reason"], allow_module_level=True)
@@ -15,6 +10,7 @@ if sys.platform.startswith("win") or sys.platform.startswith("darwin"):
    pytest.skip("Skipping PySpark tests on Windows", allow_module_level=True)


+sys.path.append("tests/python")
from test_spark.test_data import run_dmatrix_ctor


@@ -6,8 +6,7 @@ import sys
import pytest
import sklearn

-sys.path.append("tests/python")
-import testing as tm
+from xgboost import testing as tm

if tm.no_spark()["condition"]:
    pytest.skip(msg=tm.no_spark()["reason"], allow_module_level=True)
@@ -1,7 +1,9 @@
-import numpy as np
-import xgboost as xgb
import json

+import numpy as np

+import xgboost as xgb

rng = np.random.RandomState(1994)


@@ -6,13 +6,12 @@ import pytest
from hypothesis import assume, given, note, settings, strategies

import xgboost as xgb
-from xgboost import testing
+from xgboost import testing as tm

sys.path.append("tests/python")
import test_updaters as test_up
-import testing as tm

-pytestmark = testing.timeout(30)
+pytestmark = tm.timeout(30)

parameter_strategy = strategies.fixed_dictionaries({
    'max_depth': strategies.integers(0, 11),
@@ -1,52 +1,54 @@
"""Copyright 2019-2022 XGBoost contributors"""
-import sys
-import os
-from typing import Type, TypeVar, Any, Dict, List, Union
-import pytest
-import numpy as np
import asyncio
-import xgboost
+import os
import subprocess
+import sys
from collections import OrderedDict
from inspect import signature
-from hypothesis import given, strategies, settings, note
+from typing import Any, Dict, Type, TypeVar, Union

+import numpy as np
+import pytest
+from hypothesis import given, note, settings, strategies
from hypothesis._settings import duration
from test_gpu_updaters import parameter_strategy

+import xgboost
+from xgboost import testing as tm

if sys.platform.startswith("win"):
    pytest.skip("Skipping dask tests on Windows", allow_module_level=True)

sys.path.append("tests/python")
-import testing as tm  # noqa

if tm.no_dask_cuda()["condition"]:
    pytest.skip(tm.no_dask_cuda()["reason"], allow_module_level=True)


-from test_with_dask import run_empty_dmatrix_reg  # noqa
-from test_with_dask import run_empty_dmatrix_auc  # noqa
-from test_with_dask import _get_client_workers  # noqa
-from test_with_dask import generate_array  # noqa
-from test_with_dask import make_categorical  # noqa
from test_with_dask import run_auc  # noqa
from test_with_dask import run_boost_from_prediction  # noqa
from test_with_dask import run_boost_from_prediction_multi_class  # noqa
-from test_with_dask import run_dask_classifier  # noqa
-from test_with_dask import run_empty_dmatrix_cls  # noqa
+from test_with_dask import _get_client_workers  # noqa
+from test_with_dask import generate_array  # noqa
-from test_with_dask import kCols as random_cols  # noqa
-from test_with_dask import suppress  # noqa
-from test_with_dask import run_tree_stats  # noqa
from test_with_dask import run_categorical  # noqa
+from test_with_dask import make_categorical  # noqa
+from test_with_dask import run_dask_classifier  # noqa
+from test_with_dask import run_empty_dmatrix_auc  # noqa
+from test_with_dask import run_empty_dmatrix_cls  # noqa
+from test_with_dask import run_empty_dmatrix_reg  # noqa
+from test_with_dask import run_tree_stats  # noqa
+from test_with_dask import suppress  # noqa
+from test_with_dask import kCols as random_cols  # noqa

try:
-    import dask.dataframe as dd
-    from xgboost import dask as dxgb
-    import xgboost as xgb
-    from dask.distributed import Client
-    from dask import array as da
-    from dask_cuda import LocalCUDACluster, utils
+    import cudf
+    import dask.dataframe as dd
+    from dask import array as da
+    from dask.distributed import Client
+    from dask_cuda import LocalCUDACluster, utils
+
+    import xgboost as xgb
+    from xgboost import dask as dxgb
except ImportError:
    pass

@@ -334,9 +336,9 @@ class TestDistributedGPU:

    @pytest.mark.skipif(**tm.no_dask_cudf())
    def test_empty_partition(self, local_cuda_client: Client) -> None:
-        import dask_cudf
        import cudf
        import cupy
+        import dask_cudf

        mult = 100
        df = cudf.DataFrame(
@@ -1,13 +1,15 @@
import json
-import xgboost as xgb
-import pytest
-import tempfile
-import sys
-import numpy as np
import os
+import sys
+import tempfile

+import numpy as np
+import pytest

+import xgboost as xgb
+from xgboost import testing as tm

sys.path.append("tests/python")
-import testing as tm  # noqa
import test_with_sklearn as twskl  # noqa

pytestmark = pytest.mark.skipif(**tm.no_sklearn())
@@ -38,9 +40,9 @@ def test_gpu_binary_classification():
@pytest.mark.skipif(**tm.no_cupy())
@pytest.mark.skipif(**tm.no_cudf())
def test_boost_from_prediction_gpu_hist():
-    from sklearn.datasets import load_breast_cancer, load_digits
-    import cupy as cp
    import cudf
+    import cupy as cp
+    from sklearn.datasets import load_breast_cancer, load_digits

    tree_method = "gpu_hist"
    X, y = load_breast_cancer(return_X_y=True)
@@ -68,12 +70,12 @@ def test_num_parallel_tree():
@pytest.mark.skipif(**tm.no_cudf())
@pytest.mark.skipif(**tm.no_sklearn())
def test_categorical():
-    import pandas as pd
    import cudf
    import cupy as cp
+    import pandas as pd
    from sklearn.datasets import load_svmlight_file

-    data_dir = os.path.join(tm.PROJECT_ROOT, "demo", "data")
+    data_dir = tm.data_dir(__file__)
    X, y = load_svmlight_file(os.path.join(data_dir, "agaricus.txt.train"))
    clf = xgb.XGBClassifier(
        tree_method="gpu_hist",
@@ -123,9 +125,9 @@ def test_categorical():
@pytest.mark.skipif(**tm.no_cupy())
@pytest.mark.skipif(**tm.no_cudf())
def test_classififer():
-    from sklearn.datasets import load_digits
-    import cupy as cp
    import cudf
+    import cupy as cp
+    from sklearn.datasets import load_digits

    X, y = load_digits(return_X_y=True)
    y *= 10
@@ -1,9 +1,9 @@
-import numpy as np
-import xgboost as xgb
import cupy as cp
import time
+import numpy as np
import pytest

+import xgboost as xgb


# Test for integer overflow or out of memory exceptions
def test_large_input():
@@ -1,11 +1,12 @@
import sys
-import numpy as np

+import numpy as np
import pytest

import xgboost as xgb
+from xgboost import testing as tm

sys.path.append("tests/python")
-import testing as tm
import test_monotone_constraints as tmc

rng = np.random.RandomState(1994)

@@ -1,7 +1,9 @@
-import xgboost
-import numpy as np
import os

+import numpy as np

+import xgboost

kRounds = 2
kRows = 1000
kCols = 4
@@ -1,12 +1,13 @@
# -*- coding: utf-8 -*-
-import numpy as np
-import os
-import xgboost as xgb
-import pytest
import json
-from pathlib import Path
+import os
import tempfile
-import testing as tm
+from pathlib import Path

+import numpy as np
+import pytest

+import xgboost as xgb
+from xgboost import testing as tm

dpath = 'demo/data/'
rng = np.random.RandomState(1994)
@@ -1,13 +1,15 @@
-import numpy as np
-import xgboost as xgb
-import os
import json
-import testing as tm
-import pytest
import locale
+import os
import tempfile

-dpath = os.path.join(tm.PROJECT_ROOT, 'demo/data/')
+import numpy as np
+import pytest
+
+import xgboost as xgb
+from xgboost import testing as tm
+
+dpath = tm.data_dir(__file__)

rng = np.random.RandomState(1994)

@@ -36,8 +38,8 @@ class TestModels:
        param = {'verbosity': 0, 'objective': 'binary:logistic',
                 'booster': 'gblinear', 'alpha': 0.0001, 'lambda': 1,
                 'nthread': 1}
-        dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train')
-        dtest = xgb.DMatrix(dpath + 'agaricus.txt.test')
+        dtrain = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.train"))
+        dtest = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.test"))
        watchlist = [(dtest, 'eval'), (dtrain, 'train')]
        num_round = 4
        bst = xgb.train(param, dtrain, num_round, watchlist)
@@ -49,8 +51,8 @@ class TestModels:
        assert err < 0.2

    def test_dart(self):
-        dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train')
-        dtest = xgb.DMatrix(dpath + 'agaricus.txt.test')
+        dtrain = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.train"))
+        dtest = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.test"))
        param = {'max_depth': 5, 'objective': 'binary:logistic',
                 'eval_metric': 'logloss', 'booster': 'dart', 'verbosity': 1}
        # specify validations set to watch performance
@@ -116,7 +118,7 @@ class TestModels:

    def test_boost_from_prediction(self):
        # Re-construct dtrain here to avoid modification
-        margined = xgb.DMatrix(dpath + 'agaricus.txt.train')
+        margined = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.train"))
        bst = xgb.train({'tree_method': 'hist'}, margined, 1)
        predt_0 = bst.predict(margined, output_margin=True)
        margined.set_base_margin(predt_0)
@@ -124,13 +126,13 @@ class TestModels:
        predt_1 = bst.predict(margined)

        assert np.any(np.abs(predt_1 - predt_0) > 1e-6)
-        dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train')
+        dtrain = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.train"))
        bst = xgb.train({'tree_method': 'hist'}, dtrain, 2)
        predt_2 = bst.predict(dtrain)
        assert np.all(np.abs(predt_2 - predt_1) < 1e-6)

    def test_boost_from_existing_model(self):
-        X = xgb.DMatrix(dpath + 'agaricus.txt.train')
+        X = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.train"))
        booster = xgb.train({'tree_method': 'hist'}, X, num_boost_round=4)
        assert booster.num_boosted_rounds() == 4
        booster = xgb.train({'tree_method': 'hist'}, X, num_boost_round=4,
@@ -150,8 +152,8 @@ class TestModels:
            'objective': 'reg:logistic',
            "tree_method": tree_method
        }
-        dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train')
-        dtest = xgb.DMatrix(dpath + 'agaricus.txt.test')
+        dtrain = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.train"))
+        dtest = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.test"))
        watchlist = [(dtest, 'eval'), (dtrain, 'train')]
        num_round = 10

@@ -197,8 +199,8 @@ class TestModels:
        self.run_custom_objective()

    def test_multi_eval_metric(self):
-        dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train')
-        dtest = xgb.DMatrix(dpath + 'agaricus.txt.test')
+        dtrain = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.train"))
+        dtest = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.test"))
        watchlist = [(dtest, 'eval'), (dtrain, 'train')]
        param = {'max_depth': 2, 'eta': 0.2, 'verbosity': 1,
                 'objective': 'binary:logistic'}
@@ -220,7 +222,7 @@ class TestModels:
            param['scale_pos_weight'] = ratio
            return (dtrain, dtest, param)

-        dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train')
+        dtrain = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.train"))
        xgb.cv(param, dtrain, num_round, nfold=5,
               metrics={'auc'}, seed=0, fpreproc=fpreproc)

@@ -228,7 +230,7 @@ class TestModels:
        param = {'max_depth': 2, 'eta': 1, 'verbosity': 0,
                 'objective': 'binary:logistic'}
        num_round = 2
-        dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train')
+        dtrain = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.train"))
        xgb.cv(param, dtrain, num_round, nfold=5,
               metrics={'error'}, seed=0, show_stdv=False)

@@ -346,7 +348,7 @@ class TestModels:
        os.remove(model_path)

        try:
-            dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train')
+            dtrain = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.train"))
            xgb.train({'objective': 'foo'}, dtrain, num_boost_round=1)
        except ValueError as e:
            e_str = str(e)
@@ -1,9 +1,12 @@
-from typing import Union
-import xgboost as xgb
-import pytest
import os
-import testing as tm
import tempfile
+from contextlib import nullcontext
+from typing import Union

+import pytest

+import xgboost as xgb
+from xgboost import testing as tm

# We use the dataset for tests.
pytestmark = pytest.mark.skipif(**tm.no_sklearn())
@@ -271,13 +274,14 @@ class TestCallbacks:
        """Test learning rate scheduler, used by both CPU and GPU tests."""
        scheduler = xgb.callback.LearningRateScheduler

-        dpath = os.path.join(tm.PROJECT_ROOT, 'demo/data/')
-        dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train')
-        dtest = xgb.DMatrix(dpath + 'agaricus.txt.test')
+        dpath = tm.data_dir(__file__)
+        dtrain = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.train"))
+        dtest = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.test"))

        watchlist = [(dtest, 'eval'), (dtrain, 'train')]
        num_round = 4

-        warning_check = tm.noop_context()
+        warning_check = nullcontext()

        # learning_rates as a list
        # init eta with 0 to check whether learning_rates work
@@ -1,11 +1,13 @@
-import os
-import tempfile
-import platform
-import xgboost
-import subprocess
-import numpy
import json
-import testing as tm
+import os
+import platform
+import subprocess
+import tempfile

+import numpy

+import xgboost
+from xgboost import testing as tm


class TestCLI:
@@ -29,7 +31,7 @@ data = {data_path}
eval[test] = {data_path}
'''

-    PROJECT_ROOT = tm.PROJECT_ROOT
+    PROJECT_ROOT = tm.project_root(__file__)

    def get_exe(self):
        if platform.system() == 'Windows':
@@ -1,14 +1,16 @@
+from typing import Dict, List

import numpy as np
import pytest
from hypothesis import given, settings, strategies
from scipy.sparse import csr_matrix
-from testing import IteratorForTest, make_batches, non_increasing
from xgboost.data import SingleBatchInternalIter as SingleBatch
+from xgboost.testing import IteratorForTest, make_batches, non_increasing

import xgboost as xgb
-from xgboost import testing
+from xgboost import testing as tm

-pytestmark = testing.timeout(30)
+pytestmark = tm.timeout(30)


def test_single_batch(tree_method: str = "approx") -> None:
@@ -83,7 +85,7 @@ def run_data_iterator(
    if tree_method == "gpu_hist":
        parameters["sampling_method"] = "gradient_based"

-    results_from_it: xgb.callback.EvaluationMonitor.EvalsLog = {}
+    results_from_it: Dict[str, Dict[str, List[float]]] = {}
    from_it = xgb.train(
        parameters,
        Xy,
@@ -106,7 +108,7 @@ def run_data_iterator(
    assert Xy.num_row() == n_samples_per_batch * n_batches
    assert Xy.num_col() == n_features

-    results_from_arrays: xgb.callback.EvaluationMonitor.EvalsLog = {}
+    results_from_arrays: Dict[str, Dict[str, List[float]]] = {}
    from_arrays = xgb.train(
        parameters,
        Xy,
@@ -3,14 +3,12 @@ import subprocess
import sys

import pytest
-import testing as tm

-from xgboost import testing
+from xgboost import testing as tm

-pytestmark = testing.timeout(30)
+pytestmark = tm.timeout(30)

-ROOT_DIR = tm.PROJECT_ROOT
-DEMO_DIR = os.path.join(ROOT_DIR, 'demo')
+DEMO_DIR = tm.demo_dir(__file__)
PYTHON_DEMO_DIR = os.path.join(DEMO_DIR, 'guide-python')
CLI_DEMO_DIR = os.path.join(DEMO_DIR, 'CLI')

@@ -156,7 +154,7 @@ def test_cli_regression_demo():
    cmd = ['python', script, 'machine.txt', '1']
    subprocess.check_call(cmd, cwd=reg_dir)

-    exe = os.path.join(tm.PROJECT_ROOT, 'xgboost')
+    exe = os.path.join(DEMO_DIR, os.path.pardir, 'xgboost')
    conf = os.path.join(reg_dir, 'machine.conf')
    subprocess.check_call([exe, conf], cwd=reg_dir)
@ -4,11 +4,11 @@ import tempfile
|
||||
import numpy as np
|
||||
import pytest
|
||||
import scipy.sparse
|
||||
import testing as tm
|
||||
from hypothesis import given, settings, strategies
|
||||
from scipy.sparse import csr_matrix, rand
|
||||
|
||||
import xgboost as xgb
|
||||
from xgboost import testing as tm
|
||||
|
||||
rng = np.random.RandomState(1)
|
||||
|
||||
|
||||
@ -1,9 +1,8 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
import pytest
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import testing as tm
|
||||
import xgboost as xgb
|
||||
from xgboost import testing as tm
|
||||
|
||||
try:
|
||||
import datatable as dt
|
||||
|
||||
@ -1,8 +1,9 @@
|
||||
import xgboost as xgb
|
||||
import testing as tm
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import xgboost as xgb
|
||||
from xgboost import testing as tm
|
||||
|
||||
rng = np.random.RandomState(1994)
|
||||
|
||||
|
||||
|
||||
@ -1,8 +1,9 @@
|
||||
import xgboost as xgb
|
||||
import testing as tm
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import xgboost as xgb
|
||||
from xgboost import testing as tm
|
||||
|
||||
rng = np.random.RandomState(1337)
|
||||
|
||||
|
||||
@ -254,8 +255,8 @@ class TestEvalMetrics:
|
||||
self.run_roc_auc_multi("hist", n_samples, weighted)
|
||||
|
||||
def run_pr_auc_binary(self, tree_method):
|
||||
from sklearn.metrics import precision_recall_curve, auc
|
||||
from sklearn.datasets import make_classification
|
||||
from sklearn.metrics import auc, precision_recall_curve
|
||||
X, y = make_classification(128, 4, n_classes=2, random_state=1994)
|
||||
clf = xgb.XGBClassifier(tree_method=tree_method, n_estimators=1)
|
||||
clf.fit(X, y, eval_metric="aucpr", eval_set=[(X, y)])
|
||||
|
||||
@ -1,9 +1,9 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
import numpy as np
|
||||
import xgboost
|
||||
import testing as tm
|
||||
import pytest
|
||||
|
||||
import xgboost
|
||||
from xgboost import testing as tm
|
||||
|
||||
dpath = 'demo/data/'
|
||||
rng = np.random.RandomState(1994)
|
||||
|
||||
|
||||
@ -1,10 +1,9 @@
|
||||
import testing as tm
|
||||
from hypothesis import given, note, settings, strategies
|
||||
|
||||
import xgboost as xgb
|
||||
from xgboost import testing
|
||||
from xgboost import testing as tm
|
||||
|
||||
pytestmark = testing.timeout(10)
|
||||
pytestmark = tm.timeout(10)
|
||||
|
||||
|
||||
parameter_strategy = strategies.fixed_dictionaries({
|
||||
|
||||
@ -1,12 +1,14 @@
|
||||
import xgboost
|
||||
import os
|
||||
import generate_models as gm
|
||||
import testing as tm
|
||||
import json
|
||||
import zipfile
|
||||
import pytest
|
||||
import copy
|
||||
import json
|
||||
import os
|
||||
import urllib.request
|
||||
import zipfile
|
||||
|
||||
import generate_models as gm
|
||||
import pytest
|
||||
|
||||
import xgboost
|
||||
from xgboost import testing as tm
|
||||
|
||||
|
||||
def run_model_param_check(config):
|
||||
|
||||
@ -1,8 +1,9 @@
|
||||
import numpy as np
|
||||
import xgboost as xgb
|
||||
import testing as tm
|
||||
import pytest
|
||||
|
||||
import xgboost as xgb
|
||||
from xgboost import testing as tm
|
||||
|
||||
dpath = 'demo/data/'
|
||||
|
||||
|
||||
|
||||
@ -4,12 +4,11 @@ import tempfile
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
import testing as tm
|
||||
|
||||
import xgboost as xgb
|
||||
from xgboost import testing
|
||||
from xgboost import testing as tm
|
||||
|
||||
pytestmark = testing.timeout(10)
|
||||
pytestmark = tm.timeout(10)
|
||||
|
||||
|
||||
class TestOMP:
|
||||
@ -86,7 +85,7 @@ class TestOMP:
|
||||
def test_with_omp_thread_limit(self):
|
||||
args = [
|
||||
"python", os.path.join(
|
||||
tm.PROJECT_ROOT, "tests", "python", "with_omp_limit.py"
|
||||
os.path.dirname(tm.normpath(__file__)), "with_omp_limit.py"
|
||||
)
|
||||
]
|
||||
results = []
|
||||
|
||||
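
Instead of anchoring on the repository root, the rewritten test locates with_omp_limit.py next to the test file itself. Assuming tm.normpath just resolves a file path to a normalized absolute form (plain os.path semantics), the lookup amounts to:

import os

def normpath(path: str) -> str:
    # Assumed helper: absolute, normalized path, so that dirname()
    # reliably yields the directory containing the test file.
    return os.path.normpath(os.path.abspath(path))

script = os.path.join(os.path.dirname(normpath(__file__)), "with_omp_limit.py")
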
@ -1,8 +1,8 @@
import xgboost as xgb
import numpy as np
import pytest
import testing as tm

import xgboost as xgb
from xgboost import testing as tm

pytestmark = pytest.mark.skipif(**tm.no_pandas())


@ -1,9 +1,10 @@
import pickle
import numpy as np
import xgboost as xgb
import os
import json
import os
import pickle

import numpy as np

import xgboost as xgb

kRows = 100
kCols = 10

@ -1,15 +1,16 @@
import json
import numpy as np
import xgboost as xgb
import testing as tm

import numpy as np
import pytest

import xgboost as xgb
from xgboost import testing as tm

try:
    import matplotlib
    matplotlib.use('Agg')
    from matplotlib.axes import Axes
    from graphviz import Source
    from matplotlib.axes import Axes
except ImportError:
    pass


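
The reordered imports above serve the plotting checks: xgb.plot_importance draws onto a matplotlib Axes, which is what the assertions in the test_sklearn_plotting hunk later in this commit verify. A minimal usage sketch with synthetic data (the data and parameters here are illustrative):

import matplotlib

matplotlib.use("Agg")  # headless backend, as in the tests

import numpy as np
import xgboost as xgb
from matplotlib.axes import Axes

X = np.random.rand(64, 4)
y = np.random.randint(0, 2, size=64)
bst = xgb.train(
    {"objective": "binary:logistic"}, xgb.DMatrix(X, label=y), num_boost_round=2
)
ax = xgb.plot_importance(bst)
assert isinstance(ax, Axes)
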
@ -1,12 +1,13 @@
'''Tests for running inplace prediction.'''
from concurrent.futures import ThreadPoolExecutor
import numpy as np
from scipy import sparse
import pytest
import pandas as pd

import testing as tm
import numpy as np
import pandas as pd
import pytest
from scipy import sparse

import xgboost as xgb
from xgboost import testing as tm


def run_threaded_predict(X, rows, predict_func):

@ -4,7 +4,7 @@ import numpy as np
import pytest
from hypothesis import given, settings, strategies
from scipy import sparse
from testing import (
from xgboost.testing import (
    IteratorForTest,
    make_batches,
    make_batches_sparse,

@ -1,13 +1,15 @@
import numpy as np
from scipy.sparse import csr_matrix
import testing as tm
import xgboost
import os
import itertools
import os
import shutil
import urllib.request
import zipfile

import numpy as np
from scipy.sparse import csr_matrix

import xgboost
from xgboost import testing as tm


def test_ranking_with_unweighted_data():
    Xrow = np.array([1, 2, 6, 8, 11, 14, 16, 17])

@ -1,11 +1,12 @@
# -*- coding: utf-8 -*-
import numpy as np
import xgboost as xgb
import itertools
import re

import numpy as np
import scipy
import scipy.special

import xgboost as xgb

dpath = 'demo/data/'
rng = np.random.RandomState(1994)


@ -4,7 +4,8 @@ from typing import List
import numpy as np
import pandas as pd
import pytest
import testing as tm

from xgboost import testing as tm

if tm.no_spark()["condition"]:
    pytest.skip(msg=tm.no_spark()["reason"], allow_module_level=True)

@ -6,10 +6,9 @@ import uuid

import numpy as np
import pytest
import testing as tm

import xgboost as xgb
from xgboost import testing
from xgboost import testing as tm

if tm.no_spark()["condition"]:
    pytest.skip(msg=tm.no_spark()["reason"], allow_module_level=True)
@ -38,7 +37,7 @@ from .utils import SparkTestCase

logging.getLogger("py4j").setLevel(logging.INFO)

pytestmark = testing.timeout(60)
pytestmark = tm.timeout(60)


class XgboostLocalTest(SparkTestCase):

@ -6,7 +6,8 @@ import uuid

import numpy as np
import pytest
import testing as tm

from xgboost import testing as tm

if tm.no_spark()["condition"]:
    pytest.skip(msg=tm.no_spark()["reason"], allow_module_level=True)

@ -6,9 +6,10 @@ import tempfile
import unittest

import pytest
import testing as tm
from six import StringIO

from xgboost import testing as tm

if tm.no_spark()["condition"]:
    pytest.skip(msg=tm.no_spark()["reason"], allow_module_level=True)
if sys.platform.startswith("win") or sys.platform.startswith("darwin"):

@ -1,11 +1,13 @@
import testing as tm
import pytest
import numpy as np
import xgboost as xgb
import json
import os

dpath = os.path.join(tm.PROJECT_ROOT, 'demo', 'data')
import numpy as np
import pytest

import xgboost as xgb
from xgboost import testing as tm

dpath = tm.data_dir(__file__)


def test_aft_survival_toy_data():

@ -3,10 +3,10 @@ import sys

import numpy as np
import pytest
import testing as tm

import xgboost as xgb
from xgboost import RabitTracker, testing
from xgboost import RabitTracker
from xgboost import testing as tm

if sys.platform.startswith("win"):
    pytest.skip("Skipping dask tests on Windows", allow_module_level=True)
@ -61,7 +61,7 @@ def test_rabit_ops():
    run_rabit_ops(client, n_workers)


@pytest.mark.skipif(**testing.skip_ipv6())
@pytest.mark.skipif(**tm.no_ipv6())
@pytest.mark.skipif(**tm.no_dask())
def test_rabit_ops_ipv6():
    import dask

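
testing.skip_ipv6() is renamed to tm.no_ipv6() here; judging by the **-unpacking into pytest.mark.skipif, it returns the same condition/reason mapping as the other skip helpers. A sketch under that assumption (the probe below is a simplification; the real helper may exercise an actual IPv6 loopback connection):

import socket

def no_ipv6():
    # Skip when the interpreter reports no IPv6 support (assumed semantics).
    return {"condition": not socket.has_ipv6, "reason": "test requires IPv6"}
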
@ -1,10 +1,11 @@
import xgboost as xgb
import testing as tm
import numpy as np
import pytest
import os
import tempfile

import numpy as np
import pytest

import xgboost as xgb
from xgboost import testing as tm

rng = np.random.RandomState(1337)


@ -1,8 +1,8 @@
import numpy as np
import xgboost as xgb

from numpy.testing import assert_approx_equal

import xgboost as xgb

train_data = xgb.DMatrix(np.array([[1]]), label=np.array([1]))



@ -1,11 +1,13 @@
import json
from string import ascii_lowercase
from typing import Dict, Any
import testing as tm
import pytest
import xgboost as xgb
from typing import Any, Dict

import numpy as np
from hypothesis import given, strategies, settings, note
import pytest
from hypothesis import given, note, settings, strategies

import xgboost as xgb
from xgboost import testing as tm

exact_parameter_strategy = strategies.fixed_dictionaries({
    'nthread': strategies.integers(1, 4),

@ -1,14 +1,16 @@
import unittest
import pytest
import numpy as np
import testing as tm
import xgboost as xgb
import os
import unittest

import numpy as np
import pytest

import xgboost as xgb
from xgboost import testing as tm

try:
    import pandas as pd
    import pyarrow as pa
    import pyarrow.csv as pc
    import pandas as pd
except ImportError:
    pass

@ -73,7 +75,7 @@ class TestArrowTable(unittest.TestCase):
        np.testing.assert_allclose(preds1, preds2)

    def test_arrow_survival(self):
        data = os.path.join(tm.PROJECT_ROOT, "demo", "data", "veterans_lung_cancer.csv")
        data = os.path.join(tm.data_dir(__file__), "veterans_lung_cancer.csv")
        table = pc.read_csv(data)
        y_lower_bound = table["Survival_label_lower_bound"]
        y_upper_bound = table["Survival_label_upper_bound"]

@ -20,7 +20,6 @@ import numpy as np
import pytest
import scipy
import sklearn
import testing as tm
from hypothesis import HealthCheck, given, note, settings
from sklearn.datasets import make_classification, make_regression
from test_predict import verify_leaf_output
@ -29,7 +28,7 @@ from test_with_sklearn import run_data_initialization, run_feature_weights
from xgboost.data import _is_cudf_df

import xgboost as xgb
from xgboost import testing
from xgboost import testing as tm

if sys.platform.startswith("win"):
    pytest.skip("Skipping dask tests on Windows", allow_module_level=True)
@ -45,7 +44,7 @@ from xgboost.dask import DaskDMatrix

dask.config.set({"distributed.scheduler.allowed-failures": False})

pytestmark = testing.timeout(30)
pytestmark = tm.timeout(30)

if hasattr(HealthCheck, 'function_scoped_fixture'):
    suppress = [HealthCheck.function_scoped_fixture]
@ -1116,8 +1115,9 @@ def test_predict_with_meta(client: "Client") -> None:


def run_aft_survival(client: "Client", dmatrix_t: Type) -> None:
    df = dd.read_csv(os.path.join(tm.PROJECT_ROOT, 'demo', 'data',
                                  'veterans_lung_cancer.csv'))
    df = dd.read_csv(
        os.path.join(tm.data_dir(__file__), "veterans_lung_cancer.csv")
    )
    y_lower_bound = df['Survival_label_lower_bound']
    y_upper_bound = df['Survival_label_upper_bound']
    X = df.drop(['Survival_label_lower_bound',

@ -1,10 +1,10 @@
# -*- coding: utf-8 -*-
import numpy as np
import xgboost as xgb
import testing as tm
import pytest
from test_dmatrix import set_base_margin_info

import xgboost as xgb
from xgboost import testing as tm

try:
    import modin.pandas as md
except ImportError:

@ -1,11 +1,13 @@
import os
import tempfile

import numpy as np
import xgboost as xgb
import testing as tm
import pytest
from test_dmatrix import set_base_margin_info

import xgboost as xgb
from xgboost import testing as tm

try:
    import pandas as pd
except ImportError:

@ -1,7 +1,8 @@
import numpy as np
import xgboost as xgb
import pytest

import xgboost as xgb

try:
    import shap
except ImportError:

@ -8,14 +8,13 @@ from typing import Callable, Optional

import numpy as np
import pytest
import testing as tm
from sklearn.utils.estimator_checks import parametrize_with_checks

import xgboost as xgb
from xgboost import testing
from xgboost import testing as tm

rng = np.random.RandomState(1994)
pytestmark = [pytest.mark.skipif(**tm.no_sklearn()), testing.timeout(30)]
pytestmark = [pytest.mark.skipif(**tm.no_sklearn()), tm.timeout(30)]


def test_binary_classification():
@ -155,11 +154,10 @@ def test_ranking():


def test_stacking_regression():
    from sklearn.model_selection import train_test_split
    from sklearn.datasets import load_diabetes
    from sklearn.ensemble import RandomForestRegressor, StackingRegressor
    from sklearn.linear_model import RidgeCV
    from sklearn.ensemble import RandomForestRegressor
    from sklearn.ensemble import StackingRegressor
    from sklearn.model_selection import train_test_split

    X, y = load_diabetes(return_X_y=True)
    estimators = [
@ -177,13 +175,13 @@ def test_stacking_regression():


def test_stacking_classification():
    from sklearn.model_selection import train_test_split
    from sklearn.datasets import load_iris
    from sklearn.svm import LinearSVC
    from sklearn.linear_model import LogisticRegression
    from sklearn.preprocessing import StandardScaler
    from sklearn.pipeline import make_pipeline
    from sklearn.ensemble import StackingClassifier
    from sklearn.linear_model import LogisticRegression
    from sklearn.model_selection import train_test_split
    from sklearn.pipeline import make_pipeline
    from sklearn.preprocessing import StandardScaler
    from sklearn.svm import LinearSVC

    X, y = load_iris(return_X_y=True)
    estimators = [
@ -354,8 +352,8 @@ def test_num_parallel_tree():


def test_regression():
    from sklearn.metrics import mean_squared_error
    from sklearn.datasets import fetch_california_housing
    from sklearn.metrics import mean_squared_error
    from sklearn.model_selection import KFold

    X, y = fetch_california_housing(return_X_y=True)
@ -383,8 +381,8 @@ def test_regression():


def run_housing_rf_regression(tree_method):
    from sklearn.metrics import mean_squared_error
    from sklearn.datasets import fetch_california_housing
    from sklearn.metrics import mean_squared_error
    from sklearn.model_selection import KFold

    X, y = fetch_california_housing(return_X_y=True)
@ -407,8 +405,8 @@ def test_rf_regression():


def test_parameter_tuning():
    from sklearn.model_selection import GridSearchCV
    from sklearn.datasets import fetch_california_housing
    from sklearn.model_selection import GridSearchCV

    X, y = fetch_california_housing(return_X_y=True)
    xgb_model = xgb.XGBRegressor(learning_rate=0.1)
@ -421,8 +419,8 @@ def test_parameter_tuning():


def test_regression_with_custom_objective():
    from sklearn.metrics import mean_squared_error
    from sklearn.datasets import fetch_california_housing
    from sklearn.metrics import mean_squared_error
    from sklearn.model_selection import KFold

    def objective_ls(y_true, y_pred):
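
test_regression_with_custom_objective exercises the estimator's support for callable objectives: a function taking the true labels and the current predictions and returning the per-row gradient and hessian pair. For squared error that is:

import numpy as np

def objective_ls(y_true: np.ndarray, y_pred: np.ndarray):
    # Gradient and hessian of 0.5 * (y_pred - y_true) ** 2.
    grad = y_pred - y_true
    hess = np.ones_like(y_pred)
    return grad, hess

# e.g. xgb.XGBRegressor(objective=objective_ls).fit(X, y)
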
@ -539,8 +537,8 @@ def test_sklearn_plotting():
    import matplotlib
    matplotlib.use('Agg')

    from matplotlib.axes import Axes
    from graphviz import Source
    from matplotlib.axes import Axes

    ax = xgb.plot_importance(classifier)
    assert isinstance(ax, Axes)
@ -666,8 +664,8 @@ def test_kwargs_error():


def test_kwargs_grid_search():
    from sklearn.model_selection import GridSearchCV
    from sklearn import datasets
    from sklearn.model_selection import GridSearchCV

    params = {'tree_method': 'hist'}
    clf = xgb.XGBClassifier(n_estimators=1, learning_rate=1.0, **params)
@ -841,9 +839,7 @@ def test_save_load_model():


def test_RFECV():
    from sklearn.datasets import load_diabetes
    from sklearn.datasets import load_breast_cancer
    from sklearn.datasets import load_iris
    from sklearn.datasets import load_breast_cancer, load_diabetes, load_iris
    from sklearn.feature_selection import RFECV

    # Regression
@ -1046,8 +1042,9 @@ def run_feature_weights(X, y, fw, tree_method, model=xgb.XGBRegressor):
    with open(model_path) as fd:
        model = json.load(fd)

    parser_path = os.path.join(tm.PROJECT_ROOT, 'demo', 'json-model',
                               'json_parser.py')
    parser_path = os.path.join(
        tm.demo_dir(__file__), "json-model", "json_parser.py"
    )
    spec = importlib.util.spec_from_file_location("JsonParser",
                                                  parser_path)
    foo = importlib.util.module_from_spec(spec)
@ -1162,8 +1159,8 @@ def run_boost_from_prediction_multi_clasas(

@pytest.mark.parametrize("tree_method", ["hist", "approx", "exact"])
def test_boost_from_prediction(tree_method):
    from sklearn.datasets import load_breast_cancer, load_iris, make_regression
    import pandas as pd
    from sklearn.datasets import load_breast_cancer, load_iris, make_regression

    X, y = load_breast_cancer(return_X_y=True)


@ -1,7 +1,9 @@
import xgboost as xgb
import sys

from sklearn.datasets import make_classification
from sklearn.metrics import roc_auc_score
import sys

import xgboost as xgb


def run_omp(output_path: str):