Move Python testing utilities into xgboost module. (#8379)

- Add typehints.
- Fixes for pylint.

Co-authored-by: Hyunsu Philip Cho <chohyu01@cs.washington.edu>
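With the utilities now published as the ``xgboost.testing`` module, test suites can import them from the installed package rather than from a repository-relative helper file. A minimal sketch of the intended usage (the ``tm`` alias and the test name are illustrative conventions, not part of this commit):

    import pytest
    import xgboost.testing as tm

    @pytest.mark.skipif(**tm.no_sklearn())
    def test_something_with_sklearn() -> None:
        ...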
@@ -65,7 +65,7 @@ def _check_rf_callback(
)


-_SklObjective = Optional[
+SklObjective = Optional[
    Union[str, Callable[[np.ndarray, np.ndarray], Tuple[np.ndarray, np.ndarray]]]
]

@@ -144,7 +144,7 @@ __model_doc = f"""
        Boosting learning rate (xgb's "eta")
    verbosity : Optional[int]
        The degree of verbosity. Valid values are 0 (silent) - 3 (debug).
-    objective : {_SklObjective}
+    objective : {SklObjective}
        Specify the learning task and the corresponding learning objective or
        a custom objective function to be used (see note below).
    booster: Optional[str]
@@ -546,7 +546,7 @@ class XGBModel(XGBModelBase):
        learning_rate: Optional[float] = None,
        n_estimators: int = 100,
        verbosity: Optional[int] = None,
-        objective: _SklObjective = None,
+        objective: SklObjective = None,
        booster: Optional[str] = None,
        tree_method: Optional[str] = None,
        n_jobs: Optional[int] = None,
@@ -1409,7 +1409,7 @@ class XGBClassifier(XGBModel, XGBClassifierBase):
    def __init__(
        self,
        *,
-        objective: _SklObjective = "binary:logistic",
+        objective: SklObjective = "binary:logistic",
        use_label_encoder: Optional[bool] = None,
        **kwargs: Any,
    ) -> None:
@@ -1712,7 +1712,7 @@ class XGBRegressor(XGBModel, XGBRegressorBase):
    # pylint: disable=missing-docstring
    @_deprecate_positional_args
    def __init__(
-        self, *, objective: _SklObjective = "reg:squarederror", **kwargs: Any
+        self, *, objective: SklObjective = "reg:squarederror", **kwargs: Any
    ) -> None:
        super().__init__(objective=objective, **kwargs)
@@ -1,64 +0,0 @@
"""Utilities for defining Python tests."""

import socket
from platform import system
from typing import Any, TypedDict

PytestSkip = TypedDict("PytestSkip", {"condition": bool, "reason": str})


def has_ipv6() -> bool:
    """Check whether IPv6 is enabled on this host."""
    # connection error in macos, still need some fixes.
    if system() not in ("Linux", "Windows"):
        return False

    if socket.has_ipv6:
        try:
            with socket.socket(
                socket.AF_INET6, socket.SOCK_STREAM
            ) as server, socket.socket(socket.AF_INET6, socket.SOCK_STREAM) as client:
                server.bind(("::1", 0))
                port = server.getsockname()[1]
                server.listen()

                client.connect(("::1", port))
                conn, _ = server.accept()

                client.sendall("abc".encode())
                msg = conn.recv(3).decode()
                # if the code can be executed to this point, the message should be
                # correct.
                assert msg == "abc"
                return True
        except OSError:
            pass
    return False


def skip_ipv6() -> PytestSkip:
    """PyTest skip mark for IPv6."""
    return {"condition": not has_ipv6(), "reason": "IPv6 is required to be enabled."}


def timeout(sec: int, *args: Any, enable: bool = True, **kwargs: Any) -> Any:
    """Make a pytest mark for the `pytest-timeout` package.

    Parameters
    ----------
    sec :
        Timeout seconds.
    enable :
        Control whether timeout should be applied, used for debugging.

    Returns
    -------
    pytest.mark.timeout
    """
    import pytest  # pylint: disable=import-error

    # This is disabled for now due to regression caused by conflicts between federated
    # learning build and the CI container environment.
    if enable:
        return pytest.mark.timeout(sec, *args, **kwargs)
    return pytest.mark.timeout(None, *args, **kwargs)
python-package/xgboost/testing/__init__.py  (new file, 890 lines)
@@ -0,0 +1,890 @@
"""Utilities for defining Python tests. The module is private and subject to frequent
change without notice.

"""
# pylint: disable=invalid-name,missing-function-docstring,import-error
import gc
import importlib.util
import multiprocessing
import os
import platform
import socket
import sys
import urllib
import zipfile
from concurrent.futures import ThreadPoolExecutor
from contextlib import contextmanager
from io import StringIO
from platform import system
from typing import (
    Any,
    Callable,
    Dict,
    Generator,
    List,
    Optional,
    Sequence,
    Set,
    Tuple,
    TypedDict,
    Union,
)

import numpy as np
import pytest
from scipy import sparse
from xgboost.core import ArrayLike
from xgboost.sklearn import SklObjective

import xgboost as xgb

hypothesis = pytest.importorskip("hypothesis")

# pylint:disable=wrong-import-position,wrong-import-order
from hypothesis import strategies
from hypothesis.extra.numpy import arrays

joblib = pytest.importorskip("joblib")
datasets = pytest.importorskip("sklearn.datasets")

Memory = joblib.Memory

memory = Memory("./cachedir", verbose=0)

PytestSkip = TypedDict("PytestSkip", {"condition": bool, "reason": str})


def has_ipv6() -> bool:
    """Check whether IPv6 is enabled on this host."""
    # connection error in macos, still need some fixes.
    if system() not in ("Linux", "Windows"):
        return False

    if socket.has_ipv6:
        try:
            with socket.socket(
                socket.AF_INET6, socket.SOCK_STREAM
            ) as server, socket.socket(socket.AF_INET6, socket.SOCK_STREAM) as client:
                server.bind(("::1", 0))
                port = server.getsockname()[1]
                server.listen()

                client.connect(("::1", port))
                conn, _ = server.accept()

                client.sendall("abc".encode())
                msg = conn.recv(3).decode()
                # if the code can be executed to this point, the message should be
                # correct.
                assert msg == "abc"
                return True
        except OSError:
            pass
    return False


def no_mod(name: str) -> PytestSkip:
    spec = importlib.util.find_spec(name)
    return {"condition": spec is None, "reason": f"{name} is not installed."}


def no_ipv6() -> PytestSkip:
    """PyTest skip mark for IPv6."""
    return {"condition": not has_ipv6(), "reason": "IPv6 is required to be enabled."}


def no_ubjson() -> PytestSkip:
    return no_mod("ubjson")


def no_sklearn() -> PytestSkip:
    return no_mod("sklearn")


def no_dask() -> PytestSkip:
    return no_mod("dask")


def no_spark() -> PytestSkip:
    return no_mod("pyspark")


def no_pandas() -> PytestSkip:
    return no_mod("pandas")


def no_arrow() -> PytestSkip:
    return no_mod("pyarrow")


def no_modin() -> PytestSkip:
    return no_mod("modin")


def no_dt() -> PytestSkip:
    return no_mod("datatable")


def no_matplotlib() -> PytestSkip:
    reason = "Matplotlib is not installed."
    try:
        import matplotlib.pyplot as _  # noqa

        return {"condition": False, "reason": reason}
    except ImportError:
        return {"condition": True, "reason": reason}


def no_dask_cuda() -> PytestSkip:
    return no_mod("dask_cuda")


def no_cudf() -> PytestSkip:
    return no_mod("cudf")


def no_cupy() -> PytestSkip:
    return no_mod("cupy")


def no_dask_cudf() -> PytestSkip:
    return no_mod("dask_cudf")


def no_json_schema() -> PytestSkip:
    return no_mod("jsonschema")


def no_graphviz() -> PytestSkip:
    return no_mod("graphviz")


def no_multiple(*args: Any) -> PytestSkip:
    condition = False
    reason = ""
    for arg in args:
        condition = condition or arg["condition"]
        if arg["condition"]:
            reason = arg["reason"]
            break
    return {"condition": condition, "reason": reason}


def skip_s390x() -> PytestSkip:
    condition = platform.machine() == "s390x"
    reason = "Known to fail on s390x"
    return {"condition": condition, "reason": reason}
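# --- Illustrative usage (editor's sketch, not part of the original diff) ---
# The PytestSkip dicts returned above unpack directly into pytest's skipif marker,
# and no_multiple() folds several requirements into a single condition.
@pytest.mark.skipif(**no_multiple(no_sklearn(), no_pandas()))
def _example_skip_usage() -> None:
    """Hypothetical test that only runs when scikit-learn and pandas are installed."""
    import sklearn  # noqa
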
class IteratorForTest(xgb.core.DataIter):
    """Iterator for testing streaming DMatrix. (external memory, quantile)"""

    def __init__(
        self,
        X: Sequence,
        y: Sequence,
        w: Optional[Sequence],
        cache: Optional[str] = "./",
    ) -> None:
        assert len(X) == len(y)
        self.X = X
        self.y = y
        self.w = w
        self.it = 0
        super().__init__(cache)

    def next(self, input_data: Callable) -> int:
        if self.it == len(self.X):
            return 0

        with pytest.raises(TypeError, match="keyword args"):
            input_data(self.X[self.it], self.y[self.it], None)

        # Use copy to make sure the iterator doesn't hold a reference to the data.
        input_data(
            data=self.X[self.it].copy(),
            label=self.y[self.it].copy(),
            weight=self.w[self.it].copy() if self.w else None,
        )
        gc.collect()  # clear up the copy, see if XGBoost access freed memory.
        self.it += 1
        return 1

    def reset(self) -> None:
        self.it = 0

    def as_arrays(
        self,
    ) -> Tuple[Union[np.ndarray, sparse.csr_matrix], ArrayLike, ArrayLike]:
        if isinstance(self.X[0], sparse.csr_matrix):
            X = sparse.vstack(self.X, format="csr")
        else:
            X = np.concatenate(self.X, axis=0)
        y = np.concatenate(self.y, axis=0)
        if self.w:
            w = np.concatenate(self.w, axis=0)
        else:
            w = None
        return X, y, w


def make_batches(
    n_samples_per_batch: int, n_features: int, n_batches: int, use_cupy: bool = False
) -> Tuple[List[np.ndarray], List[np.ndarray], List[np.ndarray]]:
    X = []
    y = []
    w = []
    if use_cupy:
        import cupy

        rng = cupy.random.RandomState(1994)
    else:
        rng = np.random.RandomState(1994)
    for _ in range(n_batches):
        _X = rng.randn(n_samples_per_batch, n_features)
        _y = rng.randn(n_samples_per_batch)
        _w = rng.uniform(low=0, high=1, size=n_samples_per_batch)
        X.append(_X)
        y.append(_y)
        w.append(_w)
    return X, y, w
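# --- Illustrative usage (editor's sketch, not part of the original diff) ---
# make_batches() produces per-batch arrays that IteratorForTest can stream into a
# DMatrix, mirroring what TestDataset.get_external_dmat() does further below.
def _example_iterator_usage() -> xgb.DMatrix:
    X, y, w = make_batches(n_samples_per_batch=32, n_features=4, n_batches=3)
    it = IteratorForTest(X, y, w)
    return xgb.DMatrix(it)
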
def make_batches_sparse(
    n_samples_per_batch: int, n_features: int, n_batches: int, sparsity: float
) -> Tuple[List[sparse.csr_matrix], List[np.ndarray], List[np.ndarray]]:
    X = []
    y = []
    w = []
    rng = np.random.RandomState(1994)
    for _ in range(n_batches):
        _X = sparse.random(
            n_samples_per_batch,
            n_features,
            1.0 - sparsity,
            format="csr",
            dtype=np.float32,
            random_state=rng,
        )
        _y = rng.randn(n_samples_per_batch)
        _w = rng.uniform(low=0, high=1, size=n_samples_per_batch)
        X.append(_X)
        y.append(_y)
        w.append(_w)
    return X, y, w


class TestDataset:
    """Contains a dataset in numpy format as well as the relevant objective and metric."""

    def __init__(
        self, name: str, get_dataset: Callable, objective: str, metric: str
    ) -> None:
        self.name = name
        self.objective = objective
        self.metric = metric
        self.X, self.y = get_dataset()
        self.w: Optional[np.ndarray] = None
        self.margin: Optional[np.ndarray] = None

    def set_params(self, params_in: Dict[str, Any]) -> Dict[str, Any]:
        params_in["objective"] = self.objective
        params_in["eval_metric"] = self.metric
        if self.objective == "multi:softmax":
            params_in["num_class"] = int(np.max(self.y) + 1)
        return params_in

    def get_dmat(self) -> xgb.DMatrix:
        return xgb.DMatrix(
            self.X,
            self.y,
            weight=self.w,
            base_margin=self.margin,
            enable_categorical=True,
        )

    def get_device_dmat(self) -> xgb.DeviceQuantileDMatrix:
        import cupy as cp

        w = None if self.w is None else cp.array(self.w)
        X = cp.array(self.X, dtype=np.float32)
        y = cp.array(self.y, dtype=np.float32)
        return xgb.DeviceQuantileDMatrix(X, y, w, base_margin=self.margin)

    def get_external_dmat(self) -> xgb.DMatrix:
        n_samples = self.X.shape[0]
        n_batches = 10
        per_batch = n_samples // n_batches + 1

        predictor = []
        response = []
        weight = []
        for i in range(n_batches):
            beg = i * per_batch
            end = min((i + 1) * per_batch, n_samples)
            assert end != beg
            X = self.X[beg:end, ...]
            y = self.y[beg:end]
            w = self.w[beg:end] if self.w is not None else None
            predictor.append(X)
            response.append(y)
            if w is not None:
                weight.append(w)

        it = IteratorForTest(predictor, response, weight if weight else None)
        return xgb.DMatrix(it)

    def __repr__(self) -> str:
        return self.name
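# --- Illustrative usage (editor's sketch, not part of the original diff) ---
# A TestDataset bundles data with its objective/metric; set_params() injects them
# into a parameter dict before handing the DMatrix to xgb.train().
def _example_testdataset_usage() -> xgb.Booster:
    dataset = TestDataset(
        "cancer",
        lambda: datasets.load_breast_cancer(return_X_y=True),
        "binary:logistic",
        "logloss",
    )
    params = dataset.set_params({"tree_method": "hist"})
    return xgb.train(params, dataset.get_dmat(), num_boost_round=4)
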
@memory.cache
def get_california_housing() -> Tuple[np.ndarray, np.ndarray]:
    data = datasets.fetch_california_housing()
    return data.data, data.target


@memory.cache
def get_digits() -> Tuple[np.ndarray, np.ndarray]:
    data = datasets.load_digits()
    return data.data, data.target


@memory.cache
def get_cancer() -> Tuple[np.ndarray, np.ndarray]:
    return datasets.load_breast_cancer(return_X_y=True)


@memory.cache
def get_sparse() -> Tuple[np.ndarray, np.ndarray]:
    rng = np.random.RandomState(199)
    n = 2000
    sparsity = 0.75
    X, y = datasets.make_regression(n, random_state=rng)
    flag = rng.binomial(1, sparsity, X.shape)
    for i in range(X.shape[0]):
        for j in range(X.shape[1]):
            if flag[i, j]:
                X[i, j] = np.nan
    return X, y


@memory.cache
def get_ames_housing() -> Tuple[np.ndarray, np.ndarray]:
    """
    Number of samples: 1460
    Number of features: 20
    Number of categorical features: 10
    Number of numerical features: 10
    """
    from sklearn.datasets import fetch_openml

    X, y = fetch_openml(data_id=42165, as_frame=True, return_X_y=True)

    categorical_columns_subset: List[str] = [
        "BldgType",  # 5 cats, no nan
        "GarageFinish",  # 3 cats, nan
        "LotConfig",  # 5 cats, no nan
        "Functional",  # 7 cats, no nan
        "MasVnrType",  # 4 cats, nan
        "HouseStyle",  # 8 cats, no nan
        "FireplaceQu",  # 5 cats, nan
        "ExterCond",  # 5 cats, no nan
        "ExterQual",  # 4 cats, no nan
        "PoolQC",  # 3 cats, nan
    ]

    numerical_columns_subset: List[str] = [
        "3SsnPorch",
        "Fireplaces",
        "BsmtHalfBath",
        "HalfBath",
        "GarageCars",
        "TotRmsAbvGrd",
        "BsmtFinSF1",
        "BsmtFinSF2",
        "GrLivArea",
        "ScreenPorch",
    ]

    X = X[categorical_columns_subset + numerical_columns_subset]
    X[categorical_columns_subset] = X[categorical_columns_subset].astype("category")
    return X, y


@memory.cache
def get_mq2008(
    dpath: str,
) -> Tuple[
    sparse.csr_matrix,
    np.ndarray,
    np.ndarray,
    sparse.csr_matrix,
    np.ndarray,
    np.ndarray,
    sparse.csr_matrix,
    np.ndarray,
    np.ndarray,
]:
    from sklearn.datasets import load_svmlight_files

    src = "https://s3-us-west-2.amazonaws.com/xgboost-examples/MQ2008.zip"
    target = dpath + "/MQ2008.zip"
    if not os.path.exists(target):
        urllib.request.urlretrieve(url=src, filename=target)

    with zipfile.ZipFile(target, "r") as f:
        f.extractall(path=dpath)

    (
        x_train,
        y_train,
        qid_train,
        x_test,
        y_test,
        qid_test,
        x_valid,
        y_valid,
        qid_valid,
    ) = load_svmlight_files(
        (
            dpath + "MQ2008/Fold1/train.txt",
            dpath + "MQ2008/Fold1/test.txt",
            dpath + "MQ2008/Fold1/vali.txt",
        ),
        query_id=True,
        zero_based=False,
    )

    return (
        x_train,
        y_train,
        qid_train,
        x_test,
        y_test,
        qid_test,
        x_valid,
        y_valid,
        qid_valid,
    )


@memory.cache
def make_categorical(
    n_samples: int,
    n_features: int,
    n_categories: int,
    onehot: bool,
    sparsity: float = 0.0,
) -> Tuple[ArrayLike, np.ndarray]:
    import pandas as pd

    rng = np.random.RandomState(1994)

    pd_dict = {}
    for i in range(n_features + 1):
        c = rng.randint(low=0, high=n_categories, size=n_samples)
        pd_dict[str(i)] = pd.Series(c, dtype=np.int64)

    df = pd.DataFrame(pd_dict)
    label = df.iloc[:, 0]
    df = df.iloc[:, 1:]
    for i in range(0, n_features):
        label += df.iloc[:, i]
    label += 1

    df = df.astype("category")
    categories = np.arange(0, n_categories)
    for col in df.columns:
        df[col] = df[col].cat.set_categories(categories)

    if sparsity > 0.0:
        for i in range(n_features):
            index = rng.randint(
                low=0, high=n_samples - 1, size=int(n_samples * sparsity)
            )
            df.iloc[index, i] = np.NaN
            assert n_categories == np.unique(df.dtypes[i].categories).size

    if onehot:
        return pd.get_dummies(df), label
    return df, label
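# --- Illustrative usage (editor's sketch, not part of the original diff) ---
# The categorical frame can be fed directly to DMatrix with enable_categorical=True;
# passing onehot=True above yields a one-hot encoded frame instead.
def _example_categorical_usage() -> xgb.DMatrix:
    X, y = make_categorical(n_samples=128, n_features=3, n_categories=4, onehot=False)
    return xgb.DMatrix(X, y, enable_categorical=True)
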
def _cat_sampled_from() -> strategies.SearchStrategy:
    @strategies.composite
    def _make_cat(draw: Callable) -> Tuple[int, int, int, float]:
        n_samples = draw(strategies.integers(2, 512))
        n_features = draw(strategies.integers(1, 4))
        n_cats = draw(strategies.integers(1, 128))
        sparsity = draw(
            strategies.floats(
                min_value=0,
                max_value=1,
                allow_nan=False,
                allow_infinity=False,
                allow_subnormal=False,
            )
        )
        return n_samples, n_features, n_cats, sparsity

    def _build(args: Tuple[int, int, int, float]) -> TestDataset:
        n_samples = args[0]
        n_features = args[1]
        n_cats = args[2]
        sparsity = args[3]
        return TestDataset(
            f"{n_samples}x{n_features}-{n_cats}-{sparsity}",
            lambda: make_categorical(n_samples, n_features, n_cats, False, sparsity),
            "reg:squarederror",
            "rmse",
        )

    return _make_cat().map(_build)  # pylint: disable=no-member


categorical_dataset_strategy: strategies.SearchStrategy = _cat_sampled_from()


# pylint: disable=too-many-locals
@memory.cache
def make_sparse_regression(
    n_samples: int, n_features: int, sparsity: float, as_dense: bool
) -> Tuple[Union[sparse.csr_matrix], np.ndarray]:
    """Make sparse matrix.

    Parameters
    ----------

    as_dense:

        Return the matrix as np.ndarray with missing values filled by NaN

    """
    if not hasattr(np.random, "default_rng"):
        # old version of numpy on s390x
        rng = np.random.RandomState(1994)
        X = sparse.random(
            m=n_samples,
            n=n_features,
            density=1.0 - sparsity,
            random_state=rng,
            format="csr",
        )
        y = rng.normal(loc=0.0, scale=1.0, size=n_samples)
        return X, y

    # Use multi-thread to speed up the generation, convenient if you use this function
    # for benchmarking.
    n_threads = min(multiprocessing.cpu_count(), n_features)

    def random_csc(t_id: int) -> sparse.csc_matrix:
        rng = np.random.default_rng(1994 * t_id)
        thread_size = n_features // n_threads
        if t_id == n_threads - 1:
            n_features_tloc = n_features - t_id * thread_size
        else:
            n_features_tloc = thread_size

        X = sparse.random(
            m=n_samples,
            n=n_features_tloc,
            density=1.0 - sparsity,
            random_state=rng,
        ).tocsc()
        y = np.zeros((n_samples, 1))

        for i in range(X.shape[1]):
            size = X.indptr[i + 1] - X.indptr[i]
            if size != 0:
                y += X[:, i].toarray() * rng.random((n_samples, 1)) * 0.2

        return X, y

    futures = []
    with ThreadPoolExecutor(max_workers=n_threads) as executor:
        for i in range(n_threads):
            futures.append(executor.submit(random_csc, i))

    X_results = []
    y_results = []
    for f in futures:
        X, y = f.result()
        X_results.append(X)
        y_results.append(y)

    assert len(y_results) == n_threads

    csr: sparse.csr_matrix = sparse.hstack(X_results, format="csr")
    y = np.asarray(y_results)
    y = y.reshape((y.shape[0], y.shape[1])).T
    y = np.sum(y, axis=1)

    assert csr.shape[0] == n_samples
    assert csr.shape[1] == n_features
    assert y.shape[0] == n_samples

    if as_dense:
        arr = csr.toarray()
        assert arr.shape[0] == n_samples
        assert arr.shape[1] == n_features
        arr[arr == 0] = np.nan
        return arr, y

    return csr, y


sparse_datasets_strategy = strategies.sampled_from(
    [
        TestDataset(
            "1e5x8-0.95-csr",
            lambda: make_sparse_regression(int(1e5), 8, 0.95, False),
            "reg:squarederror",
            "rmse",
        ),
        TestDataset(
            "1e5x8-0.5-csr",
            lambda: make_sparse_regression(int(1e5), 8, 0.5, False),
            "reg:squarederror",
            "rmse",
        ),
        TestDataset(
            "1e5x8-0.5-dense",
            lambda: make_sparse_regression(int(1e5), 8, 0.5, True),
            "reg:squarederror",
            "rmse",
        ),
        TestDataset(
            "1e5x8-0.05-csr",
            lambda: make_sparse_regression(int(1e5), 8, 0.05, False),
            "reg:squarederror",
            "rmse",
        ),
        TestDataset(
            "1e5x8-0.05-dense",
            lambda: make_sparse_regression(int(1e5), 8, 0.05, True),
            "reg:squarederror",
            "rmse",
        ),
    ]
)

_unweighted_datasets_strategy = strategies.sampled_from(
    [
        TestDataset(
            "calif_housing", get_california_housing, "reg:squarederror", "rmse"
        ),
        TestDataset(
            "calif_housing-l1", get_california_housing, "reg:absoluteerror", "mae"
        ),
        TestDataset("digits", get_digits, "multi:softmax", "mlogloss"),
        TestDataset("cancer", get_cancer, "binary:logistic", "logloss"),
        TestDataset(
            "mtreg",
            lambda: datasets.make_regression(n_samples=128, n_targets=3),
            "reg:squarederror",
            "rmse",
        ),
        TestDataset("sparse", get_sparse, "reg:squarederror", "rmse"),
        TestDataset("sparse-l1", get_sparse, "reg:absoluteerror", "mae"),
        TestDataset(
            "empty",
            lambda: (np.empty((0, 100)), np.empty(0)),
            "reg:squarederror",
            "rmse",
        ),
    ]
)


@strategies.composite
def _dataset_weight_margin(draw: Callable) -> TestDataset:
    data: TestDataset = draw(_unweighted_datasets_strategy)
    if draw(strategies.booleans()):
        data.w = draw(
            arrays(np.float64, (len(data.y)), elements=strategies.floats(0.1, 2.0))
        )
    if draw(strategies.booleans()):
        num_class = 1
        if data.objective == "multi:softmax":
            num_class = int(np.max(data.y) + 1)
        elif data.name == "mtreg":
            num_class = data.y.shape[1]

        data.margin = draw(
            arrays(
                np.float64,
                (data.y.shape[0] * num_class),
                elements=strategies.floats(0.5, 1.0),
            )
        )
        assert data.margin is not None
        if num_class != 1:
            data.margin = data.margin.reshape(data.y.shape[0], num_class)

    return data


# A strategy for drawing from a set of example datasets
# May add random weights to the dataset
dataset_strategy = _dataset_weight_margin()


def non_increasing(L: Sequence[float], tolerance: float = 1e-4) -> bool:
    return all((y - x) < tolerance for x, y in zip(L, L[1:]))


def eval_error_metric(predt: np.ndarray, dtrain: xgb.DMatrix) -> Tuple[str, np.float64]:
    """Evaluation metric for xgb.train"""
    label = dtrain.get_label()
    r = np.zeros(predt.shape)
    gt = predt > 0.5
    if predt.size == 0:
        return "CustomErr", np.float64(0.0)
    r[gt] = 1 - label[gt]
    le = predt <= 0.5
    r[le] = label[le]
    return "CustomErr", np.sum(r)
def eval_error_metric_skl(y_true: np.ndarray, y_score: np.ndarray) -> np.float64:
    """Evaluation metric that looks like metrics provided by sklearn."""
    r = np.zeros(y_score.shape)
    gt = y_score > 0.5
    r[gt] = 1 - y_true[gt]
    le = y_score <= 0.5
    r[le] = y_true[le]
    return np.sum(r)


def root_mean_square(y_true: np.ndarray, y_score: np.ndarray) -> float:
    err = y_score - y_true
    rmse = np.sqrt(np.dot(err, err) / y_score.size)
    return rmse


def softmax(x: np.ndarray) -> np.ndarray:
    e = np.exp(x)
    return e / np.sum(e)


def softprob_obj(classes: int) -> SklObjective:
    def objective(
        labels: np.ndarray, predt: np.ndarray
    ) -> Tuple[np.ndarray, np.ndarray]:
        rows = labels.shape[0]
        grad = np.zeros((rows, classes), dtype=float)
        hess = np.zeros((rows, classes), dtype=float)
        eps = 1e-6
        for r in range(predt.shape[0]):
            target = labels[r]
            p = softmax(predt[r, :])
            for c in range(predt.shape[1]):
                assert target >= 0 or target <= classes
                g = p[c] - 1.0 if c == target else p[c]
                h = max((2.0 * p[c] * (1.0 - p[c])).item(), eps)
                grad[r, c] = g
                hess[r, c] = h

        grad = grad.reshape((rows * classes, 1))
        hess = hess.reshape((rows * classes, 1))
        return grad, hess

    return objective
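# --- Illustrative usage (editor's sketch, not part of the original diff) ---
# softprob_obj() returns an SklObjective, so it plugs into the scikit-learn
# estimators updated in this commit as a custom multi-class objective.
def _example_softprob_usage() -> np.ndarray:
    X, y = datasets.load_digits(n_class=3, return_X_y=True)
    clf = xgb.XGBClassifier(objective=softprob_obj(3), n_estimators=2)
    clf.fit(X, y)
    return clf.predict(X)
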
class DirectoryExcursion:
    """Change directory. Change back and optionally clean up the directory on exit.

    """

    def __init__(self, path: os.PathLike, cleanup: bool = False):
        self.path = path
        self.curdir = os.path.normpath(os.path.abspath(os.path.curdir))
        self.cleanup = cleanup
        self.files: Set[str] = set()

    def __enter__(self) -> None:
        os.chdir(self.path)
        if self.cleanup:
            self.files = {
                os.path.join(root, f)
                for root, subdir, files in os.walk(os.path.expanduser(self.path))
                for f in files
            }

    def __exit__(self, *args: Any) -> None:
        os.chdir(self.curdir)
        if self.cleanup:
            files = {
                os.path.join(root, f)
                for root, subdir, files in os.walk(os.path.expanduser(self.path))
                for f in files
            }
            diff = files.difference(self.files)
            for f in diff:
                os.remove(f)
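# --- Illustrative usage (editor's sketch, not part of the original diff) ---
# DirectoryExcursion temporarily switches the working directory; with cleanup=True,
# any file created inside the block is removed when the block exits.
def _example_directory_excursion(tmpdir: str) -> None:
    with DirectoryExcursion(tmpdir, cleanup=True):
        with open("model.json", "w", encoding="utf-8") as fd:
            fd.write("{}")
    # Back in the original directory here, and model.json has been deleted.
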
@contextmanager
def captured_output() -> Generator[Tuple[StringIO, StringIO], None, None]:
    """Reassign stdout temporarily in order to test printed statements
    Taken from:
    https://stackoverflow.com/questions/4219717/how-to-assert-output-with-nosetest-unittest-in-python

    Also works for pytest.

    """
    new_out, new_err = StringIO(), StringIO()
    old_out, old_err = sys.stdout, sys.stderr
    try:
        sys.stdout, sys.stderr = new_out, new_err
        yield sys.stdout, sys.stderr
    finally:
        sys.stdout, sys.stderr = old_out, old_err
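# --- Illustrative usage (editor's sketch, not part of the original diff) ---
# captured_output() swaps in StringIO buffers so printed output can be asserted on.
def _example_captured_output() -> None:
    with captured_output() as (out, err):
        print("hello")
    assert out.getvalue().strip() == "hello"
    assert err.getvalue() == ""
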
def timeout(sec: int, *args: Any, enable: bool = True, **kwargs: Any) -> Any:
    """Make a pytest mark for the `pytest-timeout` package.

    Parameters
    ----------
    sec :
        Timeout seconds.
    enable :
        Control whether timeout should be applied, used for debugging.

    Returns
    -------
    pytest.mark.timeout
    """

    if enable:
        return pytest.mark.timeout(sec, *args, **kwargs)
    return pytest.mark.timeout(None, *args, **kwargs)
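# --- Illustrative usage (editor's sketch, not part of the original diff) ---
# timeout() builds a pytest-timeout marker; passing enable=False lifts the limit
# without touching the decorated test, which is handy when debugging.
@timeout(30)
def _example_timed_test() -> None:
    """Hypothetical test that must finish within 30 seconds."""
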
def demo_dir(path: str) -> str:
    """Look for the demo directory based on the test file name."""
    path = normpath(os.path.dirname(path))
    while True:
        subdirs = [f.path for f in os.scandir(path) if f.is_dir()]
        subdirs = [os.path.basename(d) for d in subdirs]
        if "demo" in subdirs:
            return os.path.join(path, "demo")
        new_path = normpath(os.path.join(path, os.path.pardir))
        assert new_path != path
        path = new_path


def normpath(path: str) -> str:
    return os.path.normpath(os.path.abspath(path))


def data_dir(path: str) -> str:
    return os.path.join(demo_dir(path), "data")


def project_root(path: str) -> str:
    return normpath(os.path.join(demo_dir(path), os.path.pardir))