Extract dask and spark test into distributed test. (#8395)

- Move test files.
- Run spark and dask separately to prevent conflicts.
- Gather common code into the testing module.
This commit is contained in:
Jiaming Yuan
2022-10-28 16:24:32 +08:00
committed by GitHub
parent f73520bfff
commit cfd2a9f872
34 changed files with 405 additions and 337 deletions

View File

@@ -1,43 +1,21 @@
import pytest
from xgboost import testing as tm # noqa
from xgboost import testing as tm
def has_rmm():
try:
import rmm
return True
except ImportError:
return False
return tm.no_rmm()["condition"]
@pytest.fixture(scope='session', autouse=True)
@pytest.fixture(scope="session", autouse=True)
def setup_rmm_pool(request, pytestconfig):
if pytestconfig.getoption('--use-rmm-pool'):
if not has_rmm():
raise ImportError('The --use-rmm-pool option requires the RMM package')
import rmm
from dask_cuda.utils import get_n_gpus
rmm.reinitialize(pool_allocator=True, initial_pool_size=1024*1024*1024,
devices=list(range(get_n_gpus())))
tm.setup_rmm_pool(request, pytestconfig)
@pytest.fixture(scope='class')
def local_cuda_client(request, pytestconfig):
kwargs = {}
if hasattr(request, 'param'):
kwargs.update(request.param)
if pytestconfig.getoption('--use-rmm-pool'):
if not has_rmm():
raise ImportError('The --use-rmm-pool option requires the RMM package')
import rmm
kwargs['rmm_pool_size'] = '2GB'
if tm.no_dask_cuda()['condition']:
raise ImportError('The local_cuda_cluster fixture requires dask_cuda package')
from dask.distributed import Client
from dask_cuda import LocalCUDACluster
yield Client(LocalCUDACluster(**kwargs))
def pytest_addoption(parser):
parser.addoption('--use-rmm-pool', action='store_true', default=False, help='Use RMM pool')
def pytest_addoption(parser: pytest.Parser) -> None:
parser.addoption(
"--use-rmm-pool", action="store_true", default=False, help="Use RMM pool"
)
def pytest_collection_modifyitems(config, items):
@@ -53,13 +31,3 @@ def pytest_collection_modifyitems(config, items):
for item in items:
if any(item.nodeid.startswith(x) for x in blocklist):
item.add_marker(skip_mark)
# mark dask tests as `mgpu`.
mgpu_mark = pytest.mark.mgpu
for item in items:
if item.nodeid.startswith(
"python-gpu/test_gpu_with_dask/test_gpu_with_dask.py"
) or item.nodeid.startswith(
"python-gpu/test_gpu_spark/test_gpu_spark.py"
):
item.add_marker(mgpu_mark)

View File

@@ -1,16 +0,0 @@
#!/bin/bash
# This script is only made for running XGBoost tests on official CI where we have access
# to a 4-GPU cluster, the discovery command is for running tests on a local machine where
# the driver and the GPU worker might be the same machine for the ease of development.
if ! command -v nvidia-smi &> /dev/null
then
# default to 4 GPUs
echo "{\"name\":\"gpu\",\"addresses\":[\"0\",\"1\",\"2\",\"3\"]}"
exit
else
# https://github.com/apache/spark/blob/master/examples/src/main/scripts/getGpusResources.sh
ADDRS=`nvidia-smi --query-gpu=index --format=csv,noheader | sed -e ':a' -e 'N' -e'$!ba' -e 's/\n/","/g'`
echo {\"name\": \"gpu\", \"addresses\":[\"$ADDRS\"]}
fi

View File

@@ -1,23 +0,0 @@
import sys
import pytest
from xgboost import testing as tm
if tm.no_spark()["condition"]:
pytest.skip(msg=tm.no_spark()["reason"], allow_module_level=True)
if sys.platform.startswith("win") or sys.platform.startswith("darwin"):
pytest.skip("Skipping PySpark tests on Windows", allow_module_level=True)
sys.path.append("tests/python")
from test_spark.test_data import run_dmatrix_ctor
@pytest.mark.skipif(**tm.no_cudf())
@pytest.mark.parametrize(
"is_feature_cols,is_qdm",
[(True, True), (True, False), (False, True), (False, False)],
)
def test_dmatrix_ctor(is_feature_cols: bool, is_qdm: bool) -> None:
run_dmatrix_ctor(is_feature_cols, is_qdm, on_gpu=True)

View File

@@ -1,228 +0,0 @@
import json
import logging
import subprocess
import sys
import pytest
import sklearn
from xgboost import testing as tm
if tm.no_spark()["condition"]:
pytest.skip(msg=tm.no_spark()["reason"], allow_module_level=True)
if sys.platform.startswith("win"):
pytest.skip("Skipping PySpark tests on Windows", allow_module_level=True)
from pyspark.ml.linalg import Vectors
from pyspark.ml.tuning import CrossValidator, ParamGridBuilder
from pyspark.sql import SparkSession
from xgboost.spark import SparkXGBClassifier, SparkXGBRegressor
gpu_discovery_script_path = "tests/python-gpu/test_gpu_spark/discover_gpu.sh"
def get_devices():
"""This works only if driver is the same machine of worker."""
completed = subprocess.run(gpu_discovery_script_path, stdout=subprocess.PIPE)
assert completed.returncode == 0, "Failed to execute discovery script."
msg = completed.stdout.decode("utf-8")
result = json.loads(msg)
addresses = result["addresses"]
return addresses
executor_gpu_amount = len(get_devices())
executor_cores = executor_gpu_amount
num_workers = executor_gpu_amount
@pytest.fixture(scope="module", autouse=True)
def spark_session_with_gpu():
spark_config = {
"spark.master": f"local-cluster[1, {executor_gpu_amount}, 1024]",
"spark.python.worker.reuse": "false",
"spark.driver.host": "127.0.0.1",
"spark.task.maxFailures": "1",
"spark.sql.execution.pyspark.udf.simplifiedTraceback.enabled": "false",
"spark.sql.pyspark.jvmStacktrace.enabled": "true",
"spark.cores.max": executor_cores,
"spark.task.cpus": "1",
"spark.executor.cores": executor_cores,
"spark.worker.resource.gpu.amount": executor_gpu_amount,
"spark.task.resource.gpu.amount": "1",
"spark.executor.resource.gpu.amount": executor_gpu_amount,
"spark.worker.resource.gpu.discoveryScript": gpu_discovery_script_path,
}
builder = SparkSession.builder.appName("xgboost spark python API Tests with GPU")
for k, v in spark_config.items():
builder.config(k, v)
spark = builder.getOrCreate()
logging.getLogger("pyspark").setLevel(logging.INFO)
# We run a dummy job so that we block until the workers have connected to the master
spark.sparkContext.parallelize(
range(num_workers), num_workers
).barrier().mapPartitions(lambda _: []).collect()
yield spark
spark.stop()
@pytest.fixture
def spark_iris_dataset(spark_session_with_gpu):
spark = spark_session_with_gpu
data = sklearn.datasets.load_iris()
train_rows = [
(Vectors.dense(features), float(label))
for features, label in zip(data.data[0::2], data.target[0::2])
]
train_df = spark.createDataFrame(
spark.sparkContext.parallelize(train_rows, num_workers), ["features", "label"]
)
test_rows = [
(Vectors.dense(features), float(label))
for features, label in zip(data.data[1::2], data.target[1::2])
]
test_df = spark.createDataFrame(
spark.sparkContext.parallelize(test_rows, num_workers), ["features", "label"]
)
return train_df, test_df
@pytest.fixture
def spark_iris_dataset_feature_cols(spark_session_with_gpu):
spark = spark_session_with_gpu
data = sklearn.datasets.load_iris()
train_rows = [
(*features.tolist(), float(label))
for features, label in zip(data.data[0::2], data.target[0::2])
]
train_df = spark.createDataFrame(
spark.sparkContext.parallelize(train_rows, num_workers),
[*data.feature_names, "label"],
)
test_rows = [
(*features.tolist(), float(label))
for features, label in zip(data.data[1::2], data.target[1::2])
]
test_df = spark.createDataFrame(
spark.sparkContext.parallelize(test_rows, num_workers),
[*data.feature_names, "label"],
)
return train_df, test_df, data.feature_names
@pytest.fixture
def spark_diabetes_dataset(spark_session_with_gpu):
spark = spark_session_with_gpu
data = sklearn.datasets.load_diabetes()
train_rows = [
(Vectors.dense(features), float(label))
for features, label in zip(data.data[0::2], data.target[0::2])
]
train_df = spark.createDataFrame(
spark.sparkContext.parallelize(train_rows, num_workers), ["features", "label"]
)
test_rows = [
(Vectors.dense(features), float(label))
for features, label in zip(data.data[1::2], data.target[1::2])
]
test_df = spark.createDataFrame(
spark.sparkContext.parallelize(test_rows, num_workers), ["features", "label"]
)
return train_df, test_df
@pytest.fixture
def spark_diabetes_dataset_feature_cols(spark_session_with_gpu):
spark = spark_session_with_gpu
data = sklearn.datasets.load_diabetes()
train_rows = [
(*features.tolist(), float(label))
for features, label in zip(data.data[0::2], data.target[0::2])
]
train_df = spark.createDataFrame(
spark.sparkContext.parallelize(train_rows, num_workers),
[*data.feature_names, "label"],
)
test_rows = [
(*features.tolist(), float(label))
for features, label in zip(data.data[1::2], data.target[1::2])
]
test_df = spark.createDataFrame(
spark.sparkContext.parallelize(test_rows, num_workers),
[*data.feature_names, "label"],
)
return train_df, test_df, data.feature_names
def test_sparkxgb_classifier_with_gpu(spark_iris_dataset):
from pyspark.ml.evaluation import MulticlassClassificationEvaluator
classifier = SparkXGBClassifier(use_gpu=True, num_workers=num_workers)
train_df, test_df = spark_iris_dataset
model = classifier.fit(train_df)
pred_result_df = model.transform(test_df)
evaluator = MulticlassClassificationEvaluator(metricName="f1")
f1 = evaluator.evaluate(pred_result_df)
assert f1 >= 0.97
def test_sparkxgb_classifier_feature_cols_with_gpu(spark_iris_dataset_feature_cols):
from pyspark.ml.evaluation import MulticlassClassificationEvaluator
train_df, test_df, feature_names = spark_iris_dataset_feature_cols
classifier = SparkXGBClassifier(
features_col=feature_names, use_gpu=True, num_workers=num_workers
)
model = classifier.fit(train_df)
pred_result_df = model.transform(test_df)
evaluator = MulticlassClassificationEvaluator(metricName="f1")
f1 = evaluator.evaluate(pred_result_df)
assert f1 >= 0.97
def test_cv_sparkxgb_classifier_feature_cols_with_gpu(spark_iris_dataset_feature_cols):
from pyspark.ml.evaluation import MulticlassClassificationEvaluator
train_df, test_df, feature_names = spark_iris_dataset_feature_cols
classifier = SparkXGBClassifier(
features_col=feature_names, use_gpu=True, num_workers=num_workers
)
grid = ParamGridBuilder().addGrid(classifier.max_depth, [6, 8]).build()
evaluator = MulticlassClassificationEvaluator(metricName="f1")
cv = CrossValidator(
estimator=classifier, evaluator=evaluator, estimatorParamMaps=grid, numFolds=3
)
cvModel = cv.fit(train_df)
pred_result_df = cvModel.transform(test_df)
f1 = evaluator.evaluate(pred_result_df)
assert f1 >= 0.97
def test_sparkxgb_regressor_with_gpu(spark_diabetes_dataset):
from pyspark.ml.evaluation import RegressionEvaluator
regressor = SparkXGBRegressor(use_gpu=True, num_workers=num_workers)
train_df, test_df = spark_diabetes_dataset
model = regressor.fit(train_df)
pred_result_df = model.transform(test_df)
evaluator = RegressionEvaluator(metricName="rmse")
rmse = evaluator.evaluate(pred_result_df)
assert rmse <= 65.0
def test_sparkxgb_regressor_feature_cols_with_gpu(spark_diabetes_dataset_feature_cols):
from pyspark.ml.evaluation import RegressionEvaluator
train_df, test_df, feature_names = spark_diabetes_dataset_feature_cols
regressor = SparkXGBRegressor(
features_col=feature_names, use_gpu=True, num_workers=num_workers
)
model = regressor.fit(train_df)
pred_result_df = model.transform(test_df)
evaluator = RegressionEvaluator(metricName="rmse")
rmse = evaluator.evaluate(pred_result_df)
assert rmse <= 65.0

View File

@@ -7,26 +7,16 @@ from hypothesis import assume, given, note, settings, strategies
import xgboost as xgb
from xgboost import testing as tm
from xgboost.testing.params import (
hist_parameter_strategy,
cat_parameter_strategy,
)
sys.path.append("tests/python")
import test_updaters as test_up
pytestmark = tm.timeout(30)
parameter_strategy = strategies.fixed_dictionaries({
'max_depth': strategies.integers(0, 11),
'max_leaves': strategies.integers(0, 256),
'max_bin': strategies.integers(2, 1024),
'grow_policy': strategies.sampled_from(['lossguide', 'depthwise']),
'min_child_weight': strategies.floats(0.5, 2.0),
'seed': strategies.integers(0, 10),
# We cannot enable subsampling as the training loss can increase
# 'subsample': strategies.floats(0.5, 1.0),
'colsample_bytree': strategies.floats(0.5, 1.0),
'colsample_bylevel': strategies.floats(0.5, 1.0),
}).filter(lambda x: (x['max_depth'] > 0 or x['max_leaves'] > 0) and (
x['max_depth'] > 0 or x['grow_policy'] == 'lossguide'))
def train_result(param, dmat: xgb.DMatrix, num_rounds: int) -> dict:
result: xgb.callback.TrainingCallback.EvalsLog = {}
@@ -47,7 +37,7 @@ def train_result(param, dmat: xgb.DMatrix, num_rounds: int) -> dict:
class TestGPUUpdaters:
cputest = test_up.TestTreeMethod()
@given(parameter_strategy, strategies.integers(1, 20), tm.dataset_strategy)
@given(hist_parameter_strategy, strategies.integers(1, 20), tm.dataset_strategy)
@settings(deadline=None, max_examples=50, print_blob=True)
def test_gpu_hist(self, param, num_rounds, dataset):
param["tree_method"] = "gpu_hist"
@@ -82,9 +72,8 @@ class TestGPUUpdaters:
@given(
tm.categorical_dataset_strategy,
test_up.exact_parameter_strategy,
test_up.hist_parameter_strategy,
test_up.cat_parameter_strategy,
hist_parameter_strategy,
cat_parameter_strategy,
strategies.integers(4, 32),
)
@settings(deadline=None, max_examples=20, print_blob=True)
@@ -92,12 +81,10 @@ class TestGPUUpdaters:
def test_categorical(
self,
dataset: tm.TestDataset,
exact_parameters: Dict[str, Any],
hist_parameters: Dict[str, Any],
cat_parameters: Dict[str, Any],
n_rounds: int,
) -> None:
cat_parameters.update(exact_parameters)
cat_parameters.update(hist_parameters)
cat_parameters["tree_method"] = "gpu_hist"
@@ -105,8 +92,8 @@ class TestGPUUpdaters:
tm.non_increasing(results["train"]["rmse"])
@given(
test_up.hist_parameter_strategy,
test_up.cat_parameter_strategy,
hist_parameter_strategy,
cat_parameter_strategy,
)
@settings(deadline=None, max_examples=10, print_blob=True)
def test_categorical_ames_housing(
@@ -149,8 +136,11 @@ class TestGPUUpdaters:
self.cputest.run_invalid_category("gpu_hist")
@pytest.mark.skipif(**tm.no_cupy())
@given(parameter_strategy, strategies.integers(1, 20),
tm.dataset_strategy)
@given(
hist_parameter_strategy,
strategies.integers(1, 20),
tm.dataset_strategy
)
@settings(deadline=None, max_examples=20, print_blob=True)
def test_gpu_hist_device_dmatrix(self, param, num_rounds, dataset):
# We cannot handle empty dataset yet
@@ -161,8 +151,11 @@ class TestGPUUpdaters:
note(result)
assert tm.non_increasing(result['train'][dataset.metric], tolerance=1e-3)
@given(parameter_strategy, strategies.integers(1, 3),
tm.dataset_strategy)
@given(
hist_parameter_strategy,
strategies.integers(1, 3),
tm.dataset_strategy
)
@settings(deadline=None, max_examples=10, print_blob=True)
def test_external_memory(self, param, num_rounds, dataset):
if dataset.name.endswith("-l1"):

View File

@@ -1,572 +0,0 @@
"""Copyright 2019-2022 XGBoost contributors"""
import asyncio
import os
import subprocess
import sys
from collections import OrderedDict
from inspect import signature
from typing import Any, Dict, Type, TypeVar, Union
import numpy as np
import pytest
from hypothesis import given, note, settings, strategies
from hypothesis._settings import duration
from test_gpu_updaters import parameter_strategy
import xgboost
from xgboost import testing as tm
if sys.platform.startswith("win"):
pytest.skip("Skipping dask tests on Windows", allow_module_level=True)
sys.path.append("tests/python")
if tm.no_dask_cuda()["condition"]:
pytest.skip(tm.no_dask_cuda()["reason"], allow_module_level=True)
from test_with_dask import _get_client_workers # noqa
from test_with_dask import generate_array # noqa
from test_with_dask import make_categorical # noqa
from test_with_dask import run_auc # noqa
from test_with_dask import run_boost_from_prediction # noqa
from test_with_dask import run_boost_from_prediction_multi_class # noqa
from test_with_dask import run_categorical # noqa
from test_with_dask import run_dask_classifier # noqa
from test_with_dask import run_empty_dmatrix_auc # noqa
from test_with_dask import run_empty_dmatrix_cls # noqa
from test_with_dask import run_empty_dmatrix_reg # noqa
from test_with_dask import run_tree_stats # noqa
from test_with_dask import suppress # noqa
from test_with_dask import kCols as random_cols # noqa
try:
import cudf
import dask.dataframe as dd
from dask import array as da
from dask.distributed import Client
from dask_cuda import LocalCUDACluster, utils
import xgboost as xgb
from xgboost import dask as dxgb
except ImportError:
pass
def run_with_dask_dataframe(DMatrixT: Type, client: Client) -> None:
import cupy as cp
cp.cuda.runtime.setDevice(0)
X, y, _ = generate_array()
X = dd.from_dask_array(X)
y = dd.from_dask_array(y)
X = X.map_partitions(cudf.from_pandas)
y = y.map_partitions(cudf.from_pandas)
dtrain = DMatrixT(client, X, y)
out = dxgb.train(
client,
{"tree_method": "gpu_hist", "debug_synchronize": True},
dtrain=dtrain,
evals=[(dtrain, "X")],
num_boost_round=4,
)
assert isinstance(out["booster"], dxgb.Booster)
assert len(out["history"]["X"]["rmse"]) == 4
predictions = dxgb.predict(client, out, dtrain)
assert isinstance(predictions.compute(), np.ndarray)
series_predictions = dxgb.inplace_predict(client, out, X)
assert isinstance(series_predictions, dd.Series)
single_node = out["booster"].predict(xgboost.DMatrix(X.compute()))
cp.testing.assert_allclose(single_node, predictions.compute())
np.testing.assert_allclose(single_node, series_predictions.compute().to_numpy())
predt = dxgb.predict(client, out, X)
assert isinstance(predt, dd.Series)
T = TypeVar("T")
def is_df(part: T) -> T:
assert isinstance(part, cudf.DataFrame), part
return part
predt.map_partitions(is_df, meta=dd.utils.make_meta({"prediction": "f4"}))
cp.testing.assert_allclose(predt.values.compute(), single_node)
# Make sure the output can be integrated back to original dataframe
X["predict"] = predictions
X["inplace_predict"] = series_predictions
has_null = X.isnull().values.any().compute()
assert bool(has_null) is False
def run_with_dask_array(DMatrixT: Type, client: Client) -> None:
import cupy as cp
cp.cuda.runtime.setDevice(0)
X, y, _ = generate_array()
X = X.map_blocks(cp.asarray)
y = y.map_blocks(cp.asarray)
dtrain = DMatrixT(client, X, y)
out = dxgb.train(
client,
{"tree_method": "gpu_hist", "debug_synchronize": True},
dtrain=dtrain,
evals=[(dtrain, "X")],
num_boost_round=2,
)
from_dmatrix = dxgb.predict(client, out, dtrain).compute()
inplace_predictions = dxgb.inplace_predict(client, out, X).compute()
single_node = out["booster"].predict(xgboost.DMatrix(X.compute()))
np.testing.assert_allclose(single_node, from_dmatrix)
device = cp.cuda.runtime.getDevice()
assert device == inplace_predictions.device.id
single_node = cp.array(single_node)
assert device == single_node.device.id
cp.testing.assert_allclose(single_node, inplace_predictions)
def to_cp(x: Any, DMatrixT: Type) -> Any:
import cupy
if isinstance(x, np.ndarray) and DMatrixT is dxgb.DaskDeviceQuantileDMatrix:
X = cupy.array(x)
else:
X = x
return X
def run_gpu_hist(
params: Dict,
num_rounds: int,
dataset: tm.TestDataset,
DMatrixT: Type,
client: Client,
) -> None:
params["tree_method"] = "gpu_hist"
params = dataset.set_params(params)
# It doesn't make sense to distribute a completely
# empty dataset.
if dataset.X.shape[0] == 0:
return
chunk = 128
X = to_cp(dataset.X, DMatrixT)
X = da.from_array(X, chunks=(chunk, dataset.X.shape[1]))
y = to_cp(dataset.y, DMatrixT)
y_chunk = chunk if len(dataset.y.shape) == 1 else (chunk, dataset.y.shape[1])
y = da.from_array(y, chunks=y_chunk)
if dataset.w is not None:
w = to_cp(dataset.w, DMatrixT)
w = da.from_array(w, chunks=(chunk,))
else:
w = None
if DMatrixT is dxgb.DaskDeviceQuantileDMatrix:
m = DMatrixT(
client, data=X, label=y, weight=w, max_bin=params.get("max_bin", 256)
)
else:
m = DMatrixT(client, data=X, label=y, weight=w)
history = dxgb.train(
client,
params=params,
dtrain=m,
num_boost_round=num_rounds,
evals=[(m, "train")],
)["history"]["train"][dataset.metric]
note(history)
# See note on `ObjFunction::UpdateTreeLeaf`.
update_leaf = dataset.name.endswith("-l1")
if update_leaf:
assert history[0] + 1e-2 >= history[-1]
return
else:
assert tm.non_increasing(history)
def test_tree_stats() -> None:
with LocalCUDACluster(n_workers=1) as cluster:
with Client(cluster) as client:
local = run_tree_stats(client, "gpu_hist")
with LocalCUDACluster(n_workers=2) as cluster:
with Client(cluster) as client:
distributed = run_tree_stats(client, "gpu_hist")
assert local == distributed
class TestDistributedGPU:
@pytest.mark.skipif(**tm.no_cudf())
def test_boost_from_prediction(self, local_cuda_client: Client) -> None:
import cudf
from sklearn.datasets import load_breast_cancer, load_iris
X_, y_ = load_breast_cancer(return_X_y=True)
X = dd.from_array(X_, chunksize=100).map_partitions(cudf.from_pandas)
y = dd.from_array(y_, chunksize=100).map_partitions(cudf.from_pandas)
run_boost_from_prediction(X, y, "gpu_hist", local_cuda_client)
X_, y_ = load_iris(return_X_y=True)
X = dd.from_array(X_, chunksize=50).map_partitions(cudf.from_pandas)
y = dd.from_array(y_, chunksize=50).map_partitions(cudf.from_pandas)
run_boost_from_prediction_multi_class(X, y, "gpu_hist", local_cuda_client)
@pytest.mark.skipif(**tm.no_dask_cudf())
def test_dask_dataframe(self, local_cuda_client: Client) -> None:
run_with_dask_dataframe(dxgb.DaskDMatrix, local_cuda_client)
run_with_dask_dataframe(dxgb.DaskDeviceQuantileDMatrix, local_cuda_client)
@pytest.mark.skipif(**tm.no_dask_cudf())
def test_categorical(self, local_cuda_client: Client) -> None:
import dask_cudf
X, y = make_categorical(local_cuda_client, 10000, 30, 13)
X = dask_cudf.from_dask_dataframe(X)
X_onehot, _ = make_categorical(local_cuda_client, 10000, 30, 13, True)
X_onehot = dask_cudf.from_dask_dataframe(X_onehot)
run_categorical(local_cuda_client, "gpu_hist", X, X_onehot, y)
@given(
params=parameter_strategy,
num_rounds=strategies.integers(1, 20),
dataset=tm.dataset_strategy,
dmatrix_type=strategies.sampled_from(
[dxgb.DaskDMatrix, dxgb.DaskDeviceQuantileDMatrix]
),
)
@settings(
deadline=duration(seconds=120),
max_examples=20,
suppress_health_check=suppress,
print_blob=True,
)
@pytest.mark.skipif(**tm.no_cupy())
def test_gpu_hist(
self,
params: Dict,
num_rounds: int,
dataset: tm.TestDataset,
dmatrix_type: type,
local_cuda_client: Client,
) -> None:
run_gpu_hist(params, num_rounds, dataset, dmatrix_type, local_cuda_client)
@pytest.mark.skipif(**tm.no_cupy())
def test_dask_array(self, local_cuda_client: Client) -> None:
run_with_dask_array(dxgb.DaskDMatrix, local_cuda_client)
run_with_dask_array(dxgb.DaskDeviceQuantileDMatrix, local_cuda_client)
@pytest.mark.skipif(**tm.no_cupy())
def test_early_stopping(self, local_cuda_client: Client) -> None:
from sklearn.datasets import load_breast_cancer
X, y = load_breast_cancer(return_X_y=True)
X, y = da.from_array(X), da.from_array(y)
m = dxgb.DaskDMatrix(local_cuda_client, X, y)
valid = dxgb.DaskDMatrix(local_cuda_client, X, y)
early_stopping_rounds = 5
booster = dxgb.train(
local_cuda_client,
{
"objective": "binary:logistic",
"eval_metric": "error",
"tree_method": "gpu_hist",
},
m,
evals=[(valid, "Valid")],
num_boost_round=1000,
early_stopping_rounds=early_stopping_rounds,
)["booster"]
assert hasattr(booster, "best_score")
dump = booster.get_dump(dump_format="json")
assert len(dump) - booster.best_iteration == early_stopping_rounds + 1
valid_X = X
valid_y = y
cls = dxgb.DaskXGBClassifier(
objective="binary:logistic",
tree_method="gpu_hist",
eval_metric="error",
n_estimators=100,
)
cls.client = local_cuda_client
cls.fit(
X,
y,
early_stopping_rounds=early_stopping_rounds,
eval_set=[(valid_X, valid_y)],
)
booster = cls.get_booster()
dump = booster.get_dump(dump_format="json")
assert len(dump) - booster.best_iteration == early_stopping_rounds + 1
@pytest.mark.skipif(**tm.no_cudf())
@pytest.mark.parametrize("model", ["boosting"])
def test_dask_classifier(self, model: str, local_cuda_client: Client) -> None:
import dask_cudf
X_, y_, w_ = generate_array(with_weights=True)
y_ = (y_ * 10).astype(np.int32)
X = dask_cudf.from_dask_dataframe(dd.from_dask_array(X_))
y = dask_cudf.from_dask_dataframe(dd.from_dask_array(y_))
w = dask_cudf.from_dask_dataframe(dd.from_dask_array(w_))
run_dask_classifier(X, y, w, model, "gpu_hist", local_cuda_client, 10)
def test_empty_dmatrix(self, local_cuda_client: Client) -> None:
parameters = {"tree_method": "gpu_hist", "debug_synchronize": True}
run_empty_dmatrix_reg(local_cuda_client, parameters)
run_empty_dmatrix_cls(local_cuda_client, parameters)
@pytest.mark.skipif(**tm.no_dask_cudf())
def test_empty_partition(self, local_cuda_client: Client) -> None:
import cudf
import cupy
import dask_cudf
mult = 100
df = cudf.DataFrame(
{
"a": [1, 2, 3, 4, 5.1] * mult,
"b": [10, 15, 29.3, 30, 31] * mult,
"y": [10, 20, 30, 40.0, 50] * mult,
}
)
parameters = {"tree_method": "gpu_hist", "debug_synchronize": True}
empty = df.iloc[:0]
ddf = dask_cudf.concat(
[dask_cudf.from_cudf(empty, npartitions=1)]
+ [dask_cudf.from_cudf(df, npartitions=3)]
+ [dask_cudf.from_cudf(df, npartitions=3)]
)
X = ddf[ddf.columns.difference(["y"])]
y = ddf[["y"]]
dtrain = dxgb.DaskDeviceQuantileDMatrix(local_cuda_client, X, y)
bst_empty = xgb.dask.train(
local_cuda_client, parameters, dtrain, evals=[(dtrain, "train")]
)
predt_empty = dxgb.predict(local_cuda_client, bst_empty, X).compute().values
ddf = dask_cudf.concat(
[dask_cudf.from_cudf(df, npartitions=3)]
+ [dask_cudf.from_cudf(df, npartitions=3)]
)
X = ddf[ddf.columns.difference(["y"])]
y = ddf[["y"]]
dtrain = dxgb.DaskDeviceQuantileDMatrix(local_cuda_client, X, y)
bst = xgb.dask.train(
local_cuda_client, parameters, dtrain, evals=[(dtrain, "train")]
)
predt = dxgb.predict(local_cuda_client, bst, X).compute().values
cupy.testing.assert_allclose(predt, predt_empty)
predt = dxgb.predict(local_cuda_client, bst, dtrain).compute()
cupy.testing.assert_allclose(predt, predt_empty)
predt = dxgb.inplace_predict(local_cuda_client, bst, X).compute().values
cupy.testing.assert_allclose(predt, predt_empty)
df = df.to_pandas()
empty = df.iloc[:0]
ddf = dd.concat(
[dd.from_pandas(empty, npartitions=1)]
+ [dd.from_pandas(df, npartitions=3)]
+ [dd.from_pandas(df, npartitions=3)]
)
X = ddf[ddf.columns.difference(["y"])]
y = ddf[["y"]]
predt_empty = cupy.asnumpy(predt_empty)
predt = dxgb.predict(local_cuda_client, bst_empty, X).compute().values
np.testing.assert_allclose(predt, predt_empty)
in_predt = (
dxgb.inplace_predict(local_cuda_client, bst_empty, X).compute().values
)
np.testing.assert_allclose(predt, in_predt)
def test_empty_dmatrix_auc(self, local_cuda_client: Client) -> None:
n_workers = len(_get_client_workers(local_cuda_client))
run_empty_dmatrix_auc(local_cuda_client, "gpu_hist", n_workers)
def test_auc(self, local_cuda_client: Client) -> None:
run_auc(local_cuda_client, "gpu_hist")
def test_data_initialization(self, local_cuda_client: Client) -> None:
X, y, _ = generate_array()
fw = da.random.random((random_cols,))
fw = fw - fw.min()
m = dxgb.DaskDMatrix(local_cuda_client, X, y, feature_weights=fw)
workers = _get_client_workers(local_cuda_client)
rabit_args = local_cuda_client.sync(
dxgb._get_rabit_args, len(workers), None, local_cuda_client
)
def worker_fn(worker_addr: str, data_ref: Dict) -> None:
with dxgb.CommunicatorContext(**rabit_args):
local_dtrain = dxgb._dmatrix_from_list_of_parts(**data_ref, nthread=7)
fw_rows = local_dtrain.get_float_info("feature_weights").shape[0]
assert fw_rows == local_dtrain.num_col()
futures = []
for i in range(len(workers)):
futures.append(
local_cuda_client.submit(
worker_fn,
workers[i],
m._create_fn_args(workers[i]),
pure=False,
workers=[workers[i]],
)
)
local_cuda_client.gather(futures)
def test_interface_consistency(self) -> None:
sig = OrderedDict(signature(dxgb.DaskDMatrix).parameters)
del sig["client"]
ddm_names = list(sig.keys())
sig = OrderedDict(signature(dxgb.DaskQuantileDMatrix).parameters)
del sig["client"]
del sig["max_bin"]
del sig["ref"]
ddqdm_names = list(sig.keys())
assert len(ddm_names) == len(ddqdm_names)
# between dask
for i in range(len(ddm_names)):
assert ddm_names[i] == ddqdm_names[i]
sig = OrderedDict(signature(xgb.DMatrix).parameters)
del sig["nthread"] # no nthread in dask
dm_names = list(sig.keys())
sig = OrderedDict(signature(xgb.QuantileDMatrix).parameters)
del sig["nthread"]
del sig["max_bin"]
del sig["ref"]
dqdm_names = list(sig.keys())
# between single node
assert len(dm_names) == len(dqdm_names)
for i in range(len(dm_names)):
assert dm_names[i] == dqdm_names[i]
# ddm <-> dm
for i in range(len(ddm_names)):
assert ddm_names[i] == dm_names[i]
# dqdm <-> ddqdm
for i in range(len(ddqdm_names)):
assert ddqdm_names[i] == dqdm_names[i]
sig = OrderedDict(signature(xgb.XGBRanker.fit).parameters)
ranker_names = list(sig.keys())
sig = OrderedDict(signature(xgb.dask.DaskXGBRanker.fit).parameters)
dranker_names = list(sig.keys())
for rn, drn in zip(ranker_names, dranker_names):
assert rn == drn
def run_quantile(self, name: str, local_cuda_client: Client) -> None:
if sys.platform.startswith("win"):
pytest.skip("Skipping dask tests on Windows")
exe = None
for possible_path in {
"./testxgboost",
"./build/testxgboost",
"../build/testxgboost",
"../gpu-build/testxgboost",
}:
if os.path.exists(possible_path):
exe = possible_path
assert exe, "No testxgboost executable found."
test = "--gtest_filter=GPUQuantile." + name
def runit(
worker_addr: str, rabit_args: Dict[str, Union[int, str]]
) -> subprocess.CompletedProcess:
port_env = ""
# setup environment for running the c++ part.
env = os.environ.copy()
env['DMLC_TRACKER_PORT'] = str(rabit_args['DMLC_TRACKER_PORT'])
env["DMLC_TRACKER_URI"] = str(rabit_args["DMLC_TRACKER_URI"])
return subprocess.run([str(exe), test], env=env, stdout=subprocess.PIPE)
workers = _get_client_workers(local_cuda_client)
rabit_args = local_cuda_client.sync(
dxgb._get_rabit_args, len(workers), None, local_cuda_client
)
futures = local_cuda_client.map(
runit, workers, pure=False, workers=workers, rabit_args=rabit_args
)
results = local_cuda_client.gather(futures)
for ret in results:
msg = ret.stdout.decode("utf-8")
assert msg.find("1 test from GPUQuantile") != -1, msg
assert ret.returncode == 0, msg
@pytest.mark.gtest
def test_quantile_basic(self, local_cuda_client: Client) -> None:
self.run_quantile("AllReduceBasic", local_cuda_client)
@pytest.mark.gtest
def test_quantile_same_on_all_workers(self, local_cuda_client: Client) -> None:
self.run_quantile("SameOnAllWorkers", local_cuda_client)
@pytest.mark.skipif(**tm.no_cupy())
def test_with_asyncio(local_cuda_client: Client) -> None:
address = local_cuda_client.scheduler.address
output = asyncio.run(run_from_dask_array_asyncio(address))
assert isinstance(output["booster"], xgboost.Booster)
assert isinstance(output["history"], dict)
async def run_from_dask_array_asyncio(scheduler_address: str) -> dxgb.TrainReturnT:
async with Client(scheduler_address, asynchronous=True) as client:
import cupy as cp
X, y, _ = generate_array()
X = X.map_blocks(cp.array)
y = y.map_blocks(cp.array)
m = await xgboost.dask.DaskDeviceQuantileDMatrix(client, X, y)
output = await xgboost.dask.train(client, {"tree_method": "gpu_hist"}, dtrain=m)
with_m = await xgboost.dask.predict(client, output, m)
with_X = await xgboost.dask.predict(client, output, X)
inplace = await xgboost.dask.inplace_predict(client, output, X)
assert isinstance(with_m, da.Array)
assert isinstance(with_X, da.Array)
assert isinstance(inplace, da.Array)
cp.testing.assert_allclose(
await client.compute(with_m), await client.compute(with_X)
)
cp.testing.assert_allclose(
await client.compute(with_m), await client.compute(inplace)
)
client.shutdown()
return output