diff --git a/tests/python-gpu/test_gpu_with_dask.py b/tests/python-gpu/test_gpu_with_dask.py index f0ae5e292..eb5ce6530 100644 --- a/tests/python-gpu/test_gpu_with_dask.py +++ b/tests/python-gpu/test_gpu_with_dask.py @@ -6,6 +6,11 @@ import unittest if sys.platform.startswith("win"): pytest.skip("Skipping dask tests on Windows", allow_module_level=True) +sys.path.append("tests/python") +from test_with_dask import run_empty_dmatrix # noqa +from test_with_dask import generate_array # noqa +import testing as tm # noqa + try: import dask.dataframe as dd from xgboost import dask as dxgb @@ -15,10 +20,6 @@ try: except ImportError: pass -sys.path.append("tests/python") -from test_with_dask import generate_array # noqa -import testing as tm # noqa - class TestDistributedGPU(unittest.TestCase): @pytest.mark.skipif(**tm.no_dask()) @@ -52,42 +53,7 @@ class TestDistributedGPU(unittest.TestCase): @pytest.mark.skipif(**tm.no_dask_cuda()) @pytest.mark.mgpu def test_empty_dmatrix(self): - - def _check_outputs(out, predictions): - assert isinstance(out['booster'], dxgb.Booster) - assert len(out['history']['validation']['rmse']) == 2 - assert isinstance(predictions, np.ndarray) - assert predictions.shape[0] == 1 - - parameters = {'tree_method': 'gpu_hist', 'verbosity': 3, - 'debug_synchronize': True} - with LocalCUDACluster() as cluster: with Client(cluster) as client: - kRows, kCols = 1, 97 - X = dd.from_array(np.random.randn(kRows, kCols)) - y = dd.from_array(np.random.rand(kRows)) - dtrain = dxgb.DaskDMatrix(client, X, y) - - out = dxgb.train(client, parameters, - dtrain=dtrain, - evals=[(dtrain, 'validation')], - num_boost_round=2) - predictions = dxgb.predict(client=client, model=out, - data=dtrain).compute() - _check_outputs(out, predictions) - - # train has more rows than evals - valid = dtrain - kRows += 1 - X = dd.from_array(np.random.randn(kRows, kCols)) - y = dd.from_array(np.random.rand(kRows)) - dtrain = dxgb.DaskDMatrix(client, X, y) - - out = dxgb.train(client, 
parameters, - dtrain=dtrain, - evals=[(valid, 'validation')], - num_boost_round=2) - predictions = dxgb.predict(client=client, model=out, - data=valid).compute() - _check_outputs(out, predictions) + parameters = {'tree_method': 'gpu_hist'} + run_empty_dmatrix(client, parameters) diff --git a/tests/python/test_with_dask.py b/tests/python/test_with_dask.py index 0b04e2822..f7dc7583d 100644 --- a/tests/python/test_with_dask.py +++ b/tests/python/test_with_dask.py @@ -122,3 +122,53 @@ def test_classifier(client): assert prediction.ndim == 1 assert prediction.shape[0] == kRows + + +def run_empty_dmatrix(client, parameters): + + def _check_outputs(out, predictions): + assert isinstance(out['booster'], xgb.dask.Booster) + assert len(out['history']['validation']['rmse']) == 2 + assert isinstance(predictions, np.ndarray) + assert predictions.shape[0] == 1 + + kRows, kCols = 1, 97 + X = dd.from_array(np.random.randn(kRows, kCols)) + y = dd.from_array(np.random.rand(kRows)) + dtrain = xgb.dask.DaskDMatrix(client, X, y) + + out = xgb.dask.train(client, parameters, + dtrain=dtrain, + evals=[(dtrain, 'validation')], + num_boost_round=2) + predictions = xgb.dask.predict(client=client, model=out, + data=dtrain).compute() + _check_outputs(out, predictions) + + # train has more rows than evals + valid = dtrain + kRows += 1 + X = dd.from_array(np.random.randn(kRows, kCols)) + y = dd.from_array(np.random.rand(kRows)) + dtrain = xgb.dask.DaskDMatrix(client, X, y) + + out = xgb.dask.train(client, parameters, + dtrain=dtrain, + evals=[(valid, 'validation')], + num_boost_round=2) + predictions = xgb.dask.predict(client=client, model=out, + data=valid).compute() + _check_outputs(out, predictions) + + +# No test for Exact, as empty DMatrix handling is mostly for distributed +# environment and Exact doesn't support it. 
+ +def test_empty_dmatrix_hist(client): + parameters = {'tree_method': 'hist'} + run_empty_dmatrix(client, parameters) + + +def test_empty_dmatrix_approx(client): + parameters = {'tree_method': 'approx'} + run_empty_dmatrix(client, parameters)