Tests for empty dmatrix. (#5159)

2019-12-26 11:51:54 +08:00 · 2019-12-26 11:51:54 +08:00 · ced3660f60
commit ced3660f60
parent 298ebe68ac
2 changed files with 57 additions and 41 deletions
--- a/tests/python-gpu/test_gpu_with_dask.py
+++ b/tests/python-gpu/test_gpu_with_dask.py
@ -6,6 +6,11 @@ import unittest
 if sys.platform.startswith("win"):
    pytest.skip("Skipping dask tests on Windows", allow_module_level=True)
 sys.path.append("tests/python")
 from test_with_dask import run_empty_dmatrix  # noqa
 from test_with_dask import generate_array     # noqa
 import testing as tm                          # noqa
 try:
    import dask.dataframe as dd
    from xgboost import dask as dxgb
@ -15,10 +20,6 @@ try:
 except ImportError:
    pass
 sys.path.append("tests/python")
 from test_with_dask import generate_array  # noqa
 import testing as tm                       # noqa
 class TestDistributedGPU(unittest.TestCase):
    @pytest.mark.skipif(**tm.no_dask())
@ -52,42 +53,7 @@ class TestDistributedGPU(unittest.TestCase):
    @pytest.mark.skipif(**tm.no_dask_cuda())
    @pytest.mark.mgpu
    def test_empty_dmatrix(self):
        def _check_outputs(out, predictions):
            assert isinstance(out['booster'], dxgb.Booster)
            assert len(out['history']['validation']['rmse']) == 2
            assert isinstance(predictions, np.ndarray)
            assert predictions.shape[0] == 1
        parameters = {'tree_method': 'gpu_hist', 'verbosity': 3,
                      'debug_synchronize': True}
        with LocalCUDACluster() as cluster:
            with Client(cluster) as client:
-                kRows, kCols = 1, 97
+                parameters = {'tree_method': 'gpu_hist'}
-                X = dd.from_array(np.random.randn(kRows, kCols))
+                run_empty_dmatrix(client, parameters)
                y = dd.from_array(np.random.rand(kRows))
                dtrain = dxgb.DaskDMatrix(client, X, y)
                out = dxgb.train(client, parameters,
                                 dtrain=dtrain,
                                 evals=[(dtrain, 'validation')],
                                 num_boost_round=2)
                predictions = dxgb.predict(client=client, model=out,
                                           data=dtrain).compute()
                _check_outputs(out, predictions)
                # train has more rows than evals
                valid = dtrain
                kRows += 1
                X = dd.from_array(np.random.randn(kRows, kCols))
                y = dd.from_array(np.random.rand(kRows))
                dtrain = dxgb.DaskDMatrix(client, X, y)
                out = dxgb.train(client, parameters,
                                 dtrain=dtrain,
                                 evals=[(valid, 'validation')],
                                 num_boost_round=2)
                predictions = dxgb.predict(client=client, model=out,
                                           data=valid).compute()
                _check_outputs(out, predictions)
--- a/tests/python/test_with_dask.py
+++ b/tests/python/test_with_dask.py
@ -122,3 +122,53 @@ def test_classifier(client):
    assert prediction.ndim == 1
    assert prediction.shape[0] == kRows
 def run_empty_dmatrix(client, parameters):
    """Train and predict on one- and two-row Dask datasets and check outputs.

    Two scenarios are run, each for two boosting rounds:

    1. train and evaluate on the same single-row ``DaskDMatrix``;
    2. train on a two-row ``DaskDMatrix`` while evaluating on the single-row
       one from the first scenario.

    NOTE(review): the function name suggests the datasets are kept this small
    so that some workers end up with no rows at all -- confirm against the
    cluster fixture used by the callers.

    Parameters
    ----------
    client :
        Client object forwarded to ``xgb.dask`` for building the matrices,
        training and predicting.
    parameters : dict
        Booster parameters forwarded unchanged to ``xgb.dask.train``.
    """
    n_features = 97
    n_rounds = 2

    def _make_dmatrix(n_rows):
        # Features are drawn before labels, one draw each per matrix.
        features = dd.from_array(np.random.randn(n_rows, n_features))
        labels = dd.from_array(np.random.rand(n_rows))
        return xgb.dask.DaskDMatrix(client, features, labels)

    def _train_and_verify(train_set, eval_set):
        # Predictions are always made on the evaluation set, which holds a
        # single row in both scenarios.
        result = xgb.dask.train(client, parameters,
                                dtrain=train_set,
                                evals=[(eval_set, 'validation')],
                                num_boost_round=n_rounds)
        preds = xgb.dask.predict(client=client, model=result,
                                 data=eval_set).compute()

        assert isinstance(result['booster'], xgb.dask.Booster)
        # One rmse entry is expected per boosting round.
        assert len(result['history']['validation']['rmse']) == 2
        assert isinstance(preds, np.ndarray)
        assert preds.shape[0] == 1

    single_row = _make_dmatrix(1)
    _train_and_verify(single_row, single_row)

    # train has more rows than evals
    _train_and_verify(_make_dmatrix(2), single_row)
 # No test for Exact, as empty DMatrix handling is mostly for distributed
 # environments and Exact doesn't support it.
 def test_empty_dmatrix_hist(client):
    """Run the shared empty-DMatrix scenario with ``tree_method='hist'``."""
    run_empty_dmatrix(client, {'tree_method': 'hist'})
 def test_empty_dmatrix_approx(client):
    """Run the shared empty-DMatrix scenario with ``tree_method='approx'``."""
    run_empty_dmatrix(client, {'tree_method': 'approx'})