Tests for empty dmatrix. (#5159)

2019-12-26 11:51:54 +08:00 · 2019-12-26 11:51:54 +08:00 · ced3660f60
commit ced3660f60
parent 298ebe68ac
2 changed files with 57 additions and 41 deletions
--- a/tests/python-gpu/test_gpu_with_dask.py
+++ b/tests/python-gpu/test_gpu_with_dask.py
@@ -6,6 +6,11 @@ import unittest
 if sys.platform.startswith("win"):
    pytest.skip("Skipping dask tests on Windows", allow_module_level=True)

+sys.path.append("tests/python")
+from test_with_dask import run_empty_dmatrix  # noqa
+from test_with_dask import generate_array     # noqa
+import testing as tm                          # noqa
+
 try:
    import dask.dataframe as dd
    from xgboost import dask as dxgb
@@ -15,10 +20,6 @@ try:
 except ImportError:
    pass

-sys.path.append("tests/python")
-from test_with_dask import generate_array  # noqa
-import testing as tm                       # noqa
-

 class TestDistributedGPU(unittest.TestCase):
    @pytest.mark.skipif(**tm.no_dask())
@@ -52,42 +53,7 @@ class TestDistributedGPU(unittest.TestCase):
    @pytest.mark.skipif(**tm.no_dask_cuda())
    @pytest.mark.mgpu
    def test_empty_dmatrix(self):
-
-        def _check_outputs(out, predictions):
-            assert isinstance(out['booster'], dxgb.Booster)
-            assert len(out['history']['validation']['rmse']) == 2
-            assert isinstance(predictions, np.ndarray)
-            assert predictions.shape[0] == 1
-
-        parameters = {'tree_method': 'gpu_hist', 'verbosity': 3,
-                      'debug_synchronize': True}
-
        with LocalCUDACluster() as cluster:
            with Client(cluster) as client:
-                kRows, kCols = 1, 97
-                X = dd.from_array(np.random.randn(kRows, kCols))
-                y = dd.from_array(np.random.rand(kRows))
-                dtrain = dxgb.DaskDMatrix(client, X, y)
-
-                out = dxgb.train(client, parameters,
-                                 dtrain=dtrain,
-                                 evals=[(dtrain, 'validation')],
-                                 num_boost_round=2)
-                predictions = dxgb.predict(client=client, model=out,
-                                           data=dtrain).compute()
-                _check_outputs(out, predictions)
-
-                # train has more rows than evals
-                valid = dtrain
-                kRows += 1
-                X = dd.from_array(np.random.randn(kRows, kCols))
-                y = dd.from_array(np.random.rand(kRows))
-                dtrain = dxgb.DaskDMatrix(client, X, y)
-
-                out = dxgb.train(client, parameters,
-                                 dtrain=dtrain,
-                                 evals=[(valid, 'validation')],
-                                 num_boost_round=2)
-                predictions = dxgb.predict(client=client, model=out,
-                                           data=valid).compute()
-                _check_outputs(out, predictions)
+                parameters = {'tree_method': 'gpu_hist'}
+                run_empty_dmatrix(client, parameters)
--- a/tests/python/test_with_dask.py
+++ b/tests/python/test_with_dask.py
@@ -122,3 +122,53 @@ def test_classifier(client):

    assert prediction.ndim == 1
    assert prediction.shape[0] == kRows
+
+
+def run_empty_dmatrix(client, parameters):
+
+    def _check_outputs(out, predictions):
+        assert isinstance(out['booster'], xgb.dask.Booster)
+        assert len(out['history']['validation']['rmse']) == 2
+        assert isinstance(predictions, np.ndarray)
+        assert predictions.shape[0] == 1
+
+    kRows, kCols = 1, 97
+    X = dd.from_array(np.random.randn(kRows, kCols))
+    y = dd.from_array(np.random.rand(kRows))
+    dtrain = xgb.dask.DaskDMatrix(client, X, y)
+
+    out = xgb.dask.train(client, parameters,
+                         dtrain=dtrain,
+                         evals=[(dtrain, 'validation')],
+                         num_boost_round=2)
+    predictions = xgb.dask.predict(client=client, model=out,
+                                   data=dtrain).compute()
+    _check_outputs(out, predictions)
+
+    # train has more rows than evals
+    valid = dtrain
+    kRows += 1
+    X = dd.from_array(np.random.randn(kRows, kCols))
+    y = dd.from_array(np.random.rand(kRows))
+    dtrain = xgb.dask.DaskDMatrix(client, X, y)
+
+    out = xgb.dask.train(client, parameters,
+                         dtrain=dtrain,
+                         evals=[(valid, 'validation')],
+                         num_boost_round=2)
+    predictions = xgb.dask.predict(client=client, model=out,
+                                   data=valid).compute()
+    _check_outputs(out, predictions)
+
+
+# No test for Exact, as empty DMatrix handling is mostly for distributed
+# environments and Exact doesn't support them.
+
+def test_empty_dmatrix_hist(client):
+    parameters = {'tree_method': 'hist'}
+    run_empty_dmatrix(client, parameters)
+
+
+def test_empty_dmatrix_approx(client):
+    parameters = {'tree_method': 'approx'}
+    run_empty_dmatrix(client, parameters)