Run training with empty DMatrix. (#4990)

This makes GPU Hist robust in a distributed environment, as some workers might not be associated with any data in either training or evaluation. * Disable rabit mock test for now: See #5012. * Disable dask-cudf test at prediction for now: See #5003. * Launch dask job for all workers even though they might not have any data. * Check 0 rows in elementwise evaluation metrics. Using AUC and AUC-PR still throws an error. See #4663 for a robust fix. * Add tests for edge cases. * Add `LaunchKernel` wrapper handling zero-sized grid. * Move some parts of allreducer into a cu file. * Don't validate feature names when the booster is empty. * Sync number of columns in DMatrix, as num_feature is required to be the same across all workers in data split mode. * Filtering in dask interface now by default syncs all boosters that are not empty, instead of using rank 0. * Fix Jenkins' GPU tests. * Install dask-cuda from source in Jenkins' test. Now all tests are actually running. * Restore GPU Hist tree synchronization test. * Check UUID of running devices. The check is only performed on CUDA version >= 10.x, as 9.x doesn't have a UUID field. * Fix CMake policy and project variables. Use xgboost_SOURCE_DIR uniformly, add policy for CMake >= 3.13. * Fix copying data to CPU. * Fix race condition in cpu predictor. * Fix duplicated DMatrix construction. * Don't download extra nccl in CI script.
2019-11-06 16:13:13 +08:00
parent 807a244517
commit 7663de956c
44 changed files with 603 additions and 272 deletions
--- a/tests/python-gpu/test_gpu_updaters.py
+++ b/tests/python-gpu/test_gpu_updaters.py
@@ -2,6 +2,7 @@ import numpy as np
 import sys
 import unittest
 import pytest
+import xgboost

 sys.path.append("tests/python")
 from regression_test_utilities import run_suite, parameter_combinations, \
@@ -21,7 +22,8 @@ datasets = ["Boston", "Cancer", "Digits", "Sparse regression",

 class TestGPU(unittest.TestCase):
    def test_gpu_hist(self):
-        test_param = parameter_combinations({'gpu_id': [0], 'max_depth': [2, 8],
+        test_param = parameter_combinations({'gpu_id': [0],
+                                             'max_depth': [2, 8],
                                             'max_leaves': [255, 4],
                                             'max_bin': [2, 256],
                                             'grow_policy': ['lossguide']})
@@ -36,6 +38,31 @@ class TestGPU(unittest.TestCase):
            cpu_results = run_suite(param, select_datasets=datasets)
            assert_gpu_results(cpu_results, gpu_results)

+    def test_with_empty_dmatrix(self):
+        # FIXME(trivialfis): This should be done with all updaters
+        kRows = 0
+        kCols = 100
+
+        X = np.empty((kRows, kCols))
+        y = np.empty((kRows))
+
+        dtrain = xgboost.DMatrix(X, y)
+
+        bst = xgboost.train({'verbosity': 2,
+                             'tree_method': 'gpu_hist',
+                             'gpu_id': 0},
+                            dtrain,
+                            verbose_eval=True,
+                            num_boost_round=6,
+                            evals=[(dtrain, 'Train')])
+
+        kRows = 100
+        X = np.random.randn(kRows, kCols)
+
+        dtest = xgboost.DMatrix(X)
+        predictions = bst.predict(dtest)
+        np.testing.assert_allclose(predictions, 0.5, 1e-6)
+
    @pytest.mark.mgpu
    def test_specified_gpu_id_gpu_update(self):
        variable_param = {'gpu_id': [1],
--- a/tests/python-gpu/test_gpu_with_dask.py
+++ b/tests/python-gpu/test_gpu_with_dask.py
@@ -1,45 +1,94 @@
 import sys
 import pytest
+import numpy as np
+import unittest

 if sys.platform.startswith("win"):
    pytest.skip("Skipping dask tests on Windows", allow_module_level=True)

 try:
-    from distributed.utils_test import client, loop, cluster_fixture
    import dask.dataframe as dd
    from xgboost import dask as dxgb
+    from dask_cuda import LocalCUDACluster
+    from dask.distributed import Client
    import cudf
 except ImportError:
-    client = None
-    loop = None
-    cluster_fixture = None
    pass

 sys.path.append("tests/python")
-from test_with_dask import generate_array
-import testing as tm
+from test_with_dask import generate_array  # noqa
+import testing as tm                       # noqa


-@pytest.mark.skipif(**tm.no_dask())
-@pytest.mark.skipif(**tm.no_cudf())
-@pytest.mark.skipif(**tm.no_dask_cudf())
-def test_dask_dataframe(client):
-    X, y = generate_array()
+class TestDistributedGPU(unittest.TestCase):
+    @pytest.mark.skipif(**tm.no_dask())
+    @pytest.mark.skipif(**tm.no_cudf())
+    @pytest.mark.skipif(**tm.no_dask_cudf())
+    @pytest.mark.skipif(**tm.no_dask_cuda())
+    def test_dask_dataframe(self):
+        with LocalCUDACluster() as cluster:
+            with Client(cluster) as client:
+                X, y = generate_array()

-    X = dd.from_dask_array(X)
-    y = dd.from_dask_array(y)
+                X = dd.from_dask_array(X)
+                y = dd.from_dask_array(y)

-    X = X.map_partitions(cudf.from_pandas)
-    y = y.map_partitions(cudf.from_pandas)
+                X = X.map_partitions(cudf.from_pandas)
+                y = y.map_partitions(cudf.from_pandas)

-    dtrain = dxgb.DaskDMatrix(client, X, y)
-    out = dxgb.train(client, {'tree_method': 'gpu_hist'},
-                     dtrain=dtrain,
-                     evals=[(dtrain, 'X')],
-                     num_boost_round=2)
+                dtrain = dxgb.DaskDMatrix(client, X, y)
+                out = dxgb.train(client, {'tree_method': 'gpu_hist'},
+                                 dtrain=dtrain,
+                                 evals=[(dtrain, 'X')],
+                                 num_boost_round=2)

-    assert isinstance(out['booster'], dxgb.Booster)
-    assert len(out['history']['X']['rmse']) == 2
+                assert isinstance(out['booster'], dxgb.Booster)
+                assert len(out['history']['X']['rmse']) == 2

-    predictions = dxgb.predict(out, dtrain)
-    predictions = predictions.compute()
+                # FIXME(trivialfis): Re-enable this after #5003  is fixed
+                # predictions = dxgb.predict(client, out, dtrain).compute()
+                # assert isinstance(predictions, np.ndarray)
+
+    @pytest.mark.skipif(**tm.no_dask())
+    @pytest.mark.skipif(**tm.no_dask_cuda())
+    @pytest.mark.mgpu
+    def test_empty_dmatrix(self):
+
+        def _check_outputs(out, predictions):
+            assert isinstance(out['booster'], dxgb.Booster)
+            assert len(out['history']['validation']['rmse']) == 2
+            assert isinstance(predictions, np.ndarray)
+            assert predictions.shape[0] == 1
+
+        parameters = {'tree_method': 'gpu_hist', 'verbosity': 3,
+                      'debug_synchronize': True}
+
+        with LocalCUDACluster() as cluster:
+            with Client(cluster) as client:
+                kRows, kCols = 1, 97
+                X = dd.from_array(np.random.randn(kRows, kCols))
+                y = dd.from_array(np.random.rand(kRows))
+                dtrain = dxgb.DaskDMatrix(client, X, y)
+
+                out = dxgb.train(client, parameters,
+                                 dtrain=dtrain,
+                                 evals=[(dtrain, 'validation')],
+                                 num_boost_round=2)
+                predictions = dxgb.predict(client=client, model=out,
+                                           data=dtrain).compute()
+                _check_outputs(out, predictions)
+
+                # train has more rows than evals
+                valid = dtrain
+                kRows += 1
+                X = dd.from_array(np.random.randn(kRows, kCols))
+                y = dd.from_array(np.random.rand(kRows))
+                dtrain = dxgb.DaskDMatrix(client, X, y)
+
+                out = dxgb.train(client, parameters,
+                                 dtrain=dtrain,
+                                 evals=[(valid, 'validation')],
+                                 num_boost_round=2)
+                predictions = dxgb.predict(client=client, model=out,
+                                           data=valid).compute()
+                _check_outputs(out, predictions)