[dask] dask cudf inplace prediction. (#5512)

* Add inplace prediction for dask-cudf.
* Remove Dockerfile.release, since it's not used anywhere
* Use Conda exclusively in CUDF and GPU containers
* Improve cupy memory copying.
* Add skip marks to tests.
* Add mgpu-cudf category on the CI to run all distributed tests.

Co-authored-by: Hyunsu Cho <chohyu01@cs.washington.edu>
2020-04-15 18:15:51 +08:00
parent ca4e05660e
commit 8b04736b81
15 changed files with 97 additions and 87 deletions
--- a/tests/python-gpu/test_gpu_prediction.py
+++ b/tests/python-gpu/test_gpu_prediction.py
@@ -62,6 +62,7 @@ class TestGPUPredict(unittest.TestCase):

    # Test case for a bug where multiple batch predictions made on a
    # test set produce incorrect results
+    @pytest.mark.skipif(**tm.no_sklearn())
    def test_multi_predict(self):
        from sklearn.datasets import make_regression
        from sklearn.model_selection import train_test_split
@@ -89,6 +90,7 @@ class TestGPUPredict(unittest.TestCase):
        assert np.allclose(predict0, predict1)
        assert np.allclose(predict0, cpu_predict)

+    @pytest.mark.skipif(**tm.no_sklearn())
    def test_sklearn(self):
        m, n = 15000, 14
        tr_size = 2500
--- a/tests/python-gpu/test_gpu_with_dask.py
+++ b/tests/python-gpu/test_gpu_with_dask.py
@@ -27,6 +27,7 @@ class TestDistributedGPU(unittest.TestCase):
    @pytest.mark.skipif(**tm.no_cudf())
    @pytest.mark.skipif(**tm.no_dask_cudf())
    @pytest.mark.skipif(**tm.no_dask_cuda())
+    @pytest.mark.mgpu
    def test_dask_dataframe(self):
        with LocalCUDACluster() as cluster:
            with Client(cluster) as client:
@@ -51,18 +52,18 @@ class TestDistributedGPU(unittest.TestCase):
                predictions = dxgb.predict(client, out, dtrain).compute()
                assert isinstance(predictions, np.ndarray)

-                # There's an error with cudf saying `concat_cudf` got an
-                # expected argument `ignore_index`.  So the test here is just
-                # place holder.
-
-                # series_predictions = dxgb.inplace_predict(client, out, X)
-                # assert isinstance(series_predictions, dd.Series)
+                series_predictions = dxgb.inplace_predict(client, out, X)
+                assert isinstance(series_predictions, dd.Series)
+                series_predictions = series_predictions.compute()

                single_node = out['booster'].predict(
                    xgboost.DMatrix(X.compute()))
+
                cupy.testing.assert_allclose(single_node, predictions)
+                cupy.testing.assert_allclose(single_node, series_predictions)

    @pytest.mark.skipif(**tm.no_cupy())
+    @pytest.mark.mgpu
    def test_dask_array(self):
        with LocalCUDACluster() as cluster:
            with Client(cluster) as client:
@@ -82,8 +83,12 @@ class TestDistributedGPU(unittest.TestCase):
                single_node = out['booster'].predict(
                    xgboost.DMatrix(X.compute()))
                np.testing.assert_allclose(single_node, from_dmatrix)
+                device = cupy.cuda.runtime.getDevice()
+                assert device == inplace_predictions.device.id
+                single_node = cupy.array(single_node)
+                assert device == single_node.device.id
                cupy.testing.assert_allclose(
-                    cupy.array(single_node),
+                    single_node,
                    inplace_predictions)


--- a/tests/python-gpu/test_monotonic_constraints.py
+++ b/tests/python-gpu/test_monotonic_constraints.py
@@ -1,12 +1,12 @@
-from __future__ import print_function
-
+import sys
 import numpy as np
-from sklearn.datasets import make_regression

 import unittest
 import pytest

 import xgboost as xgb
+sys.path.append("tests/python")
+import testing as tm

 rng = np.random.RandomState(1994)

@@ -20,6 +20,7 @@ def non_increasing(L):


 def assert_constraint(constraint, tree_method):
+    from sklearn.datasets import make_regression
    n = 1000
    X, y = make_regression(n, random_state=rng, n_features=1, n_informative=1)
    dtrain = xgb.DMatrix(X, y)
@@ -35,12 +36,13 @@ def assert_constraint(constraint, tree_method):
        assert non_increasing(pred)


-@pytest.mark.gpu
 class TestMonotonicConstraints(unittest.TestCase):
+    @pytest.mark.skipif(**tm.no_sklearn())
    def test_exact(self):
        assert_constraint(1, 'exact')
        assert_constraint(-1, 'exact')

+    @pytest.mark.skipif(**tm.no_sklearn())
    def test_gpu_hist(self):
        assert_constraint(1, 'gpu_hist')
        assert_constraint(-1, 'gpu_hist')