[dask] dask cudf inplace prediction. (#5512)

* Add inplace prediction for dask-cudf.

* Remove Dockerfile.release, since it's not used anywhere

* Use Conda exclusively in CUDF and GPU containers

* Improve cupy memory copying.

* Add skip marks to tests.

* Add mgpu-cudf category on the CI to run all distributed tests.

Co-authored-by: Hyunsu Cho <chohyu01@cs.washington.edu>
This commit is contained in:
Jiaming Yuan
2020-04-15 18:15:51 +08:00
committed by GitHub
parent ca4e05660e
commit 8b04736b81
15 changed files with 97 additions and 87 deletions

View File

@@ -62,6 +62,7 @@ class TestGPUPredict(unittest.TestCase):
# Test case for a bug where multiple batch predictions made on a
# test set produce incorrect results
@pytest.mark.skipif(**tm.no_sklearn())
def test_multi_predict(self):
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
@@ -89,6 +90,7 @@ class TestGPUPredict(unittest.TestCase):
assert np.allclose(predict0, predict1)
assert np.allclose(predict0, cpu_predict)
@pytest.mark.skipif(**tm.no_sklearn())
def test_sklearn(self):
m, n = 15000, 14
tr_size = 2500

View File

@@ -27,6 +27,7 @@ class TestDistributedGPU(unittest.TestCase):
@pytest.mark.skipif(**tm.no_cudf())
@pytest.mark.skipif(**tm.no_dask_cudf())
@pytest.mark.skipif(**tm.no_dask_cuda())
@pytest.mark.mgpu
def test_dask_dataframe(self):
with LocalCUDACluster() as cluster:
with Client(cluster) as client:
@@ -51,18 +52,18 @@ class TestDistributedGPU(unittest.TestCase):
predictions = dxgb.predict(client, out, dtrain).compute()
assert isinstance(predictions, np.ndarray)
# There's an error with cudf saying `concat_cudf` got an
# unexpected argument `ignore_index`. So the test here is just
# a placeholder.
# series_predictions = dxgb.inplace_predict(client, out, X)
# assert isinstance(series_predictions, dd.Series)
series_predictions = dxgb.inplace_predict(client, out, X)
assert isinstance(series_predictions, dd.Series)
series_predictions = series_predictions.compute()
single_node = out['booster'].predict(
xgboost.DMatrix(X.compute()))
cupy.testing.assert_allclose(single_node, predictions)
cupy.testing.assert_allclose(single_node, series_predictions)
@pytest.mark.skipif(**tm.no_cupy())
@pytest.mark.mgpu
def test_dask_array(self):
with LocalCUDACluster() as cluster:
with Client(cluster) as client:
@@ -82,8 +83,12 @@ class TestDistributedGPU(unittest.TestCase):
single_node = out['booster'].predict(
xgboost.DMatrix(X.compute()))
np.testing.assert_allclose(single_node, from_dmatrix)
device = cupy.cuda.runtime.getDevice()
assert device == inplace_predictions.device.id
single_node = cupy.array(single_node)
assert device == single_node.device.id
cupy.testing.assert_allclose(
cupy.array(single_node),
single_node,
inplace_predictions)

View File

@@ -1,12 +1,12 @@
from __future__ import print_function
import sys
import numpy as np
from sklearn.datasets import make_regression
import unittest
import pytest
import xgboost as xgb
sys.path.append("tests/python")
import testing as tm
rng = np.random.RandomState(1994)
@@ -20,6 +20,7 @@ def non_increasing(L):
def assert_constraint(constraint, tree_method):
from sklearn.datasets import make_regression
n = 1000
X, y = make_regression(n, random_state=rng, n_features=1, n_informative=1)
dtrain = xgb.DMatrix(X, y)
@@ -35,12 +36,13 @@ def assert_constraint(constraint, tree_method):
assert non_increasing(pred)
@pytest.mark.gpu
class TestMonotonicConstraints(unittest.TestCase):
    """Run assert_constraint with constraint values 1 and -1 per tree method."""

    # assert_constraint imports sklearn internally, so each test carries the
    # skip condition supplied by tm.no_sklearn().
    @pytest.mark.skipif(**tm.no_sklearn())
    def test_exact(self):
        # Same call order as before: constraint 1 first, then -1.
        for constraint in (1, -1):
            assert_constraint(constraint, 'exact')

    @pytest.mark.skipif(**tm.no_sklearn())
    def test_gpu_hist(self):
        # Same call order as before: constraint 1 first, then -1.
        for constraint in (1, -1):
            assert_constraint(constraint, 'gpu_hist')