Thread safe, inplace prediction. (#5389)
Normal prediction with DMatrix is now thread safe with locks. The newly added inplace prediction is lock-free and thread safe. When data is on device (cupy, cudf), the returned data is also on device. * Implementation for numpy, csr, cudf and cupy. * Implementation for dask. * Remove sync in simple dmatrix.
This commit is contained in:
@@ -1,8 +1,12 @@
|
||||
from __future__ import print_function
|
||||
import sys
|
||||
import unittest
|
||||
import pytest
|
||||
|
||||
import numpy as np
|
||||
import unittest
|
||||
import xgboost as xgb
|
||||
sys.path.append("tests/python")
|
||||
import testing as tm
|
||||
from test_predict import run_threaded_predict # noqa
|
||||
|
||||
rng = np.random.RandomState(1994)
|
||||
|
||||
@@ -111,3 +115,65 @@ class TestGPUPredict(unittest.TestCase):
|
||||
|
||||
assert np.allclose(cpu_train_score, gpu_train_score)
|
||||
assert np.allclose(cpu_test_score, gpu_test_score)
|
||||
|
||||
@pytest.mark.skipif(**tm.no_cupy())
def test_inplace_predict_cupy(self):
    """Compare inplace prediction on cupy input with DMatrix prediction.

    Trains a 10-round ``gpu_hist`` booster on random cupy data, checks
    that ``inplace_predict`` on the first 10 rows agrees with ``predict``
    on a DMatrix built from the same rows, and then repeats the
    comparison through ``run_threaded_predict``.
    """
    import cupy as cp

    rows, cols = 1000, 10

    # Seed cupy's global generator so the random data is reproducible.
    cp.random.set_random_state(cp.random.RandomState(1994))
    X = cp.random.randn(rows, cols)
    y = cp.random.randn(rows)

    training_params = {'tree_method': 'gpu_hist'}
    booster = xgb.train(training_params, xgb.DMatrix(X, y),
                        num_boost_round=10)

    head_dmatrix = xgb.DMatrix(X[:10, ...])
    inplace_result = booster.inplace_predict(X[:10, ...])
    dmatrix_result = booster.predict(head_dmatrix)
    cp.testing.assert_allclose(inplace_result, dmatrix_result)

    def predict_dense(x):
        """Return whether both prediction paths agree exactly on ``x``."""
        from_array = booster.inplace_predict(x)
        from_dmatrix = cp.array(booster.predict(xgb.DMatrix(x)))
        return cp.all(from_dmatrix == from_array)

    # run_threaded_predict lives in test_predict; presumably it invokes the
    # callback from several threads -- confirm against that module.
    for _ in range(10):
        run_threaded_predict(X, rows, predict_dense)
|
||||
|
||||
@pytest.mark.skipif(**tm.no_cudf())
def test_inplace_predict_cudf(self):
    """Compare inplace prediction on a cudf frame with DMatrix prediction.

    Builds a random pandas frame, converts it to cudf, trains a 10-round
    ``gpu_hist`` booster on it, and checks that ``inplace_predict`` on the
    frame agrees with ``predict`` on a DMatrix built from the same frame.
    The comparison is then repeated through ``run_threaded_predict``.
    """
    import cupy as cp
    import cudf
    import pandas as pd

    rows, cols = 1000, 10

    # Features are drawn before labels from one seeded generator.
    local_rng = np.random.RandomState(1994)
    host_frame = pd.DataFrame(local_rng.randn(rows, cols))
    y = local_rng.randn(rows)
    X = cudf.from_pandas(host_frame)

    training_params = {'tree_method': 'gpu_hist'}
    booster = xgb.train(training_params, xgb.DMatrix(X, y),
                        num_boost_round=10)

    full_dmatrix = xgb.DMatrix(X)
    inplace_result = booster.inplace_predict(X)
    dmatrix_result = booster.predict(full_dmatrix)
    cp.testing.assert_allclose(inplace_result, dmatrix_result)

    def predict_df(x):
        """Return whether both prediction paths agree exactly on ``x``."""
        from_frame = booster.inplace_predict(x)
        from_dmatrix = cp.array(booster.predict(xgb.DMatrix(x)))
        return cp.all(from_dmatrix == from_frame)

    # run_threaded_predict lives in test_predict; presumably it invokes the
    # callback from several threads -- confirm against that module.
    for _ in range(10):
        run_threaded_predict(X, rows, predict_df)
|
||||
|
||||
@@ -2,6 +2,7 @@ import sys
|
||||
import pytest
|
||||
import numpy as np
|
||||
import unittest
|
||||
import xgboost
|
||||
|
||||
if sys.platform.startswith("win"):
|
||||
pytest.skip("Skipping dask tests on Windows", allow_module_level=True)
|
||||
@@ -29,6 +30,7 @@ class TestDistributedGPU(unittest.TestCase):
|
||||
def test_dask_dataframe(self):
|
||||
with LocalCUDACluster() as cluster:
|
||||
with Client(cluster) as client:
|
||||
import cupy
|
||||
X, y = generate_array()
|
||||
|
||||
X = dd.from_dask_array(X)
|
||||
@@ -49,6 +51,42 @@ class TestDistributedGPU(unittest.TestCase):
|
||||
predictions = dxgb.predict(client, out, dtrain).compute()
|
||||
assert isinstance(predictions, np.ndarray)
|
||||
|
||||
# There's an error with cudf saying `concat_cudf` got an
|
||||
# unexpected argument `ignore_index`. So the test here is just
|
||||
# a placeholder.
|
||||
|
||||
# series_predictions = dxgb.inplace_predict(client, out, X)
|
||||
# assert isinstance(series_predictions, dd.Series)
|
||||
|
||||
single_node = out['booster'].predict(
|
||||
xgboost.DMatrix(X.compute()))
|
||||
cupy.testing.assert_allclose(single_node, predictions)
|
||||
|
||||
@pytest.mark.skipif(**tm.no_cupy())
def test_dask_array(self):
    """Check dask prediction paths on cupy-backed dask arrays.

    Trains a 2-round ``gpu_hist`` model through the dask interface, then
    compares three results: ``dxgb.predict`` on the DaskDMatrix,
    ``dxgb.inplace_predict`` on the array, and prediction by the returned
    booster on a DMatrix built from the computed array.
    """
    with LocalCUDACluster() as cluster, Client(cluster) as client:
        import cupy

        X, y = generate_array()

        # Turn every block of the dask arrays into a cupy array.
        X = X.map_blocks(cupy.asarray)
        y = y.map_blocks(cupy.asarray)

        dtrain = dxgb.DaskDMatrix(client, X, y)
        out = dxgb.train(
            client,
            {'tree_method': 'gpu_hist'},
            dtrain=dtrain,
            evals=[(dtrain, 'X')],
            num_boost_round=2,
        )

        from_dmatrix = dxgb.predict(client, out, dtrain).compute()
        inplace_predictions = dxgb.inplace_predict(client, out, X).compute()

        local_dmatrix = xgboost.DMatrix(X.compute())
        single_node = out['booster'].predict(local_dmatrix)

        np.testing.assert_allclose(single_node, from_dmatrix)
        # single_node is wrapped with cupy.array before comparing it with
        # the inplace result.
        cupy.testing.assert_allclose(cupy.array(single_node),
                                     inplace_predictions)
|
||||
|
||||
|
||||
@pytest.mark.skipif(**tm.no_dask())
|
||||
@pytest.mark.skipif(**tm.no_dask_cuda())
|
||||
@pytest.mark.mgpu
|
||||
|
||||
Reference in New Issue
Block a user