Thread safe, inplace prediction. (#5389)

Normal prediction with DMatrix is now thread safe with locks.  Added inplace prediction is lock free thread safe.

When data is on device (cupy, cudf), the returned data is also on device.

* Implementation for numpy, csr, cudf and cupy.

* Implementation for dask.

* Remove sync in simple dmatrix.
This commit is contained in:
Jiaming Yuan
2020-03-30 15:35:28 +08:00
committed by GitHub
parent 7f980e9f83
commit 6601a641d7
25 changed files with 1217 additions and 167 deletions

View File

@@ -1,8 +1,12 @@
from __future__ import print_function
import sys
import unittest
import pytest
import numpy as np
import unittest
import xgboost as xgb
sys.path.append("tests/python")
import testing as tm
from test_predict import run_threaded_predict # noqa
rng = np.random.RandomState(1994)
@@ -111,3 +115,65 @@ class TestGPUPredict(unittest.TestCase):
assert np.allclose(cpu_train_score, gpu_train_score)
assert np.allclose(cpu_test_score, gpu_test_score)
@pytest.mark.skipif(**tm.no_cupy())
def test_inplace_predict_cupy(self):
import cupy as cp
rows = 1000
cols = 10
cp_rng = cp.random.RandomState(1994)
cp.random.set_random_state(cp_rng)
X = cp.random.randn(rows, cols)
y = cp.random.randn(rows)
dtrain = xgb.DMatrix(X, y)
booster = xgb.train({'tree_method': 'gpu_hist'},
dtrain, num_boost_round=10)
test = xgb.DMatrix(X[:10, ...])
predt_from_array = booster.inplace_predict(X[:10, ...])
predt_from_dmatrix = booster.predict(test)
cp.testing.assert_allclose(predt_from_array, predt_from_dmatrix)
def predict_dense(x):
inplace_predt = booster.inplace_predict(x)
d = xgb.DMatrix(x)
copied_predt = cp.array(booster.predict(d))
return cp.all(copied_predt == inplace_predt)
for i in range(10):
run_threaded_predict(X, rows, predict_dense)
@pytest.mark.skipif(**tm.no_cudf())
def test_inplace_predict_cudf(self):
import cupy as cp
import cudf
import pandas as pd
rows = 1000
cols = 10
rng = np.random.RandomState(1994)
X = rng.randn(rows, cols)
X = pd.DataFrame(X)
y = rng.randn(rows)
X = cudf.from_pandas(X)
dtrain = xgb.DMatrix(X, y)
booster = xgb.train({'tree_method': 'gpu_hist'},
dtrain, num_boost_round=10)
test = xgb.DMatrix(X)
predt_from_array = booster.inplace_predict(X)
predt_from_dmatrix = booster.predict(test)
cp.testing.assert_allclose(predt_from_array, predt_from_dmatrix)
def predict_df(x):
inplace_predt = booster.inplace_predict(x)
d = xgb.DMatrix(x)
copied_predt = cp.array(booster.predict(d))
return cp.all(copied_predt == inplace_predt)
for i in range(10):
run_threaded_predict(X, rows, predict_df)