Enhance inplace prediction. (#6653)

* Accept array interface for csr and array.
* Accept an optional proxy dmatrix for metainfo.

This constructs an explicit `_ProxyDMatrix` type in Python.

* Remove unused doc.
* Add strict output.
This commit is contained in:
Jiaming Yuan
2021-02-02 11:41:46 +08:00
committed by GitHub
parent 87ab1ad607
commit 411592a347
22 changed files with 955 additions and 530 deletions

View File

@@ -1,5 +1,5 @@
/*!
* Copyright 2020 by Contributors
* Copyright 2020-2021 by Contributors
*/
#include <gtest/gtest.h>
@@ -104,21 +104,24 @@ void TestInplacePrediction(dmlc::any x, std::string predictor,
}
HostDeviceVector<float> *p_out_predictions_0{nullptr};
learner->InplacePredict(x, "margin", std::numeric_limits<float>::quiet_NaN(),
learner->InplacePredict(x, nullptr, PredictionType::kMargin,
std::numeric_limits<float>::quiet_NaN(),
&p_out_predictions_0, 0, 2);
CHECK(p_out_predictions_0);
HostDeviceVector<float> predict_0 (p_out_predictions_0->Size());
predict_0.Copy(*p_out_predictions_0);
HostDeviceVector<float> *p_out_predictions_1{nullptr};
learner->InplacePredict(x, "margin", std::numeric_limits<float>::quiet_NaN(),
learner->InplacePredict(x, nullptr, PredictionType::kMargin,
std::numeric_limits<float>::quiet_NaN(),
&p_out_predictions_1, 2, 4);
CHECK(p_out_predictions_1);
HostDeviceVector<float> predict_1 (p_out_predictions_1->Size());
predict_1.Copy(*p_out_predictions_1);
HostDeviceVector<float>* p_out_predictions{nullptr};
learner->InplacePredict(x, "margin", std::numeric_limits<float>::quiet_NaN(),
learner->InplacePredict(x, nullptr, PredictionType::kMargin,
std::numeric_limits<float>::quiet_NaN(),
&p_out_predictions, 0, 4);
auto& h_pred = p_out_predictions->HostVector();

View File

@@ -11,8 +11,7 @@ import testing as tm
class TestDeviceQuantileDMatrix:
def test_dmatrix_numpy_init(self):
data = np.random.randn(5, 5)
with pytest.raises(TypeError,
match='is not supported for DeviceQuantileDMatrix'):
with pytest.raises(TypeError, match='is not supported'):
xgb.DeviceQuantileDMatrix(data, np.ones(5, dtype=np.float64))
@pytest.mark.skipif(**tm.no_cupy())

View File

@@ -141,6 +141,13 @@ class TestGPUPredict:
assert np.allclose(cpu_train_score, gpu_train_score)
assert np.allclose(cpu_test_score, gpu_test_score)
# Shared check: in-place prediction with an explicit base margin must agree
# with prediction through a DMatrix that carries the same base margin.
#
# booster:     model used for both prediction paths.
# dtrain:      DMatrix that receives `base_margin` via set_info (it is mutated).
#              Presumably built from the same data as `X` - confirm at call sites.
# X:           data passed directly to `inplace_predict`.
# base_margin: margin supplied to both prediction paths.
def run_inplace_base_margin(self, booster, dtrain, X, base_margin):
import cupy as cp
# Attach the margin to the DMatrix so the reference prediction uses it.
dtrain.set_info(base_margin=base_margin)
from_inplace = booster.inplace_predict(data=X, base_margin=base_margin)
from_dmatrix = booster.predict(dtrain)
# Compared with cupy's testing helper rather than numpy's.
cp.testing.assert_allclose(from_inplace, from_dmatrix)
@pytest.mark.skipif(**tm.no_cupy())
def test_inplace_predict_cupy(self):
import cupy as cp
@@ -175,6 +182,9 @@ class TestGPUPredict:
for i in range(10):
run_threaded_predict(X, rows, predict_dense)
base_margin = cp_rng.randn(rows)
self.run_inplace_base_margin(booster, dtrain, X, base_margin)
@pytest.mark.skipif(**tm.no_cudf())
def test_inplace_predict_cudf(self):
import cupy as cp
@@ -208,6 +218,9 @@ class TestGPUPredict:
for i in range(10):
run_threaded_predict(X, rows, predict_df)
base_margin = cudf.Series(rng.randn(rows))
self.run_inplace_base_margin(booster, dtrain, X, base_margin)
@given(strategies.integers(1, 10),
tm.dataset_strategy, shap_parameter_strategy)
@settings(deadline=None)

View File

@@ -80,20 +80,28 @@ def test_predict_leaf():
class TestInplacePredict:
'''Tests for running inplace prediction'''
# Class-level fixture: builds one random dataset, one trained booster and one
# small test DMatrix, stored as class attributes for the tests in this class.
@classmethod
def setup_class(cls):
cls.rows = 100
cls.cols = 10
# Fixed seed so the generated data is reproducible between runs.
cls.rng = np.random.RandomState(1994)
cls.X = cls.rng.randn(cls.rows, cls.cols)
cls.y = cls.rng.randn(cls.rows)
dtrain = xgb.DMatrix(cls.X, cls.y)
# 10 boosting rounds with the 'hist' tree method.
cls.booster = xgb.train({'tree_method': 'hist'},
dtrain, num_boost_round=10)
# Only the first 10 rows are wrapped as the DMatrix used for reference predictions.
cls.test = xgb.DMatrix(cls.X[:10, ...])
def test_predict(self):
rows = 1000
cols = 10
booster = self.booster
X = self.X
test = self.test
np.random.seed(1994)
X = np.random.randn(rows, cols)
y = np.random.randn(rows)
dtrain = xgb.DMatrix(X, y)
booster = xgb.train({'tree_method': 'hist'},
dtrain, num_boost_round=10)
test = xgb.DMatrix(X[:10, ...])
predt_from_array = booster.inplace_predict(X[:10, ...])
predt_from_dmatrix = booster.predict(test)
@@ -111,7 +119,7 @@ class TestInplacePredict:
return np.all(copied_predt == inplace_predt)
for i in range(10):
run_threaded_predict(X, rows, predict_dense)
run_threaded_predict(X, self.rows, predict_dense)
def predict_csr(x):
inplace_predt = booster.inplace_predict(sparse.csr_matrix(x))
@@ -120,4 +128,14 @@ class TestInplacePredict:
return np.all(copied_predt == inplace_predt)
for i in range(10):
run_threaded_predict(X, rows, predict_csr)
run_threaded_predict(X, self.rows, predict_csr)
# Checks that `inplace_predict` with a `base_margin` argument produces the same
# predictions as `predict` on a DMatrix constructed with that base margin.
def test_base_margin(self):
booster = self.booster
# One margin value per row, drawn from the class-level random state.
base_margin = self.rng.randn(self.rows)
from_inplace = booster.inplace_predict(data=self.X, base_margin=base_margin)
# Reference path: the same data and margin carried by a DMatrix.
dtrain = xgb.DMatrix(self.X, self.y, base_margin=base_margin)
from_dmatrix = booster.predict(dtrain)
np.testing.assert_allclose(from_dmatrix, from_inplace)

View File

@@ -1,6 +1,5 @@
import numpy as np
import xgboost as xgb
import testing as tm
import pytest
try: