Implement GPU predict leaf. (#6187)

This commit is contained in:
Jiaming Yuan
2020-11-11 17:33:47 +08:00
committed by GitHub
parent 7f101d1b33
commit 8a17610666
12 changed files with 252 additions and 42 deletions

View File

@@ -348,6 +348,13 @@ RandomDataGenerator::GenerateDMatrix(bool with_label, bool float_label,
gen.GenerateDense(&out->Info().labels_);
}
}
if (device_ >= 0) {
out->Info().labels_.SetDevice(device_);
for (auto const& page : out->GetBatches<SparsePage>()) {
page.data.SetDevice(device_);
page.offset.SetDevice(device_);
}
}
return out;
}

View File

@@ -46,9 +46,10 @@ TEST(CpuPredictor, Basic) {
}
// Test predict leaf
std::vector<float> leaf_out_predictions;
HostDeviceVector<float> leaf_out_predictions;
cpu_predictor->PredictLeaf(dmat.get(), &leaf_out_predictions, model);
for (auto v : leaf_out_predictions) {
auto const& h_leaf_out_predictions = leaf_out_predictions.ConstHostVector();
for (auto v : h_leaf_out_predictions) {
ASSERT_EQ(v, 0);
}
@@ -112,10 +113,11 @@ TEST(CpuPredictor, ExternalMemory) {
}
// Test predict leaf
std::vector<float> leaf_out_predictions;
HostDeviceVector<float> leaf_out_predictions;
cpu_predictor->PredictLeaf(dmat.get(), &leaf_out_predictions, model);
ASSERT_EQ(leaf_out_predictions.size(), dmat->Info().num_row_);
for (const auto& v : leaf_out_predictions) {
auto const& h_leaf_out_predictions = leaf_out_predictions.ConstHostVector();
ASSERT_EQ(h_leaf_out_predictions.size(), dmat->Info().num_row_);
for (const auto& v : h_leaf_out_predictions) {
ASSERT_EQ(v, 0);
}

View File

@@ -190,6 +190,7 @@ TEST(GPUPredictor, ShapStump) {
EXPECT_EQ(phis[4], 0.0);
EXPECT_EQ(phis[5], param.base_score);
}
TEST(GPUPredictor, Shap) {
LearnerModelParam param;
param.num_feature = 1;
@@ -224,5 +225,28 @@ TEST(GPUPredictor, Shap) {
TEST(GPUPredictor, CategoricalPrediction) {
TestCategoricalPrediction("gpu_predictor");
}
TEST(GPUPredictor, PredictLeafBasic) {
size_t constexpr kRows = 5, kCols = 5;
auto dmat = RandomDataGenerator(kRows, kCols, 0).Device(0).GenerateDMatrix();
auto lparam = CreateEmptyGenericParam(GPUIDX);
std::unique_ptr<Predictor> gpu_predictor =
std::unique_ptr<Predictor>(Predictor::Create("gpu_predictor", &lparam));
gpu_predictor->Configure({});
LearnerModelParam param;
param.num_feature = kCols;
param.base_score = 0.0;
param.num_output_group = 1;
gbm::GBTreeModel model = CreateTestModel(&param);
HostDeviceVector<float> leaf_out_predictions;
gpu_predictor->PredictLeaf(dmat.get(), &leaf_out_predictions, model);
auto const& h_leaf_out_predictions = leaf_out_predictions.ConstHostVector();
for (auto v : h_leaf_out_predictions) {
ASSERT_EQ(v, 0);
}
}
} // namespace predictor
} // namespace xgboost

View File

@@ -1,4 +1,5 @@
import sys
import json
import unittest
import pytest
@@ -9,6 +10,7 @@ from hypothesis import given, strategies, assume, settings, note
sys.path.append("tests/python")
import testing as tm
from test_predict import run_threaded_predict # noqa
from test_predict import run_predict_leaf # noqa
rng = np.random.RandomState(1994)
@@ -18,6 +20,11 @@ shap_parameter_strategy = strategies.fixed_dictionaries({
'num_parallel_tree': strategies.sampled_from([1, 10]),
}).filter(lambda x: x['max_depth'] > 0 or x['max_leaves'] > 0)
predict_parameter_strategy = strategies.fixed_dictionaries({
'max_depth': strategies.integers(1, 8),
'num_parallel_tree': strategies.sampled_from([1, 4]),
})
class TestGPUPredict(unittest.TestCase):
def test_predict(self):
@@ -223,3 +230,34 @@ class TestGPUPredict(unittest.TestCase):
assert np.allclose(np.sum(shap, axis=(len(shap.shape) - 1, len(shap.shape) - 2)),
margin,
1e-3, 1e-3)
def test_predict_leaf_basic(self):
gpu_leaf = run_predict_leaf('gpu_predictor')
cpu_leaf = run_predict_leaf('cpu_predictor')
np.testing.assert_equal(gpu_leaf, cpu_leaf)
def run_predict_leaf_booster(self, param, num_rounds, dataset):
param = dataset.set_params(param)
m = dataset.get_dmat()
booster = xgb.train(param, dtrain=dataset.get_dmat(), num_boost_round=num_rounds)
booster.set_param({'predictor': 'cpu_predictor'})
cpu_leaf = booster.predict(m, pred_leaf=True)
booster.set_param({'predictor': 'gpu_predictor'})
gpu_leaf = booster.predict(m, pred_leaf=True)
np.testing.assert_equal(cpu_leaf, gpu_leaf)
@given(predict_parameter_strategy, tm.dataset_strategy)
@settings(deadline=None)
def test_predict_leaf_gbtree(self, param, dataset):
param['booster'] = 'gbtree'
param['tree_method'] = 'gpu_hist'
self.run_predict_leaf_booster(param, 10, dataset)
@given(predict_parameter_strategy, tm.dataset_strategy)
@settings(deadline=None)
def test_predict_leaf_dart(self, param, dataset):
param['booster'] = 'dart'
param['tree_method'] = 'gpu_hist'
self.run_predict_leaf_booster(param, 10, dataset)

View File

@@ -23,6 +23,49 @@ def run_threaded_predict(X, rows, predict_func):
assert f.result()
def run_predict_leaf(predictor):
rows = 100
cols = 4
classes = 5
num_parallel_tree = 4
num_boost_round = 10
rng = np.random.RandomState(1994)
X = rng.randn(rows, cols)
y = rng.randint(low=0, high=classes, size=rows)
m = xgb.DMatrix(X, y)
booster = xgb.train(
{'num_parallel_tree': num_parallel_tree, 'num_class': classes,
'predictor': predictor, 'tree_method': 'hist'}, m,
num_boost_round=num_boost_round)
empty = xgb.DMatrix(np.ones(shape=(0, cols)))
empty_leaf = booster.predict(empty, pred_leaf=True)
assert empty_leaf.shape[0] == 0
leaf = booster.predict(m, pred_leaf=True)
assert leaf.shape[0] == rows
assert leaf.shape[1] == classes * num_parallel_tree * num_boost_round
for i in range(rows):
row = leaf[i, ...]
for j in range(num_boost_round):
start = classes * num_parallel_tree * j
end = classes * num_parallel_tree * (j + 1)
layer = row[start: end]
for c in range(classes):
tree_group = layer[c * num_parallel_tree:
(c+1) * num_parallel_tree]
assert tree_group.shape[0] == num_parallel_tree
# no subsampling so tree in same forest should output same
# leaf.
assert np.all(tree_group == tree_group[0])
return leaf
def test_predict_leaf():
run_predict_leaf('cpu_predictor')
class TestInplacePredict(unittest.TestCase):
'''Tests for running inplace prediction'''
def test_predict(self):