Add support inference on SYCL devices (#9800)

---------

Co-authored-by: Dmitry Razdoburdin <>
Co-authored-by: Nikolay Petrov <nikolay.a.petrov@intel.com>
Co-authored-by: Alexandra <alexandra.epanchinzeva@intel.com>
This commit is contained in:
Dmitry Razdoburdin
2023-12-04 09:15:57 +01:00
committed by GitHub
parent 7196c9d95e
commit 381f1d3dc9
31 changed files with 1369 additions and 1294 deletions

View File

@@ -18,92 +18,17 @@
namespace xgboost {
namespace {
void TestBasic(DMatrix* dmat) {
Context ctx;
std::unique_ptr<Predictor> cpu_predictor =
std::unique_ptr<Predictor>(Predictor::Create("cpu_predictor", &ctx));
size_t const kRows = dmat->Info().num_row_;
size_t const kCols = dmat->Info().num_col_;
LearnerModelParam mparam{MakeMP(kCols, .0, 1)};
ctx.UpdateAllowUnknown(Args{});
gbm::GBTreeModel model = CreateTestModel(&mparam, &ctx);
// Test predict batch
PredictionCacheEntry out_predictions;
cpu_predictor->InitOutPredictions(dmat->Info(), &out_predictions.predictions, model);
cpu_predictor->PredictBatch(dmat, &out_predictions, model, 0);
std::vector<float>& out_predictions_h = out_predictions.predictions.HostVector();
for (size_t i = 0; i < out_predictions.predictions.Size(); i++) {
ASSERT_EQ(out_predictions_h[i], 1.5);
}
// Test predict instance
auto const& batch = *dmat->GetBatches<xgboost::SparsePage>().begin();
auto page = batch.GetView();
for (size_t i = 0; i < batch.Size(); i++) {
std::vector<float> instance_out_predictions;
cpu_predictor->PredictInstance(page[i], &instance_out_predictions, model, 0,
dmat->Info().IsColumnSplit());
ASSERT_EQ(instance_out_predictions[0], 1.5);
}
// Test predict leaf
HostDeviceVector<float> leaf_out_predictions;
cpu_predictor->PredictLeaf(dmat, &leaf_out_predictions, model);
auto const& h_leaf_out_predictions = leaf_out_predictions.ConstHostVector();
for (auto v : h_leaf_out_predictions) {
ASSERT_EQ(v, 0);
}
if (dmat->Info().IsColumnSplit()) {
// Predict contribution is not supported for column split.
return;
}
// Test predict contribution
HostDeviceVector<float> out_contribution_hdv;
auto& out_contribution = out_contribution_hdv.HostVector();
cpu_predictor->PredictContribution(dmat, &out_contribution_hdv, model);
ASSERT_EQ(out_contribution.size(), kRows * (kCols + 1));
for (size_t i = 0; i < out_contribution.size(); ++i) {
auto const& contri = out_contribution[i];
// shift 1 for bias, as test tree is a decision dump, only global bias is
// filled with LeafValue().
if ((i + 1) % (kCols + 1) == 0) {
ASSERT_EQ(out_contribution.back(), 1.5f);
} else {
ASSERT_EQ(contri, 0);
}
}
// Test predict contribution (approximate method)
cpu_predictor->PredictContribution(dmat, &out_contribution_hdv, model, 0, nullptr, true);
for (size_t i = 0; i < out_contribution.size(); ++i) {
auto const& contri = out_contribution[i];
// shift 1 for bias, as test tree is a decision dump, only global bias is
// filled with LeafValue().
if ((i + 1) % (kCols + 1) == 0) {
ASSERT_EQ(out_contribution.back(), 1.5f);
} else {
ASSERT_EQ(contri, 0);
}
}
}
} // anonymous namespace
TEST(CpuPredictor, Basic) {
Context ctx;
size_t constexpr kRows = 5;
size_t constexpr kCols = 5;
auto dmat = RandomDataGenerator(kRows, kCols, 0).GenerateDMatrix();
TestBasic(dmat.get());
TestBasic(dmat.get(), &ctx);
}
namespace {
void TestColumnSplit() {
Context ctx;
size_t constexpr kRows = 5;
size_t constexpr kCols = 5;
auto dmat = RandomDataGenerator(kRows, kCols, 0).GenerateDMatrix();
@@ -112,7 +37,7 @@ void TestColumnSplit() {
auto const rank = collective::GetRank();
dmat = std::unique_ptr<DMatrix>{dmat->SliceCol(world_size, rank)};
TestBasic(dmat.get());
TestBasic(dmat.get(), &ctx);
}
} // anonymous namespace
@@ -132,10 +57,11 @@ TEST(CpuPredictor, IterationRangeColmnSplit) {
}
TEST(CpuPredictor, ExternalMemory) {
Context ctx;
size_t constexpr kPageSize = 64, kEntriesPerCol = 3;
size_t constexpr kEntries = kPageSize * kEntriesPerCol * 2;
std::unique_ptr<DMatrix> dmat = CreateSparsePageDMatrix(kEntries);
TestBasic(dmat.get());
TestBasic(dmat.get(), &ctx);
}
TEST(CpuPredictor, InplacePredict) {
@@ -235,12 +161,14 @@ TEST(CPUPredictor, CategoricalPredictionColumnSplit) {
}
TEST(CPUPredictor, CategoricalPredictLeaf) {
TestCategoricalPredictLeaf(false, false);
Context ctx;
TestCategoricalPredictLeaf(&ctx, false);
}
TEST(CPUPredictor, CategoricalPredictLeafColumnSplit) {
auto constexpr kWorldSize = 2;
RunWithInMemoryCommunicator(kWorldSize, TestCategoricalPredictLeaf, false, true);
Context ctx;
RunWithInMemoryCommunicator(kWorldSize, TestCategoricalPredictLeaf, &ctx, true);
}
TEST(CpuPredictor, UpdatePredictionCache) {

View File

@@ -289,11 +289,13 @@ TEST_F(MGPUPredictorTest, CategoricalPredictionColumnSplit) {
}
TEST(GPUPredictor, CategoricalPredictLeaf) {
TestCategoricalPredictLeaf(true, false);
auto ctx = MakeCUDACtx(common::AllVisibleGPUs() == 1 ? 0 : collective::GetRank());
TestCategoricalPredictLeaf(&ctx, false);
}
TEST_F(MGPUPredictorTest, CategoricalPredictionLeafColumnSplit) {
RunWithInMemoryCommunicator(world_size_, TestCategoricalPredictLeaf, true, true);
auto ctx = MakeCUDACtx(common::AllVisibleGPUs() == 1 ? 0 : collective::GetRank());
RunWithInMemoryCommunicator(world_size_, TestCategoricalPredictLeaf, &ctx, true);
}
TEST(GPUPredictor, PredictLeafBasic) {

View File

@@ -26,6 +26,79 @@
#include "xgboost/tree_model.h" // for RegTree
namespace xgboost {
void TestBasic(DMatrix* dmat, Context const *ctx) {
auto predictor = std::unique_ptr<Predictor>(CreatePredictorForTest(ctx));
size_t const kRows = dmat->Info().num_row_;
size_t const kCols = dmat->Info().num_col_;
LearnerModelParam mparam{MakeMP(kCols, .0, 1)};
gbm::GBTreeModel model = CreateTestModel(&mparam, ctx);
// Test predict batch
PredictionCacheEntry out_predictions;
predictor->InitOutPredictions(dmat->Info(), &out_predictions.predictions, model);
predictor->PredictBatch(dmat, &out_predictions, model, 0);
std::vector<float>& out_predictions_h = out_predictions.predictions.HostVector();
for (size_t i = 0; i < out_predictions.predictions.Size(); i++) {
ASSERT_EQ(out_predictions_h[i], 1.5);
}
// Test predict instance
auto const& batch = *dmat->GetBatches<xgboost::SparsePage>().begin();
auto page = batch.GetView();
for (size_t i = 0; i < batch.Size(); i++) {
std::vector<float> instance_out_predictions;
predictor->PredictInstance(page[i], &instance_out_predictions, model, 0,
dmat->Info().IsColumnSplit());
ASSERT_EQ(instance_out_predictions[0], 1.5);
}
// Test predict leaf
HostDeviceVector<float> leaf_out_predictions;
predictor->PredictLeaf(dmat, &leaf_out_predictions, model);
auto const& h_leaf_out_predictions = leaf_out_predictions.ConstHostVector();
for (auto v : h_leaf_out_predictions) {
ASSERT_EQ(v, 0);
}
if (dmat->Info().IsColumnSplit()) {
// Predict contribution is not supported for column split.
return;
}
// Test predict contribution
HostDeviceVector<float> out_contribution_hdv;
auto& out_contribution = out_contribution_hdv.HostVector();
predictor->PredictContribution(dmat, &out_contribution_hdv, model);
ASSERT_EQ(out_contribution.size(), kRows * (kCols + 1));
for (size_t i = 0; i < out_contribution.size(); ++i) {
auto const& contri = out_contribution[i];
// shift 1 for bias, as test tree is a decision dump, only global bias is
// filled with LeafValue().
if ((i + 1) % (kCols + 1) == 0) {
ASSERT_EQ(out_contribution.back(), 1.5f);
} else {
ASSERT_EQ(contri, 0);
}
}
// Test predict contribution (approximate method)
predictor->PredictContribution(dmat, &out_contribution_hdv, model, 0, nullptr, true);
for (size_t i = 0; i < out_contribution.size(); ++i) {
auto const& contri = out_contribution[i];
// shift 1 for bias, as test tree is a decision dump, only global bias is
// filled with LeafValue().
if ((i + 1) % (kCols + 1) == 0) {
ASSERT_EQ(out_contribution.back(), 1.5f);
} else {
ASSERT_EQ(contri, 0);
}
}
}
TEST(Predictor, PredictionCache) {
size_t constexpr kRows = 16, kCols = 4;
@@ -64,7 +137,7 @@ void TestTrainingPrediction(Context const *ctx, size_t rows, size_t bins,
{"num_feature", std::to_string(kCols)},
{"num_class", std::to_string(kClasses)},
{"max_bin", std::to_string(bins)},
{"device", ctx->DeviceName()}});
{"device", ctx->IsSycl() ? "cpu" : ctx->DeviceName()}});
learner->Configure();
for (size_t i = 0; i < kIters; ++i) {
@@ -151,7 +224,7 @@ std::unique_ptr<Learner> LearnerForTest(Context const *ctx, std::shared_ptr<DMat
size_t iters, size_t forest = 1) {
std::unique_ptr<Learner> learner{Learner::Create({dmat})};
learner->SetParams(
Args{{"num_parallel_tree", std::to_string(forest)}, {"device", ctx->DeviceName()}});
Args{{"num_parallel_tree", std::to_string(forest)}, {"device", ctx->IsSycl() ? "cpu" : ctx->DeviceName()}});
for (size_t i = 0; i < iters; ++i) {
learner->UpdateOneIter(i, dmat);
}
@@ -305,11 +378,7 @@ void TestCategoricalPrediction(bool use_gpu, bool is_column_split) {
ASSERT_EQ(out_predictions.predictions.HostVector()[0], left_weight + score);
}
void TestCategoricalPredictLeaf(bool use_gpu, bool is_column_split) {
Context ctx;
if (use_gpu) {
ctx = MakeCUDACtx(common::AllVisibleGPUs() == 1 ? 0 : collective::GetRank());
}
void TestCategoricalPredictLeaf(Context const *ctx, bool is_column_split) {
size_t constexpr kCols = 10;
PredictionCacheEntry out_predictions;
@@ -320,10 +389,10 @@ void TestCategoricalPredictLeaf(bool use_gpu, bool is_column_split) {
float left_weight = 1.3f;
float right_weight = 1.7f;
gbm::GBTreeModel model(&mparam, &ctx);
gbm::GBTreeModel model(&mparam, ctx);
GBTreeModelForTest(&model, split_ind, split_cat, left_weight, right_weight);
std::unique_ptr<Predictor> predictor{CreatePredictorForTest(&ctx)};
std::unique_ptr<Predictor> predictor{CreatePredictorForTest(ctx)};
std::vector<float> row(kCols);
row[split_ind] = split_cat;
@@ -363,7 +432,6 @@ void TestIterationRange(Context const* ctx) {
HostDeviceVector<float> out_predt_sliced;
HostDeviceVector<float> out_predt_ranged;
// margin
{
sliced->Predict(dmat, true, &out_predt_sliced, 0, 0, false, false, false, false, false);
learner->Predict(dmat, true, &out_predt_ranged, 0, lend, false, false, false, false, false);
@@ -519,6 +587,8 @@ void TestSparsePrediction(Context const *ctx, float sparsity) {
learner.reset(Learner::Create({Xy}));
learner->LoadModel(model);
learner->SetParam("device", ctx->DeviceName());
learner->Configure();
if (ctx->IsCUDA()) {
learner->SetParam("tree_method", "gpu_hist");

View File

@@ -34,6 +34,8 @@ inline gbm::GBTreeModel CreateTestModel(LearnerModelParam const* param, Context
inline auto CreatePredictorForTest(Context const* ctx) {
if (ctx->IsCPU()) {
return Predictor::Create("cpu_predictor", ctx);
} else if (ctx->IsSycl()) {
return Predictor::Create("sycl_predictor", ctx);
} else {
return Predictor::Create("gpu_predictor", ctx);
}
@@ -83,6 +85,8 @@ void TestPredictionFromGradientIndex(Context const* ctx, size_t rows, size_t col
}
}
void TestBasic(DMatrix* dmat, Context const * ctx);
// p_full and p_hist should come from the same data set.
void TestTrainingPrediction(Context const* ctx, size_t rows, size_t bins,
std::shared_ptr<DMatrix> p_full, std::shared_ptr<DMatrix> p_hist);
@@ -98,7 +102,7 @@ void TestCategoricalPrediction(bool use_gpu, bool is_column_split);
void TestPredictionWithLesserFeaturesColumnSplit(bool use_gpu);
void TestCategoricalPredictLeaf(bool use_gpu, bool is_column_split);
void TestCategoricalPredictLeaf(Context const *ctx, bool is_column_split);
void TestIterationRange(Context const* ctx);