Add support for inference on SYCL devices (#9800)

--------- Co-authored-by: Dmitry Razdoburdin <> Co-authored-by: Nikolay Petrov <nikolay.a.petrov@intel.com> Co-authored-by: Alexandra <alexandra.epanchinzeva@intel.com>
2023-12-04 09:15:57 +01:00
parent 7196c9d95e
commit 381f1d3dc9
31 changed files with 1369 additions and 1294 deletions
--- a/tests/cpp/CMakeLists.txt
+++ b/tests/cpp/CMakeLists.txt
@@ -13,9 +13,9 @@ if(USE_CUDA)
  list(APPEND TEST_SOURCES ${CUDA_TEST_SOURCES})
 endif()

-file(GLOB_RECURSE ONEAPI_TEST_SOURCES "plugin/*_oneapi.cc")
-if(NOT PLUGIN_UPDATER_ONEAPI)
-  list(REMOVE_ITEM TEST_SOURCES ${ONEAPI_TEST_SOURCES})
+file(GLOB_RECURSE SYCL_TEST_SOURCES "plugin/test_sycl_*.cc")
+if(NOT PLUGIN_SYCL)
+  list(REMOVE_ITEM TEST_SOURCES ${SYCL_TEST_SOURCES})
 endif()

 if(PLUGIN_FEDERATED)
--- a/tests/cpp/plugin/test_predictor_oneapi.cc
+++ b/tests/cpp/plugin/test_predictor_oneapi.cc
@@ -1,168 +0,0 @@
-/*!
- * Copyright 2017-2020 XGBoost contributors
- */
-#include <gtest/gtest.h>
-#include <xgboost/predictor.h>
-
-#include "../../../src/data/adapter.h"
-#include "../../../src/gbm/gbtree_model.h"
-#include "../filesystem.h"  // dmlc::TemporaryDirectory
-#include "../helpers.h"
-#include "../predictor/test_predictor.h"
-
-namespace xgboost {
-TEST(Plugin, OneAPIPredictorBasic) {
-  auto lparam = MakeCUDACtx(0);
-  std::unique_ptr<Predictor> oneapi_predictor =
-      std::unique_ptr<Predictor>(Predictor::Create("oneapi_predictor", &lparam));
-
-  int kRows = 5;
-  int kCols = 5;
-
-  LearnerModelParam param;
-  param.num_feature = kCols;
-  param.base_score = 0.0;
-  param.num_output_group = 1;
-
-  gbm::GBTreeModel model = CreateTestModel(&param);
-
-  auto dmat = RandomDataGenerator(kRows, kCols, 0).GenerateDMatrix();
-
-  // Test predict batch
-  PredictionCacheEntry out_predictions;
-  oneapi_predictor->PredictBatch(dmat.get(), &out_predictions, model, 0);
-  ASSERT_EQ(model.trees.size(), out_predictions.version);
-  std::vector<float>& out_predictions_h = out_predictions.predictions.HostVector();
-  for (size_t i = 0; i < out_predictions.predictions.Size(); i++) {
-    ASSERT_EQ(out_predictions_h[i], 1.5);
-  }
-
-  // Test predict instance
-  auto const &batch = *dmat->GetBatches<xgboost::SparsePage>().begin();
-  for (size_t i = 0; i < batch.Size(); i++) {
-    std::vector<float> instance_out_predictions;
-    oneapi_predictor->PredictInstance(batch[i], &instance_out_predictions, model);
-    ASSERT_EQ(instance_out_predictions[0], 1.5);
-  }
-
-  // Test predict leaf
-  std::vector<float> leaf_out_predictions;
-  oneapi_predictor->PredictLeaf(dmat.get(), &leaf_out_predictions, model);
-  for (auto v : leaf_out_predictions) {
-    ASSERT_EQ(v, 0);
-  }
-
-  // Test predict contribution
-  std::vector<float> out_contribution;
-  oneapi_predictor->PredictContribution(dmat.get(), &out_contribution, model);
-  ASSERT_EQ(out_contribution.size(), kRows * (kCols + 1));
-  for (size_t i = 0; i < out_contribution.size(); ++i) {
-    auto const& contri = out_contribution[i];
-    // shift 1 for bias, as test tree is a decision dump, only global bias is filled with LeafValue().
-    if ((i+1) % (kCols+1) == 0) {
-      ASSERT_EQ(out_contribution.back(), 1.5f);
-    } else {
-      ASSERT_EQ(contri, 0);
-    }
-  }
-  // Test predict contribution (approximate method)
-  oneapi_predictor->PredictContribution(dmat.get(), &out_contribution, model, 0, nullptr, true);
-  for (size_t i = 0; i < out_contribution.size(); ++i) {
-    auto const& contri = out_contribution[i];
-    // shift 1 for bias, as test tree is a decision dump, only global bias is filled with LeafValue().
-    if ((i+1) % (kCols+1) == 0) {
-      ASSERT_EQ(out_contribution.back(), 1.5f);
-    } else {
-      ASSERT_EQ(contri, 0);
-    }
-  }
-}
-
-TEST(Plugin, OneAPIPredictorExternalMemory) {
-  dmlc::TemporaryDirectory tmpdir;
-  std::string filename = tmpdir.path + "/big.libsvm";
-  std::unique_ptr<DMatrix> dmat = CreateSparsePageDMatrix(12, 64, filename);
-  auto lparam = MakeCUDACtx(0);
-
-  std::unique_ptr<Predictor> oneapi_predictor =
-      std::unique_ptr<Predictor>(Predictor::Create("oneapi_predictor", &lparam));
-
-  LearnerModelParam param;
-  param.base_score = 0;
-  param.num_feature = dmat->Info().num_col_;
-  param.num_output_group = 1;
-
-  gbm::GBTreeModel model = CreateTestModel(&param);
-
-  // Test predict batch
-  PredictionCacheEntry out_predictions;
-  oneapi_predictor->PredictBatch(dmat.get(), &out_predictions, model, 0);
-  std::vector<float> &out_predictions_h = out_predictions.predictions.HostVector();
-  ASSERT_EQ(out_predictions.predictions.Size(), dmat->Info().num_row_);
-  for (const auto& v : out_predictions_h) {
-    ASSERT_EQ(v, 1.5);
-  }
-
-  // Test predict leaf
-  std::vector<float> leaf_out_predictions;
-  oneapi_predictor->PredictLeaf(dmat.get(), &leaf_out_predictions, model);
-  ASSERT_EQ(leaf_out_predictions.size(), dmat->Info().num_row_);
-  for (const auto& v : leaf_out_predictions) {
-    ASSERT_EQ(v, 0);
-  }
-
-  // Test predict contribution
-  std::vector<float> out_contribution;
-  oneapi_predictor->PredictContribution(dmat.get(), &out_contribution, model);
-  ASSERT_EQ(out_contribution.size(), dmat->Info().num_row_ * (dmat->Info().num_col_ + 1));
-  for (size_t i = 0; i < out_contribution.size(); ++i) {
-    auto const& contri = out_contribution[i];
-    // shift 1 for bias, as test tree is a decision dump, only global bias is filled with LeafValue().
-    if ((i + 1) % (dmat->Info().num_col_ + 1) == 0) {
-      ASSERT_EQ(out_contribution.back(), 1.5f);
-    } else {
-      ASSERT_EQ(contri, 0);
-    }
-  }
-
-  // Test predict contribution (approximate method)
-  std::vector<float> out_contribution_approximate;
-  oneapi_predictor->PredictContribution(dmat.get(), &out_contribution_approximate, model, 0, nullptr, true);
-  ASSERT_EQ(out_contribution_approximate.size(),
-            dmat->Info().num_row_ * (dmat->Info().num_col_ + 1));
-  for (size_t i = 0; i < out_contribution.size(); ++i) {
-    auto const& contri = out_contribution[i];
-    // shift 1 for bias, as test tree is a decision dump, only global bias is filled with LeafValue().
-    if ((i + 1) % (dmat->Info().num_col_ + 1) == 0) {
-      ASSERT_EQ(out_contribution.back(), 1.5f);
-    } else {
-      ASSERT_EQ(contri, 0);
-    }
-  }
-}
-
-TEST(Plugin, OneAPIPredictorInplacePredict) {
-  bst_row_t constexpr kRows{128};
-  bst_feature_t constexpr kCols{64};
-  auto gen = RandomDataGenerator{kRows, kCols, 0.5}.Device(-1);
-  {
-    HostDeviceVector<float> data;
-    gen.GenerateDense(&data);
-    ASSERT_EQ(data.Size(), kRows * kCols);
-    std::shared_ptr<data::DenseAdapter> x{
-      new data::DenseAdapter(data.HostPointer(), kRows, kCols)};
-    TestInplacePrediction(x, "oneapi_predictor", kRows, kCols, -1);
-  }
-
-  {
-    HostDeviceVector<float> data;
-    HostDeviceVector<bst_row_t> rptrs;
-    HostDeviceVector<bst_feature_t> columns;
-    gen.GenerateCSR(&data, &rptrs, &columns);
-    std::shared_ptr<data::CSRAdapter> x{new data::CSRAdapter(
-        rptrs.HostPointer(), columns.HostPointer(), data.HostPointer(), kRows,
-        data.Size(), kCols)};
-    TestInplacePrediction(x, "oneapi_predictor", kRows, kCols, -1);
-  }
-}
-}  // namespace xgboost
--- a/tests/cpp/plugin/test_regression_obj_oneapi.cc
+++ b/tests/cpp/plugin/test_regression_obj_oneapi.cc
@@ -1,176 +0,0 @@
-/*!
- * Copyright 2017-2019 XGBoost contributors
- */
-#include <gtest/gtest.h>
-#include <xgboost/objective.h>
-#include <xgboost/context.h>
-#include <xgboost/json.h>
-#include "../helpers.h"
-namespace xgboost {
-
-TEST(Plugin, LinearRegressionGPairOneAPI) {
-  Context tparam = MakeCUDACtx(0);
-  std::vector<std::pair<std::string, std::string>> args;
-
-  std::unique_ptr<ObjFunction> obj {
-    ObjFunction::Create("reg:squarederror_oneapi", &tparam)
-  };
-
-  obj->Configure(args);
-  CheckObjFunction(obj,
-                   {0, 0.1f, 0.9f,   1,    0,  0.1f, 0.9f,  1},
-                   {0,   0,   0,   0,    1,    1,    1, 1},
-                   {1,   1,   1,   1,    1,    1,    1, 1},
-                   {0, 0.1f, 0.9f, 1.0f, -1.0f, -0.9f, -0.1f, 0},
-                   {1,   1,   1,   1,    1,    1,    1, 1});
-  CheckObjFunction(obj,
-                   {0, 0.1f, 0.9f,   1,    0,  0.1f, 0.9f,  1},
-                   {0,   0,   0,   0,    1,    1,    1, 1},
-                   {},  // empty weight
-                   {0, 0.1f, 0.9f, 1.0f, -1.0f, -0.9f, -0.1f, 0},
-                   {1,   1,   1,   1,    1,    1,    1, 1});
-  ASSERT_NO_THROW(obj->DefaultEvalMetric());
-}
-
-TEST(Plugin, SquaredLogOneAPI) {
-  Context tparam = MakeCUDACtx(0);
-  std::vector<std::pair<std::string, std::string>> args;
-
-  std::unique_ptr<ObjFunction> obj { ObjFunction::Create("reg:squaredlogerror_oneapi", &tparam) };
-  obj->Configure(args);
-  CheckConfigReload(obj, "reg:squaredlogerror_oneapi");
-
-  CheckObjFunction(obj,
-                   {0.1f, 0.2f, 0.4f, 0.8f, 1.6f},  // pred
-                   {1.0f, 1.0f, 1.0f, 1.0f, 1.0f},  // labels
-                   {1.0f, 1.0f, 1.0f, 1.0f, 1.0f},  // weights
-                   {-0.5435f, -0.4257f, -0.25475f, -0.05855f, 0.1009f},
-                   { 1.3205f,  1.0492f,  0.69215f,  0.34115f, 0.1091f});
-  CheckObjFunction(obj,
-                   {0.1f, 0.2f, 0.4f, 0.8f, 1.6f},  // pred
-                   {1.0f, 1.0f, 1.0f, 1.0f, 1.0f},  // labels
-                   {},                              // empty weights
-                   {-0.5435f, -0.4257f, -0.25475f, -0.05855f, 0.1009f},
-                   { 1.3205f,  1.0492f,  0.69215f,  0.34115f, 0.1091f});
-  ASSERT_EQ(obj->DefaultEvalMetric(), std::string{"rmsle"});
-}
-
-TEST(Plugin, LogisticRegressionGPairOneAPI) {
-  Context tparam = MakeCUDACtx(0);
-  std::vector<std::pair<std::string, std::string>> args;
-  std::unique_ptr<ObjFunction> obj { ObjFunction::Create("reg:logistic_oneapi", &tparam) };
-
-  obj->Configure(args);
-  CheckConfigReload(obj, "reg:logistic_oneapi");
-
-  CheckObjFunction(obj,
-                   {   0,  0.1f,  0.9f,    1,    0,   0.1f,  0.9f,      1}, // preds
-                   {   0,    0,    0,    0,    1,     1,     1,     1}, // labels
-                   {   1,    1,    1,    1,    1,     1,     1,     1}, // weights
-                   { 0.5f, 0.52f, 0.71f, 0.73f, -0.5f, -0.47f, -0.28f, -0.26f}, // out_grad
-                   {0.25f, 0.24f, 0.20f, 0.19f, 0.25f,  0.24f,  0.20f,  0.19f}); // out_hess
-}
-
-TEST(Plugin, LogisticRegressionBasicOneAPI) {
-  Context lparam = MakeCUDACtx(0);
-  std::vector<std::pair<std::string, std::string>> args;
-  std::unique_ptr<ObjFunction> obj {
-    ObjFunction::Create("reg:logistic_oneapi", &lparam)
-  };
-
-  obj->Configure(args);
-  CheckConfigReload(obj, "reg:logistic_oneapi");
-
-  // test label validation
-  EXPECT_ANY_THROW(CheckObjFunction(obj, {0}, {10}, {1}, {0}, {0}))
-    << "Expected error when label not in range [0,1f] for LogisticRegression";
-
-  // test ProbToMargin
-  EXPECT_NEAR(obj->ProbToMargin(0.1f), -2.197f, 0.01f);
-  EXPECT_NEAR(obj->ProbToMargin(0.5f), 0, 0.01f);
-  EXPECT_NEAR(obj->ProbToMargin(0.9f), 2.197f, 0.01f);
-  EXPECT_ANY_THROW(obj->ProbToMargin(10))
-    << "Expected error when base_score not in range [0,1f] for LogisticRegression";
-
-  // test PredTransform
-  HostDeviceVector<bst_float> io_preds = {0, 0.1f, 0.5f, 0.9f, 1};
-  std::vector<bst_float> out_preds = {0.5f, 0.524f, 0.622f, 0.710f, 0.731f};
-  obj->PredTransform(&io_preds);
-  auto& preds = io_preds.HostVector();
-  for (int i = 0; i < static_cast<int>(io_preds.Size()); ++i) {
-    EXPECT_NEAR(preds[i], out_preds[i], 0.01f);
-  }
-}
-
-TEST(Plugin, LogisticRawGPairOneAPI) {
-  Context lparam = MakeCUDACtx(0);
-  std::vector<std::pair<std::string, std::string>> args;
-  std::unique_ptr<ObjFunction>  obj {
-    ObjFunction::Create("binary:logitraw_oneapi", &lparam)
-  };
-
-  obj->Configure(args);
-
-  CheckObjFunction(obj,
-                   {   0,  0.1f,  0.9f,    1,    0,   0.1f,   0.9f,     1},
-                   {   0,    0,    0,    0,    1,     1,     1,     1},
-                   {   1,    1,    1,    1,    1,     1,     1,     1},
-                   { 0.5f, 0.52f, 0.71f, 0.73f, -0.5f, -0.47f, -0.28f, -0.26f},
-                   {0.25f, 0.24f, 0.20f, 0.19f, 0.25f,  0.24f,  0.20f,  0.19f});
-}
-
-TEST(Plugin, CPUvsOneAPI) {
-  Context ctx = MakeCUDACtx(0);
-
-  ObjFunction * obj_cpu =
-      ObjFunction::Create("reg:squarederror", &ctx);
-  ObjFunction * obj_oneapi =
-      ObjFunction::Create("reg:squarederror_oneapi", &ctx);
-  HostDeviceVector<GradientPair> cpu_out_preds;
-  HostDeviceVector<GradientPair> oneapi_out_preds;
-
-  constexpr size_t kRows = 400;
-  constexpr size_t kCols = 100;
-  auto pdmat = RandomDataGenerator(kRows, kCols, 0).Seed(0).GenerateDMatrix();
-  HostDeviceVector<float> preds;
-  preds.Resize(kRows);
-  auto& h_preds = preds.HostVector();
-  for (size_t i = 0; i < h_preds.size(); ++i) {
-    h_preds[i] = static_cast<float>(i);
-  }
-  auto& info = pdmat->Info();
-
-  info.labels.Reshape(kRows, 1);
-  auto& h_labels = info.labels.Data()->HostVector();
-  for (size_t i = 0; i < h_labels.size(); ++i) {
-    h_labels[i] = 1 / static_cast<float>(i+1);
-  }
-
-  {
-    // CPU
-    ctx = ctx.MakeCPU();
-    obj_cpu->GetGradient(preds, info, 0, &cpu_out_preds);
-  }
-  {
-    // oneapi
-    ctx.gpu_id = 0;
-    obj_oneapi->GetGradient(preds, info, 0, &oneapi_out_preds);
-  }
-
-  auto& h_cpu_out = cpu_out_preds.HostVector();
-  auto& h_oneapi_out = oneapi_out_preds.HostVector();
-
-  float sgrad = 0;
-  float shess = 0;
-  for (size_t i = 0; i < kRows; ++i) {
-    sgrad += std::pow(h_cpu_out[i].GetGrad() - h_oneapi_out[i].GetGrad(), 2);
-    shess += std::pow(h_cpu_out[i].GetHess() - h_oneapi_out[i].GetHess(), 2);
-  }
-  ASSERT_NEAR(sgrad, 0.0f, kRtEps);
-  ASSERT_NEAR(shess, 0.0f, kRtEps);
-
-  delete obj_cpu;
-  delete obj_oneapi;
-}
-
-}  // namespace xgboost
--- a/tests/cpp/plugin/test_sycl_predictor.cc
+++ b/tests/cpp/plugin/test_sycl_predictor.cc
@@ -0,0 +1,101 @@
+/*!
+ * Copyright 2017-2023 XGBoost contributors
+ */
+#include <gtest/gtest.h>
+#include <xgboost/predictor.h>
+
+#include "../../../src/data/adapter.h"
+#include "../../../src/data/proxy_dmatrix.h"
+#include "../../../src/gbm/gbtree.h"
+#include "../../../src/gbm/gbtree_model.h"
+#include "../filesystem.h"  // dmlc::TemporaryDirectory
+#include "../helpers.h"
+#include "../predictor/test_predictor.h"
+
+namespace xgboost {
+
+// Runs the shared TestBasic checks on a small generated DMatrix with a SYCL context.
+TEST(SyclPredictor, Basic) {
+  Context sycl_ctx;
+  sycl_ctx.UpdateAllowUnknown(Args{{"device", "sycl"}});
+
+  size_t constexpr kRows{5}, kCols{5};
+  auto p_fmat = RandomDataGenerator(kRows, kCols, 0).GenerateDMatrix();
+  TestBasic(p_fmat.get(), &sycl_ctx);
+}
+
+// Runs the shared TestBasic checks on the DMatrix returned by CreateSparsePageDMatrix,
+// using a SYCL context.
+TEST(SyclPredictor, ExternalMemory) {
+  Context sycl_ctx;
+  sycl_ctx.UpdateAllowUnknown(Args{{"device", "sycl"}});
+
+  size_t constexpr kPageSize{64}, kEntriesPerCol{3};
+  size_t constexpr kEntries{kPageSize * kEntriesPerCol * 2};
+  std::unique_ptr<DMatrix> p_fmat{CreateSparsePageDMatrix(kEntries)};
+  TestBasic(p_fmat.get(), &sycl_ctx);
+}
+
+// Checks in-place prediction with a SYCL context on dense data that is handed to the
+// predictor through a DMatrixProxy as an array-interface string.
+TEST(SyclPredictor, InplacePredict) {
+  bst_row_t constexpr kRows{128};
+  bst_feature_t constexpr kCols{64};
+
+  // The input is generated for the device of a default-constructed context; only the
+  // prediction runs with the SYCL context.  Distinct names keep the two contexts from
+  // shadowing each other.
+  Context data_ctx;
+  auto gen = RandomDataGenerator{kRows, kCols, 0.5}.Device(data_ctx.Device());
+  HostDeviceVector<float> data;
+  gen.GenerateDense(&data);
+  ASSERT_EQ(data.Size(), kRows * kCols);
+
+  Context sycl_ctx;
+  sycl_ctx.UpdateAllowUnknown(Args{{"device", "sycl"}});
+
+  // Serialise the array interface and attach it to the proxy used for prediction.
+  auto x = std::make_shared<data::DMatrixProxy>();
+  auto array_interface = GetArrayInterface(&data, kRows, kCols);
+  std::string arr_str;
+  Json::Dump(array_interface, &arr_str);
+  x->SetArrayData(arr_str.data());
+  TestInplacePrediction(&sycl_ctx, x, kRows, kCols);
+}
+
+// Runs the shared iteration-range prediction test with a SYCL context.
+TEST(SyclPredictor, IterationRange) {
+  Context sycl_ctx;
+  Args const device_args{{"device", "sycl"}};
+  sycl_ctx.UpdateAllowUnknown(device_args);
+  TestIterationRange(&sycl_ctx);
+}
+
+// Runs the shared training-prediction test with a SYCL context.  Two DMatrix objects
+// of identical dimensions are prepared: one generated with Bins(kBins), and one
+// created from an array-interface adapter over the generated storage.
+TEST(SyclPredictor, GHistIndexTraining) {
+  size_t constexpr kRows{128}, kCols{16}, kBins{64};
+  Context sycl_ctx;
+  sycl_ctx.UpdateAllowUnknown(Args{{"device", "sycl"}});
+
+  auto p_hist = RandomDataGenerator{kRows, kCols, 0.0}.Bins(kBins).GenerateDMatrix(false);
+
+  HostDeviceVector<float> storage(kRows * kCols);
+  auto columnar = RandomDataGenerator{kRows, kCols, 0.0}.GenerateArrayInterface(&storage);
+  data::ArrayAdapter adapter(columnar.c_str());
+  auto constexpr kMissing = std::numeric_limits<float>::quiet_NaN();
+  std::shared_ptr<DMatrix> p_full{DMatrix::Create(&adapter, kMissing, 1)};
+
+  TestTrainingPrediction(&sycl_ctx, kRows, kBins, p_full, p_hist);
+}
+
+// Runs the shared categorical leaf-prediction test with a SYCL context and
+// is_column_split disabled.
+TEST(SyclPredictor, CategoricalPredictLeaf) {
+  bool constexpr kIsColumnSplit{false};
+  Context sycl_ctx;
+  sycl_ctx.UpdateAllowUnknown(Args{{"device", "sycl"}});
+  TestCategoricalPredictLeaf(&sycl_ctx, kIsColumnSplit);
+}
+
+// Runs the shared lesser-features prediction test with a SYCL context.
+TEST(SyclPredictor, LesserFeatures) {
+  Context sycl_ctx;
+  Args const device_args{{"device", "sycl"}};
+  sycl_ctx.UpdateAllowUnknown(device_args);
+  TestPredictionWithLesserFeatures(&sycl_ctx);
+}
+
+// Runs the shared sparse-prediction test with a SYCL context at two sparsity settings.
+TEST(SyclPredictor, Sparse) {
+  Context sycl_ctx;
+  sycl_ctx.UpdateAllowUnknown(Args{{"device", "sycl"}});
+  for (float sparsity : {0.2f, 0.8f}) {
+    TestSparsePrediction(&sycl_ctx, sparsity);
+  }
+}
+
+// Runs the shared vector-leaf prediction test with a SYCL context.
+TEST(SyclPredictor, Multi) {
+  Context sycl_ctx;
+  Args const device_args{{"device", "sycl"}};
+  sycl_ctx.UpdateAllowUnknown(device_args);
+  TestVectorLeafPrediction(&sycl_ctx);
+}
+
+}  // namespace xgboost
--- a/tests/cpp/predictor/test_cpu_predictor.cc
+++ b/tests/cpp/predictor/test_cpu_predictor.cc
@@ -18,92 +18,17 @@

 namespace xgboost {

-namespace {
-void TestBasic(DMatrix* dmat) {
-  Context ctx;
-  std::unique_ptr<Predictor> cpu_predictor =
-      std::unique_ptr<Predictor>(Predictor::Create("cpu_predictor", &ctx));
-
-  size_t const kRows = dmat->Info().num_row_;
-  size_t const kCols = dmat->Info().num_col_;
-
-  LearnerModelParam mparam{MakeMP(kCols, .0, 1)};
-
-  ctx.UpdateAllowUnknown(Args{});
-  gbm::GBTreeModel model = CreateTestModel(&mparam, &ctx);
-
-  // Test predict batch
-  PredictionCacheEntry out_predictions;
-  cpu_predictor->InitOutPredictions(dmat->Info(), &out_predictions.predictions, model);
-  cpu_predictor->PredictBatch(dmat, &out_predictions, model, 0);
-
-  std::vector<float>& out_predictions_h = out_predictions.predictions.HostVector();
-  for (size_t i = 0; i < out_predictions.predictions.Size(); i++) {
-    ASSERT_EQ(out_predictions_h[i], 1.5);
-  }
-
-  // Test predict instance
-  auto const& batch = *dmat->GetBatches<xgboost::SparsePage>().begin();
-  auto page = batch.GetView();
-  for (size_t i = 0; i < batch.Size(); i++) {
-    std::vector<float> instance_out_predictions;
-    cpu_predictor->PredictInstance(page[i], &instance_out_predictions, model, 0,
-                                   dmat->Info().IsColumnSplit());
-    ASSERT_EQ(instance_out_predictions[0], 1.5);
-  }
-
-  // Test predict leaf
-  HostDeviceVector<float> leaf_out_predictions;
-  cpu_predictor->PredictLeaf(dmat, &leaf_out_predictions, model);
-  auto const& h_leaf_out_predictions = leaf_out_predictions.ConstHostVector();
-  for (auto v : h_leaf_out_predictions) {
-    ASSERT_EQ(v, 0);
-  }
-
-  if (dmat->Info().IsColumnSplit()) {
-    // Predict contribution is not supported for column split.
-    return;
-  }
-
-  // Test predict contribution
-  HostDeviceVector<float> out_contribution_hdv;
-  auto& out_contribution = out_contribution_hdv.HostVector();
-  cpu_predictor->PredictContribution(dmat, &out_contribution_hdv, model);
-  ASSERT_EQ(out_contribution.size(), kRows * (kCols + 1));
-  for (size_t i = 0; i < out_contribution.size(); ++i) {
-    auto const& contri = out_contribution[i];
-    // shift 1 for bias, as test tree is a decision dump, only global bias is
-    // filled with LeafValue().
-    if ((i + 1) % (kCols + 1) == 0) {
-      ASSERT_EQ(out_contribution.back(), 1.5f);
-    } else {
-      ASSERT_EQ(contri, 0);
-    }
-  }
-  // Test predict contribution (approximate method)
-  cpu_predictor->PredictContribution(dmat, &out_contribution_hdv, model, 0, nullptr, true);
-  for (size_t i = 0; i < out_contribution.size(); ++i) {
-    auto const& contri = out_contribution[i];
-    // shift 1 for bias, as test tree is a decision dump, only global bias is
-    // filled with LeafValue().
-    if ((i + 1) % (kCols + 1) == 0) {
-      ASSERT_EQ(out_contribution.back(), 1.5f);
-    } else {
-      ASSERT_EQ(contri, 0);
-    }
-  }
-}
-}  // anonymous namespace
-
 TEST(CpuPredictor, Basic) {
+  Context ctx;
  size_t constexpr kRows = 5;
  size_t constexpr kCols = 5;
  auto dmat = RandomDataGenerator(kRows, kCols, 0).GenerateDMatrix();
-  TestBasic(dmat.get());
+  TestBasic(dmat.get(), &ctx);
 }

 namespace {
 void TestColumnSplit() {
+  Context ctx;
  size_t constexpr kRows = 5;
  size_t constexpr kCols = 5;
  auto dmat = RandomDataGenerator(kRows, kCols, 0).GenerateDMatrix();
@@ -112,7 +37,7 @@ void TestColumnSplit() {
  auto const rank = collective::GetRank();
  dmat = std::unique_ptr<DMatrix>{dmat->SliceCol(world_size, rank)};

-  TestBasic(dmat.get());
+  TestBasic(dmat.get(), &ctx);
 }
 }  // anonymous namespace

@@ -132,10 +57,11 @@ TEST(CpuPredictor, IterationRangeColmnSplit) {
 }

 TEST(CpuPredictor, ExternalMemory) {
+  Context ctx;
  size_t constexpr kPageSize = 64, kEntriesPerCol = 3;
  size_t constexpr kEntries = kPageSize * kEntriesPerCol * 2;
  std::unique_ptr<DMatrix> dmat = CreateSparsePageDMatrix(kEntries);
-  TestBasic(dmat.get());
+  TestBasic(dmat.get(), &ctx);
 }

 TEST(CpuPredictor, InplacePredict) {
@@ -235,12 +161,14 @@ TEST(CPUPredictor, CategoricalPredictionColumnSplit) {
 }

 TEST(CPUPredictor, CategoricalPredictLeaf) {
-  TestCategoricalPredictLeaf(false, false);
+  Context ctx;
+  TestCategoricalPredictLeaf(&ctx, false);
 }

 TEST(CPUPredictor, CategoricalPredictLeafColumnSplit) {
  auto constexpr kWorldSize = 2;
-  RunWithInMemoryCommunicator(kWorldSize, TestCategoricalPredictLeaf, false, true);
+  Context ctx;
+  RunWithInMemoryCommunicator(kWorldSize, TestCategoricalPredictLeaf, &ctx, true);
 }

 TEST(CpuPredictor, UpdatePredictionCache) {
--- a/tests/cpp/predictor/test_gpu_predictor.cu
+++ b/tests/cpp/predictor/test_gpu_predictor.cu
@@ -289,11 +289,13 @@ TEST_F(MGPUPredictorTest, CategoricalPredictionColumnSplit) {
 }

 TEST(GPUPredictor, CategoricalPredictLeaf) {
-  TestCategoricalPredictLeaf(true, false);
+  auto ctx = MakeCUDACtx(common::AllVisibleGPUs() == 1 ? 0 : collective::GetRank());
+  TestCategoricalPredictLeaf(&ctx, false);
 }

 TEST_F(MGPUPredictorTest, CategoricalPredictionLeafColumnSplit) {
-  RunWithInMemoryCommunicator(world_size_, TestCategoricalPredictLeaf, true, true);
+  // Build the context inside the callable so that collective::GetRank() is evaluated
+  // by each worker rather than once before the workers start; a single up-front
+  // context would give every worker the same device ordinal.
+  // NOTE(review): assumes RunWithInMemoryCommunicator accepts a zero-argument
+  // callable -- confirm against its declaration.
+  RunWithInMemoryCommunicator(world_size_, [] {
+    auto ctx = MakeCUDACtx(common::AllVisibleGPUs() == 1 ? 0 : collective::GetRank());
+    TestCategoricalPredictLeaf(&ctx, true);
+  });
 }

 TEST(GPUPredictor, PredictLeafBasic) {
--- a/tests/cpp/predictor/test_predictor.cc
+++ b/tests/cpp/predictor/test_predictor.cc
@@ -26,6 +26,79 @@
 #include "xgboost/tree_model.h"                   // for RegTree

 namespace xgboost {
+
+/**
+ * \brief Shared basic checks for the predictor selected by a context.
+ *
+ * Builds the predictor with CreatePredictorForTest and a model with CreateTestModel,
+ * then verifies batch, per-instance, leaf and contribution predictions against the
+ * constants expected for that model: 1.5 for predictions and for each row's bias
+ * contribution, 0 for leaf indices and for every feature contribution.
+ *
+ * \param dmat Input data. Contribution checks are skipped when it is column-split.
+ * \param ctx  Context used to create both the predictor and the test model.
+ */
+void TestBasic(DMatrix* dmat, Context const *ctx) {
+  auto predictor = std::unique_ptr<Predictor>(CreatePredictorForTest(ctx));
+
+  size_t const kRows = dmat->Info().num_row_;
+  size_t const kCols = dmat->Info().num_col_;
+
+  LearnerModelParam mparam{MakeMP(kCols, .0, 1)};
+
+  gbm::GBTreeModel model = CreateTestModel(&mparam, ctx);
+
+  // Test predict batch
+  PredictionCacheEntry out_predictions;
+  predictor->InitOutPredictions(dmat->Info(), &out_predictions.predictions, model);
+  predictor->PredictBatch(dmat, &out_predictions, model, 0);
+
+  std::vector<float>& out_predictions_h = out_predictions.predictions.HostVector();
+  for (size_t i = 0; i < out_predictions.predictions.Size(); i++) {
+    ASSERT_EQ(out_predictions_h[i], 1.5);
+  }
+
+  // Test predict instance (rows of the first SparsePage batch only)
+  auto const& batch = *dmat->GetBatches<xgboost::SparsePage>().begin();
+  auto page = batch.GetView();
+  for (size_t i = 0; i < batch.Size(); i++) {
+    std::vector<float> instance_out_predictions;
+    predictor->PredictInstance(page[i], &instance_out_predictions, model, 0,
+                               dmat->Info().IsColumnSplit());
+    ASSERT_EQ(instance_out_predictions[0], 1.5);
+  }
+
+  // Test predict leaf
+  HostDeviceVector<float> leaf_out_predictions;
+  predictor->PredictLeaf(dmat, &leaf_out_predictions, model);
+  auto const& h_leaf_out_predictions = leaf_out_predictions.ConstHostVector();
+  for (auto v : h_leaf_out_predictions) {
+    ASSERT_EQ(v, 0);
+  }
+
+  if (dmat->Info().IsColumnSplit()) {
+    // Predict contribution is not supported for column split.
+    return;
+  }
+
+  // Each row holds kCols feature contributions followed by one bias term.  As the test
+  // tree is a decision stump, only the bias slot is filled with LeafValue().
+  auto check_contributions = [&](HostDeviceVector<float> const& contribs) {
+    // Take the host view only after the predictor has written the output.
+    auto const& h_contribs = contribs.ConstHostVector();
+    ASSERT_EQ(h_contribs.size(), kRows * (kCols + 1));
+    for (size_t i = 0; i < h_contribs.size(); ++i) {
+      if ((i + 1) % (kCols + 1) == 0) {
+        // Check this row's own bias entry, not just the last element of the output.
+        ASSERT_EQ(h_contribs[i], 1.5f);
+      } else {
+        ASSERT_EQ(h_contribs[i], 0);
+      }
+    }
+  };
+
+  // Test predict contribution
+  HostDeviceVector<float> out_contribution_hdv;
+  predictor->PredictContribution(dmat, &out_contribution_hdv, model);
+  check_contributions(out_contribution_hdv);
+
+  // Test predict contribution (approximate method)
+  predictor->PredictContribution(dmat, &out_contribution_hdv, model, 0, nullptr, true);
+  check_contributions(out_contribution_hdv);
+}
+
 TEST(Predictor, PredictionCache) {
  size_t constexpr kRows = 16, kCols = 4;

@@ -64,7 +137,7 @@ void TestTrainingPrediction(Context const *ctx, size_t rows, size_t bins,
                          {"num_feature", std::to_string(kCols)},
                          {"num_class", std::to_string(kClasses)},
                          {"max_bin", std::to_string(bins)},
-                          {"device", ctx->DeviceName()}});
+                          {"device", ctx->IsSycl() ? "cpu" : ctx->DeviceName()}});
  learner->Configure();

  for (size_t i = 0; i < kIters; ++i) {
@@ -151,7 +224,7 @@ std::unique_ptr<Learner> LearnerForTest(Context const *ctx, std::shared_ptr<DMat
                                        size_t iters, size_t forest = 1) {
  std::unique_ptr<Learner> learner{Learner::Create({dmat})};
  learner->SetParams(
-      Args{{"num_parallel_tree", std::to_string(forest)}, {"device", ctx->DeviceName()}});
+      Args{{"num_parallel_tree", std::to_string(forest)}, {"device", ctx->IsSycl() ? "cpu" : ctx->DeviceName()}});
  for (size_t i = 0; i < iters; ++i) {
    learner->UpdateOneIter(i, dmat);
  }
@@ -305,11 +378,7 @@ void TestCategoricalPrediction(bool use_gpu, bool is_column_split) {
  ASSERT_EQ(out_predictions.predictions.HostVector()[0], left_weight + score);
 }

-void TestCategoricalPredictLeaf(bool use_gpu, bool is_column_split) {
-  Context ctx;
-  if (use_gpu) {
-    ctx = MakeCUDACtx(common::AllVisibleGPUs() == 1 ? 0 : collective::GetRank());
-  }
+void TestCategoricalPredictLeaf(Context const *ctx, bool is_column_split) {
  size_t constexpr kCols = 10;
  PredictionCacheEntry out_predictions;

@@ -320,10 +389,10 @@ void TestCategoricalPredictLeaf(bool use_gpu, bool is_column_split) {
  float left_weight = 1.3f;
  float right_weight = 1.7f;

-  gbm::GBTreeModel model(&mparam, &ctx);
+  gbm::GBTreeModel model(&mparam, ctx);
  GBTreeModelForTest(&model, split_ind, split_cat, left_weight, right_weight);

-  std::unique_ptr<Predictor> predictor{CreatePredictorForTest(&ctx)};
+  std::unique_ptr<Predictor> predictor{CreatePredictorForTest(ctx)};

  std::vector<float> row(kCols);
  row[split_ind] = split_cat;
@@ -363,7 +432,6 @@ void TestIterationRange(Context const* ctx) {
  HostDeviceVector<float> out_predt_sliced;
  HostDeviceVector<float> out_predt_ranged;

-  // margin
  {
    sliced->Predict(dmat, true, &out_predt_sliced, 0, 0, false, false, false, false, false);
    learner->Predict(dmat, true, &out_predt_ranged, 0, lend, false, false, false, false, false);
@@ -519,6 +587,8 @@ void TestSparsePrediction(Context const *ctx, float sparsity) {

  learner.reset(Learner::Create({Xy}));
  learner->LoadModel(model);
+  learner->SetParam("device", ctx->DeviceName());
+  learner->Configure();

  if (ctx->IsCUDA()) {
    learner->SetParam("tree_method", "gpu_hist");
--- a/tests/cpp/predictor/test_predictor.h
+++ b/tests/cpp/predictor/test_predictor.h
@@ -34,6 +34,8 @@ inline gbm::GBTreeModel CreateTestModel(LearnerModelParam const* param, Context
 inline auto CreatePredictorForTest(Context const* ctx) {
  if (ctx->IsCPU()) {
    return Predictor::Create("cpu_predictor", ctx);
+  } else if (ctx->IsSycl()) {
+    return Predictor::Create("sycl_predictor", ctx);
  } else {
    return Predictor::Create("gpu_predictor", ctx);
  }
@@ -83,6 +85,8 @@ void TestPredictionFromGradientIndex(Context const* ctx, size_t rows, size_t col
  }
 }

+void TestBasic(DMatrix* dmat, Context const * ctx);
+
 // p_full and p_hist should come from the same data set.
 void TestTrainingPrediction(Context const* ctx, size_t rows, size_t bins,
                            std::shared_ptr<DMatrix> p_full, std::shared_ptr<DMatrix> p_hist);
@@ -98,7 +102,7 @@ void TestCategoricalPrediction(bool use_gpu, bool is_column_split);

 void TestPredictionWithLesserFeaturesColumnSplit(bool use_gpu);

-void TestCategoricalPredictLeaf(bool use_gpu, bool is_column_split);
+void TestCategoricalPredictLeaf(Context const *ctx, bool is_column_split);

 void TestIterationRange(Context const* ctx);