[SYCL] Implementation of HostDeviceVector (#10842)

This commit is contained in:
Dmitry Razdoburdin
2024-09-24 22:45:17 +02:00
committed by GitHub
parent bc69a3e877
commit 2179baa50c
25 changed files with 937 additions and 282 deletions

View File

@@ -4,8 +4,36 @@
#pragma once
#include "../helpers.h"
#include "../../plugin/sycl/device_manager.h"
#include "../../plugin/sycl/data.h"
namespace xgboost::sycl {
// Applies `fn` element-wise, in place, to `n_data` elements of `device_data`
// using the SYCL queue associated with `device`. Blocks until the kernel
// completes (`.wait()` on the submitted event).
// `device_data` must be device-accessible memory of at least `n_data` elements.
template<typename T, typename Fn>
void TransformOnDeviceData(DeviceOrd device, T* device_data, size_t n_data, Fn&& fn) {
  sycl::DeviceManager device_manager;
  ::sycl::queue* qu = device_manager.GetQueue(device);
  qu->submit([&](::sycl::handler& cgh) {
    // One work-item per element; each item rewrites its own element only,
    // so no synchronization between items is needed.
    cgh.parallel_for<>(::sycl::range<1>(n_data), [=](::sycl::item<1> nid) {
      const size_t i = nid.get_id(0);
      device_data[i] = fn(device_data[i]);
    });
  }).wait();
}
// Copies `n_data` elements from device memory back to the host and checks
// them against `host_data` with EXPECT_NEAR (tolerance `eps`; default T()
// i.e. exact equality for integral types).
template<typename T>
void VerifyOnDeviceData(DeviceOrd device, const T* device_data, const T* host_data, size_t n_data, T eps = T()) {
  sycl::DeviceManager device_manager;
  ::sycl::queue* qu = device_manager.GetQueue(device);
  // Stage the device buffer into a host-side copy before comparing.
  std::vector<T> copy_device_data(n_data);
  qu->memcpy(copy_device_data.data(), device_data, n_data * sizeof(T)).wait();
  for (size_t i = 0; i < n_data; ++i) {
    EXPECT_NEAR(copy_device_data[i], host_data[i], eps);
  }
}
template<typename T, typename Container>
void VerifySyclVector(const USMVector<T, MemoryType::shared>& sycl_vector,
const Container& host_vector, T eps = T()) {

View File

@@ -40,10 +40,10 @@ void GHistBuilderTest(float sparsity, bool force_atomic_use) {
RowSetCollection row_set_collection;
auto& row_indices = row_set_collection.Data();
row_indices.Resize(&qu, num_rows);
row_indices.Resize(qu, num_rows);
size_t* p_row_indices = row_indices.Data();
qu.submit([&](::sycl::handler& cgh) {
qu->submit([&](::sycl::handler& cgh) {
cgh.parallel_for<>(::sycl::range<1>(num_rows),
[p_row_indices](::sycl::item<1> pid) {
const size_t idx = pid.get_id(0);
@@ -58,23 +58,23 @@ void GHistBuilderTest(float sparsity, bool force_atomic_use) {
{0.1f, 0.2f}, {0.3f, 0.4f}, {0.5f, 0.6f}, {0.7f, 0.8f},
{0.9f, 0.1f}, {0.2f, 0.3f}, {0.4f, 0.5f}, {0.6f, 0.7f}};
CHECK_EQ(gpair.size(), num_rows);
USMVector<GradientPair, MemoryType::on_device> gpair_device(&qu, gpair);
USMVector<GradientPair, MemoryType::on_device> gpair_device(qu, gpair);
std::vector<GradientSumT> hist_host(2*n_bins);
GHistRow<GradientSumT, MemoryType::on_device> hist(&qu, 2 * n_bins);
GHistRow<GradientSumT, MemoryType::on_device> hist(qu, 2 * n_bins);
::sycl::event event;
const size_t nblocks = 2;
GHistRow<GradientSumT, MemoryType::on_device> hist_buffer(&qu, 2 * nblocks * n_bins);
GHistRow<GradientSumT, MemoryType::on_device> hist_buffer(qu, 2 * nblocks * n_bins);
InitHist(qu, &hist, hist.Size(), &event);
InitHist(qu, &hist_buffer, hist_buffer.Size(), &event);
event = builder.BuildHist(gpair_device, row_set_collection[0], gmat_sycl, &hist,
sparsity < eps , &hist_buffer, event, force_atomic_use);
qu.memcpy(hist_host.data(), hist.Data(),
qu->memcpy(hist_host.data(), hist.Data(),
2 * n_bins * sizeof(GradientSumT), event);
qu.wait_and_throw();
qu->wait_and_throw();
// Build hist on host to compare
std::vector<GradientSumT> hist_desired(2*n_bins);
@@ -104,21 +104,21 @@ void GHistSubtractionTest() {
::sycl::event event;
std::vector<GradientSumT> hist1_host = {0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8};
GHistType hist1(&qu, 2 * n_bins);
event = qu.memcpy(hist1.Data(), hist1_host.data(),
2 * n_bins * sizeof(GradientSumT), event);
GHistType hist1(qu, 2 * n_bins);
event = qu->memcpy(hist1.Data(), hist1_host.data(),
2 * n_bins * sizeof(GradientSumT), event);
std::vector<GradientSumT> hist2_host = {0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2, 0.1};
GHistType hist2(&qu, 2 * n_bins);
event = qu.memcpy(hist2.Data(), hist2_host.data(),
GHistType hist2(qu, 2 * n_bins);
event = qu->memcpy(hist2.Data(), hist2_host.data(),
2 * n_bins * sizeof(GradientSumT), event);
std::vector<GradientSumT> hist3_host(2 * n_bins);
GHistType hist3(&qu, 2 * n_bins);
GHistType hist3(qu, 2 * n_bins);
event = SubtractionHist(qu, &hist3, hist1, hist2, n_bins, event);
qu.memcpy(hist3_host.data(), hist3.Data(),
qu->memcpy(hist3_host.data(), hist3.Data(),
2 * n_bins * sizeof(GradientSumT), event);
qu.wait_and_throw();
qu->wait_and_throw();
std::vector<GradientSumT> hist3_desired(2 * n_bins);
for (size_t idx = 0; idx < 2 * n_bins; ++idx) {

View File

@@ -19,7 +19,7 @@ template <typename GradientSumT>
class TestHistUpdater : public HistUpdater<GradientSumT> {
public:
TestHistUpdater(const Context* ctx,
::sycl::queue qu,
::sycl::queue* qu,
const xgboost::tree::TrainParam& param,
FeatureInteractionConstraintHost int_constraints_,
DMatrix const* fmat) : HistUpdater<GradientSumT>(ctx, qu, param,
@@ -115,10 +115,10 @@ void TestHistUpdaterSampling(const xgboost::tree::TrainParam& param) {
TestHistUpdater<GradientSumT> updater(&ctx, qu, param, int_constraints, p_fmat.get());
USMVector<size_t, MemoryType::on_device> row_indices_0(&qu, num_rows);
USMVector<size_t, MemoryType::on_device> row_indices_1(&qu, num_rows);
USMVector<GradientPair, MemoryType::on_device> gpair(&qu, num_rows);
GenerateRandomGPairs(&qu, gpair.Data(), num_rows, true);
USMVector<size_t, MemoryType::on_device> row_indices_0(qu, num_rows);
USMVector<size_t, MemoryType::on_device> row_indices_1(qu, num_rows);
USMVector<GradientPair, MemoryType::on_device> gpair(qu, num_rows);
GenerateRandomGPairs(qu, gpair.Data(), num_rows, true);
updater.TestInitSampling(gpair, &row_indices_0);
@@ -132,8 +132,8 @@ void TestHistUpdaterSampling(const xgboost::tree::TrainParam& param) {
if (row_indices_1.Size() == n_samples) {
std::vector<size_t> row_indices_0_host(n_samples);
std::vector<size_t> row_indices_1_host(n_samples);
qu.memcpy(row_indices_0_host.data(), row_indices_0.Data(), n_samples * sizeof(size_t)).wait();
qu.memcpy(row_indices_1_host.data(), row_indices_1.Data(), n_samples * sizeof(size_t)).wait();
qu->memcpy(row_indices_0_host.data(), row_indices_0.Data(), n_samples * sizeof(size_t)).wait();
qu->memcpy(row_indices_1_host.data(), row_indices_1.Data(), n_samples * sizeof(size_t)).wait();
// The order in row_indices_0 and row_indices_1 can be different
std::set<size_t> rows;
@@ -168,8 +168,8 @@ void TestHistUpdaterInitData(const xgboost::tree::TrainParam& param, bool has_ne
TestHistUpdater<GradientSumT> updater(&ctx, qu, param, int_constraints, p_fmat.get());
USMVector<GradientPair, MemoryType::on_device> gpair(&qu, num_rows);
GenerateRandomGPairs(&qu, gpair.Data(), num_rows, has_neg_hess);
USMVector<GradientPair, MemoryType::on_device> gpair(qu, num_rows);
GenerateRandomGPairs(qu, gpair.Data(), num_rows, has_neg_hess);
DeviceMatrix dmat;
dmat.Init(qu, p_fmat.get());
@@ -181,7 +181,7 @@ void TestHistUpdaterInitData(const xgboost::tree::TrainParam& param, bool has_ne
auto& row_indices = row_set_collection->Data();
std::vector<size_t> row_indices_host(row_indices.Size());
qu.memcpy(row_indices_host.data(), row_indices.DataConst(), row_indices.Size()*sizeof(size_t)).wait();
qu->memcpy(row_indices_host.data(), row_indices.DataConst(), row_indices.Size()*sizeof(size_t)).wait();
if (!has_neg_hess) {
for (size_t i = 0; i < num_rows; ++i) {
@@ -189,7 +189,7 @@ void TestHistUpdaterInitData(const xgboost::tree::TrainParam& param, bool has_ne
}
} else {
std::vector<GradientPair> gpair_host(num_rows);
qu.memcpy(gpair_host.data(), gpair.Data(), num_rows*sizeof(GradientPair)).wait();
qu->memcpy(gpair_host.data(), gpair.Data(), num_rows*sizeof(GradientPair)).wait();
std::set<size_t> rows;
for (size_t i = 0; i < num_rows; ++i) {
@@ -224,9 +224,9 @@ void TestHistUpdaterBuildHistogramsLossGuide(const xgboost::tree::TrainParam& pa
updater.SetHistSynchronizer(new BatchHistSynchronizer<GradientSumT>());
updater.SetHistRowsAdder(new BatchHistRowsAdder<GradientSumT>());
USMVector<GradientPair, MemoryType::on_device> gpair(&qu, num_rows);
USMVector<GradientPair, MemoryType::on_device> gpair(qu, num_rows);
auto* gpair_ptr = gpair.Data();
GenerateRandomGPairs(&qu, gpair_ptr, num_rows, false);
GenerateRandomGPairs(qu, gpair_ptr, num_rows, false);
DeviceMatrix dmat;
dmat.Init(qu, p_fmat.get());
@@ -255,10 +255,10 @@ void TestHistUpdaterBuildHistogramsLossGuide(const xgboost::tree::TrainParam& pa
std::vector<xgboost::detail::GradientPairInternal<GradientSumT>> hist0_host(n_bins);
std::vector<xgboost::detail::GradientPairInternal<GradientSumT>> hist1_host(n_bins);
std::vector<xgboost::detail::GradientPairInternal<GradientSumT>> hist2_host(n_bins);
qu.memcpy(hist0_host.data(), (*hist)[0].DataConst(), sizeof(xgboost::detail::GradientPairInternal<GradientSumT>) * n_bins);
qu.memcpy(hist1_host.data(), (*hist)[1].DataConst(), sizeof(xgboost::detail::GradientPairInternal<GradientSumT>) * n_bins);
qu.memcpy(hist2_host.data(), (*hist)[2].DataConst(), sizeof(xgboost::detail::GradientPairInternal<GradientSumT>) * n_bins);
qu.wait();
qu->memcpy(hist0_host.data(), (*hist)[0].DataConst(), sizeof(xgboost::detail::GradientPairInternal<GradientSumT>) * n_bins);
qu->memcpy(hist1_host.data(), (*hist)[1].DataConst(), sizeof(xgboost::detail::GradientPairInternal<GradientSumT>) * n_bins);
qu->memcpy(hist2_host.data(), (*hist)[2].DataConst(), sizeof(xgboost::detail::GradientPairInternal<GradientSumT>) * n_bins);
qu->wait();
for (size_t idx_bin = 0; idx_bin < n_bins; ++idx_bin) {
EXPECT_NEAR(hist0_host[idx_bin].GetGrad(), hist1_host[idx_bin].GetGrad() + hist2_host[idx_bin].GetGrad(), 1e-6);
@@ -286,9 +286,9 @@ void TestHistUpdaterInitNewNode(const xgboost::tree::TrainParam& param, float sp
updater.SetHistSynchronizer(new BatchHistSynchronizer<GradientSumT>());
updater.SetHistRowsAdder(new BatchHistRowsAdder<GradientSumT>());
USMVector<GradientPair, MemoryType::on_device> gpair(&qu, num_rows);
USMVector<GradientPair, MemoryType::on_device> gpair(qu, num_rows);
auto* gpair_ptr = gpair.Data();
GenerateRandomGPairs(&qu, gpair_ptr, num_rows, false);
GenerateRandomGPairs(qu, gpair_ptr, num_rows, false);
DeviceMatrix dmat;
dmat.Init(qu, p_fmat.get());
@@ -308,7 +308,7 @@ void TestHistUpdaterInitNewNode(const xgboost::tree::TrainParam& param, float sp
GradStats<GradientSumT> grad_stat;
{
::sycl::buffer<GradStats<GradientSumT>> buff(&grad_stat, 1);
qu.submit([&](::sycl::handler& cgh) {
qu->submit([&](::sycl::handler& cgh) {
auto buff_acc = buff.template get_access<::sycl::access::mode::read_write>(cgh);
cgh.single_task<>([=]() {
for (size_t i = 0; i < num_rows; ++i) {
@@ -344,9 +344,9 @@ void TestHistUpdaterEvaluateSplits(const xgboost::tree::TrainParam& param) {
updater.SetHistSynchronizer(new BatchHistSynchronizer<GradientSumT>());
updater.SetHistRowsAdder(new BatchHistRowsAdder<GradientSumT>());
USMVector<GradientPair, MemoryType::on_device> gpair(&qu, num_rows);
USMVector<GradientPair, MemoryType::on_device> gpair(qu, num_rows);
auto* gpair_ptr = gpair.Data();
GenerateRandomGPairs(&qu, gpair_ptr, num_rows, false);
GenerateRandomGPairs(qu, gpair_ptr, num_rows, false);
DeviceMatrix dmat;
dmat.Init(qu, p_fmat.get());
@@ -378,7 +378,7 @@ void TestHistUpdaterEvaluateSplits(const xgboost::tree::TrainParam& param) {
std::vector<bst_float> best_loss_chg_des(1, -1);
{
::sycl::buffer<bst_float> best_loss_chg_buff(best_loss_chg_des.data(), 1);
qu.submit([&](::sycl::handler& cgh) {
qu->submit([&](::sycl::handler& cgh) {
auto best_loss_chg_acc = best_loss_chg_buff.template get_access<::sycl::access::mode::read_write>(cgh);
cgh.single_task<>([=]() {
for (size_t i = 1; i < size; ++i) {
@@ -426,15 +426,15 @@ void TestHistUpdaterApplySplit(const xgboost::tree::TrainParam& param, float spa
FeatureInteractionConstraintHost int_constraints;
TestHistUpdater<GradientSumT> updater(&ctx, qu, param, int_constraints, p_fmat.get());
USMVector<GradientPair, MemoryType::on_device> gpair(&qu, num_rows);
GenerateRandomGPairs(&qu, gpair.Data(), num_rows, false);
USMVector<GradientPair, MemoryType::on_device> gpair(qu, num_rows);
GenerateRandomGPairs(qu, gpair.Data(), num_rows, false);
auto* row_set_collection = updater.TestInitData(gmat, gpair, *p_fmat, tree);
updater.TestApplySplit(nodes, gmat, &tree);
// Copy indexes to host
std::vector<size_t> row_indices_host(num_rows);
qu.memcpy(row_indices_host.data(), row_set_collection->Data().Data(), sizeof(size_t)*num_rows).wait();
qu->memcpy(row_indices_host.data(), row_set_collection->Data().Data(), sizeof(size_t)*num_rows).wait();
// Reference Implementation
std::vector<size_t> row_indices_desired_host(num_rows);
@@ -448,7 +448,7 @@ void TestHistUpdaterApplySplit(const xgboost::tree::TrainParam& param, float spa
xgboost::tree::CommonRowPartitioner::FindSplitConditions(nodes, tree, gmat, &split_conditions);
common::PartitionBuilder partition_builder;
partition_builder.Init(&qu, n_nodes, [&](size_t node_in_set) {
partition_builder.Init(qu, n_nodes, [&](size_t node_in_set) {
const int32_t nid = nodes[node_in_set].nid;
return (*row_set_collection4verification)[nid].Size();
});
@@ -456,14 +456,14 @@ void TestHistUpdaterApplySplit(const xgboost::tree::TrainParam& param, float spa
::sycl::event event;
partition_builder.Partition(gmat, nodes, (*row_set_collection4verification),
split_conditions, &tree, &event);
qu.wait_and_throw();
qu->wait_and_throw();
for (size_t node_in_set = 0; node_in_set < n_nodes; node_in_set++) {
const int32_t nid = nodes[node_in_set].nid;
size_t* data_result = const_cast<size_t*>((*row_set_collection4verification)[nid].begin);
partition_builder.MergeToArray(node_in_set, data_result, &event);
}
qu.wait_and_throw();
qu->wait_and_throw();
const int32_t nid = nodes[0].nid;
n_left = partition_builder.GetNLeftElems(0);
@@ -472,7 +472,7 @@ void TestHistUpdaterApplySplit(const xgboost::tree::TrainParam& param, float spa
row_set_collection4verification->AddSplit(nid, tree[nid].LeftChild(),
tree[nid].RightChild(), n_left, n_right);
qu.memcpy(row_indices_desired_host.data(), row_set_collection4verification->Data().Data(), sizeof(size_t)*num_rows).wait();
qu->memcpy(row_indices_desired_host.data(), row_set_collection4verification->Data().Data(), sizeof(size_t)*num_rows).wait();
}
std::sort(row_indices_desired_host.begin(), row_indices_desired_host.begin() + n_left);
@@ -506,7 +506,7 @@ void TestHistUpdaterExpandWithLossGuide(const xgboost::tree::TrainParam& param)
gmat.Init(qu, &ctx, dmat, n_bins);
std::vector<GradientPair> gpair_host = {{1, 2}, {3, 1}, {1, 1}};
USMVector<GradientPair, MemoryType::on_device> gpair(&qu, gpair_host);
USMVector<GradientPair, MemoryType::on_device> gpair(qu, gpair_host);
RegTree tree;
FeatureInteractionConstraintHost int_constraints;
@@ -554,7 +554,7 @@ void TestHistUpdaterExpandWithDepthWise(const xgboost::tree::TrainParam& param)
gmat.Init(qu, &ctx, dmat, n_bins);
std::vector<GradientPair> gpair_host = {{1, 2}, {3, 1}, {1, 1}};
USMVector<GradientPair, MemoryType::on_device> gpair(&qu, gpair_host);
USMVector<GradientPair, MemoryType::on_device> gpair(qu, gpair_host);
RegTree tree;
FeatureInteractionConstraintHost int_constraints;

View File

@@ -0,0 +1,250 @@
/**
* Copyright 2018-2024, XGBoost contributors
*/
#include <gtest/gtest.h>
#include <numeric>
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-W#pragma-messages"
#include <xgboost/host_device_vector.h>
#pragma GCC diagnostic pop
#include "sycl_helpers.h"
namespace xgboost::common {
namespace {
// Creates an n-element vector on `device` and fills it with 0..n-1 from the
// host, asserting the expected host/device access-state transitions along
// the way (Resize on a device-placed vector grants device access; touching
// HostVector() transfers full access back to the host).
void InitHostDeviceVector(size_t n, DeviceOrd device, HostDeviceVector<int> *v) {
  // create the vector
  v->SetDevice(device);
  v->Resize(n);

  ASSERT_EQ(v->Size(), n);
  ASSERT_EQ(v->Device(), device);

  // ensure that the device has read-write access
  ASSERT_TRUE(v->DeviceCanRead());
  ASSERT_TRUE(v->DeviceCanWrite());

  // ensure that the host has no access
  ASSERT_FALSE(v->HostCanRead());
  ASSERT_FALSE(v->HostCanWrite());

  // fill in the data on the host
  std::vector<int>& data_h = v->HostVector();
  // ensure that the host has full access, while the device has none
  ASSERT_TRUE(v->HostCanRead());
  ASSERT_TRUE(v->HostCanWrite());
  ASSERT_FALSE(v->DeviceCanRead());
  ASSERT_FALSE(v->DeviceCanWrite());

  ASSERT_EQ(data_h.size(), n);
  std::iota(data_h.begin(), data_h.end(), 0);
}
// Increments every element of `v` by one on the device.
// DevicePointer() requests write access, so the vector ends up in
// device read-write state, which the assertion pins down.
void PlusOne(HostDeviceVector<int> *v) {
  // Use `int` in the lambda to match the element type and avoid a
  // silent int -> size_t -> int round trip.
  sycl::TransformOnDeviceData(v->Device(), v->DevicePointer(), v->Size(),
                              [=](int a) { return a + 1; });
  ASSERT_TRUE(v->DeviceCanWrite());
}
// Verifies that `v` holds the sequence first, first+1, ... on the device and
// that the access flags match `access`. The first verification uses the
// const device pointer (read access only); the second uses the mutable
// device pointer, which upgrades the device to read-write and revokes all
// host access.
void CheckDevice(HostDeviceVector<int>* v,
                 size_t size,
                 unsigned int first,
                 GPUAccess access) {
  ASSERT_EQ(v->Size(), size);

  std::vector<int> desired_data(size);
  std::iota(desired_data.begin(), desired_data.end(), first);
  sycl::VerifyOnDeviceData(v->Device(), v->ConstDevicePointer(), desired_data.data(), size);
  ASSERT_TRUE(v->DeviceCanRead());
  // ensure that the device has at most the access specified by access
  ASSERT_EQ(v->DeviceCanWrite(), access == GPUAccess::kWrite);
  ASSERT_EQ(v->HostCanRead(), access == GPUAccess::kRead);
  ASSERT_FALSE(v->HostCanWrite());

  // Re-verify through the mutable pointer: this grants device write access.
  sycl::VerifyOnDeviceData(v->Device(), v->DevicePointer(), desired_data.data(), size);
  ASSERT_TRUE(v->DeviceCanRead());
  ASSERT_TRUE(v->DeviceCanWrite());
  ASSERT_FALSE(v->HostCanRead());
  ASSERT_FALSE(v->HostCanWrite());
}
// Reads the data back on the host (expecting i+1 in slot i, i.e. after
// PlusOne) and checks the resulting access flags. `access` selects the
// accessor: kNone uses the mutable HostVector() (host takes full access),
// anything else uses ConstHostVector() (host read, device keeps read).
void CheckHost(HostDeviceVector<int> *v, GPUAccess access) {
  const std::vector<int>& data_h = access == GPUAccess::kNone ?
      v->HostVector() : v->ConstHostVector();
  for (size_t i = 0; i < v->Size(); ++i) {
    ASSERT_EQ(data_h.at(i), i + 1);
  }
  ASSERT_TRUE(v->HostCanRead());
  ASSERT_EQ(v->HostCanWrite(), access == GPUAccess::kNone);
  ASSERT_EQ(v->DeviceCanRead(), access == GPUAccess::kRead);
  // the devices should have no write access
  ASSERT_FALSE(v->DeviceCanWrite());
}
// End-to-end scenario: init on host, verify on device (read), transform on
// device, verify again (write), then read back on host with both const and
// mutable accessors. The call order is deliberate — each step checks the
// access state left behind by the previous one.
void TestHostDeviceVector(size_t n, DeviceOrd device) {
  HostDeviceVector<int> v;
  InitHostDeviceVector(n, device, &v);
  CheckDevice(&v, n, 0, GPUAccess::kRead);
  PlusOne(&v);
  CheckDevice(&v, n, 1, GPUAccess::kWrite);
  CheckHost(&v, GPUAccess::kRead);
  CheckHost(&v, GPUAccess::kNone);
}
TEST(SyclHostDeviceVector, Basic) {
  // Run the full init / transform / verify round trip on the default SYCL
  // device with a size that is not a multiple of a typical work-group size.
  const size_t kSize = 1001;
  TestHostDeviceVector(kSize, DeviceOrd::SyclDefault());
}
// Copying from an initialized vector must transfer both the data and the
// device placement; the source is destroyed before the copy is checked.
TEST(SyclHostDeviceVector, Copy) {
  size_t n = 1001;
  auto device = DeviceOrd::SyclDefault();

  HostDeviceVector<int> v;
  {
    // a separate scope to ensure that v1 is gone before further checks
    HostDeviceVector<int> v1;
    InitHostDeviceVector(n, device, &v1);
    v.Resize(v1.Size());
    v.Copy(v1);
  }
  // The copy must behave exactly like a freshly initialized vector.
  CheckDevice(&v, n, 0, GPUAccess::kRead);
  PlusOne(&v);
  CheckDevice(&v, n, 1, GPUAccess::kWrite);
  CheckHost(&v, GPUAccess::kRead);
  CheckHost(&v, GPUAccess::kNone);
}
// Fill() on a device-placed vector must run on the device (host access
// stays revoked) and write the value into every element.
TEST(SyclHostDeviceVector, Fill) {
  size_t n = 1001;
  auto device = DeviceOrd::SyclDefault();
  int val = 42;

  HostDeviceVector<int> v;
  v.SetDevice(device);
  v.Resize(n);

  ASSERT_TRUE(v.DeviceCanWrite());
  v.Fill(val);

  // Filling happened device-side: host still has no access.
  ASSERT_FALSE(v.HostCanRead());
  ASSERT_FALSE(v.HostCanWrite());
  ASSERT_TRUE(v.DeviceCanRead());
  ASSERT_TRUE(v.DeviceCanWrite());

  std::vector<int> desired_data(n, val);
  sycl::VerifyOnDeviceData(v.Device(), v.ConstDevicePointer(), desired_data.data(), n);
}
// Extend() must append the other vector's elements on the device.
// Checked in both directions: big += small, then small += (big + small),
// so the second result holds n0 + 2*n1 elements.
TEST(SyclHostDeviceVector, Extend) {
  size_t n0 = 1001;
  size_t n1 = 17;
  auto device = DeviceOrd::SyclDefault();
  int val = 42;

  HostDeviceVector<int> v0;
  v0.SetDevice(device);
  v0.Resize(n0);
  v0.Fill(val);

  HostDeviceVector<int> v1;
  v1.SetDevice(device);
  v1.Resize(n1);
  v1.Fill(val);

  v0.Extend(v1);
  {
    std::vector<int> desired_data(n0+n1, val);
    sycl::VerifyOnDeviceData(v0.Device(), v0.ConstDevicePointer(), desired_data.data(), n0+n1);
  }

  v1.Extend(v0);
  {
    // v1 (n1 elements) extended by v0 (now n0+n1) -> n0 + 2*n1 in total.
    std::vector<int> desired_data(n0+2*n1, val);
    sycl::VerifyOnDeviceData(v1.Device(), v1.ConstDevicePointer(), desired_data.data(), n0+2*n1);
  }
}
// Moving a vector to the SYCL device and back to the CPU must preserve
// size, device ordinal, and content.
TEST(SyclHostDeviceVector, SetDevice) {
  std::vector<int> h_vec (2345);
  for (size_t i = 0; i < h_vec.size(); ++i) {
    h_vec[i] = i;
  }
  HostDeviceVector<int> vec (h_vec);
  auto device = DeviceOrd::SyclDefault();

  vec.SetDevice(device);
  ASSERT_EQ(vec.Size(), h_vec.size());
  auto span = vec.DeviceSpan();  // sync to device

  vec.SetDevice(DeviceOrd::CPU());  // pull back to cpu.
  ASSERT_EQ(vec.Size(), h_vec.size());
  ASSERT_EQ(vec.Device(), DeviceOrd::CPU());

  auto h_vec_1 = vec.HostVector();
  ASSERT_TRUE(std::equal(h_vec_1.cbegin(), h_vec_1.cend(), h_vec.cbegin()));
}
// Device and host spans must alias the vector's own storage pointers, and
// the const/mutable accessors must leave the expected host access flags.
TEST(SyclHostDeviceVector, Span) {
  HostDeviceVector<float> vec {1.0f, 2.0f, 3.0f, 4.0f};
  vec.SetDevice(DeviceOrd::SyclDefault());

  auto span = vec.DeviceSpan();
  ASSERT_EQ(vec.Size(), span.size());
  ASSERT_EQ(vec.DevicePointer(), span.data());

  auto const_span = vec.ConstDeviceSpan();
  ASSERT_EQ(vec.Size(), const_span.size());
  ASSERT_EQ(vec.ConstDevicePointer(), const_span.data());

  // Const host span grants host read access only...
  auto h_span = vec.ConstHostSpan();
  ASSERT_TRUE(vec.HostCanRead());
  ASSERT_FALSE(vec.HostCanWrite());
  ASSERT_EQ(h_span.size(), vec.Size());
  ASSERT_EQ(h_span.data(), vec.ConstHostPointer());

  // ...while the mutable one upgrades it to write access.
  h_span = vec.HostSpan();
  ASSERT_TRUE(vec.HostCanWrite());
}
// Move construction must take the contents: the target is non-empty and the
// moved-from vector is left empty (this implementation guarantees an empty
// moved-from state, not merely valid-but-unspecified).
TEST(SyclHostDeviceVector, Empty) {
  HostDeviceVector<float> vec {1.0f, 2.0f, 3.0f, 4.0f};
  HostDeviceVector<float> another { std::move(vec) };
  ASSERT_FALSE(another.Empty());
  ASSERT_TRUE(vec.Empty());
}
// Growing a 4-element vector to 7 with fill value 3.0f must keep the
// original prefix and fill the tail, regardless of whether the data lives
// on the device (read-only or writable) or on the host at the time of the
// resize.
TEST(SyclHostDeviceVector, Resize) {
  // Shared post-condition: [1, 2, 3, 4, 3, 3, 3].
  auto check = [&](HostDeviceVector<float> const& vec) {
    auto const& h_vec = vec.ConstHostSpan();
    for (std::size_t i = 0; i < 4; ++i) {
      ASSERT_EQ(h_vec[i], i + 1);
    }
    for (std::size_t i = 4; i < vec.Size(); ++i) {
      ASSERT_EQ(h_vec[i], 3.0);
    }
  };
  {
    // Resize while the device holds the data (read first, then write).
    HostDeviceVector<float> vec{1.0f, 2.0f, 3.0f, 4.0f};
    vec.SetDevice(DeviceOrd::SyclDefault());
    vec.ConstDeviceSpan();
    ASSERT_TRUE(vec.DeviceCanRead());
    ASSERT_FALSE(vec.DeviceCanWrite());
    vec.DeviceSpan();
    vec.Resize(7, 3.0f);
    ASSERT_TRUE(vec.DeviceCanWrite());
    check(vec);
  }
  {
    // Vector constructed directly on the device.
    HostDeviceVector<float> vec{{1.0f, 2.0f, 3.0f, 4.0f}, DeviceOrd::SyclDefault()};
    ASSERT_TRUE(vec.DeviceCanWrite());
    vec.Resize(7, 3.0f);
    ASSERT_TRUE(vec.DeviceCanWrite());
    check(vec);
  }
  {
    // Host-only resize: host keeps write access.
    HostDeviceVector<float> vec{1.0f, 2.0f, 3.0f, 4.0f};
    ASSERT_TRUE(vec.HostCanWrite());
    vec.Resize(7, 3.0f);
    ASSERT_TRUE(vec.HostCanWrite());
    check(vec);
  }
}
}
} // namespace xgboost::common

View File

@@ -32,10 +32,10 @@ void TestPartitioning(float sparsity, int max_bins) {
RowSetCollection row_set_collection;
auto& row_indices = row_set_collection.Data();
row_indices.Resize(&qu, num_rows);
row_indices.Resize(qu, num_rows);
size_t* p_row_indices = row_indices.Data();
qu.submit([&](::sycl::handler& cgh) {
qu->submit([&](::sycl::handler& cgh) {
cgh.parallel_for<>(::sycl::range<1>(num_rows),
[p_row_indices](::sycl::item<1> pid) {
const size_t idx = pid.get_id(0);
@@ -49,7 +49,7 @@ void TestPartitioning(float sparsity, int max_bins) {
const size_t n_nodes = row_set_collection.Size();
PartitionBuilder partition_builder;
partition_builder.Init(&qu, n_nodes, [&](size_t nid) {
partition_builder.Init(qu, n_nodes, [&](size_t nid) {
return row_set_collection[nid].Size();
});
@@ -60,11 +60,11 @@ void TestPartitioning(float sparsity, int max_bins) {
std::vector<int32_t> split_conditions = {2};
partition_builder.Partition(gmat, nodes, row_set_collection,
split_conditions, &tree, &event);
qu.wait_and_throw();
qu->wait_and_throw();
size_t* data_result = const_cast<size_t*>(row_set_collection[0].begin);
partition_builder.MergeToArray(0, data_result, &event);
qu.wait_and_throw();
qu->wait_and_throw();
bst_float split_pt = gmat.cut.Values()[split_conditions[0]];
@@ -99,8 +99,8 @@ void TestPartitioning(float sparsity, int max_bins) {
auto n_right = std::accumulate(ridx_right.begin(), ridx_right.end(), 0);
std::vector<size_t> row_indices_host(num_rows);
qu.memcpy(row_indices_host.data(), row_indices.Data(), num_rows * sizeof(size_t));
qu.wait_and_throw();
qu->memcpy(row_indices_host.data(), row_indices.Data(), num_rows * sizeof(size_t));
qu->wait_and_throw();
ASSERT_EQ(n_left, partition_builder.GetNLeftElems(0));
for (size_t i = 0; i < n_left; ++i) {
@@ -123,7 +123,7 @@ TEST(SyclPartitionBuilder, BasicTest) {
DeviceManager device_manager;
auto qu = device_manager.GetQueue(DeviceOrd::SyclDefault());
PartitionBuilder builder;
builder.Init(&qu, kNodes, [&](size_t i) {
builder.Init(qu, kNodes, [&](size_t i) {
return rows[i];
});
@@ -142,23 +142,23 @@ TEST(SyclPartitionBuilder, BasicTest) {
size_t n_left = rows_for_left_node[nid];
size_t n_right = rows[nid] - n_left;
qu.submit([&](::sycl::handler& cgh) {
qu->submit([&](::sycl::handler& cgh) {
cgh.parallel_for<>(::sycl::range<1>(n_left), [=](::sycl::id<1> pid) {
int row_id = first_row_id + pid[0];
rid_buff_ptr[pid[0]] = row_id;
});
});
qu.wait();
qu->wait();
first_row_id += n_left;
// We are storing indexes for the right side in the tail of the array to save some memory
qu.submit([&](::sycl::handler& cgh) {
qu->submit([&](::sycl::handler& cgh) {
cgh.parallel_for<>(::sycl::range<1>(n_right), [=](::sycl::id<1> pid) {
int row_id = first_row_id + pid[0];
rid_buff_ptr[rid_buff_size - pid[0] - 1] = row_id;
});
});
qu.wait();
qu->wait();
first_row_id += n_right;
builder.SetNLeftElems(nid, n_left);
@@ -170,7 +170,7 @@ TEST(SyclPartitionBuilder, BasicTest) {
size_t row_id = 0;
for(size_t nid = 0; nid < kNodes; ++nid) {
builder.MergeToArray(nid, v.data(), &event);
qu.wait();
qu->wait();
// Check that row_id for left side are correct
for(size_t j = 0; j < rows_for_left_node[nid]; ++j) {

View File

@@ -46,14 +46,15 @@ TEST(SyclObjective, LogisticRawGPair) {
}
TEST(SyclObjective, CPUvsSycl) {
Context ctx;
ctx.UpdateAllowUnknown(Args{{"device", "sycl"}});
Context ctx_sycl;
ctx_sycl.UpdateAllowUnknown(Args{{"device", "sycl"}});
ObjFunction * obj_sycl =
ObjFunction::Create("reg:squarederror_sycl", &ctx);
ObjFunction::Create("reg:squarederror_sycl", &ctx_sycl);
ctx = ctx.MakeCPU();
Context ctx_cpu;
ctx_cpu.UpdateAllowUnknown(Args{{"device", "cpu"}});
ObjFunction * obj_cpu =
ObjFunction::Create("reg:squarederror", &ctx);
ObjFunction::Create("reg:squarederror", &ctx_cpu);
linalg::Matrix<GradientPair> cpu_out_preds;
linalg::Matrix<GradientPair> sycl_out_preds;

View File

@@ -21,10 +21,10 @@ TEST(SyclRowSetCollection, AddSplits) {
RowSetCollection row_set_collection;
auto& row_indices = row_set_collection.Data();
row_indices.Resize(&qu, num_rows);
row_indices.Resize(qu, num_rows);
size_t* p_row_indices = row_indices.Data();
qu.submit([&](::sycl::handler& cgh) {
qu->submit([&](::sycl::handler& cgh) {
cgh.parallel_for<>(::sycl::range<1>(num_rows),
[p_row_indices](::sycl::item<1> pid) {
const size_t idx = pid.get_id(0);