[SYCL]. Implementation of HostDeviceVector (#10842)
This commit is contained in:
committed by
GitHub
parent
bc69a3e877
commit
2179baa50c
@@ -4,8 +4,36 @@
|
||||
#pragma once
|
||||
|
||||
#include "../helpers.h"
|
||||
#include "../../plugin/sycl/device_manager.h"
|
||||
#include "../../plugin/sycl/data.h"
|
||||
|
||||
namespace xgboost::sycl {
|
||||
|
||||
template<typename T, typename Fn>
|
||||
void TransformOnDeviceData(DeviceOrd device, T* device_data, size_t n_data, Fn&& fn) {
|
||||
sycl::DeviceManager device_manager;
|
||||
::sycl::queue* qu = device_manager.GetQueue(device);
|
||||
|
||||
qu->submit([&](::sycl::handler& cgh) {
|
||||
cgh.parallel_for<>(::sycl::range<1>(n_data), [=](::sycl::item<1> nid) {
|
||||
const size_t i = nid.get_id(0);
|
||||
device_data[i] = fn(device_data[i]);
|
||||
});
|
||||
}).wait();
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
void VerifyOnDeviceData(DeviceOrd device, const T* device_data, const T* host_data, size_t n_data, T eps = T()) {
|
||||
sycl::DeviceManager device_manager;
|
||||
::sycl::queue* qu = device_manager.GetQueue(device);
|
||||
|
||||
std::vector<T> copy_device_data(n_data);
|
||||
qu->memcpy(copy_device_data.data(), device_data, n_data * sizeof(T)).wait();
|
||||
for (size_t i = 0; i < n_data; ++i) {
|
||||
EXPECT_NEAR(copy_device_data[i], host_data[i], eps);
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T, typename Container>
|
||||
void VerifySyclVector(const USMVector<T, MemoryType::shared>& sycl_vector,
|
||||
const Container& host_vector, T eps = T()) {
|
||||
|
||||
@@ -40,10 +40,10 @@ void GHistBuilderTest(float sparsity, bool force_atomic_use) {
|
||||
|
||||
RowSetCollection row_set_collection;
|
||||
auto& row_indices = row_set_collection.Data();
|
||||
row_indices.Resize(&qu, num_rows);
|
||||
row_indices.Resize(qu, num_rows);
|
||||
size_t* p_row_indices = row_indices.Data();
|
||||
|
||||
qu.submit([&](::sycl::handler& cgh) {
|
||||
qu->submit([&](::sycl::handler& cgh) {
|
||||
cgh.parallel_for<>(::sycl::range<1>(num_rows),
|
||||
[p_row_indices](::sycl::item<1> pid) {
|
||||
const size_t idx = pid.get_id(0);
|
||||
@@ -58,23 +58,23 @@ void GHistBuilderTest(float sparsity, bool force_atomic_use) {
|
||||
{0.1f, 0.2f}, {0.3f, 0.4f}, {0.5f, 0.6f}, {0.7f, 0.8f},
|
||||
{0.9f, 0.1f}, {0.2f, 0.3f}, {0.4f, 0.5f}, {0.6f, 0.7f}};
|
||||
CHECK_EQ(gpair.size(), num_rows);
|
||||
USMVector<GradientPair, MemoryType::on_device> gpair_device(&qu, gpair);
|
||||
USMVector<GradientPair, MemoryType::on_device> gpair_device(qu, gpair);
|
||||
|
||||
std::vector<GradientSumT> hist_host(2*n_bins);
|
||||
GHistRow<GradientSumT, MemoryType::on_device> hist(&qu, 2 * n_bins);
|
||||
GHistRow<GradientSumT, MemoryType::on_device> hist(qu, 2 * n_bins);
|
||||
::sycl::event event;
|
||||
|
||||
const size_t nblocks = 2;
|
||||
GHistRow<GradientSumT, MemoryType::on_device> hist_buffer(&qu, 2 * nblocks * n_bins);
|
||||
GHistRow<GradientSumT, MemoryType::on_device> hist_buffer(qu, 2 * nblocks * n_bins);
|
||||
|
||||
InitHist(qu, &hist, hist.Size(), &event);
|
||||
InitHist(qu, &hist_buffer, hist_buffer.Size(), &event);
|
||||
|
||||
event = builder.BuildHist(gpair_device, row_set_collection[0], gmat_sycl, &hist,
|
||||
sparsity < eps , &hist_buffer, event, force_atomic_use);
|
||||
qu.memcpy(hist_host.data(), hist.Data(),
|
||||
qu->memcpy(hist_host.data(), hist.Data(),
|
||||
2 * n_bins * sizeof(GradientSumT), event);
|
||||
qu.wait_and_throw();
|
||||
qu->wait_and_throw();
|
||||
|
||||
// Build hist on host to compare
|
||||
std::vector<GradientSumT> hist_desired(2*n_bins);
|
||||
@@ -104,21 +104,21 @@ void GHistSubtractionTest() {
|
||||
|
||||
::sycl::event event;
|
||||
std::vector<GradientSumT> hist1_host = {0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8};
|
||||
GHistType hist1(&qu, 2 * n_bins);
|
||||
event = qu.memcpy(hist1.Data(), hist1_host.data(),
|
||||
2 * n_bins * sizeof(GradientSumT), event);
|
||||
GHistType hist1(qu, 2 * n_bins);
|
||||
event = qu->memcpy(hist1.Data(), hist1_host.data(),
|
||||
2 * n_bins * sizeof(GradientSumT), event);
|
||||
|
||||
std::vector<GradientSumT> hist2_host = {0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2, 0.1};
|
||||
GHistType hist2(&qu, 2 * n_bins);
|
||||
event = qu.memcpy(hist2.Data(), hist2_host.data(),
|
||||
GHistType hist2(qu, 2 * n_bins);
|
||||
event = qu->memcpy(hist2.Data(), hist2_host.data(),
|
||||
2 * n_bins * sizeof(GradientSumT), event);
|
||||
|
||||
std::vector<GradientSumT> hist3_host(2 * n_bins);
|
||||
GHistType hist3(&qu, 2 * n_bins);
|
||||
GHistType hist3(qu, 2 * n_bins);
|
||||
event = SubtractionHist(qu, &hist3, hist1, hist2, n_bins, event);
|
||||
qu.memcpy(hist3_host.data(), hist3.Data(),
|
||||
qu->memcpy(hist3_host.data(), hist3.Data(),
|
||||
2 * n_bins * sizeof(GradientSumT), event);
|
||||
qu.wait_and_throw();
|
||||
qu->wait_and_throw();
|
||||
|
||||
std::vector<GradientSumT> hist3_desired(2 * n_bins);
|
||||
for (size_t idx = 0; idx < 2 * n_bins; ++idx) {
|
||||
|
||||
@@ -19,7 +19,7 @@ template <typename GradientSumT>
|
||||
class TestHistUpdater : public HistUpdater<GradientSumT> {
|
||||
public:
|
||||
TestHistUpdater(const Context* ctx,
|
||||
::sycl::queue qu,
|
||||
::sycl::queue* qu,
|
||||
const xgboost::tree::TrainParam& param,
|
||||
FeatureInteractionConstraintHost int_constraints_,
|
||||
DMatrix const* fmat) : HistUpdater<GradientSumT>(ctx, qu, param,
|
||||
@@ -115,10 +115,10 @@ void TestHistUpdaterSampling(const xgboost::tree::TrainParam& param) {
|
||||
|
||||
TestHistUpdater<GradientSumT> updater(&ctx, qu, param, int_constraints, p_fmat.get());
|
||||
|
||||
USMVector<size_t, MemoryType::on_device> row_indices_0(&qu, num_rows);
|
||||
USMVector<size_t, MemoryType::on_device> row_indices_1(&qu, num_rows);
|
||||
USMVector<GradientPair, MemoryType::on_device> gpair(&qu, num_rows);
|
||||
GenerateRandomGPairs(&qu, gpair.Data(), num_rows, true);
|
||||
USMVector<size_t, MemoryType::on_device> row_indices_0(qu, num_rows);
|
||||
USMVector<size_t, MemoryType::on_device> row_indices_1(qu, num_rows);
|
||||
USMVector<GradientPair, MemoryType::on_device> gpair(qu, num_rows);
|
||||
GenerateRandomGPairs(qu, gpair.Data(), num_rows, true);
|
||||
|
||||
updater.TestInitSampling(gpair, &row_indices_0);
|
||||
|
||||
@@ -132,8 +132,8 @@ void TestHistUpdaterSampling(const xgboost::tree::TrainParam& param) {
|
||||
if (row_indices_1.Size() == n_samples) {
|
||||
std::vector<size_t> row_indices_0_host(n_samples);
|
||||
std::vector<size_t> row_indices_1_host(n_samples);
|
||||
qu.memcpy(row_indices_0_host.data(), row_indices_0.Data(), n_samples * sizeof(size_t)).wait();
|
||||
qu.memcpy(row_indices_1_host.data(), row_indices_1.Data(), n_samples * sizeof(size_t)).wait();
|
||||
qu->memcpy(row_indices_0_host.data(), row_indices_0.Data(), n_samples * sizeof(size_t)).wait();
|
||||
qu->memcpy(row_indices_1_host.data(), row_indices_1.Data(), n_samples * sizeof(size_t)).wait();
|
||||
|
||||
// The order in row_indices_0 and row_indices_1 can be different
|
||||
std::set<size_t> rows;
|
||||
@@ -168,8 +168,8 @@ void TestHistUpdaterInitData(const xgboost::tree::TrainParam& param, bool has_ne
|
||||
|
||||
TestHistUpdater<GradientSumT> updater(&ctx, qu, param, int_constraints, p_fmat.get());
|
||||
|
||||
USMVector<GradientPair, MemoryType::on_device> gpair(&qu, num_rows);
|
||||
GenerateRandomGPairs(&qu, gpair.Data(), num_rows, has_neg_hess);
|
||||
USMVector<GradientPair, MemoryType::on_device> gpair(qu, num_rows);
|
||||
GenerateRandomGPairs(qu, gpair.Data(), num_rows, has_neg_hess);
|
||||
|
||||
DeviceMatrix dmat;
|
||||
dmat.Init(qu, p_fmat.get());
|
||||
@@ -181,7 +181,7 @@ void TestHistUpdaterInitData(const xgboost::tree::TrainParam& param, bool has_ne
|
||||
auto& row_indices = row_set_collection->Data();
|
||||
|
||||
std::vector<size_t> row_indices_host(row_indices.Size());
|
||||
qu.memcpy(row_indices_host.data(), row_indices.DataConst(), row_indices.Size()*sizeof(size_t)).wait();
|
||||
qu->memcpy(row_indices_host.data(), row_indices.DataConst(), row_indices.Size()*sizeof(size_t)).wait();
|
||||
|
||||
if (!has_neg_hess) {
|
||||
for (size_t i = 0; i < num_rows; ++i) {
|
||||
@@ -189,7 +189,7 @@ void TestHistUpdaterInitData(const xgboost::tree::TrainParam& param, bool has_ne
|
||||
}
|
||||
} else {
|
||||
std::vector<GradientPair> gpair_host(num_rows);
|
||||
qu.memcpy(gpair_host.data(), gpair.Data(), num_rows*sizeof(GradientPair)).wait();
|
||||
qu->memcpy(gpair_host.data(), gpair.Data(), num_rows*sizeof(GradientPair)).wait();
|
||||
|
||||
std::set<size_t> rows;
|
||||
for (size_t i = 0; i < num_rows; ++i) {
|
||||
@@ -224,9 +224,9 @@ void TestHistUpdaterBuildHistogramsLossGuide(const xgboost::tree::TrainParam& pa
|
||||
updater.SetHistSynchronizer(new BatchHistSynchronizer<GradientSumT>());
|
||||
updater.SetHistRowsAdder(new BatchHistRowsAdder<GradientSumT>());
|
||||
|
||||
USMVector<GradientPair, MemoryType::on_device> gpair(&qu, num_rows);
|
||||
USMVector<GradientPair, MemoryType::on_device> gpair(qu, num_rows);
|
||||
auto* gpair_ptr = gpair.Data();
|
||||
GenerateRandomGPairs(&qu, gpair_ptr, num_rows, false);
|
||||
GenerateRandomGPairs(qu, gpair_ptr, num_rows, false);
|
||||
|
||||
DeviceMatrix dmat;
|
||||
dmat.Init(qu, p_fmat.get());
|
||||
@@ -255,10 +255,10 @@ void TestHistUpdaterBuildHistogramsLossGuide(const xgboost::tree::TrainParam& pa
|
||||
std::vector<xgboost::detail::GradientPairInternal<GradientSumT>> hist0_host(n_bins);
|
||||
std::vector<xgboost::detail::GradientPairInternal<GradientSumT>> hist1_host(n_bins);
|
||||
std::vector<xgboost::detail::GradientPairInternal<GradientSumT>> hist2_host(n_bins);
|
||||
qu.memcpy(hist0_host.data(), (*hist)[0].DataConst(), sizeof(xgboost::detail::GradientPairInternal<GradientSumT>) * n_bins);
|
||||
qu.memcpy(hist1_host.data(), (*hist)[1].DataConst(), sizeof(xgboost::detail::GradientPairInternal<GradientSumT>) * n_bins);
|
||||
qu.memcpy(hist2_host.data(), (*hist)[2].DataConst(), sizeof(xgboost::detail::GradientPairInternal<GradientSumT>) * n_bins);
|
||||
qu.wait();
|
||||
qu->memcpy(hist0_host.data(), (*hist)[0].DataConst(), sizeof(xgboost::detail::GradientPairInternal<GradientSumT>) * n_bins);
|
||||
qu->memcpy(hist1_host.data(), (*hist)[1].DataConst(), sizeof(xgboost::detail::GradientPairInternal<GradientSumT>) * n_bins);
|
||||
qu->memcpy(hist2_host.data(), (*hist)[2].DataConst(), sizeof(xgboost::detail::GradientPairInternal<GradientSumT>) * n_bins);
|
||||
qu->wait();
|
||||
|
||||
for (size_t idx_bin = 0; idx_bin < n_bins; ++idx_bin) {
|
||||
EXPECT_NEAR(hist0_host[idx_bin].GetGrad(), hist1_host[idx_bin].GetGrad() + hist2_host[idx_bin].GetGrad(), 1e-6);
|
||||
@@ -286,9 +286,9 @@ void TestHistUpdaterInitNewNode(const xgboost::tree::TrainParam& param, float sp
|
||||
updater.SetHistSynchronizer(new BatchHistSynchronizer<GradientSumT>());
|
||||
updater.SetHistRowsAdder(new BatchHistRowsAdder<GradientSumT>());
|
||||
|
||||
USMVector<GradientPair, MemoryType::on_device> gpair(&qu, num_rows);
|
||||
USMVector<GradientPair, MemoryType::on_device> gpair(qu, num_rows);
|
||||
auto* gpair_ptr = gpair.Data();
|
||||
GenerateRandomGPairs(&qu, gpair_ptr, num_rows, false);
|
||||
GenerateRandomGPairs(qu, gpair_ptr, num_rows, false);
|
||||
|
||||
DeviceMatrix dmat;
|
||||
dmat.Init(qu, p_fmat.get());
|
||||
@@ -308,7 +308,7 @@ void TestHistUpdaterInitNewNode(const xgboost::tree::TrainParam& param, float sp
|
||||
GradStats<GradientSumT> grad_stat;
|
||||
{
|
||||
::sycl::buffer<GradStats<GradientSumT>> buff(&grad_stat, 1);
|
||||
qu.submit([&](::sycl::handler& cgh) {
|
||||
qu->submit([&](::sycl::handler& cgh) {
|
||||
auto buff_acc = buff.template get_access<::sycl::access::mode::read_write>(cgh);
|
||||
cgh.single_task<>([=]() {
|
||||
for (size_t i = 0; i < num_rows; ++i) {
|
||||
@@ -344,9 +344,9 @@ void TestHistUpdaterEvaluateSplits(const xgboost::tree::TrainParam& param) {
|
||||
updater.SetHistSynchronizer(new BatchHistSynchronizer<GradientSumT>());
|
||||
updater.SetHistRowsAdder(new BatchHistRowsAdder<GradientSumT>());
|
||||
|
||||
USMVector<GradientPair, MemoryType::on_device> gpair(&qu, num_rows);
|
||||
USMVector<GradientPair, MemoryType::on_device> gpair(qu, num_rows);
|
||||
auto* gpair_ptr = gpair.Data();
|
||||
GenerateRandomGPairs(&qu, gpair_ptr, num_rows, false);
|
||||
GenerateRandomGPairs(qu, gpair_ptr, num_rows, false);
|
||||
|
||||
DeviceMatrix dmat;
|
||||
dmat.Init(qu, p_fmat.get());
|
||||
@@ -378,7 +378,7 @@ void TestHistUpdaterEvaluateSplits(const xgboost::tree::TrainParam& param) {
|
||||
std::vector<bst_float> best_loss_chg_des(1, -1);
|
||||
{
|
||||
::sycl::buffer<bst_float> best_loss_chg_buff(best_loss_chg_des.data(), 1);
|
||||
qu.submit([&](::sycl::handler& cgh) {
|
||||
qu->submit([&](::sycl::handler& cgh) {
|
||||
auto best_loss_chg_acc = best_loss_chg_buff.template get_access<::sycl::access::mode::read_write>(cgh);
|
||||
cgh.single_task<>([=]() {
|
||||
for (size_t i = 1; i < size; ++i) {
|
||||
@@ -426,15 +426,15 @@ void TestHistUpdaterApplySplit(const xgboost::tree::TrainParam& param, float spa
|
||||
|
||||
FeatureInteractionConstraintHost int_constraints;
|
||||
TestHistUpdater<GradientSumT> updater(&ctx, qu, param, int_constraints, p_fmat.get());
|
||||
USMVector<GradientPair, MemoryType::on_device> gpair(&qu, num_rows);
|
||||
GenerateRandomGPairs(&qu, gpair.Data(), num_rows, false);
|
||||
USMVector<GradientPair, MemoryType::on_device> gpair(qu, num_rows);
|
||||
GenerateRandomGPairs(qu, gpair.Data(), num_rows, false);
|
||||
|
||||
auto* row_set_collection = updater.TestInitData(gmat, gpair, *p_fmat, tree);
|
||||
updater.TestApplySplit(nodes, gmat, &tree);
|
||||
|
||||
// Copy indexes to host
|
||||
std::vector<size_t> row_indices_host(num_rows);
|
||||
qu.memcpy(row_indices_host.data(), row_set_collection->Data().Data(), sizeof(size_t)*num_rows).wait();
|
||||
qu->memcpy(row_indices_host.data(), row_set_collection->Data().Data(), sizeof(size_t)*num_rows).wait();
|
||||
|
||||
// Reference Implementation
|
||||
std::vector<size_t> row_indices_desired_host(num_rows);
|
||||
@@ -448,7 +448,7 @@ void TestHistUpdaterApplySplit(const xgboost::tree::TrainParam& param, float spa
|
||||
xgboost::tree::CommonRowPartitioner::FindSplitConditions(nodes, tree, gmat, &split_conditions);
|
||||
|
||||
common::PartitionBuilder partition_builder;
|
||||
partition_builder.Init(&qu, n_nodes, [&](size_t node_in_set) {
|
||||
partition_builder.Init(qu, n_nodes, [&](size_t node_in_set) {
|
||||
const int32_t nid = nodes[node_in_set].nid;
|
||||
return (*row_set_collection4verification)[nid].Size();
|
||||
});
|
||||
@@ -456,14 +456,14 @@ void TestHistUpdaterApplySplit(const xgboost::tree::TrainParam& param, float spa
|
||||
::sycl::event event;
|
||||
partition_builder.Partition(gmat, nodes, (*row_set_collection4verification),
|
||||
split_conditions, &tree, &event);
|
||||
qu.wait_and_throw();
|
||||
qu->wait_and_throw();
|
||||
|
||||
for (size_t node_in_set = 0; node_in_set < n_nodes; node_in_set++) {
|
||||
const int32_t nid = nodes[node_in_set].nid;
|
||||
size_t* data_result = const_cast<size_t*>((*row_set_collection4verification)[nid].begin);
|
||||
partition_builder.MergeToArray(node_in_set, data_result, &event);
|
||||
}
|
||||
qu.wait_and_throw();
|
||||
qu->wait_and_throw();
|
||||
|
||||
const int32_t nid = nodes[0].nid;
|
||||
n_left = partition_builder.GetNLeftElems(0);
|
||||
@@ -472,7 +472,7 @@ void TestHistUpdaterApplySplit(const xgboost::tree::TrainParam& param, float spa
|
||||
row_set_collection4verification->AddSplit(nid, tree[nid].LeftChild(),
|
||||
tree[nid].RightChild(), n_left, n_right);
|
||||
|
||||
qu.memcpy(row_indices_desired_host.data(), row_set_collection4verification->Data().Data(), sizeof(size_t)*num_rows).wait();
|
||||
qu->memcpy(row_indices_desired_host.data(), row_set_collection4verification->Data().Data(), sizeof(size_t)*num_rows).wait();
|
||||
}
|
||||
|
||||
std::sort(row_indices_desired_host.begin(), row_indices_desired_host.begin() + n_left);
|
||||
@@ -506,7 +506,7 @@ void TestHistUpdaterExpandWithLossGuide(const xgboost::tree::TrainParam& param)
|
||||
gmat.Init(qu, &ctx, dmat, n_bins);
|
||||
|
||||
std::vector<GradientPair> gpair_host = {{1, 2}, {3, 1}, {1, 1}};
|
||||
USMVector<GradientPair, MemoryType::on_device> gpair(&qu, gpair_host);
|
||||
USMVector<GradientPair, MemoryType::on_device> gpair(qu, gpair_host);
|
||||
|
||||
RegTree tree;
|
||||
FeatureInteractionConstraintHost int_constraints;
|
||||
@@ -554,7 +554,7 @@ void TestHistUpdaterExpandWithDepthWise(const xgboost::tree::TrainParam& param)
|
||||
gmat.Init(qu, &ctx, dmat, n_bins);
|
||||
|
||||
std::vector<GradientPair> gpair_host = {{1, 2}, {3, 1}, {1, 1}};
|
||||
USMVector<GradientPair, MemoryType::on_device> gpair(&qu, gpair_host);
|
||||
USMVector<GradientPair, MemoryType::on_device> gpair(qu, gpair_host);
|
||||
|
||||
RegTree tree;
|
||||
FeatureInteractionConstraintHost int_constraints;
|
||||
|
||||
250
tests/cpp/plugin/test_sycl_host_device_vector.cc
Normal file
250
tests/cpp/plugin/test_sycl_host_device_vector.cc
Normal file
@@ -0,0 +1,250 @@
|
||||
/**
|
||||
* Copyright 2018-2024, XGBoost contributors
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
#include <numeric>
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-W#pragma-messages"
|
||||
#include <xgboost/host_device_vector.h>
|
||||
#pragma GCC diagnostic pop
|
||||
|
||||
#include "sycl_helpers.h"
|
||||
|
||||
namespace xgboost::common {
|
||||
namespace {
|
||||
|
||||
void InitHostDeviceVector(size_t n, DeviceOrd device, HostDeviceVector<int> *v) {
|
||||
// create the vector
|
||||
v->SetDevice(device);
|
||||
v->Resize(n);
|
||||
|
||||
ASSERT_EQ(v->Size(), n);
|
||||
ASSERT_EQ(v->Device(), device);
|
||||
// ensure that the device have read-write access
|
||||
ASSERT_TRUE(v->DeviceCanRead());
|
||||
ASSERT_TRUE(v->DeviceCanWrite());
|
||||
// ensure that the host has no access
|
||||
ASSERT_FALSE(v->HostCanRead());
|
||||
ASSERT_FALSE(v->HostCanWrite());
|
||||
|
||||
// fill in the data on the host
|
||||
std::vector<int>& data_h = v->HostVector();
|
||||
// ensure that the host has full access, while the device have none
|
||||
ASSERT_TRUE(v->HostCanRead());
|
||||
ASSERT_TRUE(v->HostCanWrite());
|
||||
ASSERT_FALSE(v->DeviceCanRead());
|
||||
ASSERT_FALSE(v->DeviceCanWrite());
|
||||
ASSERT_EQ(data_h.size(), n);
|
||||
std::iota(data_h.begin(), data_h.end(), 0);
|
||||
}
|
||||
|
||||
// Add one to every element of `v` in place, going through the writable device pointer,
// and assert that the device can write afterwards.
void PlusOne(HostDeviceVector<int> *v) {
  auto device = v->Device();
  // The functor works on the element type (int) directly, so values do not round-trip
  // through an unsigned type; it needs no captures.
  sycl::TransformOnDeviceData(device, v->DevicePointer(), v->Size(), [](int a) { return a + 1; });
  ASSERT_TRUE(v->DeviceCanWrite());
}
|
||||
|
||||
// Verify that `v` holds `size` consecutive integers starting at `first`, reading the data
// once through the const device pointer and once through the mutable one, and assert the
// access permissions expected after each read.
void CheckDevice(HostDeviceVector<int>* v,
                 size_t size,
                 unsigned int first,
                 GPUAccess access) {
  HostDeviceVector<int>& vec = *v;
  ASSERT_EQ(vec.Size(), size);

  // reference values: first, first + 1, ...
  std::vector<int> expected(size);
  std::iota(expected.begin(), expected.end(), first);

  // first pass: read through the const device pointer
  sycl::VerifyOnDeviceData(vec.Device(), vec.ConstDevicePointer(), expected.data(), size);
  ASSERT_TRUE(vec.DeviceCanRead());
  // ensure that the device has at most the access specified by access
  const bool expect_device_write = (access == GPUAccess::kWrite);
  const bool expect_host_read = (access == GPUAccess::kRead);
  ASSERT_EQ(vec.DeviceCanWrite(), expect_device_write);
  ASSERT_EQ(vec.HostCanRead(), expect_host_read);
  ASSERT_FALSE(vec.HostCanWrite());

  // second pass: read through the mutable device pointer; afterwards the device is
  // expected to have full access and the host none
  sycl::VerifyOnDeviceData(vec.Device(), vec.DevicePointer(), expected.data(), size);
  ASSERT_TRUE(vec.DeviceCanRead());
  ASSERT_TRUE(vec.DeviceCanWrite());
  ASSERT_FALSE(vec.HostCanRead());
  ASSERT_FALSE(vec.HostCanWrite());
}
|
||||
|
||||
// Verify on the host that element i of `v` equals i + 1, then assert the access permissions.
// With GPUAccess::kNone the data is obtained through the mutable HostVector(); otherwise it
// is obtained through ConstHostVector().
void CheckHost(HostDeviceVector<int> *v, GPUAccess access) {
  const std::vector<int>& data_h = access == GPUAccess::kNone ?
                                   v->HostVector() : v->ConstHostVector();
  for (size_t i = 0; i < v->Size(); ++i) {
    // Compare in the element type so the assertion does not mix signed and unsigned operands.
    ASSERT_EQ(data_h.at(i), static_cast<int>(i + 1));
  }
  ASSERT_TRUE(v->HostCanRead());
  ASSERT_EQ(v->HostCanWrite(), access == GPUAccess::kNone);
  ASSERT_EQ(v->DeviceCanRead(), access == GPUAccess::kRead);
  // the devices should have no write access
  ASSERT_FALSE(v->DeviceCanWrite());
}
|
||||
|
||||
void TestHostDeviceVector(size_t n, DeviceOrd device) {
|
||||
HostDeviceVector<int> v;
|
||||
InitHostDeviceVector(n, device, &v);
|
||||
CheckDevice(&v, n, 0, GPUAccess::kRead);
|
||||
PlusOne(&v);
|
||||
CheckDevice(&v, n, 1, GPUAccess::kWrite);
|
||||
CheckHost(&v, GPUAccess::kRead);
|
||||
CheckHost(&v, GPUAccess::kNone);
|
||||
}
|
||||
|
||||
// Runs the shared host/device scenario on the default SYCL device with 1001 elements.
TEST(SyclHostDeviceVector, Basic) {
  const size_t n_elements = 1001;
  TestHostDeviceVector(n_elements, DeviceOrd::SyclDefault());
}
|
||||
|
||||
// Copies an initialised vector into another one and runs the usual device/host checks on
// the copy after the source has been destroyed.
// The helpers report problems with ASSERT_*, which only returns from the helper itself;
// ASSERT_NO_FATAL_FAILURE ends the test at the first step that failed fatally.
TEST(SyclHostDeviceVector, Copy) {
  size_t n = 1001;
  auto device = DeviceOrd::SyclDefault();

  HostDeviceVector<int> v;
  {
    // a separate scope to ensure that v1 is gone before further checks
    HostDeviceVector<int> v1;
    ASSERT_NO_FATAL_FAILURE(InitHostDeviceVector(n, device, &v1));
    v.Resize(v1.Size());
    v.Copy(v1);
  }
  ASSERT_NO_FATAL_FAILURE(CheckDevice(&v, n, 0, GPUAccess::kRead));
  ASSERT_NO_FATAL_FAILURE(PlusOne(&v));
  ASSERT_NO_FATAL_FAILURE(CheckDevice(&v, n, 1, GPUAccess::kWrite));
  ASSERT_NO_FATAL_FAILURE(CheckHost(&v, GPUAccess::kRead));
  ASSERT_NO_FATAL_FAILURE(CheckHost(&v, GPUAccess::kNone));
}
|
||||
|
||||
// Fills a vector placed on the default SYCL device with a constant and checks both the
// access permissions after Fill() and the values read back through the device pointer.
TEST(SyclHostDeviceVector, Fill) {
  const size_t n = 1001;
  const int fill_value = 42;
  const auto device = DeviceOrd::SyclDefault();

  HostDeviceVector<int> vec;
  vec.SetDevice(device);
  vec.Resize(n);
  ASSERT_TRUE(vec.DeviceCanWrite());

  vec.Fill(fill_value);

  // after the fill the host is expected to have no access and the device full access
  ASSERT_FALSE(vec.HostCanRead());
  ASSERT_FALSE(vec.HostCanWrite());
  ASSERT_TRUE(vec.DeviceCanRead());
  ASSERT_TRUE(vec.DeviceCanWrite());

  std::vector<int> expected(n, fill_value);
  sycl::VerifyOnDeviceData(vec.Device(), vec.ConstDevicePointer(), expected.data(), n);
}
|
||||
|
||||
// Extends two constant-filled vectors with each other (in both directions) and checks that
// the result has the combined length and still holds the constant everywhere.
TEST(SyclHostDeviceVector, Extend) {
  const size_t n0 = 1001;
  const size_t n1 = 17;
  const int val = 42;
  const auto device = DeviceOrd::SyclDefault();

  // put a vector on the device, give it `n` elements and fill it with `val`
  auto init_filled = [&](HostDeviceVector<int>* vec, size_t n) {
    vec->SetDevice(device);
    vec->Resize(n);
    vec->Fill(val);
  };

  HostDeviceVector<int> v0;
  init_filled(&v0, n0);

  HostDeviceVector<int> v1;
  init_filled(&v1, n1);

  // v0 grows by the contents of v1
  v0.Extend(v1);
  {
    const size_t expected_size = n0 + n1;
    std::vector<int> expected(expected_size, val);
    sycl::VerifyOnDeviceData(v0.Device(), v0.ConstDevicePointer(), expected.data(), expected_size);
  }

  // v1 grows by the contents of the already extended v0
  v1.Extend(v0);
  {
    const size_t expected_size = n0 + 2 * n1;
    std::vector<int> expected(expected_size, val);
    sycl::VerifyOnDeviceData(v1.Device(), v1.ConstDevicePointer(), expected.data(), expected_size);
  }
}
|
||||
|
||||
// Moves a host-initialised vector to the default SYCL device and back to the CPU, then
// checks that size, device and contents are what they were at the start.
TEST(SyclHostDeviceVector, SetDevice) {
  std::vector<int> h_vec(2345);
  // 0, 1, 2, ... without the size_t -> int narrowing of a hand-written index loop
  std::iota(h_vec.begin(), h_vec.end(), 0);
  HostDeviceVector<int> vec(h_vec);
  auto device = DeviceOrd::SyclDefault();

  vec.SetDevice(device);
  ASSERT_EQ(vec.Size(), h_vec.size());
  vec.DeviceSpan();  // sync to device; the returned span itself is not needed

  vec.SetDevice(DeviceOrd::CPU());  // pull back to cpu.
  ASSERT_EQ(vec.Size(), h_vec.size());
  ASSERT_EQ(vec.Device(), DeviceOrd::CPU());

  // Bind by reference: a plain `auto` would copy the whole host vector.
  const auto& h_vec_1 = vec.HostVector();
  ASSERT_TRUE(std::equal(h_vec_1.cbegin(), h_vec_1.cend(), h_vec.cbegin()));
}
|
||||
|
||||
// Checks that the device and host spans report the same size and data pointer as the
// vector itself, and that requesting host spans yields the expected host permissions.
TEST(SyclHostDeviceVector, Span) {
  HostDeviceVector<float> vec {1.0f, 2.0f, 3.0f, 4.0f};
  vec.SetDevice(DeviceOrd::SyclDefault());

  // mutable device view
  auto d_span = vec.DeviceSpan();
  ASSERT_EQ(vec.Size(), d_span.size());
  ASSERT_EQ(vec.DevicePointer(), d_span.data());

  // read-only device view
  auto d_const_span = vec.ConstDeviceSpan();
  ASSERT_EQ(vec.Size(), d_const_span.size());
  ASSERT_EQ(vec.ConstDevicePointer(), d_const_span.data());

  // read-only host view: the host is expected to be able to read but not write
  auto host_view = vec.ConstHostSpan();
  ASSERT_TRUE(vec.HostCanRead());
  ASSERT_FALSE(vec.HostCanWrite());
  ASSERT_EQ(host_view.size(), vec.Size());
  ASSERT_EQ(host_view.data(), vec.ConstHostPointer());

  // requesting the mutable host view is expected to grant write access
  host_view = vec.HostSpan();
  ASSERT_TRUE(vec.HostCanWrite());
}
|
||||
|
||||
// Move-constructs one vector from another and checks Empty() on both: the target is
// expected to be non-empty, the moved-from source is expected to be empty.
TEST(SyclHostDeviceVector, Empty) {
  HostDeviceVector<float> source {1.0f, 2.0f, 3.0f, 4.0f};
  HostDeviceVector<float> target { std::move(source) };
  ASSERT_FALSE(target.Empty());
  // reading the moved-from object is deliberate here
  ASSERT_TRUE(source.Empty());
}
|
||||
|
||||
// Resizes a four-element vector to seven elements with fill value 3.0f in three situations
// (after device spans were requested, constructed directly on the device, host only) and
// checks the contents plus the write permission expected in each situation.
TEST(SyclHostDeviceVector, Resize) {
  // Expect 1, 2, 3, 4 in the first four slots and 3.0 in every slot after them.
  auto check = [&](HostDeviceVector<float> const& vec) {
    constexpr std::size_t kInitialSize = 4;
    auto const& host_view = vec.ConstHostSpan();
    for (std::size_t pos = 0; pos < kInitialSize; ++pos) {
      ASSERT_EQ(host_view[pos], pos + 1);
    }
    for (std::size_t pos = kInitialSize; pos < vec.Size(); ++pos) {
      ASSERT_EQ(host_view[pos], 3.0);
    }
  };

  // case 1: const and mutable device spans are requested before the resize
  {
    HostDeviceVector<float> vec{1.0f, 2.0f, 3.0f, 4.0f};
    vec.SetDevice(DeviceOrd::SyclDefault());
    vec.ConstDeviceSpan();
    ASSERT_TRUE(vec.DeviceCanRead());
    ASSERT_FALSE(vec.DeviceCanWrite());
    vec.DeviceSpan();
    vec.Resize(7, 3.0f);
    ASSERT_TRUE(vec.DeviceCanWrite());
    check(vec);
  }

  // case 2: the vector is constructed with the device given up front
  {
    HostDeviceVector<float> vec{{1.0f, 2.0f, 3.0f, 4.0f}, DeviceOrd::SyclDefault()};
    ASSERT_TRUE(vec.DeviceCanWrite());
    vec.Resize(7, 3.0f);
    ASSERT_TRUE(vec.DeviceCanWrite());
    check(vec);
  }

  // case 3: no device is ever set on the vector
  {
    HostDeviceVector<float> vec{1.0f, 2.0f, 3.0f, 4.0f};
    ASSERT_TRUE(vec.HostCanWrite());
    vec.Resize(7, 3.0f);
    ASSERT_TRUE(vec.HostCanWrite());
    check(vec);
  }
}
|
||||
}
|
||||
} // namespace xgboost::common
|
||||
@@ -32,10 +32,10 @@ void TestPartitioning(float sparsity, int max_bins) {
|
||||
|
||||
RowSetCollection row_set_collection;
|
||||
auto& row_indices = row_set_collection.Data();
|
||||
row_indices.Resize(&qu, num_rows);
|
||||
row_indices.Resize(qu, num_rows);
|
||||
size_t* p_row_indices = row_indices.Data();
|
||||
|
||||
qu.submit([&](::sycl::handler& cgh) {
|
||||
qu->submit([&](::sycl::handler& cgh) {
|
||||
cgh.parallel_for<>(::sycl::range<1>(num_rows),
|
||||
[p_row_indices](::sycl::item<1> pid) {
|
||||
const size_t idx = pid.get_id(0);
|
||||
@@ -49,7 +49,7 @@ void TestPartitioning(float sparsity, int max_bins) {
|
||||
|
||||
const size_t n_nodes = row_set_collection.Size();
|
||||
PartitionBuilder partition_builder;
|
||||
partition_builder.Init(&qu, n_nodes, [&](size_t nid) {
|
||||
partition_builder.Init(qu, n_nodes, [&](size_t nid) {
|
||||
return row_set_collection[nid].Size();
|
||||
});
|
||||
|
||||
@@ -60,11 +60,11 @@ void TestPartitioning(float sparsity, int max_bins) {
|
||||
std::vector<int32_t> split_conditions = {2};
|
||||
partition_builder.Partition(gmat, nodes, row_set_collection,
|
||||
split_conditions, &tree, &event);
|
||||
qu.wait_and_throw();
|
||||
qu->wait_and_throw();
|
||||
|
||||
size_t* data_result = const_cast<size_t*>(row_set_collection[0].begin);
|
||||
partition_builder.MergeToArray(0, data_result, &event);
|
||||
qu.wait_and_throw();
|
||||
qu->wait_and_throw();
|
||||
|
||||
bst_float split_pt = gmat.cut.Values()[split_conditions[0]];
|
||||
|
||||
@@ -99,8 +99,8 @@ void TestPartitioning(float sparsity, int max_bins) {
|
||||
auto n_right = std::accumulate(ridx_right.begin(), ridx_right.end(), 0);
|
||||
|
||||
std::vector<size_t> row_indices_host(num_rows);
|
||||
qu.memcpy(row_indices_host.data(), row_indices.Data(), num_rows * sizeof(size_t));
|
||||
qu.wait_and_throw();
|
||||
qu->memcpy(row_indices_host.data(), row_indices.Data(), num_rows * sizeof(size_t));
|
||||
qu->wait_and_throw();
|
||||
|
||||
ASSERT_EQ(n_left, partition_builder.GetNLeftElems(0));
|
||||
for (size_t i = 0; i < n_left; ++i) {
|
||||
@@ -123,7 +123,7 @@ TEST(SyclPartitionBuilder, BasicTest) {
|
||||
DeviceManager device_manager;
|
||||
auto qu = device_manager.GetQueue(DeviceOrd::SyclDefault());
|
||||
PartitionBuilder builder;
|
||||
builder.Init(&qu, kNodes, [&](size_t i) {
|
||||
builder.Init(qu, kNodes, [&](size_t i) {
|
||||
return rows[i];
|
||||
});
|
||||
|
||||
@@ -142,23 +142,23 @@ TEST(SyclPartitionBuilder, BasicTest) {
|
||||
size_t n_left = rows_for_left_node[nid];
|
||||
size_t n_right = rows[nid] - n_left;
|
||||
|
||||
qu.submit([&](::sycl::handler& cgh) {
|
||||
qu->submit([&](::sycl::handler& cgh) {
|
||||
cgh.parallel_for<>(::sycl::range<1>(n_left), [=](::sycl::id<1> pid) {
|
||||
int row_id = first_row_id + pid[0];
|
||||
rid_buff_ptr[pid[0]] = row_id;
|
||||
});
|
||||
});
|
||||
qu.wait();
|
||||
qu->wait();
|
||||
first_row_id += n_left;
|
||||
|
||||
// We are storing indexes for the right side in the tail of the array to save some memory
|
||||
qu.submit([&](::sycl::handler& cgh) {
|
||||
qu->submit([&](::sycl::handler& cgh) {
|
||||
cgh.parallel_for<>(::sycl::range<1>(n_right), [=](::sycl::id<1> pid) {
|
||||
int row_id = first_row_id + pid[0];
|
||||
rid_buff_ptr[rid_buff_size - pid[0] - 1] = row_id;
|
||||
});
|
||||
});
|
||||
qu.wait();
|
||||
qu->wait();
|
||||
first_row_id += n_right;
|
||||
|
||||
builder.SetNLeftElems(nid, n_left);
|
||||
@@ -170,7 +170,7 @@ TEST(SyclPartitionBuilder, BasicTest) {
|
||||
size_t row_id = 0;
|
||||
for(size_t nid = 0; nid < kNodes; ++nid) {
|
||||
builder.MergeToArray(nid, v.data(), &event);
|
||||
qu.wait();
|
||||
qu->wait();
|
||||
|
||||
// Check that row_id for left side are correct
|
||||
for(size_t j = 0; j < rows_for_left_node[nid]; ++j) {
|
||||
|
||||
@@ -46,14 +46,15 @@ TEST(SyclObjective, LogisticRawGPair) {
|
||||
}
|
||||
|
||||
TEST(SyclObjective, CPUvsSycl) {
|
||||
Context ctx;
|
||||
ctx.UpdateAllowUnknown(Args{{"device", "sycl"}});
|
||||
Context ctx_sycl;
|
||||
ctx_sycl.UpdateAllowUnknown(Args{{"device", "sycl"}});
|
||||
ObjFunction * obj_sycl =
|
||||
ObjFunction::Create("reg:squarederror_sycl", &ctx);
|
||||
ObjFunction::Create("reg:squarederror_sycl", &ctx_sycl);
|
||||
|
||||
ctx = ctx.MakeCPU();
|
||||
Context ctx_cpu;
|
||||
ctx_cpu.UpdateAllowUnknown(Args{{"device", "cpu"}});
|
||||
ObjFunction * obj_cpu =
|
||||
ObjFunction::Create("reg:squarederror", &ctx);
|
||||
ObjFunction::Create("reg:squarederror", &ctx_cpu);
|
||||
|
||||
linalg::Matrix<GradientPair> cpu_out_preds;
|
||||
linalg::Matrix<GradientPair> sycl_out_preds;
|
||||
|
||||
@@ -21,10 +21,10 @@ TEST(SyclRowSetCollection, AddSplits) {
|
||||
RowSetCollection row_set_collection;
|
||||
|
||||
auto& row_indices = row_set_collection.Data();
|
||||
row_indices.Resize(&qu, num_rows);
|
||||
row_indices.Resize(qu, num_rows);
|
||||
size_t* p_row_indices = row_indices.Data();
|
||||
|
||||
qu.submit([&](::sycl::handler& cgh) {
|
||||
qu->submit([&](::sycl::handler& cgh) {
|
||||
cgh.parallel_for<>(::sycl::range<1>(num_rows),
|
||||
[p_row_indices](::sycl::item<1> pid) {
|
||||
const size_t idx = pid.get_id(0);
|
||||
|
||||
Reference in New Issue
Block a user