add sycl realisation of ghist builder (#10138)

Co-authored-by: Dmitry Razdoburdin <>
Dmitry Razdoburdin 2024-03-23 05:55:25 +01:00 committed by GitHub
parent e1695775e9
commit 6a7c6a8ae6
4 changed files with 585 additions and 4 deletions


@@ -0,0 +1,334 @@
/*!
* Copyright 2017-2023 by Contributors
* \file hist_util.cc
*/
#include <vector>
#include <limits>
#include <algorithm>
#include "../data/gradient_index.h"
#include "hist_util.h"
#include <CL/sycl.hpp>
namespace xgboost {
namespace sycl {
namespace common {
/*!
* \brief Fill histogram with zeroes
*/
template<typename GradientSumT>
void InitHist(::sycl::queue qu, GHistRow<GradientSumT, MemoryType::on_device>* hist,
size_t size, ::sycl::event* event) {
*event = qu.fill(hist->Begin(),
xgboost::detail::GradientPairInternal<GradientSumT>(), size, *event);
}
template void InitHist(::sycl::queue qu,
GHistRow<float, MemoryType::on_device>* hist,
size_t size, ::sycl::event* event);
template void InitHist(::sycl::queue qu,
GHistRow<double, MemoryType::on_device>* hist,
size_t size, ::sycl::event* event);
/*!
* \brief Compute Subtraction: dst = src1 - src2
*/
template<typename GradientSumT>
::sycl::event SubtractionHist(::sycl::queue qu,
GHistRow<GradientSumT, MemoryType::on_device>* dst,
const GHistRow<GradientSumT, MemoryType::on_device>& src1,
const GHistRow<GradientSumT, MemoryType::on_device>& src2,
size_t size, ::sycl::event event_priv) {
GradientSumT* pdst = reinterpret_cast<GradientSumT*>(dst->Data());
const GradientSumT* psrc1 = reinterpret_cast<const GradientSumT*>(src1.DataConst());
const GradientSumT* psrc2 = reinterpret_cast<const GradientSumT*>(src2.DataConst());
auto event_final = qu.submit([&](::sycl::handler& cgh) {
cgh.depends_on(event_priv);
cgh.parallel_for<>(::sycl::range<1>(2 * size), [pdst, psrc1, psrc2](::sycl::item<1> pid) {
const size_t i = pid.get_id(0);
pdst[i] = psrc1[i] - psrc2[i];
});
});
return event_final;
}
template ::sycl::event SubtractionHist(::sycl::queue qu,
GHistRow<float, MemoryType::on_device>* dst,
const GHistRow<float, MemoryType::on_device>& src1,
const GHistRow<float, MemoryType::on_device>& src2,
size_t size, ::sycl::event event_priv);
template ::sycl::event SubtractionHist(::sycl::queue qu,
GHistRow<double, MemoryType::on_device>* dst,
const GHistRow<double, MemoryType::on_device>& src1,
const GHistRow<double, MemoryType::on_device>& src2,
size_t size, ::sycl::event event_priv);
// Kernel that accumulates partial histograms in per-block buffers, then reduces them
template<typename FPType, typename BinIdxType, bool isDense>
::sycl::event BuildHistKernel(::sycl::queue qu,
const USMVector<GradientPair, MemoryType::on_device>& gpair_device,
const RowSetCollection::Elem& row_indices,
const GHistIndexMatrix& gmat,
GHistRow<FPType, MemoryType::on_device>* hist,
GHistRow<FPType, MemoryType::on_device>* hist_buffer,
::sycl::event event_priv) {
const size_t size = row_indices.Size();
const size_t* rid = row_indices.begin;
const size_t n_columns = isDense ? gmat.nfeatures : gmat.row_stride;
const GradientPair::ValueT* pgh =
reinterpret_cast<const GradientPair::ValueT*>(gpair_device.DataConst());
const BinIdxType* gradient_index = gmat.index.data<BinIdxType>();
const uint32_t* offsets = gmat.index.Offset();
FPType* hist_data = reinterpret_cast<FPType*>(hist->Data());
const size_t nbins = gmat.nbins;
const size_t max_work_group_size =
qu.get_device().get_info<::sycl::info::device::max_work_group_size>();
const size_t work_group_size = n_columns < max_work_group_size ? n_columns : max_work_group_size;
const size_t max_nblocks = hist_buffer->Size() / (nbins * 2);
const size_t min_block_size = 128;
size_t nblocks = std::min(max_nblocks, size / min_block_size + !!(size % min_block_size));
const size_t block_size = size / nblocks + !!(size % nblocks);
FPType* hist_buffer_data = reinterpret_cast<FPType*>(hist_buffer->Data());
auto event_fill = qu.fill(hist_buffer_data, FPType(0), nblocks * nbins * 2, event_priv);
auto event_main = qu.submit([&](::sycl::handler& cgh) {
cgh.depends_on(event_fill);
cgh.parallel_for<>(::sycl::nd_range<2>(::sycl::range<2>(nblocks, work_group_size),
::sycl::range<2>(1, work_group_size)),
[=](::sycl::nd_item<2> pid) {
size_t block = pid.get_global_id(0);
size_t feat = pid.get_global_id(1);
FPType* hist_local = hist_buffer_data + block * nbins * 2;
for (size_t idx = 0; idx < block_size; ++idx) {
size_t i = block * block_size + idx;
if (i < size) {
const size_t icol_start = n_columns * rid[i];
const size_t idx_gh = rid[i];
pid.barrier(::sycl::access::fence_space::local_space);
const BinIdxType* gr_index_local = gradient_index + icol_start;
for (size_t j = feat; j < n_columns; j += work_group_size) {
uint32_t idx_bin = static_cast<uint32_t>(gr_index_local[j]);
if constexpr (isDense) {
idx_bin += offsets[j];
}
if (idx_bin < nbins) {
hist_local[2 * idx_bin] += pgh[2 * idx_gh];
hist_local[2 * idx_bin+1] += pgh[2 * idx_gh+1];
}
}
}
}
});
});
auto event_save = qu.submit([&](::sycl::handler& cgh) {
cgh.depends_on(event_main);
cgh.parallel_for<>(::sycl::range<1>(nbins), [=](::sycl::item<1> pid) {
size_t idx_bin = pid.get_id(0);
FPType gsum = 0.0f;
FPType hsum = 0.0f;
for (size_t j = 0; j < nblocks; ++j) {
gsum += hist_buffer_data[j * nbins * 2 + 2 * idx_bin];
hsum += hist_buffer_data[j * nbins * 2 + 2 * idx_bin + 1];
}
hist_data[2 * idx_bin] = gsum;
hist_data[2 * idx_bin + 1] = hsum;
});
});
return event_save;
}
// Kernel that accumulates directly into the output histogram with atomics
template<typename FPType, typename BinIdxType, bool isDense>
::sycl::event BuildHistKernel(::sycl::queue qu,
const USMVector<GradientPair, MemoryType::on_device>& gpair_device,
const RowSetCollection::Elem& row_indices,
const GHistIndexMatrix& gmat,
GHistRow<FPType, MemoryType::on_device>* hist,
::sycl::event event_priv) {
const size_t size = row_indices.Size();
const size_t* rid = row_indices.begin;
const size_t n_columns = isDense ? gmat.nfeatures : gmat.row_stride;
const GradientPair::ValueT* pgh =
reinterpret_cast<const GradientPair::ValueT*>(gpair_device.DataConst());
const BinIdxType* gradient_index = gmat.index.data<BinIdxType>();
const uint32_t* offsets = gmat.index.Offset();
FPType* hist_data = reinterpret_cast<FPType*>(hist->Data());
const size_t nbins = gmat.nbins;
const size_t max_work_group_size =
qu.get_device().get_info<::sycl::info::device::max_work_group_size>();
const size_t feat_local = n_columns < max_work_group_size ? n_columns : max_work_group_size;
auto event_fill = qu.fill(hist_data, FPType(0), nbins * 2, event_priv);
auto event_main = qu.submit([&](::sycl::handler& cgh) {
cgh.depends_on(event_fill);
cgh.parallel_for<>(::sycl::range<2>(size, feat_local),
[=](::sycl::item<2> pid) {
size_t i = pid.get_id(0);
size_t feat = pid.get_id(1);
const size_t icol_start = n_columns * rid[i];
const size_t idx_gh = rid[i];
const BinIdxType* gr_index_local = gradient_index + icol_start;
for (size_t j = feat; j < n_columns; j += feat_local) {
uint32_t idx_bin = static_cast<uint32_t>(gr_index_local[j]);
if constexpr (isDense) {
idx_bin += offsets[j];
}
if (idx_bin < nbins) {
AtomicRef<FPType> gsum(hist_data[2 * idx_bin]);
AtomicRef<FPType> hsum(hist_data[2 * idx_bin + 1]);
gsum.fetch_add(pgh[2 * idx_gh]);
hsum.fetch_add(pgh[2 * idx_gh + 1]);
}
}
});
});
return event_main;
}
template<typename FPType, typename BinIdxType>
::sycl::event BuildHistDispatchKernel(
::sycl::queue qu,
const USMVector<GradientPair, MemoryType::on_device>& gpair_device,
const RowSetCollection::Elem& row_indices,
const GHistIndexMatrix& gmat,
GHistRow<FPType, MemoryType::on_device>* hist,
bool isDense,
GHistRow<FPType, MemoryType::on_device>* hist_buffer,
::sycl::event events_priv,
bool force_atomic_use) {
const size_t size = row_indices.Size();
const size_t n_columns = isDense ? gmat.nfeatures : gmat.row_stride;
const size_t nbins = gmat.nbins;
// maximum cycle size for which atomics are still effective
const size_t max_cycle_size_atomics = nbins;
const size_t cycle_size = size;
// TODO(razdoburdin): replace the ad-hoc dispatching criteria with a more suitable one
bool use_atomic = (size < nbins) || (gmat.max_num_bins == gmat.nbins / n_columns);
// force_atomic_use flag is used only for testing
use_atomic = use_atomic || force_atomic_use;
if (!use_atomic) {
if (isDense) {
return BuildHistKernel<FPType, BinIdxType, true>(qu, gpair_device, row_indices,
gmat, hist, hist_buffer,
events_priv);
} else {
return BuildHistKernel<FPType, uint32_t, false>(qu, gpair_device, row_indices,
gmat, hist, hist_buffer,
events_priv);
}
} else {
if (isDense) {
return BuildHistKernel<FPType, BinIdxType, true>(qu, gpair_device, row_indices,
gmat, hist, events_priv);
} else {
return BuildHistKernel<FPType, uint32_t, false>(qu, gpair_device, row_indices,
gmat, hist, events_priv);
}
}
}
template<typename FPType>
::sycl::event BuildHistKernel(::sycl::queue qu,
const USMVector<GradientPair, MemoryType::on_device>& gpair_device,
const RowSetCollection::Elem& row_indices,
const GHistIndexMatrix& gmat, const bool isDense,
GHistRow<FPType, MemoryType::on_device>* hist,
GHistRow<FPType, MemoryType::on_device>* hist_buffer,
::sycl::event event_priv,
bool force_atomic_use) {
const bool is_dense = isDense;
switch (gmat.index.GetBinTypeSize()) {
case BinTypeSize::kUint8BinsTypeSize:
return BuildHistDispatchKernel<FPType, uint8_t>(qu, gpair_device, row_indices,
gmat, hist, is_dense, hist_buffer,
event_priv, force_atomic_use);
break;
case BinTypeSize::kUint16BinsTypeSize:
return BuildHistDispatchKernel<FPType, uint16_t>(qu, gpair_device, row_indices,
gmat, hist, is_dense, hist_buffer,
event_priv, force_atomic_use);
break;
case BinTypeSize::kUint32BinsTypeSize:
return BuildHistDispatchKernel<FPType, uint32_t>(qu, gpair_device, row_indices,
gmat, hist, is_dense, hist_buffer,
event_priv, force_atomic_use);
break;
default:
CHECK(false); // no default behavior
}
}
template <typename GradientSumT>
::sycl::event GHistBuilder<GradientSumT>::BuildHist(
const USMVector<GradientPair, MemoryType::on_device>& gpair_device,
const RowSetCollection::Elem& row_indices,
const GHistIndexMatrix &gmat,
GHistRowT<MemoryType::on_device>* hist,
bool isDense,
GHistRowT<MemoryType::on_device>* hist_buffer,
::sycl::event event_priv,
bool force_atomic_use) {
return BuildHistKernel<GradientSumT>(qu_, gpair_device, row_indices, gmat,
isDense, hist, hist_buffer, event_priv,
force_atomic_use);
}
template
::sycl::event GHistBuilder<float>::BuildHist(
const USMVector<GradientPair, MemoryType::on_device>& gpair_device,
const RowSetCollection::Elem& row_indices,
const GHistIndexMatrix& gmat,
GHistRow<float, MemoryType::on_device>* hist,
bool isDense,
GHistRow<float, MemoryType::on_device>* hist_buffer,
::sycl::event event_priv,
bool force_atomic_use);
template
::sycl::event GHistBuilder<double>::BuildHist(
const USMVector<GradientPair, MemoryType::on_device>& gpair_device,
const RowSetCollection::Elem& row_indices,
const GHistIndexMatrix& gmat,
GHistRow<double, MemoryType::on_device>* hist,
bool isDense,
GHistRow<double, MemoryType::on_device>* hist_buffer,
::sycl::event event_priv,
bool force_atomic_use);
template<typename GradientSumT>
void GHistBuilder<GradientSumT>::SubtractionTrick(GHistRowT<MemoryType::on_device>* self,
const GHistRowT<MemoryType::on_device>& sibling,
const GHistRowT<MemoryType::on_device>& parent) {
const size_t size = self->Size();
CHECK_EQ(sibling.Size(), size);
CHECK_EQ(parent.Size(), size);
SubtractionHist(qu_, self, parent, sibling, size, ::sycl::event());
}
template
void GHistBuilder<float>::SubtractionTrick(GHistRow<float, MemoryType::on_device>* self,
const GHistRow<float, MemoryType::on_device>& sibling,
const GHistRow<float, MemoryType::on_device>& parent);
template
void GHistBuilder<double>::SubtractionTrick(GHistRow<double, MemoryType::on_device>* self,
const GHistRow<double, MemoryType::on_device>& sibling,
const GHistRow<double, MemoryType::on_device>& parent);
} // namespace common
} // namespace sycl
} // namespace xgboost
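
The buffer-based kernel above sidesteps atomic contention: each of the nblocks work-groups accumulates into its own private slice of hist_buffer, and a second kernel (event_save) reduces the slices bin by bin. Below is a minimal standalone sketch of that two-phase pattern, assuming a SYCL 2020 toolchain; kNBins, kNBlocks, and partial are illustrative names, not part of the plugin.

#include <CL/sycl.hpp>
#include <cstddef>
#include <iostream>
#include <vector>

int main() {
  constexpr std::size_t kNBins = 4;     // bins in the final histogram
  constexpr std::size_t kNBlocks = 8;   // number of private partial histograms
  constexpr std::size_t kNRows = 1024;  // input rows

  ::sycl::queue qu;
  std::vector<int> bin_of_row(kNRows);
  for (std::size_t i = 0; i < kNRows; ++i) bin_of_row[i] = i % kNBins;

  int* bins = ::sycl::malloc_device<int>(kNRows, qu);
  float* partial = ::sycl::malloc_device<float>(kNBlocks * kNBins, qu);
  float* hist = ::sycl::malloc_device<float>(kNBins, qu);
  qu.memcpy(bins, bin_of_row.data(), kNRows * sizeof(int)).wait();
  qu.fill(partial, 0.0f, kNBlocks * kNBins).wait();

  const std::size_t block_size = kNRows / kNBlocks;
  // Phase 1: every block owns one slice of partial, so the accumulation
  // needs no synchronization between blocks.
  qu.parallel_for(::sycl::range<1>(kNBlocks), [=](::sycl::item<1> pid) {
    const std::size_t block = pid.get_id(0);
    float* local = partial + block * kNBins;
    for (std::size_t i = block * block_size; i < (block + 1) * block_size; ++i)
      local[bins[i]] += 1.0f;  // unit weight per row keeps the sketch simple
  }).wait();

  // Phase 2: one work-item per bin folds the slices into the final histogram.
  qu.parallel_for(::sycl::range<1>(kNBins), [=](::sycl::item<1> pid) {
    const std::size_t bin = pid.get_id(0);
    float sum = 0.0f;
    for (std::size_t block = 0; block < kNBlocks; ++block)
      sum += partial[block * kNBins + bin];
    hist[bin] = sum;
  }).wait();

  std::vector<float> out(kNBins);
  qu.memcpy(out.data(), hist, kNBins * sizeof(float)).wait();
  for (float v : out) std::cout << v << " ";  // expect 256 in every bin
  std::cout << std::endl;
  ::sycl::free(bins, qu);
  ::sycl::free(partial, qu);
  ::sycl::free(hist, qu);
}

The buffer path pays nblocks * nbins * 2 extra values of memory for contention-free accumulation; the atomic kernel makes the opposite trade, which is why BuildHistDispatchKernel prefers atomics when the row count is small relative to the number of bins.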


@@ -0,0 +1,89 @@
/*!
* Copyright 2017-2023 by Contributors
* \file hist_util.h
*/
#ifndef PLUGIN_SYCL_COMMON_HIST_UTIL_H_
#define PLUGIN_SYCL_COMMON_HIST_UTIL_H_
#include <vector>
#include <unordered_map>
#include <memory>
#include "../data.h"
#include "row_set.h"
#include "../../src/common/hist_util.h"
#include "../data/gradient_index.h"
#include <CL/sycl.hpp>
namespace xgboost {
namespace sycl {
namespace common {
template<typename GradientSumT, MemoryType memory_type = MemoryType::shared>
using GHistRow = USMVector<xgboost::detail::GradientPairInternal<GradientSumT>, memory_type>;
using BinTypeSize = ::xgboost::common::BinTypeSize;
class ColumnMatrix;
/*!
* \brief Fill histogram with zeroes
*/
template<typename GradientSumT>
void InitHist(::sycl::queue qu,
GHistRow<GradientSumT, MemoryType::on_device>* hist,
size_t size, ::sycl::event* event);
/*!
* \brief Compute subtraction: dst = src1 - src2
*/
template<typename GradientSumT>
::sycl::event SubtractionHist(::sycl::queue qu,
GHistRow<GradientSumT, MemoryType::on_device>* dst,
const GHistRow<GradientSumT, MemoryType::on_device>& src1,
const GHistRow<GradientSumT, MemoryType::on_device>& src2,
size_t size, ::sycl::event event_priv);
/*!
* \brief Builder for histograms of gradient statistics
*/
template<typename GradientSumT>
class GHistBuilder {
public:
template<MemoryType memory_type = MemoryType::shared>
using GHistRowT = GHistRow<GradientSumT, memory_type>;
GHistBuilder() = default;
GHistBuilder(::sycl::queue qu, uint32_t nbins) : qu_{qu}, nbins_{nbins} {}
// Construct a histogram via histogram aggregation
::sycl::event BuildHist(const USMVector<GradientPair, MemoryType::on_device>& gpair_device,
const RowSetCollection::Elem& row_indices,
const GHistIndexMatrix& gmat,
GHistRowT<MemoryType::on_device>* HistCollection,
bool isDense,
GHistRowT<MemoryType::on_device>* hist_buffer,
::sycl::event event,
bool force_atomic_use = false);
// Construct a histogram via subtraction trick
void SubtractionTrick(GHistRowT<MemoryType::on_device>* self,
const GHistRowT<MemoryType::on_device>& sibling,
const GHistRowT<MemoryType::on_device>& parent);
uint32_t GetNumBins() const {
return nbins_;
}
private:
/*! \brief Number of all bins over all features */
uint32_t nbins_ { 0 };
::sycl::queue qu_;
};
} // namespace common
} // namespace sycl
} // namespace xgboost
#endif // PLUGIN_SYCL_COMMON_HIST_UTIL_H_
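
SubtractionTrick above rests on a simple identity: every row of a parent node lands in exactly one of its two children, so the parent histogram is the bin-wise sum of the children's histograms, and the second child can be recovered as parent minus sibling without re-reading the gradient index. A host-side sketch with made-up numbers, illustrative only (values chosen to be exact in binary floating point):

#include <cassert>
#include <cstddef>
#include <vector>

int main() {
  // Per-bin gradient sums; in the real code each bin holds a (grad, hess) pair.
  std::vector<double> parent = {1.0, 2.5, 4.0};
  std::vector<double> left   = {0.5, 2.0, 1.0};  // built explicitly
  std::vector<double> right(parent.size());
  for (std::size_t i = 0; i < parent.size(); ++i)
    right[i] = parent[i] - left[i];              // the subtraction trick
  assert(right[0] == 0.5 && right[1] == 0.5 && right[2] == 3.0);
  return 0;
}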


@@ -8,22 +8,23 @@
 namespace xgboost::sycl {
 template<typename T, typename Container>
 void VerifySyclVector(const USMVector<T, MemoryType::shared>& sycl_vector,
-                      const Container& host_vector) {
+                      const Container& host_vector, T eps = T()) {
   ASSERT_EQ(sycl_vector.Size(), host_vector.size());
   size_t size = sycl_vector.Size();
   for (size_t i = 0; i < size; ++i) {
-    ASSERT_EQ(sycl_vector[i], host_vector[i]);
+    EXPECT_NEAR(sycl_vector[i], host_vector[i], eps);
   }
 }
 template<typename T, typename Container>
-void VerifySyclVector(const std::vector<T>& sycl_vector, const Container& host_vector) {
+void VerifySyclVector(const std::vector<T>& sycl_vector,
+                      const Container& host_vector, T eps = T()) {
   ASSERT_EQ(sycl_vector.size(), host_vector.size());
   size_t size = sycl_vector.size();
   for (size_t i = 0; i < size; ++i) {
-    ASSERT_EQ(sycl_vector[i], host_vector[i]);
+    EXPECT_NEAR(sycl_vector[i], host_vector[i], eps);
   }
 }
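
The new eps parameter (defaulting to T(), i.e. exact comparison) exists because device kernels accumulate the same gradients in a different order than the host reference, and floating-point addition is not associative. A toy gtest making the point; the test name is hypothetical:

#include <gtest/gtest.h>

TEST(SketchFloatSum, NearButNotEqual) {
  double a = (0.1 + 0.2) + 0.3;  // 0.6000000000000001
  double b = 0.1 + (0.2 + 0.3);  // 0.6
  EXPECT_NEAR(a, b, 1e-6);       // passes
  // EXPECT_EQ(a, b) fails: regrouping the sum changes the rounded result.
}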


@@ -0,0 +1,157 @@
/**
* Copyright 2020-2024 by XGBoost contributors
*/
#include <gtest/gtest.h>
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wtautological-constant-compare"
#pragma GCC diagnostic ignored "-W#pragma-messages"
#include "../../../src/data/gradient_index.h" // for GHistIndexMatrix
#pragma GCC diagnostic pop
#include "../../../plugin/sycl/common/hist_util.h"
#include "../../../plugin/sycl/device_manager.h"
#include "sycl_helpers.h"
#include "../helpers.h"
namespace xgboost::sycl::common {
template <typename GradientSumT>
void GHistBuilderTest(float sparsity, bool force_atomic_use) {
const size_t num_rows = 8;
const size_t num_columns = 1;
const int n_bins = 2;
const GradientSumT eps = 1e-6;
Context ctx;
ctx.UpdateAllowUnknown(Args{{"device", "sycl"}});
DeviceManager device_manager;
auto qu = device_manager.GetQueue(ctx.Device());
auto p_fmat = RandomDataGenerator{num_rows, num_columns, sparsity}.GenerateDMatrix();
sycl::DeviceMatrix dmat;
dmat.Init(qu, p_fmat.get());
GHistIndexMatrix gmat_sycl;
gmat_sycl.Init(qu, &ctx, dmat, n_bins);
xgboost::GHistIndexMatrix gmat{&ctx, p_fmat.get(), n_bins, 0.3, false};
RowSetCollection row_set_collection;
auto& row_indices = row_set_collection.Data();
row_indices.Resize(&qu, num_rows);
size_t* p_row_indices = row_indices.Data();
qu.submit([&](::sycl::handler& cgh) {
cgh.parallel_for<>(::sycl::range<1>(num_rows),
[p_row_indices](::sycl::item<1> pid) {
const size_t idx = pid.get_id(0);
p_row_indices[idx] = idx;
});
}).wait_and_throw();
row_set_collection.Init();
auto builder = GHistBuilder<GradientSumT>(qu, n_bins);
std::vector<GradientPair> gpair = {
{0.1f, 0.2f}, {0.3f, 0.4f}, {0.5f, 0.6f}, {0.7f, 0.8f},
{0.9f, 0.1f}, {0.2f, 0.3f}, {0.4f, 0.5f}, {0.6f, 0.7f}};
CHECK_EQ(gpair.size(), num_rows);
USMVector<GradientPair, MemoryType::on_device> gpair_device(&qu, gpair);
std::vector<GradientSumT> hist_host(2*n_bins);
GHistRow<GradientSumT, MemoryType::on_device> hist(&qu, 2 * n_bins);
::sycl::event event;
const size_t nblocks = 2;
GHistRow<GradientSumT, MemoryType::on_device> hist_buffer(&qu, 2 * nblocks * n_bins);
InitHist(qu, &hist, hist.Size(), &event);
InitHist(qu, &hist_buffer, hist_buffer.Size(), &event);
event = builder.BuildHist(gpair_device, row_set_collection[0], gmat_sycl, &hist,
sparsity < eps, &hist_buffer, event, force_atomic_use);
qu.memcpy(hist_host.data(), hist.Data(),
2 * n_bins * sizeof(GradientSumT), event);
qu.wait_and_throw();
// Build hist on host to compare
std::vector<GradientSumT> hist_desired(2*n_bins);
for (size_t rid = 0; rid < num_rows; ++rid) {
const size_t ibegin = gmat.row_ptr[rid];
const size_t iend = gmat.row_ptr[rid + 1];
for (size_t i = ibegin; i < iend; ++i) {
const size_t bin_idx = gmat.index[i];
hist_desired[2*bin_idx] += gpair[rid].GetGrad();
hist_desired[2*bin_idx+1] += gpair[rid].GetHess();
}
}
VerifySyclVector(hist_host, hist_desired, eps);
}
template <typename GradientSumT>
void GHistSubtractionTest() {
const size_t n_bins = 4;
using GHistType = GHistRow<GradientSumT, MemoryType::on_device>;
Context ctx;
ctx.UpdateAllowUnknown(Args{{"device", "sycl"}});
DeviceManager device_manager;
auto qu = device_manager.GetQueue(ctx.Device());
::sycl::event event;
std::vector<GradientSumT> hist1_host = {0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8};
GHistType hist1(&qu, 2 * n_bins);
event = qu.memcpy(hist1.Data(), hist1_host.data(),
2 * n_bins * sizeof(GradientSumT), event);
std::vector<GradientSumT> hist2_host = {0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2, 0.1};
GHistType hist2(&qu, 2 * n_bins);
event = qu.memcpy(hist2.Data(), hist2_host.data(),
2 * n_bins * sizeof(GradientSumT), event);
std::vector<GradientSumT> hist3_host(2 * n_bins);
GHistType hist3(&qu, 2 * n_bins);
event = SubtractionHist(qu, &hist3, hist1, hist2, n_bins, event);
qu.memcpy(hist3_host.data(), hist3.Data(),
2 * n_bins * sizeof(GradientSumT), event);
qu.wait_and_throw();
std::vector<GradientSumT> hist3_desired(2 * n_bins);
for (size_t idx = 0; idx < 2 * n_bins; ++idx) {
hist3_desired[idx] = hist1_host[idx] - hist2_host[idx];
}
const GradientSumT eps = 1e-6;
VerifySyclVector(hist3_host, hist3_desired, eps);
}
TEST(SyclGHistBuilder, ByBlockDenseCase) {
GHistBuilderTest<float>(0.0, false);
GHistBuilderTest<double>(0.0, false);
}
TEST(SyclGHistBuilder, ByBlockSparseCase) {
GHistBuilderTest<float>(0.3, false);
GHistBuilderTest<double>(0.3, false);
}
TEST(SyclGHistBuilder, ByAtomicDenseCase) {
GHistBuilderTest<float>(0.0, true);
GHistBuilderTest<double>(0.0, true);
}
TEST(SyclGHistBuilder, ByAtomicSparseCase) {
GHistBuilderTest<float>(0.3, true);
GHistBuilderTest<double>(0.3, true);
}
TEST(SyclGHistBuilder, Subtraction) {
GHistSubtractionTest<float>();
GHistSubtractionTest<double>();
}
} // namespace xgboost::sycl::common