[SYCL] Add nodes initialisation (#10269)

---------

Co-authored-by: Dmitry Razdoburdin <>
Co-authored-by: Jiaming Yuan <jm.yuan@outlook.com>
Dmitry Razdoburdin 2024-05-21 17:38:52 +02:00 committed by GitHub
parent 7a54ca41c9
commit c7e7ce7569
7 changed files with 342 additions and 33 deletions

View File

@@ -2,11 +2,6 @@
* Copyright 2017-2023 by Contributors
* \file device_manager.cc
*/
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wtautological-constant-compare"
-#pragma GCC diagnostic ignored "-W#pragma-messages"
-#pragma GCC diagnostic pop
#include "../sycl/device_manager.h"
#include "../../src/collective/communicator-inl.h"

View File

@@ -12,7 +12,11 @@
#include <CL/sycl.hpp>
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wtautological-constant-compare"
#pragma GCC diagnostic ignored "-W#pragma-messages"
#include "xgboost/context.h"
#pragma GCC diagnostic pop
namespace xgboost {
namespace sycl {

View File

@@ -3,19 +3,15 @@
* \file multiclass_obj.cc
* \brief Definition of multi-class classification objectives.
*/
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wtautological-constant-compare"
-#pragma GCC diagnostic ignored "-W#pragma-messages"
-#pragma GCC diagnostic pop
#include <vector>
#include <algorithm>
#include <limits>
#include <utility>
-#include "xgboost/parameter.h"
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wtautological-constant-compare"
#pragma GCC diagnostic ignored "-W#pragma-messages"
#include "xgboost/parameter.h"
#include "xgboost/data.h"
#include "../../src/common/math.h"
#pragma GCC diagnostic pop

View File

@@ -8,6 +8,7 @@
#include <oneapi/dpl/random>
#include "../common/hist_util.h"
#include "../../src/collective/allreduce.h"
namespace xgboost {
namespace sycl {
@@ -111,7 +112,6 @@ void HistUpdater<GradientSumT>::InitSampling(
template<typename GradientSumT>
void HistUpdater<GradientSumT>::InitData(
-Context const * ctx,
const common::GHistIndexMatrix& gmat,
const USMVector<GradientPair, MemoryType::on_device> &gpair,
const DMatrix& fmat,
@@ -215,6 +215,101 @@ void HistUpdater<GradientSumT>::InitData(
data_layout_ = kSparseData;
}
}
if (data_layout_ == kDenseDataZeroBased || data_layout_ == kDenseDataOneBased) {
/* Specialized code for dense data:
choose the column with the smallest positive number of discrete bins.
For dense data (with no missing values),
the sum of the gradient histogram of that column is equal to snode[nid]. */
const std::vector<uint32_t>& row_ptr = gmat.cut.Ptrs();
const auto nfeature = static_cast<bst_uint>(row_ptr.size() - 1);
uint32_t min_nbins_per_feature = 0;
for (bst_uint i = 0; i < nfeature; ++i) {
const uint32_t nbins = row_ptr[i + 1] - row_ptr[i];
if (nbins > 0) {
if (min_nbins_per_feature == 0 || min_nbins_per_feature > nbins) {
min_nbins_per_feature = nbins;
fid_least_bins_ = i;
}
}
}
CHECK_GT(min_nbins_per_feature, 0U);
}
std::fill(snode_host_.begin(), snode_host_.end(), NodeEntry<GradientSumT>(param_));
builder_monitor_.Stop("InitData");
}
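The dense-data shortcut works because, when every row has a value for every feature, each row contributes exactly one bin increment per feature, so summing the histogram bins of any single feature reproduces the node's total gradient statistics; fid_least_bins_ records the feature with the fewest bins so those totals can be recovered with the fewest additions. The following standalone sketch (toy data and hypothetical names, not part of the patch) illustrates the invariant:

#include <array>
#include <cassert>
#include <cmath>
#include <vector>

// Toy illustration: for dense data, every per-feature histogram sums to the
// same node totals, so the feature with the fewest bins is the cheapest to sum.
struct Grad { double g = 0.0, h = 0.0; };

int main() {
  // 4 dense rows, 2 features; feature 0 has 2 bins, feature 1 has 3 bins.
  std::vector<Grad> gpair = {{1.0, 1.0}, {2.0, 1.0}, {-1.0, 1.0}, {0.5, 1.0}};
  std::vector<std::array<int, 2>> bin_idx = {{0, 2}, {1, 0}, {0, 1}, {1, 2}};

  std::vector<std::vector<Grad>> hist = {std::vector<Grad>(2), std::vector<Grad>(3)};
  Grad total;
  for (size_t r = 0; r < gpair.size(); ++r) {
    total.g += gpair[r].g;
    total.h += gpair[r].h;
    for (size_t f = 0; f < 2; ++f) {
      hist[f][bin_idx[r][f]].g += gpair[r].g;
      hist[f][bin_idx[r][f]].h += gpair[r].h;
    }
  }
  for (size_t f = 0; f < 2; ++f) {
    Grad sum;
    for (const auto& b : hist[f]) { sum.g += b.g; sum.h += b.h; }
    assert(std::abs(sum.g - total.g) < 1e-12);  // any feature reproduces the totals
    assert(std::abs(sum.h - total.h) < 1e-12);
  }
  return 0;
}

This is why InitNewNode below only walks the bins of fid_least_bins_ for the dense layouts instead of reducing over all rows.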
template <typename GradientSumT>
void HistUpdater<GradientSumT>::InitNewNode(int nid,
const common::GHistIndexMatrix& gmat,
const USMVector<GradientPair,
MemoryType::on_device> &gpair,
const DMatrix& fmat,
const RegTree& tree) {
builder_monitor_.Start("InitNewNode");
snode_host_.resize(tree.NumNodes(), NodeEntry<GradientSumT>(param_));
{
if (tree[nid].IsRoot()) {
GradStats<GradientSumT> grad_stat;
if (data_layout_ == kDenseDataZeroBased || data_layout_ == kDenseDataOneBased) {
const std::vector<uint32_t>& row_ptr = gmat.cut.Ptrs();
const uint32_t ibegin = row_ptr[fid_least_bins_];
const uint32_t iend = row_ptr[fid_least_bins_ + 1];
const auto* hist = reinterpret_cast<GradStats<GradientSumT>*>(hist_[nid].Data());
std::vector<GradStats<GradientSumT>> ets(iend - ibegin);
qu_.memcpy(ets.data(), hist + ibegin,
(iend - ibegin) * sizeof(GradStats<GradientSumT>)).wait_and_throw();
for (const auto& et : ets) {
grad_stat += et;
}
} else {
const common::RowSetCollection::Elem e = row_set_collection_[nid];
const size_t* row_idxs = e.begin;
const size_t size = e.Size();
const GradientPair* gpair_ptr = gpair.DataConst();
::sycl::buffer<GradStats<GradientSumT>> buff(&grad_stat, 1);
qu_.submit([&](::sycl::handler& cgh) {
auto reduction = ::sycl::reduction(buff, cgh, ::sycl::plus<>());
cgh.parallel_for<>(::sycl::range<1>(size), reduction,
[=](::sycl::item<1> pid, auto& sum) {
size_t i = pid.get_id(0);
size_t row_idx = row_idxs[i];
if constexpr (std::is_same<GradientPair::ValueT, GradientSumT>::value) {
sum += gpair_ptr[row_idx];
} else {
sum += GradStats<GradientSumT>(gpair_ptr[row_idx].GetGrad(),
gpair_ptr[row_idx].GetHess());
}
});
}).wait_and_throw();
}
auto rc = collective::Allreduce(
ctx_, linalg::MakeVec(reinterpret_cast<GradientSumT*>(&grad_stat), 2),
collective::Op::kSum);
SafeColl(rc);
snode_host_[nid].stats = grad_stat;
} else {
int parent_id = tree[nid].Parent();
if (tree[nid].IsLeftChild()) {
snode_host_[nid].stats = snode_host_[parent_id].best.left_sum;
} else {
snode_host_[nid].stats = snode_host_[parent_id].best.right_sum;
}
}
}
// calculating the weights
{
auto evaluator = tree_evaluator_.GetEvaluator();
bst_uint parentid = tree[nid].Parent();
snode_host_[nid].weight = evaluator.CalcWeight(parentid, snode_host_[nid].stats);
snode_host_[nid].root_gain = evaluator.CalcGain(parentid, snode_host_[nid].stats);
}
builder_monitor_.Stop("InitNewNode");
}
template class HistUpdater<float>;
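For the sparse layouts, the root statistics in InitNewNode are accumulated on the device with a ::sycl::buffer plus ::sycl::reduction and then combined across workers. A minimal standalone sketch of that reduction pattern (illustrative names, not taken from the patch):

#include <CL/sycl.hpp>
#include <iostream>
#include <vector>

int main() {
  std::vector<float> data(1024, 1.0f);
  float sum = 0.0f;
  ::sycl::queue q;
  {
    ::sycl::buffer<float> in(data.data(), ::sycl::range<1>(data.size()));
    ::sycl::buffer<float> out(&sum, 1);
    q.submit([&](::sycl::handler& cgh) {
      auto acc = in.get_access<::sycl::access::mode::read>(cgh);
      // The reduction object accumulates per-work-item partial sums into 'out'.
      auto red = ::sycl::reduction(out, cgh, ::sycl::plus<>());
      cgh.parallel_for(::sycl::range<1>(data.size()), red,
                       [=](::sycl::item<1> it, auto& s) { s += acc[it]; });
    }).wait_and_throw();
  }  // destroying 'out' copies the reduced value back into 'sum'
  std::cout << sum << std::endl;  // prints 1024
  return 0;
}

InitNewNode then views the accumulated GradStats as a two-element vector and sums it across workers via collective::Allreduce, so every worker starts from identical root statistics.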

View File

@@ -26,6 +26,22 @@ namespace xgboost {
namespace sycl {
namespace tree {
// data structure
template<typename GradType>
struct NodeEntry {
/*! \brief statistics for the node entry */
GradStats<GradType> stats;
/*! \brief loss of this node, without split */
GradType root_gain;
/*! \brief weight calculated related to current data */
GradType weight;
/*! \brief current best solution */
SplitEntry<GradType> best;
// constructor
explicit NodeEntry(const xgboost::tree::TrainParam& param)
: root_gain(0.0f), weight(0.0f) {}
};
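root_gain and weight are later filled in by InitNewNode from the accumulated stats through the tree evaluator. In the unconstrained case (no monotone constraints, default max_delta_step) these reduce to the familiar closed forms weight = -G / (H + lambda) and gain = G^2 / (H + lambda); a minimal sketch under those assumptions (hypothetical helper names, not the evaluator's actual API):

#include <iostream>

// G: sum of gradients, H: sum of hessians, lambda: L2 regularisation term.
// Unconstrained closed-form solutions used by gradient-boosted trees.
double CalcWeight(double G, double H, double lambda) { return -G / (H + lambda); }
double CalcGain(double G, double H, double lambda) { return G * G / (H + lambda); }

int main() {
  const double G = -3.5, H = 8.0, lambda = 1.0;
  std::cout << "weight = " << CalcWeight(G, H, lambda)        // ~0.389
            << ", gain = " << CalcGain(G, H, lambda) << "\n";  // ~1.361
  return 0;
}

The evaluator form additionally accounts for training constraints (e.g. monotonicity), which is why the patch routes the computation through tree_evaluator_ rather than using the closed forms directly.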
template<typename GradientSumT>
class HistUpdater {
public:
@@ -33,12 +49,13 @@ class HistUpdater {
using GHistRowT = common::GHistRow<GradientSumT, memory_type>;
using GradientPairT = xgboost::detail::GradientPairInternal<GradientSumT>;
-explicit HistUpdater(::sycl::queue qu,
+explicit HistUpdater(const Context* ctx,
+::sycl::queue qu,
const xgboost::tree::TrainParam& param,
std::unique_ptr<TreeUpdater> pruner,
FeatureInteractionConstraintHost int_constraints_,
DMatrix const* fmat)
-: qu_(qu), param_(param),
+: ctx_(ctx), qu_(qu), param_(param),
tree_evaluator_(qu, param, fmat->Info().num_col_),
pruner_(std::move(pruner)),
interaction_constraints_{std::move(int_constraints_)},
@@ -61,8 +78,7 @@ class HistUpdater {
USMVector<size_t, MemoryType::on_device>* row_indices);
-void InitData(Context const * ctx,
-const common::GHistIndexMatrix& gmat,
+void InitData(const common::GHistIndexMatrix& gmat,
const USMVector<GradientPair, MemoryType::on_device> &gpair,
const DMatrix& fmat,
const RegTree& tree);
@@ -78,6 +94,12 @@
data_layout_ != kSparseData, hist_buffer, event_priv);
}
void InitNewNode(int nid,
const common::GHistIndexMatrix& gmat,
const USMVector<GradientPair, MemoryType::on_device> &gpair,
const DMatrix& fmat,
const RegTree& tree);
void BuildLocalHistograms(const common::GHistIndexMatrix &gmat,
RegTree *p_tree,
const USMVector<GradientPair, MemoryType::on_device> &gpair);
@@ -89,6 +111,7 @@
const USMVector<GradientPair, MemoryType::on_device> &gpair);
// --data fields--
const Context* ctx_;
size_t sub_group_size_;
// the internal row sets
@@ -113,9 +136,16 @@
/*! \brief cumulative histogram of gradients. */
common::HistCollection<GradientSumT, MemoryType::on_device> hist_;
/*! \brief TreeNode Data: statistics for each constructed node */
std::vector<NodeEntry<GradientSumT>> snode_host_;
xgboost::common::Monitor builder_monitor_;
xgboost::common::Monitor kernel_monitor_;
/*! \brief feature with the least number of bins; used for the dense specialization
of InitNewNode() */
uint32_t fid_least_bins_;
uint64_t seed_ = 0;
// key is the node id which should be calculated by Subtraction Trick, value is the node which

View File

@@ -49,6 +49,115 @@ struct TrainParam {
template <typename GradType>
using GradStats = xgboost::detail::GradientPairInternal<GradType>;
/*!
* \brief SYCL implementation of SplitEntryContainer for device compilation.
* The original structure cannot be used because its 'cat_bits' field, of type
* std::vector<uint32_t>, is not device-copyable.
*/
template<typename GradientT>
struct SplitEntryContainer {
/*! \brief loss change after split this node */
bst_float loss_chg {0.0f};
/*! \brief split index */
bst_feature_t sindex{0};
bst_float split_value{0.0f};
GradientT left_sum;
GradientT right_sum;
SplitEntryContainer() = default;
friend std::ostream& operator<<(std::ostream& os, SplitEntryContainer const& s) {
os << "loss_chg: " << s.loss_chg << ", "
<< "split index: " << s.SplitIndex() << ", "
<< "split value: " << s.split_value << ", "
<< "left_sum: " << s.left_sum << ", "
<< "right_sum: " << s.right_sum;
return os;
}
/*!\return feature index to split on */
bst_feature_t SplitIndex() const { return sindex & ((1U << 31) - 1U); }
/*!\return whether missing value goes to left branch */
bool DefaultLeft() const { return (sindex >> 31) != 0; }
/*!
* \brief Decides whether the current entry can be replaced with the given statistics.
*
* This function gives priority to the lower feature index when loss_chg == new_loss_chg.
* Not the best way, but it helps to give consistent results during multi-threaded
* execution.
*
* \param new_loss_chg the loss reduction obtained through the split
* \param split_index the feature index the split is on
*/
inline bool NeedReplace(bst_float new_loss_chg, unsigned split_index) const {
if (::sycl::isinf(new_loss_chg)) { // in some cases new_loss_chg can be NaN or Inf,
// for example when lambda = 0 & min_child_weight = 0
// skip value in this case
return false;
} else if (this->SplitIndex() <= split_index) {
return new_loss_chg > this->loss_chg;
} else {
return !(this->loss_chg > new_loss_chg);
}
}
/*!
* \brief update the split entry, replace it if e is better
* \param e candidate split solution
* \return whether the proposed split is better and can replace current split
*/
inline bool Update(const SplitEntryContainer &e) {
if (this->NeedReplace(e.loss_chg, e.SplitIndex())) {
this->loss_chg = e.loss_chg;
this->sindex = e.sindex;
this->split_value = e.split_value;
this->left_sum = e.left_sum;
this->right_sum = e.right_sum;
return true;
} else {
return false;
}
}
/*!
* \brief update the split entry, replace it if e is better
* \param new_loss_chg loss reduction of new candidate
* \param split_index feature index to split on
* \param new_split_value the split point
* \param default_left whether the missing value goes to left
* \return whether the proposed split is better and can replace current split
*/
bool Update(bst_float new_loss_chg, unsigned split_index,
bst_float new_split_value, bool default_left,
const GradientT &left_sum,
const GradientT &right_sum) {
if (this->NeedReplace(new_loss_chg, split_index)) {
this->loss_chg = new_loss_chg;
if (default_left) {
split_index |= (1U << 31);
}
this->sindex = split_index;
this->split_value = new_split_value;
this->left_sum = left_sum;
this->right_sum = right_sum;
return true;
} else {
return false;
}
}
/*! \brief same as update, used by AllReduce*/
inline static void Reduce(SplitEntryContainer &dst, // NOLINT(*)
const SplitEntryContainer &src) { // NOLINT(*)
dst.Update(src);
}
};
template<typename GradType>
using SplitEntry = SplitEntryContainer<GradStats<GradType>>;
} // namespace tree
} // namespace sycl
} // namespace xgboost
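The Update/NeedReplace pair keeps split selection deterministic when candidates are evaluated in parallel: a strictly larger loss_chg always wins, and on an exact tie the split with the lower feature index is kept. A small standalone illustration of that rule (a simplified stand-in type, not the actual container):

#include <cassert>
#include <cmath>

// Simplified stand-in mirroring SplitEntryContainer's replacement rule.
struct Split {
  float loss_chg = 0.0f;
  unsigned sindex = 0;
  bool NeedReplace(float new_loss_chg, unsigned split_index) const {
    if (std::isinf(new_loss_chg)) return false;  // skip infinite candidates
    if (sindex <= split_index) return new_loss_chg > loss_chg;
    return !(loss_chg > new_loss_chg);
  }
};

int main() {
  Split best{0.5f, 3};
  assert(best.NeedReplace(0.7f, 7));   // larger gain always replaces
  assert(!best.NeedReplace(0.5f, 7));  // tie, higher feature index: keep the current split
  assert(best.NeedReplace(0.5f, 1));   // tie, lower feature index: replace
  return 0;
}

Reduce() simply forwards to Update(), so the same rule applies when partial results from different workers are merged.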

View File

@@ -16,11 +16,13 @@ namespace xgboost::sycl::tree {
template <typename GradientSumT>
class TestHistUpdater : public HistUpdater<GradientSumT> {
public:
-TestHistUpdater(::sycl::queue qu,
+TestHistUpdater(const Context* ctx,
+::sycl::queue qu,
const xgboost::tree::TrainParam& param,
std::unique_ptr<TreeUpdater> pruner,
FeatureInteractionConstraintHost int_constraints_,
-DMatrix const* fmat) : HistUpdater<GradientSumT>(qu, param, std::move(pruner),
+DMatrix const* fmat) : HistUpdater<GradientSumT>(ctx, qu, param,
+std::move(pruner),
int_constraints_, fmat) {}
void TestInitSampling(const USMVector<GradientPair, MemoryType::on_device> &gpair, void TestInitSampling(const USMVector<GradientPair, MemoryType::on_device> &gpair,
@@ -28,12 +30,11 @@ class TestHistUpdater : public HistUpdater<GradientSumT> {
HistUpdater<GradientSumT>::InitSampling(gpair, row_indices);
}
-auto* TestInitData(Context const * ctx,
-const common::GHistIndexMatrix& gmat,
+auto* TestInitData(const common::GHistIndexMatrix& gmat,
const USMVector<GradientPair, MemoryType::on_device> &gpair,
const DMatrix& fmat,
const RegTree& tree) {
-HistUpdater<GradientSumT>::InitData(ctx, gmat, gpair, fmat, tree);
+HistUpdater<GradientSumT>::InitData(gmat, gpair, fmat, tree);
return &(HistUpdater<GradientSumT>::row_set_collection_);
}
@@ -44,6 +45,15 @@ class TestHistUpdater : public HistUpdater<GradientSumT> {
HistUpdater<GradientSumT>::BuildHistogramsLossGuide(entry, gmat, p_tree, gpair);
return &(HistUpdater<GradientSumT>::hist_);
}
auto TestInitNewNode(int nid,
const common::GHistIndexMatrix& gmat,
const USMVector<GradientPair, MemoryType::on_device> &gpair,
const DMatrix& fmat,
const RegTree& tree) {
HistUpdater<GradientSumT>::InitNewNode(nid, gmat, gpair, fmat, tree);
return HistUpdater<GradientSumT>::snode_host_[nid];
}
};
void GenerateRandomGPairs(::sycl::queue* qu, GradientPair* gpair_ptr, size_t num_rows, bool has_neg_hess) {
@@ -79,7 +89,7 @@ void TestHistUpdaterSampling(const xgboost::tree::TrainParam& param) {
FeatureInteractionConstraintHost int_constraints;
std::unique_ptr<TreeUpdater> pruner{TreeUpdater::Create("prune", &ctx, &task)};
-TestHistUpdater<GradientSumT> updater(qu, param, std::move(pruner), int_constraints, p_fmat.get());
+TestHistUpdater<GradientSumT> updater(&ctx, qu, param, std::move(pruner), int_constraints, p_fmat.get());
USMVector<size_t, MemoryType::on_device> row_indices_0(&qu, num_rows);
USMVector<size_t, MemoryType::on_device> row_indices_1(&qu, num_rows);
@@ -135,7 +145,7 @@ void TestHistUpdaterInitData(const xgboost::tree::TrainParam& param, bool has_ne
FeatureInteractionConstraintHost int_constraints;
std::unique_ptr<TreeUpdater> pruner{TreeUpdater::Create("prune", &ctx, &task)};
-TestHistUpdater<GradientSumT> updater(qu, param, std::move(pruner), int_constraints, p_fmat.get());
+TestHistUpdater<GradientSumT> updater(&ctx, qu, param, std::move(pruner), int_constraints, p_fmat.get());
USMVector<GradientPair, MemoryType::on_device> gpair(&qu, num_rows);
GenerateRandomGPairs(&qu, gpair.Data(), num_rows, has_neg_hess);
@@ -146,7 +156,7 @@ void TestHistUpdaterInitData(const xgboost::tree::TrainParam& param, bool has_ne
gmat.Init(qu, &ctx, dmat, n_bins);
RegTree tree;
-auto* row_set_collection = updater.TestInitData(&ctx, gmat, gpair, *p_fmat, tree);
+auto* row_set_collection = updater.TestInitData(gmat, gpair, *p_fmat, tree);
auto& row_indices = row_set_collection->Data();
std::vector<size_t> row_indices_host(row_indices.Size());
@@ -191,7 +201,7 @@ void TestHistUpdaterBuildHistogramsLossGuide(const xgboost::tree::TrainParam& pa
FeatureInteractionConstraintHost int_constraints;
std::unique_ptr<TreeUpdater> pruner{TreeUpdater::Create("prune", &ctx, &task)};
-TestHistUpdater<GradientSumT> updater(qu, param, std::move(pruner), int_constraints, p_fmat.get());
+TestHistUpdater<GradientSumT> updater(&ctx, qu, param, std::move(pruner), int_constraints, p_fmat.get());
updater.SetHistSynchronizer(new BatchHistSynchronizer<GradientSumT>());
updater.SetHistRowsAdder(new BatchHistRowsAdder<GradientSumT>());
@@ -213,7 +223,7 @@ void TestHistUpdaterBuildHistogramsLossGuide(const xgboost::tree::TrainParam& pa
ExpandEntry node1(1, tree.GetDepth(1));
ExpandEntry node2(2, tree.GetDepth(2));
-auto* row_set_collection = updater.TestInitData(&ctx, gmat, gpair, *p_fmat, tree);
+auto* row_set_collection = updater.TestInitData(gmat, gpair, *p_fmat, tree);
row_set_collection->AddSplit(0, 1, 2, 42, num_rows - 42);
updater.TestBuildHistogramsLossGuide(node0, gmat, &tree, gpair);
@@ -237,6 +247,66 @@ void TestHistUpdaterBuildHistogramsLossGuide(const xgboost::tree::TrainParam& pa
}
}
template <typename GradientSumT>
void TestHistUpdaterInitNewNode(const xgboost::tree::TrainParam& param, float sparsity) {
const size_t num_rows = 1u << 8;
const size_t num_columns = 1;
const size_t n_bins = 32;
Context ctx;
ctx.UpdateAllowUnknown(Args{{"device", "sycl"}});
DeviceManager device_manager;
auto qu = device_manager.GetQueue(ctx.Device());
ObjInfo task{ObjInfo::kRegression};
auto p_fmat = RandomDataGenerator{num_rows, num_columns, sparsity}.GenerateDMatrix();
FeatureInteractionConstraintHost int_constraints;
std::unique_ptr<TreeUpdater> pruner{TreeUpdater::Create("prune", &ctx, &task)};
TestHistUpdater<GradientSumT> updater(&ctx, qu, param, std::move(pruner), int_constraints, p_fmat.get());
updater.SetHistSynchronizer(new BatchHistSynchronizer<GradientSumT>());
updater.SetHistRowsAdder(new BatchHistRowsAdder<GradientSumT>());
USMVector<GradientPair, MemoryType::on_device> gpair(&qu, num_rows);
auto* gpair_ptr = gpair.Data();
GenerateRandomGPairs(&qu, gpair_ptr, num_rows, false);
DeviceMatrix dmat;
dmat.Init(qu, p_fmat.get());
common::GHistIndexMatrix gmat;
gmat.Init(qu, &ctx, dmat, n_bins);
RegTree tree;
tree.ExpandNode(0, 0, 0, false, 0, 0, 0, 0, 0, 0, 0);
ExpandEntry node(ExpandEntry::kRootNid, tree.GetDepth(ExpandEntry::kRootNid));
auto* row_set_collection = updater.TestInitData(gmat, gpair, *p_fmat, tree);
auto& row_idxs = row_set_collection->Data();
const size_t* row_idxs_ptr = row_idxs.DataConst();
updater.TestBuildHistogramsLossGuide(node, gmat, &tree, gpair);
const auto snode = updater.TestInitNewNode(ExpandEntry::kRootNid, gmat, gpair, *p_fmat, tree);
GradStats<GradientSumT> grad_stat;
{
::sycl::buffer<GradStats<GradientSumT>> buff(&grad_stat, 1);
qu.submit([&](::sycl::handler& cgh) {
auto buff_acc = buff.template get_access<::sycl::access::mode::read_write>(cgh);
cgh.single_task<>([=]() {
for (size_t i = 0; i < num_rows; ++i) {
size_t row_idx = row_idxs_ptr[i];
buff_acc[0] += GradStats<GradientSumT>(gpair_ptr[row_idx].GetGrad(),
gpair_ptr[row_idx].GetHess());
}
});
}).wait_and_throw();
}
EXPECT_NEAR(snode.stats.GetGrad(), grad_stat.GetGrad(), 1e-6 * grad_stat.GetGrad());
EXPECT_NEAR(snode.stats.GetHess(), grad_stat.GetHess(), 1e-6 * grad_stat.GetHess());
}
TEST(SyclHistUpdater, Sampling) {
xgboost::tree::TrainParam param;
param.UpdateAllowUnknown(Args{{"subsample", "0.7"}});
@@ -266,4 +336,14 @@ TEST(SyclHistUpdater, BuildHistogramsLossGuide) {
TestHistUpdaterBuildHistogramsLossGuide<double>(param, 0.5);
}
TEST(SyclHistUpdater, InitNewNode) {
xgboost::tree::TrainParam param;
param.UpdateAllowUnknown(Args{{"max_depth", "3"}});
TestHistUpdaterInitNewNode<float>(param, 0.0);
TestHistUpdaterInitNewNode<float>(param, 0.5);
TestHistUpdaterInitNewNode<double>(param, 0.0);
TestHistUpdaterInitNewNode<double>(param, 0.5);
}
} // namespace xgboost::sycl::tree