[sycl] add partitioning and related tests (#10080)
Co-authored-by: Dmitry Razdoburdin <>
committed by GitHub
parent 2c12b956da
commit 7a61216690
@@ -21,6 +21,9 @@
#pragma GCC diagnostic pop

#include "../data.h"
#include "row_set.h"
#include "../data/gradient_index.h"
#include "../tree/expand_entry.h"

#include <CL/sycl.hpp>

@@ -28,6 +31,87 @@ namespace xgboost {
namespace sycl {
namespace common {

// Split row indices (rid_span) into 2 parts (both stored in rid_buf),
// depending on the comparison of the row's bin value with the split point (split_cond).
// Handles dense columns.
template <bool default_left, typename BinIdxType>
inline ::sycl::event PartitionDenseKernel(
    ::sycl::queue* qu,
    const GHistIndexMatrix& gmat,
    const RowSetCollection::Elem& rid_span,
    const size_t fid,
    const int32_t split_cond,
    xgboost::common::Span<size_t>* rid_buf,
    size_t* parts_size,
    ::sycl::event event) {
  const size_t row_stride = gmat.row_stride;
  const BinIdxType* gradient_index = gmat.index.data<BinIdxType>();
  const size_t* rid = rid_span.begin;
  const size_t range_size = rid_span.Size();
  const size_t offset = gmat.cut.Ptrs()[fid];

  size_t* p_rid_buf = rid_buf->data();

  return qu->submit([&](::sycl::handler& cgh) {
    cgh.depends_on(event);
    cgh.parallel_for<>(::sycl::range<1>(range_size), [=](::sycl::item<1> nid) {
      const size_t id = rid[nid.get_id(0)];
      const int32_t value = static_cast<int32_t>(gradient_index[id * row_stride + fid] + offset);
      const bool is_left = value <= split_cond;
      if (is_left) {
        AtomicRef<size_t> n_left(parts_size[0]);
        p_rid_buf[n_left.fetch_add(1)] = id;
      } else {
        AtomicRef<size_t> n_right(parts_size[1]);
        p_rid_buf[range_size - n_right.fetch_add(1) - 1] = id;
      }
    });
  });
}
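Both partition kernels pack their output into a single buffer: rows routed left are written from the front, rows routed right from the back, with one atomic counter per side. Below is a minimal host-side C++ sketch of that packing scheme, with std::atomic standing in for AtomicRef; all names are illustrative and not part of the commit.

#include <atomic>
#include <cstddef>
#include <cstdint>
#include <vector>

// Two-sided packing: left rows fill rid_buf from the front, right rows from
// the back. parts_size[0]/[1] end up holding the left/right counts, which is
// what the kernels above report back through parts_size.
void PartitionTwoSided(const std::vector<size_t>& rid,
                       const std::vector<int32_t>& values,
                       int32_t split_cond,
                       std::vector<size_t>* rid_buf,
                       size_t parts_size[2]) {
  const size_t n = rid.size();
  rid_buf->resize(n);
  std::atomic<size_t> n_left{0};
  std::atomic<size_t> n_right{0};
  for (size_t i = 0; i < n; ++i) {  // one SYCL work-item per iteration
    const size_t id = rid[i];
    if (values[id] <= split_cond) {
      (*rid_buf)[n_left.fetch_add(1)] = id;           // pack from the front
    } else {
      (*rid_buf)[n - n_right.fetch_add(1) - 1] = id;  // pack from the back
    }
  }
  parts_size[0] = n_left.load();
  parts_size[1] = n_right.load();
}

Within each half the final order depends on how the atomic increments interleave, so only the two counts (later copied into result_rows_) carry meaning, not the order of the ids.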

// Split row indices (rid_span) into 2 parts (both stored in rid_buf),
// depending on the comparison of the row's bin value with the split point (split_cond).
// Handles sparse columns.
template <bool default_left, typename BinIdxType>
inline ::sycl::event PartitionSparseKernel(::sycl::queue* qu,
                                           const GHistIndexMatrix& gmat,
                                           const RowSetCollection::Elem& rid_span,
                                           const size_t fid,
                                           const int32_t split_cond,
                                           xgboost::common::Span<size_t>* rid_buf,
                                           size_t* parts_size,
                                           ::sycl::event event) {
  const size_t row_stride = gmat.row_stride;
  const BinIdxType* gradient_index = gmat.index.data<BinIdxType>();
  const size_t* rid = rid_span.begin;
  const size_t range_size = rid_span.Size();
  const uint32_t* cut_ptrs = gmat.cut_device.Ptrs().DataConst();

  size_t* p_rid_buf = rid_buf->data();
  return qu->submit([&](::sycl::handler& cgh) {
    cgh.depends_on(event);
    cgh.parallel_for<>(::sycl::range<1>(range_size), [=](::sycl::item<1> nid) {
      const size_t id = rid[nid.get_id(0)];

      const BinIdxType* gr_index_local = gradient_index + row_stride * id;
      const int32_t fid_local = std::lower_bound(gr_index_local,
                                                 gr_index_local + row_stride,
                                                 cut_ptrs[fid]) - gr_index_local;
      const bool is_left = (fid_local >= row_stride ||
                            gr_index_local[fid_local] >= cut_ptrs[fid + 1]) ?
                               default_left :
                               gr_index_local[fid_local] <= split_cond;
      if (is_left) {
        AtomicRef<size_t> n_left(parts_size[0]);
        p_rid_buf[n_left.fetch_add(1)] = id;
      } else {
        AtomicRef<size_t> n_right(parts_size[1]);
        p_rid_buf[range_size - n_right.fetch_add(1) - 1] = id;
      }
    });
  });
}
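The sparse kernel works because each row stores the bins of its non-missing features as globally numbered, ascending bin ids: the bin for feature fid, if present, must lie in [cut_ptrs[fid], cut_ptrs[fid + 1]), so a binary search for cut_ptrs[fid] either finds it or proves the feature is missing, in which case the row takes the node's default direction. A standalone, host-side sketch of that lookup, with illustrative names:

#include <algorithm>
#include <cstddef>
#include <cstdint>

// Returns the row's global bin id for feature fid, or -1 if the feature is
// missing in this row. row_bins holds the row's bin ids in ascending order.
inline int32_t FindFeatureBin(const uint8_t* row_bins, size_t row_stride,
                              const uint32_t* cut_ptrs, size_t fid) {
  const uint8_t* it = std::lower_bound(row_bins, row_bins + row_stride,
                                       cut_ptrs[fid]);
  const size_t pos = static_cast<size_t>(it - row_bins);
  if (pos >= row_stride || *it >= cut_ptrs[fid + 1]) {
    return -1;  // no bin in [cut_ptrs[fid], cut_ptrs[fid + 1]): missing value
  }
  return static_cast<int32_t>(*it);
}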

// The builder is required to partition samples into the left and right children for a set of nodes.
class PartitionBuilder {
 public:
@@ -53,7 +137,6 @@ class PartitionBuilder {
    return result_rows_[2 * nid];
  }

  size_t GetNRightElems(int nid) const {
    return result_rows_[2 * nid + 1];
  }
@@ -72,19 +155,97 @@ class PartitionBuilder {
    return { data_.Data() + nodes_offsets_[nid], nodes_offsets_[nid + 1] - nodes_offsets_[nid] };
  }

  template <typename BinIdxType>
  ::sycl::event Partition(const int32_t split_cond,
                          const GHistIndexMatrix& gmat,
                          const RowSetCollection::Elem& rid_span,
                          const xgboost::RegTree::Node& node,
                          xgboost::common::Span<size_t>* rid_buf,
                          size_t* parts_size,
                          ::sycl::event event) {
    const bst_uint fid = node.SplitIndex();
    const bool default_left = node.DefaultLeft();

    if (gmat.IsDense()) {
      if (default_left) {
        return PartitionDenseKernel<true, BinIdxType>(qu_, gmat, rid_span, fid,
                                                      split_cond, rid_buf, parts_size, event);
      } else {
        return PartitionDenseKernel<false, BinIdxType>(qu_, gmat, rid_span, fid,
                                                       split_cond, rid_buf, parts_size, event);
      }
    } else {
      if (default_left) {
        return PartitionSparseKernel<true, BinIdxType>(qu_, gmat, rid_span, fid,
                                                       split_cond, rid_buf, parts_size, event);
      } else {
        return PartitionSparseKernel<false, BinIdxType>(qu_, gmat, rid_span, fid,
                                                        split_cond, rid_buf, parts_size, event);
      }
    }
  }
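Partition() turns two runtime facts, the matrix layout and the node's default direction, into template arguments, so each of the four kernel instantiations is fixed at compile time rather than branching per row. The idiom in isolation (self-contained sketch, names invented for illustration):

#include <cstdio>

// Stand-in for PartitionDenseKernel / PartitionSparseKernel.
template <bool kDense, bool kDefaultLeft>
void RunKernel() {
  std::printf("dense=%d default_left=%d\n", kDense, kDefaultLeft);
}

template <bool kDense>
void DispatchDefaultLeft(bool default_left) {
  if (default_left) {
    RunKernel<kDense, true>();
  } else {
    RunKernel<kDense, false>();
  }
}

// Two runtime booleans select one of four compile-time specializations.
void Dispatch(bool is_dense, bool default_left) {
  if (is_dense) {
    DispatchDefaultLeft<true>(default_left);
  } else {
    DispatchDefaultLeft<false>(default_left);
  }
}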

  // Entry point for Partition
  void Partition(const GHistIndexMatrix& gmat,
                 const std::vector<tree::ExpandEntry> nodes,
                 const RowSetCollection& row_set_collection,
                 const std::vector<int32_t>& split_conditions,
                 RegTree* p_tree,
                 ::sycl::event* general_event) {
    nodes_events_.resize(n_nodes_);

    parts_size_.ResizeAndFill(qu_, 2 * n_nodes_, 0, general_event);

    for (size_t node_in_set = 0; node_in_set < n_nodes_; node_in_set++) {
      const int32_t nid = nodes[node_in_set].nid;
      ::sycl::event& node_event = nodes_events_[node_in_set];
      const auto& rid_span = row_set_collection[nid];
      if (rid_span.Size() > 0) {
        const RegTree::Node& node = (*p_tree)[nid];
        xgboost::common::Span<size_t> rid_buf = GetData(node_in_set);
        size_t* part_size = parts_size_.Data() + 2 * node_in_set;
        int32_t split_condition = split_conditions[node_in_set];
        switch (gmat.index.GetBinTypeSize()) {
          case common::BinTypeSize::kUint8BinsTypeSize:
            node_event = Partition<uint8_t>(split_condition, gmat, rid_span, node,
                                            &rid_buf, part_size, *general_event);
            break;
          case common::BinTypeSize::kUint16BinsTypeSize:
            node_event = Partition<uint16_t>(split_condition, gmat, rid_span, node,
                                             &rid_buf, part_size, *general_event);
            break;
          case common::BinTypeSize::kUint32BinsTypeSize:
            node_event = Partition<uint32_t>(split_condition, gmat, rid_span, node,
                                             &rid_buf, part_size, *general_event);
            break;
          default:
            CHECK(false);  // no default behavior
        }
      } else {
        node_event = ::sycl::event();
      }
    }

    *general_event = qu_->memcpy(result_rows_.data(),
                                 parts_size_.DataConst(),
                                 sizeof(size_t) * 2 * n_nodes_,
                                 nodes_events_);
  }
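The entry point above builds a small event graph: every per-node kernel depends on *general_event (which covers the parts_size_ fill), the kernels are independent of each other, and the final memcpy of the counts depends on all of them. A self-contained sketch of this fan-out/fan-in pattern, illustrative code rather than the commit's API:

#include <CL/sycl.hpp>
#include <vector>

// Fan-out/fan-in with SYCL events: one setup event, n independent kernels,
// and one gathering copy that depends on every kernel event.
void FanOutFanIn(::sycl::queue* qu, int* device_buf, int n,
                 std::vector<int>* host_out) {
  ::sycl::event setup = qu->memset(device_buf, 0, n * sizeof(int));
  std::vector<::sycl::event> kernel_events(n);
  for (int i = 0; i < n; ++i) {
    kernel_events[i] = qu->submit([&](::sycl::handler& cgh) {
      cgh.depends_on(setup);  // each kernel waits only for the fill
      cgh.single_task<>([=] { device_buf[i] = i + 1; });
    });
  }
  host_out->resize(n);
  qu->memcpy(host_out->data(), device_buf, n * sizeof(int),
             kernel_events).wait();  // the copy waits for every kernel
}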

  void MergeToArray(size_t nid,
                    size_t* data_result,
-                   ::sycl::event event) {
+                   ::sycl::event* event) {
    size_t n_nodes_total = GetNLeftElems(nid) + GetNRightElems(nid);
    if (n_nodes_total > 0) {
      const size_t* data = data_.Data() + nodes_offsets_[nid];
-     qu_->memcpy(data_result, data, sizeof(size_t) * n_nodes_total, event);
+     qu_->memcpy(data_result, data, sizeof(size_t) * n_nodes_total, *event);
    }
  }

 protected:
  std::vector<size_t> nodes_offsets_;
  std::vector<size_t> result_rows_;
  std::vector<::sycl::event> nodes_events_;
  size_t n_nodes_;

  USMVector<size_t, MemoryType::on_device> parts_size_;

@@ -171,20 +171,20 @@ class USMVector {
    }
  }

-  ::sycl::event ResizeAndFill(::sycl::queue* qu, size_t size_new, int v) {
+  void ResizeAndFill(::sycl::queue* qu, size_t size_new, int v, ::sycl::event* event) {
    if (size_new <= size_) {
      size_ = size_new;
-     return qu->memset(data_.get(), v, size_new * sizeof(T));
+     *event = qu->memset(data_.get(), v, size_new * sizeof(T), *event);
    } else if (size_new <= capacity_) {
      size_ = size_new;
-     return qu->memset(data_.get(), v, size_new * sizeof(T));
+     *event = qu->memset(data_.get(), v, size_new * sizeof(T), *event);
    } else {
      size_t size_old = size_;
      auto data_old = data_;
      size_ = size_new;
      capacity_ = size_new;
      data_ = allocate_memory_(qu, size_);
-     return qu->memset(data_.get(), v, size_new * sizeof(T));
+     *event = qu->memset(data_.get(), v, size_new * sizeof(T), *event);
    }
  }
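With the new signature, ResizeAndFill() becomes one link in an event chain: the memset both depends on the caller's event and replaces it with its own, which is how Partition() threads *general_event through the fill. A hypothetical caller, sketched under the assumption that USMVector and MemoryType are the plugin's own types; the helper itself is invented for illustration:

#include <CL/sycl.hpp>

// Hypothetical helper: resets two device-side USMVectors to zero with a
// single blocking wait at the end.
void ResetCounters(::sycl::queue* qu,
                   USMVector<size_t, MemoryType::on_device>* a,
                   USMVector<size_t, MemoryType::on_device>* b,
                   size_t n) {
  ::sycl::event event;                 // default-constructed events are complete
  a->ResizeAndFill(qu, n, 0, &event);  // memset chained onto `event`
  b->ResizeAndFill(qu, n, 0, &event);  // waits for the previous fill
  event.wait();                        // one host sync for the whole chain
}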

@@ -211,11 +211,16 @@ class USMVector {
struct DeviceMatrix {
  DMatrix* p_mat;  // Pointer to the original matrix on the host
  ::sycl::queue qu_;
-  USMVector<size_t> row_ptr;
+  USMVector<size_t, MemoryType::on_device> row_ptr;
  USMVector<Entry, MemoryType::on_device> data;
  size_t total_offset;

-  DeviceMatrix(::sycl::queue qu, DMatrix* dmat) : p_mat(dmat), qu_(qu) {
+  DeviceMatrix() = default;

+  void Init(::sycl::queue qu, DMatrix* dmat) {
+    qu_ = qu;
+    p_mat = dmat;

    size_t num_row = 0;
    size_t num_nonzero = 0;
    for (auto &batch : dmat->GetBatches<SparsePage>()) {
@@ -226,27 +231,41 @@ struct DeviceMatrix {
    }

    row_ptr.Resize(&qu_, num_row + 1);
+    size_t* rows = row_ptr.Data();
    data.Resize(&qu_, num_nonzero);

    size_t data_offset = 0;
+    ::sycl::event event;
    for (auto &batch : dmat->GetBatches<SparsePage>()) {
      const auto& data_vec = batch.data.HostVector();
      const auto& offset_vec = batch.offset.HostVector();
      size_t batch_size = batch.Size();
      if (batch_size > 0) {
-        std::copy(offset_vec.data(), offset_vec.data() + batch_size,
-                  row_ptr.Data() + batch.base_rowid);
-        if (batch.base_rowid > 0) {
-          for (size_t i = 0; i < batch_size; i++)
-            row_ptr[i + batch.base_rowid] += batch.base_rowid;
+        const auto base_rowid = batch.base_rowid;
+        event = qu.memcpy(row_ptr.Data() + base_rowid, offset_vec.data(),
+                          sizeof(size_t) * batch_size, event);
+        if (base_rowid > 0) {
+          qu.submit([&](::sycl::handler& cgh) {
+            cgh.depends_on(event);
+            cgh.parallel_for<>(::sycl::range<1>(batch_size), [=](::sycl::id<1> pid) {
+              int row_id = pid[0];
+              rows[row_id] += base_rowid;
+            });
+          });
        }
-        qu.memcpy(data.Data() + data_offset,
-                  data_vec.data(),
-                  offset_vec[batch_size] * sizeof(Entry)).wait();
+        event = qu.memcpy(data.Data() + data_offset, data_vec.data(),
+                          sizeof(Entry) * offset_vec[batch_size], event);
        data_offset += offset_vec[batch_size];
+        qu.wait();
      }
    }
-    row_ptr[num_row] = data_offset;
+    qu.submit([&](::sycl::handler& cgh) {
+      cgh.depends_on(event);
+      cgh.single_task<>([=] {
+        rows[num_row] = data_offset;
+      });
+    });
+    qu.wait();
    total_offset = data_offset;
  }
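Init() assembles the pages into one device-side CSR structure: each batch's offsets are copied into row_ptr (adjusted by base_rowid for non-first batches), the entries are concatenated into data, and the sentinel row_ptr[num_row] is written on the device. On a toy matrix the result would look like this (worked example, not code from the commit):

// 3 rows, 4 stored entries:
//   row 0: (feature 0, 1.0) (feature 2, 3.0)
//   row 1: (feature 1, 2.0)
//   row 2: (feature 3, 4.0)
//
// row_ptr      = {0, 2, 3, 4}  // row i owns data[row_ptr[i] .. row_ptr[i+1])
// data         = {{0, 1.0}, {2, 3.0}, {1, 2.0}, {3, 4.0}}  // Entry{index, fvalue}
// total_offset = 4             // == row_ptr[num_row], the sentinel set on device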

@@ -280,7 +280,8 @@ class Predictor : public xgboost::Predictor {
               uint32_t tree_end = 0) const override {
    ::sycl::queue qu = device_manager.GetQueue(ctx_->Device());
-   sycl::DeviceMatrix device_matrix(qu, dmat);
+   // TODO(razdoburdin): remove temporary workaround after cache fix
+   sycl::DeviceMatrix device_matrix;
+   device_matrix.Init(qu, dmat);

    auto* out_preds = &predts->predictions;
    if (tree_end == 0) {

plugin/sycl/tree/expand_entry.h (new file, 50 additions)
@@ -0,0 +1,50 @@
/*!
 * Copyright 2017-2024 by Contributors
 * \file expand_entry.h
 */
#ifndef PLUGIN_SYCL_TREE_EXPAND_ENTRY_H_
#define PLUGIN_SYCL_TREE_EXPAND_ENTRY_H_

#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wtautological-constant-compare"
#include "../../src/tree/constraints.h"
#pragma GCC diagnostic pop
#include "../../src/tree/hist/expand_entry.h"

namespace xgboost {
namespace sycl {
namespace tree {
/* tree growing policies */
struct ExpandEntry : public xgboost::tree::ExpandEntryImpl<ExpandEntry> {
  static constexpr bst_node_t kRootNid = 0;

  xgboost::tree::SplitEntry split;

  ExpandEntry(int nid, int depth) : ExpandEntryImpl{nid, depth} {}

  inline bst_node_t GetSiblingId(const xgboost::RegTree* p_tree) const {
    CHECK_EQ((*p_tree)[nid].IsRoot(), false);
    const size_t parent_id = (*p_tree)[nid].Parent();
    return GetSiblingId(p_tree, parent_id);
  }

  inline bst_node_t GetSiblingId(const xgboost::RegTree* p_tree, size_t parent_id) const {
    return p_tree->IsLeftChild(nid) ? p_tree->RightChild(parent_id)
                                    : p_tree->LeftChild(parent_id);
  }

  bool IsValidImpl(xgboost::tree::TrainParam const &param, int32_t num_leaves) const {
    if (split.loss_chg <= kRtEps) return false;
    if (split.loss_chg < param.min_split_loss) return false;
    if (param.max_depth > 0 && depth == param.max_depth) return false;
    if (param.max_leaves > 0 && num_leaves == param.max_leaves) return false;

    return true;
  }
};

}  // namespace tree
}  // namespace sycl
}  // namespace xgboost

#endif  // PLUGIN_SYCL_TREE_EXPAND_ENTRY_H_
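ExpandEntry plugs into the CRTP base shared with the hist updaters: xgboost::tree::ExpandEntryImpl provides the common interface and defers the validity test to the derived type's IsValidImpl(). A minimal standalone sketch of that pattern, with invented types and limits:

#include <cstdint>

// The base class knows nothing about the splitting criterion; it forwards
// the validity test to the derived type, as ExpandEntryImpl does.
template <typename Derived>
struct ExpandEntryBase {
  int nid{0};
  int depth{0};
  bool IsValid(int32_t max_depth, int32_t max_leaves, int32_t num_leaves) const {
    return static_cast<const Derived*>(this)->IsValidImpl(max_depth, max_leaves, num_leaves);
  }
};

struct ToyEntry : ExpandEntryBase<ToyEntry> {
  float loss_chg{0.0f};
  bool IsValidImpl(int32_t max_depth, int32_t max_leaves, int32_t num_leaves) const {
    if (loss_chg <= 0.0f) return false;                            // no gain
    if (max_depth > 0 && depth == max_depth) return false;         // depth limit
    if (max_leaves > 0 && num_leaves == max_leaves) return false;  // leaf limit
    return true;
  }
};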