[sycl] add loss guided hist building (#10251)
Co-authored-by: Dmitry Razdoburdin <>
This commit is contained in:
committed by
GitHub
parent
9b465052ce
commit
f588252481
46
plugin/sycl/tree/hist_row_adder.h
Normal file
46
plugin/sycl/tree/hist_row_adder.h
Normal file
@@ -0,0 +1,46 @@
|
||||
/*!
|
||||
* Copyright 2017-2024 by Contributors
|
||||
* \file hist_row_adder.h
|
||||
*/
|
||||
#ifndef PLUGIN_SYCL_TREE_HIST_ROW_ADDER_H_
|
||||
#define PLUGIN_SYCL_TREE_HIST_ROW_ADDER_H_
|
||||
|
||||
#include <vector>
|
||||
#include <algorithm>
|
||||
|
||||
namespace xgboost {
|
||||
namespace sycl {
|
||||
namespace tree {
|
||||
|
||||
template <typename GradientSumT>
|
||||
class HistRowsAdder {
|
||||
public:
|
||||
virtual void AddHistRows(HistUpdater<GradientSumT>* builder,
|
||||
std::vector<int>* sync_ids, RegTree *p_tree) = 0;
|
||||
virtual ~HistRowsAdder() = default;
|
||||
};
|
||||
|
||||
template <typename GradientSumT>
|
||||
class BatchHistRowsAdder: public HistRowsAdder<GradientSumT> {
|
||||
public:
|
||||
void AddHistRows(HistUpdater<GradientSumT>* builder,
|
||||
std::vector<int>* sync_ids, RegTree *p_tree) override {
|
||||
builder->builder_monitor_.Start("AddHistRows");
|
||||
|
||||
for (auto const& entry : builder->nodes_for_explicit_hist_build_) {
|
||||
int nid = entry.nid;
|
||||
auto event = builder->hist_.AddHistRow(nid);
|
||||
}
|
||||
for (auto const& node : builder->nodes_for_subtraction_trick_) {
|
||||
auto event = builder->hist_.AddHistRow(node.nid);
|
||||
}
|
||||
|
||||
builder->builder_monitor_.Stop("AddHistRows");
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace tree
|
||||
} // namespace sycl
|
||||
} // namespace xgboost
|
||||
|
||||
#endif // PLUGIN_SYCL_TREE_HIST_ROW_ADDER_H_
|
||||
68
plugin/sycl/tree/hist_synchronizer.h
Normal file
68
plugin/sycl/tree/hist_synchronizer.h
Normal file
@@ -0,0 +1,68 @@
|
||||
/*!
|
||||
* Copyright 2017-2024 by Contributors
|
||||
* \file hist_synchronizer.h
|
||||
*/
|
||||
#ifndef PLUGIN_SYCL_TREE_HIST_SYNCHRONIZER_H_
|
||||
#define PLUGIN_SYCL_TREE_HIST_SYNCHRONIZER_H_
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "../common/hist_util.h"
|
||||
#include "expand_entry.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace sycl {
|
||||
namespace tree {
|
||||
|
||||
template <typename GradientSumT>
|
||||
class HistUpdater;
|
||||
|
||||
template <typename GradientSumT>
|
||||
class HistSynchronizer {
|
||||
public:
|
||||
virtual void SyncHistograms(HistUpdater<GradientSumT>* builder,
|
||||
const std::vector<int>& sync_ids,
|
||||
RegTree *p_tree) = 0;
|
||||
virtual ~HistSynchronizer() = default;
|
||||
};
|
||||
|
||||
template <typename GradientSumT>
|
||||
class BatchHistSynchronizer: public HistSynchronizer<GradientSumT> {
|
||||
public:
|
||||
void SyncHistograms(HistUpdater<GradientSumT>* builder,
|
||||
const std::vector<int>& sync_ids,
|
||||
RegTree *p_tree) override {
|
||||
builder->builder_monitor_.Start("SyncHistograms");
|
||||
const size_t nbins = builder->hist_builder_.GetNumBins();
|
||||
|
||||
hist_sync_events_.resize(builder->nodes_for_explicit_hist_build_.size());
|
||||
for (int i = 0; i < builder->nodes_for_explicit_hist_build_.size(); i++) {
|
||||
const auto entry = builder->nodes_for_explicit_hist_build_[i];
|
||||
auto& this_hist = builder->hist_[entry.nid];
|
||||
|
||||
if (!(*p_tree)[entry.nid].IsRoot()) {
|
||||
const size_t parent_id = (*p_tree)[entry.nid].Parent();
|
||||
auto& parent_hist = builder->hist_[parent_id];
|
||||
auto& sibling_hist = builder->hist_[entry.GetSiblingId(p_tree, parent_id)];
|
||||
hist_sync_events_[i] = common::SubtractionHist(builder->qu_, &sibling_hist, parent_hist,
|
||||
this_hist, nbins, ::sycl::event());
|
||||
}
|
||||
}
|
||||
builder->qu_.wait_and_throw();
|
||||
|
||||
builder->builder_monitor_.Stop("SyncHistograms");
|
||||
}
|
||||
|
||||
std::vector<::sycl::event> GetEvents() const {
|
||||
return hist_sync_events_;
|
||||
}
|
||||
|
||||
private:
|
||||
std::vector<::sycl::event> hist_sync_events_;
|
||||
};
|
||||
|
||||
} // namespace tree
|
||||
} // namespace sycl
|
||||
} // namespace xgboost
|
||||
|
||||
#endif // PLUGIN_SYCL_TREE_HIST_SYNCHRONIZER_H_
|
||||
@@ -7,10 +7,69 @@
|
||||
|
||||
#include <oneapi/dpl/random>
|
||||
|
||||
#include "../common/hist_util.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace sycl {
|
||||
namespace tree {
|
||||
|
||||
template <typename GradientSumT>
|
||||
void HistUpdater<GradientSumT>::SetHistSynchronizer(
|
||||
HistSynchronizer<GradientSumT> *sync) {
|
||||
hist_synchronizer_.reset(sync);
|
||||
}
|
||||
|
||||
template <typename GradientSumT>
|
||||
void HistUpdater<GradientSumT>::SetHistRowsAdder(
|
||||
HistRowsAdder<GradientSumT> *adder) {
|
||||
hist_rows_adder_.reset(adder);
|
||||
}
|
||||
|
||||
template <typename GradientSumT>
|
||||
void HistUpdater<GradientSumT>::BuildHistogramsLossGuide(
|
||||
ExpandEntry entry,
|
||||
const common::GHistIndexMatrix &gmat,
|
||||
RegTree *p_tree,
|
||||
const USMVector<GradientPair, MemoryType::on_device> &gpair_device) {
|
||||
nodes_for_explicit_hist_build_.clear();
|
||||
nodes_for_subtraction_trick_.clear();
|
||||
nodes_for_explicit_hist_build_.push_back(entry);
|
||||
|
||||
if (!(*p_tree)[entry.nid].IsRoot()) {
|
||||
auto sibling_id = entry.GetSiblingId(p_tree);
|
||||
nodes_for_subtraction_trick_.emplace_back(sibling_id, p_tree->GetDepth(sibling_id));
|
||||
}
|
||||
|
||||
std::vector<int> sync_ids;
|
||||
hist_rows_adder_->AddHistRows(this, &sync_ids, p_tree);
|
||||
qu_.wait_and_throw();
|
||||
BuildLocalHistograms(gmat, p_tree, gpair_device);
|
||||
hist_synchronizer_->SyncHistograms(this, sync_ids, p_tree);
|
||||
}
|
||||
|
||||
template<typename GradientSumT>
|
||||
void HistUpdater<GradientSumT>::BuildLocalHistograms(
|
||||
const common::GHistIndexMatrix &gmat,
|
||||
RegTree *p_tree,
|
||||
const USMVector<GradientPair, MemoryType::on_device> &gpair_device) {
|
||||
builder_monitor_.Start("BuildLocalHistograms");
|
||||
const size_t n_nodes = nodes_for_explicit_hist_build_.size();
|
||||
::sycl::event event;
|
||||
|
||||
for (size_t i = 0; i < n_nodes; i++) {
|
||||
const int32_t nid = nodes_for_explicit_hist_build_[i].nid;
|
||||
|
||||
if (row_set_collection_[nid].Size() > 0) {
|
||||
event = BuildHist(gpair_device, row_set_collection_[nid], gmat, &(hist_[nid]),
|
||||
&(hist_buffer_.GetDeviceBuffer()), event);
|
||||
} else {
|
||||
common::InitHist(qu_, &(hist_[nid]), hist_[nid].Size(), &event);
|
||||
}
|
||||
}
|
||||
qu_.wait_and_throw();
|
||||
builder_monitor_.Stop("BuildLocalHistograms");
|
||||
}
|
||||
|
||||
template<typename GradientSumT>
|
||||
void HistUpdater<GradientSumT>::InitSampling(
|
||||
const USMVector<GradientPair, MemoryType::on_device> &gpair,
|
||||
@@ -70,6 +129,21 @@ void HistUpdater<GradientSumT>::InitData(
|
||||
// initialize the row set
|
||||
{
|
||||
row_set_collection_.Clear();
|
||||
|
||||
// initialize histogram collection
|
||||
uint32_t nbins = gmat.cut.Ptrs().back();
|
||||
hist_.Init(qu_, nbins);
|
||||
|
||||
hist_buffer_.Init(qu_, nbins);
|
||||
size_t buffer_size = kBufferSize;
|
||||
if (buffer_size > info.num_row_ / kMinBlockSize + 1) {
|
||||
buffer_size = info.num_row_ / kMinBlockSize + 1;
|
||||
}
|
||||
hist_buffer_.Reset(buffer_size);
|
||||
|
||||
// initialize histogram builder
|
||||
hist_builder_ = common::GHistBuilder<GradientSumT>(qu_, nbins);
|
||||
|
||||
USMVector<size_t, MemoryType::on_device>* row_indices = &(row_set_collection_.Data());
|
||||
row_indices->Resize(&qu_, info.num_row_);
|
||||
size_t* p_row_indices = row_indices->Data();
|
||||
@@ -122,6 +196,25 @@ void HistUpdater<GradientSumT>::InitData(
|
||||
}
|
||||
}
|
||||
row_set_collection_.Init();
|
||||
|
||||
{
|
||||
/* determine layout of data */
|
||||
const size_t nrow = info.num_row_;
|
||||
const size_t ncol = info.num_col_;
|
||||
const size_t nnz = info.num_nonzero_;
|
||||
// number of discrete bins for feature 0
|
||||
const uint32_t nbins_f0 = gmat.cut.Ptrs()[1] - gmat.cut.Ptrs()[0];
|
||||
if (nrow * ncol == nnz) {
|
||||
// dense data with zero-based indexing
|
||||
data_layout_ = kDenseDataZeroBased;
|
||||
} else if (nbins_f0 == 0 && nrow * (ncol - 1) == nnz) {
|
||||
// dense data with one-based indexing
|
||||
data_layout_ = kDenseDataOneBased;
|
||||
} else {
|
||||
// sparse data
|
||||
data_layout_ = kSparseData;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template class HistUpdater<float>;
|
||||
|
||||
@@ -12,10 +12,13 @@
|
||||
#pragma GCC diagnostic pop
|
||||
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
|
||||
#include "../common/partition_builder.h"
|
||||
#include "split_evaluator.h"
|
||||
#include "hist_synchronizer.h"
|
||||
#include "hist_row_adder.h"
|
||||
|
||||
#include "../data.h"
|
||||
|
||||
@@ -26,6 +29,10 @@ namespace tree {
|
||||
template<typename GradientSumT>
|
||||
class HistUpdater {
|
||||
public:
|
||||
template <MemoryType memory_type = MemoryType::shared>
|
||||
using GHistRowT = common::GHistRow<GradientSumT, memory_type>;
|
||||
using GradientPairT = xgboost::detail::GradientPairInternal<GradientSumT>;
|
||||
|
||||
explicit HistUpdater(::sycl::queue qu,
|
||||
const xgboost::tree::TrainParam& param,
|
||||
std::unique_ptr<TreeUpdater> pruner,
|
||||
@@ -43,7 +50,13 @@ class HistUpdater {
|
||||
sub_group_size_ = sub_group_sizes.back();
|
||||
}
|
||||
|
||||
void SetHistSynchronizer(HistSynchronizer<GradientSumT>* sync);
|
||||
void SetHistRowsAdder(HistRowsAdder<GradientSumT>* adder);
|
||||
|
||||
protected:
|
||||
friend class BatchHistSynchronizer<GradientSumT>;
|
||||
friend class BatchHistRowsAdder<GradientSumT>;
|
||||
|
||||
void InitSampling(const USMVector<GradientPair, MemoryType::on_device> &gpair,
|
||||
USMVector<size_t, MemoryType::on_device>* row_indices);
|
||||
|
||||
@@ -54,6 +67,27 @@ class HistUpdater {
|
||||
const DMatrix& fmat,
|
||||
const RegTree& tree);
|
||||
|
||||
/*!
 * \brief Thin wrapper around hist_builder_.BuildHist.
 *
 * Forwards all arguments unchanged and additionally passes whether
 * data_layout_ differs from kSparseData.
 *
 * \return the event returned by hist_builder_.BuildHist
 */
inline ::sycl::event BuildHist(
    const USMVector<GradientPair, MemoryType::on_device>& gpair_device,
    const common::RowSetCollection::Elem row_indices,
    const common::GHistIndexMatrix& gmat,
    GHistRowT<MemoryType::on_device>* hist,
    GHistRowT<MemoryType::on_device>* hist_buffer,
    ::sycl::event event_priv) {
  const bool layout_is_not_sparse = data_layout_ != kSparseData;
  return hist_builder_.BuildHist(gpair_device, row_indices, gmat, hist,
                                 layout_is_not_sparse, hist_buffer, event_priv);
}
|
||||
|
||||
void BuildLocalHistograms(const common::GHistIndexMatrix &gmat,
|
||||
RegTree *p_tree,
|
||||
const USMVector<GradientPair, MemoryType::on_device> &gpair);
|
||||
|
||||
void BuildHistogramsLossGuide(
|
||||
ExpandEntry entry,
|
||||
const common::GHistIndexMatrix &gmat,
|
||||
RegTree *p_tree,
|
||||
const USMVector<GradientPair, MemoryType::on_device> &gpair);
|
||||
|
||||
// --data fields--
|
||||
size_t sub_group_size_;
|
||||
|
||||
@@ -69,11 +103,30 @@ class HistUpdater {
|
||||
const RegTree* p_last_tree_;
|
||||
DMatrix const* const p_last_fmat_;
|
||||
|
||||
enum DataLayout { kDenseDataZeroBased, kDenseDataOneBased, kSparseData };
|
||||
DataLayout data_layout_;
|
||||
|
||||
constexpr static size_t kBufferSize = 2048;
|
||||
constexpr static size_t kMinBlockSize = 128;
|
||||
common::GHistBuilder<GradientSumT> hist_builder_;
|
||||
common::ParallelGHistBuilder<GradientSumT> hist_buffer_;
|
||||
/*! \brief cumulative histogram of gradients. */
|
||||
common::HistCollection<GradientSumT, MemoryType::on_device> hist_;
|
||||
|
||||
xgboost::common::Monitor builder_monitor_;
|
||||
xgboost::common::Monitor kernel_monitor_;
|
||||
|
||||
uint64_t seed_ = 0;
|
||||
|
||||
// key is the node id which should be calculated by Subtraction Trick, value is the node which
|
||||
// provides the evidence for subtraction
|
||||
std::vector<ExpandEntry> nodes_for_subtraction_trick_;
|
||||
// list of nodes whose histograms would be built explicitly.
|
||||
std::vector<ExpandEntry> nodes_for_explicit_hist_build_;
|
||||
|
||||
std::unique_ptr<HistSynchronizer<GradientSumT>> hist_synchronizer_;
|
||||
std::unique_ptr<HistRowsAdder<GradientSumT>> hist_rows_adder_;
|
||||
|
||||
::sycl::queue qu_;
|
||||
};
|
||||
|
||||
|
||||
Reference in New Issue
Block a user