Partial rewrite EllpackPage (#5352)
This commit is contained in:
@@ -181,13 +181,13 @@ class CompressedIterator {
|
||||
typedef value_type reference; // NOLINT
|
||||
|
||||
private:
|
||||
CompressedByteT *buffer_;
|
||||
const CompressedByteT *buffer_;
|
||||
size_t symbol_bits_;
|
||||
size_t offset_;
|
||||
|
||||
public:
|
||||
CompressedIterator() : buffer_(nullptr), symbol_bits_(0), offset_(0) {}
|
||||
CompressedIterator(CompressedByteT *buffer, size_t num_symbols)
|
||||
CompressedIterator(const CompressedByteT *buffer, size_t num_symbols)
|
||||
: buffer_(buffer), offset_(0) {
|
||||
symbol_bits_ = detail::SymbolBits(num_symbols);
|
||||
}
|
||||
|
||||
@@ -31,7 +31,7 @@ namespace common {
|
||||
|
||||
HistogramCuts::HistogramCuts() {
|
||||
monitor_.Init(__FUNCTION__);
|
||||
cut_ptrs_.emplace_back(0);
|
||||
cut_ptrs_.HostVector().emplace_back(0);
|
||||
}
|
||||
|
||||
// Dispatch to specific builder.
|
||||
@@ -52,7 +52,7 @@ void HistogramCuts::Build(DMatrix* dmat, uint32_t const max_num_bins) {
|
||||
DenseCuts cuts(this);
|
||||
cuts.Build(dmat, max_num_bins);
|
||||
}
|
||||
LOG(INFO) << "Total number of hist bins: " << cut_ptrs_.back();
|
||||
LOG(INFO) << "Total number of hist bins: " << cut_ptrs_.HostVector().back();
|
||||
}
|
||||
|
||||
bool CutsBuilder::UseGroup(DMatrix* dmat) {
|
||||
@@ -75,7 +75,10 @@ void SparseCuts::SingleThreadBuild(SparsePage const& page, MetaInfo const& info,
|
||||
|
||||
// Data groups, used in ranking.
|
||||
std::vector<bst_uint> const& group_ptr = info.group_ptr_;
|
||||
p_cuts_->min_vals_.resize(end_col - beg_col, 0);
|
||||
auto &local_min_vals = p_cuts_->min_vals_.HostVector();
|
||||
auto &local_cuts = p_cuts_->cut_values_.HostVector();
|
||||
auto &local_ptrs = p_cuts_->cut_ptrs_.HostVector();
|
||||
local_min_vals.resize(end_col - beg_col, 0);
|
||||
|
||||
for (uint32_t col_id = beg_col; col_id < page.Size() && col_id < end_col; ++col_id) {
|
||||
// Using a local variable makes things easier, but at the cost of memory thrashing.
|
||||
@@ -85,7 +88,7 @@ void SparseCuts::SingleThreadBuild(SparsePage const& page, MetaInfo const& info,
|
||||
max_num_bins);
|
||||
if (n_bins == 0) {
|
||||
// cut_ptrs_ is initialized with a zero, so there's always an element at the back
|
||||
p_cuts_->cut_ptrs_.emplace_back(p_cuts_->cut_ptrs_.back());
|
||||
local_ptrs.emplace_back(local_ptrs.back());
|
||||
continue;
|
||||
}
|
||||
|
||||
@@ -112,17 +115,17 @@ void SparseCuts::SingleThreadBuild(SparsePage const& page, MetaInfo const& info,
|
||||
// Can we use data[1] as the min values so that we don't need to
|
||||
// store another array?
|
||||
float mval = summary.data[0].value;
|
||||
p_cuts_->min_vals_[col_id - beg_col] = mval - (fabs(mval) + 1e-5);
|
||||
local_min_vals[col_id - beg_col] = mval - (fabs(mval) + 1e-5);
|
||||
|
||||
this->AddCutPoint(summary, max_num_bins);
|
||||
|
||||
bst_float cpt = (summary.size > 0) ?
|
||||
summary.data[summary.size - 1].value :
|
||||
p_cuts_->min_vals_[col_id - beg_col];
|
||||
local_min_vals[col_id - beg_col];
|
||||
cpt += fabs(cpt) + 1e-5;
|
||||
p_cuts_->cut_values_.emplace_back(cpt);
|
||||
local_cuts.emplace_back(cpt);
|
||||
|
||||
p_cuts_->cut_ptrs_.emplace_back(p_cuts_->cut_values_.size());
|
||||
local_ptrs.emplace_back(local_cuts.size());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -196,33 +199,40 @@ void SparseCuts::Concat(
|
||||
std::vector<std::unique_ptr<SparseCuts>> const& cuts, uint32_t n_cols) {
|
||||
monitor_.Start(__FUNCTION__);
|
||||
uint32_t nthreads = omp_get_max_threads();
|
||||
p_cuts_->min_vals_.resize(n_cols, std::numeric_limits<float>::max());
|
||||
auto &local_min_vals = p_cuts_->min_vals_.HostVector();
|
||||
auto &local_cuts = p_cuts_->cut_values_.HostVector();
|
||||
auto &local_ptrs = p_cuts_->cut_ptrs_.HostVector();
|
||||
local_min_vals.resize(n_cols, std::numeric_limits<float>::max());
|
||||
size_t min_vals_tail = 0;
|
||||
|
||||
for (uint32_t t = 0; t < nthreads; ++t) {
|
||||
auto& thread_min_vals = cuts[t]->p_cuts_->min_vals_.HostVector();
|
||||
auto& thread_cuts = cuts[t]->p_cuts_->cut_values_.HostVector();
|
||||
auto& thread_ptrs = cuts[t]->p_cuts_->cut_ptrs_.HostVector();
|
||||
|
||||
// concat csc pointers.
|
||||
size_t const old_ptr_size = p_cuts_->cut_ptrs_.size();
|
||||
p_cuts_->cut_ptrs_.resize(
|
||||
cuts[t]->p_cuts_->cut_ptrs_.size() + p_cuts_->cut_ptrs_.size() - 1);
|
||||
size_t const new_icp_size = p_cuts_->cut_ptrs_.size();
|
||||
auto tail = p_cuts_->cut_ptrs_[old_ptr_size-1];
|
||||
size_t const old_ptr_size = local_ptrs.size();
|
||||
local_ptrs.resize(
|
||||
thread_ptrs.size() + local_ptrs.size() - 1);
|
||||
size_t const new_icp_size = local_ptrs.size();
|
||||
auto tail = local_ptrs[old_ptr_size-1];
|
||||
for (size_t j = old_ptr_size; j < new_icp_size; ++j) {
|
||||
p_cuts_->cut_ptrs_[j] = tail + cuts[t]->p_cuts_->cut_ptrs_[j-old_ptr_size+1];
|
||||
local_ptrs[j] = tail + thread_ptrs[j-old_ptr_size+1];
|
||||
}
|
||||
// concat csc values
|
||||
size_t const old_iv_size = p_cuts_->cut_values_.size();
|
||||
p_cuts_->cut_values_.resize(
|
||||
cuts[t]->p_cuts_->cut_values_.size() + p_cuts_->cut_values_.size());
|
||||
size_t const new_iv_size = p_cuts_->cut_values_.size();
|
||||
size_t const old_iv_size = local_cuts.size();
|
||||
local_cuts.resize(
|
||||
thread_cuts.size() + local_cuts.size());
|
||||
size_t const new_iv_size = local_cuts.size();
|
||||
for (size_t j = old_iv_size; j < new_iv_size; ++j) {
|
||||
p_cuts_->cut_values_[j] = cuts[t]->p_cuts_->cut_values_[j-old_iv_size];
|
||||
local_cuts[j] = thread_cuts[j-old_iv_size];
|
||||
}
|
||||
// merge min values
|
||||
for (size_t j = 0; j < cuts[t]->p_cuts_->min_vals_.size(); ++j) {
|
||||
p_cuts_->min_vals_.at(min_vals_tail + j) =
|
||||
std::min(p_cuts_->min_vals_.at(min_vals_tail + j), cuts.at(t)->p_cuts_->min_vals_.at(j));
|
||||
for (size_t j = 0; j < thread_min_vals.size(); ++j) {
|
||||
local_min_vals.at(min_vals_tail + j) =
|
||||
std::min(local_min_vals.at(min_vals_tail + j), thread_min_vals.at(j));
|
||||
}
|
||||
min_vals_tail += cuts[t]->p_cuts_->min_vals_.size();
|
||||
min_vals_tail += thread_min_vals.size();
|
||||
}
|
||||
monitor_.Stop(__FUNCTION__);
|
||||
}
|
||||
@@ -323,27 +333,27 @@ void DenseCuts::Init
|
||||
// TODO(chenqin): rabit failure recovery assumes no bootstrap one-time call after loadcheckpoint
|
||||
// we need to move this allreduce before loadcheckpoint call in future
|
||||
sreducer.Allreduce(dmlc::BeginPtr(summary_array), nbytes, summary_array.size());
|
||||
p_cuts_->min_vals_.resize(sketchs.size());
|
||||
p_cuts_->min_vals_.HostVector().resize(sketchs.size());
|
||||
|
||||
for (size_t fid = 0; fid < summary_array.size(); ++fid) {
|
||||
WQSketch::SummaryContainer a;
|
||||
a.Reserve(max_num_bins + 1);
|
||||
a.SetPrune(summary_array[fid], max_num_bins + 1);
|
||||
const bst_float mval = a.data[0].value;
|
||||
p_cuts_->min_vals_[fid] = mval - (fabs(mval) + 1e-5);
|
||||
p_cuts_->min_vals_.HostVector()[fid] = mval - (fabs(mval) + 1e-5);
|
||||
AddCutPoint(a, max_num_bins);
|
||||
// push a value that is greater than anything
|
||||
const bst_float cpt
|
||||
= (a.size > 0) ? a.data[a.size - 1].value : p_cuts_->min_vals_[fid];
|
||||
= (a.size > 0) ? a.data[a.size - 1].value : p_cuts_->min_vals_.HostVector()[fid];
|
||||
// this must be bigger than last value in a scale
|
||||
const bst_float last = cpt + (fabs(cpt) + 1e-5);
|
||||
p_cuts_->cut_values_.push_back(last);
|
||||
p_cuts_->cut_values_.HostVector().push_back(last);
|
||||
|
||||
// Ensure that every feature gets at least one quantile point
|
||||
CHECK_LE(p_cuts_->cut_values_.size(), std::numeric_limits<uint32_t>::max());
|
||||
auto cut_size = static_cast<uint32_t>(p_cuts_->cut_values_.size());
|
||||
CHECK_GT(cut_size, p_cuts_->cut_ptrs_.back());
|
||||
p_cuts_->cut_ptrs_.push_back(cut_size);
|
||||
CHECK_LE(p_cuts_->cut_values_.HostVector().size(), std::numeric_limits<uint32_t>::max());
|
||||
auto cut_size = static_cast<uint32_t>(p_cuts_->cut_values_.HostVector().size());
|
||||
CHECK_GT(cut_size, p_cuts_->cut_ptrs_.HostVector().back());
|
||||
p_cuts_->cut_ptrs_.HostVector().push_back(cut_size);
|
||||
}
|
||||
monitor_.Stop(__func__);
|
||||
}
|
||||
|
||||
@@ -44,17 +44,35 @@ class HistogramCuts {
|
||||
using BinIdx = uint32_t;
|
||||
common::Monitor monitor_;
|
||||
|
||||
std::vector<bst_float> cut_values_;
|
||||
std::vector<uint32_t> cut_ptrs_;
|
||||
std::vector<float> min_vals_; // storing minimum value in a sketch set.
|
||||
|
||||
public:
|
||||
HostDeviceVector<bst_float> cut_values_;
|
||||
HostDeviceVector<uint32_t> cut_ptrs_;
|
||||
HostDeviceVector<float> min_vals_; // storing minimum value in a sketch set.
|
||||
|
||||
HistogramCuts();
|
||||
HistogramCuts(HistogramCuts const& that) = delete;
|
||||
HistogramCuts(HistogramCuts const& that) {
|
||||
cut_values_.Resize(that.cut_values_.Size());
|
||||
cut_ptrs_.Resize(that.cut_ptrs_.Size());
|
||||
min_vals_.Resize(that.min_vals_.Size());
|
||||
cut_values_.Copy(that.cut_values_);
|
||||
cut_ptrs_.Copy(that.cut_ptrs_);
|
||||
min_vals_.Copy(that.min_vals_);
|
||||
}
|
||||
|
||||
HistogramCuts(HistogramCuts&& that) noexcept(true) {
|
||||
*this = std::forward<HistogramCuts&&>(that);
|
||||
}
|
||||
HistogramCuts& operator=(HistogramCuts const& that) = delete;
|
||||
|
||||
HistogramCuts& operator=(HistogramCuts const& that) {
|
||||
cut_values_.Resize(that.cut_values_.Size());
|
||||
cut_ptrs_.Resize(that.cut_ptrs_.Size());
|
||||
min_vals_.Resize(that.min_vals_.Size());
|
||||
cut_values_.Copy(that.cut_values_);
|
||||
cut_ptrs_.Copy(that.cut_ptrs_);
|
||||
min_vals_.Copy(that.min_vals_);
|
||||
return *this;
|
||||
}
|
||||
|
||||
HistogramCuts& operator=(HistogramCuts&& that) noexcept(true) {
|
||||
monitor_ = std::move(that.monitor_);
|
||||
cut_ptrs_ = std::move(that.cut_ptrs_);
|
||||
@@ -67,28 +85,30 @@ class HistogramCuts {
|
||||
void Build(DMatrix* dmat, uint32_t const max_num_bins);
|
||||
/* \brief How many bins a feature has. */
|
||||
uint32_t FeatureBins(uint32_t feature) const {
|
||||
return cut_ptrs_.at(feature+1) - cut_ptrs_[feature];
|
||||
return cut_ptrs_.ConstHostVector().at(feature + 1) -
|
||||
cut_ptrs_.ConstHostVector()[feature];
|
||||
}
|
||||
|
||||
// Getters. Cuts should be of no use after building histogram indices, but currently
|
||||
// it's deeply linked with quantile_hist, gpu sketcher and gpu_hist. So we preserve
|
||||
// these for now.
|
||||
std::vector<uint32_t> const& Ptrs() const { return cut_ptrs_; }
|
||||
std::vector<float> const& Values() const { return cut_values_; }
|
||||
std::vector<float> const& MinValues() const { return min_vals_; }
|
||||
std::vector<uint32_t> const& Ptrs() const { return cut_ptrs_.ConstHostVector(); }
|
||||
std::vector<float> const& Values() const { return cut_values_.ConstHostVector(); }
|
||||
std::vector<float> const& MinValues() const { return min_vals_.ConstHostVector(); }
|
||||
|
||||
size_t TotalBins() const { return cut_ptrs_.back(); }
|
||||
size_t TotalBins() const { return cut_ptrs_.ConstHostVector().back(); }
|
||||
|
||||
// Return the index of a cut point that is strictly greater than the input
|
||||
// value, or the last available index if none exists
|
||||
BinIdx SearchBin(float value, uint32_t column_id) const {
|
||||
auto beg = cut_ptrs_.at(column_id);
|
||||
auto end = cut_ptrs_.at(column_id + 1);
|
||||
auto it = std::upper_bound(cut_values_.cbegin() + beg, cut_values_.cbegin() + end, value);
|
||||
if (it == cut_values_.cend()) {
|
||||
it = cut_values_.cend() - 1;
|
||||
auto beg = cut_ptrs_.ConstHostVector().at(column_id);
|
||||
auto end = cut_ptrs_.ConstHostVector().at(column_id + 1);
|
||||
const auto &values = cut_values_.ConstHostVector();
|
||||
auto it = std::upper_bound(values.cbegin() + beg, values.cbegin() + end, value);
|
||||
if (it == values.cend()) {
|
||||
it = values.cend() - 1;
|
||||
}
|
||||
BinIdx idx = it - cut_values_.cbegin();
|
||||
BinIdx idx = it - values.cbegin();
|
||||
return idx;
|
||||
}
|
||||
|
||||
@@ -133,8 +153,8 @@ class CutsBuilder {
|
||||
size_t required_cuts = std::min(summary.size, static_cast<size_t>(max_bin));
|
||||
for (size_t i = 1; i < required_cuts; ++i) {
|
||||
bst_float cpt = summary.data[i].value;
|
||||
if (i == 1 || cpt > p_cuts_->cut_values_.back()) {
|
||||
p_cuts_->cut_values_.push_back(cpt);
|
||||
if (i == 1 || cpt > p_cuts_->cut_values_.ConstHostVector().back()) {
|
||||
p_cuts_->cut_values_.HostVector().push_back(cpt);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -371,6 +371,7 @@ void HostDeviceVector<T>::Resize(size_t new_size, T v) {
|
||||
template class HostDeviceVector<bst_float>;
|
||||
template class HostDeviceVector<GradientPair>;
|
||||
template class HostDeviceVector<int32_t>; // bst_node_t
|
||||
template class HostDeviceVector<uint8_t>;
|
||||
template class HostDeviceVector<Entry>;
|
||||
template class HostDeviceVector<uint64_t>; // bst_row_t
|
||||
template class HostDeviceVector<uint32_t>; // bst_feature_t
|
||||
|
||||
Reference in New Issue
Block a user