Optimized ApplySplit, BuildHist and UpdatePredictCache functions on CPU (#5244)

* Split up sparse and dense build hist kernels.
* Add `PartitionBuilder`.
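For context, a minimal illustrative sketch of the dense/sparse kernel split (an editorial addition, not part of this commit's diff; SimpleGradientPair stands in for xgboost::GradientPair, and the real kernels below are additionally templated on the accumulator type and on whether software prefetching is enabled). The point of the split is that a dense row always has exactly n_features consecutive entries, so its segment can be computed as rid * n_features without touching the CSR row pointers:

#include <cstddef>
#include <cstdint>
#include <vector>

struct SimpleGradientPair { float grad; float hess; };  // placeholder for xgboost::GradientPair

template <bool kDense>
void BuildHistSketch(const std::vector<SimpleGradientPair>& gpair,
                     const std::vector<std::size_t>& rows,      // row ids assigned to the node
                     const std::vector<std::size_t>& row_ptr,   // CSR offsets, read only in the sparse case
                     const std::vector<std::uint32_t>& index,   // global bin id per matrix entry
                     std::size_t n_features,
                     std::vector<double>* hist) {               // interleaved (grad, hess) sums per bin
  for (std::size_t rid : rows) {
    const std::size_t begin = kDense ? rid * n_features : row_ptr[rid];
    const std::size_t end   = kDense ? begin + n_features : row_ptr[rid + 1];
    for (std::size_t j = begin; j < end; ++j) {
      (*hist)[2 * index[j]]     += gpair[rid].grad;   // gradient slot of the bin
      (*hist)[2 * index[j] + 1] += gpair[rid].hess;   // hessian slot of the bin
    }
  }
}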
Egor Smirnov 2020-02-29 11:11:42 +03:00 committed by GitHub
parent b81f8cbbc0
commit 1b97eaf7a7
9 changed files with 694 additions and 387 deletions

View File

@ -37,6 +37,7 @@ class Column {
size_t Size() const { return len_; }
uint32_t GetGlobalBinIdx(size_t idx) const { return index_base_ + index_[idx]; }
uint32_t GetFeatureBinIdx(size_t idx) const { return index_[idx]; }
common::Span<const uint32_t> GetFeatureBinIdxPtr() const { return { index_, len_ }; }
// column.GetFeatureBinIdx(idx) + column.GetBaseIdx(idx) ==
// column.GetGlobalBinIdx(idx)
uint32_t GetBaseIdx() const { return index_base_; }
@ -186,8 +187,8 @@ class ColumnMatrix {
std::vector<size_t> feature_counts_;
std::vector<ColumnType> type_;
SimpleArray<uint32_t> index_; // index_: may store smaller integers; needs padding
SimpleArray<size_t> row_ind_;
std::vector<uint32_t> index_; // index_: may store smaller integers; needs padding
std::vector<size_t> row_ind_;
std::vector<ColumnBoundary> boundary_;
// index_base_[fid]: least bin id for feature fid

View File

@ -672,7 +672,7 @@ void GHistIndexBlockMatrix::Init(const GHistIndexMatrix& gmat,
}
/*!
* \brief fill a histogram by zeroes
* \brief fill a histogram by zeros in range [begin, end)
*/
void InitilizeHistByZeroes(GHistRow hist, size_t begin, size_t end) {
memset(hist.data() + begin, '\0', (end-begin)*sizeof(tree::GradStats));
@ -719,36 +719,59 @@ void SubtractionHist(GHistRow dst, const GHistRow src1, const GHistRow src2,
}
}
struct Prefetch {
public:
static constexpr size_t kCacheLineSize = 64;
static constexpr size_t kPrefetchOffset = 10;
static constexpr size_t kPrefetchStep =
kCacheLineSize / sizeof(decltype(GHistIndexMatrix::index)::value_type);
void GHistBuilder::BuildHist(const std::vector<GradientPair>& gpair,
private:
static constexpr size_t kNoPrefetchSize =
kPrefetchOffset + kCacheLineSize /
sizeof(decltype(GHistIndexMatrix::row_ptr)::value_type);
public:
static size_t NoPrefetchSize(size_t rows) {
return std::min(rows, kNoPrefetchSize);
}
};
constexpr size_t Prefetch::kNoPrefetchSize;
template<typename FPType, bool do_prefetch>
void BuildHistDenseKernel(const std::vector<GradientPair>& gpair,
const RowSetCollection::Elem row_indices,
const GHistIndexMatrix& gmat,
const size_t n_features,
GHistRow hist) {
const size_t size = row_indices.Size();
const size_t* rid = row_indices.begin;
const size_t nrows = row_indices.Size();
const uint32_t* index = gmat.index.data();
const size_t* row_ptr = gmat.row_ptr.data();
const float* pgh = reinterpret_cast<const float*>(gpair.data());
const uint32_t* gradient_index = gmat.index.data();
FPType* hist_data = reinterpret_cast<FPType*>(hist.data());
double* hist_data = reinterpret_cast<double*>(hist.data());
const uint32_t two {2}; // Each element from 'gpair' and 'hist' contains
// 2 FP values: gradient and hessian.
// So we need to multiply each row-index/bin-index by 2
// to work with gradient pairs as a single row FP array
const size_t cache_line_size = 64;
const size_t prefetch_offset = 10;
size_t no_prefetch_size = prefetch_offset + cache_line_size/sizeof(*rid);
no_prefetch_size = no_prefetch_size > nrows ? nrows : no_prefetch_size;
for (size_t i = 0; i < size; ++i) {
const size_t icol_start = rid[i] * n_features;
const size_t idx_gh = two * rid[i];
for (size_t i = 0; i < nrows; ++i) {
const size_t icol_start = row_ptr[rid[i]];
const size_t icol_end = row_ptr[rid[i]+1];
if (do_prefetch) {
const size_t icol_start_prefetch = rid[i + Prefetch::kPrefetchOffset] * n_features;
if (i < nrows - no_prefetch_size) {
PREFETCH_READ_T0(row_ptr + rid[i + prefetch_offset]);
PREFETCH_READ_T0(pgh + 2*rid[i + prefetch_offset]);
PREFETCH_READ_T0(pgh + two * rid[i + Prefetch::kPrefetchOffset]);
for (size_t j = icol_start_prefetch; j < icol_start_prefetch + n_features;
j += Prefetch::kPrefetchStep) {
PREFETCH_READ_T0(gradient_index + j);
}
}
for (size_t j = icol_start; j < icol_end; ++j) {
const uint32_t idx_bin = 2*index[j];
const size_t idx_gh = 2*rid[i];
for (size_t j = icol_start; j < icol_start + n_features; ++j) {
const uint32_t idx_bin = two * gradient_index[j];
hist_data[idx_bin] += pgh[idx_gh];
hist_data[idx_bin+1] += pgh[idx_gh+1];
@ -756,6 +779,84 @@ void GHistBuilder::BuildHist(const std::vector<GradientPair>& gpair,
}
}
template<typename FPType, bool do_prefetch>
void BuildHistSparseKernel(const std::vector<GradientPair>& gpair,
const RowSetCollection::Elem row_indices,
const GHistIndexMatrix& gmat,
GHistRow hist) {
const size_t size = row_indices.Size();
const size_t* rid = row_indices.begin;
const float* pgh = reinterpret_cast<const float*>(gpair.data());
const uint32_t* gradient_index = gmat.index.data();
const size_t* row_ptr = gmat.row_ptr.data();
FPType* hist_data = reinterpret_cast<FPType*>(hist.data());
const uint32_t two {2}; // Each element from 'gpair' and 'hist' contains
// 2 FP values: gradient and hessian.
// So we need to multiply each row-index/bin-index by 2
// to work with gradient pairs as a single row FP array
for (size_t i = 0; i < size; ++i) {
const size_t icol_start = row_ptr[rid[i]];
const size_t icol_end = row_ptr[rid[i]+1];
const size_t idx_gh = two * rid[i];
if (do_prefetch) {
const size_t icol_start_prftch = row_ptr[rid[i+Prefetch::kPrefetchOffset]];
const size_t icol_end_prefect = row_ptr[rid[i+Prefetch::kPrefetchOffset]+1];
PREFETCH_READ_T0(pgh + two * rid[i + Prefetch::kPrefetchOffset]);
for (size_t j = icol_start_prftch; j < icol_end_prefect; j+=Prefetch::kPrefetchStep) {
PREFETCH_READ_T0(gradient_index + j);
}
}
for (size_t j = icol_start; j < icol_end; ++j) {
const uint32_t idx_bin = two * gradient_index[j];
hist_data[idx_bin] += pgh[idx_gh];
hist_data[idx_bin+1] += pgh[idx_gh+1];
}
}
}
template<typename FPType, bool do_prefetch>
void BuildHistKernel(const std::vector<GradientPair>& gpair,
const RowSetCollection::Elem row_indices,
const GHistIndexMatrix& gmat, const bool isDense, GHistRow hist) {
if (row_indices.Size() && isDense) {
const size_t* row_ptr = gmat.row_ptr.data();
const size_t n_features = row_ptr[row_indices.begin[0]+1] - row_ptr[row_indices.begin[0]];
BuildHistDenseKernel<FPType, do_prefetch>(gpair, row_indices, gmat, n_features, hist);
} else {
BuildHistSparseKernel<FPType, do_prefetch>(gpair, row_indices, gmat, hist);
}
}
void GHistBuilder::BuildHist(const std::vector<GradientPair>& gpair,
const RowSetCollection::Elem row_indices,
const GHistIndexMatrix& gmat,
GHistRow hist,
bool isDense) {
using FPType = decltype(tree::GradStats::sum_grad);
const size_t nrows = row_indices.Size();
const size_t no_prefetch_size = Prefetch::NoPrefetchSize(nrows);
// if we need to work with all rows from the bin-matrix (e.g. the root node)
const bool contiguousBlock = (row_indices.begin[nrows - 1] - row_indices.begin[0]) == (nrows - 1);
if (contiguousBlock) {
// contiguous memory access, built-in HW prefetching is enough
BuildHistKernel<FPType, false>(gpair, row_indices, gmat, isDense, hist);
} else {
const RowSetCollection::Elem span1(row_indices.begin, row_indices.end - no_prefetch_size);
const RowSetCollection::Elem span2(row_indices.end - no_prefetch_size, row_indices.end);
BuildHistKernel<FPType, true>(gpair, span1, gmat, isDense, hist);
// no prefetching to avoid loading extra memory
BuildHistKernel<FPType, false>(gpair, span2, gmat, isDense, hist);
}
}
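For reference, the Prefetch constants above work out as follows on a typical 64-bit build (uint32_t bin indexes, size_t row pointers); this arithmetic is an editorial note, not part of the diff:

//   kPrefetchStep   = kCacheLineSize / sizeof(uint32_t) = 64 / 4 = 16   -> one prefetch per cache line of bin indexes
//   kNoPrefetchSize = kPrefetchOffset + kCacheLineSize / sizeof(size_t) = 10 + 8 = 18

The last NoPrefetchSize(nrows) rows are handed to a kernel instantiation with do_prefetch = false, so rid[i + kPrefetchOffset] is never read past the end of the row list.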
void GHistBuilder::BuildBlockHist(const std::vector<GradientPair>& gpair,
const RowSetCollection::Elem row_indices,
const GHistIndexBlockMatrix& gmatb,

View File

@ -1,5 +1,5 @@
/*!
* Copyright 2017 by Contributors
* Copyright 2017-2020 by Contributors
* \file hist_util.h
* \brief Utility for fast histogram aggregation
* \author Philip Cho, Tianqi Chen
@ -25,75 +25,6 @@
namespace xgboost {
namespace common {
/*
* \brief A thin wrapper around dynamically allocated C-style array.
* Make sure to call resize() before use.
*/
template<typename T>
struct SimpleArray {
~SimpleArray() {
std::free(ptr_);
ptr_ = nullptr;
}
void resize(size_t n) {
T* ptr = static_cast<T*>(std::malloc(n * sizeof(T)));
CHECK(ptr) << "Failed to allocate memory";
if (ptr_) {
std::memcpy(ptr, ptr_, n_ * sizeof(T));
std::free(ptr_);
}
ptr_ = ptr;
n_ = n;
}
T& operator[](size_t idx) {
return ptr_[idx];
}
T& operator[](size_t idx) const {
return ptr_[idx];
}
size_t size() const {
return n_;
}
T back() const {
return ptr_[n_-1];
}
T* data() {
return ptr_;
}
const T* data() const {
return ptr_;
}
T* begin() {
return ptr_;
}
const T* begin() const {
return ptr_;
}
T* end() {
return ptr_ + n_;
}
const T* end() const {
return ptr_ + n_;
}
private:
T* ptr_ = nullptr;
size_t n_ = 0;
};
/*!
* \brief A single row in global histogram index.
* Directly represent the global index in the histogram entry.
@ -161,7 +92,7 @@ class HistogramCuts {
return idx;
}
BinIdx SearchBin(Entry const& e) {
BinIdx SearchBin(Entry const& e) const {
return SearchBin(e.fvalue, e.index);
}
};
@ -261,8 +192,9 @@ size_t DeviceSketch(int device,
/*!
* \brief preprocessed global index matrix, in CSR format
* Transform floating values to integer index in histogram
* This is a global histogram index.
*
* Transform floating values to integer index in histogram. This is a global histogram
* index for the CPU histogram; on GPU, the ELLPACK page is used.
*/
struct GHistIndexMatrix {
/*! \brief row pointer to rows by element position */
@ -606,17 +538,15 @@ class ParallelGHistBuilder {
*/
class GHistBuilder {
public:
// initialize builder
inline void Init(size_t nthread, uint32_t nbins) {
nthread_ = nthread;
nbins_ = nbins;
}
GHistBuilder() : nthread_{0}, nbins_{0} {}
GHistBuilder(size_t nthread, uint32_t nbins) : nthread_{nthread}, nbins_{nbins} {}
// construct a histogram via histogram aggregation
void BuildHist(const std::vector<GradientPair>& gpair,
const RowSetCollection::Elem row_indices,
const GHistIndexMatrix& gmat,
GHistRow hist);
GHistRow hist,
bool isDense);
// same, with feature grouping
void BuildBlockHist(const std::vector<GradientPair>& gpair,
const RowSetCollection::Elem row_indices,
@ -625,7 +555,7 @@ class GHistBuilder {
// construct a histogram via subtraction trick
void SubtractionTrick(GHistRow self, GHistRow sibling, GHistRow parent);
uint32_t GetNumBins() {
uint32_t GetNumBins() const {
return nbins_;
}

View File

@ -10,6 +10,7 @@
#include <xgboost/data.h>
#include <algorithm>
#include <vector>
#include <utility>
namespace xgboost {
namespace common {
@ -29,7 +30,7 @@ class RowSetCollection {
= default;
Elem(const size_t* begin,
const size_t* end,
int node_id)
int node_id = -1)
: begin(begin), end(end), node_id(node_id) {}
inline size_t Size() const {
@ -57,6 +58,13 @@ class RowSetCollection {
<< "access element that is not in the set";
return e;
}
/*! \brief return corresponding element set given the node_id */
inline Elem& operator[](unsigned node_id) {
Elem& e = elem_of_each_node_[node_id];
return e;
}
// clear up things
inline void Clear() {
elem_of_each_node_.clear();
@ -83,25 +91,18 @@ class RowSetCollection {
}
// split rowset into two
inline void AddSplit(unsigned node_id,
const std::vector<Split>& row_split_tloc,
unsigned left_node_id,
unsigned right_node_id) {
unsigned right_node_id,
size_t n_left,
size_t n_right) {
const Elem e = elem_of_each_node_[node_id];
const auto nthread = static_cast<bst_omp_uint>(row_split_tloc.size());
CHECK(e.begin != nullptr);
size_t* all_begin = dmlc::BeginPtr(row_indices_);
size_t* begin = all_begin + (e.begin - all_begin);
size_t* it = begin;
for (bst_omp_uint tid = 0; tid < nthread; ++tid) {
std::copy(row_split_tloc[tid].left.begin(), row_split_tloc[tid].left.end(), it);
it += row_split_tloc[tid].left.size();
}
size_t* split_pt = it;
for (bst_omp_uint tid = 0; tid < nthread; ++tid) {
std::copy(row_split_tloc[tid].right.begin(), row_split_tloc[tid].right.end(), it);
it += row_split_tloc[tid].right.size();
}
CHECK_EQ(n_left + n_right, e.Size());
CHECK_LE(begin + n_left, e.end);
CHECK_EQ(begin + n_left + n_right, e.end);
if (left_node_id >= elem_of_each_node_.size()) {
elem_of_each_node_.resize(left_node_id + 1, Elem(nullptr, nullptr, -1));
@ -110,12 +111,12 @@ class RowSetCollection {
elem_of_each_node_.resize(right_node_id + 1, Elem(nullptr, nullptr, -1));
}
elem_of_each_node_[left_node_id] = Elem(begin, split_pt, left_node_id);
elem_of_each_node_[right_node_id] = Elem(split_pt, e.end, right_node_id);
elem_of_each_node_[left_node_id] = Elem(begin, begin + n_left, left_node_id);
elem_of_each_node_[right_node_id] = Elem(begin + n_left, e.end, right_node_id);
elem_of_each_node_[node_id] = Elem(nullptr, nullptr, -1);
}
// stores the row indices in the set
// stores the row indexes in the set
std::vector<size_t> row_indices_;
private:
@ -123,6 +124,121 @@ class RowSetCollection {
std::vector<Elem> elem_of_each_node_;
};
// The builder is required to partition samples into left and right children for a set of nodes
// Responsible for:
// 1) Efficient memory allocation for intermediate results of multi-threaded work
// 2) Merging partial results produced by threads into the original row set (row_set_collection_)
// BlockSize is a template parameter to make memory alignment easy with the C++11 'alignas()' feature
template<size_t BlockSize>
class PartitionBuilder {
public:
template<typename Func>
void Init(const size_t n_tasks, size_t n_nodes, Func funcNTaks) {
left_right_nodes_sizes_.resize(n_nodes);
blocks_offsets_.resize(n_nodes+1);
blocks_offsets_[0] = 0;
for (size_t i = 1; i < n_nodes+1; ++i) {
blocks_offsets_[i] = blocks_offsets_[i-1] + funcNTaks(i-1);
}
if (n_tasks > max_n_tasks_) {
mem_blocks_.resize(n_tasks);
max_n_tasks_ = n_tasks;
}
}
common::Span<size_t> GetLeftBuffer(int nid, size_t begin, size_t end) {
const size_t task_idx = GetTaskIdx(nid, begin);
return { mem_blocks_.at(task_idx).left(), end - begin };
}
common::Span<size_t> GetRightBuffer(int nid, size_t begin, size_t end) {
const size_t task_idx = GetTaskIdx(nid, begin);
return { mem_blocks_.at(task_idx).right(), end - begin };
}
void SetNLeftElems(int nid, size_t begin, size_t end, size_t n_left) {
size_t task_idx = GetTaskIdx(nid, begin);
mem_blocks_.at(task_idx).n_left = n_left;
}
void SetNRightElems(int nid, size_t begin, size_t end, size_t n_right) {
size_t task_idx = GetTaskIdx(nid, begin);
mem_blocks_.at(task_idx).n_right = n_right;
}
size_t GetNLeftElems(int nid) const {
return left_right_nodes_sizes_[nid].first;
}
size_t GetNRightElems(int nid) const {
return left_right_nodes_sizes_[nid].second;
}
// Each thread holds partial results for some set of tree nodes
// The function decides the order of merging partial results into the final row set
void CalculateRowOffsets() {
for (size_t i = 0; i < blocks_offsets_.size()-1; ++i) {
size_t n_left = 0;
for (size_t j = blocks_offsets_[i]; j < blocks_offsets_[i+1]; ++j) {
mem_blocks_[j].n_offset_left = n_left;
n_left += mem_blocks_[j].n_left;
}
size_t n_right = 0;
for (size_t j = blocks_offsets_[i]; j < blocks_offsets_[i+1]; ++j) {
mem_blocks_[j].n_offset_right = n_left + n_right;
n_right += mem_blocks_[j].n_right;
}
left_right_nodes_sizes_[i] = {n_left, n_right};
}
}
void MergeToArray(int nid, size_t begin, size_t* rows_indexes) {
size_t task_idx = GetTaskIdx(nid, begin);
size_t* left_result = rows_indexes + mem_blocks_[task_idx].n_offset_left;
size_t* right_result = rows_indexes + mem_blocks_[task_idx].n_offset_right;
const size_t* left = mem_blocks_[task_idx].left();
const size_t* right = mem_blocks_[task_idx].right();
std::copy_n(left, mem_blocks_[task_idx].n_left, left_result);
std::copy_n(right, mem_blocks_[task_idx].n_right, right_result);
}
protected:
size_t GetTaskIdx(int nid, size_t begin) {
return blocks_offsets_[nid] + begin / BlockSize;
}
struct BlockInfo{
size_t n_left;
size_t n_right;
size_t n_offset_left;
size_t n_offset_right;
size_t* left() {
return &left_data_[0];
}
size_t* right() {
return &right_data_[0];
}
private:
alignas(128) size_t left_data_[BlockSize];
alignas(128) size_t right_data_[BlockSize];
};
std::vector<std::pair<size_t, size_t>> left_right_nodes_sizes_;
std::vector<size_t> blocks_offsets_;
std::vector<BlockInfo> mem_blocks_;
size_t max_n_tasks_ = 0;
};
} // namespace common
} // namespace xgboost
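
A hedged usage sketch of PartitionBuilder (an editorial addition; the include path and the even/odd predicate are hypothetical, and the real call sites drive the per-block loop through ParallelFor2d, see ApplySplit below). Each task owns one BlockSize-sized chunk of one node's rows, fills its private left/right buffers, and the blocks are merged back into the original array once the offsets are known:

#include <algorithm>
#include <cstddef>
#include <vector>
#include "../../src/common/row_set.h"  // hypothetical include path for PartitionBuilder

// Partition the rows of a single node into even and odd row ids.
void PartitionEvenOdd(std::vector<std::size_t>* rows) {
  constexpr std::size_t kBlockSize = 2048;
  const std::size_t n_rows = rows->size();
  const std::size_t n_tasks = n_rows / kBlockSize + !!(n_rows % kBlockSize);

  xgboost::common::PartitionBuilder<kBlockSize> builder;
  builder.Init(n_tasks, /*n_nodes=*/1, [&](std::size_t) { return n_tasks; });

  for (std::size_t task = 0; task < n_tasks; ++task) {  // could be a parallel loop
    const std::size_t begin = task * kBlockSize;
    const std::size_t end = std::min(begin + kBlockSize, n_rows);
    auto left = builder.GetLeftBuffer(0, begin, end);
    auto right = builder.GetRightBuffer(0, begin, end);
    std::size_t n_left = 0, n_right = 0;
    for (std::size_t i = begin; i < end; ++i) {
      const std::size_t rid = (*rows)[i];
      if (rid % 2 == 0) {
        left[n_left++] = rid;
      } else {
        right[n_right++] = rid;
      }
    }
    builder.SetNLeftElems(0, begin, end, n_left);
    builder.SetNRightElems(0, begin, end, n_right);
  }
  builder.CalculateRowOffsets();  // sequential: compute where each block lands
  for (std::size_t task = 0; task < n_tasks; ++task) {
    builder.MergeToArray(0, task * kBlockSize, rows->data());  // copy blocks back in place
  }
}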

View File

@ -9,6 +9,8 @@
#include <vector>
#include <algorithm>
#include "xgboost/logging.h"
namespace xgboost {
namespace common {
@ -20,11 +22,11 @@ class Range1d {
CHECK_LT(begin, end);
}
size_t begin() {
size_t begin() const { // NOLINT
return begin_;
}
size_t end() {
size_t end() const { // NOLINT
return end_;
}

View File

@ -239,17 +239,14 @@ void QuantileHistMaker::Builder::BuildNodeStats(
builder_monitor_.Stop("BuildNodeStats");
}
void QuantileHistMaker::Builder::EvaluateSplits(
void QuantileHistMaker::Builder::AddSplitsToTree(
const GHistIndexMatrix &gmat,
const ColumnMatrix &column_matrix,
DMatrix *p_fmat,
RegTree *p_tree,
int *num_leaves,
int depth,
unsigned *timestamp,
std::vector<ExpandEntry>* nodes_for_apply_split,
std::vector<ExpandEntry>* temp_qexpand_depth) {
EvaluateSplit(qexpand_depth_wise_, gmat, hist_, *p_fmat, *p_tree);
for (auto const& entry : qexpand_depth_wise_) {
int nid = entry.nid;
@ -258,7 +255,17 @@ void QuantileHistMaker::Builder::EvaluateSplits(
(param_.max_leaves > 0 && (*num_leaves) == param_.max_leaves)) {
(*p_tree)[nid].SetLeaf(snode_[nid].weight * param_.learning_rate);
} else {
this->ApplySplit(nid, gmat, column_matrix, hist_, *p_fmat, p_tree);
nodes_for_apply_split->push_back(entry);
NodeEntry& e = snode_[nid];
bst_float left_leaf_weight =
spliteval_->ComputeWeight(nid, e.best.left_sum) * param_.learning_rate;
bst_float right_leaf_weight =
spliteval_->ComputeWeight(nid, e.best.right_sum) * param_.learning_rate;
p_tree->ExpandNode(nid, e.best.SplitIndex(), e.best.split_value,
e.best.DefaultLeft(), e.weight, left_leaf_weight,
right_leaf_weight, e.best.loss_chg, e.stats.sum_hess);
int left_id = (*p_tree)[nid].LeftChild();
int right_id = (*p_tree)[nid].RightChild();
temp_qexpand_depth->push_back(ExpandEntry(left_id, right_id,
@ -271,6 +278,24 @@ void QuantileHistMaker::Builder::EvaluateSplits(
}
}
void QuantileHistMaker::Builder::EvaluateAndApplySplits(
const GHistIndexMatrix &gmat,
const ColumnMatrix &column_matrix,
RegTree *p_tree,
int *num_leaves,
int depth,
unsigned *timestamp,
std::vector<ExpandEntry> *temp_qexpand_depth) {
EvaluateSplits(qexpand_depth_wise_, gmat, hist_, *p_tree);
std::vector<ExpandEntry> nodes_for_apply_split;
AddSplitsToTree(gmat, p_tree, num_leaves, depth, timestamp,
&nodes_for_apply_split, temp_qexpand_depth);
ApplySplit(nodes_for_apply_split, gmat, column_matrix, hist_, p_tree);
}
// Split nodes into 2 sets depending on the number of rows in each node
// Histograms for small nodes will be built explicitly
// Histograms for big nodes will be built by the 'Subtraction Trick'
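A brief editorial note on the trick referenced above: every row of a parent node falls into exactly one of its children, so for each bin hist(parent) = hist(left) + hist(right). Only the child with fewer rows therefore needs an explicit BuildHist pass; the sibling's histogram is derived with the existing subtraction helper, roughly (variable names are placeholders):

// build only the cheaper histogram, derive the other one per bin
hist_builder_.BuildHist(gpair, rows_of_smaller_child, gmat, hist_smaller, is_dense);
SubtractionHist(hist_larger, hist_parent, hist_smaller, 0, nbins);  // larger = parent - smaller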
@ -335,7 +360,7 @@ void QuantileHistMaker::Builder::ExpandWithDepthWise(
SyncHistograms(starting_index, sync_count, p_tree);
BuildNodeStats(gmat, p_fmat, p_tree, gpair_h);
EvaluateSplits(gmat, column_matrix, p_fmat, p_tree, &num_leaves, depth, &timestamp,
EvaluateAndApplySplits(gmat, column_matrix, p_tree, &num_leaves, depth, &timestamp,
&temp_qexpand_depth);
// clean up
qexpand_depth_wise_.clear();
@ -367,7 +392,7 @@ void QuantileHistMaker::Builder::ExpandWithLossGuide(
this->InitNewNode(ExpandEntry::kRootNid, gmat, gpair_h, *p_fmat, *p_tree);
this->EvaluateSplit({node}, gmat, hist_, *p_fmat, *p_tree);
this->EvaluateSplits({node}, gmat, hist_, *p_tree);
node.loss_chg = snode_[ExpandEntry::kRootNid].best.loss_chg;
qexpand_loss_guided_->push(node);
@ -377,12 +402,19 @@ void QuantileHistMaker::Builder::ExpandWithLossGuide(
const ExpandEntry candidate = qexpand_loss_guided_->top();
const int nid = candidate.nid;
qexpand_loss_guided_->pop();
if (candidate.loss_chg <= kRtEps
|| (param_.max_depth > 0 && candidate.depth == param_.max_depth)
|| (param_.max_leaves > 0 && num_leaves == param_.max_leaves) ) {
if (candidate.IsValid(param_, num_leaves)) {
(*p_tree)[nid].SetLeaf(snode_[nid].weight * param_.learning_rate);
} else {
this->ApplySplit(nid, gmat, column_matrix, hist_, *p_fmat, p_tree);
NodeEntry& e = snode_[nid];
bst_float left_leaf_weight =
spliteval_->ComputeWeight(nid, e.best.left_sum) * param_.learning_rate;
bst_float right_leaf_weight =
spliteval_->ComputeWeight(nid, e.best.right_sum) * param_.learning_rate;
p_tree->ExpandNode(nid, e.best.SplitIndex(), e.best.split_value,
e.best.DefaultLeft(), e.weight, left_leaf_weight,
right_leaf_weight, e.best.loss_chg, e.stats.sum_hess);
this->ApplySplit({candidate}, gmat, column_matrix, hist_, p_tree);
const int cleft = (*p_tree)[nid].LeftChild();
const int cright = (*p_tree)[nid].RightChild();
@ -410,7 +442,7 @@ void QuantileHistMaker::Builder::ExpandWithLossGuide(
snode_[cleft].weight, snode_[cright].weight);
interaction_constraints_.Split(nid, featureid, cleft, cright);
this->EvaluateSplit({left_node, right_node}, gmat, hist_, *p_fmat, *p_tree);
this->EvaluateSplits({left_node, right_node}, gmat, hist_, *p_tree);
left_node.loss_chg = snode_[cleft].best.loss_chg;
right_node.loss_chg = snode_[cright].best.loss_chg;
@ -473,7 +505,14 @@ bool QuantileHistMaker::Builder::UpdatePredictionCache(
CHECK_GT(out_preds.size(), 0U);
for (const RowSetCollection::Elem rowset : row_set_collection_) {
size_t n_nodes = row_set_collection_.end() - row_set_collection_.begin();
common::BlockedSpace2d space(n_nodes, [&](size_t node) {
return row_set_collection_[node].Size();
}, 1024);
common::ParallelFor2d(space, this->nthread_, [&](size_t node, common::Range1d r) {
const RowSetCollection::Elem rowset = row_set_collection_[node];
if (rowset.begin != nullptr && rowset.end != nullptr) {
int nid = rowset.node_id;
bst_float leaf_value;
@ -487,11 +526,11 @@ bool QuantileHistMaker::Builder::UpdatePredictionCache(
}
leaf_value = (*p_last_tree_)[nid].LeafValue();
for (const size_t* it = rowset.begin; it < rowset.end; ++it) {
for (const size_t* it = rowset.begin + r.begin(); it < rowset.begin + r.end(); ++it) {
out_preds[*it] += leaf_value;
}
}
}
});
builder_monitor_.Stop("UpdatePredictionCache");
return true;
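A hedged sketch of the (node, row-block) pattern used above (an editorial addition; the include path, AddLeafValues, rows_per_node and leaf_value_of_node are placeholders, not part of the commit). BlockedSpace2d enumerates, for every node, its rows in chunks of the given grain size, and ParallelFor2d distributes those chunks over the threads:

#include <cstddef>
#include <vector>
#include "../../src/common/threading_utils.h"  // hypothetical include path

void AddLeafValues(const std::vector<std::vector<std::size_t>>& rows_per_node,
                   const std::vector<float>& leaf_value_of_node,
                   std::vector<float>* out_preds, int nthread) {
  // dim1 = nodes, dim2 = rows of each node, chopped into chunks of 1024 rows
  xgboost::common::BlockedSpace2d space(rows_per_node.size(),
      [&](std::size_t node) { return rows_per_node[node].size(); }, 1024);
  xgboost::common::ParallelFor2d(space, nthread, [&](std::size_t node, xgboost::common::Range1d r) {
    for (std::size_t i = r.begin(); i < r.end(); ++i) {
      (*out_preds)[rows_per_node[node][i]] += leaf_value_of_node[node];  // mirrors UpdatePredictionCache
    }
  });
}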
@ -526,7 +565,7 @@ void QuantileHistMaker::Builder::InitData(const GHistIndexMatrix& gmat,
{
this->nthread_ = omp_get_num_threads();
}
hist_builder_.Init(this->nthread_, nbins);
hist_builder_ = GHistBuilder(this->nthread_, nbins);
std::vector<size_t>& row_indices = row_set_collection_.row_indices_;
row_indices.resize(info.num_row_);
@ -674,12 +713,11 @@ bool QuantileHistMaker::Builder::SplitContainsMissingValues(const GradStats e,
}
// nodes_set - set of nodes to be processed in parallel
void QuantileHistMaker::Builder::EvaluateSplit(const std::vector<ExpandEntry>& nodes_set,
void QuantileHistMaker::Builder::EvaluateSplits(const std::vector<ExpandEntry>& nodes_set,
const GHistIndexMatrix& gmat,
const HistCollection& hist,
const DMatrix& fmat,
const RegTree& tree) {
builder_monitor_.Start("EvaluateSplit");
builder_monitor_.Start("EvaluateSplits");
const size_t n_nodes_in_set = nodes_set.size();
const size_t nthread = std::max(1, this->nthread_);
@ -714,10 +752,10 @@ void QuantileHistMaker::Builder::EvaluateSplit(const std::vector<ExpandEntry>& n
for (auto idx_in_feature_set = r.begin(); idx_in_feature_set < r.end(); ++idx_in_feature_set) {
const auto fid = features_sets[nid_in_set]->ConstHostVector()[idx_in_feature_set];
if (interaction_constraints_.Query(nid, fid)) {
auto grad_stats = this->EnumerateSplit<+1>(gmat, node_hist, snode_[nid], fmat.Info(),
auto grad_stats = this->EnumerateSplit<+1>(gmat, node_hist, snode_[nid],
&best_split_tloc_[nthread*nid_in_set + tid], fid, nid);
if (SplitContainsMissingValues(grad_stats, snode_[nid])) {
this->EnumerateSplit<-1>(gmat, node_hist, snode_[nid], fmat.Info(),
this->EnumerateSplit<-1>(gmat, node_hist, snode_[nid],
&best_split_tloc_[nthread*nid_in_set + tid], fid, nid);
}
}
@ -732,38 +770,157 @@ void QuantileHistMaker::Builder::EvaluateSplit(const std::vector<ExpandEntry>& n
}
}
builder_monitor_.Stop("EvaluateSplit");
builder_monitor_.Stop("EvaluateSplits");
}
void QuantileHistMaker::Builder::ApplySplit(int nid,
// Split row indexes (rid_span) into 2 parts (left_part, right_part) depending
// on the comparison of index values (idx_span) with the split point (split_cond)
// Handles dense columns
// Analogous to std::stable_partition, but not performed in place
template <bool default_left>
inline std::pair<size_t, size_t> PartitionDenseKernel(
common::Span<const size_t> rid_span, common::Span<const uint32_t> idx_span,
const int32_t split_cond, const uint32_t offset,
common::Span<size_t> left_part, common::Span<size_t> right_part) {
const uint32_t* idx = idx_span.data();
size_t* p_left_part = left_part.data();
size_t* p_right_part = right_part.data();
size_t nleft_elems = 0;
size_t nright_elems = 0;
const uint32_t missing_val = std::numeric_limits<uint32_t>::max();
for (auto rid : rid_span) {
if (idx[rid] == missing_val) {
if (default_left) {
p_left_part[nleft_elems++] = rid;
} else {
p_right_part[nright_elems++] = rid;
}
} else {
if (static_cast<int32_t>(idx[rid] + offset) <= split_cond) {
p_left_part[nleft_elems++] = rid;
} else {
p_right_part[nright_elems++] = rid;
}
}
}
return {nleft_elems, nright_elems};
}
// Split row indexes (rid_span) into 2 parts (left_part, right_part) depending
// on the comparison of index values with the split point (split_cond).
// Handles sparse columns
template<bool default_left>
inline std::pair<size_t, size_t> PartitionSparseKernel(
common::Span<const size_t> rid_span, const int32_t split_cond, const Column& column,
common::Span<size_t> left_part, common::Span<size_t> right_part) {
size_t* p_left_part = left_part.data();
size_t* p_right_part = right_part.data();
size_t nleft_elems = 0;
size_t nright_elems = 0;
if (rid_span.size()) { // ensure that rid_span is nonempty range
// search first nonzero row with index >= rid_span.front()
const size_t* p = std::lower_bound(column.GetRowData(),
column.GetRowData() + column.Size(),
rid_span.front());
if (p != column.GetRowData() + column.Size() && *p <= rid_span.back()) {
size_t cursor = p - column.GetRowData();
for (auto rid : rid_span) {
while (cursor < column.Size()
&& column.GetRowIdx(cursor) < rid
&& column.GetRowIdx(cursor) <= rid_span.back()) {
++cursor;
}
if (cursor < column.Size() && column.GetRowIdx(cursor) == rid) {
const uint32_t rbin = column.GetFeatureBinIdx(cursor);
if (static_cast<int32_t>(rbin + column.GetBaseIdx()) <= split_cond) {
p_left_part[nleft_elems++] = rid;
} else {
p_right_part[nright_elems++] = rid;
}
++cursor;
} else {
// missing value
if (default_left) {
p_left_part[nleft_elems++] = rid;
} else {
p_right_part[nright_elems++] = rid;
}
}
}
} else { // all rows in rid_span have missing values
if (default_left) {
std::copy(rid_span.begin(), rid_span.end(), p_left_part);
nleft_elems = rid_span.size();
} else {
std::copy(rid_span.begin(), rid_span.end(), p_right_part);
nright_elems = rid_span.size();
}
}
}
return {nleft_elems, nright_elems};
}
void QuantileHistMaker::Builder::PartitionKernel(
const size_t node_in_set, const size_t nid, common::Range1d range,
const int32_t split_cond, const ColumnMatrix& column_matrix,
const GHistIndexMatrix& gmat, const RegTree& tree) {
const size_t* rid = row_set_collection_[nid].begin;
common::Span<const size_t> rid_span(rid + range.begin(), rid + range.end());
common::Span<size_t> left = partition_builder_.GetLeftBuffer(node_in_set,
range.begin(), range.end());
common::Span<size_t> right = partition_builder_.GetRightBuffer(node_in_set,
range.begin(), range.end());
const bst_uint fid = tree[nid].SplitIndex();
const bool default_left = tree[nid].DefaultLeft();
const auto column = column_matrix.GetColumn(fid);
const uint32_t offset = column.GetBaseIdx();
common::Span<const uint32_t> idx_spin = column.GetFeatureBinIdxPtr();
std::pair<size_t, size_t> child_nodes_sizes;
if (column.GetType() == xgboost::common::kDenseColumn) {
if (default_left) {
child_nodes_sizes = PartitionDenseKernel<true>(
rid_span, idx_spin, split_cond, offset, left, right);
} else {
child_nodes_sizes = PartitionDenseKernel<false>(
rid_span, idx_spin, split_cond, offset, left, right);
}
} else {
if (default_left) {
child_nodes_sizes = PartitionSparseKernel<true>(rid_span, split_cond, column, left, right);
} else {
child_nodes_sizes = PartitionSparseKernel<false>(rid_span, split_cond, column, left, right);
}
}
const size_t n_left = child_nodes_sizes.first;
const size_t n_right = child_nodes_sizes.second;
partition_builder_.SetNLeftElems(node_in_set, range.begin(), range.end(), n_left);
partition_builder_.SetNRightElems(node_in_set, range.begin(), range.end(), n_right);
}
void QuantileHistMaker::Builder::FindSplitConditions(const std::vector<ExpandEntry>& nodes,
const RegTree& tree,
const GHistIndexMatrix& gmat,
const ColumnMatrix& column_matrix,
const HistCollection& hist,
const DMatrix& fmat,
RegTree* p_tree) {
builder_monitor_.Start("ApplySplit");
// TODO(hcho3): support feature sampling by levels
std::vector<int32_t>* split_conditions) {
const size_t n_nodes = nodes.size();
split_conditions->resize(n_nodes);
/* 1. Create child nodes */
NodeEntry& e = snode_[nid];
bst_float left_leaf_weight =
spliteval_->ComputeWeight(nid, e.best.left_sum) * param_.learning_rate;
bst_float right_leaf_weight =
spliteval_->ComputeWeight(nid, e.best.right_sum) * param_.learning_rate;
p_tree->ExpandNode(nid, e.best.SplitIndex(), e.best.split_value,
e.best.DefaultLeft(), e.weight, left_leaf_weight,
right_leaf_weight, e.best.loss_chg, e.stats.sum_hess);
/* 2. Categorize member rows */
const auto nthread = static_cast<bst_omp_uint>(this->nthread_);
row_split_tloc_.resize(nthread);
for (bst_omp_uint i = 0; i < nthread; ++i) {
row_split_tloc_[i].left.clear();
row_split_tloc_[i].right.clear();
}
const bool default_left = (*p_tree)[nid].DefaultLeft();
const bst_uint fid = (*p_tree)[nid].SplitIndex();
const bst_float split_pt = (*p_tree)[nid].SplitCond();
for (size_t i = 0; i < nodes.size(); ++i) {
const int32_t nid = nodes[i].nid;
const bst_uint fid = tree[nid].SplitIndex();
const bst_float split_pt = tree[nid].SplitCond();
const uint32_t lower_bound = gmat.cut.Ptrs()[fid];
const uint32_t upper_bound = gmat.cut.Ptrs()[fid + 1];
int32_t split_cond = -1;
@ -776,154 +933,77 @@ void QuantileHistMaker::Builder::ApplySplit(int nid,
split_cond = static_cast<int32_t>(i);
}
}
const auto& rowset = row_set_collection_[nid];
Column column = column_matrix.GetColumn(fid);
if (column.GetType() == xgboost::common::kDenseColumn) {
ApplySplitDenseData(rowset, gmat, &row_split_tloc_, column, split_cond,
default_left);
} else {
ApplySplitSparseData(rowset, gmat, &row_split_tloc_, column, lower_bound,
upper_bound, split_cond, default_left);
(*split_conditions)[i] = split_cond;
}
}
row_set_collection_.AddSplit(
nid, row_split_tloc_, (*p_tree)[nid].LeftChild(), (*p_tree)[nid].RightChild());
void QuantileHistMaker::Builder::AddSplitsToRowSet(const std::vector<ExpandEntry>& nodes,
RegTree* p_tree) {
const size_t n_nodes = nodes.size();
for (size_t i = 0; i < n_nodes; ++i) {
const int32_t nid = nodes[i].nid;
const size_t n_left = partition_builder_.GetNLeftElems(i);
const size_t n_right = partition_builder_.GetNRightElems(i);
row_set_collection_.AddSplit(nid, (*p_tree)[nid].LeftChild(),
(*p_tree)[nid].RightChild(), n_left, n_right);
}
}
void QuantileHistMaker::Builder::ApplySplit(const std::vector<ExpandEntry> nodes,
const GHistIndexMatrix& gmat,
const ColumnMatrix& column_matrix,
const HistCollection& hist,
RegTree* p_tree) {
builder_monitor_.Start("ApplySplit");
// 1. Find split condition for each split
const size_t n_nodes = nodes.size();
std::vector<int32_t> split_conditions;
FindSplitConditions(nodes, *p_tree, gmat, &split_conditions);
// 2.1 Create a blocked space of size SUM(samples in each node)
common::BlockedSpace2d space(n_nodes, [&](size_t node_in_set) {
int32_t nid = nodes[node_in_set].nid;
return row_set_collection_[nid].Size();
}, kPartitionBlockSize);
// 2.2 Initialize the partition builder
// allocate buffers for storing intermediate results produced by each thread
partition_builder_.Init(space.Size(), n_nodes, [&](size_t node_in_set) {
const int32_t nid = nodes[node_in_set].nid;
const size_t size = row_set_collection_[nid].Size();
const size_t n_tasks = size / kPartitionBlockSize + !!(size % kPartitionBlockSize);
return n_tasks;
});
// 2.3 Split elements of row_set_collection_ into left and right child nodes for each node
// Store results in the intermediate buffers of partition_builder_
common::ParallelFor2d(space, this->nthread_, [&](size_t node_in_set, common::Range1d r) {
const int32_t nid = nodes[node_in_set].nid;
PartitionKernel(node_in_set, nid, r,
split_conditions[node_in_set], column_matrix, gmat, *p_tree);
});
// 3. Compute offsets to copy blocks of row-indexes
// from partition_builder_ to row_set_collection_
partition_builder_.CalculateRowOffsets();
// 4. Copy elements from partition_builder_ back to row_set_collection_
// with updated row indexes for each tree node
common::ParallelFor2d(space, this->nthread_, [&](size_t node_in_set, common::Range1d r) {
const int32_t nid = nodes[node_in_set].nid;
partition_builder_.MergeToArray(node_in_set, r.begin(),
const_cast<size_t*>(row_set_collection_[nid].begin));
});
// 5. Add info about splits into row_set_collection_
AddSplitsToRowSet(nodes, p_tree);
builder_monitor_.Stop("ApplySplit");
}
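A worked illustration of steps 2.2-4 above (an editorial example with made-up numbers): suppose one node is covered by 3 blocks of kPartitionBlockSize rows and the threads report n_left = {5, 7, 2} and n_right = {3, 1, 6}. CalculateRowOffsets then produces

  n_offset_left  = {0, 5, 12}     // left rows of each block are packed first
  n_offset_right = {14, 17, 18}   // right rows start after all 14 left rows
  left_right_nodes_sizes_[node] = {14, 10}

so every MergeToArray call can copy its block into row_set_collection_ independently of the others, and AddSplitsToRowSet only needs the two totals (n_left = 14, n_right = 10) to place the boundary between the two children.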
void QuantileHistMaker::Builder::ApplySplitDenseData(
const RowSetCollection::Elem rowset,
const GHistIndexMatrix& gmat,
std::vector<RowSetCollection::Split>* p_row_split_tloc,
const Column& column,
bst_int split_cond,
bool default_left) {
std::vector<RowSetCollection::Split>& row_split_tloc = *p_row_split_tloc;
constexpr int kUnroll = 8; // loop unrolling factor
const size_t nrows = rowset.end - rowset.begin;
const size_t rest = nrows % kUnroll;
#pragma omp parallel for num_threads(nthread_) schedule(static)
for (bst_omp_uint i = 0; i < nrows - rest; i += kUnroll) {
const bst_uint tid = omp_get_thread_num();
auto& left = row_split_tloc[tid].left;
auto& right = row_split_tloc[tid].right;
size_t rid[kUnroll];
uint32_t rbin[kUnroll];
for (int k = 0; k < kUnroll; ++k) {
rid[k] = rowset.begin[i + k];
}
for (int k = 0; k < kUnroll; ++k) {
rbin[k] = column.GetFeatureBinIdx(rid[k]);
}
for (int k = 0; k < kUnroll; ++k) { // NOLINT
if (rbin[k] == std::numeric_limits<uint32_t>::max()) { // missing value
if (default_left) {
left.push_back(rid[k]);
} else {
right.push_back(rid[k]);
}
} else {
if (static_cast<int32_t>(rbin[k] + column.GetBaseIdx()) <= split_cond) {
left.push_back(rid[k]);
} else {
right.push_back(rid[k]);
}
}
}
}
for (size_t i = nrows - rest; i < nrows; ++i) {
auto& left = row_split_tloc[nthread_-1].left;
auto& right = row_split_tloc[nthread_-1].right;
const size_t rid = rowset.begin[i];
const uint32_t rbin = column.GetFeatureBinIdx(rid);
if (rbin == std::numeric_limits<uint32_t>::max()) { // missing value
if (default_left) {
left.push_back(rid);
} else {
right.push_back(rid);
}
} else {
if (static_cast<int32_t>(rbin + column.GetBaseIdx()) <= split_cond) {
left.push_back(rid);
} else {
right.push_back(rid);
}
}
}
}
void QuantileHistMaker::Builder::ApplySplitSparseData(
const RowSetCollection::Elem rowset,
const GHistIndexMatrix& gmat,
std::vector<RowSetCollection::Split>* p_row_split_tloc,
const Column& column,
bst_uint lower_bound,
bst_uint upper_bound,
bst_int split_cond,
bool default_left) {
std::vector<RowSetCollection::Split>& row_split_tloc = *p_row_split_tloc;
const size_t nrows = rowset.end - rowset.begin;
#pragma omp parallel num_threads(nthread_)
{
const auto tid = static_cast<size_t>(omp_get_thread_num());
const size_t ibegin = tid * nrows / nthread_;
const size_t iend = (tid + 1) * nrows / nthread_;
if (ibegin < iend) { // ensure that [ibegin, iend) is nonempty range
// search first nonzero row with index >= rowset[ibegin]
const size_t* p = std::lower_bound(column.GetRowData(),
column.GetRowData() + column.Size(),
rowset.begin[ibegin]);
auto& left = row_split_tloc[tid].left;
auto& right = row_split_tloc[tid].right;
if (p != column.GetRowData() + column.Size() && *p <= rowset.begin[iend - 1]) {
size_t cursor = p - column.GetRowData();
for (size_t i = ibegin; i < iend; ++i) {
const size_t rid = rowset.begin[i];
while (cursor < column.Size()
&& column.GetRowIdx(cursor) < rid
&& column.GetRowIdx(cursor) <= rowset.begin[iend - 1]) {
++cursor;
}
if (cursor < column.Size() && column.GetRowIdx(cursor) == rid) {
const uint32_t rbin = column.GetFeatureBinIdx(cursor);
if (static_cast<int32_t>(rbin + column.GetBaseIdx()) <= split_cond) {
left.push_back(rid);
} else {
right.push_back(rid);
}
++cursor;
} else {
// missing value
if (default_left) {
left.push_back(rid);
} else {
right.push_back(rid);
}
}
}
} else { // all rows in [ibegin, iend) have missing values
if (default_left) {
for (size_t i = ibegin; i < iend; ++i) {
const size_t rid = rowset.begin[i];
left.push_back(rid);
}
} else {
for (size_t i = ibegin; i < iend; ++i) {
const size_t rid = rowset.begin[i];
right.push_back(rid);
}
}
}
}
}
}
void QuantileHistMaker::Builder::InitNewNode(int nid,
const GHistIndexMatrix& gmat,
const std::vector<GradientPair>& gpair,
@ -981,13 +1061,8 @@ void QuantileHistMaker::Builder::InitNewNode(int nid,
// for the particular feature fid.
template <int d_step>
GradStats QuantileHistMaker::Builder::EnumerateSplit(
const GHistIndexMatrix& gmat,
const GHistRow& hist,
const NodeEntry& snode,
const MetaInfo& info,
SplitEntry* p_best,
bst_uint fid,
bst_uint nodeID) {
const GHistIndexMatrix &gmat, const GHistRow &hist, const NodeEntry &snode,
SplitEntry *p_best, bst_uint fid, bst_uint nodeID) const {
CHECK(d_step == +1 || d_step == -1);
// aliases

View File

@ -161,7 +161,7 @@ class QuantileHistMaker: public TreeUpdater {
if (param_.enable_feature_grouping > 0) {
hist_builder_.BuildBlockHist(gpair, row_indices, gmatb, hist);
} else {
hist_builder_.BuildHist(gpair, row_indices, gmat, hist);
hist_builder_.BuildHist(gpair, row_indices, gmat, hist, data_layout_ != kSparseData);
}
}
@ -186,6 +186,13 @@ class QuantileHistMaker: public TreeUpdater {
unsigned timestamp;
ExpandEntry(int nid, int sibling_nid, int depth, bst_float loss_chg, unsigned tstmp):
nid(nid), sibling_nid(sibling_nid), depth(depth), loss_chg(loss_chg), timestamp(tstmp) {}
bool IsValid(TrainParam const& param, int32_t num_leaves) const {
bool ret = loss_chg <= kRtEps ||
(param.max_depth > 0 && this->depth == param.max_depth) ||
(param.max_leaves > 0 && num_leaves == param.max_leaves);
return ret;
}
};
// initialize temp data structure
@ -194,34 +201,27 @@ class QuantileHistMaker: public TreeUpdater {
const DMatrix& fmat,
const RegTree& tree);
void EvaluateSplit(const std::vector<ExpandEntry>& nodes_set,
void EvaluateSplits(const std::vector<ExpandEntry>& nodes_set,
const GHistIndexMatrix& gmat,
const HistCollection& hist,
const DMatrix& fmat,
const RegTree& tree);
void ApplySplit(int nid,
void ApplySplit(std::vector<ExpandEntry> nodes,
const GHistIndexMatrix& gmat,
const ColumnMatrix& column_matrix,
const HistCollection& hist,
const DMatrix& fmat,
RegTree* p_tree);
void ApplySplitDenseData(const RowSetCollection::Elem rowset,
const GHistIndexMatrix& gmat,
std::vector<RowSetCollection::Split>* p_row_split_tloc,
const Column& column,
bst_int split_cond,
bool default_left);
void PartitionKernel(const size_t node_in_set, const size_t nid, common::Range1d range,
const int32_t split_cond,
const ColumnMatrix& column_matrix, const GHistIndexMatrix& gmat,
const RegTree& tree);
void ApplySplitSparseData(const RowSetCollection::Elem rowset,
const GHistIndexMatrix& gmat,
std::vector<RowSetCollection::Split>* p_row_split_tloc,
const Column& column,
bst_uint lower_bound,
bst_uint upper_bound,
bst_int split_cond,
bool default_left);
void AddSplitsToRowSet(const std::vector<ExpandEntry>& nodes, RegTree* p_tree);
void FindSplitConditions(const std::vector<ExpandEntry>& nodes, const RegTree& tree,
const GHistIndexMatrix& gmat, std::vector<int32_t>* split_conditions);
void InitNewNode(int nid,
const GHistIndexMatrix& gmat,
@ -233,14 +233,9 @@ class QuantileHistMaker: public TreeUpdater {
// Returns the sum of gradients corresponding to the data points that contain a non-missing
// value for the particular feature fid.
template <int d_step>
GradStats EnumerateSplit(
const GHistIndexMatrix& gmat,
const GHistRow& hist,
const NodeEntry& snode,
const MetaInfo& info,
SplitEntry* p_best,
bst_uint fid,
bst_uint nodeID);
GradStats EnumerateSplit(const GHistIndexMatrix &gmat, const GHistRow &hist,
const NodeEntry &snode, SplitEntry *p_best,
bst_uint fid, bst_uint nodeID) const;
// if sum of statistics for non-missing values in the node
// is equal to sum of statistics for all values:
@ -286,15 +281,23 @@ class QuantileHistMaker: public TreeUpdater {
RegTree *p_tree,
const std::vector<GradientPair> &gpair_h);
void EvaluateSplits(const GHistIndexMatrix &gmat,
void EvaluateAndApplySplits(const GHistIndexMatrix &gmat,
const ColumnMatrix &column_matrix,
DMatrix *p_fmat,
RegTree *p_tree,
int *num_leaves,
int depth,
unsigned *timestamp,
std::vector<ExpandEntry> *temp_qexpand_depth);
void AddSplitsToTree(
const GHistIndexMatrix &gmat,
RegTree *p_tree,
int *num_leaves,
int depth,
unsigned *timestamp,
std::vector<ExpandEntry>* nodes_for_apply_split,
std::vector<ExpandEntry>* temp_qexpand_depth);
void ExpandWithLossGuide(const GHistIndexMatrix& gmat,
const GHistIndexBlockMatrix& gmatb,
const ColumnMatrix& column_matrix,
@ -335,6 +338,9 @@ class QuantileHistMaker: public TreeUpdater {
std::unique_ptr<SplitEvaluator> spliteval_;
FeatureInteractionConstraintHost interaction_constraints_;
static constexpr size_t kPartitionBlockSize = 2048;
common::PartitionBuilder<kPartitionBlockSize> partition_builder_;
// back pointers to tree and data matrix
const RegTree* p_last_tree_;
DMatrix const* const p_last_fmat_;

View File

@ -0,0 +1,76 @@
#include <gtest/gtest.h>
#include <algorithm>
#include <vector>
#include <string>
#include <utility>
#include "../../../src/common/row_set.h"
#include "../helpers.h"
namespace xgboost {
namespace common {
TEST(PartitionBuilder, BasicTest) {
constexpr size_t kBlockSize = 16;
constexpr size_t kNodes = 5;
constexpr size_t kTasks = 3 + 5 + 10 + 1 + 2;
std::vector<size_t> tasks = { 3, 5, 10, 1, 2 };
PartitionBuilder<kBlockSize> builder;
builder.Init(kTasks, kNodes, [&](size_t i) {
return tasks[i];
});
std::vector<size_t> rows_for_left_node = { 2, 12, 0, 16, 8 };
for(size_t nid = 0; nid < kNodes; ++nid) {
size_t value_left = 0;
size_t value_right = 0;
size_t left_total = tasks[nid] * rows_for_left_node[nid];
for(size_t j = 0; j < tasks[nid]; ++j) {
size_t begin = kBlockSize*j;
size_t end = kBlockSize*(j+1);
auto left = builder.GetLeftBuffer(nid, begin, end);
auto right = builder.GetRightBuffer(nid, begin, end);
size_t n_left = rows_for_left_node[nid];
size_t n_right = kBlockSize - rows_for_left_node[nid];
for(size_t i = 0; i < n_left; i++) {
left[i] = value_left++;
}
for(size_t i = 0; i < n_right; i++) {
right[i] = left_total + value_right++;
}
builder.SetNLeftElems(nid, begin, end, n_left);
builder.SetNRightElems(nid, begin, end, n_right);
}
}
builder.CalculateRowOffsets();
std::vector<size_t> v(*std::max_element(tasks.begin(), tasks.end()) * kBlockSize);
for(size_t nid = 0; nid < kNodes; ++nid) {
for(size_t j = 0; j < tasks[nid]; ++j) {
builder.MergeToArray(nid, kBlockSize*j, v.data());
}
for(size_t j = 0; j < tasks[nid] * kBlockSize; ++j) {
ASSERT_EQ(v[j], j);
}
size_t n_left = builder.GetNLeftElems(nid);
size_t n_right = builder.GetNRightElems(nid);
ASSERT_EQ(n_left, rows_for_left_node[nid] * tasks[nid]);
ASSERT_EQ(n_right, (kBlockSize - rows_for_left_node[nid]) * tasks[nid]);
}
}
} // namespace common
} // namespace xgboost

View File

@ -213,7 +213,7 @@ class QuantileHistMock : public QuantileHistMaker {
/* Now compare against the result given by EvaluateSplits() */
ExpandEntry node(ExpandEntry::kRootNid, ExpandEntry::kEmptyNid,
tree.GetDepth(0), snode_[0].best.loss_chg, 0);
RealImpl::EvaluateSplit({node}, gmat, hist_, *(*dmat), tree);
RealImpl::EvaluateSplits({node}, gmat, hist_, tree);
ASSERT_EQ(snode_[0].best.SplitIndex(), best_split_feature);
ASSERT_EQ(snode_[0].best.split_value, gmat.cut.Values()[best_split_threshold]);