Simplify sparse and dense CPU hist kernels (#7029)

* Simplify sparse and dense kernels
* Extract row partitioner.

Co-authored-by: Kirill Shvets <kirill.shvets@intel.com>
ShvetsKS 2021-06-11 13:26:30 +03:00 committed by GitHub
parent 1faad825f4
commit 2567404ab6
10 changed files with 369 additions and 434 deletions


@ -30,6 +30,8 @@ enum ColumnType {
template <typename BinIdxType>
class Column {
 public:
static constexpr int32_t kMissingId = -1;
Column(ColumnType type, common::Span<const BinIdxType> index, const uint32_t index_base)
: type_(type),
index_(index),
@ -71,6 +73,30 @@ class SparseColumn: public Column<BinIdxType> {
const size_t* GetRowData() const { return row_ind_.data(); }
int32_t GetBinIdx(size_t rid, size_t* state) const {
const size_t column_size = this->Size();
if (!((*state) < column_size)) {
return this->kMissingId;
}
while ((*state) < column_size && GetRowIdx(*state) < rid) {
++(*state);
}
if (((*state) < column_size) && GetRowIdx(*state) == rid) {
return this->GetGlobalBinIdx(*state);
} else {
return this->kMissingId;
}
}
size_t GetInitialState(const size_t first_row_id) const {
const size_t* row_data = GetRowData();
const size_t column_size = this->Size();
// search first nonzero row with index >= rid_span.front()
const size_t* p = std::lower_bound(row_data, row_data + column_size, first_row_id);
// column_size if all missing
return p - row_data;
}
size_t GetRowIdx(size_t idx) const {
return row_ind_.data()[idx];
}
@ -80,7 +106,7 @@ class SparseColumn: public Column<BinIdxType> {
common::Span<const size_t> row_ind_;
};
-template <typename BinIdxType>
+template <typename BinIdxType, bool any_missing>
class DenseColumn: public Column<BinIdxType> {
 public:
DenseColumn(ColumnType type, common::Span<const BinIdxType> index,
@ -90,6 +116,19 @@ class DenseColumn: public Column<BinIdxType> {
missing_flags_(missing_flags),
feature_offset_(feature_offset) {}
bool IsMissing(size_t idx) const { return missing_flags_[feature_offset_ + idx]; }
int32_t GetBinIdx(size_t idx, size_t* state) const {
if (any_missing) {
return IsMissing(idx) ? this->kMissingId : this->GetGlobalBinIdx(idx);
} else {
return this->GetGlobalBinIdx(idx);
}
}
size_t GetInitialState(const size_t first_row_id) const {
return 0;
}
 private:
/* flags for missing values in dense columns */
const std::vector<bool>& missing_flags_;
@ -202,7 +241,7 @@ class ColumnMatrix {
/* Fetch an individual column. This code should be used with type swith
to determine type of bin id's */
-template <typename BinIdxType>
+template <typename BinIdxType, bool any_missing>
std::unique_ptr<const Column<BinIdxType> > GetColumn(unsigned fid) const {
CHECK_EQ(sizeof(BinIdxType), bins_type_size_);
@ -213,7 +252,8 @@ class ColumnMatrix {
column_size };
std::unique_ptr<const Column<BinIdxType> > res;
if (type_[fid] == ColumnType::kDenseColumn) {
-res.reset(new DenseColumn<BinIdxType>(type_[fid], bin_index, index_base_[fid],
+CHECK_EQ(any_missing, any_missing_);
+res.reset(new DenseColumn<BinIdxType, any_missing>(type_[fid], bin_index, index_base_[fid],
missing_flags_, feature_offset));
} else {
res.reset(new SparseColumn<BinIdxType>(type_[fid], bin_index, index_base_[fid],
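For readers skimming the diff: the new sparse lookup replaces the cursor logic that previously lived in the partition kernel. GetInitialState does one lower_bound per node range, and GetBinIdx then only advances a cursor forward while the row ids are scanned in ascending order. A minimal standalone sketch of that cursor behaviour (a toy stand-in, not the real SparseColumn API):

```cpp
#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <vector>

// Toy stand-in for a sparse column: sorted row ids plus their bin ids.
struct ToySparseColumn {
  static constexpr int32_t kMissingId = -1;
  std::vector<size_t> row_ind;   // rows that actually hold a value, ascending
  std::vector<int32_t> bin_idx;  // bin id for each stored row

  size_t GetInitialState(size_t first_row_id) const {
    // First stored entry whose row id is >= first_row_id; == size() if none.
    return std::lower_bound(row_ind.begin(), row_ind.end(), first_row_id) -
           row_ind.begin();
  }
  int32_t GetBinIdx(size_t rid, size_t* state) const {
    const size_t n = row_ind.size();
    while (*state < n && row_ind[*state] < rid) ++(*state);  // advance cursor
    return (*state < n && row_ind[*state] == rid) ? bin_idx[*state] : kMissingId;
  }
};

int main() {
  ToySparseColumn col{{1, 4, 7}, {10, 11, 12}};
  std::vector<size_t> rows = {0, 1, 2, 4, 7};  // must be ascending, like rid_span
  size_t state = col.GetInitialState(rows.front());
  for (size_t rid : rows) {
    std::printf("row %zu -> bin %d\n", rid, col.GetBinIdx(rid, &state));
  }
  return 0;
}
```

Because the row ids are sorted, each stored entry is visited at most once per node range, so the scan stays linear in the column size plus the number of rows.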


@ -287,57 +287,18 @@ struct Prefetch {
constexpr size_t Prefetch::kNoPrefetchSize;
-template<typename FPType, bool do_prefetch, typename BinIdxType>
-void BuildHistDenseKernel(const std::vector<GradientPair>& gpair,
+template<typename FPType, bool do_prefetch, typename BinIdxType, bool any_missing = true>
+void BuildHistKernel(const std::vector<GradientPair>& gpair,
const RowSetCollection::Elem row_indices,
const GHistIndexMatrix& gmat,
-const size_t n_features,
GHistRow<FPType> hist) {
const size_t size = row_indices.Size();
const size_t* rid = row_indices.begin;
const float* pgh = reinterpret_cast<const float*>(gpair.data());
const BinIdxType* gradient_index = gmat.index.data<BinIdxType>();
const uint32_t* offsets = gmat.index.Offset();
FPType* hist_data = reinterpret_cast<FPType*>(hist.data());
const uint32_t two {2}; // Each element from 'gpair' and 'hist' contains
// 2 FP values: gradient and hessian.
// So we need to multiply each row-index/bin-index by 2
// to work with gradient pairs as a singe row FP array
for (size_t i = 0; i < size; ++i) {
const size_t icol_start = rid[i] * n_features;
const size_t idx_gh = two * rid[i];
if (do_prefetch) {
const size_t icol_start_prefetch = rid[i + Prefetch::kPrefetchOffset] * n_features;
PREFETCH_READ_T0(pgh + two * rid[i + Prefetch::kPrefetchOffset]);
for (size_t j = icol_start_prefetch; j < icol_start_prefetch + n_features;
j += Prefetch::GetPrefetchStep<BinIdxType>()) {
PREFETCH_READ_T0(gradient_index + j);
}
}
const BinIdxType* gr_index_local = gradient_index + icol_start;
for (size_t j = 0; j < n_features; ++j) {
const uint32_t idx_bin = two * (static_cast<uint32_t>(gr_index_local[j]) +
offsets[j]);
hist_data[idx_bin] += pgh[idx_gh];
hist_data[idx_bin+1] += pgh[idx_gh+1];
}
}
}
template<typename FPType, bool do_prefetch>
void BuildHistSparseKernel(const std::vector<GradientPair>& gpair,
const RowSetCollection::Elem row_indices,
const GHistIndexMatrix& gmat,
GHistRow<FPType> hist) {
const size_t size = row_indices.Size();
const size_t* rid = row_indices.begin;
const float* pgh = reinterpret_cast<const float*>(gpair.data());
const uint32_t* gradient_index = gmat.index.data<uint32_t>();
const size_t* row_ptr = gmat.row_ptr.data();
+const uint32_t* offsets = gmat.index.Offset();
+const size_t n_features = row_ptr[row_indices.begin[0]+1] - row_ptr[row_indices.begin[0]];
FPType* hist_data = reinterpret_cast<FPType*>(hist.data());
const uint32_t two {2}; // Each element from 'gpair' and 'hist' contains
// 2 FP values: gradient and hessian.
@ -345,13 +306,16 @@ void BuildHistSparseKernel(const std::vector<GradientPair>& gpair,
// to work with gradient pairs as a singe row FP array
for (size_t i = 0; i < size; ++i) {
-const size_t icol_start = row_ptr[rid[i]];
-const size_t icol_end = row_ptr[rid[i]+1];
+const size_t icol_start = any_missing ? row_ptr[rid[i]] : rid[i] * n_features;
+const size_t icol_end = any_missing ? row_ptr[rid[i]+1] : icol_start + n_features;
+const size_t row_size = icol_end - icol_start;
const size_t idx_gh = two * rid[i];
if (do_prefetch) {
-const size_t icol_start_prftch = row_ptr[rid[i+Prefetch::kPrefetchOffset]];
-const size_t icol_end_prefect = row_ptr[rid[i+Prefetch::kPrefetchOffset]+1];
+const size_t icol_start_prftch = any_missing ? row_ptr[rid[i+Prefetch::kPrefetchOffset]] :
+rid[i + Prefetch::kPrefetchOffset] * n_features;
+const size_t icol_end_prefect = any_missing ? row_ptr[rid[i+Prefetch::kPrefetchOffset]+1] :
+icol_start_prftch + n_features;
PREFETCH_READ_T0(pgh + two * rid[i + Prefetch::kPrefetchOffset]);
for (size_t j = icol_start_prftch; j < icol_end_prefect;
@ -359,47 +323,34 @@ void BuildHistSparseKernel(const std::vector<GradientPair>& gpair,
PREFETCH_READ_T0(gradient_index + j);
}
}
-for (size_t j = icol_start; j < icol_end; ++j) {
-const uint32_t idx_bin = two * gradient_index[j];
+const BinIdxType* gr_index_local = gradient_index + icol_start;
+for (size_t j = 0; j < row_size; ++j) {
+const uint32_t idx_bin = two * (static_cast<uint32_t>(gr_index_local[j]) + (
+any_missing ? 0 : offsets[j]));
hist_data[idx_bin] += pgh[idx_gh];
hist_data[idx_bin+1] += pgh[idx_gh+1];
}
}
}
-template<typename FPType, bool do_prefetch, typename BinIdxType>
-void BuildHistDispatchKernel(const std::vector<GradientPair>& gpair,
+template<typename FPType, bool do_prefetch, bool any_missing>
+void BuildHistDispatch(const std::vector<GradientPair>& gpair,
const RowSetCollection::Elem row_indices,
-const GHistIndexMatrix& gmat, GHistRow<FPType> hist, bool isDense) {
+const GHistIndexMatrix& gmat, GHistRow<FPType> hist) {
if (isDense) {
const size_t* row_ptr = gmat.row_ptr.data();
const size_t n_features = row_ptr[row_indices.begin[0]+1] - row_ptr[row_indices.begin[0]];
BuildHistDenseKernel<FPType, do_prefetch, BinIdxType>(gpair, row_indices,
gmat, n_features, hist);
} else {
BuildHistSparseKernel<FPType, do_prefetch>(gpair, row_indices,
gmat, hist);
}
}
template<typename FPType, bool do_prefetch>
void BuildHistKernel(const std::vector<GradientPair>& gpair,
const RowSetCollection::Elem row_indices,
const GHistIndexMatrix& gmat, const bool isDense, GHistRow<FPType> hist) {
const bool is_dense = row_indices.Size() && isDense;
switch (gmat.index.GetBinTypeSize()) {
case kUint8BinsTypeSize:
-BuildHistDispatchKernel<FPType, do_prefetch, uint8_t>(gpair, row_indices,
-gmat, hist, is_dense);
+BuildHistKernel<FPType, do_prefetch, uint8_t, any_missing>(gpair, row_indices,
+gmat, hist);
break;
case kUint16BinsTypeSize:
-BuildHistDispatchKernel<FPType, do_prefetch, uint16_t>(gpair, row_indices,
-gmat, hist, is_dense);
+BuildHistKernel<FPType, do_prefetch, uint16_t, any_missing>(gpair, row_indices,
+gmat, hist);
break;
case kUint32BinsTypeSize:
-BuildHistDispatchKernel<FPType, do_prefetch, uint32_t>(gpair, row_indices,
-gmat, hist, is_dense);
+BuildHistKernel<FPType, do_prefetch, uint32_t, any_missing>(gpair, row_indices,
+gmat, hist);
break;
default:
CHECK(false); // no default behavior
@ -407,10 +358,12 @@ void BuildHistKernel(const std::vector<GradientPair>& gpair,
}
template <typename GradientSumT>
+template <bool any_missing>
void GHistBuilder<GradientSumT>::BuildHist(
const std::vector<GradientPair> &gpair,
-const RowSetCollection::Elem row_indices, const GHistIndexMatrix &gmat,
-GHistRowT hist, bool isDense) {
+const RowSetCollection::Elem row_indices,
+const GHistIndexMatrix &gmat,
+GHistRowT hist) {
const size_t nrows = row_indices.Size();
const size_t no_prefetch_size = Prefetch::NoPrefetchSize(nrows);
@ -419,28 +372,36 @@ void GHistBuilder<GradientSumT>::BuildHist(
if (contiguousBlock) {
// contiguous memory access, built-in HW prefetching is enough
-BuildHistKernel<GradientSumT, false>(gpair, row_indices, gmat, isDense, hist);
+BuildHistDispatch<GradientSumT, false, any_missing>(gpair, row_indices, gmat, hist);
} else {
const RowSetCollection::Elem span1(row_indices.begin, row_indices.end - no_prefetch_size);
const RowSetCollection::Elem span2(row_indices.end - no_prefetch_size, row_indices.end);
-BuildHistKernel<GradientSumT, true>(gpair, span1, gmat, isDense, hist);
+BuildHistDispatch<GradientSumT, true, any_missing>(gpair, span1, gmat, hist);
// no prefetching to avoid loading extra memory
-BuildHistKernel<GradientSumT, false>(gpair, span2, gmat, isDense, hist);
+BuildHistDispatch<GradientSumT, false, any_missing>(gpair, span2, gmat, hist);
}
}
-template
-void GHistBuilder<float>::BuildHist(const std::vector<GradientPair>& gpair,
-const RowSetCollection::Elem row_indices,
-const GHistIndexMatrix& gmat,
-GHistRow<float> hist,
-bool isDense);
-template
-void GHistBuilder<double>::BuildHist(const std::vector<GradientPair>& gpair,
-const RowSetCollection::Elem row_indices,
-const GHistIndexMatrix& gmat,
-GHistRow<double> hist,
-bool isDense);
+template
+void GHistBuilder<float>::BuildHist<true>(const std::vector<GradientPair>& gpair,
+const RowSetCollection::Elem row_indices,
+const GHistIndexMatrix& gmat,
+GHistRow<float> hist);
+template
+void GHistBuilder<float>::BuildHist<false>(const std::vector<GradientPair>& gpair,
+const RowSetCollection::Elem row_indices,
+const GHistIndexMatrix& gmat,
+GHistRow<float> hist);
+template
+void GHistBuilder<double>::BuildHist<true>(const std::vector<GradientPair>& gpair,
+const RowSetCollection::Elem row_indices,
+const GHistIndexMatrix& gmat,
+GHistRow<double> hist);
+template
+void GHistBuilder<double>::BuildHist<false>(const std::vector<GradientPair>& gpair,
+const RowSetCollection::Elem row_indices,
+const GHistIndexMatrix& gmat,
+GHistRow<double> hist);
template<typename GradientSumT>
void GHistBuilder<GradientSumT>::SubtractionTrick(GHistRowT self,
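The net effect of this file's change is that the dense and sparse histogram loops collapse into one body that only differs in how a row's bin range is addressed: through the CSR row_ptr when missing values are possible, or as a fixed `rid * n_features` stride plus per-feature offsets when they are not. A rough, self-contained sketch of that idea, simplified to a single gradient value per row (names are illustrative, not the real XGBoost API):

```cpp
#include <cstdint>
#include <cstdio>
#include <vector>

// Hypothetical, simplified stand-in for the merged kernel. With any_missing=true
// bins are read through CSR offsets (row_ptr) and are already global; with
// any_missing=false every row stores exactly n_features local bins and a
// per-feature offset turns them into global bins.
template <bool any_missing>
void AccumulateHist(const std::vector<uint8_t>& bins,      // bin index storage
                    const std::vector<size_t>& row_ptr,    // used only if any_missing
                    const std::vector<uint32_t>& offsets,  // used only if !any_missing
                    size_t n_features,
                    const std::vector<size_t>& rows,
                    const std::vector<double>& grad,
                    std::vector<double>* hist) {
  for (size_t r : rows) {
    const size_t begin = any_missing ? row_ptr[r] : r * n_features;
    const size_t end   = any_missing ? row_ptr[r + 1] : begin + n_features;
    for (size_t j = begin; j < end; ++j) {
      const uint32_t bin = bins[j] + (any_missing ? 0u : offsets[j - begin]);
      (*hist)[bin] += grad[r];
    }
  }
}

int main() {
  std::vector<double> grad = {1.0, 10.0};
  std::vector<double> hist(8, 0.0);

  // Dense layout: 2 rows x 2 features of local bins, feature 1's bins start at 4.
  std::vector<uint8_t> dense_bins = {0, 2, 1, 3};
  std::vector<uint32_t> offsets = {0, 4};
  AccumulateHist<false>(dense_bins, {}, offsets, 2, {0, 1}, grad, &hist);

  // CSR layout with one stored (global) bin per row.
  std::vector<uint8_t> csr_bins = {0, 6};
  std::vector<size_t> row_ptr = {0, 1, 2};
  AccumulateHist<true>(csr_bins, row_ptr, {}, 0, {0, 1}, grad, &hist);

  for (double h : hist) std::printf("%.1f ", h);
  std::printf("\n");
}
```

Since any_missing is a template parameter, the compiler can fold the ternaries and the offset addition away, so the dense path keeps its tight inner loop.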


@ -627,11 +627,11 @@ class GHistBuilder {
GHistBuilder(size_t nthread, uint32_t nbins) : nthread_{nthread}, nbins_{nbins} {}
// construct a histogram via histogram aggregation
+template <bool any_missing>
void BuildHist(const std::vector<GradientPair>& gpair,
const RowSetCollection::Elem row_indices,
const GHistIndexMatrix& gmat,
-GHistRowT hist,
-bool isDense);
+GHistRowT hist);
// construct a histogram via subtraction trick
void SubtractionTrick(GHistRowT self,
GHistRowT sibling,
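The isDense flag disappears from the declaration; callers now pick the specialization once, based on whether the data actually contains gaps. A toy illustration of that runtime-flag-to-template-parameter pattern (not the XGBoost API; the function and names are made up):

```cpp
#include <cstdio>
#include <vector>

// The runtime "does this data contain missing values?" question is answered
// exactly once, and the hot loop is compiled separately for each answer, so
// the per-element check costs nothing on the dense path.
template <bool any_missing>
double SumPresent(const std::vector<double>& values, double missing_marker) {
  double total = 0.0;
  for (double v : values) {
    if (any_missing && v == missing_marker) continue;  // folded away when false
    total += v;
  }
  return total;
}

inline double SumPresentDispatch(const std::vector<double>& values,
                                 bool has_missing, double missing_marker) {
  return has_missing ? SumPresent<true>(values, missing_marker)
                     : SumPresent<false>(values, missing_marker);
}

int main() {
  std::vector<double> v = {1.0, -999.0, 2.5};
  std::printf("%f %f\n", SumPresentDispatch(v, true, -999.0),
              SumPresentDispatch(v, false, -999.0));
}
```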


@ -0,0 +1,228 @@
/*!
* Copyright 2021 by Contributors
* \file row_set.h
* \brief Quick Utility to compute subset of rows
* \author Philip Cho, Tianqi Chen
*/
#ifndef XGBOOST_COMMON_PARTITION_BUILDER_H_
#define XGBOOST_COMMON_PARTITION_BUILDER_H_
#include <xgboost/data.h>
#include <algorithm>
#include <vector>
#include <utility>
#include <memory>
#include "xgboost/tree_model.h"
#include "../common/column_matrix.h"
namespace xgboost {
namespace common {
// The builder is required for samples partition to left and rights children for set of nodes
// Responsible for:
// 1) Effective memory allocation for intermediate results for multi-thread work
// 2) Merging partial results produced by threads into original row set (row_set_collection_)
// BlockSize is template to enable memory alignment easily with C++11 'alignas()' feature
template<size_t BlockSize>
class PartitionBuilder {
public:
template<typename Func>
void Init(const size_t n_tasks, size_t n_nodes, Func funcNTaks) {
left_right_nodes_sizes_.resize(n_nodes);
blocks_offsets_.resize(n_nodes+1);
blocks_offsets_[0] = 0;
for (size_t i = 1; i < n_nodes+1; ++i) {
blocks_offsets_[i] = blocks_offsets_[i-1] + funcNTaks(i-1);
}
if (n_tasks > max_n_tasks_) {
mem_blocks_.resize(n_tasks);
max_n_tasks_ = n_tasks;
}
}
// split row indexes (rid_span) to 2 parts (left_part, right_part) depending
// on comparison of indexes values (idx_span) and split point (split_cond)
// Handle dense columns
// Analog of std::stable_partition, but in no-inplace manner
template <bool default_left, bool any_missing, typename ColumnType>
inline std::pair<size_t, size_t> PartitionKernel(const ColumnType& column,
common::Span<const size_t> rid_span, const int32_t split_cond,
common::Span<size_t> left_part, common::Span<size_t> right_part) {
size_t* p_left_part = left_part.data();
size_t* p_right_part = right_part.data();
size_t nleft_elems = 0;
size_t nright_elems = 0;
auto state = column.GetInitialState(rid_span.front());
for (auto rid : rid_span) {
const int32_t bin_id = column.GetBinIdx(rid, &state);
if (any_missing && bin_id == ColumnType::kMissingId) {
if (default_left) {
p_left_part[nleft_elems++] = rid;
} else {
p_right_part[nright_elems++] = rid;
}
} else {
if (bin_id <= split_cond) {
p_left_part[nleft_elems++] = rid;
} else {
p_right_part[nright_elems++] = rid;
}
}
}
return {nleft_elems, nright_elems};
}
template <typename BinIdxType, bool any_missing>
void Partition(const size_t node_in_set, const size_t nid, const common::Range1d range,
const int32_t split_cond,
const ColumnMatrix& column_matrix, const RegTree& tree, const size_t* rid) {
common::Span<const size_t> rid_span(rid + range.begin(), rid + range.end());
common::Span<size_t> left = GetLeftBuffer(node_in_set,
range.begin(), range.end());
common::Span<size_t> right = GetRightBuffer(node_in_set,
range.begin(), range.end());
const bst_uint fid = tree[nid].SplitIndex();
const bool default_left = tree[nid].DefaultLeft();
const auto column_ptr = column_matrix.GetColumn<BinIdxType, any_missing>(fid);
std::pair<size_t, size_t> child_nodes_sizes;
if (column_ptr->GetType() == xgboost::common::kDenseColumn) {
const common::DenseColumn<BinIdxType, any_missing>& column =
static_cast<const common::DenseColumn<BinIdxType, any_missing>& >(*(column_ptr.get()));
if (default_left) {
child_nodes_sizes = PartitionKernel<true, any_missing>(column, rid_span,
split_cond, left, right);
} else {
child_nodes_sizes = PartitionKernel<false, any_missing>(column, rid_span,
split_cond, left, right);
}
} else {
CHECK_EQ(any_missing, true);
const common::SparseColumn<BinIdxType>& column
= static_cast<const common::SparseColumn<BinIdxType>& >(*(column_ptr.get()));
if (default_left) {
child_nodes_sizes = PartitionKernel<true, any_missing>(column, rid_span,
split_cond, left, right);
} else {
child_nodes_sizes = PartitionKernel<false, any_missing>(column, rid_span,
split_cond, left, right);
}
}
const size_t n_left = child_nodes_sizes.first;
const size_t n_right = child_nodes_sizes.second;
SetNLeftElems(node_in_set, range.begin(), range.end(), n_left);
SetNRightElems(node_in_set, range.begin(), range.end(), n_right);
}
// allocate thread local memory, should be called for each specific task
void AllocateForTask(size_t id) {
if (mem_blocks_[id].get() == nullptr) {
BlockInfo* local_block_ptr = new BlockInfo;
CHECK_NE(local_block_ptr, (BlockInfo*)nullptr);
mem_blocks_[id].reset(local_block_ptr);
}
}
common::Span<size_t> GetLeftBuffer(int nid, size_t begin, size_t end) {
const size_t task_idx = GetTaskIdx(nid, begin);
return { mem_blocks_.at(task_idx)->Left(), end - begin };
}
common::Span<size_t> GetRightBuffer(int nid, size_t begin, size_t end) {
const size_t task_idx = GetTaskIdx(nid, begin);
return { mem_blocks_.at(task_idx)->Right(), end - begin };
}
void SetNLeftElems(int nid, size_t begin, size_t end, size_t n_left) {
size_t task_idx = GetTaskIdx(nid, begin);
mem_blocks_.at(task_idx)->n_left = n_left;
}
void SetNRightElems(int nid, size_t begin, size_t end, size_t n_right) {
size_t task_idx = GetTaskIdx(nid, begin);
mem_blocks_.at(task_idx)->n_right = n_right;
}
size_t GetNLeftElems(int nid) const {
return left_right_nodes_sizes_[nid].first;
}
size_t GetNRightElems(int nid) const {
return left_right_nodes_sizes_[nid].second;
}
// Each thread has partial results for some set of tree-nodes
// The function decides order of merging partial results into final row set
void CalculateRowOffsets() {
for (size_t i = 0; i < blocks_offsets_.size()-1; ++i) {
size_t n_left = 0;
for (size_t j = blocks_offsets_[i]; j < blocks_offsets_[i+1]; ++j) {
mem_blocks_[j]->n_offset_left = n_left;
n_left += mem_blocks_[j]->n_left;
}
size_t n_right = 0;
for (size_t j = blocks_offsets_[i]; j < blocks_offsets_[i+1]; ++j) {
mem_blocks_[j]->n_offset_right = n_left + n_right;
n_right += mem_blocks_[j]->n_right;
}
left_right_nodes_sizes_[i] = {n_left, n_right};
}
}
void MergeToArray(int nid, size_t begin, size_t* rows_indexes) {
size_t task_idx = GetTaskIdx(nid, begin);
size_t* left_result = rows_indexes + mem_blocks_[task_idx]->n_offset_left;
size_t* right_result = rows_indexes + mem_blocks_[task_idx]->n_offset_right;
const size_t* left = mem_blocks_[task_idx]->Left();
const size_t* right = mem_blocks_[task_idx]->Right();
std::copy_n(left, mem_blocks_[task_idx]->n_left, left_result);
std::copy_n(right, mem_blocks_[task_idx]->n_right, right_result);
}
size_t GetTaskIdx(int nid, size_t begin) {
return blocks_offsets_[nid] + begin / BlockSize;
}
protected:
struct BlockInfo{
size_t n_left;
size_t n_right;
size_t n_offset_left;
size_t n_offset_right;
size_t* Left() {
return &left_data_[0];
}
size_t* Right() {
return &right_data_[0];
}
private:
size_t left_data_[BlockSize];
size_t right_data_[BlockSize];
};
std::vector<std::pair<size_t, size_t>> left_right_nodes_sizes_;
std::vector<size_t> blocks_offsets_;
std::vector<std::shared_ptr<BlockInfo>> mem_blocks_;
size_t max_n_tasks_ = 0;
};
} // namespace common
} // namespace xgboost
#endif // XGBOOST_COMMON_PARTITION_BUILDER_H_
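The merge bookkeeping in the extracted class (per-block n_left/n_offset_left, then CalculateRowOffsets followed by MergeToArray) can be hard to follow from the class alone. Below is a single-threaded toy model of the same two-phase idea with made-up row data; it is not the real PartitionBuilder, just the shape of its bookkeeping:

```cpp
#include <algorithm>
#include <cstddef>
#include <cstdio>
#include <vector>

// Each task owns one block of a node's rows and records which rows went left
// or right; prefix sums then give every block a write offset so all blocks can
// copy into the node's shared row array without overlapping.
struct ToyBlock {
  std::vector<size_t> left, right;   // rows this block sent to each side
  size_t offset_left = 0, offset_right = 0;
};

int main() {
  // Two blocks of one node, already partitioned by some split.
  std::vector<ToyBlock> blocks(2);
  blocks[0].left = {0, 2};  blocks[0].right = {1};
  blocks[1].left = {5};     blocks[1].right = {3, 4};

  // Phase 1: prefix sums decide where each block writes (CalculateRowOffsets analogue).
  size_t n_left = 0, n_right = 0;
  for (auto& b : blocks) { b.offset_left = n_left; n_left += b.left.size(); }
  for (auto& b : blocks) { b.offset_right = n_left + n_right; n_right += b.right.size(); }

  // Phase 2: every block copies its slice into the shared array (MergeToArray analogue).
  std::vector<size_t> rows(n_left + n_right);
  for (auto& b : blocks) {
    std::copy(b.left.begin(), b.left.end(), rows.begin() + b.offset_left);
    std::copy(b.right.begin(), b.right.end(), rows.begin() + b.offset_right);
  }
  for (size_t r : rows) std::printf("%zu ", r);  // prints: 0 2 5 1 3 4
  std::printf("\n");
}
```

All left rows of the node land first, followed by all right rows, which is what lets each task write its portion independently.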


@ -126,130 +126,6 @@ class RowSetCollection {
std::vector<Elem> elem_of_each_node_;
};
// The builder is required for samples partition to left and rights children for set of nodes
// Responsible for:
// 1) Effective memory allocation for intermediate results for multi-thread work
// 2) Merging partial results produced by threads into original row set (row_set_collection_)
// BlockSize is template to enable memory alignment easily with C++11 'alignas()' feature
template<size_t BlockSize>
class PartitionBuilder {
public:
template<typename Func>
void Init(const size_t n_tasks, size_t n_nodes, Func funcNTaks) {
left_right_nodes_sizes_.resize(n_nodes);
blocks_offsets_.resize(n_nodes+1);
blocks_offsets_[0] = 0;
for (size_t i = 1; i < n_nodes+1; ++i) {
blocks_offsets_[i] = blocks_offsets_[i-1] + funcNTaks(i-1);
}
if (n_tasks > max_n_tasks_) {
mem_blocks_.resize(n_tasks);
max_n_tasks_ = n_tasks;
}
}
// allocate thread local memory, should be called for each specific task
void AllocateForTask(size_t id) {
if (mem_blocks_[id].get() == nullptr) {
BlockInfo* local_block_ptr = new BlockInfo;
CHECK_NE(local_block_ptr, (BlockInfo*)nullptr);
mem_blocks_[id].reset(local_block_ptr);
}
}
common::Span<size_t> GetLeftBuffer(int nid, size_t begin, size_t end) {
const size_t task_idx = GetTaskIdx(nid, begin);
return { mem_blocks_.at(task_idx)->Left(), end - begin };
}
common::Span<size_t> GetRightBuffer(int nid, size_t begin, size_t end) {
const size_t task_idx = GetTaskIdx(nid, begin);
return { mem_blocks_.at(task_idx)->Right(), end - begin };
}
void SetNLeftElems(int nid, size_t begin, size_t end, size_t n_left) {
size_t task_idx = GetTaskIdx(nid, begin);
mem_blocks_.at(task_idx)->n_left = n_left;
}
void SetNRightElems(int nid, size_t begin, size_t end, size_t n_right) {
size_t task_idx = GetTaskIdx(nid, begin);
mem_blocks_.at(task_idx)->n_right = n_right;
}
size_t GetNLeftElems(int nid) const {
return left_right_nodes_sizes_[nid].first;
}
size_t GetNRightElems(int nid) const {
return left_right_nodes_sizes_[nid].second;
}
// Each thread has partial results for some set of tree-nodes
// The function decides order of merging partial results into final row set
void CalculateRowOffsets() {
for (size_t i = 0; i < blocks_offsets_.size()-1; ++i) {
size_t n_left = 0;
for (size_t j = blocks_offsets_[i]; j < blocks_offsets_[i+1]; ++j) {
mem_blocks_[j]->n_offset_left = n_left;
n_left += mem_blocks_[j]->n_left;
}
size_t n_right = 0;
for (size_t j = blocks_offsets_[i]; j < blocks_offsets_[i+1]; ++j) {
mem_blocks_[j]->n_offset_right = n_left + n_right;
n_right += mem_blocks_[j]->n_right;
}
left_right_nodes_sizes_[i] = {n_left, n_right};
}
}
void MergeToArray(int nid, size_t begin, size_t* rows_indexes) {
size_t task_idx = GetTaskIdx(nid, begin);
size_t* left_result = rows_indexes + mem_blocks_[task_idx]->n_offset_left;
size_t* right_result = rows_indexes + mem_blocks_[task_idx]->n_offset_right;
const size_t* left = mem_blocks_[task_idx]->Left();
const size_t* right = mem_blocks_[task_idx]->Right();
std::copy_n(left, mem_blocks_[task_idx]->n_left, left_result);
std::copy_n(right, mem_blocks_[task_idx]->n_right, right_result);
}
size_t GetTaskIdx(int nid, size_t begin) {
return blocks_offsets_[nid] + begin / BlockSize;
}
protected:
struct BlockInfo{
size_t n_left;
size_t n_right;
size_t n_offset_left;
size_t n_offset_right;
size_t* Left() {
return &left_data_[0];
}
size_t* Right() {
return &right_data_[0];
}
private:
size_t left_data_[BlockSize];
size_t right_data_[BlockSize];
};
std::vector<std::pair<size_t, size_t>> left_right_nodes_sizes_;
std::vector<size_t> blocks_offsets_;
std::vector<std::shared_ptr<BlockInfo>> mem_blocks_;
size_t max_n_tasks_ = 0;
};
} // namespace common
} // namespace xgboost


@ -290,6 +290,7 @@ void QuantileHistMaker::Builder<GradientSumT>::SetHistRowsAdder(
}
template <typename GradientSumT>
+template <bool any_missing>
void QuantileHistMaker::Builder<GradientSumT>::InitRoot(
const GHistIndexMatrix &gmat,
const DMatrix& fmat,
@ -307,7 +308,7 @@ void QuantileHistMaker::Builder<GradientSumT>::InitRoot(
int sync_count = 0;
hist_rows_adder_->AddHistRows(this, &starting_index, &sync_count, p_tree);
-BuildLocalHistograms(gmat, p_tree, gpair_h);
+BuildLocalHistograms<any_missing>(gmat, p_tree, gpair_h);
hist_synchronizer_->SyncHistograms(this, starting_index, sync_count, p_tree);
this->InitNewNode(CPUExpandEntry::kRootNid, gmat, gpair_h, fmat, *p_tree);
@ -319,6 +320,7 @@ void QuantileHistMaker::Builder<GradientSumT>::InitRoot(
}
template<typename GradientSumT>
+template <bool any_missing>
void QuantileHistMaker::Builder<GradientSumT>::BuildLocalHistograms(
const GHistIndexMatrix &gmat,
RegTree *p_tree,
@ -350,7 +352,8 @@ void QuantileHistMaker::Builder<GradientSumT>::BuildLocalHistograms(
auto rid_set = RowSetCollection::Elem(start_of_row_set + r.begin(),
start_of_row_set + r.end(),
nid);
-BuildHist(gpair_h, rid_set, gmat, hist_buffer_.GetInitializedHist(tid, nid_in_set));
+hist_builder_.template BuildHist<any_missing>(gpair_h, rid_set, gmat,
+hist_buffer_.GetInitializedHist(tid, nid_in_set));
});
builder_monitor_.Stop("BuildLocalHistograms");
@ -439,6 +442,7 @@ void QuantileHistMaker::Builder<GradientSumT>::BuildNodeStats(
}
template<typename GradientSumT>
+template <bool any_missing>
void QuantileHistMaker::Builder<GradientSumT>::ExpandTree(
const GHistIndexMatrix& gmat,
const ColumnMatrix& column_matrix,
@ -450,7 +454,7 @@ void QuantileHistMaker::Builder<GradientSumT>::ExpandTree(
Driver<CPUExpandEntry> driver(static_cast<TrainParam::TreeGrowPolicy>(param_.grow_policy));
std::vector<CPUExpandEntry> expand;
-InitRoot(gmat, *p_fmat, p_tree, gpair_h, &num_leaves, &expand);
+InitRoot<any_missing>(gmat, *p_fmat, p_tree, gpair_h, &num_leaves, &expand);
driver.Push(expand[0]);
int depth = 0;
@ -465,14 +469,14 @@ void QuantileHistMaker::Builder<GradientSumT>::ExpandTree(
AddSplitsToTree(expand, p_tree, &num_leaves, &nodes_for_apply_split);
if (nodes_for_apply_split.size() != 0) {
-ApplySplit(nodes_for_apply_split, gmat, column_matrix, hist_, p_tree);
+ApplySplit<any_missing>(nodes_for_apply_split, gmat, column_matrix, hist_, p_tree);
SplitSiblings(nodes_for_apply_split, &nodes_to_evaluate, p_tree);
int starting_index = std::numeric_limits<int>::max();
int sync_count = 0;
hist_rows_adder_->AddHistRows(this, &starting_index, &sync_count, p_tree);
if (depth < param_.max_depth) {
-BuildLocalHistograms(gmat, p_tree, gpair_h);
+BuildLocalHistograms<any_missing>(gmat, p_tree, gpair_h);
hist_synchronizer_->SyncHistograms(this, starting_index, sync_count, p_tree);
}
@ -520,8 +524,11 @@ void QuantileHistMaker::Builder<GradientSumT>::Update(
this->InitData(gmat, *p_fmat, *p_tree, gpair_ptr);
-ExpandTree(gmat, column_matrix, p_fmat, p_tree, *gpair_ptr);
+if (column_matrix.AnyMissing()) {
+ExpandTree<true>(gmat, column_matrix, p_fmat, p_tree, *gpair_ptr);
+} else {
+ExpandTree<false>(gmat, column_matrix, p_fmat, p_tree, *gpair_ptr);
+}
for (int nid = 0; nid < p_tree->param.num_nodes; ++nid) {
p_tree->Stat(nid).loss_chg = snode_[nid].best.loss_chg;
p_tree->Stat(nid).base_weight = snode_[nid].weight;
@ -867,165 +874,6 @@ void QuantileHistMaker::Builder<GradientSumT>::EvaluateSplits(
builder_monitor_.Stop("EvaluateSplits"); builder_monitor_.Stop("EvaluateSplits");
} }
// split row indexes (rid_span) to 2 parts (left_part, right_part) depending
// on comparison of indexes values (idx_span) and split point (split_cond)
// Handle dense columns
// Analog of std::stable_partition, but in no-inplace manner
template <bool default_left, bool any_missing, typename BinIdxType>
inline std::pair<size_t, size_t> PartitionDenseKernel(const common::DenseColumn<BinIdxType>& column,
common::Span<const size_t> rid_span, const int32_t split_cond,
common::Span<size_t> left_part, common::Span<size_t> right_part) {
const int32_t offset = column.GetBaseIdx();
const BinIdxType* idx = column.GetFeatureBinIdxPtr().data();
size_t* p_left_part = left_part.data();
size_t* p_right_part = right_part.data();
size_t nleft_elems = 0;
size_t nright_elems = 0;
if (any_missing) {
for (auto rid : rid_span) {
if (column.IsMissing(rid)) {
if (default_left) {
p_left_part[nleft_elems++] = rid;
} else {
p_right_part[nright_elems++] = rid;
}
} else {
if ((static_cast<int32_t>(idx[rid]) + offset) <= split_cond) {
p_left_part[nleft_elems++] = rid;
} else {
p_right_part[nright_elems++] = rid;
}
}
}
} else {
for (auto rid : rid_span) {
if ((static_cast<int32_t>(idx[rid]) + offset) <= split_cond) {
p_left_part[nleft_elems++] = rid;
} else {
p_right_part[nright_elems++] = rid;
}
}
}
return {nleft_elems, nright_elems};
}
// Split row indexes (rid_span) to 2 parts (left_part, right_part) depending
// on comparison of indexes values (idx_span) and split point (split_cond).
// Handle sparse columns
template<bool default_left, typename BinIdxType>
inline std::pair<size_t, size_t> PartitionSparseKernel(
const common::SparseColumn<BinIdxType>& column,
common::Span<const size_t> rid_span, const int32_t split_cond,
common::Span<size_t> left_part, common::Span<size_t> right_part) {
size_t* p_left_part = left_part.data();
size_t* p_right_part = right_part.data();
size_t nleft_elems = 0;
size_t nright_elems = 0;
const size_t* row_data = column.GetRowData();
const size_t column_size = column.Size();
if (rid_span.size()) { // ensure that rid_span is nonempty range
// search first nonzero row with index >= rid_span.front()
const size_t* p = std::lower_bound(row_data, row_data + column_size,
rid_span.front());
if (p != row_data + column_size && *p <= rid_span.back()) {
size_t cursor = p - row_data;
for (auto rid : rid_span) {
while (cursor < column_size
&& column.GetRowIdx(cursor) < rid
&& column.GetRowIdx(cursor) <= rid_span.back()) {
++cursor;
}
if (cursor < column_size && column.GetRowIdx(cursor) == rid) {
if (static_cast<int32_t>(column.GetGlobalBinIdx(cursor)) <= split_cond) {
p_left_part[nleft_elems++] = rid;
} else {
p_right_part[nright_elems++] = rid;
}
++cursor;
} else {
// missing value
if (default_left) {
p_left_part[nleft_elems++] = rid;
} else {
p_right_part[nright_elems++] = rid;
}
}
}
} else { // all rows in rid_span have missing values
if (default_left) {
std::copy(rid_span.begin(), rid_span.end(), p_left_part);
nleft_elems = rid_span.size();
} else {
std::copy(rid_span.begin(), rid_span.end(), p_right_part);
nright_elems = rid_span.size();
}
}
}
return {nleft_elems, nright_elems};
}
template <typename GradientSumT>
template <typename BinIdxType>
void QuantileHistMaker::Builder<GradientSumT>::PartitionKernel(
const size_t node_in_set, const size_t nid, const common::Range1d range,
const int32_t split_cond, const ColumnMatrix& column_matrix, const RegTree& tree) {
const size_t* rid = row_set_collection_[nid].begin;
common::Span<const size_t> rid_span(rid + range.begin(), rid + range.end());
common::Span<size_t> left = partition_builder_.GetLeftBuffer(node_in_set,
range.begin(), range.end());
common::Span<size_t> right = partition_builder_.GetRightBuffer(node_in_set,
range.begin(), range.end());
const bst_uint fid = tree[nid].SplitIndex();
const bool default_left = tree[nid].DefaultLeft();
const auto column_ptr = column_matrix.GetColumn<BinIdxType>(fid);
std::pair<size_t, size_t> child_nodes_sizes;
if (column_ptr->GetType() == xgboost::common::kDenseColumn) {
const common::DenseColumn<BinIdxType>& column =
static_cast<const common::DenseColumn<BinIdxType>& >(*(column_ptr.get()));
if (default_left) {
if (column_matrix.AnyMissing()) {
child_nodes_sizes = PartitionDenseKernel<true, true>(column, rid_span,
split_cond, left, right);
} else {
child_nodes_sizes = PartitionDenseKernel<true, false>(column, rid_span,
split_cond, left, right);
}
} else {
if (column_matrix.AnyMissing()) {
child_nodes_sizes = PartitionDenseKernel<false, true>(column, rid_span,
split_cond, left, right);
} else {
child_nodes_sizes = PartitionDenseKernel<false, false>(column, rid_span,
split_cond, left, right);
}
}
} else {
const common::SparseColumn<BinIdxType>& column
= static_cast<const common::SparseColumn<BinIdxType>& >(*(column_ptr.get()));
if (default_left) {
child_nodes_sizes = PartitionSparseKernel<true>(column, rid_span,
split_cond, left, right);
} else {
child_nodes_sizes = PartitionSparseKernel<false>(column, rid_span,
split_cond, left, right);
}
}
const size_t n_left = child_nodes_sizes.first;
const size_t n_right = child_nodes_sizes.second;
partition_builder_.SetNLeftElems(node_in_set, range.begin(), range.end(), n_left);
partition_builder_.SetNRightElems(node_in_set, range.begin(), range.end(), n_right);
}
template <typename GradientSumT>
void QuantileHistMaker::Builder<GradientSumT>::FindSplitConditions(
const std::vector<CPUExpandEntry>& nodes,
@ -1070,6 +918,7 @@ void QuantileHistMaker::Builder<GradientSumT>::AddSplitsToRowSet(
}
template <typename GradientSumT>
+template <bool any_missing>
void QuantileHistMaker::Builder<GradientSumT>::ApplySplit(const std::vector<CPUExpandEntry> nodes,
const GHistIndexMatrix& gmat,
const ColumnMatrix& column_matrix,
@ -1102,16 +951,19 @@ void QuantileHistMaker::Builder<GradientSumT>::ApplySplit(const std::vector<CPUE
partition_builder_.AllocateForTask(task_id);
switch (column_matrix.GetTypeSize()) {
case common::kUint8BinsTypeSize:
-PartitionKernel<uint8_t>(node_in_set, nid, r,
-split_conditions[node_in_set], column_matrix, *p_tree);
+partition_builder_.template Partition<uint8_t, any_missing>(node_in_set, nid, r,
+split_conditions[node_in_set], column_matrix,
+*p_tree, row_set_collection_[nid].begin);
break;
case common::kUint16BinsTypeSize:
-PartitionKernel<uint16_t>(node_in_set, nid, r,
-split_conditions[node_in_set], column_matrix, *p_tree);
+partition_builder_.template Partition<uint16_t, any_missing>(node_in_set, nid, r,
+split_conditions[node_in_set], column_matrix,
+*p_tree, row_set_collection_[nid].begin);
break;
case common::kUint32BinsTypeSize:
-PartitionKernel<uint32_t>(node_in_set, nid, r,
-split_conditions[node_in_set], column_matrix, *p_tree);
+partition_builder_.template Partition<uint32_t, any_missing>(node_in_set, nid, r,
+split_conditions[node_in_set], column_matrix,
+*p_tree, row_set_collection_[nid].begin);
break;
default:
CHECK(false); // no default behavior
@ -1268,24 +1120,6 @@ GradStats QuantileHistMaker::Builder<GradientSumT>::EnumerateSplit(
template struct QuantileHistMaker::Builder<float>;
template struct QuantileHistMaker::Builder<double>;
template void QuantileHistMaker::Builder<float>::PartitionKernel<uint8_t>(
const size_t node_in_set, const size_t nid, common::Range1d range,
const int32_t split_cond, const ColumnMatrix& column_matrix, const RegTree& tree);
template void QuantileHistMaker::Builder<float>::PartitionKernel<uint16_t>(
const size_t node_in_set, const size_t nid, common::Range1d range,
const int32_t split_cond, const ColumnMatrix& column_matrix, const RegTree& tree);
template void QuantileHistMaker::Builder<float>::PartitionKernel<uint32_t>(
const size_t node_in_set, const size_t nid, common::Range1d range,
const int32_t split_cond, const ColumnMatrix& column_matrix, const RegTree& tree);
template void QuantileHistMaker::Builder<double>::PartitionKernel<uint8_t>(
const size_t node_in_set, const size_t nid, common::Range1d range,
const int32_t split_cond, const ColumnMatrix& column_matrix, const RegTree& tree);
template void QuantileHistMaker::Builder<double>::PartitionKernel<uint16_t>(
const size_t node_in_set, const size_t nid, common::Range1d range,
const int32_t split_cond, const ColumnMatrix& column_matrix, const RegTree& tree);
template void QuantileHistMaker::Builder<double>::PartitionKernel<uint32_t>(
const size_t node_in_set, const size_t nid, common::Range1d range,
const int32_t split_cond, const ColumnMatrix& column_matrix, const RegTree& tree);
XGBOOST_REGISTER_TREE_UPDATER(FastHistMaker, "grow_fast_histmaker")
.describe("(Deprecated, use grow_quantile_histmaker instead.)"


@ -28,6 +28,7 @@
#include "../common/timer.h" #include "../common/timer.h"
#include "../common/hist_util.h" #include "../common/hist_util.h"
#include "../common/row_set.h" #include "../common/row_set.h"
#include "../common/partition_builder.h"
#include "../common/column_matrix.h" #include "../common/column_matrix.h"
namespace xgboost { namespace xgboost {
@ -291,14 +292,6 @@ class QuantileHistMaker: public TreeUpdater {
DMatrix* p_fmat,
RegTree* p_tree);
inline void BuildHist(const std::vector<GradientPair>& gpair,
const RowSetCollection::Elem row_indices,
const GHistIndexMatrix& gmat,
GHistRowT hist) {
hist_builder_.BuildHist(gpair, row_indices, gmat, hist,
data_layout_ != DataLayout::kSparseData);
}
inline void SubtractionTrick(GHistRowT self,
GHistRowT sibling,
GHistRowT parent) {
@ -338,17 +331,13 @@ class QuantileHistMaker: public TreeUpdater {
const HistCollection<GradientSumT>& hist,
const RegTree& tree);
+template <bool any_missing>
void ApplySplit(std::vector<CPUExpandEntry> nodes,
const GHistIndexMatrix& gmat,
const ColumnMatrix& column_matrix,
const HistCollection<GradientSumT>& hist,
RegTree* p_tree);
-template <typename BinIdxType>
-void PartitionKernel(const size_t node_in_set, const size_t nid, const common::Range1d range,
-const int32_t split_cond,
-const ColumnMatrix& column_matrix, const RegTree& tree);
void AddSplitsToRowSet(const std::vector<CPUExpandEntry>& nodes, RegTree* p_tree);
@ -376,10 +365,11 @@ class QuantileHistMaker: public TreeUpdater {
// else - there are missing values
bool SplitContainsMissingValues(const GradStats e, const NodeEntry& snode);
+template <bool any_missing>
void BuildLocalHistograms(const GHistIndexMatrix &gmat,
RegTree *p_tree,
const std::vector<GradientPair> &gpair_h);
+template <bool any_missing>
void InitRoot(const GHistIndexMatrix &gmat,
const DMatrix& fmat,
RegTree *p_tree,
@ -402,7 +392,7 @@ class QuantileHistMaker: public TreeUpdater {
const DMatrix& fmat,
const std::vector<GradientPair> &gpair_h,
const std::vector<CPUExpandEntry>& nodes_for_apply_split, RegTree *p_tree);
+template <bool any_missing>
void ExpandTree(const GHistIndexMatrix& gmat,
const ColumnMatrix& column_matrix,
DMatrix* p_fmat,


@ -23,19 +23,19 @@ TEST(DenseColumn, Test) {
for (auto j = 0ull; j < dmat->Info().num_col_; j++) {
switch (column_matrix.GetTypeSize()) {
case kUint8BinsTypeSize: {
-auto col = column_matrix.GetColumn<uint8_t>(j);
+auto col = column_matrix.GetColumn<uint8_t, false>(j);
ASSERT_EQ(gmat.index[i * dmat->Info().num_col_ + j],
(*col.get()).GetGlobalBinIdx(i));
}
break;
case kUint16BinsTypeSize: {
-auto col = column_matrix.GetColumn<uint16_t>(j);
+auto col = column_matrix.GetColumn<uint16_t, false>(j);
ASSERT_EQ(gmat.index[i * dmat->Info().num_col_ + j],
(*col.get()).GetGlobalBinIdx(i));
}
break;
case kUint32BinsTypeSize: {
-auto col = column_matrix.GetColumn<uint32_t>(j);
+auto col = column_matrix.GetColumn<uint32_t, false>(j);
ASSERT_EQ(gmat.index[i * dmat->Info().num_col_ + j],
(*col.get()).GetGlobalBinIdx(i));
}
@ -68,17 +68,17 @@ TEST(SparseColumn, Test) {
column_matrix.Init(gmat, 0.5);
switch (column_matrix.GetTypeSize()) {
case kUint8BinsTypeSize: {
-auto col = column_matrix.GetColumn<uint8_t>(0);
+auto col = column_matrix.GetColumn<uint8_t, true>(0);
CheckSparseColumn(*col.get(), gmat);
}
break;
case kUint16BinsTypeSize: {
-auto col = column_matrix.GetColumn<uint16_t>(0);
+auto col = column_matrix.GetColumn<uint16_t, true>(0);
CheckSparseColumn(*col.get(), gmat);
}
break;
case kUint32BinsTypeSize: {
-auto col = column_matrix.GetColumn<uint32_t>(0);
+auto col = column_matrix.GetColumn<uint32_t, true>(0);
CheckSparseColumn(*col.get(), gmat);
}
break;
@ -89,7 +89,7 @@ TEST(SparseColumn, Test) {
template<typename BinIdxType>
inline void CheckColumWithMissingValue(const Column<BinIdxType>& col_input,
const GHistIndexMatrix& gmat) {
-const DenseColumn<BinIdxType>& col = static_cast<const DenseColumn<BinIdxType>& >(col_input);
+const DenseColumn<BinIdxType, true>& col = static_cast<const DenseColumn<BinIdxType, true>& >(col_input);
for (auto i = 0ull; i < col.Size(); i++) {
if (col.IsMissing(i)) continue;
EXPECT_EQ(gmat.index[gmat.row_ptr[i]],
@ -109,17 +109,17 @@ TEST(DenseColumnWithMissing, Test) {
column_matrix.Init(gmat, 0.2);
switch (column_matrix.GetTypeSize()) {
case kUint8BinsTypeSize: {
-auto col = column_matrix.GetColumn<uint8_t>(0);
+auto col = column_matrix.GetColumn<uint8_t, true>(0);
CheckColumWithMissingValue(*col.get(), gmat);
}
break;
case kUint16BinsTypeSize: {
-auto col = column_matrix.GetColumn<uint16_t>(0);
+auto col = column_matrix.GetColumn<uint16_t, true>(0);
CheckColumWithMissingValue(*col.get(), gmat);
}
break;
case kUint32BinsTypeSize: {
-auto col = column_matrix.GetColumn<uint32_t>(0);
+auto col = column_matrix.GetColumn<uint32_t, true>(0);
CheckColumWithMissingValue(*col.get(), gmat);
}
break;
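These assertions compare the flat gradient-index entry against the column view's GetGlobalBinIdx, i.e. the feature's local bin plus that feature's base offset. A toy example of that bookkeeping (simplified, not the real ColumnMatrix):

```cpp
#include <cstdint>
#include <cstdio>
#include <vector>

// With few bins per feature, each dense column stores narrow local bin ids and
// adds its feature's base offset back whenever a global bin id is needed.
int main() {
  std::vector<uint32_t> feature_offset = {0, 4, 9};  // feature f owns bins [offset[f], offset[f+1])
  std::vector<std::vector<uint8_t>> local_bins = {   // per-feature, per-row local ids
      {0, 3, 1},   // feature 0
      {2, 0, 4}};  // feature 1
  for (size_t f = 0; f < local_bins.size(); ++f) {
    for (size_t row = 0; row < local_bins[f].size(); ++row) {
      uint32_t global = feature_offset[f] + local_bins[f][row];  // GetGlobalBinIdx analogue
      std::printf("row %zu feature %zu -> global bin %u\n", row, f, global);
    }
  }
}
```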


@ -4,6 +4,7 @@
#include <utility>
#include "../../../src/common/row_set.h"
+#include "../../../src/common/partition_builder.h"
#include "../helpers.h"
namespace xgboost {


@ -309,7 +309,7 @@ class QuantileHistMock : public QuantileHistMaker {
RealImpl::InitData(gmat, fmat, tree, &gpair);
this->hist_.AddHistRow(nid);
this->hist_.AllocateAllData();
-this->BuildHist(gpair, this->row_set_collection_[nid],
+this->hist_builder_.template BuildHist<true>(gpair, this->row_set_collection_[nid],
gmat, this->hist_[nid]);
// Check if number of histogram bins is correct
@ -350,7 +350,7 @@ class QuantileHistMock : public QuantileHistMaker {
RealImpl::InitData(gmat, *dmat, tree, &row_gpairs);
this->hist_.AddHistRow(0);
this->hist_.AllocateAllData();
-this->BuildHist(row_gpairs, this->row_set_collection_[0],
+this->hist_builder_.template BuildHist<false>(row_gpairs, this->row_set_collection_[0],
gmat, this->hist_[0]);
RealImpl::InitNewNode(0, gmat, row_gpairs, *dmat, tree);
@ -482,8 +482,13 @@ class QuantileHistMock : public QuantileHistMaker {
});
const size_t task_id = RealImpl::partition_builder_.GetTaskIdx(0, 0);
RealImpl::partition_builder_.AllocateForTask(task_id);
-this->template PartitionKernel<uint8_t>(0, 0, common::Range1d(0, kNRows),
-split, cm, tree);
+if (cm.AnyMissing()) {
+RealImpl::partition_builder_.template Partition<uint8_t, true>(0, 0, common::Range1d(0, kNRows),
+split, cm, tree, this->row_set_collection_[0].begin);
+} else {
+RealImpl::partition_builder_.template Partition<uint8_t, false>(0, 0, common::Range1d(0, kNRows),
+split, cm, tree, this->row_set_collection_[0].begin);
+}
RealImpl::partition_builder_.CalculateRowOffsets();
ASSERT_EQ(RealImpl::partition_builder_.GetNLeftElems(0), left_cnt);
ASSERT_EQ(RealImpl::partition_builder_.GetNRightElems(0), right_cnt);