Simplify sparse and dense CPU hist kernels (#7029)
* Simplify sparse and dense kernels * Extract row partitioner. Co-authored-by: Kirill Shvets <kirill.shvets@intel.com>
This commit is contained in:
@@ -287,57 +287,18 @@ struct Prefetch {
|
||||
constexpr size_t Prefetch::kNoPrefetchSize;
|
||||
|
||||
|
||||
template<typename FPType, bool do_prefetch, typename BinIdxType>
|
||||
void BuildHistDenseKernel(const std::vector<GradientPair>& gpair,
|
||||
template<typename FPType, bool do_prefetch, typename BinIdxType, bool any_missing = true>
|
||||
void BuildHistKernel(const std::vector<GradientPair>& gpair,
|
||||
const RowSetCollection::Elem row_indices,
|
||||
const GHistIndexMatrix& gmat,
|
||||
const size_t n_features,
|
||||
GHistRow<FPType> hist) {
|
||||
const size_t size = row_indices.Size();
|
||||
const size_t* rid = row_indices.begin;
|
||||
const float* pgh = reinterpret_cast<const float*>(gpair.data());
|
||||
const BinIdxType* gradient_index = gmat.index.data<BinIdxType>();
|
||||
const uint32_t* offsets = gmat.index.Offset();
|
||||
FPType* hist_data = reinterpret_cast<FPType*>(hist.data());
|
||||
const uint32_t two {2}; // Each element from 'gpair' and 'hist' contains
|
||||
// 2 FP values: gradient and hessian.
|
||||
// So we need to multiply each row-index/bin-index by 2
|
||||
// to work with gradient pairs as a singe row FP array
|
||||
|
||||
for (size_t i = 0; i < size; ++i) {
|
||||
const size_t icol_start = rid[i] * n_features;
|
||||
const size_t idx_gh = two * rid[i];
|
||||
|
||||
if (do_prefetch) {
|
||||
const size_t icol_start_prefetch = rid[i + Prefetch::kPrefetchOffset] * n_features;
|
||||
|
||||
PREFETCH_READ_T0(pgh + two * rid[i + Prefetch::kPrefetchOffset]);
|
||||
for (size_t j = icol_start_prefetch; j < icol_start_prefetch + n_features;
|
||||
j += Prefetch::GetPrefetchStep<BinIdxType>()) {
|
||||
PREFETCH_READ_T0(gradient_index + j);
|
||||
}
|
||||
}
|
||||
const BinIdxType* gr_index_local = gradient_index + icol_start;
|
||||
for (size_t j = 0; j < n_features; ++j) {
|
||||
const uint32_t idx_bin = two * (static_cast<uint32_t>(gr_index_local[j]) +
|
||||
offsets[j]);
|
||||
|
||||
hist_data[idx_bin] += pgh[idx_gh];
|
||||
hist_data[idx_bin+1] += pgh[idx_gh+1];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template<typename FPType, bool do_prefetch>
|
||||
void BuildHistSparseKernel(const std::vector<GradientPair>& gpair,
|
||||
const RowSetCollection::Elem row_indices,
|
||||
const GHistIndexMatrix& gmat,
|
||||
GHistRow<FPType> hist) {
|
||||
const size_t size = row_indices.Size();
|
||||
const size_t* rid = row_indices.begin;
|
||||
const float* pgh = reinterpret_cast<const float*>(gpair.data());
|
||||
const uint32_t* gradient_index = gmat.index.data<uint32_t>();
|
||||
const size_t* row_ptr = gmat.row_ptr.data();
|
||||
const uint32_t* offsets = gmat.index.Offset();
|
||||
const size_t n_features = row_ptr[row_indices.begin[0]+1] - row_ptr[row_indices.begin[0]];
|
||||
FPType* hist_data = reinterpret_cast<FPType*>(hist.data());
|
||||
const uint32_t two {2}; // Each element from 'gpair' and 'hist' contains
|
||||
// 2 FP values: gradient and hessian.
|
||||
@@ -345,13 +306,16 @@ void BuildHistSparseKernel(const std::vector<GradientPair>& gpair,
|
||||
// to work with gradient pairs as a singe row FP array
|
||||
|
||||
for (size_t i = 0; i < size; ++i) {
|
||||
const size_t icol_start = row_ptr[rid[i]];
|
||||
const size_t icol_end = row_ptr[rid[i]+1];
|
||||
const size_t icol_start = any_missing ? row_ptr[rid[i]] : rid[i] * n_features;
|
||||
const size_t icol_end = any_missing ? row_ptr[rid[i]+1] : icol_start + n_features;
|
||||
const size_t row_size = icol_end - icol_start;
|
||||
const size_t idx_gh = two * rid[i];
|
||||
|
||||
if (do_prefetch) {
|
||||
const size_t icol_start_prftch = row_ptr[rid[i+Prefetch::kPrefetchOffset]];
|
||||
const size_t icol_end_prefect = row_ptr[rid[i+Prefetch::kPrefetchOffset]+1];
|
||||
const size_t icol_start_prftch = any_missing ? row_ptr[rid[i+Prefetch::kPrefetchOffset]] :
|
||||
rid[i + Prefetch::kPrefetchOffset] * n_features;
|
||||
const size_t icol_end_prefect = any_missing ? row_ptr[rid[i+Prefetch::kPrefetchOffset]+1] :
|
||||
icol_start_prftch + n_features;
|
||||
|
||||
PREFETCH_READ_T0(pgh + two * rid[i + Prefetch::kPrefetchOffset]);
|
||||
for (size_t j = icol_start_prftch; j < icol_end_prefect;
|
||||
@@ -359,47 +323,34 @@ void BuildHistSparseKernel(const std::vector<GradientPair>& gpair,
|
||||
PREFETCH_READ_T0(gradient_index + j);
|
||||
}
|
||||
}
|
||||
for (size_t j = icol_start; j < icol_end; ++j) {
|
||||
const uint32_t idx_bin = two * gradient_index[j];
|
||||
const BinIdxType* gr_index_local = gradient_index + icol_start;
|
||||
|
||||
for (size_t j = 0; j < row_size; ++j) {
|
||||
const uint32_t idx_bin = two * (static_cast<uint32_t>(gr_index_local[j]) + (
|
||||
any_missing ? 0 : offsets[j]));
|
||||
|
||||
hist_data[idx_bin] += pgh[idx_gh];
|
||||
hist_data[idx_bin+1] += pgh[idx_gh+1];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template<typename FPType, bool do_prefetch, typename BinIdxType>
|
||||
void BuildHistDispatchKernel(const std::vector<GradientPair>& gpair,
|
||||
template<typename FPType, bool do_prefetch, bool any_missing>
|
||||
void BuildHistDispatch(const std::vector<GradientPair>& gpair,
|
||||
const RowSetCollection::Elem row_indices,
|
||||
const GHistIndexMatrix& gmat, GHistRow<FPType> hist, bool isDense) {
|
||||
if (isDense) {
|
||||
const size_t* row_ptr = gmat.row_ptr.data();
|
||||
const size_t n_features = row_ptr[row_indices.begin[0]+1] - row_ptr[row_indices.begin[0]];
|
||||
BuildHistDenseKernel<FPType, do_prefetch, BinIdxType>(gpair, row_indices,
|
||||
gmat, n_features, hist);
|
||||
} else {
|
||||
BuildHistSparseKernel<FPType, do_prefetch>(gpair, row_indices,
|
||||
gmat, hist);
|
||||
}
|
||||
}
|
||||
|
||||
template<typename FPType, bool do_prefetch>
|
||||
void BuildHistKernel(const std::vector<GradientPair>& gpair,
|
||||
const RowSetCollection::Elem row_indices,
|
||||
const GHistIndexMatrix& gmat, const bool isDense, GHistRow<FPType> hist) {
|
||||
const bool is_dense = row_indices.Size() && isDense;
|
||||
const GHistIndexMatrix& gmat, GHistRow<FPType> hist) {
|
||||
switch (gmat.index.GetBinTypeSize()) {
|
||||
case kUint8BinsTypeSize:
|
||||
BuildHistDispatchKernel<FPType, do_prefetch, uint8_t>(gpair, row_indices,
|
||||
gmat, hist, is_dense);
|
||||
BuildHistKernel<FPType, do_prefetch, uint8_t, any_missing>(gpair, row_indices,
|
||||
gmat, hist);
|
||||
break;
|
||||
case kUint16BinsTypeSize:
|
||||
BuildHistDispatchKernel<FPType, do_prefetch, uint16_t>(gpair, row_indices,
|
||||
gmat, hist, is_dense);
|
||||
BuildHistKernel<FPType, do_prefetch, uint16_t, any_missing>(gpair, row_indices,
|
||||
gmat, hist);
|
||||
break;
|
||||
case kUint32BinsTypeSize:
|
||||
BuildHistDispatchKernel<FPType, do_prefetch, uint32_t>(gpair, row_indices,
|
||||
gmat, hist, is_dense);
|
||||
BuildHistKernel<FPType, do_prefetch, uint32_t, any_missing>(gpair, row_indices,
|
||||
gmat, hist);
|
||||
break;
|
||||
default:
|
||||
CHECK(false); // no default behavior
|
||||
@@ -407,10 +358,12 @@ void BuildHistKernel(const std::vector<GradientPair>& gpair,
|
||||
}
|
||||
|
||||
template <typename GradientSumT>
|
||||
template <bool any_missing>
|
||||
void GHistBuilder<GradientSumT>::BuildHist(
|
||||
const std::vector<GradientPair> &gpair,
|
||||
const RowSetCollection::Elem row_indices, const GHistIndexMatrix &gmat,
|
||||
GHistRowT hist, bool isDense) {
|
||||
const RowSetCollection::Elem row_indices,
|
||||
const GHistIndexMatrix &gmat,
|
||||
GHistRowT hist) {
|
||||
const size_t nrows = row_indices.Size();
|
||||
const size_t no_prefetch_size = Prefetch::NoPrefetchSize(nrows);
|
||||
|
||||
@@ -419,28 +372,36 @@ void GHistBuilder<GradientSumT>::BuildHist(
|
||||
|
||||
if (contiguousBlock) {
|
||||
// contiguous memory access, built-in HW prefetching is enough
|
||||
BuildHistKernel<GradientSumT, false>(gpair, row_indices, gmat, isDense, hist);
|
||||
BuildHistDispatch<GradientSumT, false, any_missing>(gpair, row_indices, gmat, hist);
|
||||
} else {
|
||||
const RowSetCollection::Elem span1(row_indices.begin, row_indices.end - no_prefetch_size);
|
||||
const RowSetCollection::Elem span2(row_indices.end - no_prefetch_size, row_indices.end);
|
||||
|
||||
BuildHistKernel<GradientSumT, true>(gpair, span1, gmat, isDense, hist);
|
||||
BuildHistDispatch<GradientSumT, true, any_missing>(gpair, span1, gmat, hist);
|
||||
// no prefetching to avoid loading extra memory
|
||||
BuildHistKernel<GradientSumT, false>(gpair, span2, gmat, isDense, hist);
|
||||
BuildHistDispatch<GradientSumT, false, any_missing>(gpair, span2, gmat, hist);
|
||||
}
|
||||
}
|
||||
template
|
||||
void GHistBuilder<float>::BuildHist(const std::vector<GradientPair>& gpair,
|
||||
void GHistBuilder<float>::BuildHist<true>(const std::vector<GradientPair>& gpair,
|
||||
const RowSetCollection::Elem row_indices,
|
||||
const GHistIndexMatrix& gmat,
|
||||
GHistRow<float> hist,
|
||||
bool isDense);
|
||||
GHistRow<float> hist);
|
||||
template
|
||||
void GHistBuilder<double>::BuildHist(const std::vector<GradientPair>& gpair,
|
||||
void GHistBuilder<float>::BuildHist<false>(const std::vector<GradientPair>& gpair,
|
||||
const RowSetCollection::Elem row_indices,
|
||||
const GHistIndexMatrix& gmat,
|
||||
GHistRow<double> hist,
|
||||
bool isDense);
|
||||
GHistRow<float> hist);
|
||||
template
|
||||
void GHistBuilder<double>::BuildHist<true>(const std::vector<GradientPair>& gpair,
|
||||
const RowSetCollection::Elem row_indices,
|
||||
const GHistIndexMatrix& gmat,
|
||||
GHistRow<double> hist);
|
||||
template
|
||||
void GHistBuilder<double>::BuildHist<false>(const std::vector<GradientPair>& gpair,
|
||||
const RowSetCollection::Elem row_indices,
|
||||
const GHistIndexMatrix& gmat,
|
||||
GHistRow<double> hist);
|
||||
|
||||
template<typename GradientSumT>
|
||||
void GHistBuilder<GradientSumT>::SubtractionTrick(GHistRowT self,
|
||||
|
||||
Reference in New Issue
Block a user