Small cleanup for histogram routines. (#9427)
* Small cleanup for histogram routines.

- Extract hist train param from GPU hist.
- Make histogram const after construction.
- Unify parameter names.
@@ -81,11 +81,11 @@ void InitilizeHistByZeroes(GHistRow hist, size_t begin, size_t end) {
 /*!
  * \brief Increment hist as dst += add in range [begin, end)
  */
-void IncrementHist(GHistRow dst, const GHistRow add, size_t begin, size_t end) {
-  double* pdst = reinterpret_cast<double*>(dst.data());
+void IncrementHist(GHistRow dst, ConstGHistRow add, std::size_t begin, std::size_t end) {
+  double *pdst = reinterpret_cast<double *>(dst.data());
   const double *padd = reinterpret_cast<const double *>(add.data());
 
-  for (size_t i = 2 * begin; i < 2 * end; ++i) {
+  for (std::size_t i = 2 * begin; i < 2 * end; ++i) {
     pdst[i] += padd[i];
   }
 }
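Note: GHistRow is a Span over GradientPairPrecise, a (grad, hess) pair of doubles, so a row of n bins is bit-compatible with a flat array of 2n doubles; that is what the reinterpret_cast above relies on. A minimal sketch of the same accumulation without the cast (the struct layout and the helper name IncrementHistRef are illustrative, not XGBoost's API):

    #include <cstddef>
    #include <vector>

    struct GradientPairPrecise {  // stand-in: two doubles, matching the double* view
      double grad{0.0}, hess{0.0};
    };

    // dst += add over bins [begin, end), written against the pair type directly.
    void IncrementHistRef(std::vector<GradientPairPrecise> *dst,
                          const std::vector<GradientPairPrecise> &add,
                          std::size_t begin, std::size_t end) {
      for (std::size_t i = begin; i < end; ++i) {
        (*dst)[i].grad += add[i].grad;  // pdst[2 * i]     += padd[2 * i]
        (*dst)[i].hess += add[i].hess;  // pdst[2 * i + 1] += padd[2 * i + 1]
      }
    }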
@@ -207,18 +207,23 @@ void RowsWiseBuildHistKernel(Span<GradientPair const> gpair,
 
   const size_t size = row_indices.Size();
   const size_t *rid = row_indices.begin;
-  auto const *pgh = reinterpret_cast<const float *>(gpair.data());
+  auto const *p_gpair = reinterpret_cast<const float *>(gpair.data());
   const BinIdxType *gradient_index = gmat.index.data<BinIdxType>();
 
   auto const &row_ptr = gmat.row_ptr.data();
   auto base_rowid = gmat.base_rowid;
-  const uint32_t *offsets = gmat.index.Offset();
-  auto get_row_ptr = [&](size_t ridx) {
+  uint32_t const *offsets = gmat.index.Offset();
+  // There's no feature-based compression if missing value is present.
+  if (kAnyMissing) {
+    CHECK(!offsets);
+  } else {
+    CHECK(offsets);
+  }
+
+  auto get_row_ptr = [&](bst_row_t ridx) {
     return kFirstPage ? row_ptr[ridx] : row_ptr[ridx - base_rowid];
   };
-  auto get_rid = [&](size_t ridx) {
-    return kFirstPage ? ridx : (ridx - base_rowid);
-  };
+  auto get_rid = [&](bst_row_t ridx) { return kFirstPage ? ridx : (ridx - base_rowid); };
 
   const size_t n_features =
       get_row_ptr(row_indices.begin[0] + 1) - get_row_ptr(row_indices.begin[0]);
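Note: the base_rowid adjustment exists because, with external memory, gradient-index pages after the first store their row pointers relative to the page's first row; the kFirstPage template flag lets the compiler drop the subtraction on the common single-page path. A small sketch of the translation rule (LocalRow is a hypothetical name):

    #include <cstddef>

    // Global-to-local row translation for one page: on the first page the ids
    // coincide; on later pages the page's starting row is subtracted.
    std::size_t LocalRow(std::size_t ridx, std::size_t base_rowid, bool first_page) {
      return first_page ? ridx : ridx - base_rowid;
    }

    int main() {
      // A page starting at global row 1000: global row 1005 is local row 5.
      return LocalRow(1005, 1000, /*first_page=*/false) == 5 ? 0 : 1;
    }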
@@ -228,7 +233,7 @@ void RowsWiseBuildHistKernel(Span<GradientPair const> gpair,
   // So we need to multiply each row-index/bin-index by 2
   // to work with gradient pairs as a singe row FP array
 
-  for (size_t i = 0; i < size; ++i) {
+  for (std::size_t i = 0; i < size; ++i) {
     const size_t icol_start =
         kAnyMissing ? get_row_ptr(rid[i]) : get_rid(rid[i]) * n_features;
     const size_t icol_end =
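Note: a worked example of the "multiply by 2" comment above, with hypothetical values. Gradient pairs are read as a flat float array, so row r's pair sits at indices 2*r and 2*r + 1, and in the dense (no-missing) case a row's bins are addressed by row id times the feature count:

    #include <cstddef>

    int main() {
      const std::size_t n_features = 4;
      const std::size_t row = 7;
      const std::size_t icol_start = row * n_features;       // bins of row 7 start at 28
      const std::size_t icol_end = icol_start + n_features;  // ...and end before 32
      const std::size_t idx_gh = 2 * row;  // grad at p_gpair[14], hess at p_gpair[15]
      return (icol_start == 28 && icol_end == 32 && idx_gh == 14) ? 0 : 1;
    }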
@@ -246,7 +251,7 @@ void RowsWiseBuildHistKernel(Span<GradientPair const> gpair,
           kAnyMissing ? get_row_ptr(rid[i + Prefetch::kPrefetchOffset] + 1)
                       : icol_start_prefetch + n_features;
 
-      PREFETCH_READ_T0(pgh + two * rid[i + Prefetch::kPrefetchOffset]);
+      PREFETCH_READ_T0(p_gpair + two * rid[i + Prefetch::kPrefetchOffset]);
       for (size_t j = icol_start_prefetch; j < icol_end_prefetch;
            j += Prefetch::GetPrefetchStep<uint32_t>()) {
         PREFETCH_READ_T0(gradient_index + j);
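Note: the kernel software-prefetches the gradient pair and bin indices of the row kPrefetchOffset iterations ahead, hiding memory latency behind the current row's arithmetic. PREFETCH_READ_T0 is XGBoost's own macro; on GCC/Clang a macro like it typically wraps the prefetch builtin (a plausible stand-in, not the project's actual definition):

    // Read prefetch (rw = 0) with maximal temporal locality (3, i.e. keep in all
    // cache levels, hence "T0"). A prefetch is only a hint and never faults.
    #define PREFETCH_READ_T0(addr) __builtin_prefetch((addr), 0, 3)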
@@ -255,12 +260,12 @@ void RowsWiseBuildHistKernel(Span<GradientPair const> gpair,
     const BinIdxType *gr_index_local = gradient_index + icol_start;
 
     // The trick with pgh_t buffer helps the compiler to generate faster binary.
-    const float pgh_t[] = {pgh[idx_gh], pgh[idx_gh + 1]};
+    const float pgh_t[] = {p_gpair[idx_gh], p_gpair[idx_gh + 1]};
     for (size_t j = 0; j < row_size; ++j) {
-      const uint32_t idx_bin = two * (static_cast<uint32_t>(gr_index_local[j]) +
-                                      (kAnyMissing ? 0 : offsets[j]));
+      const uint32_t idx_bin =
+          two * (static_cast<uint32_t>(gr_index_local[j]) + (kAnyMissing ? 0 : offsets[j]));
       auto hist_local = hist_data + idx_bin;
       *(hist_local) += pgh_t[0];
       *(hist_local + 1) += pgh_t[1];
     }
   }
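Note: the pgh_t buffer copies the row's two gradient values into locals before the inner loop; without it the compiler generally cannot prove that stores through hist_data never alias p_gpair, so it may reload both floats on every iteration. A condensed sketch of the pattern (names hypothetical):

    #include <cstddef>
    #include <cstdint>

    // Accumulate one row into its bins; g/h are read exactly once into registers.
    void AccumulateRow(double *hist, const float *gpair, std::size_t idx_gh,
                       const std::uint32_t *bins, std::size_t row_size) {
      const float pgh_t[] = {gpair[idx_gh], gpair[idx_gh + 1]};  // the buffer trick
      for (std::size_t j = 0; j < row_size; ++j) {
        double *cell = hist + 2 * bins[j];
        cell[0] += pgh_t[0];  // gradient
        cell[1] += pgh_t[1];  // hessian
      }
    }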
@@ -281,12 +286,10 @@ void ColsWiseBuildHistKernel(Span<GradientPair const> gpair,
   auto const &row_ptr = gmat.row_ptr.data();
   auto base_rowid = gmat.base_rowid;
   const uint32_t *offsets = gmat.index.Offset();
-  auto get_row_ptr = [&](size_t ridx) {
+  auto get_row_ptr = [&](bst_row_t ridx) {
     return kFirstPage ? row_ptr[ridx] : row_ptr[ridx - base_rowid];
   };
-  auto get_rid = [&](size_t ridx) {
-    return kFirstPage ? ridx : (ridx - base_rowid);
-  };
+  auto get_rid = [&](bst_row_t ridx) { return kFirstPage ? ridx : (ridx - base_rowid); };
 
   const size_t n_features = gmat.cut.Ptrs().size() - 1;
   const size_t n_columns = n_features;
 
@@ -362,6 +362,7 @@ bst_bin_t XGBOOST_HOST_DEV_INLINE BinarySearchBin(std::size_t begin, std::size_t
 }
 
 using GHistRow = Span<xgboost::GradientPairPrecise>;
+using ConstGHistRow = Span<xgboost::GradientPairPrecise const>;
 
 /*!
  * \brief fill a histogram by zeros
@@ -371,7 +372,7 @@ void InitilizeHistByZeroes(GHistRow hist, size_t begin, size_t end);
 /*!
  * \brief Increment hist as dst += add in range [begin, end)
  */
-void IncrementHist(GHistRow dst, const GHistRow add, size_t begin, size_t end);
+void IncrementHist(GHistRow dst, ConstGHistRow add, std::size_t begin, std::size_t end);
 
 /*!
  * \brief Copy hist from src to dst in range [begin, end)

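Note: ConstGHistRow is the read-only counterpart of GHistRow: the span itself is passed by value, but its elements are const, which is what lets a histogram become immutable after construction. XGBoost uses its own common::Span; std::span shows the same idea (a C++20 sketch, not the project's types):

    #include <span>
    #include <vector>

    struct GradientPairPrecise { double grad{0.0}, hess{0.0}; };  // stand-in

    using GHistRow = std::span<GradientPairPrecise>;             // mutable view
    using ConstGHistRow = std::span<GradientPairPrecise const>;  // read-only view

    double SumGrad(ConstGHistRow hist) {
      double total = 0.0;
      for (auto const &gh : hist) total += gh.grad;  // reads compile...
      // hist[0].grad = 1.0;  // ...writes would not: elements are const
      return total;
    }

    int main() {
      std::vector<GradientPairPrecise> storage(8);
      GHistRow hist{storage};  // mutable while the histogram is being built
      hist[0].grad = 0.5;
      return SumGrad(hist) == 0.5 ? 0 : 1;  // GHistRow converts to ConstGHistRow
    }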
@@ -136,7 +136,7 @@ class BlockedSpace2d {
 // Wrapper to implement nested parallelism with simple omp parallel for
 template <typename Func>
 void ParallelFor2d(const BlockedSpace2d& space, int nthreads, Func func) {
-  const size_t num_blocks_in_space = space.Size();
+  std::size_t n_blocks_in_space = space.Size();
   CHECK_GE(nthreads, 1);
 
   dmlc::OMPException exc;
@@ -144,11 +144,10 @@ void ParallelFor2d(const BlockedSpace2d& space, int nthreads, Func func) {
   {
     exc.Run([&]() {
       size_t tid = omp_get_thread_num();
-      size_t chunck_size =
-          num_blocks_in_space / nthreads + !!(num_blocks_in_space % nthreads);
+      size_t chunck_size = n_blocks_in_space / nthreads + !!(n_blocks_in_space % nthreads);
 
-      size_t begin = chunck_size * tid;
-      size_t end = std::min(begin + chunck_size, num_blocks_in_space);
+      std::size_t begin = chunck_size * tid;
+      std::size_t end = std::min(begin + chunck_size, n_blocks_in_space);
       for (auto i = begin; i < end; i++) {
         func(space.GetFirstDimension(i), space.GetRange(i));
       }
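Note: chunck_size is a ceiling division: n / k rounds down, and !!(n % k) adds one exactly when there is a remainder, so every block is covered and only the last thread may receive a short chunk. A worked check (DivRoundUp is a hypothetical helper):

    #include <cstddef>

    // ceil(n / k) without floating point; !! collapses any remainder to 1.
    std::size_t DivRoundUp(std::size_t n, std::size_t k) { return n / k + !!(n % k); }

    int main() {
      // 10 blocks across 4 threads -> chunks of 3; threads cover 3+3+3+1 blocks.
      // 8 blocks across 4 threads  -> chunks of 2, an exact split.
      return (DivRoundUp(10, 4) == 3 && DivRoundUp(8, 4) == 2) ? 0 : 1;
    }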