From 55e645c5f56559def985c0450ca747c00a538239 Mon Sep 17 00:00:00 2001
From: Jiaming Yuan
Date: Sun, 26 May 2019 08:57:41 +0800
Subject: [PATCH] Revert hist init optimization. (#4502)

---
 src/common/hist_util.cc | 103 ++++++++++------------------------------
 1 file changed, 26 insertions(+), 77 deletions(-)

diff --git a/src/common/hist_util.cc b/src/common/hist_util.cc
index a473f9240..1ac058645 100644
--- a/src/common/hist_util.cc
+++ b/src/common/hist_util.cc
@@ -50,7 +50,7 @@ void HistCutMatrix::Init(DMatrix* p_fmat, uint32_t max_num_bins) {
   constexpr int kFactor = 8;
   std::vector<WXQSketch> sketchs;
 
-  const size_t nthread = omp_get_max_threads();
+  const int nthread = omp_get_max_threads();
 
   unsigned const nstep =
       static_cast<unsigned>((info.num_col_ + nthread - 1) / nthread);
@@ -68,85 +68,34 @@ void HistCutMatrix::Init(DMatrix* p_fmat, uint32_t max_num_bins) {
 
   // Use group index for weights?
   bool const use_group_ind = num_groups != 0 && weights.size() != info.num_row_;
-  if (use_group_ind) {
-    for (const auto &batch : p_fmat->GetRowBatches()) {
-      size_t group_ind = this->SearchGroupIndFromBaseRow(group_ptr, batch.base_rowid);
-      #pragma omp parallel num_threads(nthread) firstprivate(group_ind, use_group_ind)
-      {
-        CHECK_EQ(nthread, omp_get_num_threads());
-        auto tid = static_cast<unsigned>(omp_get_thread_num());
-        unsigned begin = std::min(nstep * tid, ncol);
-        unsigned end = std::min(nstep * (tid + 1), ncol);
-
-        // do not iterate if no columns are assigned to the thread
-        if (begin < end && end <= ncol) {
-          for (size_t i = 0; i < batch.Size(); ++i) { // NOLINT(*)
-            size_t const ridx = batch.base_rowid + i;
-            SparsePage::Inst const inst = batch[i];
-            if (group_ptr[group_ind] == ridx &&
-                // maximum equals to weights.size() - 1
-                group_ind < num_groups - 1) {
-              // move to next group
-              group_ind++;
-            }
-            for (auto const& entry : inst) {
-              if (entry.index >= begin && entry.index < end) {
-                size_t w_idx = group_ind;
-                sketchs[entry.index].Push(entry.fvalue, info.GetWeight(w_idx));
-              }
-            }
-          }
-        }
-      }
+  for (const auto &batch : p_fmat->GetRowBatches()) {
+    size_t group_ind = 0;
+    if (use_group_ind) {
+      group_ind = this->SearchGroupIndFromBaseRow(group_ptr, batch.base_rowid);
     }
-  } else {
-    for (const auto &batch : p_fmat->GetRowBatches()) {
-      const size_t size = batch.Size();
-      const size_t block_size = 512;
-      const size_t block_size_iter = block_size * nthread;
-      const size_t n_blocks = size / block_size_iter + !!(size % block_size_iter);
+#pragma omp parallel num_threads(nthread) firstprivate(group_ind, use_group_ind)
+    {
+      CHECK_EQ(nthread, omp_get_num_threads());
+      auto tid = static_cast<unsigned>(omp_get_thread_num());
+      unsigned begin = std::min(nstep * tid, ncol);
+      unsigned end = std::min(nstep * (tid + 1), ncol);
 
-      std::vector<std::vector<std::pair<bst_float, bst_float>>> buff(nthread);
-      for (size_t tid = 0; tid < nthread; ++tid) {
-        buff[tid].resize(block_size * ncol);
-      }
-
-      std::vector<size_t> sizes(nthread * ncol, 0);
-
-      for (size_t iblock = 0; iblock < n_blocks; ++iblock) {
-        #pragma omp parallel num_threads(nthread)
-        {
-          int tid = omp_get_thread_num();
-
-          const size_t ibegin = iblock * block_size_iter + tid * block_size;
-          const size_t iend = std::min(ibegin + block_size, size);
-
-          auto* p_sizes = sizes.data() + ncol * tid;
-          auto* p_buff = buff[tid].data();
-
-          for (size_t i = ibegin; i < iend; ++i) {
-            size_t const ridx = batch.base_rowid + i;
-            bst_float w = info.GetWeight(ridx);
-            SparsePage::Inst const inst = batch[i];
-
-            for (auto const& entry : inst) {
-              const size_t idx = entry.index;
-              p_buff[idx * block_size + p_sizes[idx]] = { entry.fvalue, w };
-              p_sizes[idx]++;
-            }
+      // do not iterate if no columns are assigned to the thread
+      if (begin < end && end <= ncol) {
+        for (size_t i = 0; i < batch.Size(); ++i) { // NOLINT(*)
+          size_t const ridx = batch.base_rowid + i;
+          SparsePage::Inst const inst = batch[i];
+          if (use_group_ind &&
+              group_ptr[group_ind] == ridx &&
+              // maximum equals to weights.size() - 1
+              group_ind < num_groups - 1) {
+            // move to next group
+            group_ind++;
           }
-          #pragma omp barrier
-          #pragma omp for schedule(static)
-          for (int32_t icol = 0; icol < static_cast<int32_t>(ncol); ++icol) {
-            for (size_t tid = 0; tid < nthread; ++tid) {
-              auto* p_sizes = sizes.data() + ncol * tid;
-              auto* p_buff = buff[tid].data() + icol * block_size;
-
-              for (size_t i = 0; i < p_sizes[icol]; ++i) {
-                sketchs[icol].Push(p_buff[i].first, p_buff[i].second);
-              }
-
-              p_sizes[icol] = 0;
+          for (auto const& entry : inst) {
+            if (entry.index >= begin && entry.index < end) {
+              size_t w_idx = use_group_ind ? group_ind : ridx;
+              sketchs[entry.index].Push(entry.fvalue, info.GetWeight(w_idx));
            }
           }
         }
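
Note on the retained code path: it statically partitions columns across OpenMP
threads. Thread tid owns the contiguous column range [nstep * tid,
nstep * (tid + 1)), scans every row of the batch, and pushes only the entries
whose column falls in its range, so each quantile sketch is written by exactly
one thread and needs no locking. The standalone C++ sketch below illustrates
that scheme outside of XGBoost; it is an illustration only, and its types and
names (Entry, Row, ColumnSums) are hypothetical, not part of this patch.

    // Minimal sketch of the column-partitioned OpenMP scheme: each thread
    // owns a contiguous block of columns, so per-column state is lock-free.
    #include <omp.h>

    #include <algorithm>
    #include <vector>

    struct Entry {
      unsigned index;  // column index
      float fvalue;    // feature value
    };
    using Row = std::vector<Entry>;

    std::vector<double> ColumnSums(const std::vector<Row>& rows, unsigned ncol) {
      std::vector<double> sums(ncol, 0.0);
      const int nthread = omp_get_max_threads();
      // Same partitioning as the patch: ceil(ncol / nthread) columns per thread.
      const unsigned nstep = static_cast<unsigned>((ncol + nthread - 1) / nthread);
    #pragma omp parallel num_threads(nthread)
      {
        const auto tid = static_cast<unsigned>(omp_get_thread_num());
        const unsigned begin = std::min(nstep * tid, ncol);
        const unsigned end = std::min(nstep * (tid + 1), ncol);
        // Each thread reads every row but only writes columns it owns,
        // so no two threads ever touch the same accumulator.
        if (begin < end) {
          for (const Row& row : rows) {
            for (const Entry& e : row) {
              if (e.index >= begin && e.index < end) {
                sums[e.index] += e.fvalue;
              }
            }
          }
        }
      }
      return sums;
    }

The trade-off is that every thread re-reads the full batch. The reverted path
avoided that redundant scanning by buffering (fvalue, weight) pairs per thread
and flushing them per column, at the cost of the extra buffers and the barrier
visible in the removed lines; this patch backs that optimization out.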