Revert hist init optimization. (#4502)
This commit is contained in:
parent
8ddd2715ee
commit
55e645c5f5
@ -50,7 +50,7 @@ void HistCutMatrix::Init(DMatrix* p_fmat, uint32_t max_num_bins) {
|
|||||||
constexpr int kFactor = 8;
|
constexpr int kFactor = 8;
|
||||||
std::vector<WXQSketch> sketchs;
|
std::vector<WXQSketch> sketchs;
|
||||||
|
|
||||||
const size_t nthread = omp_get_max_threads();
|
const int nthread = omp_get_max_threads();
|
||||||
|
|
||||||
unsigned const nstep =
|
unsigned const nstep =
|
||||||
static_cast<unsigned>((info.num_col_ + nthread - 1) / nthread);
|
static_cast<unsigned>((info.num_col_ + nthread - 1) / nthread);
|
||||||
@ -68,85 +68,34 @@ void HistCutMatrix::Init(DMatrix* p_fmat, uint32_t max_num_bins) {
|
|||||||
// Use group index for weights?
|
// Use group index for weights?
|
||||||
bool const use_group_ind = num_groups != 0 && weights.size() != info.num_row_;
|
bool const use_group_ind = num_groups != 0 && weights.size() != info.num_row_;
|
||||||
|
|
||||||
if (use_group_ind) {
|
for (const auto &batch : p_fmat->GetRowBatches()) {
|
||||||
for (const auto &batch : p_fmat->GetRowBatches()) {
|
size_t group_ind = 0;
|
||||||
size_t group_ind = this->SearchGroupIndFromBaseRow(group_ptr, batch.base_rowid);
|
if (use_group_ind) {
|
||||||
#pragma omp parallel num_threads(nthread) firstprivate(group_ind, use_group_ind)
|
group_ind = this->SearchGroupIndFromBaseRow(group_ptr, batch.base_rowid);
|
||||||
{
|
|
||||||
CHECK_EQ(nthread, omp_get_num_threads());
|
|
||||||
auto tid = static_cast<unsigned>(omp_get_thread_num());
|
|
||||||
unsigned begin = std::min(nstep * tid, ncol);
|
|
||||||
unsigned end = std::min(nstep * (tid + 1), ncol);
|
|
||||||
|
|
||||||
// do not iterate if no columns are assigned to the thread
|
|
||||||
if (begin < end && end <= ncol) {
|
|
||||||
for (size_t i = 0; i < batch.Size(); ++i) { // NOLINT(*)
|
|
||||||
size_t const ridx = batch.base_rowid + i;
|
|
||||||
SparsePage::Inst const inst = batch[i];
|
|
||||||
if (group_ptr[group_ind] == ridx &&
|
|
||||||
// the maximum equals weights.size() - 1
|
|
||||||
group_ind < num_groups - 1) {
|
|
||||||
// move to next group
|
|
||||||
group_ind++;
|
|
||||||
}
|
|
||||||
for (auto const& entry : inst) {
|
|
||||||
if (entry.index >= begin && entry.index < end) {
|
|
||||||
size_t w_idx = group_ind;
|
|
||||||
sketchs[entry.index].Push(entry.fvalue, info.GetWeight(w_idx));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
} else {
|
#pragma omp parallel num_threads(nthread) firstprivate(group_ind, use_group_ind)
|
||||||
for (const auto &batch : p_fmat->GetRowBatches()) {
|
{
|
||||||
const size_t size = batch.Size();
|
CHECK_EQ(nthread, omp_get_num_threads());
|
||||||
const size_t block_size = 512;
|
auto tid = static_cast<unsigned>(omp_get_thread_num());
|
||||||
const size_t block_size_iter = block_size * nthread;
|
unsigned begin = std::min(nstep * tid, ncol);
|
||||||
const size_t n_blocks = size / block_size_iter + !!(size % block_size_iter);
|
unsigned end = std::min(nstep * (tid + 1), ncol);
|
||||||
|
|
||||||
std::vector<std::vector<std::pair<float, float>>> buff(nthread);
|
// do not iterate if no columns are assigned to the thread
|
||||||
for (size_t tid = 0; tid < nthread; ++tid) {
|
if (begin < end && end <= ncol) {
|
||||||
buff[tid].resize(block_size * ncol);
|
for (size_t i = 0; i < batch.Size(); ++i) { // NOLINT(*)
|
||||||
}
|
size_t const ridx = batch.base_rowid + i;
|
||||||
|
SparsePage::Inst const inst = batch[i];
|
||||||
std::vector<size_t> sizes(nthread * ncol, 0);
|
if (use_group_ind &&
|
||||||
|
group_ptr[group_ind] == ridx &&
|
||||||
for (size_t iblock = 0; iblock < n_blocks; ++iblock) {
|
// the maximum equals weights.size() - 1
|
||||||
#pragma omp parallel num_threads(nthread)
|
group_ind < num_groups - 1) {
|
||||||
{
|
// move to next group
|
||||||
int tid = omp_get_thread_num();
|
group_ind++;
|
||||||
|
|
||||||
const size_t ibegin = iblock * block_size_iter + tid * block_size;
|
|
||||||
const size_t iend = std::min(ibegin + block_size, size);
|
|
||||||
|
|
||||||
auto* p_sizes = sizes.data() + ncol * tid;
|
|
||||||
auto* p_buff = buff[tid].data();
|
|
||||||
|
|
||||||
for (size_t i = ibegin; i < iend; ++i) {
|
|
||||||
size_t const ridx = batch.base_rowid + i;
|
|
||||||
bst_float w = info.GetWeight(ridx);
|
|
||||||
SparsePage::Inst const inst = batch[i];
|
|
||||||
|
|
||||||
for (auto const& entry : inst) {
|
|
||||||
const size_t idx = entry.index;
|
|
||||||
p_buff[idx * block_size + p_sizes[idx]] = { entry.fvalue, w };
|
|
||||||
p_sizes[idx]++;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
#pragma omp barrier
|
for (auto const& entry : inst) {
|
||||||
#pragma omp for schedule(static)
|
if (entry.index >= begin && entry.index < end) {
|
||||||
for (int32_t icol = 0; icol < static_cast<int32_t>(ncol); ++icol) {
|
size_t w_idx = use_group_ind ? group_ind : ridx;
|
||||||
for (size_t tid = 0; tid < nthread; ++tid) {
|
sketchs[entry.index].Push(entry.fvalue, info.GetWeight(w_idx));
|
||||||
auto* p_sizes = sizes.data() + ncol * tid;
|
|
||||||
auto* p_buff = buff[tid].data() + icol * block_size;
|
|
||||||
|
|
||||||
for (size_t i = 0; i < p_sizes[icol]; ++i) {
|
|
||||||
sketchs[icol].Push(p_buff[i].first, p_buff[i].second);
|
|
||||||
}
|
|
||||||
|
|
||||||
p_sizes[icol] = 0;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user