Clean up training code. (#3825)

* Remove GHistRow, GHistEntry, GHistIndexRow.
* Remove kSimpleStats.
* Remove CheckInfo, SetLeafVec in GradStats and in SKStats.
* Clean up the GradStats.
* Clean up calcgain.
* Move LossChangeMissing out of common.
* Remove [] operator from GHistIndexBlock.
This commit is contained in:
Jiaming Yuan
2019-02-07 14:22:13 +08:00
committed by GitHub
parent 325b16bccd
commit 017c97b8ce
19 changed files with 306 additions and 406 deletions

View File

@@ -1,5 +1,5 @@
/*!
* Copyright 2017 by Contributors
* Copyright 2017-2018 by Contributors
* \file hist_util.h
* \brief Utilities to store histograms
* \author Philip Cho, Tianqi Chen
@@ -417,7 +417,7 @@ void GHistBuilder::BuildHist(const std::vector<GradientPair>& gpair,
const size_t* row_ptr = gmat.row_ptr.data();
const float* pgh = reinterpret_cast<const float*>(gpair.data());
double* hist_data = reinterpret_cast<double*>(hist.begin);
double* hist_data = reinterpret_cast<double*>(hist.data());
double* data = reinterpret_cast<double*>(data_.data());
const size_t block_size = 512;
@@ -432,11 +432,11 @@ void GHistBuilder::BuildHist(const std::vector<GradientPair>& gpair,
size_t no_prefetch_size = prefetch_offset + cache_line_size/sizeof(*rid);
no_prefetch_size = no_prefetch_size > nrows ? nrows : no_prefetch_size;
#pragma omp parallel for num_threads(nthread_to_process) schedule(guided)
#pragma omp parallel for num_threads(nthread_to_process) schedule(guided)
for (bst_omp_uint iblock = 0; iblock < n_blocks; iblock++) {
dmlc::omp_uint tid = omp_get_thread_num();
double* data_local_hist = ((nthread_to_process == 1) ? hist_data :
reinterpret_cast<double*>(data_.data() + tid * nbins_));
reinterpret_cast<double*>(data_.data() + tid * nbins_));
if (!thread_init_[tid]) {
memset(data_local_hist, '\0', 2*nbins_*sizeof(double));
@@ -477,7 +477,7 @@ void GHistBuilder::BuildHist(const std::vector<GradientPair>& gpair,
}
}
#pragma omp parallel for num_threads(std::min(nthread, n_blocks)) schedule(guided)
#pragma omp parallel for num_threads(std::min(nthread, n_blocks)) schedule(guided)
for (bst_omp_uint iblock = 0; iblock < n_blocks; iblock++) {
const size_t istart = iblock * block_size;
const size_t iend = (((iblock + 1) * block_size > size) ? size : istart + block_size);
@@ -507,8 +507,9 @@ void GHistBuilder::BuildBlockHist(const std::vector<GradientPair>& gpair,
#if defined(_OPENMP)
const auto nthread = static_cast<bst_omp_uint>(this->nthread_);
#endif
tree::GradStats* p_hist = hist.data();
#pragma omp parallel for num_threads(nthread) schedule(guided)
#pragma omp parallel for num_threads(nthread) schedule(guided)
for (bst_omp_uint bid = 0; bid < nblock; ++bid) {
auto gmat = gmatb[bid];
@@ -517,20 +518,17 @@ void GHistBuilder::BuildBlockHist(const std::vector<GradientPair>& gpair,
size_t ibegin[kUnroll];
size_t iend[kUnroll];
GradientPair stat[kUnroll];
for (int k = 0; k < kUnroll; ++k) {
rid[k] = row_indices.begin[i + k];
}
for (int k = 0; k < kUnroll; ++k) {
ibegin[k] = gmat.row_ptr[rid[k]];
iend[k] = gmat.row_ptr[rid[k] + 1];
}
for (int k = 0; k < kUnroll; ++k) {
stat[k] = gpair[rid[k]];
}
for (int k = 0; k < kUnroll; ++k) {
for (size_t j = ibegin[k]; j < iend[k]; ++j) {
const uint32_t bin = gmat.index[j];
hist.begin[bin].Add(stat[k]);
p_hist[bin].Add(stat[k]);
}
}
}
@@ -541,7 +539,7 @@ void GHistBuilder::BuildBlockHist(const std::vector<GradientPair>& gpair,
const GradientPair stat = gpair[rid];
for (size_t j = ibegin; j < iend; ++j) {
const uint32_t bin = gmat.index[j];
hist.begin[bin].Add(stat);
p_hist[bin].Add(stat);
}
}
}
@@ -555,24 +553,27 @@ void GHistBuilder::SubtractionTrick(GHistRow self, GHistRow sibling, GHistRow pa
#if defined(_OPENMP)
const auto nthread = static_cast<bst_omp_uint>(this->nthread_);
#endif
tree::GradStats* p_self = self.data();
tree::GradStats* p_sibling = sibling.data();
tree::GradStats* p_parent = parent.data();
#pragma omp parallel for num_threads(nthread) schedule(static)
#pragma omp parallel for num_threads(nthread) schedule(static)
for (bst_omp_uint bin_id = 0;
bin_id < static_cast<bst_omp_uint>(nbins - rest); bin_id += kUnroll) {
GHistEntry pb[kUnroll];
GHistEntry sb[kUnroll];
tree::GradStats pb[kUnroll];
tree::GradStats sb[kUnroll];
for (int k = 0; k < kUnroll; ++k) {
pb[k] = parent.begin[bin_id + k];
pb[k] = p_parent[bin_id + k];
}
for (int k = 0; k < kUnroll; ++k) {
sb[k] = sibling.begin[bin_id + k];
sb[k] = p_sibling[bin_id + k];
}
for (int k = 0; k < kUnroll; ++k) {
self.begin[bin_id + k].SetSubtract(pb[k], sb[k]);
p_self[bin_id + k].SetSubstract(pb[k], sb[k]);
}
}
for (uint32_t bin_id = nbins - rest; bin_id < nbins; ++bin_id) {
self.begin[bin_id].SetSubtract(parent.begin[bin_id], sibling.begin[bin_id]);
p_self[bin_id].SetSubstract(p_parent[bin_id], p_sibling[bin_id]);
}
}