Support external memory in CPU histogram building. (#7372)

This commit is contained in:
Jiaming Yuan
2021-11-23 01:13:33 +08:00
committed by GitHub
parent d33854af1b
commit 176110a22d
6 changed files with 305 additions and 162 deletions

View File

@@ -460,7 +460,7 @@ class ParallelGHistBuilder {
}
// Reduce following bins (begin, end] for nid-node in dst across threads
void ReduceHist(size_t nid, size_t begin, size_t end) {
void ReduceHist(size_t nid, size_t begin, size_t end) const {
CHECK_GT(end, begin);
CHECK_LT(nid, nodes_);
@@ -486,7 +486,6 @@ class ParallelGHistBuilder {
}
}
protected:
void MatchThreadsToNodes(const BlockedSpace2d& space) {
const size_t space_size = space.Size();
const size_t chunck_size = space_size / nthreads_ + !!(space_size % nthreads_);
@@ -533,6 +532,7 @@ class ParallelGHistBuilder {
}
}
private:
void MatchNodeNidPairToHist() {
size_t hist_allocated_additionally = 0;
@@ -586,26 +586,18 @@ class GHistBuilder {
using GHistRowT = GHistRow<GradientSumT>;
GHistBuilder() = default;
GHistBuilder(size_t nthread, uint32_t nbins) : nthread_{nthread}, nbins_{nbins} {}
explicit GHistBuilder(uint32_t nbins): nbins_{nbins} {}
// construct a histogram via histogram aggregation
template <bool any_missing>
void BuildHist(const std::vector<GradientPair>& gpair,
void BuildHist(const std::vector<GradientPair> &gpair,
const RowSetCollection::Elem row_indices,
const GHistIndexMatrix& gmat,
GHistRowT hist);
// construct a histogram via subtraction trick
void SubtractionTrick(GHistRowT self,
GHistRowT sibling,
GHistRowT parent);
const GHistIndexMatrix &gmat, GHistRowT hist) const;
uint32_t GetNumBins() const {
return nbins_;
}
private:
/*! \brief number of threads for parallel computation */
size_t nthread_ { 0 };
/*! \brief number of all bins over all features */
uint32_t nbins_ { 0 };
};