Extract Sketch Entry from hist maker. (#7503)

* Extract Sketch Entry from hist maker.

* Add a new sketch container for sorted inputs.
* Optimize bin search.
This commit is contained in:
Jiaming Yuan
2021-12-18 05:36:56 +08:00
committed by GitHub
parent b4a1236cfc
commit 9ab73f737e
15 changed files with 393 additions and 217 deletions

View File

@@ -118,7 +118,7 @@ void GHistIndexMatrix::PushBatch(SparsePage const &batch,
[](auto idx, auto) { return idx; });
}
common::ParallelFor(bst_omp_uint(nbins), n_threads, [&](bst_omp_uint idx) {
common::ParallelFor(nbins, n_threads, [&](bst_omp_uint idx) {
for (int32_t tid = 0; tid < n_threads; ++tid) {
hit_count[idx] += hit_count_tloc_[tid * nbins + idx];
hit_count_tloc_[tid * nbins + idx] = 0; // reset for next batch
@@ -126,8 +126,11 @@ void GHistIndexMatrix::PushBatch(SparsePage const &batch,
});
}
void GHistIndexMatrix::Init(DMatrix* p_fmat, int max_bins, common::Span<float> hess) {
cut = common::SketchOnDMatrix(p_fmat, max_bins, hess);
void GHistIndexMatrix::Init(DMatrix *p_fmat, int max_bins, bool sorted_sketch,
common::Span<float> hess) {
// We use sorted sketching for the approx tree method since it is faster to
// compute, at the cost of higher memory usage.
cut = common::SketchOnDMatrix(p_fmat, max_bins, sorted_sketch, hess);
max_num_bins = max_bins;
const int32_t nthread = omp_get_max_threads();