Small refactor for hist builder. (#8698)

- Use span instead of vector as parameter. No perf change, as the builder works on pointers.
- Use const pointer for reg tree.
This commit is contained in:
Jiaming Yuan
2023-01-30 14:06:41 +08:00
committed by GitHub
parent 8af98e30fc
commit 21a28f2cc5
3 changed files with 95 additions and 120 deletions

View File

@@ -1,5 +1,5 @@
/*!
* Copyright 2017-2020 by Contributors
/**
* Copyright 2017-2023 by XGBoost Contributors
* \file hist_util.cc
*/
#include <dmlc/timer.h>
@@ -193,9 +193,9 @@ class GHistBuildingManager {
};
template <bool do_prefetch, class BuildingManager>
void RowsWiseBuildHistKernel(const std::vector<GradientPair> &gpair,
const RowSetCollection::Elem row_indices, const GHistIndexMatrix &gmat,
GHistRow hist) {
void RowsWiseBuildHistKernel(Span<GradientPair const> gpair,
const RowSetCollection::Elem row_indices, const GHistIndexMatrix &gmat,
GHistRow hist) {
constexpr bool kAnyMissing = BuildingManager::kAnyMissing;
constexpr bool kFirstPage = BuildingManager::kFirstPage;
using BinIdxType = typename BuildingManager::BinIdxType;
@@ -262,9 +262,9 @@ void RowsWiseBuildHistKernel(const std::vector<GradientPair> &gpair,
}
template <class BuildingManager>
void ColsWiseBuildHistKernel(const std::vector<GradientPair> &gpair,
const RowSetCollection::Elem row_indices, const GHistIndexMatrix &gmat,
GHistRow hist) {
void ColsWiseBuildHistKernel(Span<GradientPair const> gpair,
const RowSetCollection::Elem row_indices, const GHistIndexMatrix &gmat,
GHistRow hist) {
constexpr bool kAnyMissing = BuildingManager::kAnyMissing;
constexpr bool kFirstPage = BuildingManager::kFirstPage;
using BinIdxType = typename BuildingManager::BinIdxType;
@@ -315,9 +315,8 @@ void ColsWiseBuildHistKernel(const std::vector<GradientPair> &gpair,
}
template <class BuildingManager>
void BuildHistDispatch(const std::vector<GradientPair> &gpair,
const RowSetCollection::Elem row_indices, const GHistIndexMatrix &gmat,
GHistRow hist) {
void BuildHistDispatch(Span<GradientPair const> gpair, const RowSetCollection::Elem row_indices,
const GHistIndexMatrix &gmat, GHistRow hist) {
if (BuildingManager::kReadByColumn) {
ColsWiseBuildHistKernel<BuildingManager>(gpair, row_indices, gmat, hist);
} else {
@@ -344,33 +343,31 @@ void BuildHistDispatch(const std::vector<GradientPair> &gpair,
}
template <bool any_missing>
void GHistBuilder::BuildHist(const std::vector<GradientPair> &gpair,
const RowSetCollection::Elem row_indices,
const GHistIndexMatrix &gmat,
void GHistBuilder::BuildHist(Span<GradientPair const> gpair,
const RowSetCollection::Elem row_indices, const GHistIndexMatrix &gmat,
GHistRow hist, bool force_read_by_column) const {
/* force_read_by_column is used for testing the columnwise building of histograms.
* default force_read_by_column = false
*/
constexpr double kAdhocL2Size = 1024 * 1024 * 0.8;
const bool hist_fit_to_l2 = kAdhocL2Size > 2*sizeof(float)*gmat.cut.Ptrs().back();
const bool hist_fit_to_l2 = kAdhocL2Size > 2 * sizeof(float) * gmat.cut.Ptrs().back();
bool first_page = gmat.base_rowid == 0;
bool read_by_column = !hist_fit_to_l2 && !any_missing;
auto bin_type_size = gmat.index.GetBinTypeSize();
GHistBuildingManager<any_missing>::DispatchAndExecute(
{first_page, read_by_column || force_read_by_column, bin_type_size},
[&](auto t) {
using BuildingManager = decltype(t);
BuildHistDispatch<BuildingManager>(gpair, row_indices, gmat, hist);
});
{first_page, read_by_column || force_read_by_column, bin_type_size}, [&](auto t) {
using BuildingManager = decltype(t);
BuildHistDispatch<BuildingManager>(gpair, row_indices, gmat, hist);
});
}
template void GHistBuilder::BuildHist<true>(const std::vector<GradientPair> &gpair,
template void GHistBuilder::BuildHist<true>(Span<GradientPair const> gpair,
const RowSetCollection::Elem row_indices,
const GHistIndexMatrix &gmat, GHistRow hist,
bool force_read_by_column) const;
template void GHistBuilder::BuildHist<false>(const std::vector<GradientPair> &gpair,
template void GHistBuilder::BuildHist<false>(Span<GradientPair const> gpair,
const RowSetCollection::Elem row_indices,
const GHistIndexMatrix &gmat, GHistRow hist,
bool force_read_by_column) const;