Optional by-column histogram build. (#8233)

Co-authored-by: dmitry.razdoburdin <drazdobu@jfldaal005.jf.intel.com>
This commit is contained in:
Dmitry Razdoburdin
2022-09-21 23:16:13 +02:00
committed by GitHub
parent b791446623
commit eb7bbee2c9
5 changed files with 152 additions and 70 deletions

View File

@@ -59,7 +59,8 @@ class HistogramBuilder {
GHistIndexMatrix const &gidx,
std::vector<ExpandEntry> const &nodes_for_explicit_hist_build,
common::RowSetCollection const &row_set_collection,
const std::vector<GradientPair> &gpair_h) {
const std::vector<GradientPair> &gpair_h,
bool force_read_by_column) {
const size_t n_nodes = nodes_for_explicit_hist_build.size();
CHECK_GT(n_nodes, 0);
@@ -86,7 +87,8 @@ class HistogramBuilder {
elem.begin + end_of_row_set, nid);
auto hist = buffer_.GetInitializedHist(tid, nid_in_set);
if (rid_set.Size() != 0) {
builder_.template BuildHist<any_missing>(gpair_h, rid_set, gidx, hist);
builder_.template BuildHist<any_missing>(gpair_h, rid_set, gidx, hist,
force_read_by_column);
}
});
}
@@ -112,7 +114,8 @@ class HistogramBuilder {
RegTree *p_tree, common::RowSetCollection const &row_set_collection,
std::vector<ExpandEntry> const &nodes_for_explicit_hist_build,
std::vector<ExpandEntry> const &nodes_for_subtraction_trick,
std::vector<GradientPair> const &gpair) {
std::vector<GradientPair> const &gpair,
bool force_read_by_column = false) {
int starting_index = std::numeric_limits<int>::max();
int sync_count = 0;
if (page_id == 0) {
@@ -123,11 +126,13 @@ class HistogramBuilder {
if (gidx.IsDense()) {
this->BuildLocalHistograms<false>(page_id, space, gidx,
nodes_for_explicit_hist_build,
row_set_collection, gpair);
row_set_collection, gpair,
force_read_by_column);
} else {
this->BuildLocalHistograms<true>(page_id, space, gidx,
nodes_for_explicit_hist_build,
row_set_collection, gpair);
row_set_collection, gpair,
force_read_by_column);
}
CHECK_GE(n_batches_, 1);
@@ -148,7 +153,8 @@ class HistogramBuilder {
common::RowSetCollection const &row_set_collection,
std::vector<ExpandEntry> const &nodes_for_explicit_hist_build,
std::vector<ExpandEntry> const &nodes_for_subtraction_trick,
std::vector<GradientPair> const &gpair) {
std::vector<GradientPair> const &gpair,
bool force_read_by_column = false) {
const size_t n_nodes = nodes_for_explicit_hist_build.size();
// create space of size (# rows in each node)
common::BlockedSpace2d space(
@@ -160,7 +166,7 @@ class HistogramBuilder {
256);
this->BuildHist(page_id, space, gidx, p_tree, row_set_collection,
nodes_for_explicit_hist_build, nodes_for_subtraction_trick,
gpair);
gpair, force_read_by_column);
}
void SyncHistogramDistributed(