From 149f209af6e332487abb9f1bb02153c589e21897 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Mon, 9 Aug 2021 21:15:21 +0800 Subject: [PATCH] Extract histogram builder from CPU Hist. (#7152) * Extract the CPU histogram builder. * Fix tests. * Reduce number of histograms being built. --- src/common/hist_util.h | 2 +- src/tree/hist/histogram.h | 302 ++++++++++++++++++++++++++ src/tree/updater_quantile_hist.cc | 283 ++++-------------------- src/tree/updater_quantile_hist.h | 142 +----------- tests/cpp/tree/hist/test_histogram.cc | 298 +++++++++++++++++++++++++ tests/cpp/tree/test_quantile_hist.cc | 296 ------------------------- 6 files changed, 647 insertions(+), 676 deletions(-) create mode 100644 src/tree/hist/histogram.h create mode 100644 tests/cpp/tree/hist/test_histogram.cc diff --git a/src/common/hist_util.h b/src/common/hist_util.h index be42e197f..04dd3e3d9 100644 --- a/src/common/hist_util.h +++ b/src/common/hist_util.h @@ -301,7 +301,7 @@ class HistCollection { // access histogram for i-th node GHistRowT operator[](bst_uint nid) const { constexpr uint32_t kMax = std::numeric_limits::max(); - const size_t id = row_ptr_[nid]; + const size_t id = row_ptr_.at(nid); CHECK_NE(id, kMax); GradientPairT* ptr = nullptr; if (contiguous_allocation_) { diff --git a/src/tree/hist/histogram.h b/src/tree/hist/histogram.h new file mode 100644 index 000000000..70c756e76 --- /dev/null +++ b/src/tree/hist/histogram.h @@ -0,0 +1,302 @@ +/*! + * Copyright 2021 by XGBoost Contributors + */ +#ifndef XGBOOST_TREE_HIST_HISTOGRAM_H_ +#define XGBOOST_TREE_HIST_HISTOGRAM_H_ + +#include +#include +#include + +#include "rabit/rabit.h" +#include "xgboost/tree_model.h" +#include "../../common/hist_util.h" + +namespace xgboost { +namespace tree { +template class HistogramBuilder { + using GradientPairT = xgboost::detail::GradientPairInternal; + using GHistRowT = common::GHistRow; + + /*! \brief culmulative histogram of gradients. */ + common::HistCollection hist_; + /*! \brief culmulative local parent histogram of gradients. */ + common::HistCollection hist_local_worker_; + common::GHistBuilder builder_; + common::ParallelGHistBuilder buffer_; + rabit::Reducer reducer_; + int32_t max_bin_ {-1}; + int32_t n_threads_ {-1}; + // Whether XGBoost is running in distributed environment. + bool is_distributed_ {false}; + + public: + /** + * \param total_bins Total number of bins across all features + * \param max_bin_per_feat Maximum number of bins per feature, same as the `max_bin` + * training parameter. + * \param n_threads Number of threads. + * \param is_distributed Mostly used for testing to allow injecting parameters instead + * of using global rabit variable. 
+ */ + void Reset(uint32_t total_bins, int32_t max_bin_per_feat, int32_t n_threads, + bool is_distributed = rabit::IsDistributed()) { + CHECK_GE(n_threads, 1); + n_threads_ = n_threads; + CHECK_GE(max_bin_per_feat, 2); + max_bin_ = max_bin_per_feat; + hist_.Init(total_bins); + hist_local_worker_.Init(total_bins); + buffer_.Init(total_bins); + builder_ = common::GHistBuilder(n_threads, total_bins); + is_distributed_ = is_distributed; + } + + template + void + BuildLocalHistograms(DMatrix *p_fmat, + std::vector nodes_for_explicit_hist_build, + common::RowSetCollection const &row_set_collection, + const std::vector &gpair_h) { + const size_t n_nodes = nodes_for_explicit_hist_build.size(); + + // create space of size (# rows in each node) + common::BlockedSpace2d space( + n_nodes, + [&](size_t node) { + const int32_t nid = nodes_for_explicit_hist_build[node].nid; + return row_set_collection[nid].Size(); + }, + 256); + + std::vector target_hists(n_nodes); + for (size_t i = 0; i < n_nodes; ++i) { + const int32_t nid = nodes_for_explicit_hist_build[i].nid; + target_hists[i] = hist_[nid]; + } + buffer_.Reset(this->n_threads_, n_nodes, space, target_hists); + + // Parallel processing by nodes and data in each node + for (auto const &gmat : p_fmat->GetBatches( + BatchParam{GenericParameter::kCpuId, max_bin_})) { + common::ParallelFor2d( + space, this->n_threads_, [&](size_t nid_in_set, common::Range1d r) { + const auto tid = static_cast(omp_get_thread_num()); + const int32_t nid = nodes_for_explicit_hist_build[nid_in_set].nid; + + auto start_of_row_set = row_set_collection[nid].begin; + auto rid_set = common::RowSetCollection::Elem( + start_of_row_set + r.begin(), start_of_row_set + r.end(), nid); + builder_.template BuildHist( + gpair_h, rid_set, gmat, + buffer_.GetInitializedHist(tid, nid_in_set)); + }); + } + } + + void + AddHistRows(int *starting_index, int *sync_count, + std::vector const &nodes_for_explicit_hist_build, + std::vector const &nodes_for_subtraction_trick, + RegTree *p_tree) { + if (is_distributed_) { + this->AddHistRowsDistributed(starting_index, sync_count, + nodes_for_explicit_hist_build, + nodes_for_subtraction_trick, p_tree); + } else { + this->AddHistRowsLocal(starting_index, sync_count, + nodes_for_explicit_hist_build, + nodes_for_subtraction_trick); + } + } + + /* Main entry point of this class, build histogram for tree nodes. 
*/ + void BuildHist(DMatrix *p_fmat, RegTree *p_tree, + common::RowSetCollection const &row_set_collection, + std::vector const &nodes_for_explicit_hist_build, + std::vector const &nodes_for_subtraction_trick, + std::vector const &gpair) { + int starting_index = std::numeric_limits::max(); + int sync_count = 0; + this->AddHistRows(&starting_index, &sync_count, + nodes_for_explicit_hist_build, + nodes_for_subtraction_trick, p_tree); + if (p_fmat->IsDense()) { + BuildLocalHistograms(p_fmat, nodes_for_explicit_hist_build, + row_set_collection, gpair); + } else { + BuildLocalHistograms(p_fmat, nodes_for_explicit_hist_build, + row_set_collection, gpair); + } + if (is_distributed_) { + this->SyncHistogramDistributed(p_tree, nodes_for_explicit_hist_build, + nodes_for_subtraction_trick, + starting_index, sync_count); + } else { + this->SyncHistogramLocal(p_tree, nodes_for_explicit_hist_build, + nodes_for_subtraction_trick, starting_index, + sync_count); + } + } + + void SyncHistogramDistributed( + RegTree *p_tree, + std::vector const &nodes_for_explicit_hist_build, + std::vector const &nodes_for_subtraction_trick, + int starting_index, int sync_count) { + const size_t nbins = builder_.GetNumBins(); + common::BlockedSpace2d space( + nodes_for_explicit_hist_build.size(), [&](size_t) { return nbins; }, + 1024); + common::ParallelFor2d( + space, n_threads_, [&](size_t node, common::Range1d r) { + const auto &entry = nodes_for_explicit_hist_build[node]; + auto this_hist = this->hist_[entry.nid]; + // Merging histograms from each thread into once + buffer_.ReduceHist(node, r.begin(), r.end()); + // Store posible parent node + auto this_local = hist_local_worker_[entry.nid]; + common::CopyHist(this_local, this_hist, r.begin(), r.end()); + + if (!(*p_tree)[entry.nid].IsRoot()) { + const size_t parent_id = (*p_tree)[entry.nid].Parent(); + const int subtraction_node_id = + nodes_for_subtraction_trick[node].nid; + auto parent_hist = this->hist_local_worker_[parent_id]; + auto sibling_hist = this->hist_[subtraction_node_id]; + common::SubtractionHist(sibling_hist, parent_hist, this_hist, + r.begin(), r.end()); + // Store posible parent node + auto sibling_local = hist_local_worker_[subtraction_node_id]; + common::CopyHist(sibling_local, sibling_hist, r.begin(), r.end()); + } + }); + + reducer_.Allreduce(this->hist_[starting_index].data(), + builder_.GetNumBins() * sync_count); + + ParallelSubtractionHist(space, nodes_for_explicit_hist_build, + nodes_for_subtraction_trick, p_tree); + + common::BlockedSpace2d space2( + nodes_for_subtraction_trick.size(), [&](size_t) { return nbins; }, + 1024); + ParallelSubtractionHist(space2, nodes_for_subtraction_trick, + nodes_for_explicit_hist_build, p_tree); + } + + void SyncHistogramLocal( + RegTree *p_tree, + std::vector const &nodes_for_explicit_hist_build, + std::vector const &nodes_for_subtraction_trick, + int starting_index, int sync_count) { + const size_t nbins = this->builder_.GetNumBins(); + common::BlockedSpace2d space( + nodes_for_explicit_hist_build.size(), [&](size_t) { return nbins; }, + 1024); + + common::ParallelFor2d( + space, this->n_threads_, [&](size_t node, common::Range1d r) { + const auto &entry = nodes_for_explicit_hist_build[node]; + auto this_hist = this->hist_[entry.nid]; + // Merging histograms from each thread into once + this->buffer_.ReduceHist(node, r.begin(), r.end()); + + if (!(*p_tree)[entry.nid].IsRoot()) { + const size_t parent_id = (*p_tree)[entry.nid].Parent(); + const int subtraction_node_id = + 
nodes_for_subtraction_trick[node].nid; + auto parent_hist = this->hist_[parent_id]; + auto sibling_hist = this->hist_[subtraction_node_id]; + common::SubtractionHist(sibling_hist, parent_hist, this_hist, + r.begin(), r.end()); + } + }); + } + + public: + /* Getters for tests. */ + common::HistCollection const& Histogram() { + return hist_; + } + auto& Buffer() { return buffer_; } + + private: + void + ParallelSubtractionHist(const common::BlockedSpace2d &space, + const std::vector &nodes, + const std::vector &subtraction_nodes, + const RegTree *p_tree) { + common::ParallelFor2d( + space, this->n_threads_, [&](size_t node, common::Range1d r) { + const auto &entry = nodes[node]; + if (!((*p_tree)[entry.nid].IsLeftChild())) { + auto this_hist = this->hist_[entry.nid]; + + if (!(*p_tree)[entry.nid].IsRoot()) { + const int subtraction_node_id = subtraction_nodes[node].nid; + auto parent_hist = hist_[(*p_tree)[entry.nid].Parent()]; + auto sibling_hist = hist_[subtraction_node_id]; + common::SubtractionHist(this_hist, parent_hist, sibling_hist, + r.begin(), r.end()); + } + } + }); + } + + // Add a tree node to histogram buffer in local training environment. + void AddHistRowsLocal( + int *starting_index, int *sync_count, + std::vector const &nodes_for_explicit_hist_build, + std::vector const &nodes_for_subtraction_trick) { + for (auto const &entry : nodes_for_explicit_hist_build) { + int nid = entry.nid; + this->hist_.AddHistRow(nid); + (*starting_index) = std::min(nid, (*starting_index)); + } + (*sync_count) = nodes_for_explicit_hist_build.size(); + + for (auto const &node : nodes_for_subtraction_trick) { + this->hist_.AddHistRow(node.nid); + } + this->hist_.AllocateAllData(); + } + + void AddHistRowsDistributed( + int *starting_index, int *sync_count, + std::vector const &nodes_for_explicit_hist_build, + std::vector const &nodes_for_subtraction_trick, + RegTree *p_tree) { + const size_t explicit_size = nodes_for_explicit_hist_build.size(); + const size_t subtaction_size = nodes_for_subtraction_trick.size(); + std::vector merged_node_ids(explicit_size + subtaction_size); + for (size_t i = 0; i < explicit_size; ++i) { + merged_node_ids[i] = nodes_for_explicit_hist_build[i].nid; + } + for (size_t i = 0; i < subtaction_size; ++i) { + merged_node_ids[explicit_size + i] = nodes_for_subtraction_trick[i].nid; + } + std::sort(merged_node_ids.begin(), merged_node_ids.end()); + int n_left = 0; + for (auto const &nid : merged_node_ids) { + if ((*p_tree)[nid].IsLeftChild()) { + this->hist_.AddHistRow(nid); + (*starting_index) = std::min(nid, (*starting_index)); + n_left++; + this->hist_local_worker_.AddHistRow(nid); + } + } + for (auto const &nid : merged_node_ids) { + if (!((*p_tree)[nid].IsLeftChild())) { + this->hist_.AddHistRow(nid); + this->hist_local_worker_.AddHistRow(nid); + } + } + this->hist_.AllocateAllData(); + this->hist_local_worker_.AllocateAllData(); + (*sync_count) = std::max(1, n_left); + } +}; +} // namespace tree +} // namespace xgboost +#endif // XGBOOST_TREE_HIST_HISTOGRAM_H_ diff --git a/src/tree/updater_quantile_hist.cc b/src/tree/updater_quantile_hist.cc index 7b156d325..9c946fc5f 100644 --- a/src/tree/updater_quantile_hist.cc +++ b/src/tree/updater_quantile_hist.cc @@ -53,13 +53,6 @@ void QuantileHistMaker::SetBuilder(const size_t n_trees, DMatrix *dmat) { builder->reset( new Builder(n_trees, param_, std::move(pruner_), dmat)); - if (rabit::IsDistributed()) { - (*builder)->SetHistSynchronizer(new DistributedHistSynchronizer()); - (*builder)->SetHistRowsAdder(new 
DistributedHistRowsAdder()); - } else { - (*builder)->SetHistSynchronizer(new BatchHistSynchronizer()); - (*builder)->SetHistRowsAdder(new BatchHistRowsAdder()); - } } template @@ -96,7 +89,7 @@ void QuantileHistMaker::Update(HostDeviceVector *gpair, const size_t n_trees = trees.size(); if (hist_maker_param_.single_precision_histogram) { if (!float_builder_) { - SetBuilder(n_trees, &float_builder_, dmat); + this->SetBuilder(n_trees, &float_builder_, dmat); } CallBuilderUpdate(float_builder_, gpair, dmat, gmat, trees); } else { @@ -123,199 +116,34 @@ bool QuantileHistMaker::UpdatePredictionCache( } template -void BatchHistSynchronizer::SyncHistograms(BuilderT *builder, - int, - int, - RegTree *p_tree) { - builder->builder_monitor_.Start("SyncHistograms"); - const size_t nbins = builder->hist_builder_.GetNumBins(); - common::BlockedSpace2d space(builder->nodes_for_explicit_hist_build_.size(), [&](size_t) { - return nbins; - }, 1024); +QuantileHistMaker::Builder::~Builder() = default; - common::ParallelFor2d(space, builder->nthread_, [&](size_t node, common::Range1d r) { - const auto& entry = builder->nodes_for_explicit_hist_build_[node]; - auto this_hist = builder->hist_[entry.nid]; - // Merging histograms from each thread into once - builder->hist_buffer_.ReduceHist(node, r.begin(), r.end()); - - if (!(*p_tree)[entry.nid].IsRoot()) { - const size_t parent_id = (*p_tree)[entry.nid].Parent(); - const int subtraction_node_id = builder->nodes_for_subtraction_trick_[node].nid; - auto parent_hist = builder->hist_[parent_id]; - auto sibling_hist = builder->hist_[subtraction_node_id]; - SubtractionHist(sibling_hist, parent_hist, this_hist, r.begin(), r.end()); - } - }); - builder->builder_monitor_.Stop("SyncHistograms"); -} - -template -void DistributedHistSynchronizer::SyncHistograms(BuilderT* builder, - int starting_index, - int sync_count, - RegTree *p_tree) { - builder->builder_monitor_.Start("SyncHistograms"); - const size_t nbins = builder->hist_builder_.GetNumBins(); - common::BlockedSpace2d space(builder->nodes_for_explicit_hist_build_.size(), [&](size_t) { - return nbins; - }, 1024); - common::ParallelFor2d(space, builder->nthread_, [&](size_t node, common::Range1d r) { - const auto& entry = builder->nodes_for_explicit_hist_build_[node]; - auto this_hist = builder->hist_[entry.nid]; - // Merging histograms from each thread into once - builder->hist_buffer_.ReduceHist(node, r.begin(), r.end()); - // Store posible parent node - auto this_local = builder->hist_local_worker_[entry.nid]; - CopyHist(this_local, this_hist, r.begin(), r.end()); - - if (!(*p_tree)[entry.nid].IsRoot()) { - const size_t parent_id = (*p_tree)[entry.nid].Parent(); - const int subtraction_node_id = builder->nodes_for_subtraction_trick_[node].nid; - auto parent_hist = builder->hist_local_worker_[parent_id]; - auto sibling_hist = builder->hist_[subtraction_node_id]; - SubtractionHist(sibling_hist, parent_hist, this_hist, r.begin(), r.end()); - // Store posible parent node - auto sibling_local = builder->hist_local_worker_[subtraction_node_id]; - CopyHist(sibling_local, sibling_hist, r.begin(), r.end()); - } - }); - builder->builder_monitor_.Start("SyncHistogramsAllreduce"); - - builder->histred_.Allreduce(builder->hist_[starting_index].data(), - builder->hist_builder_.GetNumBins() * sync_count); - - builder->builder_monitor_.Stop("SyncHistogramsAllreduce"); - - ParallelSubtractionHist(builder, space, builder->nodes_for_explicit_hist_build_, - builder->nodes_for_subtraction_trick_, p_tree); - - common::BlockedSpace2d 
space2(builder->nodes_for_subtraction_trick_.size(), [&](size_t) { - return nbins; - }, 1024); - ParallelSubtractionHist(builder, space2, builder->nodes_for_subtraction_trick_, - builder->nodes_for_explicit_hist_build_, p_tree); - builder->builder_monitor_.Stop("SyncHistograms"); -} - -template -void DistributedHistSynchronizer::ParallelSubtractionHist( - BuilderT* builder, - const common::BlockedSpace2d& space, - const std::vector& nodes, - const std::vector& subtraction_nodes, - const RegTree * p_tree) { - common::ParallelFor2d(space, builder->nthread_, [&](size_t node, common::Range1d r) { - const auto& entry = nodes[node]; - if (!((*p_tree)[entry.nid].IsLeftChild())) { - auto this_hist = builder->hist_[entry.nid]; - - if (!(*p_tree)[entry.nid].IsRoot()) { - const int subtraction_node_id = subtraction_nodes[node].nid; - auto parent_hist = builder->hist_[(*p_tree)[entry.nid].Parent()]; - auto sibling_hist = builder->hist_[subtraction_node_id]; - SubtractionHist(this_hist, parent_hist, sibling_hist, r.begin(), r.end()); - } - } - }); -} - -template -void BatchHistRowsAdder::AddHistRows(BuilderT *builder, - int *starting_index, - int *sync_count, - RegTree *) { - builder->builder_monitor_.Start("AddHistRows"); - - for (auto const& entry : builder->nodes_for_explicit_hist_build_) { - int nid = entry.nid; - builder->hist_.AddHistRow(nid); - (*starting_index) = std::min(nid, (*starting_index)); - } - (*sync_count) = builder->nodes_for_explicit_hist_build_.size(); - - for (auto const& node : builder->nodes_for_subtraction_trick_) { - builder->hist_.AddHistRow(node.nid); - } - builder->hist_.AllocateAllData(); - builder->builder_monitor_.Stop("AddHistRows"); -} - -template -void DistributedHistRowsAdder::AddHistRows(BuilderT *builder, - int *starting_index, - int *sync_count, - RegTree *p_tree) { - builder->builder_monitor_.Start("AddHistRows"); - const size_t explicit_size = builder->nodes_for_explicit_hist_build_.size(); - const size_t subtaction_size = builder->nodes_for_subtraction_trick_.size(); - std::vector merged_node_ids(explicit_size + subtaction_size); - for (size_t i = 0; i < explicit_size; ++i) { - merged_node_ids[i] = builder->nodes_for_explicit_hist_build_[i].nid; - } - for (size_t i = 0; i < subtaction_size; ++i) { - merged_node_ids[explicit_size + i] = - builder->nodes_for_subtraction_trick_[i].nid; - } - std::sort(merged_node_ids.begin(), merged_node_ids.end()); - int n_left = 0; - for (auto const& nid : merged_node_ids) { - if ((*p_tree)[nid].IsLeftChild()) { - builder->hist_.AddHistRow(nid); - (*starting_index) = std::min(nid, (*starting_index)); - n_left++; - builder->hist_local_worker_.AddHistRow(nid); - } - } - for (auto const& nid : merged_node_ids) { - if (!((*p_tree)[nid].IsLeftChild())) { - builder->hist_.AddHistRow(nid); - builder->hist_local_worker_.AddHistRow(nid); - } - } - builder->hist_.AllocateAllData(); - builder->hist_local_worker_.AllocateAllData(); - (*sync_count) = std::max(1, n_left); - builder->builder_monitor_.Stop("AddHistRows"); -} - -template -void QuantileHistMaker::Builder::SetHistSynchronizer( - HistSynchronizer *sync) { - hist_synchronizer_.reset(sync); -} - -template -void QuantileHistMaker::Builder::SetHistRowsAdder( - HistRowsAdder *adder) { - hist_rows_adder_.reset(adder); -} template template void QuantileHistMaker::Builder::InitRoot( - const GHistIndexMatrix &gmat, const DMatrix &fmat, RegTree *p_tree, - const std::vector &gpair_h, int *num_leaves, - std::vector *expand) { + DMatrix *p_fmat, RegTree *p_tree, const std::vector &gpair_h, + 
int *num_leaves, std::vector *expand) { CPUExpandEntry node(CPUExpandEntry::kRootNid, p_tree->GetDepth(0), 0.0f); nodes_for_explicit_hist_build_.clear(); nodes_for_subtraction_trick_.clear(); nodes_for_explicit_hist_build_.push_back(node); - int starting_index = std::numeric_limits::max(); - int sync_count = 0; - - hist_rows_adder_->AddHistRows(this, &starting_index, &sync_count, p_tree); - BuildLocalHistograms(gmat, p_tree, gpair_h); - hist_synchronizer_->SyncHistograms(this, starting_index, sync_count, p_tree); + this->histogram_builder_->BuildHist(p_fmat, p_tree, row_set_collection_, + nodes_for_explicit_hist_build_, + nodes_for_subtraction_trick_, gpair_h); { auto nid = CPUExpandEntry::kRootNid; - GHistRowT hist = hist_[nid]; + GHistRowT hist = this->histogram_builder_->Histogram()[nid]; GradientPairT grad_stat; if (data_layout_ == DataLayout::kDenseDataZeroBased || data_layout_ == DataLayout::kDenseDataOneBased) { + auto const &gmat = *(p_fmat + ->GetBatches(BatchParam{ + GenericParameter::kCpuId, param_.max_bin}) + .begin()); const std::vector &row_ptr = gmat.cut.Ptrs(); const uint32_t ibegin = row_ptr[fid_least_bins_]; const uint32_t iend = row_ptr[fid_least_bins_ + 1]; @@ -329,7 +157,8 @@ void QuantileHistMaker::Builder::InitRoot( for (const size_t *it = e.begin; it < e.end; ++it) { grad_stat.Add(gpair_h[*it].GetGrad(), gpair_h[*it].GetHess()); } - histred_.Allreduce(&grad_stat, 1); + rabit::Allreduce( + reinterpret_cast(&grad_stat), 2); } auto weight = evaluator_->InitRoot(GradStats{grad_stat}); @@ -339,7 +168,10 @@ void QuantileHistMaker::Builder::InitRoot( std::vector entries{node}; builder_monitor_.Start("EvaluateSplits"); - evaluator_->EvaluateSplits(hist_, gmat, *p_tree, &entries); + for (auto const &gmat : p_fmat->GetBatches( + BatchParam{GenericParameter::kCpuId, param_.max_bin})) { + evaluator_->EvaluateSplits(histogram_builder_->Histogram(), gmat, *p_tree, &entries); + } builder_monitor_.Stop("EvaluateSplits"); node = entries.front(); } @@ -348,46 +180,6 @@ void QuantileHistMaker::Builder::InitRoot( ++(*num_leaves); } -template -template -void QuantileHistMaker::Builder::BuildLocalHistograms( - const GHistIndexMatrix &gmat, - RegTree *p_tree, - const std::vector &gpair_h) { - builder_monitor_.Start("BuildLocalHistograms"); - - const size_t n_nodes = nodes_for_explicit_hist_build_.size(); - - // create space of size (# rows in each node) - common::BlockedSpace2d space(n_nodes, [&](size_t node) { - const int32_t nid = nodes_for_explicit_hist_build_[node].nid; - return row_set_collection_[nid].Size(); - }, 256); - - std::vector target_hists(n_nodes); - for (size_t i = 0; i < n_nodes; ++i) { - const int32_t nid = nodes_for_explicit_hist_build_[i].nid; - target_hists[i] = hist_[nid]; - } - - hist_buffer_.Reset(this->nthread_, n_nodes, space, target_hists); - - // Parallel processing by nodes and data in each node - common::ParallelFor2d(space, this->nthread_, [&](size_t nid_in_set, common::Range1d r) { - const auto tid = static_cast(omp_get_thread_num()); - const int32_t nid = nodes_for_explicit_hist_build_[nid_in_set].nid; - - auto start_of_row_set = row_set_collection_[nid].begin; - auto rid_set = RowSetCollection::Elem(start_of_row_set + r.begin(), - start_of_row_set + r.end(), - nid); - hist_builder_.template BuildHist(gpair_h, rid_set, gmat, - hist_buffer_.GetInitializedHist(tid, nid_in_set)); - }); - - builder_monitor_.Stop("BuildLocalHistograms"); -} - template void QuantileHistMaker::Builder::AddSplitsToTree( const std::vector& expand, @@ -448,10 +240,10 @@ void 
QuantileHistMaker::Builder::ExpandTree( Driver driver(static_cast(param_.grow_policy)); std::vector expand; - InitRoot(gmat, *p_fmat, p_tree, gpair_h, &num_leaves, &expand); + InitRoot(p_fmat, p_tree, gpair_h, &num_leaves, &expand); driver.Push(expand[0]); - int depth = 0; + int32_t depth = 0; while (!driver.IsEmpty()) { expand = driver.Pop(); depth = expand[0].depth + 1; @@ -463,19 +255,24 @@ void QuantileHistMaker::Builder::ExpandTree( AddSplitsToTree(expand, p_tree, &num_leaves, &nodes_for_apply_split); if (nodes_for_apply_split.size() != 0) { - ApplySplit(nodes_for_apply_split, gmat, column_matrix, hist_, p_tree); + ApplySplit(nodes_for_apply_split, gmat, column_matrix, p_tree); SplitSiblings(nodes_for_apply_split, &nodes_to_evaluate, p_tree); - int starting_index = std::numeric_limits::max(); - int sync_count = 0; - hist_rows_adder_->AddHistRows(this, &starting_index, &sync_count, p_tree); if (depth < param_.max_depth) { - BuildLocalHistograms(gmat, p_tree, gpair_h); - hist_synchronizer_->SyncHistograms(this, starting_index, sync_count, p_tree); + this->histogram_builder_->BuildHist( + p_fmat, p_tree, row_set_collection_, nodes_for_explicit_hist_build_, + nodes_for_subtraction_trick_, gpair_h); + } else { + int starting_index = std::numeric_limits::max(); + int sync_count = 0; + this->histogram_builder_->AddHistRows( + &starting_index, &sync_count, nodes_for_explicit_hist_build_, + nodes_for_subtraction_trick_, p_tree); } builder_monitor_.Start("EvaluateSplits"); - evaluator_->EvaluateSplits(hist_, gmat, *p_tree, &nodes_to_evaluate); + evaluator_->EvaluateSplits(this->histogram_builder_->Histogram(), gmat, + *p_tree, &nodes_to_evaluate); builder_monitor_.Stop("EvaluateSplits"); for (size_t i = 0; i < nodes_for_apply_split.size(); ++i) { @@ -606,11 +403,10 @@ size_t QuantileHistMaker::Builder::GetNumberOfTrees() { return n_trees_; } -template -void QuantileHistMaker::Builder::InitData(const GHistIndexMatrix& gmat, - const DMatrix& fmat, - const RegTree& tree, - std::vector* gpair) { +template +void QuantileHistMaker::Builder::InitData( + const GHistIndexMatrix &gmat, const DMatrix &fmat, const RegTree &tree, + std::vector *gpair) { CHECK((param_.max_depth > 0 || param_.max_leaves > 0)) << "max_depth or max_leaves cannot be both 0 (unlimited); " << "at least one should be a positive quantity."; @@ -626,10 +422,6 @@ void QuantileHistMaker::Builder::InitData(const GHistIndexMatrix& row_set_collection_.Clear(); // initialize histogram collection uint32_t nbins = gmat.cut.Ptrs().back(); - hist_.Init(nbins); - hist_local_worker_.Init(nbins); - hist_buffer_.Init(nbins); - // initialize histogram builder dmlc::OMPException exc; #pragma omp parallel @@ -639,7 +431,7 @@ void QuantileHistMaker::Builder::InitData(const GHistIndexMatrix& }); } exc.Rethrow(); - hist_builder_ = GHistBuilder(this->nthread_, nbins); + this->histogram_builder_->Reset(nbins, param_.max_bin, this->nthread_); std::vector& row_indices = *row_set_collection_.Data(); row_indices.resize(info.num_row_); @@ -815,7 +607,6 @@ template void QuantileHistMaker::Builder::ApplySplit(const std::vector nodes, const GHistIndexMatrix& gmat, const ColumnMatrix& column_matrix, - const HistCollection& hist, RegTree* p_tree) { builder_monitor_.Start("ApplySplit"); // 1. 
Find split condition for each split diff --git a/src/tree/updater_quantile_hist.h b/src/tree/updater_quantile_hist.h index 3c82f57f9..69e42b90d 100644 --- a/src/tree/updater_quantile_hist.h +++ b/src/tree/updater_quantile_hist.h @@ -22,6 +22,7 @@ #include "xgboost/json.h" #include "hist/evaluate_splits.h" +#include "hist/histogram.h" #include "constraints.h" #include "./param.h" #include "./driver.h" @@ -88,24 +89,6 @@ using xgboost::common::GHistBuilder; using xgboost::common::ColumnMatrix; using xgboost::common::Column; -template -class HistSynchronizer; - -template -class BatchHistSynchronizer; - -template -class DistributedHistSynchronizer; - -template -class HistRowsAdder; - -template -class BatchHistRowsAdder; - -template -class DistributedHistRowsAdder; - // training parameters specific to this algorithm struct CPUHistMakerTrainParam : public XGBoostParameter { @@ -198,20 +181,6 @@ class QuantileHistMaker: public TreeUpdater { } protected: - template - friend class HistSynchronizer; - template - friend class BatchHistSynchronizer; - template - friend class DistributedHistSynchronizer; - - template - friend class HistRowsAdder; - template - friend class BatchHistRowsAdder; - template - friend class DistributedHistRowsAdder; - CPUHistMakerTrainParam hist_maker_param_; // training parameter TrainParam param_; @@ -230,9 +199,12 @@ class QuantileHistMaker: public TreeUpdater { explicit Builder(const size_t n_trees, const TrainParam ¶m, std::unique_ptr pruner, DMatrix const *fmat) : n_trees_(n_trees), param_(param), pruner_(std::move(pruner)), - p_last_tree_(nullptr), p_last_fmat_(fmat) { + p_last_tree_(nullptr), p_last_fmat_(fmat), + histogram_builder_{ + new HistogramBuilder} { builder_monitor_.Init("Quantile::Builder"); } + ~Builder(); // update one tree, growing virtual void Update(const GHistIndexMatrix& gmat, const ColumnMatrix& column_matrix, @@ -240,28 +212,10 @@ class QuantileHistMaker: public TreeUpdater { DMatrix* p_fmat, RegTree* p_tree); - inline void SubtractionTrick(GHistRowT self, - GHistRowT sibling, - GHistRowT parent) { - builder_monitor_.Start("SubtractionTrick"); - hist_builder_.SubtractionTrick(self, sibling, parent); - builder_monitor_.Stop("SubtractionTrick"); - } - bool UpdatePredictionCache(const DMatrix* data, VectorView out_preds); - void SetHistSynchronizer(HistSynchronizer* sync); - void SetHistRowsAdder(HistRowsAdder* adder); - protected: - friend class HistSynchronizer; - friend class BatchHistSynchronizer; - friend class DistributedHistSynchronizer; - friend class HistRowsAdder; - friend class BatchHistRowsAdder; - friend class DistributedHistRowsAdder; - // initialize temp data structure void InitData(const GHistIndexMatrix& gmat, const DMatrix& fmat, @@ -278,7 +232,6 @@ class QuantileHistMaker: public TreeUpdater { void ApplySplit(std::vector nodes, const GHistIndexMatrix& gmat, const ColumnMatrix& column_matrix, - const HistCollection& hist, RegTree* p_tree); void AddSplitsToRowSet(const std::vector& nodes, RegTree* p_tree); @@ -287,14 +240,8 @@ class QuantileHistMaker: public TreeUpdater { void FindSplitConditions(const std::vector& nodes, const RegTree& tree, const GHistIndexMatrix& gmat, std::vector* split_conditions); - template - void BuildLocalHistograms(const GHistIndexMatrix &gmat, - RegTree *p_tree, - const std::vector &gpair_h); - template - void InitRoot(const GHistIndexMatrix &gmat, - const DMatrix& fmat, + void InitRoot(DMatrix* p_fmat, RegTree *p_tree, const std::vector &gpair_h, int *num_leaves, std::vector *expand); @@ -330,15 +277,11 @@ 
class QuantileHistMaker: public TreeUpdater { // the internal row sets RowSetCollection row_set_collection_; std::vector gpair_local_; - /*! \brief culmulative histogram of gradients. */ - HistCollection hist_; - /*! \brief culmulative local parent histogram of gradients. */ - HistCollection hist_local_worker_; + /*! \brief feature with least # of bins. to be used for dense specialization of InitNewNode() */ uint32_t fid_least_bins_; - GHistBuilder hist_builder_; std::unique_ptr pruner_; std::unique_ptr> evaluator_; @@ -358,12 +301,10 @@ class QuantileHistMaker: public TreeUpdater { enum class DataLayout { kDenseDataZeroBased, kDenseDataOneBased, kSparseData }; DataLayout data_layout_; + std::unique_ptr> + histogram_builder_; common::Monitor builder_monitor_; - common::ParallelGHistBuilder hist_buffer_; - rabit::Reducer histred_; - std::unique_ptr> hist_synchronizer_; - std::unique_ptr> hist_rows_adder_; }; common::Monitor updater_monitor_; @@ -383,71 +324,6 @@ class QuantileHistMaker: public TreeUpdater { std::unique_ptr pruner_; }; - -template -class HistSynchronizer { - public: - using BuilderT = QuantileHistMaker::Builder; - - virtual void SyncHistograms(BuilderT* builder, - int starting_index, - int sync_count, - RegTree *p_tree) = 0; - virtual ~HistSynchronizer() = default; -}; - -template -class BatchHistSynchronizer: public HistSynchronizer { - public: - using BuilderT = QuantileHistMaker::Builder; - void SyncHistograms(BuilderT* builder, - int starting_index, - int sync_count, - RegTree *p_tree) override; -}; - -template -class DistributedHistSynchronizer: public HistSynchronizer { - public: - using BuilderT = QuantileHistMaker::Builder; - - void SyncHistograms(BuilderT* builder, int starting_index, - int sync_count, RegTree *p_tree) override; - - void ParallelSubtractionHist(BuilderT* builder, - const common::BlockedSpace2d& space, - const std::vector& nodes, - const std::vector& subtraction_nodes, - const RegTree * p_tree); -}; - -template -class HistRowsAdder { - public: - using BuilderT = QuantileHistMaker::Builder; - - virtual void AddHistRows(BuilderT* builder, int *starting_index, - int *sync_count, RegTree *p_tree) = 0; - virtual ~HistRowsAdder() = default; -}; - -template -class BatchHistRowsAdder: public HistRowsAdder { - public: - using BuilderT = QuantileHistMaker::Builder; - void AddHistRows(BuilderT*, int *starting_index, - int *sync_count, RegTree *p_tree) override; -}; - -template -class DistributedHistRowsAdder: public HistRowsAdder { - public: - using BuilderT = QuantileHistMaker::Builder; - void AddHistRows(BuilderT*, int *starting_index, - int *sync_count, RegTree *p_tree) override; -}; - - } // namespace tree } // namespace xgboost diff --git a/tests/cpp/tree/hist/test_histogram.cc b/tests/cpp/tree/hist/test_histogram.cc new file mode 100644 index 000000000..a75ce70d4 --- /dev/null +++ b/tests/cpp/tree/hist/test_histogram.cc @@ -0,0 +1,298 @@ +/*! 
+ * Copyright 2018-2021 by Contributors + */ +#include +#include "../../helpers.h" +#include "../../../../src/tree/hist/histogram.h" +#include "../../../../src/tree/updater_quantile_hist.h" + +namespace xgboost { +namespace tree { +template +void TestAddHistRows(bool is_distributed) { + std::vector nodes_for_explicit_hist_build_; + std::vector nodes_for_subtraction_trick_; + int starting_index = std::numeric_limits::max(); + int sync_count = 0; + + size_t constexpr kNRows = 8, kNCols = 16; + int32_t constexpr kMaxBins = 4; + auto p_fmat = + RandomDataGenerator(kNRows, kNCols, 0.8).Seed(3).GenerateDMatrix(); + auto const &gmat = *(p_fmat + ->GetBatches( + BatchParam{GenericParameter::kCpuId, kMaxBins}) + .begin()); + + RegTree tree; + + tree.ExpandNode(0, 0, 0, false, 0, 0, 0, 0, 0, 0, 0); + tree.ExpandNode(tree[0].LeftChild(), 0, 0, false, 0, 0, 0, 0, 0, 0, 0); + tree.ExpandNode(tree[0].RightChild(), 0, 0, false, 0, 0, 0, 0, 0, 0, 0); + nodes_for_explicit_hist_build_.emplace_back(3, tree.GetDepth(3), 0.0f); + nodes_for_explicit_hist_build_.emplace_back(4, tree.GetDepth(4), 0.0f); + nodes_for_subtraction_trick_.emplace_back(5, tree.GetDepth(5), 0.0f); + nodes_for_subtraction_trick_.emplace_back(6, tree.GetDepth(6), 0.0f); + + HistogramBuilder histogram_builder; + histogram_builder.Reset(gmat.cut.TotalBins(), kMaxBins, omp_get_max_threads(), + is_distributed); + histogram_builder.AddHistRows(&starting_index, &sync_count, + nodes_for_explicit_hist_build_, + nodes_for_subtraction_trick_, &tree); + + ASSERT_EQ(sync_count, 2); + ASSERT_EQ(starting_index, 3); + + for (const CPUExpandEntry &node : nodes_for_explicit_hist_build_) { + ASSERT_EQ(histogram_builder.Histogram().RowExists(node.nid), true); + } + for (const CPUExpandEntry &node : nodes_for_subtraction_trick_) { + ASSERT_EQ(histogram_builder.Histogram().RowExists(node.nid), true); + } +} + + +TEST(CPUHistogram, AddRows) { + TestAddHistRows(true); + TestAddHistRows(true); + + TestAddHistRows(false); + TestAddHistRows(false); +} + +template +void TestSyncHist(bool is_distributed) { + size_t constexpr kNRows = 8, kNCols = 16; + int32_t constexpr kMaxBins = 4; + + std::vector nodes_for_explicit_hist_build_; + std::vector nodes_for_subtraction_trick_; + int starting_index = std::numeric_limits::max(); + int sync_count = 0; + RegTree tree; + + auto p_fmat = + RandomDataGenerator(kNRows, kNCols, 0.8).Seed(3).GenerateDMatrix(); + auto const &gmat = *(p_fmat + ->GetBatches( + BatchParam{GenericParameter::kCpuId, kMaxBins}) + .begin()); + + HistogramBuilder histogram; + uint32_t total_bins = gmat.cut.Ptrs().back(); + histogram.Reset(total_bins, kMaxBins, omp_get_max_threads(), is_distributed); + + RowSetCollection row_set_collection_; + { + row_set_collection_.Clear(); + std::vector &row_indices = *row_set_collection_.Data(); + row_indices.resize(kNRows); + std::iota(row_indices.begin(), row_indices.end(), 0); + row_set_collection_.Init(); + } + + // level 0 + nodes_for_explicit_hist_build_.emplace_back(0, tree.GetDepth(0), 0.0f); + histogram.AddHistRows(&starting_index, &sync_count, + nodes_for_explicit_hist_build_, + nodes_for_subtraction_trick_, &tree); + + tree.ExpandNode(0, 0, 0, false, 0, 0, 0, 0, 0, 0, 0); + nodes_for_explicit_hist_build_.clear(); + nodes_for_subtraction_trick_.clear(); + + // level 1 + nodes_for_explicit_hist_build_.emplace_back(tree[0].LeftChild(), + tree.GetDepth(1), 0.0f); + nodes_for_subtraction_trick_.emplace_back(tree[0].RightChild(), + tree.GetDepth(2), 0.0f); + + histogram.AddHistRows(&starting_index, &sync_count, + 
nodes_for_explicit_hist_build_, + nodes_for_subtraction_trick_, &tree); + + tree.ExpandNode(tree[0].LeftChild(), 0, 0, false, 0, 0, 0, 0, 0, 0, 0); + tree.ExpandNode(tree[0].RightChild(), 0, 0, false, 0, 0, 0, 0, 0, 0, 0); + + nodes_for_explicit_hist_build_.clear(); + nodes_for_subtraction_trick_.clear(); + // level 2 + nodes_for_explicit_hist_build_.emplace_back(3, tree.GetDepth(3), 0.0f); + nodes_for_subtraction_trick_.emplace_back(4, tree.GetDepth(4), 0.0f); + nodes_for_explicit_hist_build_.emplace_back(5, tree.GetDepth(5), 0.0f); + nodes_for_subtraction_trick_.emplace_back(6, tree.GetDepth(6), 0.0f); + + histogram.AddHistRows(&starting_index, &sync_count, + nodes_for_explicit_hist_build_, + nodes_for_subtraction_trick_, &tree); + + const size_t n_nodes = nodes_for_explicit_hist_build_.size(); + ASSERT_EQ(n_nodes, 2ul); + row_set_collection_.AddSplit(0, tree[0].LeftChild(), tree[0].RightChild(), 4, + 4); + row_set_collection_.AddSplit(1, tree[1].LeftChild(), tree[1].RightChild(), 2, + 2); + row_set_collection_.AddSplit(2, tree[2].LeftChild(), tree[2].RightChild(), 2, + 2); + + common::BlockedSpace2d space( + n_nodes, + [&](size_t node) { + const int32_t nid = nodes_for_explicit_hist_build_[node].nid; + return row_set_collection_[nid].Size(); + }, + 256); + + std::vector> target_hists(n_nodes); + for (size_t i = 0; i < nodes_for_explicit_hist_build_.size(); ++i) { + const int32_t nid = nodes_for_explicit_hist_build_[i].nid; + target_hists[i] = histogram.Histogram()[nid]; + } + + // set values to specific nodes hist + std::vector n_ids = {1, 2}; + for (size_t i : n_ids) { + auto this_hist = histogram.Histogram()[i]; + GradientSumT *p_hist = reinterpret_cast(this_hist.data()); + for (size_t bin_id = 0; bin_id < 2 * total_bins; ++bin_id) { + p_hist[bin_id] = 2 * bin_id; + } + } + n_ids[0] = 3; + n_ids[1] = 5; + for (size_t i : n_ids) { + auto this_hist = histogram.Histogram()[i]; + GradientSumT *p_hist = reinterpret_cast(this_hist.data()); + for (size_t bin_id = 0; bin_id < 2 * total_bins; ++bin_id) { + p_hist[bin_id] = bin_id; + } + } + + histogram.Buffer().Reset(1, n_nodes, space, target_hists); + // sync hist + if (is_distributed) { + histogram.SyncHistogramDistributed(&tree, nodes_for_explicit_hist_build_, + nodes_for_subtraction_trick_, + starting_index, sync_count); + } else { + histogram.SyncHistogramLocal(&tree, nodes_for_explicit_hist_build_, + nodes_for_subtraction_trick_, starting_index, + sync_count); + } + + using GHistRowT = common::GHistRow; + auto check_hist = [](const GHistRowT parent, const GHistRowT left, + const GHistRowT right, size_t begin, size_t end) { + const GradientSumT *p_parent = + reinterpret_cast(parent.data()); + const GradientSumT *p_left = + reinterpret_cast(left.data()); + const GradientSumT *p_right = + reinterpret_cast(right.data()); + for (size_t i = 2 * begin; i < 2 * end; ++i) { + ASSERT_EQ(p_parent[i], p_left[i] + p_right[i]); + } + }; + size_t node_id = 0; + for (const CPUExpandEntry &node : nodes_for_explicit_hist_build_) { + auto this_hist = histogram.Histogram()[node.nid]; + const size_t parent_id = tree[node.nid].Parent(); + const size_t subtraction_node_id = + nodes_for_subtraction_trick_[node_id].nid; + auto parent_hist = histogram.Histogram()[parent_id]; + auto sibling_hist = histogram.Histogram()[subtraction_node_id]; + + check_hist(parent_hist, this_hist, sibling_hist, 0, total_bins); + ++node_id; + } + node_id = 0; + for (const CPUExpandEntry &node : nodes_for_subtraction_trick_) { + auto this_hist = histogram.Histogram()[node.nid]; + 
const size_t parent_id = tree[node.nid].Parent(); + const size_t subtraction_node_id = + nodes_for_explicit_hist_build_[node_id].nid; + auto parent_hist = histogram.Histogram()[parent_id]; + auto sibling_hist = histogram.Histogram()[subtraction_node_id]; + + check_hist(parent_hist, this_hist, sibling_hist, 0, total_bins); + ++node_id; + } +} + +TEST(CPUHistogram, SyncHist) { + TestSyncHist(true); + TestSyncHist(true); + + TestSyncHist(false); + TestSyncHist(false); +} + +template +void TestBuildHistogram(bool is_distributed) { + size_t constexpr kNRows = 8, kNCols = 16; + int32_t constexpr kMaxBins = 4; + auto p_fmat = + RandomDataGenerator(kNRows, kNCols, 0.8).Seed(3).GenerateDMatrix(); + auto const &gmat = *(p_fmat + ->GetBatches( + BatchParam{GenericParameter::kCpuId, kMaxBins}) + .begin()); + uint32_t total_bins = gmat.cut.Ptrs().back(); + + static double constexpr kEps = 1e-6; + std::vector gpair = { + {0.23f, 0.24f}, {0.24f, 0.25f}, {0.26f, 0.27f}, {0.27f, 0.28f}, + {0.27f, 0.29f}, {0.37f, 0.39f}, {0.47f, 0.49f}, {0.57f, 0.59f}}; + + bst_node_t nid = 0; + HistogramBuilder histogram; + histogram.Reset(total_bins, kMaxBins, omp_get_max_threads(), is_distributed); + + RegTree tree; + + RowSetCollection row_set_collection_; + row_set_collection_.Clear(); + std::vector &row_indices = *row_set_collection_.Data(); + row_indices.resize(kNRows); + std::iota(row_indices.begin(), row_indices.end(), 0); + row_set_collection_.Init(); + + CPUExpandEntry node(CPUExpandEntry::kRootNid, tree.GetDepth(0), 0.0f); + std::vector nodes_for_explicit_hist_build_; + nodes_for_explicit_hist_build_.push_back(node); + histogram.BuildHist(p_fmat.get(), &tree, row_set_collection_, + nodes_for_explicit_hist_build_, {}, gpair); + + // Check if number of histogram bins is correct + ASSERT_EQ(histogram.Histogram()[nid].size(), gmat.cut.Ptrs().back()); + std::vector histogram_expected(histogram.Histogram()[nid].size()); + + // Compute the correct histogram (histogram_expected) + CHECK_EQ(gpair.size(), kNRows); + for (size_t rid = 0; rid < kNRows; ++rid) { + const size_t ibegin = gmat.row_ptr[rid]; + const size_t iend = gmat.row_ptr[rid + 1]; + for (size_t i = ibegin; i < iend; ++i) { + const size_t bin_id = gmat.index[i]; + histogram_expected[bin_id] += GradientPairPrecise(gpair[rid]); + } + } + + // Now validate the computed histogram returned by BuildHist + for (size_t i = 0; i < histogram.Histogram()[nid].size(); ++i) { + GradientPairPrecise sol = histogram_expected[i]; + ASSERT_NEAR(sol.GetGrad(), histogram.Histogram()[nid][i].GetGrad(), kEps); + ASSERT_NEAR(sol.GetHess(), histogram.Histogram()[nid][i].GetHess(), kEps); + } +} + +TEST(CPUHistogram, BuildHist) { + TestBuildHistogram(true); + TestBuildHistogram(true); + + TestBuildHistogram(false); + TestBuildHistogram(false); +} +} // namespace tree +} // namespace xgboost diff --git a/tests/cpp/tree/test_quantile_hist.cc b/tests/cpp/tree/test_quantile_hist.cc index decde1db1..938205aae 100644 --- a/tests/cpp/tree/test_quantile_hist.cc +++ b/tests/cpp/tree/test_quantile_hist.cc @@ -151,188 +151,6 @@ class QuantileHistMock : public QuantileHistMaker { omp_set_num_threads(nthreads); } - void TestAddHistRows(const GHistIndexMatrix& gmat, - std::vector* gpair, - DMatrix* p_fmat, - RegTree* tree) { - RealImpl::InitData(gmat, *p_fmat, *tree, gpair); - - int starting_index = std::numeric_limits::max(); - int sync_count = 0; - this->nodes_for_explicit_hist_build_.clear(); - this->nodes_for_subtraction_trick_.clear(); - - tree->ExpandNode(0, 0, 0, false, 0, 0, 0, 0, 0, 0, 
0); - tree->ExpandNode((*tree)[0].LeftChild(), 0, 0, false, 0, 0, 0, 0, 0, 0, 0); - tree->ExpandNode((*tree)[0].RightChild(), 0, 0, false, 0, 0, 0, 0, 0, 0, 0); - this->nodes_for_explicit_hist_build_.emplace_back(3, tree->GetDepth(3), 0.0f); - this->nodes_for_explicit_hist_build_.emplace_back(4, tree->GetDepth(4), 0.0f); - this->nodes_for_subtraction_trick_.emplace_back(5, tree->GetDepth(5), 0.0f); - this->nodes_for_subtraction_trick_.emplace_back(6, tree->GetDepth(6), 0.0f); - - this->hist_rows_adder_->AddHistRows(this, &starting_index, &sync_count, tree); - ASSERT_EQ(sync_count, 2); - ASSERT_EQ(starting_index, 3); - - for (const CPUExpandEntry& node : this->nodes_for_explicit_hist_build_) { - ASSERT_EQ(this->hist_.RowExists(node.nid), true); - } - for (const CPUExpandEntry& node : this->nodes_for_subtraction_trick_) { - ASSERT_EQ(this->hist_.RowExists(node.nid), true); - } - } - - - void TestSyncHistograms(const GHistIndexMatrix& gmat, - std::vector* gpair, - DMatrix* p_fmat, - RegTree* tree) { - // init - RealImpl::InitData(gmat, *p_fmat, *tree, gpair); - - int starting_index = std::numeric_limits::max(); - int sync_count = 0; - this->nodes_for_explicit_hist_build_.clear(); - this->nodes_for_subtraction_trick_.clear(); - // level 0 - this->nodes_for_explicit_hist_build_.emplace_back(0, tree->GetDepth(0), 0.0f); - this->hist_rows_adder_->AddHistRows(this, &starting_index, &sync_count, tree); - tree->ExpandNode(0, 0, 0, false, 0, 0, 0, 0, 0, 0, 0); - - this->nodes_for_explicit_hist_build_.clear(); - this->nodes_for_subtraction_trick_.clear(); - // level 1 - this->nodes_for_explicit_hist_build_.emplace_back((*tree)[0].LeftChild(), - tree->GetDepth(1), 0.0f); - this->nodes_for_subtraction_trick_.emplace_back((*tree)[0].RightChild(), - tree->GetDepth(2), 0.0f); - this->hist_rows_adder_->AddHistRows(this, &starting_index, &sync_count, tree); - tree->ExpandNode((*tree)[0].LeftChild(), 0, 0, false, 0, 0, 0, 0, 0, 0, 0); - tree->ExpandNode((*tree)[0].RightChild(), 0, 0, false, 0, 0, 0, 0, 0, 0, 0); - - this->nodes_for_explicit_hist_build_.clear(); - this->nodes_for_subtraction_trick_.clear(); - // level 2 - this->nodes_for_explicit_hist_build_.emplace_back(3, tree->GetDepth(3), 0.0f); - this->nodes_for_subtraction_trick_.emplace_back(4, tree->GetDepth(4), 0.0f); - this->nodes_for_explicit_hist_build_.emplace_back(5, tree->GetDepth(5), 0.0f); - this->nodes_for_subtraction_trick_.emplace_back(6, tree->GetDepth(6), 0.0f); - this->hist_rows_adder_->AddHistRows(this, &starting_index, &sync_count, tree); - - const size_t n_nodes = this->nodes_for_explicit_hist_build_.size(); - ASSERT_EQ(n_nodes, 2ul); - this->row_set_collection_.AddSplit(0, (*tree)[0].LeftChild(), - (*tree)[0].RightChild(), 4, 4); - this->row_set_collection_.AddSplit(1, (*tree)[1].LeftChild(), - (*tree)[1].RightChild(), 2, 2); - this->row_set_collection_.AddSplit(2, (*tree)[2].LeftChild(), - (*tree)[2].RightChild(), 2, 2); - - common::BlockedSpace2d space(n_nodes, [&](size_t node) { - const int32_t nid = this->nodes_for_explicit_hist_build_[node].nid; - return this->row_set_collection_[nid].Size(); - }, 256); - - std::vector target_hists(n_nodes); - for (size_t i = 0; i < this->nodes_for_explicit_hist_build_.size(); ++i) { - const int32_t nid = this->nodes_for_explicit_hist_build_[i].nid; - target_hists[i] = this->hist_[nid]; - } - - const size_t nbins = this->hist_builder_.GetNumBins(); - // set values to specific nodes hist - std::vector n_ids = {1, 2}; - for (size_t i : n_ids) { - auto this_hist = this->hist_[i]; - GradientSumT* 
p_hist = reinterpret_cast(this_hist.data()); - for (size_t bin_id = 0; bin_id < 2*nbins; ++bin_id) { - p_hist[bin_id] = 2*bin_id; - } - } - n_ids[0] = 3; - n_ids[1] = 5; - for (size_t i : n_ids) { - auto this_hist = this->hist_[i]; - GradientSumT* p_hist = reinterpret_cast(this_hist.data()); - for (size_t bin_id = 0; bin_id < 2*nbins; ++bin_id) { - p_hist[bin_id] = bin_id; - } - } - - this->hist_buffer_.Reset(1, n_nodes, space, target_hists); - // sync hist - this->hist_synchronizer_->SyncHistograms(this, starting_index, sync_count, tree); - - auto check_hist = [] (const GHistRowT parent, const GHistRowT left, - const GHistRowT right, size_t begin, size_t end) { - const GradientSumT* p_parent = reinterpret_cast(parent.data()); - const GradientSumT* p_left = reinterpret_cast(left.data()); - const GradientSumT* p_right = reinterpret_cast(right.data()); - for (size_t i = 2 * begin; i < 2 * end; ++i) { - ASSERT_EQ(p_parent[i], p_left[i] + p_right[i]); - } - }; - size_t node_id = 0; - for (const CPUExpandEntry& node : this->nodes_for_explicit_hist_build_) { - auto this_hist = this->hist_[node.nid]; - const size_t parent_id = (*tree)[node.nid].Parent(); - const size_t subtraction_node_id = this->nodes_for_subtraction_trick_[node_id].nid; - auto parent_hist = this->hist_[parent_id]; - auto sibling_hist = this->hist_[subtraction_node_id]; - - check_hist(parent_hist, this_hist, sibling_hist, 0, nbins); - ++node_id; - } - node_id = 0; - for (const CPUExpandEntry& node : this->nodes_for_subtraction_trick_) { - auto this_hist = this->hist_[node.nid]; - const size_t parent_id = (*tree)[node.nid].Parent(); - const size_t subtraction_node_id = this->nodes_for_explicit_hist_build_[node_id].nid; - auto parent_hist = this->hist_[parent_id]; - auto sibling_hist = this->hist_[subtraction_node_id]; - - check_hist(parent_hist, this_hist, sibling_hist, 0, nbins); - ++node_id; - } - } - - void TestBuildHist(int nid, - const GHistIndexMatrix& gmat, - const DMatrix& fmat, - const RegTree& tree) { - std::vector gpair = - { {0.23f, 0.24f}, {0.24f, 0.25f}, {0.26f, 0.27f}, {0.27f, 0.28f}, - {0.27f, 0.29f}, {0.37f, 0.39f}, {0.47f, 0.49f}, {0.57f, 0.59f} }; - RealImpl::InitData(gmat, fmat, tree, &gpair); - this->hist_.AddHistRow(nid); - this->hist_.AllocateAllData(); - this->hist_builder_.template BuildHist(gpair, this->row_set_collection_[nid], - gmat, this->hist_[nid]); - - // Check if number of histogram bins is correct - ASSERT_EQ(this->hist_[nid].size(), gmat.cut.Ptrs().back()); - std::vector histogram_expected(this->hist_[nid].size()); - - // Compute the correct histogram (histogram_expected) - const size_t num_row = fmat.Info().num_row_; - CHECK_EQ(gpair.size(), num_row); - for (size_t rid = 0; rid < num_row; ++rid) { - const size_t ibegin = gmat.row_ptr[rid]; - const size_t iend = gmat.row_ptr[rid + 1]; - for (size_t i = ibegin; i < iend; ++i) { - const size_t bin_id = gmat.index[i]; - histogram_expected[bin_id] += GradientPairPrecise(gpair[rid]); - } - } - - // Now validate the computed histogram returned by BuildHist - for (size_t i = 0; i < this->hist_[nid].size(); ++i) { - GradientPairPrecise sol = histogram_expected[i]; - ASSERT_NEAR(sol.GetGrad(), this->hist_[nid][i].GetGrad(), kEps); - ASSERT_NEAR(sol.GetHess(), this->hist_[nid][i].GetHess(), kEps); - } - } - void TestApplySplit(const RegTree& tree) { std::vector row_gpairs = { {1.23f, 0.24f}, {0.24f, 0.25f}, {0.26f, 0.27f}, {2.27f, 0.28f}, @@ -350,9 +168,6 @@ class QuantileHistMock : public QuantileHistMaker { // treat everything as dense, as this is what 
we intend to test here cm.Init(gmat, 0.0); RealImpl::InitData(gmat, *dmat, tree, &row_gpairs); - this->hist_.AddHistRow(0); - this->hist_.AllocateAllData(); - const size_t num_row = dmat->Info().num_row_; // split by feature 0 const size_t bin_id_min = gmat.cut.Ptrs()[0]; @@ -424,26 +239,12 @@ class QuantileHistMock : public QuantileHistMaker { param_, std::move(pruner_), dmat_.get())); - if (batch) { - float_builder_->SetHistSynchronizer(new BatchHistSynchronizer()); - float_builder_->SetHistRowsAdder(new BatchHistRowsAdder()); - } else { - float_builder_->SetHistSynchronizer(new DistributedHistSynchronizer()); - float_builder_->SetHistRowsAdder(new DistributedHistRowsAdder()); - } } else { double_builder_.reset( new BuilderMock( param_, std::move(pruner_), dmat_.get())); - if (batch) { - double_builder_->SetHistSynchronizer(new BatchHistSynchronizer()); - double_builder_->SetHistRowsAdder(new BatchHistRowsAdder()); - } else { - double_builder_->SetHistSynchronizer(new DistributedHistSynchronizer()); - double_builder_->SetHistRowsAdder(new DistributedHistRowsAdder()); - } } } ~QuantileHistMock() override = default; @@ -484,52 +285,6 @@ class QuantileHistMock : public QuantileHistMaker { } } - void TestAddHistRows() { - size_t constexpr kMaxBins = 4; - GHistIndexMatrix gmat(dmat_.get(), kMaxBins); - - RegTree tree = RegTree(); - tree.param.UpdateAllowUnknown(cfg_); - std::vector gpair = - { {0.23f, 0.24f}, {0.23f, 0.24f}, {0.23f, 0.24f}, {0.23f, 0.24f}, - {0.27f, 0.29f}, {0.27f, 0.29f}, {0.27f, 0.29f}, {0.27f, 0.29f} }; - if (double_builder_) { - double_builder_->TestAddHistRows(gmat, &gpair, dmat_.get(), &tree); - } else { - float_builder_->TestAddHistRows(gmat, &gpair, dmat_.get(), &tree); - } - } - - void TestSyncHistograms() { - size_t constexpr kMaxBins = 4; - GHistIndexMatrix gmat(dmat_.get(), kMaxBins); - - RegTree tree = RegTree(); - tree.param.UpdateAllowUnknown(cfg_); - std::vector gpair = - { {0.23f, 0.24f}, {0.23f, 0.24f}, {0.23f, 0.24f}, {0.23f, 0.24f}, - {0.27f, 0.29f}, {0.27f, 0.29f}, {0.27f, 0.29f}, {0.27f, 0.29f} }; - if (double_builder_) { - double_builder_->TestSyncHistograms(gmat, &gpair, dmat_.get(), &tree); - } else { - float_builder_->TestSyncHistograms(gmat, &gpair, dmat_.get(), &tree); - } - } - - - void TestBuildHist() { - RegTree tree = RegTree(); - tree.param.UpdateAllowUnknown(cfg_); - - size_t constexpr kMaxBins = 4; - GHistIndexMatrix gmat(dmat_.get(), kMaxBins); - if (double_builder_) { - double_builder_->TestBuildHist(0, gmat, *dmat_, tree); - } else { - float_builder_->TestBuildHist(0, gmat, *dmat_, tree); - } - } - void TestApplySplit() { RegTree tree = RegTree(); tree.param.UpdateAllowUnknown(cfg_); @@ -563,57 +318,6 @@ TEST(QuantileHist, InitDataSampling) { maker_float.TestInitDataSampling(); } -TEST(QuantileHist, AddHistRows) { - std::vector> cfg - {{"num_feature", std::to_string(QuantileHistMock::GetNumColumns())}}; - QuantileHistMock maker(cfg); - maker.TestAddHistRows(); - const bool single_precision_histogram = true; - QuantileHistMock maker_float(cfg, single_precision_histogram); - maker_float.TestAddHistRows(); -} - -TEST(QuantileHist, SyncHistograms) { - std::vector> cfg - {{"num_feature", std::to_string(QuantileHistMock::GetNumColumns())}}; - QuantileHistMock maker(cfg); - maker.TestSyncHistograms(); - const bool single_precision_histogram = true; - QuantileHistMock maker_float(cfg, single_precision_histogram); - maker_float.TestSyncHistograms(); -} - -TEST(QuantileHist, DistributedAddHistRows) { - std::vector> cfg - {{"num_feature", 
std::to_string(QuantileHistMock::GetNumColumns())}}; - QuantileHistMock maker(cfg, false); - maker.TestAddHistRows(); - const bool single_precision_histogram = true; - QuantileHistMock maker_float(cfg, single_precision_histogram); - maker_float.TestAddHistRows(); -} - -TEST(QuantileHist, DistributedSyncHistograms) { - std::vector> cfg - {{"num_feature", std::to_string(QuantileHistMock::GetNumColumns())}}; - QuantileHistMock maker(cfg, false); - maker.TestSyncHistograms(); - const bool single_precision_histogram = true; - QuantileHistMock maker_float(cfg, single_precision_histogram); - maker_float.TestSyncHistograms(); -} - -TEST(QuantileHist, BuildHist) { - // Don't enable feature grouping - std::vector> cfg - {{"num_feature", std::to_string(QuantileHistMock::GetNumColumns())}}; - QuantileHistMock maker(cfg); - maker.TestBuildHist(); - const bool single_precision_histogram = true; - QuantileHistMock maker_float(cfg, single_precision_histogram); - maker_float.TestBuildHist(); -} - TEST(QuantileHist, ApplySplit) { std::vector> cfg {{"num_feature", std::to_string(QuantileHistMock::GetNumColumns())},