diff --git a/src/tree/updater_quantile_hist.cc b/src/tree/updater_quantile_hist.cc index 140604efb..f06cc76c3 100644 --- a/src/tree/updater_quantile_hist.cc +++ b/src/tree/updater_quantile_hist.cc @@ -92,7 +92,7 @@ void QuantileHistMaker::Builder::SyncHistograms( int starting_index, int sync_count, RegTree *p_tree) { - perf_monitor.TickStart(); + builder_monitor_.Start("SyncHistograms"); this->histred_.Allreduce(hist_[starting_index].data(), hist_builder_.GetNumBins() * sync_count); // use Subtraction Trick for (auto const& node_pair : nodes_for_subtraction_trick_) { @@ -100,7 +100,7 @@ void QuantileHistMaker::Builder::SyncHistograms( SubtractionTrick(hist_[node_pair.first], hist_[node_pair.second], hist_[(*p_tree)[node_pair.first].Parent()]); } - perf_monitor.UpdatePerfTimer(TreeGrowingPerfMonitor::timer_name::BUILD_HIST); + builder_monitor_.Stop("SyncHistograms"); } void QuantileHistMaker::Builder::BuildLocalHistograms( @@ -110,7 +110,7 @@ void QuantileHistMaker::Builder::BuildLocalHistograms( const GHistIndexBlockMatrix &gmatb, RegTree *p_tree, const std::vector &gpair_h) { - perf_monitor.TickStart(); + builder_monitor_.Start("BuildLocalHistograms"); for (auto const& entry : qexpand_depth_wise_) { int nid = entry.nid; RegTree::Node &node = (*p_tree)[nid]; @@ -150,7 +150,7 @@ void QuantileHistMaker::Builder::BuildLocalHistograms( } } } - perf_monitor.UpdatePerfTimer(TreeGrowingPerfMonitor::timer_name::BUILD_HIST); + builder_monitor_.Stop("BuildLocalHistograms"); } void QuantileHistMaker::Builder::BuildNodeStats( @@ -158,7 +158,7 @@ void QuantileHistMaker::Builder::BuildNodeStats( DMatrix *p_fmat, RegTree *p_tree, const std::vector &gpair_h) { - perf_monitor.TickStart(); + builder_monitor_.Start("BuildNodeStats"); for (auto const& entry : qexpand_depth_wise_) { int nid = entry.nid; this->InitNewNode(nid, gmat, gpair_h, *p_fmat, *p_tree); @@ -172,7 +172,7 @@ void QuantileHistMaker::Builder::BuildNodeStats( snode_[left_sibling_id].weight, snode_[nid].weight); } } - perf_monitor.UpdatePerfTimer(TreeGrowingPerfMonitor::timer_name::INIT_NEW_NODE); + builder_monitor_.Stop("BuildNodeStats"); } void QuantileHistMaker::Builder::EvaluateSplits( @@ -186,17 +186,13 @@ void QuantileHistMaker::Builder::EvaluateSplits( std::vector *temp_qexpand_depth) { for (auto const& entry : qexpand_depth_wise_) { int nid = entry.nid; - perf_monitor.TickStart(); this->EvaluateSplit(nid, gmat, hist_, *p_fmat, *p_tree); - perf_monitor.UpdatePerfTimer(TreeGrowingPerfMonitor::timer_name::EVALUATE_SPLIT); if (snode_[nid].best.loss_chg < kRtEps || (param_.max_depth > 0 && depth == param_.max_depth) || (param_.max_leaves > 0 && (*num_leaves) == param_.max_leaves)) { (*p_tree)[nid].SetLeaf(snode_[nid].weight * param_.learning_rate); } else { - perf_monitor.TickStart(); this->ApplySplit(nid, gmat, column_matrix, hist_, *p_fmat, p_tree); - perf_monitor.UpdatePerfTimer(TreeGrowingPerfMonitor::timer_name::APPLY_SPLIT); int left_id = (*p_tree)[nid].LeftChild(); int right_id = (*p_tree)[nid].RightChild(); temp_qexpand_depth->push_back(ExpandEntry(left_id, @@ -255,18 +251,12 @@ void QuantileHistMaker::Builder::ExpandWithLossGuide( int num_leaves = 0; for (int nid = 0; nid < p_tree->param.num_roots; ++nid) { - perf_monitor.TickStart(); hist_.AddHistRow(nid); BuildHist(gpair_h, row_set_collection_[nid], gmat, gmatb, hist_[nid], true); - perf_monitor.UpdatePerfTimer(TreeGrowingPerfMonitor::timer_name::BUILD_HIST); - perf_monitor.TickStart(); this->InitNewNode(nid, gmat, gpair_h, *p_fmat, *p_tree); - perf_monitor.UpdatePerfTimer(TreeGrowingPerfMonitor::timer_name::INIT_NEW_NODE); - perf_monitor.TickStart(); this->EvaluateSplit(nid, gmat, hist_, *p_fmat, *p_tree); - perf_monitor.UpdatePerfTimer(TreeGrowingPerfMonitor::timer_name::EVALUATE_SPLIT); qexpand_loss_guided_->push(ExpandEntry(nid, p_tree->GetDepth(nid), snode_[nid].best.loss_chg, timestamp++)); @@ -282,16 +272,13 @@ void QuantileHistMaker::Builder::ExpandWithLossGuide( || (param_.max_leaves > 0 && num_leaves == param_.max_leaves) ) { (*p_tree)[nid].SetLeaf(snode_[nid].weight * param_.learning_rate); } else { - perf_monitor.TickStart(); this->ApplySplit(nid, gmat, column_matrix, hist_, *p_fmat, p_tree); - perf_monitor.UpdatePerfTimer(TreeGrowingPerfMonitor::timer_name::APPLY_SPLIT); const int cleft = (*p_tree)[nid].LeftChild(); const int cright = (*p_tree)[nid].RightChild(); hist_.AddHistRow(cleft); hist_.AddHistRow(cright); - perf_monitor.TickStart(); if (rabit::IsDistributed()) { // in distributed mode, we need to keep consistent across workers BuildHist(gpair_h, row_set_collection_[cleft], gmat, gmatb, hist_[cleft], true); @@ -305,20 +292,15 @@ void QuantileHistMaker::Builder::ExpandWithLossGuide( SubtractionTrick(hist_[cleft], hist_[cright], hist_[nid]); } } - perf_monitor.UpdatePerfTimer(TreeGrowingPerfMonitor::timer_name::BUILD_HIST); - perf_monitor.TickStart(); this->InitNewNode(cleft, gmat, gpair_h, *p_fmat, *p_tree); this->InitNewNode(cright, gmat, gpair_h, *p_fmat, *p_tree); bst_uint featureid = snode_[nid].best.SplitIndex(); spliteval_->AddSplit(nid, cleft, cright, featureid, snode_[cleft].weight, snode_[cright].weight); - perf_monitor.UpdatePerfTimer(TreeGrowingPerfMonitor::timer_name::INIT_NEW_NODE); - perf_monitor.TickStart(); this->EvaluateSplit(cleft, gmat, hist_, *p_fmat, *p_tree); this->EvaluateSplit(cright, gmat, hist_, *p_fmat, *p_tree); - perf_monitor.UpdatePerfTimer(TreeGrowingPerfMonitor::timer_name::EVALUATE_SPLIT); qexpand_loss_guided_->push(ExpandEntry(cleft, p_tree->GetDepth(cleft), snode_[cleft].best.loss_chg, @@ -338,15 +320,13 @@ void QuantileHistMaker::Builder::Update(const GHistIndexMatrix& gmat, HostDeviceVector* gpair, DMatrix* p_fmat, RegTree* p_tree) { - perf_monitor.StartPerfMonitor(); + builder_monitor_.Start("Update"); const std::vector& gpair_h = gpair->ConstHostVector(); spliteval_->Reset(); - perf_monitor.TickStart(); this->InitData(gmat, gpair_h, *p_fmat, *p_tree); - perf_monitor.UpdatePerfTimer(TreeGrowingPerfMonitor::timer_name::INIT_DATA); if (param_.grow_policy == TrainParam::kLossGuide) { ExpandWithLossGuide(gmat, gmatb, column_matrix, p_fmat, p_tree, gpair_h); @@ -362,7 +342,7 @@ void QuantileHistMaker::Builder::Update(const GHistIndexMatrix& gmat, pruner_->Update(gpair, p_fmat, std::vector{p_tree}); - perf_monitor.EndPerfMonitor(); + builder_monitor_.Stop("Update"); } bool QuantileHistMaker::Builder::UpdatePredictionCache( @@ -419,6 +399,7 @@ void QuantileHistMaker::Builder::InitData(const GHistIndexMatrix& gmat, CHECK(param_.max_depth > 0) << "max_depth cannot be 0 (unlimited) " << "when grow_policy is depthwise."; } + builder_monitor_.Start("InitData"); const auto& info = fmat.Info(); { @@ -519,6 +500,7 @@ void QuantileHistMaker::Builder::InitData(const GHistIndexMatrix& gmat, qexpand_depth_wise_.clear(); } } + builder_monitor_.Stop("InitData"); } void QuantileHistMaker::Builder::EvaluateSplit(const int nid, @@ -526,6 +508,7 @@ void QuantileHistMaker::Builder::EvaluateSplit(const int nid, const HistCollection& hist, const DMatrix& fmat, const RegTree& tree) { + builder_monitor_.Start("EvaluateSplit"); // start enumeration const MetaInfo& info = fmat.Info(); auto p_feature_set = column_sampler_.GetFeatureSet(tree.GetDepth(nid)); @@ -550,6 +533,7 @@ void QuantileHistMaker::Builder::EvaluateSplit(const int nid, for (unsigned tid = 0; tid < nthread; ++tid) { snode_[nid].best.Update(best_split_tloc_[tid]); } + builder_monitor_.Stop("EvaluateSplit"); } void QuantileHistMaker::Builder::ApplySplit(int nid, @@ -558,6 +542,7 @@ void QuantileHistMaker::Builder::ApplySplit(int nid, const HistCollection& hist, const DMatrix& fmat, RegTree* p_tree) { + builder_monitor_.Start("ApplySplit"); // TODO(hcho3): support feature sampling by levels /* 1. Create child nodes */ @@ -606,6 +591,7 @@ void QuantileHistMaker::Builder::ApplySplit(int nid, row_set_collection_.AddSplit( nid, row_split_tloc_, (*p_tree)[nid].LeftChild(), (*p_tree)[nid].RightChild()); + builder_monitor_.Stop("ApplySplit"); } void QuantileHistMaker::Builder::ApplySplitDenseData( @@ -744,6 +730,7 @@ void QuantileHistMaker::Builder::InitNewNode(int nid, const std::vector& gpair, const DMatrix& fmat, const RegTree& tree) { + builder_monitor_.Start("InitNewNode"); { snode_.resize(tree.param.num_nodes, NodeEntry(param_)); } @@ -786,6 +773,7 @@ void QuantileHistMaker::Builder::InitNewNode(int nid, snode_[nid].root_gain = static_cast( spliteval_->ComputeScore(parentid, snode_[nid].stats, snode_[nid].weight)); } + builder_monitor_.Stop("InitNewNode"); } // enumerate the split values of specific feature diff --git a/src/tree/updater_quantile_hist.h b/src/tree/updater_quantile_hist.h index c78dcad90..7f346fe58 100644 --- a/src/tree/updater_quantile_hist.h +++ b/src/tree/updater_quantile_hist.h @@ -22,6 +22,7 @@ #include "./param.h" #include "./split_evaluator.h" #include "../common/random.h" +#include "../common/timer.h" #include "../common/hist_util.h" #include "../common/row_set.h" #include "../common/column_matrix.h" @@ -87,7 +88,9 @@ class QuantileHistMaker: public TreeUpdater { std::unique_ptr spliteval) : param_(param), pruner_(std::move(pruner)), spliteval_(std::move(spliteval)), p_last_tree_(nullptr), - p_last_fmat_(nullptr) {} + p_last_fmat_(nullptr) { + builder_monitor_.Init("Quantile::Builder"); + } // update one tree, growing virtual void Update(const GHistIndexMatrix& gmat, const GHistIndexBlockMatrix& gmatb, @@ -102,6 +105,7 @@ class QuantileHistMaker: public TreeUpdater { const GHistIndexBlockMatrix& gmatb, GHistRow hist, bool sync_hist) { + builder_monitor_.Start("BuildHist"); if (param_.enable_feature_grouping > 0) { hist_builder_.BuildBlockHist(gpair, row_indices, gmatb, hist); } else { @@ -110,10 +114,13 @@ class QuantileHistMaker: public TreeUpdater { if (sync_hist) { this->histred_.Allreduce(hist.data(), hist_builder_.GetNumBins()); } + builder_monitor_.Stop("BuildHist"); } inline void SubtractionTrick(GHistRow self, GHistRow sibling, GHistRow parent) { + builder_monitor_.Start("SubtractionTrick"); hist_builder_.SubtractionTrick(self, sibling, parent); + builder_monitor_.Stop("SubtractionTrick"); } bool UpdatePredictionCache(const DMatrix* data, @@ -130,84 +137,6 @@ class QuantileHistMaker: public TreeUpdater { : nid(nid), depth(depth), loss_chg(loss_chg), timestamp(tstmp) {} }; - struct TreeGrowingPerfMonitor { - enum timer_name {INIT_DATA, INIT_NEW_NODE, BUILD_HIST, EVALUATE_SPLIT, APPLY_SPLIT}; - - double global_start; - - // performance counters - double tstart; - double time_init_data = 0; - double time_init_new_node = 0; - double time_build_hist = 0; - double time_evaluate_split = 0; - double time_apply_split = 0; - - inline void StartPerfMonitor() { - global_start = dmlc::GetTime(); - } - - inline void EndPerfMonitor() { - CHECK_GT(global_start, 0); - double total_time = dmlc::GetTime() - global_start; - LOG(INFO) << "\nInitData: " - << std::fixed << std::setw(6) << std::setprecision(4) << time_init_data - << " (" << std::fixed << std::setw(5) << std::setprecision(2) - << time_init_data / total_time * 100 << "%)\n" - << "InitNewNode: " - << std::fixed << std::setw(6) << std::setprecision(4) << time_init_new_node - << " (" << std::fixed << std::setw(5) << std::setprecision(2) - << time_init_new_node / total_time * 100 << "%)\n" - << "BuildHist: " - << std::fixed << std::setw(6) << std::setprecision(4) << time_build_hist - << " (" << std::fixed << std::setw(5) << std::setprecision(2) - << time_build_hist / total_time * 100 << "%)\n" - << "EvaluateSplit: " - << std::fixed << std::setw(6) << std::setprecision(4) << time_evaluate_split - << " (" << std::fixed << std::setw(5) << std::setprecision(2) - << time_evaluate_split / total_time * 100 << "%)\n" - << "ApplySplit: " - << std::fixed << std::setw(6) << std::setprecision(4) << time_apply_split - << " (" << std::fixed << std::setw(5) << std::setprecision(2) - << time_apply_split / total_time * 100 << "%)\n" - << "========================================\n" - << "Total: " - << std::fixed << std::setw(6) << std::setprecision(4) << total_time; - // clear performance counters - time_init_data = 0; - time_init_new_node = 0; - time_build_hist = 0; - time_evaluate_split = 0; - time_apply_split = 0; - } - - inline void TickStart() { - tstart = dmlc::GetTime(); - } - - inline void UpdatePerfTimer(const timer_name &timer_name) { - CHECK_GT(tstart, 0); - switch (timer_name) { - case INIT_DATA: - time_init_data += dmlc::GetTime() - tstart; - break; - case INIT_NEW_NODE: - time_init_new_node += dmlc::GetTime() - tstart; - break; - case BUILD_HIST: - time_build_hist += dmlc::GetTime() - tstart; - break; - case EVALUATE_SPLIT: - time_evaluate_split += dmlc::GetTime() - tstart; - break; - case APPLY_SPLIT: - time_apply_split += dmlc::GetTime() - tstart; - break; - } - tstart = -1; - } - }; - // initialize temp data structure void InitData(const GHistIndexMatrix& gmat, const std::vector& gpair, @@ -347,7 +276,7 @@ class QuantileHistMaker: public TreeUpdater { enum DataLayout { kDenseDataZeroBased, kDenseDataOneBased, kSparseData }; DataLayout data_layout_; - TreeGrowingPerfMonitor perf_monitor; + common::Monitor builder_monitor_; rabit::Reducer histred_; };