Use Monitor in quantile hist. (#4273)

This commit is contained in:
Jiaming Yuan 2019-03-20 09:26:22 +08:00 committed by GitHub
parent 00465d243d
commit 09bd9e68cf
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 25 additions and 108 deletions

View File

@ -92,7 +92,7 @@ void QuantileHistMaker::Builder::SyncHistograms(
int starting_index,
int sync_count,
RegTree *p_tree) {
perf_monitor.TickStart();
builder_monitor_.Start("SyncHistograms");
this->histred_.Allreduce(hist_[starting_index].data(), hist_builder_.GetNumBins() * sync_count);
// use Subtraction Trick
for (auto const& node_pair : nodes_for_subtraction_trick_) {
@ -100,7 +100,7 @@ void QuantileHistMaker::Builder::SyncHistograms(
SubtractionTrick(hist_[node_pair.first], hist_[node_pair.second],
hist_[(*p_tree)[node_pair.first].Parent()]);
}
perf_monitor.UpdatePerfTimer(TreeGrowingPerfMonitor::timer_name::BUILD_HIST);
builder_monitor_.Stop("SyncHistograms");
}
void QuantileHistMaker::Builder::BuildLocalHistograms(
@ -110,7 +110,7 @@ void QuantileHistMaker::Builder::BuildLocalHistograms(
const GHistIndexBlockMatrix &gmatb,
RegTree *p_tree,
const std::vector<GradientPair> &gpair_h) {
perf_monitor.TickStart();
builder_monitor_.Start("BuildLocalHistograms");
for (auto const& entry : qexpand_depth_wise_) {
int nid = entry.nid;
RegTree::Node &node = (*p_tree)[nid];
@ -150,7 +150,7 @@ void QuantileHistMaker::Builder::BuildLocalHistograms(
}
}
}
perf_monitor.UpdatePerfTimer(TreeGrowingPerfMonitor::timer_name::BUILD_HIST);
builder_monitor_.Stop("BuildLocalHistograms");
}
void QuantileHistMaker::Builder::BuildNodeStats(
@ -158,7 +158,7 @@ void QuantileHistMaker::Builder::BuildNodeStats(
DMatrix *p_fmat,
RegTree *p_tree,
const std::vector<GradientPair> &gpair_h) {
perf_monitor.TickStart();
builder_monitor_.Start("BuildNodeStats");
for (auto const& entry : qexpand_depth_wise_) {
int nid = entry.nid;
this->InitNewNode(nid, gmat, gpair_h, *p_fmat, *p_tree);
@ -172,7 +172,7 @@ void QuantileHistMaker::Builder::BuildNodeStats(
snode_[left_sibling_id].weight, snode_[nid].weight);
}
}
perf_monitor.UpdatePerfTimer(TreeGrowingPerfMonitor::timer_name::INIT_NEW_NODE);
builder_monitor_.Stop("BuildNodeStats");
}
void QuantileHistMaker::Builder::EvaluateSplits(
@ -186,17 +186,13 @@ void QuantileHistMaker::Builder::EvaluateSplits(
std::vector<ExpandEntry> *temp_qexpand_depth) {
for (auto const& entry : qexpand_depth_wise_) {
int nid = entry.nid;
perf_monitor.TickStart();
this->EvaluateSplit(nid, gmat, hist_, *p_fmat, *p_tree);
perf_monitor.UpdatePerfTimer(TreeGrowingPerfMonitor::timer_name::EVALUATE_SPLIT);
if (snode_[nid].best.loss_chg < kRtEps ||
(param_.max_depth > 0 && depth == param_.max_depth) ||
(param_.max_leaves > 0 && (*num_leaves) == param_.max_leaves)) {
(*p_tree)[nid].SetLeaf(snode_[nid].weight * param_.learning_rate);
} else {
perf_monitor.TickStart();
this->ApplySplit(nid, gmat, column_matrix, hist_, *p_fmat, p_tree);
perf_monitor.UpdatePerfTimer(TreeGrowingPerfMonitor::timer_name::APPLY_SPLIT);
int left_id = (*p_tree)[nid].LeftChild();
int right_id = (*p_tree)[nid].RightChild();
temp_qexpand_depth->push_back(ExpandEntry(left_id,
@ -255,18 +251,12 @@ void QuantileHistMaker::Builder::ExpandWithLossGuide(
int num_leaves = 0;
for (int nid = 0; nid < p_tree->param.num_roots; ++nid) {
perf_monitor.TickStart();
hist_.AddHistRow(nid);
BuildHist(gpair_h, row_set_collection_[nid], gmat, gmatb, hist_[nid], true);
perf_monitor.UpdatePerfTimer(TreeGrowingPerfMonitor::timer_name::BUILD_HIST);
perf_monitor.TickStart();
this->InitNewNode(nid, gmat, gpair_h, *p_fmat, *p_tree);
perf_monitor.UpdatePerfTimer(TreeGrowingPerfMonitor::timer_name::INIT_NEW_NODE);
perf_monitor.TickStart();
this->EvaluateSplit(nid, gmat, hist_, *p_fmat, *p_tree);
perf_monitor.UpdatePerfTimer(TreeGrowingPerfMonitor::timer_name::EVALUATE_SPLIT);
qexpand_loss_guided_->push(ExpandEntry(nid, p_tree->GetDepth(nid),
snode_[nid].best.loss_chg,
timestamp++));
@ -282,16 +272,13 @@ void QuantileHistMaker::Builder::ExpandWithLossGuide(
|| (param_.max_leaves > 0 && num_leaves == param_.max_leaves) ) {
(*p_tree)[nid].SetLeaf(snode_[nid].weight * param_.learning_rate);
} else {
perf_monitor.TickStart();
this->ApplySplit(nid, gmat, column_matrix, hist_, *p_fmat, p_tree);
perf_monitor.UpdatePerfTimer(TreeGrowingPerfMonitor::timer_name::APPLY_SPLIT);
const int cleft = (*p_tree)[nid].LeftChild();
const int cright = (*p_tree)[nid].RightChild();
hist_.AddHistRow(cleft);
hist_.AddHistRow(cright);
perf_monitor.TickStart();
if (rabit::IsDistributed()) {
// in distributed mode, we need to keep consistent across workers
BuildHist(gpair_h, row_set_collection_[cleft], gmat, gmatb, hist_[cleft], true);
@ -305,20 +292,15 @@ void QuantileHistMaker::Builder::ExpandWithLossGuide(
SubtractionTrick(hist_[cleft], hist_[cright], hist_[nid]);
}
}
perf_monitor.UpdatePerfTimer(TreeGrowingPerfMonitor::timer_name::BUILD_HIST);
perf_monitor.TickStart();
this->InitNewNode(cleft, gmat, gpair_h, *p_fmat, *p_tree);
this->InitNewNode(cright, gmat, gpair_h, *p_fmat, *p_tree);
bst_uint featureid = snode_[nid].best.SplitIndex();
spliteval_->AddSplit(nid, cleft, cright, featureid,
snode_[cleft].weight, snode_[cright].weight);
perf_monitor.UpdatePerfTimer(TreeGrowingPerfMonitor::timer_name::INIT_NEW_NODE);
perf_monitor.TickStart();
this->EvaluateSplit(cleft, gmat, hist_, *p_fmat, *p_tree);
this->EvaluateSplit(cright, gmat, hist_, *p_fmat, *p_tree);
perf_monitor.UpdatePerfTimer(TreeGrowingPerfMonitor::timer_name::EVALUATE_SPLIT);
qexpand_loss_guided_->push(ExpandEntry(cleft, p_tree->GetDepth(cleft),
snode_[cleft].best.loss_chg,
@ -338,15 +320,13 @@ void QuantileHistMaker::Builder::Update(const GHistIndexMatrix& gmat,
HostDeviceVector<GradientPair>* gpair,
DMatrix* p_fmat,
RegTree* p_tree) {
perf_monitor.StartPerfMonitor();
builder_monitor_.Start("Update");
const std::vector<GradientPair>& gpair_h = gpair->ConstHostVector();
spliteval_->Reset();
perf_monitor.TickStart();
this->InitData(gmat, gpair_h, *p_fmat, *p_tree);
perf_monitor.UpdatePerfTimer(TreeGrowingPerfMonitor::timer_name::INIT_DATA);
if (param_.grow_policy == TrainParam::kLossGuide) {
ExpandWithLossGuide(gmat, gmatb, column_matrix, p_fmat, p_tree, gpair_h);
@ -362,7 +342,7 @@ void QuantileHistMaker::Builder::Update(const GHistIndexMatrix& gmat,
pruner_->Update(gpair, p_fmat, std::vector<RegTree*>{p_tree});
perf_monitor.EndPerfMonitor();
builder_monitor_.Stop("Update");
}
bool QuantileHistMaker::Builder::UpdatePredictionCache(
@ -419,6 +399,7 @@ void QuantileHistMaker::Builder::InitData(const GHistIndexMatrix& gmat,
CHECK(param_.max_depth > 0) << "max_depth cannot be 0 (unlimited) "
<< "when grow_policy is depthwise.";
}
builder_monitor_.Start("InitData");
const auto& info = fmat.Info();
{
@ -519,6 +500,7 @@ void QuantileHistMaker::Builder::InitData(const GHistIndexMatrix& gmat,
qexpand_depth_wise_.clear();
}
}
builder_monitor_.Stop("InitData");
}
void QuantileHistMaker::Builder::EvaluateSplit(const int nid,
@ -526,6 +508,7 @@ void QuantileHistMaker::Builder::EvaluateSplit(const int nid,
const HistCollection& hist,
const DMatrix& fmat,
const RegTree& tree) {
builder_monitor_.Start("EvaluateSplit");
// start enumeration
const MetaInfo& info = fmat.Info();
auto p_feature_set = column_sampler_.GetFeatureSet(tree.GetDepth(nid));
@ -550,6 +533,7 @@ void QuantileHistMaker::Builder::EvaluateSplit(const int nid,
for (unsigned tid = 0; tid < nthread; ++tid) {
snode_[nid].best.Update(best_split_tloc_[tid]);
}
builder_monitor_.Stop("EvaluateSplit");
}
void QuantileHistMaker::Builder::ApplySplit(int nid,
@ -558,6 +542,7 @@ void QuantileHistMaker::Builder::ApplySplit(int nid,
const HistCollection& hist,
const DMatrix& fmat,
RegTree* p_tree) {
builder_monitor_.Start("ApplySplit");
// TODO(hcho3): support feature sampling by levels
/* 1. Create child nodes */
@ -606,6 +591,7 @@ void QuantileHistMaker::Builder::ApplySplit(int nid,
row_set_collection_.AddSplit(
nid, row_split_tloc_, (*p_tree)[nid].LeftChild(), (*p_tree)[nid].RightChild());
builder_monitor_.Stop("ApplySplit");
}
void QuantileHistMaker::Builder::ApplySplitDenseData(
@ -744,6 +730,7 @@ void QuantileHistMaker::Builder::InitNewNode(int nid,
const std::vector<GradientPair>& gpair,
const DMatrix& fmat,
const RegTree& tree) {
builder_monitor_.Start("InitNewNode");
{
snode_.resize(tree.param.num_nodes, NodeEntry(param_));
}
@ -786,6 +773,7 @@ void QuantileHistMaker::Builder::InitNewNode(int nid,
snode_[nid].root_gain = static_cast<float>(
spliteval_->ComputeScore(parentid, snode_[nid].stats, snode_[nid].weight));
}
builder_monitor_.Stop("InitNewNode");
}
// enumerate the split values of specific feature

View File

@ -22,6 +22,7 @@
#include "./param.h"
#include "./split_evaluator.h"
#include "../common/random.h"
#include "../common/timer.h"
#include "../common/hist_util.h"
#include "../common/row_set.h"
#include "../common/column_matrix.h"
@ -87,7 +88,9 @@ class QuantileHistMaker: public TreeUpdater {
std::unique_ptr<SplitEvaluator> spliteval)
: param_(param), pruner_(std::move(pruner)),
spliteval_(std::move(spliteval)), p_last_tree_(nullptr),
p_last_fmat_(nullptr) {}
p_last_fmat_(nullptr) {
builder_monitor_.Init("Quantile::Builder");
}
// update one tree, growing
virtual void Update(const GHistIndexMatrix& gmat,
const GHistIndexBlockMatrix& gmatb,
@ -102,6 +105,7 @@ class QuantileHistMaker: public TreeUpdater {
const GHistIndexBlockMatrix& gmatb,
GHistRow hist,
bool sync_hist) {
builder_monitor_.Start("BuildHist");
if (param_.enable_feature_grouping > 0) {
hist_builder_.BuildBlockHist(gpair, row_indices, gmatb, hist);
} else {
@ -110,10 +114,13 @@ class QuantileHistMaker: public TreeUpdater {
if (sync_hist) {
this->histred_.Allreduce(hist.data(), hist_builder_.GetNumBins());
}
builder_monitor_.Stop("BuildHist");
}
// Timed wrapper around hist_builder_.SubtractionTrick(): forwards the three
// histogram rows unchanged and brackets the call with builder_monitor_
// Start/Stop under the label "SubtractionTrick".
// NOTE(review): presumably `self` is derived from `parent` and `sibling`;
// the actual arithmetic lives in hist_builder_ and is not visible here -- confirm.
inline void SubtractionTrick(GHistRow self, GHistRow sibling, GHistRow parent) {
builder_monitor_.Start("SubtractionTrick");
hist_builder_.SubtractionTrick(self, sibling, parent);
// Stop() must use the same label that was passed to Start() above.
builder_monitor_.Stop("SubtractionTrick");
}
bool UpdatePredictionCache(const DMatrix* data,
@ -130,84 +137,6 @@ class QuantileHistMaker: public TreeUpdater {
: nid(nid), depth(depth), loss_chg(loss_chg), timestamp(tstmp) {}
};
/*!
 * \brief Accumulates the time spent in each phase of growing one tree and logs
 *        a per-phase breakdown.
 *
 * Usage: call StartPerfMonitor() once, then wrap every timed section in
 * TickStart() / UpdatePerfTimer(phase), and finally call EndPerfMonitor() to
 * log the totals and clear the per-phase counters.
 *
 * Both time stamps start at -1 (the same "no section open" sentinel that
 * UpdatePerfTimer() writes back), so the CHECK_GT guards below compare against
 * a defined value instead of reading an uninitialized double when a start call
 * was forgotten.
 */
struct TreeGrowingPerfMonitor {
  enum timer_name {INIT_DATA, INIT_NEW_NODE, BUILD_HIST, EVALUATE_SPLIT, APPLY_SPLIT};

  // time stamp taken by StartPerfMonitor(); -1 until the first call
  double global_start = -1;

  // performance counters
  // time stamp taken by TickStart(); -1 whenever no timed section is open
  double tstart = -1;
  double time_init_data = 0;
  double time_init_new_node = 0;
  double time_build_hist = 0;
  double time_evaluate_split = 0;
  double time_apply_split = 0;

  /*! \brief record the starting time stamp of the whole monitored window */
  inline void StartPerfMonitor() {
    global_start = dmlc::GetTime();
  }

  /*!
   * \brief log every accumulated phase time, both as an absolute value and as
   *        a percentage of the time elapsed since StartPerfMonitor(), then
   *        zero the per-phase counters.
   */
  inline void EndPerfMonitor() {
    // fails when StartPerfMonitor() has not been called
    CHECK_GT(global_start, 0);
    double total_time = dmlc::GetTime() - global_start;
    LOG(INFO) << "\nInitData: "
              << std::fixed << std::setw(6) << std::setprecision(4) << time_init_data
              << " (" << std::fixed << std::setw(5) << std::setprecision(2)
              << time_init_data / total_time * 100 << "%)\n"
              << "InitNewNode: "
              << std::fixed << std::setw(6) << std::setprecision(4) << time_init_new_node
              << " (" << std::fixed << std::setw(5) << std::setprecision(2)
              << time_init_new_node / total_time * 100 << "%)\n"
              << "BuildHist: "
              << std::fixed << std::setw(6) << std::setprecision(4) << time_build_hist
              << " (" << std::fixed << std::setw(5) << std::setprecision(2)
              << time_build_hist / total_time * 100 << "%)\n"
              << "EvaluateSplit: "
              << std::fixed << std::setw(6) << std::setprecision(4) << time_evaluate_split
              << " (" << std::fixed << std::setw(5) << std::setprecision(2)
              << time_evaluate_split / total_time * 100 << "%)\n"
              << "ApplySplit: "
              << std::fixed << std::setw(6) << std::setprecision(4) << time_apply_split
              << " (" << std::fixed << std::setw(5) << std::setprecision(2)
              << time_apply_split / total_time * 100 << "%)\n"
              << "========================================\n"
              << "Total: "
              << std::fixed << std::setw(6) << std::setprecision(4) << total_time;
    // clear performance counters
    time_init_data = 0;
    time_init_new_node = 0;
    time_build_hist = 0;
    time_evaluate_split = 0;
    time_apply_split = 0;
  }

  /*! \brief open a timed section; close it with UpdatePerfTimer() */
  inline void TickStart() {
    tstart = dmlc::GetTime();
  }

  /*!
   * \brief close the section opened by the last TickStart() and add its
   *        duration to the counter selected by timer_name.
   * \param timer_name phase whose counter receives the elapsed time
   */
  inline void UpdatePerfTimer(const timer_name &timer_name) {
    // fails when there is no open section (TickStart() not called since the
    // previous UpdatePerfTimer(), or never called at all)
    CHECK_GT(tstart, 0);
    const double elapsed = dmlc::GetTime() - tstart;
    switch (timer_name) {
      case INIT_DATA:
        time_init_data += elapsed;
        break;
      case INIT_NEW_NODE:
        time_init_new_node += elapsed;
        break;
      case BUILD_HIST:
        time_build_hist += elapsed;
        break;
      case EVALUATE_SPLIT:
        time_evaluate_split += elapsed;
        break;
      case APPLY_SPLIT:
        time_apply_split += elapsed;
        break;
    }
    // mark the section as closed
    tstart = -1;
  }
};
// initialize temp data structure
void InitData(const GHistIndexMatrix& gmat,
const std::vector<GradientPair>& gpair,
@ -347,7 +276,7 @@ class QuantileHistMaker: public TreeUpdater {
enum DataLayout { kDenseDataZeroBased, kDenseDataOneBased, kSparseData };
DataLayout data_layout_;
TreeGrowingPerfMonitor perf_monitor;
common::Monitor builder_monitor_;
rabit::Reducer<GradStats, GradStats::Reduce> histred_;
};