Separate Depthwise and Lossguide growing policies in fast histogram (#4102)

* add back train method but mark as deprecated

* add back train method but mark as deprecated

* add back train method but mark as deprecated

* fix scalastyle error

* fix scalastyle error

* fix scalastyle error

* fix scalastyle error

* init

* more changes

* temp

* update

* update rabit

* change the histogram

* update kfactor

* sync per node stats

* temp

* update

* final

* code clean

* update rabit

* more cleanup

* fix errors

* fix failed tests

* enforce c++11

* broadcast subsampled feature correctly

* init col

* temp

* col sampling

* fix histmatrix init

* fix col sampling

* remove cout

* fix out of bound access

* fix core dump

remove core dump file

* disable test temporarily

* update

* add fid

* print perf data

* update

* revert some changes

* temp

* temp

* pass all tests

* bring back some tests

* recover some changes

* fix lint issue

* enable monotone and interaction constraints

* don't specify default for monotone and interactions

* recover column init part

* more recovery

* fix core dumps

* code clean

* revert some changes

* fix test compilation issue

* fix lint issue

* resolve compilation issue

* fix issues of lint caused by rebase

* fix stylistic changes and change variable names

* use regtree internal function

* modularize depth width

* address the comments

* fix failed tests

* wrap perf timers with class

* fix lint

* fix num_leaves count

* fix indentation

* Update src/tree/updater_quantile_hist.cc

Co-Authored-By: CodingCat <CodingCat@users.noreply.github.com>

* Update src/tree/updater_quantile_hist.h

Co-Authored-By: CodingCat <CodingCat@users.noreply.github.com>

* Update src/tree/updater_quantile_hist.cc

Co-Authored-By: CodingCat <CodingCat@users.noreply.github.com>

* Update src/tree/updater_quantile_hist.cc

Co-Authored-By: CodingCat <CodingCat@users.noreply.github.com>

* Update src/tree/updater_quantile_hist.cc

Co-Authored-By: CodingCat <CodingCat@users.noreply.github.com>

* Update src/tree/updater_quantile_hist.h

Co-Authored-By: CodingCat <CodingCat@users.noreply.github.com>

* merge

* fix compilation
This commit is contained in:
Nan Zhu
2019-02-13 12:56:19 -08:00
committed by GitHub
parent 3be1b9ae30
commit c18a3660fa
8 changed files with 457 additions and 189 deletions

View File

@@ -1,8 +1,6 @@
/*!
* Copyright 2017-2018 by Contributors
* Copyright 2017-2019 by Contributors
* \file hist_util.h
* \brief Utilities to store histograms
* \author Philip Cho, Tianqi Chen
*/
#include <rabit/rabit.h>
#include <dmlc/omp.h>
@@ -161,6 +159,7 @@ void GHistIndexMatrix::Init(DMatrix* p_fmat, int max_num_bins) {
SparsePage::Inst inst = batch[i];
CHECK_EQ(ibegin + inst.size(), iend);
for (bst_uint j = 0; j < inst.size(); ++j) {
uint32_t idx = cut.GetBinIdx(inst[j]);

View File

@@ -73,8 +73,7 @@ void QuantileHistMaker::Update(HostDeviceVector<GradientPair> *gpair,
std::unique_ptr<SplitEvaluator>(spliteval_->GetHostClone())));
}
for (auto tree : trees) {
builder_->Update
(gmat_, gmatb_, column_matrix_, gpair, dmat, tree);
builder_->Update(gmat_, gmatb_, column_matrix_, gpair, dmat, tree);
}
param_.learning_rate = lr;
}
@@ -89,120 +88,275 @@ bool QuantileHistMaker::UpdatePredictionCache(
}
}
void QuantileHistMaker::Builder::Update(const GHistIndexMatrix& gmat,
const GHistIndexBlockMatrix& gmatb,
const ColumnMatrix& column_matrix,
HostDeviceVector<GradientPair>* gpair,
DMatrix* p_fmat,
RegTree* p_tree) {
double gstart = dmlc::GetTime();
void QuantileHistMaker::Builder::SyncHistograms(
int starting_index,
int sync_count,
RegTree *p_tree) {
perf_monitor.TickStart();
this->histred_.Allreduce(hist_[starting_index].data(), hist_builder_.GetNumBins() * sync_count);
// use Subtraction Trick
for (auto local_it = nodes_for_subtraction_trick_.begin();
local_it != nodes_for_subtraction_trick_.end(); local_it++) {
hist_.AddHistRow(local_it->first);
SubtractionTrick(hist_[local_it->first], hist_[local_it->second],
hist_[(*p_tree)[local_it->first].Parent()]);
}
perf_monitor.UpdatePerfTimer(TreeGrowingPerfMonitor::timer_name::BUILD_HIST);
}
int num_leaves = 0;
unsigned timestamp = 0;
void QuantileHistMaker::Builder::BuildLocalHistograms(
int *starting_index,
int *sync_count,
const GHistIndexMatrix &gmat,
const GHistIndexBlockMatrix &gmatb,
RegTree *p_tree,
const std::vector<GradientPair> &gpair_h) {
perf_monitor.TickStart();
for (size_t k = 0; k < qexpand_depth_wise_.size(); k++) {
int nid = qexpand_depth_wise_[k].nid;
RegTree::Node &node = (*p_tree)[nid];
if (rabit::IsDistributed()) {
if (node.IsRoot() || node.IsLeftChild()) {
// in distributed setting, we always calcuate from left child or root node
hist_.AddHistRow(nid);
BuildHist(gpair_h, row_set_collection_[nid], gmat, gmatb, hist_[nid], false);
if (!node.IsRoot()) {
nodes_for_subtraction_trick_[(*p_tree)[node.Parent()].RightChild()] = nid;
}
(*sync_count)++;
(*starting_index) = std::min((*starting_index), nid);
}
} else {
if (!node.IsRoot() && node.IsLeftChild() &&
(row_set_collection_[nid].Size() <
row_set_collection_[(*p_tree)[node.Parent()].RightChild()].Size())) {
hist_.AddHistRow(nid);
BuildHist(gpair_h, row_set_collection_[nid], gmat, gmatb, hist_[nid], false);
nodes_for_subtraction_trick_[(*p_tree)[node.Parent()].RightChild()] = nid;
(*sync_count)++;
(*starting_index) = std::min((*starting_index), nid);
} else if (!node.IsRoot() && !node.IsLeftChild() &&
(row_set_collection_[nid].Size() <=
row_set_collection_[(*p_tree)[node.Parent()].LeftChild()].Size())) {
hist_.AddHistRow(nid);
BuildHist(gpair_h, row_set_collection_[nid], gmat, gmatb, hist_[nid], false);
nodes_for_subtraction_trick_[(*p_tree)[node.Parent()].LeftChild()] = nid;
(*sync_count)++;
(*starting_index) = std::min((*starting_index), nid);
} else if (node.IsRoot()) {
// root node
hist_.AddHistRow(nid);
BuildHist(gpair_h, row_set_collection_[nid], gmat, gmatb, hist_[nid], false);
(*sync_count)++;
(*starting_index) = std::min((*starting_index), nid);
}
}
}
perf_monitor.UpdatePerfTimer(TreeGrowingPerfMonitor::timer_name::BUILD_HIST);
}
double tstart;
double time_init_data = 0;
double time_init_new_node = 0;
double time_build_hist = 0;
double time_evaluate_split = 0;
double time_apply_split = 0;
const std::vector<GradientPair>& gpair_h = gpair->ConstHostVector();
spliteval_->Reset();
tstart = dmlc::GetTime();
this->InitData(gmat, gpair_h, *p_fmat, *p_tree);
time_init_data = dmlc::GetTime() - tstart;
// FIXME(hcho3): this code is broken when param.num_roots > 1. Please fix it
CHECK_EQ(p_tree->param.num_roots, 1)
<< "tree_method=hist does not support multiple roots at this moment";
for (int nid = 0; nid < p_tree->param.num_roots; ++nid) {
tstart = dmlc::GetTime();
hist_.AddHistRow(nid);
BuildHist(gpair_h, row_set_collection_[nid], gmat, gmatb, hist_[nid]);
time_build_hist += dmlc::GetTime() - tstart;
tstart = dmlc::GetTime();
/*!
 * \brief Initialize the per-node statistics of every node queued for the current depth and
 *        register each completed sibling pair with the split evaluator.
 * \param gmat quantized feature matrix forwarded to InitNewNode
 * \param p_fmat training data forwarded to InitNewNode
 * \param p_tree tree being grown
 * \param gpair_h gradient pairs forwarded to InitNewNode
 */
void QuantileHistMaker::Builder::BuildNodeStats(
const GHistIndexMatrix &gmat,
DMatrix *p_fmat,
RegTree *p_tree,
const std::vector<GradientPair> &gpair_h) {
// the whole loop is accounted to the INIT_NEW_NODE timer
perf_monitor.TickStart();
for (size_t k = 0; k < qexpand_depth_wise_.size(); k++) {
int nid = qexpand_depth_wise_[k].nid;
this->InitNewNode(nid, gmat, gpair_h, *p_fmat, *p_tree);
// NOTE(review): tstart / time_init_new_node are not declared in this function; this looks
// like a leftover of the pre-refactor timing code that perf_monitor replaces -- confirm.
time_init_new_node += dmlc::GetTime() - tstart;
// add constraints
// AddSplit is issued when the right child is reached: EvaluateSplits queues the left child
// before the right one, so snode_[left_sibling_id] has been initialized by an earlier iteration.
if (!(*p_tree)[nid].IsLeftChild() && !(*p_tree)[nid].IsRoot()) {
// it's a right child
auto parent_id = (*p_tree)[nid].Parent();
auto left_sibling_id = (*p_tree)[parent_id].LeftChild();
auto parent_split_feature_id = snode_[parent_id].best.SplitIndex();
spliteval_->AddSplit(parent_id, left_sibling_id, nid, parent_split_feature_id,
snode_[left_sibling_id].weight, snode_[nid].weight);
}
}
perf_monitor.UpdatePerfTimer(TreeGrowingPerfMonitor::timer_name::INIT_NEW_NODE);
}
tstart = dmlc::GetTime();
/*!
 * \brief Evaluate the best split of every node queued for the current depth; each node is
 *        either finalized as a leaf or split, in which case both children are queued.
 * \param gmat quantized feature matrix
 * \param column_matrix column-wise matrix forwarded to ApplySplit
 * \param p_fmat training data
 * \param p_tree tree being grown
 * \param num_leaves in/out leaf counter; incremented once per applied split
 * \param depth depth of the level being processed, compared against param_.max_depth
 * \param timestamp in/out counter used to stamp the entries of the queued children
 * \param temp_qexpand_depth output; receives the left then the right child of each split node
 */
void QuantileHistMaker::Builder::EvaluateSplits(
const GHistIndexMatrix &gmat,
const ColumnMatrix &column_matrix,
DMatrix *p_fmat,
RegTree *p_tree,
int *num_leaves,
int depth,
unsigned *timestamp,
std::vector<ExpandEntry> *temp_qexpand_depth) {
for (size_t k = 0; k < qexpand_depth_wise_.size(); k++) {
int nid = qexpand_depth_wise_[k].nid;
perf_monitor.TickStart();
this->EvaluateSplit(nid, gmat, hist_, *p_fmat, *p_tree);
// NOTE(review): the next two lines use tstart / time_evaluate_split / qexpand_, none of which
// is declared for this function, and the push expression is unterminated; they look like
// leftovers of the pre-refactor code -- confirm.
time_evaluate_split += dmlc::GetTime() - tstart;
qexpand_->push(ExpandEntry(nid, p_tree->GetDepth(nid),
perf_monitor.UpdatePerfTimer(TreeGrowingPerfMonitor::timer_name::EVALUATE_SPLIT);
// stop expanding when the gain is below kRtEps, or when a positive max_depth / max_leaves
// limit has been reached (non-positive limits disable the respective check)
if (snode_[nid].best.loss_chg < kRtEps ||
(param_.max_depth > 0 && depth == param_.max_depth) ||
(param_.max_leaves > 0 && (*num_leaves) == param_.max_leaves)) {
(*p_tree)[nid].SetLeaf(snode_[nid].weight * param_.learning_rate);
} else {
perf_monitor.TickStart();
this->ApplySplit(nid, gmat, column_matrix, hist_, *p_fmat, p_tree);
perf_monitor.UpdatePerfTimer(TreeGrowingPerfMonitor::timer_name::APPLY_SPLIT);
int left_id = (*p_tree)[nid].LeftChild();
int right_id = (*p_tree)[nid].RightChild();
// loss_chg is passed as 0.0 for both children; the left child is queued first
temp_qexpand_depth->push_back(ExpandEntry(left_id,
p_tree->GetDepth(left_id), 0.0, (*timestamp)++));
temp_qexpand_depth->push_back(ExpandEntry(right_id,
p_tree->GetDepth(right_id), 0.0, (*timestamp)++));
// - 1 parent + 2 new children
(*num_leaves)++;
}
}
}
/*!
 * \brief Grow the tree level by level (depth-wise policy).
 *
 * Each iteration processes every node queued for one depth: build local histograms,
 * synchronize them, initialize node statistics, then evaluate/apply splits. The children
 * produced by the applied splits form the queue of the next iteration.
 *
 * \param gmat quantized feature matrix
 * \param gmatb block-wise quantized matrix
 * \param column_matrix column-wise matrix used when applying splits
 * \param p_fmat training data
 * \param p_tree tree being grown
 * \param gpair_h gradient pairs
 */
void QuantileHistMaker::Builder::ExpandWithDepthWidth(
    const GHistIndexMatrix &gmat,
    const GHistIndexBlockMatrix &gmatb,
    const ColumnMatrix &column_matrix,
    DMatrix *p_fmat,
    RegTree *p_tree,
    const std::vector<GradientPair> &gpair_h) {
  unsigned timestamp = 0;
  int num_leaves = 0;

  // in depth_wise growing, we feed loss_chg with 0.0 since it is not used anyway
  qexpand_depth_wise_.push_back(ExpandEntry(0, p_tree->GetDepth(0), 0.0, timestamp++));
  ++num_leaves;

  // EvaluateSplits only enforces the depth limit when max_depth > 0, i.e. a non-positive
  // max_depth means "no limit". The loop bound has to agree with that: bounding by
  // max_depth + 1 unconditionally would stop after the root level when max_depth == 0 and
  // leave the freshly created children without stats, evaluation or a final leaf value.
  // For max_depth > 0 the bound is never the reason the loop ends: at depth == max_depth
  // every node becomes a leaf, nothing is queued and the loop breaks.
  const bool depth_unbounded = param_.max_depth <= 0;
  for (int depth = 0; depth_unbounded || depth <= param_.max_depth; ++depth) {
    int starting_index = std::numeric_limits<int>::max();
    int sync_count = 0;
    std::vector<ExpandEntry> temp_qexpand_depth;

    BuildLocalHistograms(&starting_index, &sync_count, gmat, gmatb, p_tree, gpair_h);
    SyncHistograms(starting_index, sync_count, p_tree);
    BuildNodeStats(gmat, p_fmat, p_tree, gpair_h);
    EvaluateSplits(gmat, column_matrix, p_fmat, p_tree, &num_leaves, depth, &timestamp,
                   &temp_qexpand_depth);

    // clean up
    qexpand_depth_wise_.clear();
    nodes_for_subtraction_trick_.clear();
    if (temp_qexpand_depth.empty()) {
      break;
    }
    // hand the next level over without copying; the emptied temporary dies with the iteration
    qexpand_depth_wise_.swap(temp_qexpand_depth);
  }
}
/*!
 * \brief Grow the tree with the loss-guided policy: candidates are taken one at a time from
 *        qexpand_loss_guided_ (created with the LossGuide comparator in InitData) and are
 *        either finalized as leaves or split, with both children pushed back as candidates.
 * \param gmat quantized feature matrix
 * \param gmatb block-wise quantized matrix
 * \param column_matrix column-wise matrix forwarded to ApplySplit
 * \param p_fmat training data
 * \param p_tree tree being grown
 * \param gpair_h gradient pairs
 *
 * NOTE(review): statements below that use qexpand_, tstart or time_*, and the BuildHist calls
 * without the trailing sync flag, each sit next to a perf_monitor / qexpand_loss_guided_ /
 * flagged counterpart; they look like pre-refactor lines kept alongside the new ones -- confirm.
 */
void QuantileHistMaker::Builder::ExpandWithLossGuide(
const GHistIndexMatrix& gmat,
const GHistIndexBlockMatrix& gmatb,
const ColumnMatrix& column_matrix,
DMatrix* p_fmat,
RegTree* p_tree,
const std::vector<GradientPair>& gpair_h) {
unsigned timestamp = 0;
int num_leaves = 0;
// seed the queue: every root gets a synchronized histogram, node stats and a split evaluation
for (int nid = 0; nid < p_tree->param.num_roots; ++nid) {
perf_monitor.TickStart();
hist_.AddHistRow(nid);
BuildHist(gpair_h, row_set_collection_[nid], gmat, gmatb, hist_[nid], true);
perf_monitor.UpdatePerfTimer(TreeGrowingPerfMonitor::timer_name::BUILD_HIST);
perf_monitor.TickStart();
this->InitNewNode(nid, gmat, gpair_h, *p_fmat, *p_tree);
perf_monitor.UpdatePerfTimer(TreeGrowingPerfMonitor::timer_name::INIT_NEW_NODE);
perf_monitor.TickStart();
this->EvaluateSplit(nid, gmat, hist_, *p_fmat, *p_tree);
perf_monitor.UpdatePerfTimer(TreeGrowingPerfMonitor::timer_name::EVALUATE_SPLIT);
qexpand_loss_guided_->push(ExpandEntry(nid, p_tree->GetDepth(nid),
snode_[nid].best.loss_chg,
timestamp++));
++num_leaves;
}
while (!qexpand_->empty()) {
const ExpandEntry candidate = qexpand_->top();
while (!qexpand_loss_guided_->empty()) {
const ExpandEntry candidate = qexpand_loss_guided_->top();
const int nid = candidate.nid;
qexpand_->pop();
qexpand_loss_guided_->pop();
// finalize as a leaf when the gain is at most kRtEps, or when a positive max_depth /
// max_leaves limit has been reached (non-positive limits disable the respective check)
if (candidate.loss_chg <= kRtEps
|| (param_.max_depth > 0 && candidate.depth == param_.max_depth)
|| (param_.max_leaves > 0 && num_leaves == param_.max_leaves) ) {
(*p_tree)[nid].SetLeaf(snode_[nid].weight * param_.learning_rate);
} else {
tstart = dmlc::GetTime();
perf_monitor.TickStart();
this->ApplySplit(nid, gmat, column_matrix, hist_, *p_fmat, p_tree);
time_apply_split += dmlc::GetTime() - tstart;
perf_monitor.UpdatePerfTimer(TreeGrowingPerfMonitor::timer_name::APPLY_SPLIT);
tstart = dmlc::GetTime();
perf_monitor.TickStart();
const int cleft = (*p_tree)[nid].LeftChild();
const int cright = (*p_tree)[nid].RightChild();
hist_.AddHistRow(cleft);
hist_.AddHistRow(cright);
// only one child histogram is built from data; the other is obtained from the parent's
// histogram via SubtractionTrick(self, sibling, parent)
if (rabit::IsDistributed()) {
// in distributed mode, we need to keep consistent across workers
BuildHist(gpair_h, row_set_collection_[cleft], gmat, gmatb, hist_[cleft]);
BuildHist(gpair_h, row_set_collection_[cleft], gmat, gmatb, hist_[cleft], true);
SubtractionTrick(hist_[cright], hist_[cleft], hist_[nid]);
} else {
// build the child with fewer rows; on equal sizes the right child is built
if (row_set_collection_[cleft].Size() < row_set_collection_[cright].Size()) {
BuildHist(gpair_h, row_set_collection_[cleft], gmat, gmatb, hist_[cleft]);
BuildHist(gpair_h, row_set_collection_[cleft], gmat, gmatb, hist_[cleft], true);
SubtractionTrick(hist_[cright], hist_[cleft], hist_[nid]);
} else {
BuildHist(gpair_h, row_set_collection_[cright], gmat, gmatb, hist_[cright]);
BuildHist(gpair_h, row_set_collection_[cright], gmat, gmatb, hist_[cright], true);
SubtractionTrick(hist_[cleft], hist_[cright], hist_[nid]);
}
}
time_build_hist += dmlc::GetTime() - tstart;
perf_monitor.UpdatePerfTimer(TreeGrowingPerfMonitor::timer_name::BUILD_HIST);
tstart = dmlc::GetTime();
perf_monitor.TickStart();
this->InitNewNode(cleft, gmat, gpair_h, *p_fmat, *p_tree);
this->InitNewNode(cright, gmat, gpair_h, *p_fmat, *p_tree);
bst_uint featureid = snode_[nid].best.SplitIndex();
spliteval_->AddSplit(nid, cleft, cright, featureid,
snode_[cleft].weight, snode_[cright].weight);
time_init_new_node += dmlc::GetTime() - tstart;
// NOTE(review): this interval brackets InitNewNode/AddSplit but is accounted to APPLY_SPLIT;
// the seeding loop above and the adjacent time_init_new_node line suggest INIT_NEW_NODE
// was intended -- confirm.
perf_monitor.UpdatePerfTimer(TreeGrowingPerfMonitor::timer_name::APPLY_SPLIT);
tstart = dmlc::GetTime();
perf_monitor.TickStart();
this->EvaluateSplit(cleft, gmat, hist_, *p_fmat, *p_tree);
this->EvaluateSplit(cright, gmat, hist_, *p_fmat, *p_tree);
time_evaluate_split += dmlc::GetTime() - tstart;
perf_monitor.UpdatePerfTimer(TreeGrowingPerfMonitor::timer_name::EVALUATE_SPLIT);
// both children become candidates, keyed by the loss change of their own best split
qexpand_->push(ExpandEntry(cleft, p_tree->GetDepth(cleft),
qexpand_loss_guided_->push(ExpandEntry(cleft, p_tree->GetDepth(cleft),
snode_[cleft].best.loss_chg,
timestamp++));
qexpand_->push(ExpandEntry(cright, p_tree->GetDepth(cright),
qexpand_loss_guided_->push(ExpandEntry(cright, p_tree->GetDepth(cright),
snode_[cright].best.loss_chg,
timestamp++));
++num_leaves; // give two and take one, as parent is no longer a leaf
}
}
}
// set all the rest expanding nodes to leaf
// This post condition is not needed in current code, but may be necessary
// when there are stopping rule that leaves qexpand non-empty
while (!qexpand_->empty()) {
const int nid = qexpand_->top().nid;
qexpand_->pop();
(*p_tree)[nid].SetLeaf(snode_[nid].weight * param_.learning_rate);
void QuantileHistMaker::Builder::Update(const GHistIndexMatrix& gmat,
const GHistIndexBlockMatrix& gmatb,
const ColumnMatrix& column_matrix,
HostDeviceVector<GradientPair>* gpair,
DMatrix* p_fmat,
RegTree* p_tree) {
perf_monitor.StartPerfMonitor();
const std::vector<GradientPair>& gpair_h = gpair->ConstHostVector();
spliteval_->Reset();
perf_monitor.TickStart();
this->InitData(gmat, gpair_h, *p_fmat, *p_tree);
perf_monitor.UpdatePerfTimer(TreeGrowingPerfMonitor::timer_name::INIT_DATA);
if (param_.grow_policy == TrainParam::kLossGuide) {
ExpandWithLossGuide(gmat, gmatb, column_matrix, p_fmat, p_tree, gpair_h);
} else {
ExpandWithDepthWidth(gmat, gmatb, column_matrix, p_fmat, p_tree, gpair_h);
}
// remember auxiliary statistics in the tree node
for (int nid = 0; nid < p_tree->param.num_nodes; ++nid) {
p_tree->Stat(nid).loss_chg = snode_[nid].best.loss_chg;
p_tree->Stat(nid).base_weight = snode_[nid].weight;
@@ -211,30 +365,7 @@ void QuantileHistMaker::Builder::Update(const GHistIndexMatrix& gmat,
pruner_->Update(gpair, p_fmat, std::vector<RegTree*>{p_tree});
double total_time = dmlc::GetTime() - gstart;
LOG(INFO) << "\nInitData: "
<< std::fixed << std::setw(6) << std::setprecision(4) << time_init_data
<< " (" << std::fixed << std::setw(5) << std::setprecision(2)
<< time_init_data / total_time * 100 << "%)\n"
<< "InitNewNode: "
<< std::fixed << std::setw(6) << std::setprecision(4) << time_init_new_node
<< " (" << std::fixed << std::setw(5) << std::setprecision(2)
<< time_init_new_node / total_time * 100 << "%)\n"
<< "BuildHist: "
<< std::fixed << std::setw(6) << std::setprecision(4) << time_build_hist
<< " (" << std::fixed << std::setw(5) << std::setprecision(2)
<< time_build_hist / total_time * 100 << "%)\n"
<< "EvaluateSplit: "
<< std::fixed << std::setw(6) << std::setprecision(4) << time_evaluate_split
<< " (" << std::fixed << std::setw(5) << std::setprecision(2)
<< time_evaluate_split / total_time * 100 << "%)\n"
<< "ApplySplit: "
<< std::fixed << std::setw(6) << std::setprecision(4) << time_apply_split
<< " (" << std::fixed << std::setw(5) << std::setprecision(2)
<< time_apply_split / total_time * 100 << "%)\n"
<< "========================================\n"
<< "Total: "
<< std::fixed << std::setw(6) << std::setprecision(4) << total_time;
perf_monitor.EndPerfMonitor();
}
bool QuantileHistMaker::Builder::UpdatePredictionCache(
@@ -353,14 +484,13 @@ void QuantileHistMaker::Builder::InitData(const GHistIndexMatrix& gmat,
p_last_tree_ = &tree;
// store a pointer to training data
p_last_fmat_ = &fmat;
// initialize feature index
if (data_layout_ == kDenseDataOneBased) {
column_sampler_.Init(info.num_col_, param_.colsample_bynode,
param_.colsample_bylevel, param_.colsample_bytree, true);
} else {
column_sampler_.Init(info.num_col_, param_.colsample_bynode,
param_.colsample_bylevel, param_.colsample_bytree, false);
}
}
if (data_layout_ == kDenseDataOneBased) {
column_sampler_.Init(info.num_col_, param_.colsample_bynode, param_.colsample_bylevel,
param_.colsample_bytree, true);
} else {
column_sampler_.Init(info.num_col_, param_.colsample_bynode, param_.colsample_bylevel,
param_.colsample_bytree, false);
}
if (data_layout_ == kDenseDataZeroBased || data_layout_ == kDenseDataOneBased) {
/* specialized code for dense data:
@@ -387,9 +517,9 @@ void QuantileHistMaker::Builder::InitData(const GHistIndexMatrix& gmat,
}
{
if (param_.grow_policy == TrainParam::kLossGuide) {
qexpand_.reset(new ExpandQueue(LossGuide));
qexpand_loss_guided_.reset(new ExpandQueue(LossGuide));
} else {
qexpand_.reset(new ExpandQueue(DepthWise));
qexpand_depth_wise_.clear();
}
}
}

View File

@@ -7,6 +7,7 @@
#ifndef XGBOOST_TREE_UPDATER_QUANTILE_HIST_H_
#define XGBOOST_TREE_UPDATER_QUANTILE_HIST_H_
#include <dmlc/timer.h>
#include <rabit/rabit.h>
#include <xgboost/tree_updater.h>
@@ -14,6 +15,7 @@
#include <vector>
#include <string>
#include <queue>
#include <iomanip>
#include <utility>
#include "./param.h"
@@ -97,13 +99,16 @@ class QuantileHistMaker: public TreeUpdater {
const RowSetCollection::Elem row_indices,
const GHistIndexMatrix& gmat,
const GHistIndexBlockMatrix& gmatb,
GHistRow hist) {
GHistRow hist,
bool sync_hist) {
if (param_.enable_feature_grouping > 0) {
hist_builder_.BuildBlockHist(gpair, row_indices, gmatb, hist);
} else {
hist_builder_.BuildHist(gpair, row_indices, gmat, hist);
}
this->histred_.Allreduce(hist.data(), hist_builder_.GetNumBins());
if (sync_hist) {
this->histred_.Allreduce(hist.data(), hist_builder_.GetNumBins());
}
}
inline void SubtractionTrick(GHistRow self, GHistRow sibling, GHistRow parent) {
@@ -114,6 +119,94 @@ class QuantileHistMaker: public TreeUpdater {
HostDeviceVector<bst_float>* p_out_preds);
protected:
/* tree growing policies */
/*! \brief One node waiting to be expanded, together with the keys used to order the queue. */
struct ExpandEntry {
  int nid;             // id of the tree node
  int depth;           // depth value supplied by the caller
  bst_float loss_chg;  // loss change supplied by the caller
  unsigned timestamp;  // caller-provided counter; used to break ties between entries

  ExpandEntry(int nid, int depth, bst_float loss_chg, unsigned tstmp)
      : nid{nid},
        depth{depth},
        loss_chg{loss_chg},
        timestamp{tstmp} {}
};
/*!
 * \brief Accumulates the wall-clock time spent in each tree-growing phase and logs a summary.
 *
 * Intended call sequence: StartPerfMonitor() once, then TickStart() / UpdatePerfTimer() pairs
 * around every measured section, and finally EndPerfMonitor(), which logs the per-phase
 * totals and resets them.
 */
struct TreeGrowingPerfMonitor {
  enum timer_name {INIT_DATA, INIT_NEW_NODE, BUILD_HIST, EVALUATE_SPLIT, APPLY_SPLIT};

  // Start stamps begin at -1 ("not started") so that the CHECK_GT guards below fail
  // deterministically when a Start/Tick call was forgotten, instead of reading an
  // uninitialized double.
  double global_start = -1;

  // performance counters
  double tstart = -1;
  double time_init_data = 0;
  double time_init_new_node = 0;
  double time_build_hist = 0;
  double time_evaluate_split = 0;
  double time_apply_split = 0;

  /*! \brief Record the start of the overall measurement. */
  inline void StartPerfMonitor() {
    global_start = dmlc::GetTime();
  }

  /*! \brief Log every phase total with its share of the overall time, then reset the totals. */
  inline void EndPerfMonitor() {
    CHECK_GT(global_start, 0);
    double total_time = dmlc::GetTime() - global_start;
    LOG(INFO) << "\nInitData: "
              << std::fixed << std::setw(6) << std::setprecision(4) << time_init_data
              << " (" << std::fixed << std::setw(5) << std::setprecision(2)
              << time_init_data / total_time * 100 << "%)\n"
              << "InitNewNode: "
              << std::fixed << std::setw(6) << std::setprecision(4) << time_init_new_node
              << " (" << std::fixed << std::setw(5) << std::setprecision(2)
              << time_init_new_node / total_time * 100 << "%)\n"
              << "BuildHist: "
              << std::fixed << std::setw(6) << std::setprecision(4) << time_build_hist
              << " (" << std::fixed << std::setw(5) << std::setprecision(2)
              << time_build_hist / total_time * 100 << "%)\n"
              << "EvaluateSplit: "
              << std::fixed << std::setw(6) << std::setprecision(4) << time_evaluate_split
              << " (" << std::fixed << std::setw(5) << std::setprecision(2)
              << time_evaluate_split / total_time * 100 << "%)\n"
              << "ApplySplit: "
              << std::fixed << std::setw(6) << std::setprecision(4) << time_apply_split
              << " (" << std::fixed << std::setw(5) << std::setprecision(2)
              << time_apply_split / total_time * 100 << "%)\n"
              << "========================================\n"
              << "Total: "
              << std::fixed << std::setw(6) << std::setprecision(4) << total_time;
    // clear performance counters
    time_init_data = 0;
    time_init_new_node = 0;
    time_build_hist = 0;
    time_evaluate_split = 0;
    time_apply_split = 0;
  }

  /*! \brief Record the start of one measured section. */
  inline void TickStart() {
    tstart = dmlc::GetTime();
  }

  /*!
   * \brief Add the time elapsed since the last TickStart() to the selected phase total.
   * \param timer_name phase the elapsed time is accounted to
   */
  inline void UpdatePerfTimer(const timer_name &timer_name) {
    CHECK_GT(tstart, 0);
    // read the clock once so every phase is charged from the same sample
    const double elapsed = dmlc::GetTime() - tstart;
    switch (timer_name) {
      case INIT_DATA:
        time_init_data += elapsed;
        break;
      case INIT_NEW_NODE:
        time_init_new_node += elapsed;
        break;
      case BUILD_HIST:
        time_build_hist += elapsed;
        break;
      case EVALUATE_SPLIT:
        time_evaluate_split += elapsed;
        break;
      case APPLY_SPLIT:
        time_apply_split += elapsed;
        break;
    }
    // invalidate the stamp so a second update without a new TickStart() trips the check
    tstart = -1;
  }
};
// initialize temp data structure
void InitData(const GHistIndexMatrix& gmat,
const std::vector<GradientPair>& gpair,
@@ -165,22 +258,45 @@ class QuantileHistMaker: public TreeUpdater {
bst_uint fid,
bst_uint nodeID);
/* tree growing policies */
// One node waiting to be expanded, together with the keys the queue comparators read.
struct ExpandEntry {
// id of the tree node
int nid;
// depth value supplied by the caller
int depth;
// loss change supplied by the caller
bst_float loss_chg;
// caller-provided counter; the comparators use it to break ties
unsigned timestamp;
// stores the four arguments unchanged; tstmp initializes timestamp
ExpandEntry(int nid, int depth, bst_float loss_chg, unsigned tstmp)
: nid(nid), depth(depth), loss_chg(loss_chg), timestamp(tstmp) {}
};
/*!
 * \brief Ordering predicate on expansion entries: compares by depth first and falls back to
 *        the timestamp when the depths are equal.
 * \return true when lhs has the larger depth, or the larger timestamp at equal depth
 */
inline static bool DepthWise(ExpandEntry lhs, ExpandEntry rhs) {
  if (lhs.depth != rhs.depth) {
    return lhs.depth > rhs.depth;  // favor small depth
  }
  return lhs.timestamp > rhs.timestamp;  // favor small timestamp
}
void ExpandWithDepthWidth(const GHistIndexMatrix &gmat,
const GHistIndexBlockMatrix &gmatb,
const ColumnMatrix &column_matrix,
DMatrix *p_fmat,
RegTree *p_tree,
const std::vector<GradientPair> &gpair_h);
void BuildLocalHistograms(int *starting_index,
int *sync_count,
const GHistIndexMatrix &gmat,
const GHistIndexBlockMatrix &gmatb,
RegTree *p_tree,
const std::vector<GradientPair> &gpair_h);
void SyncHistograms(int starting_index,
int sync_count,
RegTree *p_tree);
void BuildNodeStats(const GHistIndexMatrix &gmat,
DMatrix *p_fmat,
RegTree *p_tree,
const std::vector<GradientPair> &gpair_h);
void EvaluateSplits(const GHistIndexMatrix &gmat,
const ColumnMatrix &column_matrix,
DMatrix *p_fmat,
RegTree *p_tree,
int *num_leaves,
int depth,
unsigned *timestamp,
std::vector<ExpandEntry> *temp_qexpand_depth);
void ExpandWithLossGuide(const GHistIndexMatrix& gmat,
const GHistIndexBlockMatrix& gmatb,
const ColumnMatrix& column_matrix,
DMatrix* p_fmat,
RegTree* p_tree,
const std::vector<GradientPair>& gpair_h);
inline static bool LossGuide(ExpandEntry lhs, ExpandEntry rhs) {
if (lhs.loss_chg == rhs.loss_chg) {
return lhs.timestamp > rhs.timestamp; // favor small timestamp
@@ -218,13 +334,20 @@ class QuantileHistMaker: public TreeUpdater {
const DMatrix* p_last_fmat_;
using ExpandQueue =
std::priority_queue<ExpandEntry, std::vector<ExpandEntry>,
std::function<bool(ExpandEntry, ExpandEntry)>>;
std::unique_ptr<ExpandQueue> qexpand_;
std::priority_queue<ExpandEntry, std::vector<ExpandEntry>,
std::function<bool(ExpandEntry, ExpandEntry)>>;
std::unique_ptr<ExpandQueue> qexpand_loss_guided_;
std::vector<ExpandEntry> qexpand_depth_wise_;
// key is the id of the node whose histogram is derived with the Subtraction Trick; value is the
// id of the sibling node whose histogram was built directly and is subtracted from the parent's
std::unordered_map<int, int> nodes_for_subtraction_trick_;
enum DataLayout { kDenseDataZeroBased, kDenseDataOneBased, kSparseData };
DataLayout data_layout_;
TreeGrowingPerfMonitor perf_monitor;
rabit::Reducer<GradStats, GradStats::Reduce> histred_;
};