/*!
 * Copyright 2017-2019 by Contributors
 * \file updater_quantile_hist.cc
 * \brief use quantized feature values to construct a tree
 * \author Philip Cho, Tianqi Chen, Egor Smirnov
 */
#include <dmlc/timer.h>
#include <rabit/rabit.h>
#include <xgboost/logging.h>
#include <xgboost/tree_updater.h>

#include <cmath>
#include <memory>
#include <vector>
#include <algorithm>
#include <queue>
#include <iomanip>
#include <numeric>
#include <string>
#include <utility>

#include "./param.h"
#include "./updater_quantile_hist.h"
#include "./split_evaluator.h"
#include "../common/random.h"
#include "../common/hist_util.h"
#include "../common/row_set.h"
#include "../common/column_matrix.h"

namespace xgboost {
namespace tree {

DMLC_REGISTRY_FILE_TAG(updater_quantile_hist);

void QuantileHistMaker::Configure(const Args& args) {
  // initialize pruner
  if (!pruner_) {
    pruner_.reset(TreeUpdater::Create("prune", tparam_));
  }
  pruner_->Configure(args);
  param_.InitAllowUnknown(args);
  is_gmat_initialized_ = false;

  // initialize the split evaluator
  if (!spliteval_) {
    spliteval_.reset(SplitEvaluator::Create(param_.split_evaluator));
  }

  spliteval_->Init(args);
}

void QuantileHistMaker::Update(HostDeviceVector<GradientPair> *gpair,
                               DMatrix *dmat,
                               const std::vector<RegTree *> &trees) {
  if (is_gmat_initialized_ == false) {
    double tstart = dmlc::GetTime();
    gmat_.Init(dmat, static_cast<uint32_t>(param_.max_bin));
    column_matrix_.Init(gmat_, param_.sparse_threshold);
    if (param_.enable_feature_grouping > 0) {
      gmatb_.Init(gmat_, column_matrix_, param_);
    }
    is_gmat_initialized_ = true;
    LOG(INFO) << "Generating gmat: " << dmlc::GetTime() - tstart << " sec";
  }
  // rescale the learning rate according to the number of trees
  float lr = param_.learning_rate;
  param_.learning_rate = lr / trees.size();
  // build tree
  if (!builder_) {
    builder_.reset(new Builder(
        param_,
        std::move(pruner_),
        std::unique_ptr<SplitEvaluator>(spliteval_->GetHostClone())));
  }
  for (auto tree : trees) {
    builder_->Update(gmat_, gmatb_, column_matrix_, gpair, dmat, tree);
  }
  param_.learning_rate = lr;
}

bool QuantileHistMaker::UpdatePredictionCache(
    const DMatrix* data,
    HostDeviceVector<bst_float>* out_preds) {
  if (!builder_ || param_.subsample < 1.0f) {
    return false;
  } else {
    return builder_->UpdatePredictionCache(data, out_preds);
  }
}

void QuantileHistMaker::Builder::BuildNodeStat(
    const GHistIndexMatrix &gmat,
    DMatrix *p_fmat,
    RegTree *p_tree,
    const std::vector<GradientPair> &gpair_h,
    int32_t nid) {

  // add constraints
  if (!(*p_tree)[nid].IsLeftChild() && !(*p_tree)[nid].IsRoot()) {
    auto parent_id = (*p_tree)[nid].Parent();
    // it's a right child
    auto left_sibling_id = (*p_tree)[parent_id].LeftChild();
    auto parent_split_feature_id = snode_[parent_id].best.SplitIndex();

    spliteval_->AddSplit(parent_id, left_sibling_id, nid, parent_split_feature_id,
                         snode_[left_sibling_id].weight, snode_[nid].weight);
  }
}

void QuantileHistMaker::Builder::BuildNodeStatBatch(
    const GHistIndexMatrix &gmat,
    DMatrix *p_fmat,
    RegTree *p_tree,
    const std::vector<GradientPair> &gpair_h,
    const std::vector<ExpandEntry>& nodes) {
  perf_monitor.TickStart();
  for (const auto& node : nodes) {
    const int32_t nid = node.nid;
    const int32_t sibling_nid = node.sibling_nid;
    this->InitNewNode(nid, gmat, gpair_h, *p_fmat, p_tree, &(snode_[nid]), (*p_tree)[nid].Parent());
    if (sibling_nid > -1) {
      this->InitNewNode(nid, gmat, gpair_h, *p_fmat, p_tree,
                        &(snode_[sibling_nid]), (*p_tree)[sibling_nid].Parent());
    }
  }
  for (const auto& node : nodes) {
    const int32_t nid = node.nid;
    const int32_t sibling_nid = node.sibling_nid;
    BuildNodeStat(gmat, p_fmat, p_tree, gpair_h, nid);
    if (sibling_nid > -1) {
      BuildNodeStat(gmat, p_fmat, p_tree, gpair_h, sibling_nid);
    }
  }
  perf_monitor.UpdatePerfTimer(TreeGrowingPerfMonitor::timer_name::INIT_NEW_NODE);
}

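// The dense partition kernels below scan a block [istart, iend) of a node's
// row indices and scatter each row id into a left or right buffer.  An index
// equal to the largest representable IdxType value (max_val) is treated as a
// missing entry for this column; otherwise `idx[rid[i]] + offset` recovers
// the global bin id compared against the split condition.  The two variants
// differ only in where rows with missing values are sent (default-left vs.
// default-right).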
template<typename RowIdxType, typename IdxType>
inline std::pair<size_t, size_t> PartitionDenseLeftDefaultKernel(const RowIdxType* rid,
    const IdxType* idx, const IdxType offset, const int32_t split_cond,
    const size_t istart, const size_t iend, RowIdxType* p_left, RowIdxType* p_right) {
  size_t ileft = 0;
  size_t iright = 0;

  const IdxType max_val = std::numeric_limits<IdxType>::max();

  for (size_t i = istart; i < iend; i++) {
    if (idx[rid[i]] == max_val || static_cast<int32_t>(idx[rid[i]] + offset) <= split_cond) {
      p_left[ileft++] = rid[i];
    } else {
      p_right[iright++] = rid[i];
    }
  }

  return { ileft, iright };
}

template<typename RowIdxType, typename IdxType>
inline std::pair<size_t, size_t> PartitionDenseRightDefaultKernel(const RowIdxType* rid,
    const IdxType* idx, const IdxType offset, const int32_t split_cond,
    const size_t istart, const size_t iend, RowIdxType* p_left, RowIdxType* p_right) {
  size_t ileft = 0;
  size_t iright = 0;

  const IdxType max_val = std::numeric_limits<IdxType>::max();

  for (size_t i = istart; i < iend; i++) {
    if (idx[rid[i]] == max_val || static_cast<int32_t>(idx[rid[i]] + offset) > split_cond) {
      p_right[iright++] = rid[i];
    } else {
      p_left[ileft++] = rid[i];
    }
  }
  return { ileft, iright };
}

template<typename RowIdxType, typename IdxType>
inline std::pair<size_t, size_t> PartitionSparseKernel(const RowIdxType* rowid,
    const IdxType* idx, const int32_t split_cond, const size_t ibegin,
    const size_t iend, RowIdxType* p_left, RowIdxType* p_right,
    Column column, bool default_left) {
  size_t ileft = 0;
  size_t iright = 0;

  if (ibegin < iend) {  // ensure that [ibegin, iend) is a nonempty range
    // search for the first nonzero row with index >= rowid[ibegin]
    const size_t* p = std::lower_bound(column.GetRowData(),
                                       column.GetRowData() + column.Size(),
                                       rowid[ibegin]);
    if (p != column.GetRowData() + column.Size() && *p <= rowid[iend - 1]) {
      size_t cursor = p - column.GetRowData();

      for (size_t i = ibegin; i < iend; ++i) {
        const size_t rid = rowid[i];
        while (cursor < column.Size()
               && column.GetRowIdx(cursor) < rid
               && column.GetRowIdx(cursor) <= rowid[iend - 1]) {
          ++cursor;
        }
        if (cursor < column.Size() && column.GetRowIdx(cursor) == rid) {
          const uint32_t rbin = column.GetFeatureBinIdx(cursor);
          if (static_cast<int32_t>(rbin + column.GetBaseIdx()) <= split_cond) {
            p_left[ileft++] = rid;
          } else {
            p_right[iright++] = rid;
          }
          ++cursor;
        } else {
          // missing value
          if (default_left) {
            p_left[ileft++] = rid;
          } else {
            p_right[iright++] = rid;
          }
        }
      }
    } else {  // all rows in [ibegin, iend) have missing values
      if (default_left) {
        for (size_t i = ibegin; i < iend; ++i) {
          const size_t rid = rowid[i];
          p_left[ileft++] = rid;
        }
      } else {
        for (size_t i = ibegin; i < iend; ++i) {
          const size_t rid = rowid[i];
          p_right[iright++] = rid;
        }
      }
    }
  }
  return {ileft, iright};
}

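// FindSplitCond() both applies the best split for `nid` to the tree (via
// ExpandNode) and converts the floating-point split point back into the bin
// id used by the partition kernels, by scanning this feature's cut values.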
int32_t QuantileHistMaker::Builder::FindSplitCond(int32_t nid,
                                                  RegTree *p_tree,
                                                  const GHistIndexMatrix &gmat) {
  bst_float left_leaf_weight = spliteval_->ComputeWeight(nid,
      snode_[nid].best.left_sum) * param_.learning_rate;
  bst_float right_leaf_weight = spliteval_->ComputeWeight(nid,
      snode_[nid].best.right_sum) * param_.learning_rate;
  p_tree->ExpandNode(nid, snode_[nid].best.SplitIndex(), snode_[nid].best.split_value,
                     snode_[nid].best.DefaultLeft(), snode_[nid].weight, left_leaf_weight,
                     right_leaf_weight, snode_[nid].best.loss_chg, snode_[nid].stats.sum_hess);

  RegTree::Node node = (*p_tree)[nid];
  // categorize member rows
  const bst_uint fid = node.SplitIndex();
  const bst_float split_pt = node.SplitCond();
  const uint32_t lower_bound = gmat.cut.Ptrs()[fid];
  const uint32_t upper_bound = gmat.cut.Ptrs()[fid + 1];
  int32_t split_cond = -1;
  // convert floating-point split_pt into the corresponding bin_id
  // split_cond = -1 indicates that split_pt is less than all known cut points
  CHECK_LT(upper_bound,
           static_cast<uint32_t>(std::numeric_limits<int32_t>::max()));
  for (uint32_t i = lower_bound; i < upper_bound; ++i) {
    if (split_pt == gmat.cut.Values()[i]) {
      split_cond = static_cast<int32_t>(i);
    }
  }
  return split_cond;
}

// split the rows of each node into blocks of rows
// for subsequent parallel execution
template<typename TaskType, typename NodeType>
void QuantileHistMaker::Builder::CreateTasksForApplySplit(
    const std::vector<ExpandEntry>& nodes,
    const GHistIndexMatrix &gmat,
    RegTree *p_tree,
    int *num_leaves,
    const int depth,
    const size_t block_size,
    std::vector<TaskType>* tasks,
    std::vector<NodeType>* nodes_bounds) {
  size_t* buffer = buffer_for_partition_.data();
  size_t cur_buff_offset = 0;

  auto create_nodes = [&](int32_t this_nid) {
    if (snode_[this_nid].best.loss_chg < kRtEps ||
        (param_.max_depth > 0 && depth == param_.max_depth) ||
        (param_.max_leaves > 0 && (*num_leaves) == param_.max_leaves)) {
      (*p_tree)[this_nid].SetLeaf(snode_[this_nid].weight * param_.learning_rate);
    } else {
      const size_t nrows = row_set_collection_[this_nid].Size();
      const size_t n_blocks = nrows / block_size + !!(nrows % block_size);

      nodes_bounds->emplace_back(this_nid, tasks->size(), tasks->size() + n_blocks);

      const int32_t split_cond = FindSplitCond(this_nid, p_tree, gmat);

      for (size_t i = 0; i < n_blocks; ++i) {
        const size_t istart = i * block_size;
        const size_t iend = (i == n_blocks - 1) ? nrows : istart + block_size;

        TaskType task {this_nid, split_cond, n_blocks, i, istart, iend, nodes_bounds->size()-1,
            buffer + cur_buff_offset, buffer + cur_buff_offset + (iend-istart), 0, 0, 0, 0};
        tasks->push_back(task);
        cur_buff_offset += 2 * (iend - istart);
      }
    }
  };
  for (const auto& node : nodes) {
    const int32_t nid = node.nid;
    const int32_t sibling_nid = node.sibling_nid;
    create_nodes(nid);

    if (sibling_nid > -1) {
      create_nodes(sibling_nid);
    }
  }
}

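// CreateNewNodesBatch() partitions the rows of every split node in four
// phases: (1) cut each node's row list into fixed-size blocks, (2) partition
// every block independently into per-block left/right buffers, (3) compute,
// per node, exclusive prefix sums of the per-block left/right counts so each
// block knows where its rows land in the final layout, and (4) copy the
// buffers back so the node's row list holds all left rows followed by all
// right rows.  E.g. with two blocks whose left counts are 3 and 5, block 0
// writes its left rows starting at offset 0 and block 1 at offset 3.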
void QuantileHistMaker::Builder::CreateNewNodesBatch(
    const std::vector<ExpandEntry>& nodes,
    const GHistIndexMatrix &gmat,
    const ColumnMatrix &column_matrix,
    DMatrix *p_fmat,
    RegTree *p_tree,
    int *num_leaves,
    int depth,
    unsigned *timestamp,
    std::vector<ExpandEntry> *temp_qexpand_depth) {
  perf_monitor.TickStart();
  const size_t block_size = 2048;

  struct ApplySplitTaskInfo {
    // input
    int32_t nid;
    int32_t split_cond;
    size_t n_blocks_this_node;
    size_t i_block_this_node;
    size_t istart;
    size_t iend;
    size_t inode;
    // result
    size_t* left;
    size_t* right;
    size_t n_left;
    size_t n_right;
    size_t ileft;
    size_t iright;
  };

  struct NodeBoundsInfo {
    NodeBoundsInfo(int32_t nid, size_t begin, size_t end):
        nid(nid), begin(begin), end(end) {
    }

    int32_t nid;
    size_t begin;
    size_t end;
  };

  // create tasks for the partitioning of row_set_collection_
  std::vector<ApplySplitTaskInfo> tasks;
  std::vector<NodeBoundsInfo> nodes_bounds;

  // 1. Split the row indexes of each node into blocks of rows
  CreateTasksForApplySplit(nodes, gmat, p_tree, num_leaves,
                           depth, block_size, &tasks, &nodes_bounds);

  // buffer to store the number of rows in the left part of each row block
  std::vector<size_t> left_sizes;
  left_sizes.reserve(nodes_bounds.size());
  const int size = tasks.size();

  // execute tasks in parallel
  #pragma omp parallel
  {
    // 2. For each block of rows:
    //   a) write row indexes that go to the left child into the first buffer
    //   b) write row indexes that go to the right child into the second buffer
    // values in each buffer are kept in the original order
    #pragma omp for
    for (int32_t i = 0; i < size; ++i) {
      const int32_t nid = tasks[i].nid;
      const int32_t split_cond = tasks[i].split_cond;
      const size_t istart = tasks[i].istart;
      const size_t iend = tasks[i].iend;

      const bst_uint fid = (*p_tree)[nid].SplitIndex();
      const bool default_left = (*p_tree)[nid].DefaultLeft();
      const Column column = column_matrix.GetColumn(fid);

      const uint32_t* idx = column.GetIndex();
      const size_t* rid = row_set_collection_[nid].begin;

      if (column.GetType() == xgboost::common::kDenseColumn) {
        if (default_left) {
          auto res = PartitionDenseLeftDefaultKernel<size_t, uint32_t>(
              rid, idx, column.GetBaseIdx(), split_cond, istart, iend,
              tasks[i].left, tasks[i].right);
          tasks[i].n_left = res.first;
          tasks[i].n_right = res.second;
        } else {
          auto res = PartitionDenseRightDefaultKernel<size_t, uint32_t>(
              rid, idx, column.GetBaseIdx(), split_cond, istart, iend,
              tasks[i].left, tasks[i].right);
          tasks[i].n_left = res.first;
          tasks[i].n_right = res.second;
        }
      } else {
        auto res = PartitionSparseKernel<size_t, uint32_t>(
            rid, idx, split_cond, istart, iend, tasks[i].left, tasks[i].right, column, default_left);
        tasks[i].n_left = res.first;
        tasks[i].n_right = res.second;
      }
    }

    // 3. For each node, find the number of elements in the left part
    #pragma omp single
    {
      for (auto& node : nodes_bounds) {
        size_t n_left = 0;
        size_t n_right = 0;

        for (size_t i = node.begin; i < node.end; ++i) {
          tasks[i].ileft = n_left;
          tasks[i].iright = n_right;

          n_left += tasks[i].n_left;
          n_right += tasks[i].n_right;
        }
        left_sizes.push_back(n_left);
      }
    }

    // 4. Copy data from the buffers back to the original row_set_collection_
    #pragma omp for
    for (int32_t i = 0; i < size; ++i) {
      const size_t node_idx = tasks[i].inode;
      const int32_t nid = tasks[i].nid;
      const size_t n_left = left_sizes[node_idx];

      CHECK_LE(tasks[i].ileft + tasks[i].n_left, row_set_collection_[nid].Size());
      CHECK_LE(n_left + tasks[i].iright + tasks[i].n_right, row_set_collection_[nid].Size());

      auto* rid = const_cast<size_t*>(row_set_collection_[nid].begin);
      std::memcpy(rid + tasks[i].ileft, tasks[i].left,
                  tasks[i].n_left * sizeof(rid[0]));
      std::memcpy(rid + n_left + tasks[i].iright, tasks[i].right,
                  tasks[i].n_right * sizeof(rid[0]));
    }
  }

  // register new nodes
  for (size_t i = 0; i < nodes_bounds.size(); ++i) {
    const int32_t nid = nodes_bounds[i].nid;
    const size_t n_left = left_sizes[i];
    RegTree::Node node = (*p_tree)[nid];

    const int32_t left_id = node.LeftChild();
    const int32_t right_id = node.RightChild();
    row_set_collection_.AddSplit(nid, n_left, left_id, right_id);

    if (rabit::IsDistributed() ||
        row_set_collection_[left_id].Size() < row_set_collection_[right_id].Size()) {
      temp_qexpand_depth->push_back(ExpandEntry(left_id, right_id, nid,
          depth + 1, 0.0, (*timestamp)++));
    } else {
      temp_qexpand_depth->push_back(ExpandEntry(right_id, left_id, nid,
          depth + 1, 0.0, (*timestamp)++));
    }
  }
  perf_monitor.UpdatePerfTimer(TreeGrowingPerfMonitor::timer_name::APPLY_SPLIT);
}

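// GetHistBuffer() hands out a partial-histogram buffer for one row block.
// When a node owns one buffer per thread, buffers are indexed by thread id;
// otherwise (fewer blocks than threads) each block gets its own buffer.
// Buffers are zero-filled lazily on first use, tracked via hist_is_init.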
std::tuple<common::GradStatHist::GradType*, common::GradStatHist*>
QuantileHistMaker::Builder::GetHistBuffer(
    std::vector<uint8_t>* hist_is_init, std::vector<common::GradStatHist>* grad_stats,
    size_t block_id, size_t nthread, size_t tid,
    std::vector<common::GradStatHist::GradType*>* data_hist, size_t hist_size) {

  const size_t n_hist_for_current_node = hist_is_init->size();
  const size_t hist_id = ((n_hist_for_current_node == nthread) ? tid : block_id);

  common::GradStatHist::GradType* local_data_hist = (*data_hist)[hist_id];
  if (!(*hist_is_init)[hist_id]) {
    std::fill(local_data_hist, local_data_hist + hist_size, 0.0f);
    (*hist_is_init)[hist_id] = true;
  }

  return std::make_tuple(local_data_hist, &(*grad_stats)[hist_id]);
}

void QuantileHistMaker::Builder::CreateTasksForBuildHist(
    size_t block_size_rows,
    size_t nthread,
    const std::vector<ExpandEntry>& nodes,
    std::vector<std::vector<common::GradStatHist::GradType*>>* hist_buffers,
    std::vector<std::vector<uint8_t>>* hist_is_init,
    std::vector<std::vector<common::GradStatHist>>* grad_stats,
    std::vector<int32_t>* task_nid,
    std::vector<int32_t>* task_node_idx,
    std::vector<int32_t>* task_block_idx) {
  size_t i_hist = 0;

  // prepare tasks for parallel execution
  for (size_t i = 0; i < nodes.size(); ++i) {
    const int32_t nid = nodes[i].nid;
    const int32_t sibling_nid = nodes[i].sibling_nid;
    hist_.AddHistRow(nid);
    if (sibling_nid > -1) {
      hist_.AddHistRow(sibling_nid);
    }
    const size_t nrows = row_set_collection_[nid].Size();
    const size_t n_local_blocks = nrows / block_size_rows + !!(nrows % block_size_rows);
    const size_t n_local_histograms = std::min(nthread, n_local_blocks);

    task_nid->resize(task_nid->size() + n_local_blocks, nid);
    for (size_t j = 0; j < n_local_blocks; ++j) {
      task_node_idx->push_back(i);
      task_block_idx->push_back(j);
    }

    (*hist_buffers)[i].clear();
    for (size_t j = 0; j < n_local_histograms; j++) {
      (*hist_buffers)[i].push_back(
          reinterpret_cast<common::GradStatHist::GradType*>(hist_buff_[i_hist++].data()));
    }
    (*hist_is_init)[i].clear();
    (*hist_is_init)[i].resize(n_local_histograms, false);
    (*grad_stats)[i].resize(n_local_histograms);
  }
}

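// BuildHistsBatch() builds gradient histograms for a batch of nodes.  Each
// histogram stores 2 * nbins values (interleaved gradient/hessian sums), and
// the gradient pairs are reinterpreted as a flat array of floats so the hot
// accumulation loops can index grad/hess components directly.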
void QuantileHistMaker::Builder::BuildHistsBatch(const std::vector<ExpandEntry>& nodes,
    RegTree* p_tree, const GHistIndexMatrix &gmat, const std::vector<GradientPair>& gpair,
    std::vector<std::vector<common::GradStatHist::GradType*>>* hist_buffers,
    std::vector<std::vector<uint8_t>>* hist_is_init) {
  perf_monitor.TickStart();
  const size_t block_size_rows = 256;
  const size_t nthread = static_cast<size_t>(this->nthread_);
  const size_t nbins = gmat.cut.Ptrs().back();
  const size_t hist_size = 2 * nbins;

  hist_buffers->resize(nodes.size());
  hist_is_init->resize(nodes.size());

  // input data for tasks
  std::vector<int32_t> task_nid;
  std::vector<int32_t> task_node_idx;
  std::vector<int32_t> task_block_idx;

  // result vector
  std::vector<std::vector<common::GradStatHist>> grad_stats(nodes.size());

  // 1. Create tasks for hist construction by blocks of rows for each node
  CreateTasksForBuildHist(block_size_rows, nthread, nodes, hist_buffers, hist_is_init, &grad_stats,
                          &task_nid, &task_node_idx, &task_block_idx);
  int32_t n_hist_building_tasks = task_node_idx.size();

  const GradientPair::ValueT* const pgh =
      reinterpret_cast<const GradientPair::ValueT*>(gpair.data());

  // 2. Build partial histograms for each node
  #pragma omp parallel for schedule(static)
  for (int32_t itask = 0; itask < n_hist_building_tasks; ++itask) {
    const size_t tid = omp_get_thread_num();
    const int32_t nid = task_nid[itask];
    const int32_t block_id = task_block_idx[itask];
    // node_idx : location of node `nid` within the `nodes` list. In general, node_idx != nid
    const int32_t node_idx = task_node_idx[itask];

    common::GradStatHist::GradType* data_local_hist;
    common::GradStatHist* grad_stat;  // total gradient/hessian value for node `nid`
    std::tie(data_local_hist, grad_stat) = GetHistBuffer(&(*hist_is_init)[node_idx],
        &grad_stats[node_idx], block_id, nthread, tid,
        &(*hist_buffers)[node_idx], hist_size);

    const size_t* row_ptr = gmat.row_ptr.data();
    const size_t* rid = row_set_collection_[nid].begin;

    const size_t nrows = row_set_collection_[nid].Size();
    const size_t istart = block_id * block_size_rows;
    const size_t iend = (((block_id+1)*block_size_rows > nrows) ? nrows : istart + block_size_rows);

    // call the hist building kernel depending on the bin-matrix layout
    if (data_layout_ == kDenseDataZeroBased || data_layout_ == kDenseDataOneBased) {
      common::BuildHistLocalDense(istart, iend, nrows, rid, gmat.index.data(), pgh,
                                  row_ptr, data_local_hist, grad_stat);
    } else {
      common::BuildHistLocalSparse(istart, iend, nrows, rid, gmat.index.data(), pgh,
                                   row_ptr, data_local_hist, grad_stat);
    }
  }

  // 3. Merge grad stats for each node
  // Sync histograms in case of distributed computation
  SyncHistograms(p_tree, nodes, hist_buffers, hist_is_init, grad_stats);

  perf_monitor.UpdatePerfTimer(TreeGrowingPerfMonitor::timer_name::BUILD_HIST);
}

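// SyncHistograms() finalizes per-node histograms.  In distributed mode the
// per-thread partial histograms are merged, all-reduced across workers via
// rabit, and each sibling histogram is derived with the subtraction trick
// (hist[sibling] = hist[parent] - hist[node]).  In both modes the per-node
// gradient totals are merged and the sibling's stats are obtained by
// subtracting the node's stats from the parent's, so only the smaller child
// of a split ever needs a full histogram build.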
void QuantileHistMaker::Builder::SyncHistograms(
    RegTree* p_tree,
    const std::vector<ExpandEntry>& nodes,
    std::vector<std::vector<common::GradStatHist::GradType*>>* hist_buffers,
    std::vector<std::vector<uint8_t>>* hist_is_init,
    const std::vector<std::vector<common::GradStatHist>>& grad_stats) {
  if (rabit::IsDistributed()) {
    const int size = nodes.size();
    #pragma omp parallel for  // TODO(egorsmir): replace with n_features * nodes.size()
    for (int i = 0; i < size; ++i) {
      const int32_t nid = nodes[i].nid;
      common::GradStatHist::GradType* hist_data =
          reinterpret_cast<common::GradStatHist::GradType*>(hist_[nid].data());

      ReduceHistograms(hist_data, nullptr, nullptr, 0, hist_builder_.GetNumBins() * 2, i,
                       *hist_is_init, *hist_buffers);
    }

    for (auto elem : nodes) {
      this->histred_.Allreduce(hist_[elem.nid].data(), hist_builder_.GetNumBins());
    }

    // TODO(egorsmir): add parallel for
    for (auto elem : nodes) {
      if (elem.sibling_nid > -1) {
        SubtractionTrick(hist_[elem.sibling_nid], hist_[elem.nid],
                         hist_[(*p_tree)[elem.sibling_nid].Parent()]);
      }
    }
  }

  // merge grad stats
  {
    for (size_t inode = 0; inode < nodes.size(); ++inode) {
      const int32_t nid = nodes[inode].nid;

      if (snode_.size() <= size_t(nid)) {
        snode_.resize(nid + 1, NodeEntry(param_));
      }

      common::GradStatHist grad_stat;
      for (size_t ihist = 0; ihist < (*hist_is_init)[inode].size(); ++ihist) {
        if ((*hist_is_init)[inode][ihist]) {
          grad_stat.Add(grad_stats[inode][ihist]);
        }
      }
      this->histred_.Allreduce(&grad_stat, 1);
      snode_[nid].stats = grad_stat.ToGradStat();

      const int32_t sibling_nid = nodes[inode].sibling_nid;
      if (sibling_nid > -1) {
        if (snode_.size() <= size_t(sibling_nid)) {
          snode_.resize(sibling_nid + 1, NodeEntry(param_));
        }
        const int parent_id = (*p_tree)[nid].Parent();
        snode_[sibling_nid].stats.SetSubstract(snode_[parent_id].stats, snode_[nid].stats);
      }
    }
  }
}

// merge a block of partial histograms
void QuantileHistMaker::Builder::ReduceHistograms(
    common::GradStatHist::GradType* hist_data,
    common::GradStatHist::GradType* sibling_hist_data,
    common::GradStatHist::GradType* parent_hist_data,
    const size_t ibegin,
    const size_t iend,
    const size_t inode,
    const std::vector<std::vector<uint8_t>>& hist_is_init,
    const std::vector<std::vector<common::GradStatHist::GradType*>>& hist_buffers) {
  bool is_init = false;
  for (size_t ihist = 0; ihist < hist_is_init[inode].size(); ++ihist) {
    common::GradStatHist::GradType* partial_data = hist_buffers[inode][ihist];
    if (hist_is_init[inode][ihist] && is_init) {
      for (size_t i = ibegin; i < iend; ++i) {
        hist_data[i] += partial_data[i];
      }
    } else if (hist_is_init[inode][ihist]) {
      for (size_t i = ibegin; i < iend; ++i) {
        hist_data[i] = partial_data[i];
      }
      is_init = true;
    }
  }

  if (sibling_hist_data) {
    for (size_t i = ibegin; i < iend; ++i) {
      sibling_hist_data[i] = parent_hist_data[i] - hist_data[i];
    }
  }
}

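// ExpandWithDepthWise() grows the tree one level at a time: for every level
// it batches histogram construction, node-stat computation, split evaluation
// and row partitioning over all nodes of that level, then promotes the
// resulting children to the next level's work list.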
void QuantileHistMaker::Builder::ExpandWithDepthWise(
    const GHistIndexMatrix &gmat,
    const GHistIndexBlockMatrix &gmatb,
    const ColumnMatrix &column_matrix,
    DMatrix* p_fmat,
    RegTree* p_tree,
    const std::vector<GradientPair> &gpair_h) {
  unsigned timestamp = 0;
  int num_leaves = 0;

  // in depth-wise growing, we feed loss_chg with 0.0 since it is not used anyway
  qexpand_depth_wise_.emplace_back(0, -1, ROOT_PARENT_ID, p_tree->GetDepth(0), 0.0, timestamp++);
  ++num_leaves;

  for (int depth = 0; depth < param_.max_depth + 1; depth++) {
    std::vector<ExpandEntry> temp_qexpand_depth;

    // buffer to store partial histograms
    std::vector<std::vector<common::GradStatHist::GradType*>> hist_buffers;
    // uint8_t is used instead of bool, because concurrent read/write
    // on std::vector<bool> is not thread safe
    std::vector<std::vector<uint8_t>> hist_is_init;

    BuildHistsBatch(qexpand_depth_wise_, p_tree, gmat, gpair_h,
                    &hist_buffers, &hist_is_init);
    BuildNodeStatBatch(gmat, p_fmat, p_tree, gpair_h, qexpand_depth_wise_);
    EvaluateSplitsBatch(qexpand_depth_wise_, gmat, *p_fmat, hist_is_init, hist_buffers);
    CreateNewNodesBatch(qexpand_depth_wise_, gmat, column_matrix, p_fmat, p_tree,
                        &num_leaves, depth, &timestamp, &temp_qexpand_depth);

    num_leaves += temp_qexpand_depth.size();

    // clean up
    qexpand_depth_wise_.clear();
    nodes_for_subtraction_trick_.clear();
    if (temp_qexpand_depth.empty()) {
      break;
    } else {
      qexpand_depth_wise_ = temp_qexpand_depth;
      temp_qexpand_depth.clear();
    }
  }
}

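// ExpandWithLossGuide() grows the tree best-first: candidate leaves wait in a
// priority queue ordered by loss reduction, and the most promising leaf is
// split next.  The loop ends once the queue is empty; limits such as
// max_leaves make nodes terminal at task-creation time, so their children
// never enter the queue.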
void QuantileHistMaker::Builder::ExpandWithLossGuide(
    const GHistIndexMatrix& gmat,
    const GHistIndexBlockMatrix& gmatb,
    const ColumnMatrix& column_matrix,
    DMatrix* p_fmat,
    RegTree* p_tree,
    const std::vector<GradientPair>& gpair_h) {
  unsigned timestamp = 0;
  int num_leaves = 0;

  std::vector<std::vector<common::GradStatHist::GradType*>> hist_buffers;
  std::vector<std::vector<uint8_t>> hist_is_init;

  for (int nid = 0; nid < p_tree->param.num_roots; ++nid) {
    std::vector<ExpandEntry> nodes_to_build{ExpandEntry(
        0, -1, ROOT_PARENT_ID, p_tree->GetDepth(0), 0.0, timestamp++)};

    BuildHistsBatch(nodes_to_build, p_tree, gmat, gpair_h, &hist_buffers, &hist_is_init);
    BuildNodeStatBatch(gmat, p_fmat, p_tree, gpair_h, nodes_to_build);
    EvaluateSplitsBatch(nodes_to_build, gmat, *p_fmat, hist_is_init, hist_buffers);

    qexpand_loss_guided_->push(ExpandEntry(nid, -1, -1, p_tree->GetDepth(nid),
                                           snode_[nid].best.loss_chg,
                                           timestamp++));
    ++num_leaves;
  }

  while (!qexpand_loss_guided_->empty()) {
    const ExpandEntry candidate = qexpand_loss_guided_->top();
    const int32_t nid = candidate.nid;
    qexpand_loss_guided_->pop();

    std::vector<ExpandEntry> nodes_to_build{candidate};
    std::vector<ExpandEntry> successors;

    CreateNewNodesBatch(nodes_to_build, gmat, column_matrix, p_fmat, p_tree,
                        &num_leaves, candidate.depth, &timestamp, &successors);

    if (!successors.empty()) {
      BuildHistsBatch(successors, p_tree, gmat, gpair_h, &hist_buffers, &hist_is_init);
      BuildNodeStatBatch(gmat, p_fmat, p_tree, gpair_h, successors);
      EvaluateSplitsBatch(successors, gmat, *p_fmat, hist_is_init, hist_buffers);

      const int32_t cleft = (*p_tree)[nid].LeftChild();
      const int32_t cright = (*p_tree)[nid].RightChild();

      qexpand_loss_guided_->push(ExpandEntry(cleft, -1, nid, p_tree->GetDepth(cleft),
                                             snode_[cleft].best.loss_chg,
                                             timestamp++));
      qexpand_loss_guided_->push(ExpandEntry(cright, -1, nid, p_tree->GetDepth(cright),
                                             snode_[cright].best.loss_chg,
                                             timestamp++));
      ++num_leaves;  // give two and take one, as the parent is no longer a leaf
    }
  }
}

void QuantileHistMaker::Builder::Update(const GHistIndexMatrix& gmat,
                                        const GHistIndexBlockMatrix& gmatb,
                                        const ColumnMatrix& column_matrix,
                                        HostDeviceVector<GradientPair>* gpair,
                                        DMatrix* p_fmat,
                                        RegTree* p_tree) {
  perf_monitor.StartPerfMonitor();

  const std::vector<GradientPair>& gpair_h = gpair->ConstHostVector();
  spliteval_->Reset();

  perf_monitor.TickStart();
  this->InitData(gmat, gpair_h, *p_fmat, *p_tree);
  perf_monitor.UpdatePerfTimer(TreeGrowingPerfMonitor::timer_name::INIT_DATA);

  if (param_.grow_policy == TrainParam::kLossGuide) {
    ExpandWithLossGuide(gmat, gmatb, column_matrix, p_fmat, p_tree, gpair_h);
  } else {
    ExpandWithDepthWise(gmat, gmatb, column_matrix, p_fmat, p_tree, gpair_h);
  }

  for (int nid = 0; nid < p_tree->param.num_nodes; ++nid) {
    p_tree->Stat(nid).loss_chg = snode_[nid].best.loss_chg;
    p_tree->Stat(nid).base_weight = snode_[nid].weight;
    p_tree->Stat(nid).sum_hess =
        static_cast<common::GradStatHist::GradType>(snode_[nid].stats.sum_hess);
  }

  pruner_->Update(gpair, p_fmat, std::vector<RegTree*>{p_tree});

  perf_monitor.EndPerfMonitor();
}

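// UpdatePredictionCache() refreshes cached predictions without re-traversing
// the tree: the row partition built during training already maps every row to
// its leaf, so each leaf value is simply added to the predictions of the rows
// in that leaf, block by block in parallel.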
bool QuantileHistMaker::Builder::UpdatePredictionCache(
    const DMatrix* data,
    HostDeviceVector<bst_float>* p_out_preds) {
  std::vector<bst_float>& out_preds = p_out_preds->HostVector();

  // p_last_fmat_ is a valid pointer as long as UpdatePredictionCache() is called in
  // conjunction with Update().
  if (!p_last_fmat_ || !p_last_tree_ || data != p_last_fmat_) {
    return false;
  }

  if (leaf_value_cache_.empty()) {
    leaf_value_cache_.resize(p_last_tree_->param.num_nodes,
                             std::numeric_limits<float>::infinity());
  }

  CHECK_GT(out_preds.size(), 0U);

  const size_t block_size = 2048;
  const size_t n_nodes = row_set_collection_.end() - row_set_collection_.begin();
  std::vector<RowSetCollection::Elem> tasks_elem;
  std::vector<size_t> tasks_iblock;
  std::vector<size_t> tasks_nblock;

  for (size_t k = 0; k < n_nodes; ++k) {
    const size_t nrows = row_set_collection_[k].Size();
    const size_t nblocks = nrows / block_size + !!(nrows % block_size);

    for (size_t i = 0; i < nblocks; ++i) {
      tasks_elem.push_back(row_set_collection_[k]);
      tasks_iblock.push_back(i);
      tasks_nblock.push_back(nblocks);
    }
  }

  #pragma omp parallel for schedule(static)
  for (omp_ulong k = 0; k < tasks_elem.size(); ++k) {
    const RowSetCollection::Elem rowset = tasks_elem[k];
    if (rowset.begin != nullptr && rowset.end != nullptr && rowset.node_id != -1) {
      const size_t nrows = rowset.Size();
      const size_t iblock = tasks_iblock[k];
      const size_t nblocks = tasks_nblock[k];

      int nid = rowset.node_id;
      bst_float leaf_value;
      // if a node is marked as deleted by the pruner, traverse upward to locate
      // a non-deleted leaf.
      if ((*p_last_tree_)[nid].IsDeleted()) {
        while ((*p_last_tree_)[nid].IsDeleted()) {
          nid = (*p_last_tree_)[nid].Parent();
        }
        CHECK((*p_last_tree_)[nid].IsLeaf());
      }
      leaf_value = (*p_last_tree_)[nid].LeafValue();

      const size_t istart = iblock * block_size;
      const size_t iend = (iblock == nblocks - 1) ? nrows : istart + block_size;

      for (size_t it = istart; it < iend; ++it) {
        out_preds[rowset.begin[it]] += leaf_value;
      }
    }
  }

  return true;
}

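// InitData() prepares per-tree state: it (re)builds the training row list
// (applying row subsampling and dropping rows with negative hessian),
// allocates histogram storage, detects the bin-matrix layout, and resets the
// per-node bookkeeping.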
void QuantileHistMaker::Builder::InitData(const GHistIndexMatrix& gmat,
                                          const std::vector<GradientPair>& gpair,
                                          const DMatrix& fmat,
                                          const RegTree& tree) {
  CHECK_EQ(tree.param.num_nodes, tree.param.num_roots)
      << "ColMakerHist: can only grow new tree";
  CHECK((param_.max_depth > 0 || param_.max_leaves > 0))
      << "max_depth and max_leaves cannot both be 0 (unlimited); "
      << "at least one should be a positive quantity.";
  if (param_.grow_policy == TrainParam::kDepthWise) {
    CHECK(param_.max_depth > 0) << "max_depth cannot be 0 (unlimited) "
                                << "when grow_policy is depthwise.";
  }
  const auto& info = fmat.Info();

  {
    // initialize the row set
    row_set_collection_.Clear();
    // clear local prediction cache
    leaf_value_cache_.clear();
    // initialize histogram collection
    uint32_t nbins = gmat.cut.Ptrs().back();
    hist_.Init(nbins);
    hist_buff_.Init(nbins);

    // initialize histogram builder
    #pragma omp parallel
    {
      this->nthread_ = omp_get_num_threads();
    }

    const auto nthread = static_cast<bst_omp_uint>(this->nthread_);
    row_split_tloc_.resize(nthread);
    hist_builder_.Init(this->nthread_, nbins);

    CHECK_EQ(info.root_index_.size(), 0U);
    std::vector<size_t>& row_indices = row_set_collection_.row_indices_;
    row_indices.resize(info.num_row_);
    auto* p_row_indices = row_indices.data();
    // mark subsampled rows and build the list of member rows

    if (param_.subsample < 1.0f) {
      std::bernoulli_distribution coin_flip(param_.subsample);
      auto& rnd = common::GlobalRandom();
      size_t j = 0;
      for (size_t i = 0; i < info.num_row_; ++i) {
        if (gpair[i].GetHess() >= 0.0f && coin_flip(rnd)) {
          p_row_indices[j++] = i;
        }
      }
      row_indices.resize(j);
    } else {
      MemStackAllocator<bool, 128> buff(this->nthread_);
      bool* p_buff = buff.Get();
      std::fill(p_buff, p_buff + this->nthread_, false);

      const size_t block_size = info.num_row_ / this->nthread_ + !!(info.num_row_ % this->nthread_);

      #pragma omp parallel num_threads(this->nthread_)
      {
        const size_t tid = omp_get_thread_num();
        const size_t ibegin = tid * block_size;
        const size_t iend = std::min(static_cast<size_t>(ibegin + block_size),
                                     static_cast<size_t>(info.num_row_));

        for (size_t i = ibegin; i < iend; ++i) {
          if (gpair[i].GetHess() < 0.0f) {
            p_buff[tid] = true;
            break;
          }
        }
      }

      bool has_neg_hess = false;
      for (int32_t tid = 0; tid < this->nthread_; ++tid) {
        if (p_buff[tid]) {
          has_neg_hess = true;
        }
      }

      if (has_neg_hess) {
        size_t j = 0;
        for (size_t i = 0; i < info.num_row_; ++i) {
          if (gpair[i].GetHess() >= 0.0f) {
            p_row_indices[j++] = i;
          }
        }
        row_indices.resize(j);
      } else {
        #pragma omp parallel num_threads(this->nthread_)
        {
          const size_t tid = omp_get_thread_num();
          const size_t ibegin = tid * block_size;
          const size_t iend = std::min(static_cast<size_t>(ibegin + block_size),
                                       static_cast<size_t>(info.num_row_));
          for (size_t i = ibegin; i < iend; ++i) {
            p_row_indices[i] = i;
          }
        }
      }
    }
  }
  row_set_collection_.Init();
  buffer_for_partition_.reserve(2 * info.num_row_);

  {
    /* determine the layout of the data */
    const size_t nrow = info.num_row_;
    const size_t ncol = info.num_col_;
    const size_t nnz = info.num_nonzero_;
    // number of discrete bins for feature 0
    const uint32_t nbins_f0 = gmat.cut.Ptrs()[1] - gmat.cut.Ptrs()[0];
    if (nrow * ncol == nnz) {
      // dense data with zero-based indexing
      data_layout_ = kDenseDataZeroBased;
    } else if (nbins_f0 == 0 && nrow * (ncol - 1) == nnz) {
      // dense data with one-based indexing
      data_layout_ = kDenseDataOneBased;
    } else {
      // sparse data
      data_layout_ = kSparseData;
    }
  }
  {
    // store a pointer to the tree
    p_last_tree_ = &tree;
    // store a pointer to the training data
    p_last_fmat_ = &fmat;
  }
  if (data_layout_ == kDenseDataOneBased) {
    column_sampler_.Init(info.num_col_, param_.colsample_bynode, param_.colsample_bylevel,
                         param_.colsample_bytree, true);
  } else {
    column_sampler_.Init(info.num_col_, param_.colsample_bynode, param_.colsample_bylevel,
                         param_.colsample_bytree, false);
  }
  if (data_layout_ == kDenseDataZeroBased || data_layout_ == kDenseDataOneBased) {
    /* specialized code for dense data:
       choose the column that has the least positive number of discrete bins.
       For dense data (with no missing values),
       the sum of the gradient histogram is equal to snode[nid] */
    const std::vector<uint32_t>& row_ptr = gmat.cut.Ptrs();
    const auto nfeature = static_cast<bst_uint>(row_ptr.size() - 1);
    uint32_t min_nbins_per_feature = 0;
    for (bst_uint i = 0; i < nfeature; ++i) {
      const uint32_t nbins = row_ptr[i + 1] - row_ptr[i];
      if (nbins > 0) {
        if (min_nbins_per_feature == 0 || min_nbins_per_feature > nbins) {
          min_nbins_per_feature = nbins;
          fid_least_bins_ = i;
        }
      }
    }
    CHECK_GT(min_nbins_per_feature, 0U);
  }
  {
    snode_.reserve(256);
    snode_.clear();
  }
  {
    if (param_.grow_policy == TrainParam::kLossGuide) {
      qexpand_loss_guided_.reset(new ExpandQueue(LossGuide));
    } else {
      qexpand_depth_wise_.clear();
    }
  }
}

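// EvaluateSplitsBatch() enumerates candidate splits with one parallel task
// per (node, sampled feature) pair.  In the non-distributed case the slice of
// the histogram needed for a feature is reduced just before enumeration, so
// it is still warm in cache; the per-task best splits are merged sequentially
// at the end.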
void QuantileHistMaker::Builder::EvaluateSplitsBatch(
    const std::vector<ExpandEntry>& nodes,
    const GHistIndexMatrix& gmat,
    const DMatrix& fmat,
    const std::vector<std::vector<uint8_t>>& hist_is_init,
    const std::vector<std::vector<common::GradStatHist::GradType*>>& hist_buffers) {
  perf_monitor.TickStart();
  const MetaInfo& info = fmat.Info();
  // prepare tasks
  std::vector<std::pair<int32_t, size_t>> tasks;
  for (size_t i = 0; i < nodes.size(); ++i) {
    auto p_feature_set = column_sampler_.GetFeatureSet(nodes[i].depth);

    const auto& feature_set = p_feature_set->HostVector();
    const auto nfeature = static_cast<bst_uint>(feature_set.size());
    for (size_t j = 0; j < nfeature; ++j) {
      tasks.emplace_back(i, feature_set[j]);
    }
  }

  // rabit::IsDistributed is not thread-safe
  auto isDistributed = rabit::IsDistributed();
  // partial results
  std::vector<std::pair<SplitEntry, SplitEntry>> splits(tasks.size());
  // parallel enumeration
  #pragma omp parallel for schedule(static)
  for (omp_ulong i = 0; i < tasks.size(); ++i) {
    // node_idx : offset within the `nodes` list
    const int32_t node_idx = tasks[i].first;
    const size_t fid = tasks[i].second;
    const int32_t nid = nodes[node_idx].nid;  // usually node_idx != nid
    const int32_t sibling_nid = nodes[node_idx].sibling_nid;
    const int32_t parent_nid = nodes[node_idx].parent_nid;

    // reduce the needed part of the hist here, to have it in cache before enumeration
    if (!isDistributed) {
      auto hist_data = reinterpret_cast<common::GradStatHist::GradType *>(hist_[nid].data());
      auto sibling_hist_data = sibling_nid > -1 ?
          reinterpret_cast<common::GradStatHist::GradType *>(
              hist_[sibling_nid].data()) : nullptr;
      auto parent_hist_data = sibling_nid > -1 ?
          reinterpret_cast<common::GradStatHist::GradType *>(
              hist_[parent_nid].data()) : nullptr;

      const std::vector<uint32_t>& cut_ptr = gmat.cut.Ptrs();
      const size_t ibegin = 2 * cut_ptr[fid];
      const size_t iend = 2 * cut_ptr[fid + 1];
      ReduceHistograms(hist_data, sibling_hist_data, parent_hist_data, ibegin, iend, node_idx,
                       hist_is_init, hist_buffers);
    }

    if (spliteval_->CheckFeatureConstraint(nid, fid)) {
      auto& snode = snode_[nid];
      const bool compute_backward = this->EnumerateSplit(+1, gmat, hist_[nid], snode,
                                                         info, &splits[i].first, fid, nid);

      // Sometimes we don't need to enumerate backward, because forward and
      // backward enumeration will give the same loss values. This is the case
      // if the particular feature column contains no missing values. So
      // enumerate backward only when necessary.
      if (compute_backward) {
        this->EnumerateSplit(-1, gmat, hist_[nid], snode, info,
                             &splits[i].first, fid, nid);
      }
    }

    if (sibling_nid > -1 && spliteval_->CheckFeatureConstraint(sibling_nid, fid)) {
      auto& snode = snode_[sibling_nid];

      const bool compute_backward = this->EnumerateSplit(+1, gmat, hist_[sibling_nid], snode,
                                                         info, &splits[i].second, fid, sibling_nid);

      if (compute_backward) {
        this->EnumerateSplit(-1, gmat, hist_[sibling_nid], snode, info,
                             &splits[i].second, fid, sibling_nid);
      }
    }
  }

  // choose the best splits
  for (size_t i = 0; i < splits.size(); ++i) {
    const int32_t node_idx = tasks[i].first;
    const int32_t nid = nodes[node_idx].nid;
    const int32_t sibling_nid = nodes[node_idx].sibling_nid;
    snode_[nid].best.Update(splits[i].first);
    if (sibling_nid > -1) {
      snode_[sibling_nid].best.Update(splits[i].second);
    }
  }

  perf_monitor.UpdatePerfTimer(TreeGrowingPerfMonitor::timer_name::EVALUATE_SPLIT);
}

void QuantileHistMaker::Builder::InitNewNode(int nid,
                                             const GHistIndexMatrix& gmat,
                                             const std::vector<GradientPair>& gpair,
                                             const DMatrix& fmat,
                                             RegTree* tree,
                                             QuantileHistMaker::NodeEntry* snode,
                                             int32_t parentid) {
  // calculate the weights
  {
    snode->weight = static_cast<float>(
        spliteval_->ComputeWeight(parentid, snode->stats));
    snode->root_gain = static_cast<float>(
        spliteval_->ComputeScore(parentid, snode->stats,
                                 snode->weight));
  }
}

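// EnumerateSplit() scans the histogram bins of one feature, accumulating the
// left-side statistics `e` bin by bin; the complement `c` is obtained by
// subtracting `e` from the node totals, so each candidate threshold is scored
// in O(1).  If, after a full forward scan, `e` equals the node totals, the
// feature column has no missing values and a backward scan would yield
// identical losses, so it is skipped.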
// enumerate the split values of a specific feature
// d_step: +1 or -1, the direction in which candidate thresholds are scanned
// fid: feature for which we seek the best threshold
// Returns false if we don't need to enumerate in the opposite direction,
// which is the case when the feature (fid) column contains no missing values.
bool QuantileHistMaker::Builder::EnumerateSplit(int d_step,
                                                const GHistIndexMatrix& gmat,
                                                const GHistRow& hist,
                                                const NodeEntry& snode,
                                                const MetaInfo& info,
                                                SplitEntry* p_best,
                                                bst_uint fid,
                                                bst_uint nodeID) {
  CHECK(d_step == +1 || d_step == -1);

  // aliases
  const std::vector<uint32_t>& cut_ptr = gmat.cut.Ptrs();
  const std::vector<bst_float>& cut_val = gmat.cut.Values();

  // statistics on both sides of the split
  GradStats c;
  GradStats e;
  // best split so far
  SplitEntry best;

  // bin boundaries
  CHECK_LE(cut_ptr[fid],
           static_cast<uint32_t>(std::numeric_limits<int32_t>::max()));
  CHECK_LE(cut_ptr[fid + 1],
           static_cast<uint32_t>(std::numeric_limits<int32_t>::max()));
  // imin: index (offset) of the minimum value for feature fid;
  // needed for backward enumeration
  const auto imin = static_cast<int32_t>(cut_ptr[fid]);
  // ibegin, iend: smallest/largest cut points for feature fid
  // use int to allow for value -1
  int32_t ibegin, iend;
  if (d_step > 0) {
    ibegin = static_cast<int32_t>(cut_ptr[fid]);
    iend = static_cast<int32_t>(cut_ptr[fid + 1]);
  } else {
    ibegin = static_cast<int32_t>(cut_ptr[fid + 1]) - 1;
    iend = static_cast<int32_t>(cut_ptr[fid]) - 1;
  }

  if (d_step == 1) {
    for (int32_t i = ibegin; i < iend; i++) {
      e.Add(hist[i].GetGrad(), hist[i].GetHess());
      if (e.sum_hess >= param_.min_child_weight) {
        c.SetSubstract(snode.stats, e);
        if (c.sum_hess >= param_.min_child_weight) {
          bst_float loss_chg = static_cast<bst_float>(spliteval_->ComputeSplitScore(nodeID,
              fid, e, c) - snode.root_gain);
          bst_float split_pt = cut_val[i];
          best.Update(loss_chg, fid, split_pt, false, e, c);
        }
      }
    }
    p_best->Update(best);

    if (e.GetGrad() == snode.stats.GetGrad() && e.GetHess() == snode.stats.GetHess()) {
      return false;
    }
  } else {
    for (int32_t i = ibegin; i != iend; i--) {
      e.Add(hist[i].GetGrad(), hist[i].GetHess());
      if (e.sum_hess >= param_.min_child_weight) {
        c.SetSubstract(snode.stats, e);
        if (c.sum_hess >= param_.min_child_weight) {
          bst_float split_pt;
          // backward enumeration: split at the left bound of each bin
          bst_float loss_chg = static_cast<bst_float>(
              spliteval_->ComputeSplitScore(nodeID, fid, c, e) -
              snode.root_gain);

          if (i == imin) {
            // for the leftmost bin, the left bound is the smallest feature value
            split_pt = gmat.cut.MinValues()[fid];
          } else {
            split_pt = cut_val[i - 1];
          }
          best.Update(loss_chg, fid, split_pt, true, c, e);
        }
      }
    }
    p_best->Update(best);

    if (e.GetGrad() == snode.stats.GetGrad() && e.GetHess() == snode.stats.GetHess()) {
      return false;
    }
  }

  return true;
}

XGBOOST_REGISTER_TREE_UPDATER(FastHistMaker, "grow_fast_histmaker")
.describe("(Deprecated, use grow_quantile_histmaker instead.)"
          " Grow tree using quantized histogram.")
.set_body(
    []() {
      LOG(WARNING) << "grow_fast_histmaker is deprecated, "
                   << "use grow_quantile_histmaker instead.";
      return new QuantileHistMaker();
    });

XGBOOST_REGISTER_TREE_UPDATER(QuantileHistMaker, "grow_quantile_histmaker")
.describe("Grow tree using quantized histogram.")
.set_body(
    []() {
      return new QuantileHistMaker();
    });

}  // namespace tree
}  // namespace xgboost