Merge branch 'unity'
Conflicts: .gitignore R-package/src/xgboost_R.cpp src/gbm/gblinear-inl.hpp tools/xgcombine_buffer.cpp
This commit is contained in:
@@ -68,8 +68,9 @@ class TreeModel {
|
||||
}
|
||||
};
|
||||
/*! \brief tree node */
|
||||
class Node{
|
||||
class Node {
|
||||
public:
|
||||
Node(void) : sindex_(0) {}
|
||||
/*! \brief index of left child */
|
||||
inline int cleft(void) const {
|
||||
return this->cleft_;
|
||||
@@ -110,6 +111,10 @@ class TreeModel {
|
||||
inline bool is_left_child(void) const {
|
||||
return (parent_ & (1U << 31)) != 0;
|
||||
}
|
||||
/*! \brief whether this node is deleted */
|
||||
inline bool is_deleted(void) const {
|
||||
return sindex_ == std::numeric_limits<unsigned>::max();
|
||||
}
|
||||
/*! \brief whether current node is root */
|
||||
inline bool is_root(void) const {
|
||||
return parent_ == -1;
|
||||
@@ -144,7 +149,11 @@ class TreeModel {
|
||||
this->cleft_ = -1;
|
||||
this->cright_ = right;
|
||||
}
|
||||
|
||||
/*! \brief mark that this node is deleted */
|
||||
inline void mark_delete(void) {
|
||||
this->sindex_ = std::numeric_limits<unsigned>::max();
|
||||
}
|
||||
|
||||
private:
|
||||
friend class TreeModel<TSplitCond, TNodeStat>;
|
||||
/*!
|
||||
@@ -197,11 +206,11 @@ class TreeModel {
|
||||
leaf_vector.resize(param.num_nodes * param.size_leaf_vector);
|
||||
return nd;
|
||||
}
|
||||
// delete a tree node
|
||||
// delete a tree node, keep the parent field to allow trace back
|
||||
inline void DeleteNode(int nid) {
|
||||
utils::Assert(nid >= param.num_roots, "can not delete root");
|
||||
deleted_nodes.push_back(nid);
|
||||
nodes[nid].set_parent(-1);
|
||||
nodes[nid].mark_delete();
|
||||
++param.num_deleted;
|
||||
}
|
||||
|
||||
@@ -296,11 +305,12 @@ class TreeModel {
|
||||
}
|
||||
// chg deleted nodes
|
||||
deleted_nodes.resize(0);
|
||||
for (int i = param.num_roots; i < param.num_nodes; i ++) {
|
||||
if (nodes[i].is_root()) deleted_nodes.push_back(i);
|
||||
for (int i = param.num_roots; i < param.num_nodes; ++i) {
|
||||
if (nodes[i].is_deleted()) deleted_nodes.push_back(i);
|
||||
}
|
||||
utils::Assert(static_cast<int>(deleted_nodes.size()) == param.num_deleted,
|
||||
"number of deleted nodes do not match");
|
||||
"number of deleted nodes do not match, num_deleted=%d, dnsize=%lu, num_nodes=%d",
|
||||
param.num_deleted, deleted_nodes.size(), param.num_nodes);
|
||||
}
|
||||
/*!
|
||||
* \brief save model to stream
|
||||
|
||||
@@ -36,8 +36,14 @@ struct TrainParam{
|
||||
float colsample_bytree;
|
||||
// speed optimization for dense column
|
||||
float opt_dense_col;
|
||||
// accuracy of sketch
|
||||
float sketch_eps;
|
||||
// sketch size ratio: the maximum sketch size is sketch_ratio / sketch_eps
|
||||
float sketch_ratio;
|
||||
// leaf vector size
|
||||
int size_leaf_vector;
|
||||
int size_leaf_vector;
|
||||
// option for parallelization
|
||||
int parallel_option;
|
||||
// number of threads to be used for tree construction,
|
||||
// if OpenMP is enabled, if equals 0, use system default
|
||||
int nthread;
|
||||
@@ -55,6 +61,9 @@ struct TrainParam{
|
||||
opt_dense_col = 1.0f;
|
||||
nthread = 0;
|
||||
size_leaf_vector = 0;
|
||||
parallel_option = 2;
|
||||
sketch_eps = 0.1f;
|
||||
sketch_ratio = 2.0f;
|
||||
}
|
||||
/*!
|
||||
* \brief set parameters from outside
|
||||
@@ -76,10 +85,13 @@ struct TrainParam{
|
||||
if (!strcmp(name, "subsample")) subsample = static_cast<float>(atof(val));
|
||||
if (!strcmp(name, "colsample_bylevel")) colsample_bylevel = static_cast<float>(atof(val));
|
||||
if (!strcmp(name, "colsample_bytree")) colsample_bytree = static_cast<float>(atof(val));
|
||||
if (!strcmp(name, "sketch_eps")) sketch_eps = static_cast<float>(atof(val));
|
||||
if (!strcmp(name, "sketch_ratio")) sketch_ratio = static_cast<float>(atof(val));
|
||||
if (!strcmp(name, "opt_dense_col")) opt_dense_col = static_cast<float>(atof(val));
|
||||
if (!strcmp(name, "size_leaf_vector")) size_leaf_vector = atoi(val);
|
||||
if (!strcmp(name, "max_depth")) max_depth = atoi(val);
|
||||
if (!strcmp(name, "nthread")) nthread = atoi(val);
|
||||
if (!strcmp(name, "parallel_option")) parallel_option = atoi(val);
|
||||
if (!strcmp(name, "default_direction")) {
|
||||
if (!strcmp(val, "learn")) default_direction = 0;
|
||||
if (!strcmp(val, "left")) default_direction = 1;
|
||||
@@ -132,6 +144,12 @@ struct TrainParam{
|
||||
inline bool cannot_split(double sum_hess, int depth) const {
|
||||
return sum_hess < this->min_child_weight * 2.0;
|
||||
}
|
||||
/*! \brief maximum sketch size */
|
||||
inline unsigned max_sketch_size(void) const {
|
||||
unsigned ret = static_cast<unsigned>(sketch_ratio / sketch_eps);
|
||||
utils::Check(ret > 0, "sketch_ratio/sketch_eps must be bigger than 1");
|
||||
return ret;
|
||||
}
|
||||
|
||||
protected:
|
||||
// functions for L1 cost
|
||||
@@ -186,6 +204,10 @@ struct GradStats {
|
||||
inline void Add(const GradStats &b) {
|
||||
this->Add(b.sum_grad, b.sum_hess);
|
||||
}
|
||||
/*! \brief same as add, reduce is used in All Reduce */
|
||||
inline void Reduce(const GradStats &b) {
|
||||
this->Add(b);
|
||||
}
|
||||
/*! \brief set current value to a - b */
|
||||
inline void SetSubstract(const GradStats &a, const GradStats &b) {
|
||||
sum_grad = a.sum_grad - b.sum_grad;
|
||||
@@ -262,6 +284,10 @@ struct CVGradStats : public GradStats {
|
||||
valid[i].Add(b.valid[i]);
|
||||
}
|
||||
}
|
||||
/*! \brief same as add, reduce is used in All Reduce */
|
||||
inline void Reduce(const CVGradStats &b) {
|
||||
this->Add(b);
|
||||
}
|
||||
/*! \brief set current value to a - b */
|
||||
inline void SetSubstract(const CVGradStats &a, const CVGradStats &b) {
|
||||
GradStats::SetSubstract(a, b);
|
||||
@@ -341,6 +367,10 @@ struct SplitEntry{
|
||||
return false;
|
||||
}
|
||||
}
|
||||
/*! \brief same as update, used by AllReduce*/
|
||||
inline void Reduce(const SplitEntry &e) {
|
||||
this->Update(e);
|
||||
}
|
||||
/*!\return feature index to split on */
|
||||
inline unsigned split_index(void) const {
|
||||
return sindex & ((1U << 31) - 1U);
|
||||
|
||||
@@ -1,18 +1,28 @@
|
||||
#define _CRT_SECURE_NO_WARNINGS
|
||||
#define _CRT_SECURE_NO_DEPRECATE
|
||||
#define NOMINMAX
|
||||
#include <cstring>
|
||||
#include "./updater.h"
|
||||
#include "./updater_sync-inl.hpp"
|
||||
#include "./updater_prune-inl.hpp"
|
||||
#include "./updater_refresh-inl.hpp"
|
||||
#include "./updater_colmaker-inl.hpp"
|
||||
#include "./updater_distcol-inl.hpp"
|
||||
#include "./updater_histmaker-inl.hpp"
|
||||
//#include "./updater_skmaker-inl.hpp"
|
||||
|
||||
namespace xgboost {
|
||||
namespace tree {
|
||||
IUpdater* CreateUpdater(const char *name) {
  // factory: map the updater name to a freshly allocated updater object,
  // the returned pointer is allocated with new
  using namespace std;
  if (strcmp(name, "prune") == 0) {
    return new TreePruner();
  } else if (strcmp(name, "sync") == 0) {
    return new TreeSyncher();
  } else if (strcmp(name, "refresh") == 0) {
    return new TreeRefresher<GradStats>();
  } else if (strcmp(name, "grow_colmaker") == 0) {
    return new ColMaker<GradStats>();
  } else if (strcmp(name, "grow_histmaker") == 0) {
    return new CQHistMaker<GradStats>();
  } else if (strcmp(name, "distcol") == 0) {
    return new DistColMaker<GradStats>();
  }
  // "grow_skmaker" (SketchMaker) stays disabled together with its include
  // no name matched: report the error
  utils::Error("unknown updater:%s", name);
  return NULL;
}
|
||||
|
||||
@@ -37,6 +37,16 @@ class IUpdater {
|
||||
IFMatrix *p_fmat,
|
||||
const BoosterInfo &info,
|
||||
const std::vector<RegTree*> &trees) = 0;
|
||||
|
||||
/*!
|
||||
* \brief this is simply a function for optimizing performance
|
||||
* this function asks the updater to return the leaf position of each instance in the p_fmat,
|
||||
* if it is cached in the updater, if it is not available, return NULL
|
||||
* \return array of leaf position of each instance in the last updated tree
|
||||
*/
|
||||
virtual const int* GetLeafPosition(void) const {
|
||||
return NULL;
|
||||
}
|
||||
// destructor
|
||||
virtual ~IUpdater(void) {}
|
||||
};
|
||||
|
||||
409
src/tree/updater_basemaker-inl.hpp
Normal file
409
src/tree/updater_basemaker-inl.hpp
Normal file
@@ -0,0 +1,409 @@
|
||||
#ifndef XGBOOST_TREE_UPDATER_BASEMAKER_INL_HPP_
|
||||
#define XGBOOST_TREE_UPDATER_BASEMAKER_INL_HPP_
|
||||
/*!
|
||||
* \file updater_basemaker-inl.hpp
|
||||
* \brief implement a common tree constructor
|
||||
* \author Tianqi Chen
|
||||
*/
|
||||
#include <vector>
|
||||
#include <algorithm>
|
||||
#include <limits>
|
||||
#include <rabit.h>
|
||||
#include "../utils/random.h"
|
||||
#include "../utils/quantile.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace tree {
|
||||
/*!
|
||||
* \brief base tree maker class that defines common operation
|
||||
* needed in tree making
|
||||
*/
|
||||
class BaseMaker: public IUpdater {
|
||||
public:
|
||||
// destructor
|
||||
virtual ~BaseMaker(void) {}
|
||||
// set training parameter
|
||||
virtual void SetParam(const char *name, const char *val) {
|
||||
param.SetParam(name, val);
|
||||
}
|
||||
|
||||
protected:
|
||||
// helper to collect and query feature meta information
|
||||
struct FMetaHelper {
|
||||
public:
|
||||
/*! \brief find type of each feature, use column format */
|
||||
inline void InitByCol(IFMatrix *p_fmat,
|
||||
const RegTree &tree) {
|
||||
fminmax.resize(tree.param.num_feature * 2);
|
||||
std::fill(fminmax.begin(), fminmax.end(),
|
||||
-std::numeric_limits<bst_float>::max());
|
||||
// start accumulating statistics
|
||||
utils::IIterator<ColBatch> *iter = p_fmat->ColIterator();
|
||||
iter->BeforeFirst();
|
||||
while (iter->Next()) {
|
||||
const ColBatch &batch = iter->Value();
|
||||
for (bst_uint i = 0; i < batch.size; ++i) {
|
||||
const bst_uint fid = batch.col_index[i];
|
||||
const ColBatch::Inst &c = batch[i];
|
||||
if (c.length != 0) {
|
||||
fminmax[fid * 2 + 0] = std::max(-c[0].fvalue, fminmax[fid * 2 + 0]);
|
||||
fminmax[fid * 2 + 1] = std::max(c[c.length - 1].fvalue, fminmax[fid * 2 + 1]);
|
||||
}
|
||||
}
|
||||
}
|
||||
rabit::Allreduce<rabit::op::Max>(BeginPtr(fminmax), fminmax.size());
|
||||
}
|
||||
// get feature type, 0:empty 1:binary 2:real
|
||||
inline int Type(bst_uint fid) const {
|
||||
utils::Assert(fid * 2 + 1 < fminmax.size(),
|
||||
"FeatHelper fid exceed query bound ");
|
||||
bst_float a = fminmax[fid * 2];
|
||||
bst_float b = fminmax[fid * 2 + 1];
|
||||
if (a == -std::numeric_limits<bst_float>::max()) return 0;
|
||||
if (-a == b) return 1;
|
||||
else return 2;
|
||||
}
|
||||
inline bst_float MaxValue(bst_uint fid) const {
|
||||
return fminmax[fid *2 + 1];
|
||||
}
|
||||
inline void SampleCol(float p, std::vector<bst_uint> *p_findex) const {
|
||||
std::vector<bst_uint> &findex = *p_findex;
|
||||
findex.clear();
|
||||
for (size_t i = 0; i < fminmax.size(); i += 2) {
|
||||
const bst_uint fid = static_cast<bst_uint>(i / 2);
|
||||
if (this->Type(fid) != 0) findex.push_back(fid);
|
||||
}
|
||||
unsigned n = static_cast<unsigned>(p * findex.size());
|
||||
random::Shuffle(findex);
|
||||
findex.resize(n);
|
||||
// sync the findex if it is subsample
|
||||
std::string s_cache;
|
||||
utils::MemoryBufferStream fc(&s_cache);
|
||||
utils::IStream &fs = fc;
|
||||
if (rabit::GetRank() == 0) {
|
||||
fs.Write(findex);
|
||||
}
|
||||
rabit::Broadcast(&s_cache, 0);
|
||||
fs.Read(&findex);
|
||||
}
|
||||
|
||||
private:
|
||||
std::vector<bst_float> fminmax;
|
||||
};
|
||||
// ------static helper functions ------
|
||||
// helper function to get to next level of the tree
|
||||
/*! \brief this is helper function for row based data*/
|
||||
inline static int NextLevel(const RowBatch::Inst &inst, const RegTree &tree, int nid) {
|
||||
const RegTree::Node &n = tree[nid];
|
||||
bst_uint findex = n.split_index();
|
||||
for (unsigned i = 0; i < inst.length; ++i) {
|
||||
if (findex == inst[i].index) {
|
||||
if (inst[i].fvalue < n.split_cond()) {
|
||||
return n.cleft();
|
||||
} else {
|
||||
return n.cright();
|
||||
}
|
||||
}
|
||||
}
|
||||
return n.cdefault();
|
||||
}
|
||||
/*! \brief get number of omp thread in current context */
|
||||
inline static int get_nthread(void) {
|
||||
int nthread;
|
||||
#pragma omp parallel
|
||||
{
|
||||
nthread = omp_get_num_threads();
|
||||
}
|
||||
return nthread;
|
||||
}
|
||||
// ------class member helpers---------
|
||||
/*! \brief initialize temp data structure */
|
||||
inline void InitData(const std::vector<bst_gpair> &gpair,
|
||||
const IFMatrix &fmat,
|
||||
const std::vector<unsigned> &root_index,
|
||||
const RegTree &tree) {
|
||||
utils::Assert(tree.param.num_nodes == tree.param.num_roots,
|
||||
"TreeMaker: can only grow new tree");
|
||||
{// setup position
|
||||
position.resize(gpair.size());
|
||||
if (root_index.size() == 0) {
|
||||
std::fill(position.begin(), position.end(), 0);
|
||||
} else {
|
||||
for (size_t i = 0; i < position.size(); ++i) {
|
||||
position[i] = root_index[i];
|
||||
utils::Assert(root_index[i] < (unsigned)tree.param.num_roots,
|
||||
"root index exceed setting");
|
||||
}
|
||||
}
|
||||
// mark delete for the deleted datas
|
||||
for (size_t i = 0; i < position.size(); ++i) {
|
||||
if (gpair[i].hess < 0.0f) position[i] = ~position[i];
|
||||
}
|
||||
// mark subsample
|
||||
if (param.subsample < 1.0f) {
|
||||
for (size_t i = 0; i < position.size(); ++i) {
|
||||
if (gpair[i].hess < 0.0f) continue;
|
||||
if (random::SampleBinary(param.subsample) == 0) position[i] = ~position[i];
|
||||
}
|
||||
}
|
||||
}
|
||||
{// expand query
|
||||
qexpand.reserve(256); qexpand.clear();
|
||||
for (int i = 0; i < tree.param.num_roots; ++i) {
|
||||
qexpand.push_back(i);
|
||||
}
|
||||
this->UpdateNode2WorkIndex(tree);
|
||||
}
|
||||
}
|
||||
/*! \brief update queue expand add in new leaves */
|
||||
inline void UpdateQueueExpand(const RegTree &tree) {
|
||||
std::vector<int> newnodes;
|
||||
for (size_t i = 0; i < qexpand.size(); ++i) {
|
||||
const int nid = qexpand[i];
|
||||
if (!tree[nid].is_leaf()) {
|
||||
newnodes.push_back(tree[nid].cleft());
|
||||
newnodes.push_back(tree[nid].cright());
|
||||
}
|
||||
}
|
||||
// use new nodes for qexpand
|
||||
qexpand = newnodes;
|
||||
this->UpdateNode2WorkIndex(tree);
|
||||
}
|
||||
// return decoded position
|
||||
inline int DecodePosition(bst_uint ridx) const{
|
||||
const int pid = position[ridx];
|
||||
return pid < 0 ? ~pid : pid;
|
||||
}
|
||||
// encode the encoded position value for ridx
|
||||
inline void SetEncodePosition(bst_uint ridx, int nid) {
|
||||
if (position[ridx] < 0) {
|
||||
position[ridx] = ~nid;
|
||||
} else {
|
||||
position[ridx] = nid;
|
||||
}
|
||||
}
|
||||
/*!
|
||||
* \brief this is helper function uses column based data structure,
|
||||
* reset the positions to the lastest one
|
||||
* \param nodes the set of nodes that contains the split to be used
|
||||
* \param p_fmat feature matrix needed for tree construction
|
||||
* \param tree the regression tree structure
|
||||
*/
|
||||
inline void ResetPositionCol(const std::vector<int> &nodes, IFMatrix *p_fmat, const RegTree &tree) {
|
||||
// set the positions in the nondefault
|
||||
this->SetNonDefaultPositionCol(nodes, p_fmat, tree);
|
||||
// set rest of instances to default position
|
||||
const std::vector<bst_uint> &rowset = p_fmat->buffered_rowset();
|
||||
// set default direct nodes to default
|
||||
// for leaf nodes that are not fresh, mark then to ~nid,
|
||||
// so that they are ignored in future statistics collection
|
||||
const bst_omp_uint ndata = static_cast<bst_omp_uint>(rowset.size());
|
||||
|
||||
#pragma omp parallel for schedule(static)
|
||||
for (bst_omp_uint i = 0; i < ndata; ++i) {
|
||||
const bst_uint ridx = rowset[i];
|
||||
const int nid = this->DecodePosition(ridx);
|
||||
if (tree[nid].is_leaf()) {
|
||||
// mark finish when it is not a fresh leaf
|
||||
if (tree[nid].cright() == -1) {
|
||||
position[ridx] = ~nid;
|
||||
}
|
||||
} else {
|
||||
// push to default branch
|
||||
if (tree[nid].default_left()) {
|
||||
this->SetEncodePosition(ridx, tree[nid].cleft());
|
||||
} else {
|
||||
this->SetEncodePosition(ridx, tree[nid].cright());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
/*!
|
||||
* \brief this is helper function uses column based data structure,
|
||||
* update all positions into nondefault branch, if any, ignore the default branch
|
||||
* \param nodes the set of nodes that contains the split to be used
|
||||
* \param p_fmat feature matrix needed for tree construction
|
||||
* \param tree the regression tree structure
|
||||
*/
|
||||
virtual void SetNonDefaultPositionCol(const std::vector<int> &nodes,
|
||||
IFMatrix *p_fmat, const RegTree &tree) {
|
||||
// step 1, classify the non-default data into right places
|
||||
std::vector<unsigned> fsplits;
|
||||
for (size_t i = 0; i < nodes.size(); ++i) {
|
||||
const int nid = nodes[i];
|
||||
if (!tree[nid].is_leaf()) {
|
||||
fsplits.push_back(tree[nid].split_index());
|
||||
}
|
||||
}
|
||||
std::sort(fsplits.begin(), fsplits.end());
|
||||
fsplits.resize(std::unique(fsplits.begin(), fsplits.end()) - fsplits.begin());
|
||||
|
||||
utils::IIterator<ColBatch> *iter = p_fmat->ColIterator(fsplits);
|
||||
while (iter->Next()) {
|
||||
const ColBatch &batch = iter->Value();
|
||||
for (size_t i = 0; i < batch.size; ++i) {
|
||||
ColBatch::Inst col = batch[i];
|
||||
const bst_uint fid = batch.col_index[i];
|
||||
const bst_omp_uint ndata = static_cast<bst_omp_uint>(col.length);
|
||||
#pragma omp parallel for schedule(static)
|
||||
for (bst_omp_uint j = 0; j < ndata; ++j) {
|
||||
const bst_uint ridx = col[j].index;
|
||||
const float fvalue = col[j].fvalue;
|
||||
const int nid = this->DecodePosition(ridx);
|
||||
// go back to parent, correct those who are not default
|
||||
if (!tree[nid].is_leaf() && tree[nid].split_index() == fid) {
|
||||
if(fvalue < tree[nid].split_cond()) {
|
||||
this->SetEncodePosition(ridx, tree[nid].cleft());
|
||||
} else {
|
||||
this->SetEncodePosition(ridx, tree[nid].cright());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
/*! \brief helper function to get statistics from a tree */
|
||||
template<typename TStats>
|
||||
inline void GetNodeStats(const std::vector<bst_gpair> &gpair,
|
||||
const IFMatrix &fmat,
|
||||
const RegTree &tree,
|
||||
const BoosterInfo &info,
|
||||
std::vector< std::vector<TStats> > *p_thread_temp,
|
||||
std::vector<TStats> *p_node_stats) {
|
||||
std::vector< std::vector<TStats> > &thread_temp = *p_thread_temp;
|
||||
thread_temp.resize(this->get_nthread());
|
||||
p_node_stats->resize(tree.param.num_nodes);
|
||||
#pragma omp parallel
|
||||
{
|
||||
const int tid = omp_get_thread_num();
|
||||
thread_temp[tid].resize(tree.param.num_nodes, TStats(param));
|
||||
for (size_t i = 0; i < qexpand.size(); ++i) {
|
||||
const unsigned nid = qexpand[i];
|
||||
thread_temp[tid][nid].Clear();
|
||||
}
|
||||
}
|
||||
const std::vector<bst_uint> &rowset = fmat.buffered_rowset();
|
||||
// setup position
|
||||
const bst_omp_uint ndata = static_cast<bst_omp_uint>(rowset.size());
|
||||
#pragma omp parallel for schedule(static)
|
||||
for (bst_omp_uint i = 0; i < ndata; ++i) {
|
||||
const bst_uint ridx = rowset[i];
|
||||
const int nid = position[ridx];
|
||||
const int tid = omp_get_thread_num();
|
||||
if (nid >= 0) {
|
||||
thread_temp[tid][nid].Add(gpair, info, ridx);
|
||||
}
|
||||
}
|
||||
// sum the per thread statistics together
|
||||
for (size_t j = 0; j < qexpand.size(); ++j) {
|
||||
const int nid = qexpand[j];
|
||||
TStats &s = (*p_node_stats)[nid];
|
||||
s.Clear();
|
||||
for (size_t tid = 0; tid < thread_temp.size(); ++tid) {
|
||||
s.Add(thread_temp[tid][nid]);
|
||||
}
|
||||
}
|
||||
}
|
||||
/*! \brief common helper data structure to build sketch*/
|
||||
struct SketchEntry {
|
||||
/*! \brief total sum of amount to be met */
|
||||
bst_float sum_total;
|
||||
/*! \brief statistics used in the sketch */
|
||||
bst_float rmin, wmin;
|
||||
/*! \brief last seen feature value */
|
||||
bst_float last_fvalue;
|
||||
/*! \brief current size of sketch */
|
||||
bst_float next_goal;
|
||||
// pointer to the sketch to put things in
|
||||
utils::WXQuantileSketch<bst_float, bst_float> *sketch;
|
||||
// initialize the space
|
||||
inline void Init(unsigned max_size) {
|
||||
next_goal = -1.0f;
|
||||
rmin = wmin = 0.0f;
|
||||
sketch->temp.Reserve(max_size + 1);
|
||||
sketch->temp.size = 0;
|
||||
}
|
||||
/*!
|
||||
* \brief push a new element to sketch
|
||||
* \param fvalue feature value, comes in sorted ascending order
|
||||
* \param w weight
|
||||
* \param max_size
|
||||
*/
|
||||
inline void Push(bst_float fvalue, bst_float w, unsigned max_size) {
|
||||
if (next_goal == -1.0f) {
|
||||
next_goal = 0.0f;
|
||||
last_fvalue = fvalue;
|
||||
wmin = w;
|
||||
return;
|
||||
}
|
||||
if (last_fvalue != fvalue) {
|
||||
bst_float rmax = rmin + wmin;
|
||||
if (rmax >= next_goal) {
|
||||
if (sketch->temp.size == 0 || last_fvalue > sketch->temp.data[sketch->temp.size-1].value) {
|
||||
// push to sketch
|
||||
sketch->temp.data[sketch->temp.size] =
|
||||
utils::WXQuantileSketch<bst_float, bst_float>::
|
||||
Entry(rmin, rmax, wmin, last_fvalue);
|
||||
utils::Assert(sketch->temp.size < max_size,
|
||||
"invalid maximum size max_size=%u, stemp.size=%lu\n",
|
||||
max_size, sketch->temp.size);
|
||||
++sketch->temp.size;
|
||||
}
|
||||
if (sketch->temp.size == max_size) {
|
||||
next_goal = sum_total * 2.0f + 1e-5f;
|
||||
} else{
|
||||
next_goal = static_cast<bst_float>(sketch->temp.size * sum_total / max_size);
|
||||
}
|
||||
}
|
||||
rmin = rmax;
|
||||
wmin = w;
|
||||
last_fvalue = fvalue;
|
||||
} else {
|
||||
wmin += w;
|
||||
}
|
||||
}
|
||||
/*! \brief push final unfinished value to the sketch */
|
||||
inline void Finalize(unsigned max_size) {
|
||||
bst_float rmax = rmin + wmin;
|
||||
if (sketch->temp.size == 0 || last_fvalue > sketch->temp.data[sketch->temp.size-1].value) {
|
||||
utils::Assert(sketch->temp.size <= max_size,
|
||||
"Finalize: invalid maximum size, max_size=%u, stemp.size=%lu",
|
||||
sketch->temp.size, max_size );
|
||||
// push to sketch
|
||||
sketch->temp.data[sketch->temp.size] =
|
||||
utils::WXQuantileSketch<bst_float, bst_float>::
|
||||
Entry(rmin, rmax, wmin, last_fvalue);
|
||||
++sketch->temp.size;
|
||||
}
|
||||
sketch->PushTemp();
|
||||
}
|
||||
};
|
||||
/*! \brief training parameter of tree grower */
|
||||
TrainParam param;
|
||||
/*! \brief queue of nodes to be expanded */
|
||||
std::vector<int> qexpand;
|
||||
/*!
|
||||
* \brief map active node to its working index offset in qexpand,
|
||||
* can be -1, which means the node is not actively expanding
|
||||
*/
|
||||
std::vector<int> node2workindex;
|
||||
/*!
|
||||
* \brief position of each instance in the tree
|
||||
* can be negative, which means this position is no longer expanding
|
||||
* see also Decode/EncodePosition
|
||||
*/
|
||||
std::vector<int> position;
|
||||
|
||||
private:
|
||||
/*!
 * \brief rebuild node2workindex from the current qexpand:
 *  a node in qexpand maps to its offset in qexpand, any other node maps to -1
 * \param tree the regression tree structure, gives the number of nodes
 */
inline void UpdateNode2WorkIndex(const RegTree &tree) {
  // reset all the entries, including the ones of newly added nodes, to -1;
  // filling before resizing left the new entries at 0, a valid work index
  node2workindex.assign(static_cast<size_t>(tree.param.num_nodes), -1);
  for (size_t i = 0; i < qexpand.size(); ++i) {
    node2workindex[qexpand[i]] = static_cast<int>(i);
  }
}
|
||||
};
|
||||
} // namespace tree
|
||||
} // namespace xgboost
|
||||
#endif // XGBOOST_TREE_UPDATER_BASEMAKER_INL_HPP_
|
||||
@@ -14,7 +14,7 @@
|
||||
|
||||
namespace xgboost {
|
||||
namespace tree {
|
||||
/*! \brief pruner that prunes a tree after growing finishs */
|
||||
/*! \brief columnwise update to construct a tree */
|
||||
template<typename TStats>
|
||||
class ColMaker: public IUpdater {
|
||||
public:
|
||||
@@ -36,24 +36,29 @@ class ColMaker: public IUpdater {
|
||||
Builder builder(param);
|
||||
builder.Update(gpair, p_fmat, info, trees[i]);
|
||||
}
|
||||
|
||||
param.learning_rate = lr;
|
||||
}
|
||||
|
||||
private:
|
||||
protected:
|
||||
// training parameter
|
||||
TrainParam param;
|
||||
// data structure
|
||||
/*! \brief per thread x per node entry to store tmp data */
|
||||
struct ThreadEntry {
|
||||
/*! \brief statistics of data*/
|
||||
/*! \brief statistics of data */
|
||||
TStats stats;
|
||||
/*! \brief extra statistics of data */
|
||||
TStats stats_extra;
|
||||
/*! \brief last feature value scanned */
|
||||
float last_fvalue;
|
||||
/*! \brief first feature value scanned */
|
||||
float first_fvalue;
|
||||
/*! \brief current best solution */
|
||||
SplitEntry best;
|
||||
// constructor
|
||||
explicit ThreadEntry(const TrainParam ¶m)
|
||||
: stats(param) {
|
||||
: stats(param), stats_extra(param) {
|
||||
}
|
||||
};
|
||||
struct NodeEntry {
|
||||
@@ -104,7 +109,7 @@ class ColMaker: public IUpdater {
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
protected:
|
||||
// initialize temp data structure
|
||||
inline void InitData(const std::vector<bst_gpair> &gpair,
|
||||
const IFMatrix &fmat,
|
||||
@@ -127,17 +132,17 @@ class ColMaker: public IUpdater {
|
||||
// mark delete for the deleted datas
|
||||
for (size_t i = 0; i < rowset.size(); ++i) {
|
||||
const bst_uint ridx = rowset[i];
|
||||
if (gpair[ridx].hess < 0.0f) position[ridx] = -1;
|
||||
if (gpair[ridx].hess < 0.0f) position[ridx] = ~position[ridx];
|
||||
}
|
||||
// mark subsample
|
||||
if (param.subsample < 1.0f) {
|
||||
for (size_t i = 0; i < rowset.size(); ++i) {
|
||||
const bst_uint ridx = rowset[i];
|
||||
if (gpair[ridx].hess < 0.0f) continue;
|
||||
if (random::SampleBinary(param.subsample) == 0) position[ridx] = -1;
|
||||
if (random::SampleBinary(param.subsample) == 0) position[ridx] = ~position[ridx];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
{
|
||||
// initialize feature index
|
||||
unsigned ncol = static_cast<unsigned>(fmat.NumCol());
|
||||
@@ -219,7 +224,138 @@ class ColMaker: public IUpdater {
|
||||
}
|
||||
// use new nodes for qexpand
|
||||
qexpand = newnodes;
|
||||
}
|
||||
}
|
||||
// parallel find the best split of current fid
|
||||
// this function does not support nested functions
|
||||
inline void ParallelFindSplit(const ColBatch::Inst &col,
|
||||
bst_uint fid,
|
||||
const IFMatrix &fmat,
|
||||
const std::vector<bst_gpair> &gpair,
|
||||
const BoosterInfo &info) {
|
||||
bool need_forward = param.need_forward_search(fmat.GetColDensity(fid));
|
||||
bool need_backward = param.need_backward_search(fmat.GetColDensity(fid));
|
||||
const std::vector<int> &qexpand = qexpand_;
|
||||
int nthread;
|
||||
#pragma omp parallel
|
||||
{
|
||||
const int tid = omp_get_thread_num();
|
||||
std::vector<ThreadEntry> &temp = stemp[tid];
|
||||
// cleanup temp statistics
|
||||
for (size_t j = 0; j < qexpand.size(); ++j) {
|
||||
temp[qexpand[j]].stats.Clear();
|
||||
}
|
||||
nthread = omp_get_num_threads();
|
||||
bst_uint step = (col.length + nthread - 1) / nthread;
|
||||
bst_uint end = std::min(col.length, step * (tid + 1));
|
||||
for (bst_uint i = tid * step; i < end; ++i) {
|
||||
const bst_uint ridx = col[i].index;
|
||||
const int nid = position[ridx];
|
||||
if (nid < 0) continue;
|
||||
const float fvalue = col[i].fvalue;
|
||||
if (temp[nid].stats.Empty()) {
|
||||
temp[nid].first_fvalue = fvalue;
|
||||
}
|
||||
temp[nid].stats.Add(gpair, info, ridx);
|
||||
temp[nid].last_fvalue = fvalue;
|
||||
}
|
||||
}
|
||||
// start collecting the partial sum statistics
|
||||
bst_omp_uint nnode = static_cast<bst_omp_uint>(qexpand.size());
|
||||
#pragma omp parallel for schedule(static)
|
||||
for (bst_omp_uint j = 0; j < nnode; ++j) {
|
||||
const int nid = qexpand[j];
|
||||
TStats sum(param), tmp(param), c(param);
|
||||
for (int tid = 0; tid < nthread; ++tid) {
|
||||
tmp = stemp[tid][nid].stats;
|
||||
stemp[tid][nid].stats = sum;
|
||||
sum.Add(tmp);
|
||||
if (tid != 0) {
|
||||
std::swap(stemp[tid - 1][nid].last_fvalue, stemp[tid][nid].first_fvalue);
|
||||
}
|
||||
}
|
||||
for (int tid = 0; tid < nthread; ++tid) {
|
||||
stemp[tid][nid].stats_extra = sum;
|
||||
ThreadEntry &e = stemp[tid][nid];
|
||||
float fsplit;
|
||||
if (tid != 0) {
|
||||
if(fabsf(stemp[tid - 1][nid].last_fvalue - e.first_fvalue) > rt_2eps) {
|
||||
fsplit = (stemp[tid - 1][nid].last_fvalue - e.first_fvalue) * 0.5f;
|
||||
} else {
|
||||
continue;
|
||||
}
|
||||
} else {
|
||||
fsplit = e.first_fvalue - rt_eps;
|
||||
}
|
||||
if (need_forward && tid != 0) {
|
||||
c.SetSubstract(snode[nid].stats, e.stats);
|
||||
if (c.sum_hess >= param.min_child_weight && e.stats.sum_hess >= param.min_child_weight) {
|
||||
bst_float loss_chg = static_cast<bst_float>(e.stats.CalcGain(param) + c.CalcGain(param) - snode[nid].root_gain);
|
||||
e.best.Update(loss_chg, fid, fsplit, false);
|
||||
}
|
||||
}
|
||||
if (need_backward) {
|
||||
tmp.SetSubstract(sum, e.stats);
|
||||
c.SetSubstract(snode[nid].stats, tmp);
|
||||
if (c.sum_hess >= param.min_child_weight && tmp.sum_hess >= param.min_child_weight) {
|
||||
bst_float loss_chg = static_cast<bst_float>(tmp.CalcGain(param) + c.CalcGain(param) - snode[nid].root_gain);
|
||||
e.best.Update(loss_chg, fid, fsplit, true);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (need_backward) {
|
||||
tmp = sum;
|
||||
ThreadEntry &e = stemp[nthread-1][nid];
|
||||
c.SetSubstract(snode[nid].stats, tmp);
|
||||
if (c.sum_hess >= param.min_child_weight && tmp.sum_hess >= param.min_child_weight) {
|
||||
bst_float loss_chg = static_cast<bst_float>(tmp.CalcGain(param) + c.CalcGain(param) - snode[nid].root_gain);
|
||||
e.best.Update(loss_chg, fid, e.last_fvalue + rt_eps, true);
|
||||
}
|
||||
}
|
||||
}
|
||||
// rescan, generate candidate split
|
||||
#pragma omp parallel
|
||||
{
|
||||
TStats c(param), cright(param);
|
||||
const int tid = omp_get_thread_num();
|
||||
std::vector<ThreadEntry> &temp = stemp[tid];
|
||||
nthread = static_cast<bst_uint>(omp_get_num_threads());
|
||||
bst_uint step = (col.length + nthread - 1) / nthread;
|
||||
bst_uint end = std::min(col.length, step * (tid + 1));
|
||||
for (bst_uint i = tid * step; i < end; ++i) {
|
||||
const bst_uint ridx = col[i].index;
|
||||
const int nid = position[ridx];
|
||||
if (nid < 0) continue;
|
||||
const float fvalue = col[i].fvalue;
|
||||
// get the statistics of nid
|
||||
ThreadEntry &e = temp[nid];
|
||||
if (e.stats.Empty()) {
|
||||
e.stats.Add(gpair, info, ridx);
|
||||
e.first_fvalue = fvalue;
|
||||
} else {
|
||||
// forward default right
|
||||
if (fabsf(fvalue - e.first_fvalue) > rt_2eps){
|
||||
if (need_forward) {
|
||||
c.SetSubstract(snode[nid].stats, e.stats);
|
||||
if (c.sum_hess >= param.min_child_weight && e.stats.sum_hess >= param.min_child_weight) {
|
||||
bst_float loss_chg = static_cast<bst_float>(e.stats.CalcGain(param) + c.CalcGain(param) - snode[nid].root_gain);
|
||||
e.best.Update(loss_chg, fid, (fvalue + e.first_fvalue) * 0.5f, false);
|
||||
}
|
||||
}
|
||||
if (need_backward) {
|
||||
cright.SetSubstract(e.stats_extra, e.stats);
|
||||
c.SetSubstract(snode[nid].stats, cright);
|
||||
if (c.sum_hess >= param.min_child_weight && cright.sum_hess >= param.min_child_weight) {
|
||||
bst_float loss_chg = static_cast<bst_float>(cright.CalcGain(param) + c.CalcGain(param) - snode[nid].root_gain);
|
||||
e.best.Update(loss_chg, fid, (fvalue + e.first_fvalue) * 0.5f, true);
|
||||
}
|
||||
}
|
||||
}
|
||||
e.stats.Add(gpair, info, ridx);
|
||||
e.first_fvalue = fvalue;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// enumerate the split values of specific feature
|
||||
inline void EnumerateSplit(const ColBatch::Entry *begin,
|
||||
const ColBatch::Entry *end,
|
||||
@@ -273,6 +409,42 @@ class ColMaker: public IUpdater {
|
||||
}
|
||||
}
|
||||
}
|
||||
// update the solution candidate
|
||||
virtual void UpdateSolution(const ColBatch &batch,
|
||||
const std::vector<bst_gpair> &gpair,
|
||||
const IFMatrix &fmat,
|
||||
const BoosterInfo &info) {
|
||||
// start enumeration
|
||||
const bst_omp_uint nsize = static_cast<bst_omp_uint>(batch.size);
|
||||
#if defined(_OPENMP)
|
||||
const int batch_size = std::max(static_cast<int>(nsize / this->nthread / 32), 1);
|
||||
#endif
|
||||
int poption = param.parallel_option;
|
||||
if (poption == 2) {
|
||||
poption = nsize * 2 < nthread ? 1 : 0;
|
||||
}
|
||||
if (poption == 0) {
|
||||
#pragma omp parallel for schedule(dynamic, batch_size)
|
||||
for (bst_omp_uint i = 0; i < nsize; ++i) {
|
||||
const bst_uint fid = batch.col_index[i];
|
||||
const int tid = omp_get_thread_num();
|
||||
const ColBatch::Inst c = batch[i];
|
||||
if (param.need_forward_search(fmat.GetColDensity(fid))) {
|
||||
this->EnumerateSplit(c.data, c.data + c.length, +1,
|
||||
fid, gpair, info, stemp[tid]);
|
||||
}
|
||||
if (param.need_backward_search(fmat.GetColDensity(fid))) {
|
||||
this->EnumerateSplit(c.data + c.length - 1, c.data - 1, -1,
|
||||
fid, gpair, info, stemp[tid]);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for (bst_omp_uint i = 0; i < nsize; ++i) {
|
||||
this->ParallelFindSplit(batch[i], batch.col_index[i],
|
||||
fmat, gpair, info);
|
||||
}
|
||||
}
|
||||
}
|
||||
// find splits at current level, do split per level
|
||||
inline void FindSplit(int depth,
|
||||
const std::vector<int> &qexpand,
|
||||
@@ -289,66 +461,76 @@ class ColMaker: public IUpdater {
|
||||
}
|
||||
utils::IIterator<ColBatch> *iter = p_fmat->ColIterator(feat_set);
|
||||
while (iter->Next()) {
|
||||
const ColBatch &batch = iter->Value();
|
||||
// start enumeration
|
||||
const bst_omp_uint nsize = static_cast<bst_omp_uint>(batch.size);
|
||||
#if defined(_OPENMP)
|
||||
const int batch_size = std::max(static_cast<int>(nsize / this->nthread / 32), 1);
|
||||
#endif
|
||||
#pragma omp parallel for schedule(dynamic, batch_size)
|
||||
for (bst_omp_uint i = 0; i < nsize; ++i) {
|
||||
const bst_uint fid = batch.col_index[i];
|
||||
const int tid = omp_get_thread_num();
|
||||
const ColBatch::Inst c = batch[i];
|
||||
if (param.need_forward_search(p_fmat->GetColDensity(fid))) {
|
||||
this->EnumerateSplit(c.data, c.data + c.length, +1,
|
||||
fid, gpair, info, stemp[tid]);
|
||||
this->UpdateSolution(iter->Value(), gpair, *p_fmat, info);
|
||||
}
|
||||
// after this each thread's stemp will get the best candidates, aggregate results
|
||||
this->SyncBestSolution(qexpand);
|
||||
// get the best result, we can synchronize the solution
|
||||
for (size_t i = 0; i < qexpand.size(); ++i) {
|
||||
const int nid = qexpand[i];
|
||||
NodeEntry &e = snode[nid];
|
||||
// now we know the solution in snode[nid], set split
|
||||
if (e.best.loss_chg > rt_eps) {
|
||||
p_tree->AddChilds(nid);
|
||||
(*p_tree)[nid].set_split(e.best.split_index(), e.best.split_value, e.best.default_left());
|
||||
// mark right child as 0, to indicate fresh leaf
|
||||
(*p_tree)[(*p_tree)[nid].cleft()].set_leaf(0.0f, 0);
|
||||
(*p_tree)[(*p_tree)[nid].cright()].set_leaf(0.0f, 0);
|
||||
} else {
|
||||
(*p_tree)[nid].set_leaf(e.weight * param.learning_rate);
|
||||
}
|
||||
}
|
||||
}
|
||||
// reset position of each data points after split is created in the tree
|
||||
inline void ResetPosition(const std::vector<int> &qexpand, IFMatrix *p_fmat, const RegTree &tree) {
|
||||
// set the positions in the nondefault
|
||||
this->SetNonDefaultPosition(qexpand, p_fmat, tree);
|
||||
// set rest of instances to default position
|
||||
const std::vector<bst_uint> &rowset = p_fmat->buffered_rowset();
|
||||
// set default direct nodes to default
|
||||
// for leaf nodes that are not fresh, mark then to ~nid,
|
||||
// so that they are ignored in future statistics collection
|
||||
const bst_omp_uint ndata = static_cast<bst_omp_uint>(rowset.size());
|
||||
|
||||
#pragma omp parallel for schedule(static)
|
||||
for (bst_omp_uint i = 0; i < ndata; ++i) {
|
||||
const bst_uint ridx = rowset[i];
|
||||
const int nid = this->DecodePosition(ridx);
|
||||
if (tree[nid].is_leaf()) {
|
||||
// mark finish when it is not a fresh leaf
|
||||
if (tree[nid].cright() == -1) {
|
||||
position[ridx] = ~nid;
|
||||
}
|
||||
if (param.need_backward_search(p_fmat->GetColDensity(fid))) {
|
||||
this->EnumerateSplit(c.data + c.length - 1, c.data - 1, -1,
|
||||
fid, gpair, info, stemp[tid]);
|
||||
} else {
|
||||
// push to default branch
|
||||
if (tree[nid].default_left()) {
|
||||
this->SetEncodePosition(ridx, tree[nid].cleft());
|
||||
} else {
|
||||
this->SetEncodePosition(ridx, tree[nid].cright());
|
||||
}
|
||||
}
|
||||
}
|
||||
// after this each thread's stemp will get the best candidates, aggregate results
|
||||
}
|
||||
// customization part
|
||||
// synchronize the best solution of each node
|
||||
virtual void SyncBestSolution(const std::vector<int> &qexpand) {
|
||||
for (size_t i = 0; i < qexpand.size(); ++i) {
|
||||
const int nid = qexpand[i];
|
||||
NodeEntry &e = snode[nid];
|
||||
for (int tid = 0; tid < this->nthread; ++tid) {
|
||||
e.best.Update(stemp[tid][nid].best);
|
||||
}
|
||||
// now we know the solution in snode[nid], set split
|
||||
if (e.best.loss_chg > rt_eps) {
|
||||
p_tree->AddChilds(nid);
|
||||
(*p_tree)[nid].set_split(e.best.split_index(), e.best.split_value, e.best.default_left());
|
||||
} else {
|
||||
(*p_tree)[nid].set_leaf(e.weight * param.learning_rate);
|
||||
}
|
||||
}
|
||||
}
|
||||
// reset position of each data points after split is created in the tree
|
||||
inline void ResetPosition(const std::vector<int> &qexpand, IFMatrix *p_fmat, const RegTree &tree) {
|
||||
const std::vector<bst_uint> &rowset = p_fmat->buffered_rowset();
|
||||
// step 1, set default direct nodes to default, and leaf nodes to -1
|
||||
const bst_omp_uint ndata = static_cast<bst_omp_uint>(rowset.size());
|
||||
#pragma omp parallel for schedule(static)
|
||||
for (bst_omp_uint i = 0; i < ndata; ++i) {
|
||||
const bst_uint ridx = rowset[i];
|
||||
const int nid = position[ridx];
|
||||
if (nid >= 0) {
|
||||
if (tree[nid].is_leaf()) {
|
||||
position[ridx] = -1;
|
||||
} else {
|
||||
// push to default branch, correct latter
|
||||
position[ridx] = tree[nid].default_left() ? tree[nid].cleft(): tree[nid].cright();
|
||||
}
|
||||
}
|
||||
}
|
||||
// step 2, classify the non-default data into right places
|
||||
virtual void SetNonDefaultPosition(const std::vector<int> &qexpand,
|
||||
IFMatrix *p_fmat, const RegTree &tree) {
|
||||
// step 1, classify the non-default data into right places
|
||||
std::vector<unsigned> fsplits;
|
||||
for (size_t i = 0; i < qexpand.size(); ++i) {
|
||||
const int nid = qexpand[i];
|
||||
if (!tree[nid].is_leaf()) fsplits.push_back(tree[nid].split_index());
|
||||
if (!tree[nid].is_leaf()) {
|
||||
fsplits.push_back(tree[nid].split_index());
|
||||
}
|
||||
}
|
||||
std::sort(fsplits.begin(), fsplits.end());
|
||||
fsplits.resize(std::unique(fsplits.begin(), fsplits.end()) - fsplits.begin());
|
||||
@@ -364,21 +546,33 @@ class ColMaker: public IUpdater {
|
||||
for (bst_omp_uint j = 0; j < ndata; ++j) {
|
||||
const bst_uint ridx = col[j].index;
|
||||
const float fvalue = col[j].fvalue;
|
||||
int nid = position[ridx];
|
||||
if (nid == -1) continue;
|
||||
const int nid = this->DecodePosition(ridx);
|
||||
// go back to parent, correct those who are not default
|
||||
nid = tree[nid].parent();
|
||||
if (tree[nid].split_index() == fid) {
|
||||
if (fvalue < tree[nid].split_cond()) {
|
||||
position[ridx] = tree[nid].cleft();
|
||||
if (!tree[nid].is_leaf() && tree[nid].split_index() == fid) {
|
||||
if(fvalue < tree[nid].split_cond()) {
|
||||
this->SetEncodePosition(ridx, tree[nid].cleft());
|
||||
} else {
|
||||
position[ridx] = tree[nid].cright();
|
||||
this->SetEncodePosition(ridx, tree[nid].cright());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// utils to get/set position, with encoded format
|
||||
// return decoded position
|
||||
inline int DecodePosition(bst_uint ridx) const{
|
||||
const int pid = position[ridx];
|
||||
return pid < 0 ? ~pid : pid;
|
||||
}
|
||||
// encode the encoded position value for ridx
|
||||
inline void SetEncodePosition(bst_uint ridx, int nid) {
|
||||
if (position[ridx] < 0) {
|
||||
position[ridx] = ~nid;
|
||||
} else {
|
||||
position[ridx] = nid;
|
||||
}
|
||||
}
|
||||
//--data fields--
|
||||
// training parameters, held by const reference
const TrainParam &param;
|
||||
// number of omp thread used during training
|
||||
|
||||
169
src/tree/updater_distcol-inl.hpp
Normal file
169
src/tree/updater_distcol-inl.hpp
Normal file
@@ -0,0 +1,169 @@
|
||||
#ifndef XGBOOST_TREE_UPDATER_DISTCOL_INL_HPP_
|
||||
#define XGBOOST_TREE_UPDATER_DISTCOL_INL_HPP_
|
||||
/*!
|
||||
* \file updater_distcol-inl.hpp
|
||||
* \brief beta distributed version that takes a sub-column
|
||||
* and construct a tree
|
||||
* \author Tianqi Chen
|
||||
*/
|
||||
#include <rabit.h>
|
||||
#include "../utils/bitmap.h"
|
||||
#include "../utils/io.h"
|
||||
#include "./updater_colmaker-inl.hpp"
|
||||
#include "./updater_prune-inl.hpp"
|
||||
|
||||
namespace xgboost {
|
||||
namespace tree {
|
||||
template<typename TStats>
|
||||
class DistColMaker : public ColMaker<TStats> {
|
||||
public:
|
||||
DistColMaker(void) : builder(param) {}
|
||||
virtual ~DistColMaker(void) {}
|
||||
// set training parameter
|
||||
virtual void SetParam(const char *name, const char *val) {
|
||||
param.SetParam(name, val);
|
||||
pruner.SetParam(name, val);
|
||||
}
|
||||
virtual void Update(const std::vector<bst_gpair> &gpair,
|
||||
IFMatrix *p_fmat,
|
||||
const BoosterInfo &info,
|
||||
const std::vector<RegTree*> &trees) {
|
||||
TStats::CheckInfo(info);
|
||||
utils::Check(trees.size() == 1, "DistColMaker: only support one tree at a time");
|
||||
// build the tree
|
||||
builder.Update(gpair, p_fmat, info, trees[0]);
|
||||
//// prune the tree, note that pruner will sync the tree
|
||||
pruner.Update(gpair, p_fmat, info, trees);
|
||||
// update position after the tree is pruned
|
||||
builder.UpdatePosition(p_fmat, *trees[0]);
|
||||
}
|
||||
virtual const int* GetLeafPosition(void) const {
|
||||
return builder.GetLeafPosition();
|
||||
}
|
||||
private:
|
||||
struct Builder : public ColMaker<TStats>::Builder {
|
||||
public:
|
||||
Builder(const TrainParam ¶m)
|
||||
: ColMaker<TStats>::Builder(param) {
|
||||
}
|
||||
inline void UpdatePosition(IFMatrix *p_fmat, const RegTree &tree) {
|
||||
const std::vector<bst_uint> &rowset = p_fmat->buffered_rowset();
|
||||
const bst_omp_uint ndata = static_cast<bst_omp_uint>(rowset.size());
|
||||
#pragma omp parallel for schedule(static)
|
||||
for (bst_omp_uint i = 0; i < ndata; ++i) {
|
||||
const bst_uint ridx = rowset[i];
|
||||
int nid = this->DecodePosition(ridx);
|
||||
while (tree[nid].is_deleted()) {
|
||||
nid = tree[nid].parent();
|
||||
utils::Assert(nid >=0, "distributed learning error");
|
||||
}
|
||||
this->position[ridx] = nid;
|
||||
}
|
||||
}
|
||||
virtual const int* GetLeafPosition(void) const {
|
||||
return BeginPtr(this->position);
|
||||
}
|
||||
protected:
|
||||
virtual void SetNonDefaultPosition(const std::vector<int> &qexpand,
|
||||
IFMatrix *p_fmat, const RegTree &tree) {
|
||||
// step 2, classify the non-default data into right places
|
||||
std::vector<unsigned> fsplits;
|
||||
for (size_t i = 0; i < qexpand.size(); ++i) {
|
||||
const int nid = qexpand[i];
|
||||
if (!tree[nid].is_leaf()) {
|
||||
fsplits.push_back(tree[nid].split_index());
|
||||
}
|
||||
}
|
||||
// get the candidate split index
|
||||
std::sort(fsplits.begin(), fsplits.end());
|
||||
fsplits.resize(std::unique(fsplits.begin(), fsplits.end()) - fsplits.begin());
|
||||
while (fsplits.size() != 0 && fsplits.back() >= p_fmat->NumCol()) {
|
||||
fsplits.pop_back();
|
||||
}
|
||||
// bitmap is only word concurrent, set to bool first
|
||||
{
|
||||
bst_omp_uint ndata = static_cast<bst_omp_uint>(this->position.size());
|
||||
boolmap.resize(ndata);
|
||||
#pragma omp parallel for schedule(static)
|
||||
for (bst_omp_uint j = 0; j < ndata; ++j) {
|
||||
boolmap[j] = 0;
|
||||
}
|
||||
}
|
||||
utils::IIterator<ColBatch> *iter = p_fmat->ColIterator(fsplits);
|
||||
while (iter->Next()) {
|
||||
const ColBatch &batch = iter->Value();
|
||||
for (size_t i = 0; i < batch.size; ++i) {
|
||||
ColBatch::Inst col = batch[i];
|
||||
const bst_uint fid = batch.col_index[i];
|
||||
const bst_omp_uint ndata = static_cast<bst_omp_uint>(col.length);
|
||||
#pragma omp parallel for schedule(static)
|
||||
for (bst_omp_uint j = 0; j < ndata; ++j) {
|
||||
const bst_uint ridx = col[j].index;
|
||||
const float fvalue = col[j].fvalue;
|
||||
const int nid = this->DecodePosition(ridx);
|
||||
if (!tree[nid].is_leaf() && tree[nid].split_index() == fid) {
|
||||
if (fvalue < tree[nid].split_cond()) {
|
||||
if (!tree[nid].default_left()) boolmap[ridx] = 1;
|
||||
} else {
|
||||
if (tree[nid].default_left()) boolmap[ridx] = 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bitmap.InitFromBool(boolmap);
|
||||
// communicate bitmap
|
||||
rabit::Allreduce<rabit::op::BitOR>(BeginPtr(bitmap.data), bitmap.data.size());
|
||||
const std::vector<bst_uint> &rowset = p_fmat->buffered_rowset();
|
||||
// get the new position
|
||||
const bst_omp_uint ndata = static_cast<bst_omp_uint>(rowset.size());
|
||||
#pragma omp parallel for schedule(static)
|
||||
for (bst_omp_uint i = 0; i < ndata; ++i) {
|
||||
const bst_uint ridx = rowset[i];
|
||||
const int nid = this->DecodePosition(ridx);
|
||||
if (bitmap.Get(ridx)) {
|
||||
utils::Assert(!tree[nid].is_leaf(), "inconsistent reduce information");
|
||||
if (tree[nid].default_left()) {
|
||||
this->SetEncodePosition(ridx, tree[nid].cright());
|
||||
} else {
|
||||
this->SetEncodePosition(ridx, tree[nid].cleft());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// synchronize the best solution of each node
|
||||
virtual void SyncBestSolution(const std::vector<int> &qexpand) {
|
||||
std::vector<SplitEntry> vec;
|
||||
for (size_t i = 0; i < qexpand.size(); ++i) {
|
||||
const int nid = qexpand[i];
|
||||
for (int tid = 0; tid < this->nthread; ++tid) {
|
||||
this->snode[nid].best.Update(this->stemp[tid][nid].best);
|
||||
}
|
||||
vec.push_back(this->snode[nid].best);
|
||||
}
|
||||
// TODO, lazy version
|
||||
// communicate best solution
|
||||
reducer.Allreduce(BeginPtr(vec), vec.size());
|
||||
// assign solution back
|
||||
for (size_t i = 0; i < qexpand.size(); ++i) {
|
||||
const int nid = qexpand[i];
|
||||
this->snode[nid].best = vec[i];
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
utils::BitMap bitmap;
|
||||
std::vector<int> boolmap;
|
||||
rabit::Reducer<SplitEntry> reducer;
|
||||
};
|
||||
// we directly introduce pruner here
|
||||
TreePruner pruner;
|
||||
// training parameter
|
||||
TrainParam param;
|
||||
// pointer to the builder
|
||||
Builder builder;
|
||||
};
|
||||
} // namespace tree
|
||||
} // namespace xgboost
|
||||
#endif
|
||||
701
src/tree/updater_histmaker-inl.hpp
Normal file
701
src/tree/updater_histmaker-inl.hpp
Normal file
@@ -0,0 +1,701 @@
|
||||
#ifndef XGBOOST_TREE_UPDATER_HISTMAKER_INL_HPP_
|
||||
#define XGBOOST_TREE_UPDATER_HISTMAKER_INL_HPP_
|
||||
/*!
|
||||
* \file updater_histmaker-inl.hpp
|
||||
* \brief use histogram counting to construct a tree
|
||||
* \author Tianqi Chen
|
||||
*/
|
||||
#include <vector>
|
||||
#include <algorithm>
|
||||
#include <rabit.h>
|
||||
#include "../utils/quantile.h"
|
||||
#include "../utils/group_data.h"
|
||||
#include "./updater_basemaker-inl.hpp"
|
||||
|
||||
namespace xgboost {
|
||||
namespace tree {
|
||||
template<typename TStats>
|
||||
class HistMaker: public BaseMaker {
|
||||
public:
|
||||
virtual ~HistMaker(void) {}
|
||||
virtual void Update(const std::vector<bst_gpair> &gpair,
|
||||
IFMatrix *p_fmat,
|
||||
const BoosterInfo &info,
|
||||
const std::vector<RegTree*> &trees) {
|
||||
TStats::CheckInfo(info);
|
||||
// rescale learning rate according to size of trees
|
||||
float lr = param.learning_rate;
|
||||
param.learning_rate = lr / trees.size();
|
||||
// build tree
|
||||
for (size_t i = 0; i < trees.size(); ++i) {
|
||||
this->Update(gpair, p_fmat, info, trees[i]);
|
||||
}
|
||||
param.learning_rate = lr;
|
||||
}
|
||||
|
||||
protected:
|
||||
/*! \brief a single histogram */
|
||||
struct HistUnit {
|
||||
/*! \brief cutting point of histogram, contains maximum point */
|
||||
const bst_float *cut;
|
||||
/*! \brief content of statistics data */
|
||||
TStats *data;
|
||||
/*! \brief size of histogram */
|
||||
unsigned size;
|
||||
// default constructor
|
||||
HistUnit(void) {}
|
||||
// constructor
|
||||
HistUnit(const bst_float *cut, TStats *data, unsigned size)
|
||||
: cut(cut), data(data), size(size) {}
|
||||
/*! \brief add a histogram to data */
|
||||
inline void Add(bst_float fv,
|
||||
const std::vector<bst_gpair> &gpair,
|
||||
const BoosterInfo &info,
|
||||
const bst_uint ridx) {
|
||||
unsigned i = std::upper_bound(cut, cut + size, fv) - cut;
|
||||
utils::Assert(size != 0, "try insert into size=0");
|
||||
utils::Assert(i < size,
|
||||
"maximum value must be in cut, fv = %g, cutmax=%g", fv, cut[size-1]);
|
||||
data[i].Add(gpair, info, ridx);
|
||||
}
|
||||
};
|
||||
/*! \brief a set of histograms from different index */
|
||||
struct HistSet {
|
||||
/*! \brief the index pointer of each histunit */
|
||||
const unsigned *rptr;
|
||||
/*! \brief cutting points in each histunit */
|
||||
const bst_float *cut;
|
||||
/*! \brief data in different hist unit */
|
||||
std::vector<TStats> data;
|
||||
/*! \brief */
|
||||
inline HistUnit operator[](size_t fid) {
|
||||
return HistUnit(cut + rptr[fid],
|
||||
&data[0] + rptr[fid],
|
||||
rptr[fid+1] - rptr[fid]);
|
||||
}
|
||||
};
|
||||
// thread workspace
|
||||
struct ThreadWSpace {
|
||||
/*! \brief actual unit pointer */
|
||||
std::vector<unsigned> rptr;
|
||||
/*! \brief cut field */
|
||||
std::vector<bst_float> cut;
|
||||
// per thread histset
|
||||
std::vector<HistSet> hset;
|
||||
// initialize the hist set
|
||||
inline void Init(const TrainParam ¶m, int nthread) {
|
||||
hset.resize(nthread);
|
||||
// cleanup statistics
|
||||
for (int tid = 0; tid < nthread; ++tid) {
|
||||
for (size_t i = 0; i < hset[tid].data.size(); ++i) {
|
||||
hset[tid].data[i].Clear();
|
||||
}
|
||||
hset[tid].rptr = BeginPtr(rptr);
|
||||
hset[tid].cut = BeginPtr(cut);
|
||||
hset[tid].data.resize(cut.size(), TStats(param));
|
||||
}
|
||||
}
|
||||
// aggregate all statistics to hset[0]
|
||||
inline void Aggregate(void) {
|
||||
bst_omp_uint nsize = static_cast<bst_omp_uint>(cut.size());
|
||||
#pragma omp parallel for schedule(static)
|
||||
for (bst_omp_uint i = 0; i < nsize; ++i) {
|
||||
for (size_t tid = 1; tid < hset.size(); ++tid) {
|
||||
hset[0].data[i].Add(hset[tid].data[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
/*! \brief clear the workspace */
|
||||
inline void Clear(void) {
|
||||
cut.clear(); rptr.resize(1); rptr[0] = 0;
|
||||
}
|
||||
/*! \brief total size */
|
||||
inline size_t Size(void) const {
|
||||
return rptr.size() - 1;
|
||||
}
|
||||
};
|
||||
// workspace of thread
|
||||
ThreadWSpace wspace;
|
||||
// reducer for histogram
|
||||
rabit::Reducer<TStats> histred;
|
||||
// set of working features
|
||||
std::vector<bst_uint> fwork_set;
|
||||
// update function implementation
|
||||
virtual void Update(const std::vector<bst_gpair> &gpair,
|
||||
IFMatrix *p_fmat,
|
||||
const BoosterInfo &info,
|
||||
RegTree *p_tree) {
|
||||
this->InitData(gpair, *p_fmat, info.root_index, *p_tree);
|
||||
this->InitWorkSet(p_fmat, *p_tree, &fwork_set);
|
||||
for (int depth = 0; depth < param.max_depth; ++depth) {
|
||||
// reset and propose candidate split
|
||||
this->ResetPosAndPropose(gpair, p_fmat, info, fwork_set, *p_tree);
|
||||
// create histogram
|
||||
this->CreateHist(gpair, p_fmat, info, fwork_set, *p_tree);
|
||||
// find split based on histogram statistics
|
||||
this->FindSplit(depth, gpair, p_fmat, info, fwork_set, p_tree);
|
||||
// reset position after split
|
||||
this->ResetPositionAfterSplit(p_fmat, *p_tree);
|
||||
this->UpdateQueueExpand(*p_tree);
|
||||
// if nothing left to be expand, break
|
||||
if (qexpand.size() == 0) break;
|
||||
}
|
||||
for (size_t i = 0; i < qexpand.size(); ++i) {
|
||||
const int nid = qexpand[i];
|
||||
(*p_tree)[nid].set_leaf(p_tree->stat(nid).base_weight * param.learning_rate);
|
||||
}
|
||||
}
|
||||
// this function does two jobs
|
||||
// (1) reset the position in array position, to be the latest leaf id
|
||||
// (2) propose a set of candidate cuts and set wspace.rptr wspace.cut correctly
|
||||
virtual void ResetPosAndPropose(const std::vector<bst_gpair> &gpair,
|
||||
IFMatrix *p_fmat,
|
||||
const BoosterInfo &info,
|
||||
const std::vector <bst_uint> &fset,
|
||||
const RegTree &tree) = 0;
|
||||
// initialize the current working set of features in this round
|
||||
virtual void InitWorkSet(IFMatrix *p_fmat,
|
||||
const RegTree &tree,
|
||||
std::vector<bst_uint> *p_fset) {
|
||||
p_fset->resize(tree.param.num_feature);
|
||||
for (size_t i = 0; i < p_fset->size(); ++i) {
|
||||
(*p_fset)[i] = static_cast<unsigned>(i);
|
||||
}
|
||||
}
|
||||
// reset position after split, this is not a must, depending on implementation
|
||||
virtual void ResetPositionAfterSplit(IFMatrix *p_fmat,
|
||||
const RegTree &tree) {
|
||||
}
|
||||
virtual void CreateHist(const std::vector<bst_gpair> &gpair,
|
||||
IFMatrix *p_fmat,
|
||||
const BoosterInfo &info,
|
||||
const std::vector <bst_uint> &fset,
|
||||
const RegTree &tree) = 0;
|
||||
private:
|
||||
inline void EnumerateSplit(const HistUnit &hist,
|
||||
const TStats &node_sum,
|
||||
bst_uint fid,
|
||||
SplitEntry *best,
|
||||
TStats *left_sum) {
|
||||
if (hist.size == 0) return;
|
||||
|
||||
double root_gain = node_sum.CalcGain(param);
|
||||
TStats s(param), c(param);
|
||||
for (bst_uint i = 0; i < hist.size; ++i) {
|
||||
s.Add(hist.data[i]);
|
||||
if (s.sum_hess >= param.min_child_weight) {
|
||||
c.SetSubstract(node_sum, s);
|
||||
if (c.sum_hess >= param.min_child_weight) {
|
||||
double loss_chg = s.CalcGain(param) + c.CalcGain(param) - root_gain;
|
||||
if (best->Update((float)loss_chg, fid, hist.cut[i], false)) {
|
||||
*left_sum = s;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
s.Clear();
|
||||
for (bst_uint i = hist.size - 1; i != 0; --i) {
|
||||
s.Add(hist.data[i]);
|
||||
if (s.sum_hess >= param.min_child_weight) {
|
||||
c.SetSubstract(node_sum, s);
|
||||
if (c.sum_hess >= param.min_child_weight) {
|
||||
double loss_chg = s.CalcGain(param) + c.CalcGain(param) - root_gain;
|
||||
if (best->Update((float)loss_chg, fid, hist.cut[i-1], true)) {
|
||||
*left_sum = c;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
inline void FindSplit(int depth,
|
||||
const std::vector<bst_gpair> &gpair,
|
||||
IFMatrix *p_fmat,
|
||||
const BoosterInfo &info,
|
||||
const std::vector <bst_uint> &fset,
|
||||
RegTree *p_tree) {
|
||||
const size_t num_feature = fset.size();
|
||||
// get the best split condition for each node
|
||||
std::vector<SplitEntry> sol(qexpand.size());
|
||||
std::vector<TStats> left_sum(qexpand.size());
|
||||
bst_omp_uint nexpand = static_cast<bst_omp_uint>(qexpand.size());
|
||||
#pragma omp parallel for schedule(dynamic, 1)
|
||||
for (bst_omp_uint wid = 0; wid < nexpand; ++ wid) {
|
||||
const int nid = qexpand[wid];
|
||||
utils::Assert(node2workindex[nid] == static_cast<int>(wid),
|
||||
"node2workindex inconsistent");
|
||||
SplitEntry &best = sol[wid];
|
||||
TStats &node_sum = wspace.hset[0][num_feature + wid * (num_feature + 1)].data[0];
|
||||
for (size_t i = 0; i < fset.size(); ++ i) {
|
||||
EnumerateSplit(this->wspace.hset[0][i + wid * (num_feature+1)],
|
||||
node_sum, fset[i], &best, &left_sum[wid]);
|
||||
}
|
||||
}
|
||||
// get the best result, we can synchronize the solution
|
||||
for (bst_omp_uint wid = 0; wid < nexpand; ++ wid) {
|
||||
const int nid = qexpand[wid];
|
||||
const SplitEntry &best = sol[wid];
|
||||
const TStats &node_sum = wspace.hset[0][num_feature + wid * (num_feature + 1)].data[0];
|
||||
this->SetStats(p_tree, nid, node_sum);
|
||||
// set up the values
|
||||
p_tree->stat(nid).loss_chg = best.loss_chg;
|
||||
// now we know the solution in snode[nid], set split
|
||||
if (best.loss_chg > rt_eps) {
|
||||
p_tree->AddChilds(nid);
|
||||
(*p_tree)[nid].set_split(best.split_index(),
|
||||
best.split_value, best.default_left());
|
||||
// mark right child as 0, to indicate fresh leaf
|
||||
(*p_tree)[(*p_tree)[nid].cleft()].set_leaf(0.0f, 0);
|
||||
(*p_tree)[(*p_tree)[nid].cright()].set_leaf(0.0f, 0);
|
||||
// right side sum
|
||||
TStats right_sum;
|
||||
right_sum.SetSubstract(node_sum, left_sum[wid]);
|
||||
this->SetStats(p_tree, (*p_tree)[nid].cleft(), left_sum[wid]);
|
||||
this->SetStats(p_tree, (*p_tree)[nid].cright(), right_sum);
|
||||
} else {
|
||||
(*p_tree)[nid].set_leaf(p_tree->stat(nid).base_weight * param.learning_rate);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
inline void SetStats(RegTree *p_tree, int nid, const TStats &node_sum) {
|
||||
p_tree->stat(nid).base_weight = static_cast<float>(node_sum.CalcWeight(param));
|
||||
p_tree->stat(nid).sum_hess = static_cast<float>(node_sum.sum_hess);
|
||||
node_sum.SetLeafVec(param, p_tree->leafvec(nid));
|
||||
}
|
||||
};
|
||||
|
||||
template<typename TStats>
|
||||
class CQHistMaker: public HistMaker<TStats> {
|
||||
protected:
|
||||
struct HistEntry {
|
||||
typename HistMaker<TStats>::HistUnit hist;
|
||||
unsigned istart;
|
||||
/*!
|
||||
* \brief add a histogram to data,
|
||||
* do linear scan, start from istart
|
||||
*/
|
||||
inline void Add(bst_float fv,
|
||||
const std::vector<bst_gpair> &gpair,
|
||||
const BoosterInfo &info,
|
||||
const bst_uint ridx) {
|
||||
while (istart < hist.size && !(fv < hist.cut[istart])) ++istart;
|
||||
utils::Assert(istart != hist.size, "the bound variable must be max");
|
||||
hist.data[istart].Add(gpair, info, ridx);
|
||||
}
|
||||
};
|
||||
// sketch type used for this
|
||||
typedef utils::WXQuantileSketch<bst_float, bst_float> WXQSketch;
|
||||
// initialize the work set of tree
|
||||
virtual void InitWorkSet(IFMatrix *p_fmat,
|
||||
const RegTree &tree,
|
||||
std::vector<bst_uint> *p_fset) {
|
||||
feat_helper.InitByCol(p_fmat, tree);
|
||||
feat_helper.SampleCol(this->param.colsample_bytree, p_fset);
|
||||
}
|
||||
// code to create histogram
|
||||
virtual void CreateHist(const std::vector<bst_gpair> &gpair,
|
||||
IFMatrix *p_fmat,
|
||||
const BoosterInfo &info,
|
||||
const std::vector<bst_uint> &fset,
|
||||
const RegTree &tree) {
|
||||
// fill in reverse map
|
||||
feat2workindex.resize(tree.param.num_feature);
|
||||
std::fill(feat2workindex.begin(), feat2workindex.end(), -1);
|
||||
for (size_t i = 0; i < fset.size(); ++i) {
|
||||
feat2workindex[fset[i]] = static_cast<int>(i);
|
||||
}
|
||||
// start to work
|
||||
this->wspace.Init(this->param, 1);
|
||||
// if it is C++11, use lazy evaluation for Allreduce,
|
||||
// to gain speedup in recovery
|
||||
#if __cplusplus >= 201103L
|
||||
auto lazy_get_hist = [&]()
|
||||
#endif
|
||||
{
|
||||
thread_hist.resize(this->get_nthread());
|
||||
// start accumulating statistics
|
||||
utils::IIterator<ColBatch> *iter = p_fmat->ColIterator(fset);
|
||||
iter->BeforeFirst();
|
||||
while (iter->Next()) {
|
||||
const ColBatch &batch = iter->Value();
|
||||
// start enumeration
|
||||
const bst_omp_uint nsize = static_cast<bst_omp_uint>(batch.size);
|
||||
#pragma omp parallel for schedule(dynamic, 1)
|
||||
for (bst_omp_uint i = 0; i < nsize; ++i) {
|
||||
int offset = feat2workindex[batch.col_index[i]];
|
||||
if (offset >= 0) {
|
||||
this->UpdateHistCol(gpair, batch[i], info, tree,
|
||||
fset, offset,
|
||||
&thread_hist[omp_get_thread_num()]);
|
||||
}
|
||||
}
|
||||
}
|
||||
for (size_t i = 0; i < this->qexpand.size(); ++i) {
|
||||
const int nid = this->qexpand[i];
|
||||
const int wid = this->node2workindex[nid];
|
||||
this->wspace.hset[0][fset.size() + wid * (fset.size()+1)]
|
||||
.data[0] = node_stats[nid];
|
||||
}
|
||||
};
|
||||
// sync the histogram
|
||||
// if it is C++11, use lazy evaluation for Allreduce
|
||||
#if __cplusplus >= 201103L
|
||||
this->histred.Allreduce(BeginPtr(this->wspace.hset[0].data),
|
||||
this->wspace.hset[0].data.size(), lazy_get_hist);
|
||||
#else
|
||||
this->histred.Allreduce(BeginPtr(this->wspace.hset[0].data), this->wspace.hset[0].data.size());
|
||||
#endif
|
||||
}
|
||||
/*!
 * \brief reset the position of the instances after the splits are applied,
 *  delegates to ResetPositionCol with the current expand queue
 * \param p_fmat feature matrix
 * \param tree current tree
 */
virtual void ResetPositionAfterSplit(IFMatrix *p_fmat,
                                     const RegTree &tree) {
  this->ResetPositionCol(this->qexpand,
                         p_fmat,
                         tree);
}
|
||||
virtual void ResetPosAndPropose(const std::vector<bst_gpair> &gpair,
|
||||
IFMatrix *p_fmat,
|
||||
const BoosterInfo &info,
|
||||
const std::vector<bst_uint> &fset,
|
||||
const RegTree &tree) {
|
||||
// fill in reverse map
|
||||
feat2workindex.resize(tree.param.num_feature);
|
||||
std::fill(feat2workindex.begin(), feat2workindex.end(), -1);
|
||||
freal_set.clear();
|
||||
for (size_t i = 0; i < fset.size(); ++i) {
|
||||
if (feat_helper.Type(fset[i]) == 2) {
|
||||
feat2workindex[fset[i]] = static_cast<int>(freal_set.size());
|
||||
freal_set.push_back(fset[i]);
|
||||
} else {
|
||||
feat2workindex[fset[i]] = -2;
|
||||
}
|
||||
}
|
||||
this->GetNodeStats(gpair, *p_fmat, tree, info,
|
||||
&thread_stats, &node_stats);
|
||||
sketchs.resize(this->qexpand.size() * freal_set.size());
|
||||
for (size_t i = 0; i < sketchs.size(); ++i) {
|
||||
sketchs[i].Init(info.num_row, this->param.sketch_eps);
|
||||
}
|
||||
// intitialize the summary array
|
||||
summary_array.resize(sketchs.size());
|
||||
// setup maximum size
|
||||
unsigned max_size = this->param.max_sketch_size();
|
||||
for (size_t i = 0; i < sketchs.size(); ++i) {
|
||||
summary_array[i].Reserve(max_size);
|
||||
}
|
||||
// if it is C++11, use lazy evaluation for Allreduce
|
||||
#if __cplusplus >= 201103L
|
||||
auto lazy_get_summary = [&]()
|
||||
#endif
|
||||
{// get smmary
|
||||
thread_sketch.resize(this->get_nthread());
|
||||
// number of rows in
|
||||
const size_t nrows = p_fmat->buffered_rowset().size();
|
||||
// start accumulating statistics
|
||||
utils::IIterator<ColBatch> *iter = p_fmat->ColIterator(freal_set);
|
||||
iter->BeforeFirst();
|
||||
while (iter->Next()) {
|
||||
const ColBatch &batch = iter->Value();
|
||||
// start enumeration
|
||||
const bst_omp_uint nsize = static_cast<bst_omp_uint>(batch.size);
|
||||
#pragma omp parallel for schedule(dynamic, 1)
|
||||
for (bst_omp_uint i = 0; i < nsize; ++i) {
|
||||
int offset = feat2workindex[batch.col_index[i]];
|
||||
if (offset >= 0) {
|
||||
this->UpdateSketchCol(gpair, batch[i], tree,
|
||||
node_stats,
|
||||
freal_set, offset,
|
||||
batch[i].length == nrows,
|
||||
&thread_sketch[omp_get_thread_num()]);
|
||||
}
|
||||
}
|
||||
}
|
||||
for (size_t i = 0; i < sketchs.size(); ++i) {
|
||||
utils::WXQuantileSketch<bst_float, bst_float>::SummaryContainer out;
|
||||
sketchs[i].GetSummary(&out);
|
||||
summary_array[i].SetPrune(out, max_size);
|
||||
}
|
||||
utils::Assert(summary_array.size() == sketchs.size(), "shape mismatch");
|
||||
};
|
||||
if (summary_array.size() != 0) {
|
||||
size_t nbytes = WXQSketch::SummaryContainer::CalcMemCost(max_size);
|
||||
#if __cplusplus >= 201103L
|
||||
sreducer.Allreduce(BeginPtr(summary_array), nbytes, summary_array.size(), lazy_get_summary);
|
||||
#else
|
||||
sreducer.Allreduce(BeginPtr(summary_array), nbytes, summary_array.size());
|
||||
#endif
|
||||
}
|
||||
// now we get the final result of sketch, setup the cut
|
||||
this->wspace.cut.clear();
|
||||
this->wspace.rptr.clear();
|
||||
this->wspace.rptr.push_back(0);
|
||||
for (size_t wid = 0; wid < this->qexpand.size(); ++wid) {
|
||||
for (size_t i = 0; i < fset.size(); ++i) {
|
||||
int offset = feat2workindex[fset[i]];
|
||||
if (offset >= 0) {
|
||||
const WXQSketch::Summary &a = summary_array[wid * freal_set.size() + offset];
|
||||
for (size_t i = 1; i < a.size; ++i) {
|
||||
bst_float cpt = a.data[i].value - rt_eps;
|
||||
if (i == 1 || cpt > this->wspace.cut.back()) {
|
||||
this->wspace.cut.push_back(cpt);
|
||||
}
|
||||
}
|
||||
// push a value that is greater than anything
|
||||
if (a.size != 0) {
|
||||
bst_float cpt = a.data[a.size - 1].value;
|
||||
// this must be bigger than last value in a scale
|
||||
bst_float last = cpt + fabs(cpt) + rt_eps;
|
||||
this->wspace.cut.push_back(last);
|
||||
}
|
||||
this->wspace.rptr.push_back(static_cast<unsigned>(this->wspace.cut.size()));
|
||||
} else {
|
||||
utils::Assert(offset == -2, "BUG in mark");
|
||||
bst_float cpt = feat_helper.MaxValue(fset[i]);
|
||||
this->wspace.cut.push_back(cpt + fabs(cpt) + rt_eps);
|
||||
this->wspace.rptr.push_back(static_cast<unsigned>(this->wspace.cut.size()));
|
||||
}
|
||||
}
|
||||
// reserve last value for global statistics
|
||||
this->wspace.cut.push_back(0.0f);
|
||||
this->wspace.rptr.push_back(static_cast<unsigned>(this->wspace.cut.size()));
|
||||
}
|
||||
utils::Assert(this->wspace.rptr.size() ==
|
||||
(fset.size() + 1) * this->qexpand.size() + 1,
|
||||
"cut space inconsistent");
|
||||
}
|
||||
|
||||
private:
|
||||
inline void UpdateHistCol(const std::vector<bst_gpair> &gpair,
|
||||
const ColBatch::Inst &c,
|
||||
const BoosterInfo &info,
|
||||
const RegTree &tree,
|
||||
const std::vector<bst_uint> &fset,
|
||||
bst_uint fid_offset,
|
||||
std::vector<HistEntry> *p_temp) {
|
||||
if (c.length == 0) return;
|
||||
// initialize sbuilder for use
|
||||
std::vector<HistEntry> &hbuilder = *p_temp;
|
||||
hbuilder.resize(tree.param.num_nodes);
|
||||
for (size_t i = 0; i < this->qexpand.size(); ++i) {
|
||||
const unsigned nid = this->qexpand[i];
|
||||
const unsigned wid = this->node2workindex[nid];
|
||||
hbuilder[nid].istart = 0;
|
||||
hbuilder[nid].hist = this->wspace.hset[0][fid_offset + wid * (fset.size()+1)];
|
||||
}
|
||||
for (bst_uint j = 0; j < c.length; ++j) {
|
||||
const bst_uint ridx = c[j].index;
|
||||
const int nid = this->position[ridx];
|
||||
if (nid >= 0) {
|
||||
hbuilder[nid].Add(c[j].fvalue, gpair, info, ridx);
|
||||
}
|
||||
}
|
||||
}
|
||||
inline void UpdateSketchCol(const std::vector<bst_gpair> &gpair,
|
||||
const ColBatch::Inst &c,
|
||||
const RegTree &tree,
|
||||
const std::vector<TStats> &nstats,
|
||||
const std::vector<bst_uint> &frealset,
|
||||
bst_uint offset,
|
||||
bool col_full,
|
||||
std::vector<BaseMaker::SketchEntry> *p_temp) {
|
||||
if (c.length == 0) return;
|
||||
// initialize sbuilder for use
|
||||
std::vector<BaseMaker::SketchEntry> &sbuilder = *p_temp;
|
||||
sbuilder.resize(tree.param.num_nodes);
|
||||
for (size_t i = 0; i < this->qexpand.size(); ++i) {
|
||||
const unsigned nid = this->qexpand[i];
|
||||
const unsigned wid = this->node2workindex[nid];
|
||||
sbuilder[nid].sum_total = 0.0f;
|
||||
sbuilder[nid].sketch = &sketchs[wid * frealset.size() + offset];
|
||||
}
|
||||
|
||||
if (!col_full) {
|
||||
// first pass, get sum of weight, TODO, optimization to skip first pass
|
||||
for (bst_uint j = 0; j < c.length; ++j) {
|
||||
const bst_uint ridx = c[j].index;
|
||||
const int nid = this->position[ridx];
|
||||
if (nid >= 0) {
|
||||
sbuilder[nid].sum_total += gpair[ridx].hess;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for (size_t i = 0; i < this->qexpand.size(); ++i) {
|
||||
const unsigned nid = this->qexpand[i];
|
||||
sbuilder[nid].sum_total = static_cast<bst_float>(nstats[nid].sum_hess);
|
||||
}
|
||||
}
|
||||
// if only one value, no need to do second pass
|
||||
if (c[0].fvalue == c[c.length-1].fvalue) {
|
||||
for (size_t i = 0; i < this->qexpand.size(); ++i) {
|
||||
const int nid = this->qexpand[i];
|
||||
sbuilder[nid].sketch->Push(c[0].fvalue, sbuilder[nid].sum_total);
|
||||
}
|
||||
return;
|
||||
}
|
||||
// two pass scan
|
||||
unsigned max_size = this->param.max_sketch_size();
|
||||
for (size_t i = 0; i < this->qexpand.size(); ++i) {
|
||||
const int nid = this->qexpand[i];
|
||||
sbuilder[nid].Init(max_size);
|
||||
}
|
||||
// second pass, build the sketch
|
||||
for (bst_uint j = 0; j < c.length; ++j) {
|
||||
const bst_uint ridx = c[j].index;
|
||||
const int nid = this->position[ridx];
|
||||
if (nid >= 0) {
|
||||
sbuilder[nid].Push(c[j].fvalue, gpair[ridx].hess, max_size);
|
||||
}
|
||||
}
|
||||
for (size_t i = 0; i < this->qexpand.size(); ++i) {
|
||||
const int nid = this->qexpand[i];
|
||||
sbuilder[nid].Finalize(max_size);
|
||||
}
|
||||
}
|
||||
// feature helper
|
||||
BaseMaker::FMetaHelper feat_helper;
|
||||
// temp space to map feature id to working index
|
||||
std::vector<int> feat2workindex;
|
||||
// set of index from fset that are real
|
||||
std::vector<bst_uint> freal_set;
|
||||
// thread temp data
|
||||
std::vector< std::vector<BaseMaker::SketchEntry> > thread_sketch;
|
||||
// used to hold statistics
|
||||
std::vector< std::vector<TStats> > thread_stats;
|
||||
// per-thread temporary histogram entries
|
||||
std::vector< std::vector<HistEntry> > thread_hist;
|
||||
// node statistics
|
||||
std::vector<TStats> node_stats;
|
||||
// summary array
|
||||
std::vector<WXQSketch::SummaryContainer> summary_array;
|
||||
// reducer for summary
|
||||
rabit::SerializeReducer<WXQSketch::SummaryContainer> sreducer;
|
||||
// per node, per feature sketch
|
||||
std::vector< utils::WXQuantileSketch<bst_float, bst_float> > sketchs;
|
||||
};
|
||||
|
||||
template<typename TStats>
|
||||
class QuantileHistMaker: public HistMaker<TStats> {
|
||||
protected:
|
||||
typedef utils::WXQuantileSketch<bst_float, bst_float> WXQSketch;
|
||||
virtual void ResetPosAndPropose(const std::vector<bst_gpair> &gpair,
|
||||
IFMatrix *p_fmat,
|
||||
const BoosterInfo &info,
|
||||
const std::vector <bst_uint> &fset,
|
||||
const RegTree &tree) {
|
||||
// initialize the data structure
|
||||
int nthread = BaseMaker::get_nthread();
|
||||
sketchs.resize(this->qexpand.size() * tree.param.num_feature);
|
||||
for (size_t i = 0; i < sketchs.size(); ++i) {
|
||||
sketchs[i].Init(info.num_row, this->param.sketch_eps);
|
||||
}
|
||||
// start accumulating statistics
|
||||
utils::IIterator<RowBatch> *iter = p_fmat->RowIterator();
|
||||
iter->BeforeFirst();
|
||||
while (iter->Next()) {
|
||||
const RowBatch &batch = iter->Value();
|
||||
// parallel convert to column major format
|
||||
utils::ParallelGroupBuilder<SparseBatch::Entry> builder(&col_ptr, &col_data, &thread_col_ptr);
|
||||
builder.InitBudget(tree.param.num_feature, nthread);
|
||||
|
||||
const bst_omp_uint nbatch = static_cast<bst_omp_uint>(batch.size);
|
||||
#pragma omp parallel for schedule(static)
|
||||
for (bst_omp_uint i = 0; i < nbatch; ++i) {
|
||||
RowBatch::Inst inst = batch[i];
|
||||
const bst_uint ridx = static_cast<bst_uint>(batch.base_rowid + i);
|
||||
int nid = this->position[ridx];
|
||||
if (nid >= 0) {
|
||||
if (!tree[nid].is_leaf()) {
|
||||
this->position[ridx] = nid = HistMaker<TStats>::NextLevel(inst, tree, nid);
|
||||
}
|
||||
if (this->node2workindex[nid] < 0) {
|
||||
this->position[ridx] = ~nid;
|
||||
} else{
|
||||
for (bst_uint j = 0; j < inst.length; ++j) {
|
||||
builder.AddBudget(inst[j].index, omp_get_thread_num());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
builder.InitStorage();
|
||||
#pragma omp parallel for schedule(static)
|
||||
for (bst_omp_uint i = 0; i < nbatch; ++i) {
|
||||
RowBatch::Inst inst = batch[i];
|
||||
const bst_uint ridx = static_cast<bst_uint>(batch.base_rowid + i);
|
||||
const int nid = this->position[ridx];
|
||||
if (nid >= 0) {
|
||||
for (bst_uint j = 0; j < inst.length; ++j) {
|
||||
builder.Push(inst[j].index,
|
||||
SparseBatch::Entry(nid, inst[j].fvalue),
|
||||
omp_get_thread_num());
|
||||
}
|
||||
}
|
||||
}
|
||||
// start putting things into sketch
|
||||
const bst_omp_uint nfeat = col_ptr.size() - 1;
|
||||
#pragma omp parallel for schedule(dynamic, 1)
|
||||
for (bst_omp_uint k = 0; k < nfeat; ++k) {
|
||||
for (size_t i = col_ptr[k]; i < col_ptr[k+1]; ++i) {
|
||||
const SparseBatch::Entry &e = col_data[i];
|
||||
const int wid = this->node2workindex[e.index];
|
||||
sketchs[wid * tree.param.num_feature + k].Push(e.fvalue, gpair[e.index].hess);
|
||||
}
|
||||
}
|
||||
}
|
||||
// setup maximum size
|
||||
unsigned max_size = this->param.max_sketch_size();
|
||||
// synchronize sketch
|
||||
summary_array.resize(sketchs.size());
|
||||
for (size_t i = 0; i < sketchs.size(); ++i) {
|
||||
utils::WQuantileSketch<bst_float, bst_float>::SummaryContainer out;
|
||||
sketchs[i].GetSummary(&out);
|
||||
summary_array[i].Reserve(max_size);
|
||||
summary_array[i].SetPrune(out, max_size);
|
||||
}
|
||||
|
||||
size_t nbytes = WXQSketch::SummaryContainer::CalcMemCost(max_size);
|
||||
sreducer.Allreduce(BeginPtr(summary_array), nbytes, summary_array.size());
|
||||
// now we get the final result of sketch, setup the cut
|
||||
this->wspace.cut.clear();
|
||||
this->wspace.rptr.clear();
|
||||
this->wspace.rptr.push_back(0);
|
||||
for (size_t wid = 0; wid < this->qexpand.size(); ++wid) {
|
||||
for (int fid = 0; fid < tree.param.num_feature; ++fid) {
|
||||
const WXQSketch::Summary &a = summary_array[wid * tree.param.num_feature + fid];
|
||||
for (size_t i = 1; i < a.size; ++i) {
|
||||
bst_float cpt = a.data[i].value - rt_eps;
|
||||
if (i == 1 || cpt > this->wspace.cut.back()) {
|
||||
this->wspace.cut.push_back(cpt);
|
||||
}
|
||||
}
|
||||
// push a value that is greater than anything
|
||||
if (a.size != 0) {
|
||||
bst_float cpt = a.data[a.size - 1].value;
|
||||
// this must be bigger than last value in a scale
|
||||
bst_float last = cpt + fabs(cpt) + rt_eps;
|
||||
this->wspace.cut.push_back(last);
|
||||
}
|
||||
this->wspace.rptr.push_back(this->wspace.cut.size());
|
||||
}
|
||||
// reserve last value for global statistics
|
||||
this->wspace.cut.push_back(0.0f);
|
||||
this->wspace.rptr.push_back(this->wspace.cut.size());
|
||||
}
|
||||
utils::Assert(this->wspace.rptr.size() ==
|
||||
(tree.param.num_feature + 1) * this->qexpand.size() + 1,
|
||||
"cut space inconsistent");
|
||||
}
|
||||
|
||||
private:
|
||||
// summary array
|
||||
std::vector<WXQSketch::SummaryContainer> summary_array;
|
||||
// reducer for summary
|
||||
rabit::SerializeReducer<WXQSketch::SummaryContainer> sreducer;
|
||||
// local temp column data structure
|
||||
std::vector<size_t> col_ptr;
|
||||
// local storage of column data
|
||||
std::vector<SparseBatch::Entry> col_data;
|
||||
std::vector< std::vector<size_t> > thread_col_ptr;
|
||||
// per node, per feature sketch
|
||||
std::vector< utils::WQuantileSketch<bst_float, bst_float> > sketchs;
|
||||
};
|
||||
|
||||
} // namespace tree
|
||||
} // namespace xgboost
|
||||
#endif // XGBOOST_TREE_UPDATER_HISTMAKER_INL_HPP_
|
||||
@@ -8,6 +8,7 @@
|
||||
#include <vector>
|
||||
#include "./param.h"
|
||||
#include "./updater.h"
|
||||
#include "./updater_sync-inl.hpp"
|
||||
|
||||
namespace xgboost {
|
||||
namespace tree {
|
||||
@@ -19,6 +20,7 @@ class TreePruner: public IUpdater {
|
||||
virtual void SetParam(const char *name, const char *val) {
|
||||
using namespace std;
|
||||
param.SetParam(name, val);
|
||||
syncher.SetParam(name, val);
|
||||
if (!strcmp(name, "silent")) silent = atoi(val);
|
||||
}
|
||||
// update the tree, do pruning
|
||||
@@ -33,8 +35,8 @@ class TreePruner: public IUpdater {
|
||||
this->DoPrune(*trees[i]);
|
||||
}
|
||||
param.learning_rate = lr;
|
||||
syncher.Update(gpair, p_fmat, info, trees);
|
||||
}
|
||||
|
||||
private:
|
||||
// try to prune off current leaf
|
||||
inline int TryPruneLeaf(RegTree &tree, int nid, int depth, int npruned) {
|
||||
@@ -70,6 +72,8 @@ class TreePruner: public IUpdater {
|
||||
}
|
||||
|
||||
private:
|
||||
// synchronizer
|
||||
TreeSyncher syncher;
|
||||
// shutup
|
||||
int silent;
|
||||
// training parameter
|
||||
|
||||
@@ -7,6 +7,7 @@
|
||||
*/
|
||||
#include <vector>
|
||||
#include <limits>
|
||||
#include <rabit.h>
|
||||
#include "./param.h"
|
||||
#include "./updater.h"
|
||||
#include "../utils/omp.h"
|
||||
@@ -26,7 +27,7 @@ class TreeRefresher: public IUpdater {
|
||||
virtual void Update(const std::vector<bst_gpair> &gpair,
|
||||
IFMatrix *p_fmat,
|
||||
const BoosterInfo &info,
|
||||
const std::vector<RegTree*> &trees) {
|
||||
const std::vector<RegTree*> &trees) {
|
||||
if (trees.size() == 0) return;
|
||||
// number of threads
|
||||
// thread temporary space
|
||||
@@ -39,54 +40,71 @@ class TreeRefresher: public IUpdater {
|
||||
nthread = omp_get_num_threads();
|
||||
}
|
||||
fvec_temp.resize(nthread, RegTree::FVec());
|
||||
stemp.resize(trees.size() * nthread, std::vector<TStats>());
|
||||
stemp.resize(nthread, std::vector<TStats>());
|
||||
#pragma omp parallel
|
||||
{
|
||||
int tid = omp_get_thread_num();
|
||||
int num_nodes = 0;
|
||||
for (size_t i = 0; i < trees.size(); ++i) {
|
||||
std::vector<TStats> &vec = stemp[tid * trees.size() + i];
|
||||
vec.resize(trees[i]->param.num_nodes, TStats(param));
|
||||
std::fill(vec.begin(), vec.end(), TStats(param));
|
||||
num_nodes += trees[i]->param.num_nodes;
|
||||
}
|
||||
stemp[tid].resize(num_nodes, TStats(param));
|
||||
std::fill(stemp[tid].begin(), stemp[tid].end(), TStats(param));
|
||||
fvec_temp[tid].Init(trees[0]->param.num_feature);
|
||||
}
|
||||
// start accumulating statistics
|
||||
utils::IIterator<RowBatch> *iter = p_fmat->RowIterator();
|
||||
iter->BeforeFirst();
|
||||
while (iter->Next()) {
|
||||
const RowBatch &batch = iter->Value();
|
||||
utils::Check(batch.size < std::numeric_limits<unsigned>::max(),
|
||||
"too large batch size ");
|
||||
const bst_omp_uint nbatch = static_cast<bst_omp_uint>(batch.size);
|
||||
#pragma omp parallel for schedule(static)
|
||||
for (bst_omp_uint i = 0; i < nbatch; ++i) {
|
||||
RowBatch::Inst inst = batch[i];
|
||||
const int tid = omp_get_thread_num();
|
||||
const bst_uint ridx = static_cast<bst_uint>(batch.base_rowid + i);
|
||||
RegTree::FVec &feats = fvec_temp[tid];
|
||||
feats.Fill(inst);
|
||||
for (size_t j = 0; j < trees.size(); ++j) {
|
||||
AddStats(*trees[j], feats, gpair, info, ridx,
|
||||
&stemp[tid * trees.size() + j]);
|
||||
// if it is C++11, use lazy evaluation for Allreduce,
|
||||
// to gain speedup in recovery
|
||||
#if __cplusplus >= 201103L
|
||||
auto lazy_get_stats = [&]()
|
||||
#endif
|
||||
{
|
||||
// start accumulating statistics
|
||||
utils::IIterator<RowBatch> *iter = p_fmat->RowIterator();
|
||||
iter->BeforeFirst();
|
||||
while (iter->Next()) {
|
||||
const RowBatch &batch = iter->Value();
|
||||
utils::Check(batch.size < std::numeric_limits<unsigned>::max(),
|
||||
"too large batch size ");
|
||||
const bst_omp_uint nbatch = static_cast<bst_omp_uint>(batch.size);
|
||||
#pragma omp parallel for schedule(static)
|
||||
for (bst_omp_uint i = 0; i < nbatch; ++i) {
|
||||
RowBatch::Inst inst = batch[i];
|
||||
const int tid = omp_get_thread_num();
|
||||
const bst_uint ridx = static_cast<bst_uint>(batch.base_rowid + i);
|
||||
RegTree::FVec &feats = fvec_temp[tid];
|
||||
feats.Fill(inst);
|
||||
int offset = 0;
|
||||
for (size_t j = 0; j < trees.size(); ++j) {
|
||||
AddStats(*trees[j], feats, gpair, info, ridx,
|
||||
BeginPtr(stemp[tid]) + offset);
|
||||
offset += trees[j]->param.num_nodes;
|
||||
}
|
||||
feats.Drop(inst);
|
||||
}
|
||||
feats.Drop(inst);
|
||||
}
|
||||
}
|
||||
// start update the trees using the statistics
|
||||
// aggregate the statistics
|
||||
int num_nodes = static_cast<int>(stemp[0].size());
|
||||
#pragma omp parallel for schedule(static)
|
||||
for (int nid = 0; nid < num_nodes; ++nid) {
|
||||
for (int tid = 1; tid < nthread; ++tid) {
|
||||
stemp[0][nid].Add(stemp[tid][nid]);
|
||||
}
|
||||
}
|
||||
};
|
||||
#if __cplusplus >= 201103L
|
||||
reducer.Allreduce(BeginPtr(stemp[0]), stemp[0].size(), lazy_get_stats);
|
||||
#else
|
||||
reducer.Allreduce(BeginPtr(stemp[0]), stemp[0].size());
|
||||
#endif
|
||||
// rescale learning rate according to size of trees
|
||||
float lr = param.learning_rate;
|
||||
param.learning_rate = lr / trees.size();
|
||||
for (size_t i = 0; i < trees.size(); ++i) {
|
||||
// aggregate
|
||||
#pragma omp parallel for schedule(static)
|
||||
for (int nid = 0; nid < trees[i]->param.num_nodes; ++nid) {
|
||||
for (int tid = 1; tid < nthread; ++tid) {
|
||||
stemp[i][nid].Add(stemp[tid * trees.size() + i][nid]);
|
||||
}
|
||||
}
|
||||
int offset = 0;
|
||||
for (size_t i = 0; i < trees.size(); ++i) {
|
||||
for (int rid = 0; rid < trees[i]->param.num_roots; ++rid) {
|
||||
this->Refresh(stemp[i], rid, trees[i]);
|
||||
this->Refresh(BeginPtr(stemp[0]) + offset, rid, trees[i]);
|
||||
}
|
||||
offset += trees[i]->param.num_nodes;
|
||||
}
|
||||
// set learning rate back
|
||||
param.learning_rate = lr;
|
||||
@@ -98,8 +116,7 @@ class TreeRefresher: public IUpdater {
|
||||
const std::vector<bst_gpair> &gpair,
|
||||
const BoosterInfo &info,
|
||||
const bst_uint ridx,
|
||||
std::vector<TStats> *p_gstats) {
|
||||
std::vector<TStats> &gstats = *p_gstats;
|
||||
TStats *gstats) {
|
||||
// start from groups that belongs to current data
|
||||
int pid = static_cast<int>(info.GetRoot(ridx));
|
||||
gstats[pid].Add(gpair, info, ridx);
|
||||
@@ -110,7 +127,7 @@ class TreeRefresher: public IUpdater {
|
||||
gstats[pid].Add(gpair, info, ridx);
|
||||
}
|
||||
}
|
||||
inline void Refresh(const std::vector<TStats> &gstats,
|
||||
inline void Refresh(const TStats *gstats,
|
||||
int nid, RegTree *p_tree) {
|
||||
RegTree &tree = *p_tree;
|
||||
tree.stat(nid).base_weight = static_cast<float>(gstats[nid].CalcWeight(param));
|
||||
@@ -129,6 +146,8 @@ class TreeRefresher: public IUpdater {
|
||||
}
|
||||
// training parameter
|
||||
TrainParam param;
|
||||
// reducer
|
||||
rabit::Reducer<TStats> reducer;
|
||||
};
|
||||
|
||||
} // namespace tree
|
||||
|
||||
393
src/tree/updater_skmaker-inl.hpp
Normal file
393
src/tree/updater_skmaker-inl.hpp
Normal file
@@ -0,0 +1,393 @@
|
||||
#ifndef XGBOOST_TREE_UPDATER_SKMAKER_INL_HPP_
|
||||
#define XGBOOST_TREE_UPDATER_SKMAKER_INL_HPP_
|
||||
/*!
|
||||
* \file updater_skmaker-inl.hpp
|
||||
* \brief use approximation sketch to construct a tree,
|
||||
a refresh is needed to make the statistics exactly correct
|
||||
* \author Tianqi Chen
|
||||
*/
|
||||
#include <vector>
|
||||
#include <algorithm>
|
||||
#include <rabit.h>
|
||||
#include "../utils/quantile.h"
|
||||
#include "./updater_basemaker-inl.hpp"
|
||||
|
||||
namespace xgboost {
|
||||
namespace tree {
|
||||
class SketchMaker: public BaseMaker {
|
||||
public:
|
||||
virtual ~SketchMaker(void) {}
|
||||
virtual void Update(const std::vector<bst_gpair> &gpair,
|
||||
IFMatrix *p_fmat,
|
||||
const BoosterInfo &info,
|
||||
const std::vector<RegTree*> &trees) {
|
||||
// rescale learning rate according to size of trees
|
||||
float lr = param.learning_rate;
|
||||
param.learning_rate = lr / trees.size();
|
||||
// build tree
|
||||
for (size_t i = 0; i < trees.size(); ++i) {
|
||||
this->Update(gpair, p_fmat, info, trees[i]);
|
||||
}
|
||||
param.learning_rate = lr;
|
||||
}
|
||||
|
||||
protected:
|
||||
inline void Update(const std::vector<bst_gpair> &gpair,
|
||||
IFMatrix *p_fmat,
|
||||
const BoosterInfo &info,
|
||||
RegTree *p_tree) {
|
||||
this->InitData(gpair, *p_fmat, info.root_index, *p_tree);
|
||||
for (int depth = 0; depth < param.max_depth; ++depth) {
|
||||
this->GetNodeStats(gpair, *p_fmat, *p_tree, info,
|
||||
&thread_stats, &node_stats);
|
||||
this->BuildSketch(gpair, p_fmat, info, *p_tree);
|
||||
this->SyncNodeStats();
|
||||
this->FindSplit(depth, gpair, p_fmat, info, p_tree);
|
||||
this->ResetPositionCol(qexpand, p_fmat, *p_tree);
|
||||
this->UpdateQueueExpand(*p_tree);
|
||||
// if nothing left to be expand, break
|
||||
if (qexpand.size() == 0) break;
|
||||
}
|
||||
if (qexpand.size() != 0) {
|
||||
this->GetNodeStats(gpair, *p_fmat, *p_tree, info,
|
||||
&thread_stats, &node_stats);
|
||||
this->SyncNodeStats();
|
||||
}
|
||||
// set all statistics correctly
|
||||
for (int nid = 0; nid < p_tree->param.num_nodes; ++nid) {
|
||||
this->SetStats(nid, node_stats[nid], p_tree);
|
||||
if (!(*p_tree)[nid].is_leaf()) {
|
||||
p_tree->stat(nid).loss_chg =
|
||||
node_stats[(*p_tree)[nid].cleft()].CalcGain(param) +
|
||||
node_stats[(*p_tree)[nid].cright()].CalcGain(param) -
|
||||
node_stats[nid].CalcGain(param);
|
||||
}
|
||||
}
|
||||
// set left leaves
|
||||
for (size_t i = 0; i < qexpand.size(); ++i) {
|
||||
const int nid = qexpand[i];
|
||||
(*p_tree)[nid].set_leaf(p_tree->stat(nid).base_weight * param.learning_rate);
|
||||
}
|
||||
}
|
||||
// define the sketch we want to use
|
||||
typedef utils::WXQuantileSketch<bst_float, bst_float> WXQSketch;
|
||||
|
||||
private:
|
||||
// statistics needed in the gradient calculation
|
||||
struct SKStats {
|
||||
/*! \brief sum of all positive gradient */
|
||||
double pos_grad;
|
||||
/*! \brief sum of all negative gradient */
|
||||
double neg_grad;
|
||||
/*! \brief sum of hessian statistics */
|
||||
double sum_hess;
|
||||
explicit SKStats(void) {}
|
||||
// constructor
|
||||
explicit SKStats(const TrainParam ¶m) {
|
||||
this->Clear();
|
||||
}
|
||||
/*! \brief clear the statistics */
|
||||
inline void Clear(void) {
|
||||
neg_grad = pos_grad = sum_hess = 0.0f;
|
||||
}
|
||||
// accumulate statistics
|
||||
inline void Add(const std::vector<bst_gpair> &gpair,
|
||||
const BoosterInfo &info,
|
||||
bst_uint ridx) {
|
||||
const bst_gpair &b = gpair[ridx];
|
||||
if (b.grad >= 0.0f) {
|
||||
pos_grad += b.grad;
|
||||
} else {
|
||||
neg_grad -= b.grad;
|
||||
}
|
||||
sum_hess += b.hess;
|
||||
}
|
||||
/*! \brief calculate gain of the solution */
|
||||
inline double CalcGain(const TrainParam ¶m) const {
|
||||
return param.CalcGain(pos_grad - neg_grad, sum_hess);
|
||||
}
|
||||
/*! \brief set current value to a - b */
|
||||
inline void SetSubstract(const SKStats &a, const SKStats &b) {
|
||||
pos_grad = a.pos_grad - b.pos_grad;
|
||||
neg_grad = a.neg_grad - b.neg_grad;
|
||||
sum_hess = a.sum_hess - b.sum_hess;
|
||||
}
|
||||
// calculate leaf weight
|
||||
inline double CalcWeight(const TrainParam ¶m) const {
|
||||
return param.CalcWeight(pos_grad - neg_grad, sum_hess);
|
||||
}
|
||||
/*! \brief add statistics to the data */
|
||||
inline void Add(const SKStats &b) {
|
||||
pos_grad += b.pos_grad;
|
||||
neg_grad += b.neg_grad;
|
||||
sum_hess += b.sum_hess;
|
||||
}
|
||||
/*! \brief same as add, reduce is used in All Reduce */
|
||||
inline void Reduce(const SKStats &b) {
|
||||
this->Add(b);
|
||||
}
|
||||
/*! \brief set leaf vector value based on statistics */
|
||||
inline void SetLeafVec(const TrainParam ¶m, bst_float *vec) const {
|
||||
}
|
||||
};
|
||||
inline void BuildSketch(const std::vector<bst_gpair> &gpair,
|
||||
IFMatrix *p_fmat,
|
||||
const BoosterInfo &info,
|
||||
const RegTree &tree) {
|
||||
sketchs.resize(this->qexpand.size() * tree.param.num_feature * 3);
|
||||
for (size_t i = 0; i < sketchs.size(); ++i) {
|
||||
sketchs[i].Init(info.num_row, this->param.sketch_eps);
|
||||
}
|
||||
thread_sketch.resize(this->get_nthread());
|
||||
// number of rows in
|
||||
const size_t nrows = p_fmat->buffered_rowset().size();
|
||||
// start accumulating statistics
|
||||
utils::IIterator<ColBatch> *iter = p_fmat->ColIterator();
|
||||
iter->BeforeFirst();
|
||||
while (iter->Next()) {
|
||||
const ColBatch &batch = iter->Value();
|
||||
// start enumeration
|
||||
const bst_omp_uint nsize = static_cast<bst_omp_uint>(batch.size);
|
||||
#pragma omp parallel for schedule(dynamic, 1)
|
||||
for (bst_omp_uint i = 0; i < nsize; ++i) {
|
||||
this->UpdateSketchCol(gpair, batch[i], tree,
|
||||
node_stats,
|
||||
batch.col_index[i],
|
||||
batch[i].length == nrows,
|
||||
&thread_sketch[omp_get_thread_num()]);
|
||||
}
|
||||
}
|
||||
// setup maximum size
|
||||
unsigned max_size = param.max_sketch_size();
|
||||
// synchronize sketch
|
||||
summary_array.Init(sketchs.size(), max_size);
|
||||
for (size_t i = 0; i < sketchs.size(); ++i) {
|
||||
utils::WXQuantileSketch<bst_float, bst_float>::SummaryContainer out;
|
||||
sketchs[i].GetSummary(&out);
|
||||
summary_array.Set(i, out);
|
||||
}
|
||||
size_t nbytes = summary_array.MemSize();;
|
||||
sketch_reducer.Allreduce(&summary_array, nbytes);
|
||||
}
|
||||
// update sketch information in column fid
|
||||
inline void UpdateSketchCol(const std::vector<bst_gpair> &gpair,
|
||||
const ColBatch::Inst &c,
|
||||
const RegTree &tree,
|
||||
const std::vector<SKStats> &nstats,
|
||||
bst_uint fid,
|
||||
bool col_full,
|
||||
std::vector<SketchEntry> *p_temp) {
|
||||
if (c.length == 0) return;
|
||||
// initialize sbuilder for use
|
||||
std::vector<SketchEntry> &sbuilder = *p_temp;
|
||||
sbuilder.resize(tree.param.num_nodes * 3);
|
||||
for (size_t i = 0; i < this->qexpand.size(); ++i) {
|
||||
const unsigned nid = this->qexpand[i];
|
||||
const unsigned wid = this->node2workindex[nid];
|
||||
for (int k = 0; k < 3; ++k) {
|
||||
sbuilder[3 * nid + k].sum_total = 0.0f;
|
||||
sbuilder[3 * nid + k].sketch = &sketchs[(wid * tree.param.num_feature + fid) * 3 + k];
|
||||
}
|
||||
}
|
||||
if (!col_full) {
|
||||
for (bst_uint j = 0; j < c.length; ++j) {
|
||||
const bst_uint ridx = c[j].index;
|
||||
const int nid = this->position[ridx];
|
||||
if (nid >= 0) {
|
||||
const bst_gpair &e = gpair[ridx];
|
||||
if (e.grad >= 0.0f) {
|
||||
sbuilder[3 * nid + 0].sum_total += e.grad;
|
||||
} else {
|
||||
sbuilder[3 * nid + 1].sum_total -= e.grad;
|
||||
}
|
||||
sbuilder[3 * nid + 2].sum_total += e.hess;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for (size_t i = 0; i < this->qexpand.size(); ++i) {
|
||||
const unsigned nid = this->qexpand[i];
|
||||
sbuilder[3 * nid + 0].sum_total = nstats[nid].pos_grad;
|
||||
sbuilder[3 * nid + 1].sum_total = nstats[nid].neg_grad;
|
||||
sbuilder[3 * nid + 2].sum_total = nstats[nid].sum_hess;
|
||||
}
|
||||
}
|
||||
// if only one value, no need to do second pass
|
||||
if (c[0].fvalue == c[c.length-1].fvalue) {
|
||||
for (size_t i = 0; i < this->qexpand.size(); ++i) {
|
||||
const int nid = this->qexpand[i];
|
||||
for (int k = 0; k < 3; ++k) {
|
||||
sbuilder[3 * nid + k].sketch->Push(c[0].fvalue, sbuilder[3 * nid + k].sum_total);
|
||||
}
|
||||
}
|
||||
return;
|
||||
}
|
||||
// two pass scan
|
||||
unsigned max_size = param.max_sketch_size();
|
||||
for (size_t i = 0; i < this->qexpand.size(); ++i) {
|
||||
const int nid = this->qexpand[i];
|
||||
for (int k = 0; k < 3; ++k) {
|
||||
sbuilder[3 * nid + k].Init(max_size);
|
||||
}
|
||||
}
|
||||
// second pass, build the sketch
|
||||
for (bst_uint j = 0; j < c.length; ++j) {
|
||||
const bst_uint ridx = c[j].index;
|
||||
const int nid = this->position[ridx];
|
||||
if (nid >= 0) {
|
||||
const bst_gpair &e = gpair[ridx];
|
||||
if (e.grad >= 0.0f) {
|
||||
sbuilder[3 * nid + 0].Push(c[j].fvalue, e.grad, max_size);
|
||||
} else {
|
||||
sbuilder[3 * nid + 1].Push(c[j].fvalue, -e.grad, max_size);
|
||||
}
|
||||
sbuilder[3 * nid + 2].Push(c[j].fvalue, e.hess, max_size);
|
||||
}
|
||||
}
|
||||
for (size_t i = 0; i < this->qexpand.size(); ++i) {
|
||||
const int nid = this->qexpand[i];
|
||||
for (int k = 0; k < 3; ++k) {
|
||||
sbuilder[3 * nid + k].Finalize(max_size);
|
||||
}
|
||||
}
|
||||
}
|
||||
inline void SyncNodeStats(void) {
|
||||
utils::Assert(qexpand.size() != 0, "qexpand must not be empty");
|
||||
std::vector<SKStats> tmp(qexpand.size());
|
||||
for (size_t i = 0; i < qexpand.size(); ++i) {
|
||||
tmp[i] = node_stats[qexpand[i]];
|
||||
}
|
||||
stats_reducer.Allreduce(BeginPtr(tmp), tmp.size());
|
||||
for (size_t i = 0; i < qexpand.size(); ++i) {
|
||||
node_stats[qexpand[i]] = tmp[i];
|
||||
}
|
||||
}
|
||||
inline void FindSplit(int depth,
|
||||
const std::vector<bst_gpair> &gpair,
|
||||
IFMatrix *p_fmat,
|
||||
const BoosterInfo &info,
|
||||
RegTree *p_tree) {
|
||||
const bst_uint num_feature = p_tree->param.num_feature;
|
||||
// get the best split condition for each node
|
||||
std::vector<SplitEntry> sol(qexpand.size());
|
||||
bst_omp_uint nexpand = static_cast<bst_omp_uint>(qexpand.size());
|
||||
#pragma omp parallel for schedule(dynamic, 1)
|
||||
for (bst_omp_uint wid = 0; wid < nexpand; ++ wid) {
|
||||
const int nid = qexpand[wid];
|
||||
utils::Assert(node2workindex[nid] == static_cast<int>(wid),
|
||||
"node2workindex inconsistent");
|
||||
SplitEntry &best = sol[wid];
|
||||
for (bst_uint fid = 0; fid < num_feature; ++ fid) {
|
||||
unsigned base = (wid * p_tree->param.num_feature + fid) * 3;
|
||||
EnumerateSplit(summary_array[base + 0],
|
||||
summary_array[base + 1],
|
||||
summary_array[base + 2],
|
||||
node_stats[nid], fid, &best);
|
||||
}
|
||||
}
|
||||
// get the best result, we can synchronize the solution
|
||||
for (bst_omp_uint wid = 0; wid < nexpand; ++ wid) {
|
||||
const int nid = qexpand[wid];
|
||||
const SplitEntry &best = sol[wid];
|
||||
// set up the values
|
||||
p_tree->stat(nid).loss_chg = best.loss_chg;
|
||||
this->SetStats(nid, node_stats[nid], p_tree);
|
||||
// now we know the solution in snode[nid], set split
|
||||
if (best.loss_chg > rt_eps) {
|
||||
p_tree->AddChilds(nid);
|
||||
(*p_tree)[nid].set_split(best.split_index(),
|
||||
best.split_value, best.default_left());
|
||||
// mark right child as 0, to indicate fresh leaf
|
||||
(*p_tree)[(*p_tree)[nid].cleft()].set_leaf(0.0f, 0);
|
||||
(*p_tree)[(*p_tree)[nid].cright()].set_leaf(0.0f, 0);
|
||||
} else {
|
||||
(*p_tree)[nid].set_leaf(p_tree->stat(nid).base_weight * param.learning_rate);
|
||||
}
|
||||
}
|
||||
}
|
||||
// set statistics on ptree
|
||||
inline void SetStats(int nid, const SKStats &node_sum, RegTree *p_tree) {
|
||||
p_tree->stat(nid).base_weight = node_sum.CalcWeight(param);
|
||||
p_tree->stat(nid).sum_hess = static_cast<float>(node_sum.sum_hess);
|
||||
node_sum.SetLeafVec(param, p_tree->leafvec(nid));
|
||||
}
|
||||
/*!
 * \brief enumerate the candidate splits of one feature for one node
 *  and update the best split found so far
 * \param pos_grad summary of the positive gradient sketch
 * \param neg_grad summary of the negative gradient sketch
 * \param sum_hess summary of the hessian sketch
 * \param node_sum total statistics of the node
 * \param fid feature index
 * \param best in/out, best split entry of the node
 */
inline void EnumerateSplit(const WXQSketch::Summary &pos_grad,
                           const WXQSketch::Summary &neg_grad,
                           const WXQSketch::Summary &sum_hess,
                           const SKStats &node_sum,
                           bst_uint fid,
                           SplitEntry *best) {
  if (sum_hess.size == 0) return;
  double root_gain = node_sum.CalcGain(param);
  // candidate split values: union of the values held by the three summaries
  std::vector<bst_float> fsplits;
  fsplits.reserve(pos_grad.size + neg_grad.size + sum_hess.size);
  for (size_t i = 0; i < pos_grad.size; ++i) {
    fsplits.push_back(pos_grad.data[i].value);
  }
  for (size_t i = 0; i < neg_grad.size; ++i) {
    fsplits.push_back(neg_grad.data[i].value);
  }
  for (size_t i = 0; i < sum_hess.size; ++i) {
    fsplits.push_back(sum_hess.data[i].value);
  }
  std::sort(fsplits.begin(), fsplits.end());
  fsplits.resize(std::unique(fsplits.begin(), fsplits.end()) - fsplits.begin());
  // sum feature: totals over the rows that have a value for this feature.
  // Only sum_hess is known to be non-empty here; an empty gradient summary
  // contributes zero instead of reading data[size - 1] with a wrapped index.
  // NOTE(review): Query() on an empty summary is not visible from here;
  // confirm it is safe if an empty gradient summary can reach the loop below.
  SKStats feat_sum;
  feat_sum.pos_grad = pos_grad.size != 0 ? pos_grad.data[pos_grad.size - 1].rmax : 0.0;
  feat_sum.neg_grad = neg_grad.size != 0 ? neg_grad.data[neg_grad.size - 1].rmax : 0.0;
  feat_sum.sum_hess = sum_hess.data[sum_hess.size - 1].rmax;
  size_t ipos = 0, ineg = 0, ihess = 0;
  for (size_t i = 1; i < fsplits.size(); ++i) {
    WXQSketch::Entry pos = pos_grad.Query(fsplits[i], ipos);
    WXQSketch::Entry neg = neg_grad.Query(fsplits[i], ineg);
    WXQSketch::Entry hess = sum_hess.Query(fsplits[i], ihess);
    SKStats s, c;
    // estimate of the statistics on one side of the candidate value,
    // taken as the midpoint of the rank bounds returned by the query
    s.pos_grad = 0.5f * (pos.rmin + pos.rmax - pos.wmin);
    s.neg_grad = 0.5f * (neg.rmin + neg.rmax - neg.wmin);
    s.sum_hess = 0.5f * (hess.rmin + hess.rmax - hess.wmin);
    c.SetSubstract(node_sum, s);
    // forward
    if (s.sum_hess >= param.min_child_weight &&
        c.sum_hess >= param.min_child_weight) {
      double loss_chg = s.CalcGain(param) + c.CalcGain(param) - root_gain;
      best->Update(loss_chg, fid, fsplits[i], false);
    }
    // backward
    c.SetSubstract(feat_sum, s);
    s.SetSubstract(node_sum, c);
    if (s.sum_hess >= param.min_child_weight &&
        c.sum_hess >= param.min_child_weight) {
      double loss_chg = s.CalcGain(param) + c.CalcGain(param) - root_gain;
      best->Update(loss_chg, fid, fsplits[i], true);
    }
  }
  {  // all including: every observed value on one side, the rest of the node on the other
    SKStats s = feat_sum, c;
    c.SetSubstract(node_sum, s);
    if (s.sum_hess >= param.min_child_weight &&
        c.sum_hess >= param.min_child_weight) {
      bst_float cpt = fsplits.back();
      double loss_chg = s.CalcGain(param) + c.CalcGain(param) - root_gain;
      // split value chosen strictly above the largest candidate
      best->Update(loss_chg, fid, cpt + fabsf(cpt) + 1.0f, true);
    }
  }
}
|
||||
|
||||
// thread temp data
|
||||
// used to hold temporary sketch
|
||||
std::vector< std::vector<SketchEntry> > thread_sketch;
|
||||
// used to hold statistics
|
||||
std::vector< std::vector<SKStats> > thread_stats;
|
||||
// node statistics
|
||||
std::vector<SKStats> node_stats;
|
||||
// summary array
|
||||
WXQSketch::SummaryArray summary_array;
|
||||
// reducer for statistics
|
||||
rabit::Reducer<SKStats> stats_reducer;
|
||||
// reducer for summary
|
||||
rabit::SerializeReducer<WXQSketch::SummaryArray> sketch_reducer;
|
||||
// per node, per feature sketch
|
||||
std::vector< utils::WXQuantileSketch<bst_float, bst_float> > sketchs;
|
||||
};
|
||||
} // tree
|
||||
} // xgboost
|
||||
#endif
|
||||
53
src/tree/updater_sync-inl.hpp
Normal file
53
src/tree/updater_sync-inl.hpp
Normal file
@@ -0,0 +1,53 @@
|
||||
#ifndef XGBOOST_TREE_UPDATER_SYNC_INL_HPP_
|
||||
#define XGBOOST_TREE_UPDATER_SYNC_INL_HPP_
|
||||
/*!
|
||||
* \file updater_sync-inl.hpp
|
||||
* \brief synchronize the tree in all distributed nodes
|
||||
* \author Tianqi Chen
|
||||
*/
|
||||
#include <vector>
|
||||
#include <limits>
|
||||
#include <rabit.h>
|
||||
#include "./updater.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace tree {
|
||||
/*!
|
||||
* \brief syncher that synchronizes the tree in all distributed nodes
|
||||
* can implement various strategies, so far it is always set to node 0's tree
|
||||
*/
|
||||
class TreeSyncher: public IUpdater {
|
||||
public:
|
||||
virtual ~TreeSyncher(void) {}
|
||||
virtual void SetParam(const char *name, const char *val) {
|
||||
}
|
||||
// update the tree, do pruning
|
||||
virtual void Update(const std::vector<bst_gpair> &gpair,
|
||||
IFMatrix *p_fmat,
|
||||
const BoosterInfo &info,
|
||||
const std::vector<RegTree*> &trees) {
|
||||
this->SyncTrees(trees);
|
||||
}
|
||||
|
||||
private:
|
||||
// synchronize the trees in different nodes, take tree from rank 0
|
||||
inline void SyncTrees(const std::vector<RegTree *> &trees) {
|
||||
if (rabit::GetWorldSize() == 1) return;
|
||||
std::string s_model;
|
||||
utils::MemoryBufferStream fs(&s_model);
|
||||
int rank = rabit::GetRank();
|
||||
if (rank == 0) {
|
||||
for (size_t i = 0; i < trees.size(); ++i) {
|
||||
trees[i]->SaveModel(fs);
|
||||
}
|
||||
}
|
||||
fs.Seek(0);
|
||||
rabit::Broadcast(&s_model, 0);
|
||||
for (size_t i = 0; i < trees.size(); ++i) {
|
||||
trees[i]->LoadModel(fs);
|
||||
}
|
||||
}
|
||||
};
|
||||
} // namespace tree
|
||||
} // namespace xgboost
|
||||
#endif // XGBOOST_TREE_UPDATER_SYNC_INL_HPP_
|
||||
Reference in New Issue
Block a user