adapt svdfeature tree
This commit is contained in:
parent
bf36374678
commit
5d052b9e14
556
booster/tree/xgboost_svdf_tree.hpp
Normal file
556
booster/tree/xgboost_svdf_tree.hpp
Normal file
@ -0,0 +1,556 @@
|
|||||||
|
#ifndef _XGBOOST_APEX_TREE_HPP_
|
||||||
|
#define _XGBOOST_APEX_TREE_HPP_
|
||||||
|
/*!
|
||||||
|
* \file xgboost_svdf_tree.hpp
|
||||||
|
* \brief implementation of regression tree, with layerwise support
|
||||||
|
* this file is adapted from GBRT implementation in SVDFeature project
|
||||||
|
* \author Tianqi Chen: tqchen@apex.sjtu.edu.cn, tianqi.tchen@gmail.com
|
||||||
|
*/
|
||||||
|
#include <algorithm>
|
||||||
|
#include "xgboost_tree_model.h"
|
||||||
|
#include "../../utils/xgboost_random.h"
|
||||||
|
#include "../../utils/xgboost_matrix_csr.h"
|
||||||
|
|
||||||
|
namespace xgboost{
|
||||||
|
namespace booster{
|
||||||
|
// debug switch of the regression tree code; assert_sorted only runs when this and check_bug are both true
const bool rt_debug = false;
// whether to check bugs
const bool check_bug = false;

// small tolerance used by the tree code when comparing feature values and loss changes
const float rt_eps = 1e-5f;
// twice rt_eps
const float rt_2eps = rt_eps * 2.0f;
|
||||||
|
|
||||||
|
/*! \brief square of a number, returns a * a */
inline double sqr( double a ){
    return a * a;
}
|
||||||
|
|
||||||
|
inline void assert_sorted( unsigned *idset, int len ){
|
||||||
|
if( !rt_debug || !check_bug ) return;
|
||||||
|
for( int i = 1; i < len; i ++ ){
|
||||||
|
utils::Assert( idset[i-1] < idset[i], "idset not sorted" );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
namespace booster{
|
||||||
|
/*! \brief node stat used in rtree, one record per tree node */
struct RTreeNodeStat{
    /*! \brief loss chg caused by current split */
    float loss_chg;
    /*! \brief weight of current node */
    float base_weight;
    /*! \brief number of child that is leaf node known up to now */
    int leaf_child_cnt;
};
|
||||||
|
|
||||||
|
// structure of Regression Tree
|
||||||
|
class RTree: public TreeModel<float,RTreeNodeStat>{
|
||||||
|
};
|
||||||
|
|
||||||
|
// selecter of rtree to find the suitable candidate
|
||||||
|
class RTSelecter{
|
||||||
|
public:
|
||||||
|
struct Entry{
|
||||||
|
float loss_chg;
|
||||||
|
size_t start;
|
||||||
|
int len;
|
||||||
|
unsigned sindex;
|
||||||
|
float split_value;
|
||||||
|
Entry(){}
|
||||||
|
Entry( float loss_chg, size_t start, int len, unsigned split_index, float split_value, bool default_left ){
|
||||||
|
this->loss_chg = loss_chg;
|
||||||
|
this->start = start;
|
||||||
|
this->len = len;
|
||||||
|
if( default_left ) split_index |= (1U << 31);
|
||||||
|
this->sindex = split_index;
|
||||||
|
this->split_value = split_value;
|
||||||
|
}
|
||||||
|
inline unsigned split_index( void ) const{
|
||||||
|
return sindex & ( (1U<<31) - 1U );
|
||||||
|
}
|
||||||
|
inline bool default_left( void ) const{
|
||||||
|
return (sindex >> 31) != 0;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
private:
|
||||||
|
Entry best_entry;
|
||||||
|
const TreeParamTrain ¶m;
|
||||||
|
public:
|
||||||
|
RTSelecter( const TreeParamTrain &p ):param( p ){
|
||||||
|
memset( &best_entry, 0, sizeof(best_entry) );
|
||||||
|
best_entry.loss_chg = 0.0f;
|
||||||
|
}
|
||||||
|
inline void push_back( const Entry &e ){
|
||||||
|
if( e.loss_chg > best_entry.loss_chg ) best_entry = e;
|
||||||
|
}
|
||||||
|
inline const Entry & select( void ){
|
||||||
|
return best_entry;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
// updater of rtree, allows the parameters to be stored inside, key solver
|
||||||
|
class RTreeUpdater{
|
||||||
|
protected:
|
||||||
|
/*! \brief training task, element of single task */
struct Task{
    /*! \brief node id in tree */
    int nid;
    /*! \brief idset pointer, instance id in [idset,idset+len) */
    unsigned *idset;
    /*! \brief length of idset */
    unsigned len;
    /*! \brief base_weight of parent */
    float parent_base_weight;
    /*! \brief default constructor, leaves all fields unset */
    Task(){}
    /*!
     * \brief construct a task
     * \param nid node id in tree
     * \param idset pointer to the instance ids of this task
     * \param len number of instance ids
     * \param pweight base_weight of parent, 0 by default
     */
    Task( int nid, unsigned *idset, unsigned len, float pweight = 0.0f )
        : nid( nid ), idset( idset ), len( len ), parent_base_weight( pweight ){
    }
};
|
||||||
|
|
||||||
|
/*! \brief sparse column entry */
struct SCEntry{
    /*! \brief feature value */
    float fvalue;
    /*! \brief row index in grad */
    unsigned rindex;
    /*! \brief default constructor, leaves all fields unset */
    SCEntry(){}
    /*!
     * \brief construct an entry
     * \param fvalue feature value
     * \param rindex row index in grad
     */
    SCEntry( float fvalue, unsigned rindex )
        : fvalue( fvalue ), rindex( rindex ){
    }
    /*! \brief order entries by feature value only, rindex is not compared */
    inline bool operator<( const SCEntry &p ) const{
        return fvalue < p.fvalue;
    }
};
|
||||||
|
private:
|
||||||
|
// training parameter
|
||||||
|
const TreeParamTrain ¶m;
|
||||||
|
// parameters, reference
|
||||||
|
RTree &tree;
|
||||||
|
std::vector<float> &grad;
|
||||||
|
std::vector<float> &hess;
|
||||||
|
const FMatrixS::Image &smat;
|
||||||
|
const std::vector<unsigned> &group_id;
|
||||||
|
private:
|
||||||
|
// maximum depth up to now
|
||||||
|
int max_depth;
|
||||||
|
// number of nodes being pruned
|
||||||
|
int num_pruned;
|
||||||
|
// stack to store current task
|
||||||
|
std::vector<Task> task_stack;
|
||||||
|
// temporal space for index set
|
||||||
|
std::vector<unsigned> idset;
|
||||||
|
private:
|
||||||
|
// task management: NOTE DFS here
|
||||||
|
inline void add_task( Task tsk ){
|
||||||
|
task_stack.push_back( tsk );
|
||||||
|
}
|
||||||
|
/*!
 * \brief pop the most recently added task
 * \param tsk output, receives the task when one is available
 * \return false when there is no task left, in that case tsk is untouched
 */
inline bool next_task( Task &tsk ){
    if( task_stack.empty() ) return false;
    tsk = task_stack.back();
    task_stack.pop_back();
    return true;
}
|
||||||
|
private:
|
||||||
|
// try to prune off current leaf, return true if successful
|
||||||
|
inline void try_prune_leaf( int nid, int depth ){
|
||||||
|
if( tree[ nid ].is_root() ) return;
|
||||||
|
int pid = tree[ nid ].parent();
|
||||||
|
RTree::NodeStat &s = tree.stat( pid );
|
||||||
|
s.leaf_child_cnt ++;
|
||||||
|
|
||||||
|
if( s.leaf_child_cnt >= 2 && param.need_prune( s.loss_chg, depth - 1 ) ){
|
||||||
|
// need to be pruned
|
||||||
|
tree.ChangeToLeaf( pid, param.learning_rate * s.base_weight );
|
||||||
|
// add statistics to number of nodes pruned
|
||||||
|
num_pruned += 2;
|
||||||
|
// tail recursion
|
||||||
|
this->try_prune_leaf( pid, depth - 1 );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// make leaf for current node :)
|
||||||
|
inline void make_leaf( Task tsk, double sum_grad, double sum_hess, bool compute ){
|
||||||
|
for( unsigned i = 0; i < tsk.len; i ++ ){
|
||||||
|
const unsigned ridx = tsk.idset[i];
|
||||||
|
if( compute ){
|
||||||
|
sum_grad += grad[ ridx ];
|
||||||
|
sum_hess += hess[ ridx ];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
tree[ tsk.nid ].set_leaf( param.learning_rate * param.CalcWeight( sum_grad, sum_hess, tsk.parent_base_weight ) );
|
||||||
|
this->try_prune_leaf( tsk.nid, tree.GetDepth( tsk.nid ) );
|
||||||
|
}
|
||||||
|
private:
|
||||||
|
// make split for current task, re-arrange positions in idset
|
||||||
|
inline void make_split( Task tsk, const SCEntry *entry, int num, float loss_chg, double base_weight ){
|
||||||
|
// before split, first prepare statistics
|
||||||
|
RTree::NodeStat &s = tree.stat( tsk.nid );
|
||||||
|
s.loss_chg = loss_chg;
|
||||||
|
s.leaf_child_cnt = 0;
|
||||||
|
s.base_weight = static_cast<float>( base_weight );
|
||||||
|
|
||||||
|
// add childs to current node
|
||||||
|
tree.AddChilds( tsk.nid );
|
||||||
|
// assert that idset is sorted
|
||||||
|
assert_sorted( tsk.idset, tsk.len );
|
||||||
|
// use merge sort style to get the solution
|
||||||
|
std::vector<unsigned> qset;
|
||||||
|
for( int i = 0; i < num; i ++ ){
|
||||||
|
qset.push_back( entry[i].rindex );
|
||||||
|
}
|
||||||
|
std::sort( qset.begin(), qset.end() );
|
||||||
|
// do merge sort style, make the other set, remove elements in qset
|
||||||
|
for( unsigned i = 0, top = 0; i < tsk.len; i ++ ){
|
||||||
|
if( top < qset.size() ){
|
||||||
|
if( tsk.idset[ i ] != qset[ top ] ){
|
||||||
|
tsk.idset[ i - top ] = tsk.idset[ i ];
|
||||||
|
}else{
|
||||||
|
top ++;
|
||||||
|
}
|
||||||
|
}else{
|
||||||
|
tsk.idset[ i - qset.size() ] = tsk.idset[ i ];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// get two parts
|
||||||
|
RTree::Node &n = tree[ tsk.nid ];
|
||||||
|
Task def_part( n.default_left() ? n.cleft() : n.cright(), tsk.idset, tsk.len - qset.size(), s.base_weight );
|
||||||
|
Task spl_part( n.default_left() ? n.cright(): n.cleft() , tsk.idset + def_part.len, qset.size(), s.base_weight );
|
||||||
|
// fill back split part
|
||||||
|
for( unsigned i = 0; i < spl_part.len; i ++ ){
|
||||||
|
spl_part.idset[ i ] = qset[ i ];
|
||||||
|
}
|
||||||
|
// add tasks to the queue
|
||||||
|
this->add_task( def_part );
|
||||||
|
this->add_task( spl_part );
|
||||||
|
}
|
||||||
|
|
||||||
|
// enumerate split point of the tree
|
||||||
|
inline void enumerate_split( RTSelecter &sglobal, int tlen,
|
||||||
|
double rsum_grad, double rsum_hess, double root_cost,
|
||||||
|
const SCEntry *entry, size_t start, size_t end,
|
||||||
|
int findex, float parent_base_weight ){
|
||||||
|
// local selecter
|
||||||
|
RTSelecter slocal( param );
|
||||||
|
|
||||||
|
if( param.default_direction != 1 ){
|
||||||
|
// forward process, default right
|
||||||
|
double csum_grad = 0.0, csum_hess = 0.0;
|
||||||
|
for( size_t j = start; j < end; j ++ ){
|
||||||
|
const unsigned ridx = entry[ j ].rindex;
|
||||||
|
csum_grad += grad[ ridx ];
|
||||||
|
csum_hess += hess[ ridx ];
|
||||||
|
// check for split
|
||||||
|
if( j == end - 1 || entry[j].fvalue + rt_2eps < entry[ j + 1 ].fvalue ){
|
||||||
|
if( csum_hess < param.min_child_weight ) continue;
|
||||||
|
const double dsum_hess = rsum_hess - csum_hess;
|
||||||
|
if( dsum_hess < param.min_child_weight ) break;
|
||||||
|
// change of loss
|
||||||
|
double loss_chg =
|
||||||
|
param.CalcCost( csum_grad, csum_hess, parent_base_weight ) +
|
||||||
|
param.CalcCost( rsum_grad - csum_grad, dsum_hess, parent_base_weight ) - root_cost;
|
||||||
|
|
||||||
|
const int clen = static_cast<int>( j + 1 - start );
|
||||||
|
// add candidate to selecter
|
||||||
|
slocal.push_back( RTSelecter::Entry( loss_chg, start, clen, findex,
|
||||||
|
j == end - 1 ? entry[j].fvalue + rt_eps : 0.5 * (entry[j].fvalue+entry[j+1].fvalue),
|
||||||
|
false ) );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if( param.default_direction != 2 ){
|
||||||
|
// backward process, default left
|
||||||
|
double csum_grad = 0.0, csum_hess = 0.0;
|
||||||
|
for( size_t j = end; j > start; j -- ){
|
||||||
|
const unsigned ridx = entry[ j - 1 ].rindex;
|
||||||
|
csum_grad += grad[ ridx ];
|
||||||
|
csum_hess += hess[ ridx ];
|
||||||
|
// check for split
|
||||||
|
if( j == start + 1 || entry[ j - 2 ].fvalue + rt_2eps < entry[ j - 1 ].fvalue ){
|
||||||
|
if( csum_hess < param.min_child_weight ) continue;
|
||||||
|
const double dsum_hess = rsum_hess - csum_hess;
|
||||||
|
if( dsum_hess < param.min_child_weight ) break;
|
||||||
|
double loss_chg = param.CalcCost( csum_grad, csum_hess, parent_base_weight ) +
|
||||||
|
param.CalcCost( rsum_grad - csum_grad, dsum_hess, parent_base_weight ) - root_cost;
|
||||||
|
const int clen = static_cast<int>( end - j + 1 );
|
||||||
|
// add candidate to selecter
|
||||||
|
slocal.push_back( RTSelecter::Entry( loss_chg, j - 1, clen, findex,
|
||||||
|
j == start + 1 ? entry[j-1].fvalue - rt_eps : 0.5 * (entry[j-2].fvalue + entry[j-1].fvalue),
|
||||||
|
true ) );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
sglobal.push_back( slocal.select() );
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
// temporal storage for expand column major
|
||||||
|
std::vector<size_t> tmp_rptr;
|
||||||
|
// find split for current task, another implementation of expand in column major manner
|
||||||
|
// should be more memory frugal, avoid global sorting across feature
|
||||||
|
inline void expand( Task tsk ){
|
||||||
|
// assert that idset is sorted
|
||||||
|
// if reach maximum depth, make leaf from current node
|
||||||
|
int depth = tree.GetDepth( tsk.nid );
|
||||||
|
// update statistiss
|
||||||
|
if( depth > max_depth ) max_depth = depth;
|
||||||
|
// if bigger than max depth
|
||||||
|
if( depth >= param.max_depth ){
|
||||||
|
this->make_leaf( tsk, 0.0, 0.0, true ); return;
|
||||||
|
}
|
||||||
|
// convert to column major CSR format
|
||||||
|
const int nrows = tree.param.num_feature;
|
||||||
|
if( tmp_rptr.size() == 0 ){
|
||||||
|
// initialize tmp storage in first usage
|
||||||
|
tmp_rptr.resize( nrows + 1 );
|
||||||
|
std::fill( tmp_rptr.begin(), tmp_rptr.end(), 0 );
|
||||||
|
}
|
||||||
|
// records the columns
|
||||||
|
std::vector<SCEntry> entry;
|
||||||
|
// records the active features
|
||||||
|
std::vector<size_t> aclist;
|
||||||
|
utils::SparseCSRMBuilder<SCEntry,true> builder( tmp_rptr, entry, aclist );
|
||||||
|
builder.InitBudget( nrows );
|
||||||
|
// statistics of root
|
||||||
|
double rsum_grad = 0.0, rsum_hess = 0.0;
|
||||||
|
for( unsigned i = 0; i < tsk.len; i ++ ){
|
||||||
|
const unsigned ridx = tsk.idset[i];
|
||||||
|
rsum_grad += grad[ ridx ];
|
||||||
|
rsum_hess += hess[ ridx ];
|
||||||
|
|
||||||
|
FMatrixS::Line sp = smat[ ridx ];
|
||||||
|
for( unsigned j = 0; j < sp.len; j ++ ){
|
||||||
|
builder.AddBudget( sp.findex[j] );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// if minimum split weight is not meet
|
||||||
|
if( param.cannot_split( rsum_hess, depth ) ){
|
||||||
|
this->make_leaf( tsk, rsum_grad, rsum_hess, false ); builder.Cleanup(); return;
|
||||||
|
}
|
||||||
|
|
||||||
|
builder.InitStorage();
|
||||||
|
for( unsigned i = 0; i < tsk.len; i ++ ){
|
||||||
|
const unsigned ridx = tsk.idset[i];
|
||||||
|
FMatrixS::Line sp = smat[ ridx ];
|
||||||
|
for( unsigned j = 0; j < sp.len; j ++ ){
|
||||||
|
builder.PushElem( sp.findex[j], SCEntry( sp.fvalue[j], ridx ) );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// --- end of building column major matrix ---
|
||||||
|
// after this point, tmp_rptr and entry is ready to use
|
||||||
|
|
||||||
|
// global selecter
|
||||||
|
RTSelecter sglobal( param );
|
||||||
|
// cost root
|
||||||
|
const double root_cost = param.CalcRootCost( rsum_grad, rsum_hess );
|
||||||
|
// KEY: layerwise, weight of current node if it is leaf
|
||||||
|
const double base_weight = param.CalcWeight( rsum_grad, rsum_hess, tsk.parent_base_weight );
|
||||||
|
// enumerate feature index
|
||||||
|
for( size_t i = 0; i < aclist.size(); i ++ ){
|
||||||
|
int findex = static_cast<int>( aclist[i] );
|
||||||
|
size_t start = tmp_rptr[ findex ];
|
||||||
|
size_t end = tmp_rptr[ findex + 1 ];
|
||||||
|
utils::Assert( start < end, "bug" );
|
||||||
|
// local sort can be faster when the features are sparse
|
||||||
|
std::sort( entry.begin() + start, entry.begin() + end );
|
||||||
|
// local selecter
|
||||||
|
this->enumerate_split( sglobal, tsk.len,
|
||||||
|
rsum_grad, rsum_hess, root_cost,
|
||||||
|
&entry[0], start, end, findex, base_weight );
|
||||||
|
}
|
||||||
|
// Cleanup tmp_rptr for next use
|
||||||
|
builder.Cleanup();
|
||||||
|
// get the best solution
|
||||||
|
const RTSelecter::Entry &e = sglobal.select();
|
||||||
|
// allowed to split
|
||||||
|
if( e.loss_chg > rt_eps ){
|
||||||
|
// add splits
|
||||||
|
tree[ tsk.nid ].set_split( e.split_index(), e.split_value, e.default_left() );
|
||||||
|
// re-arrange idset, push tasks
|
||||||
|
this->make_split( tsk, &entry[ e.start ], e.len, e.loss_chg, base_weight );
|
||||||
|
}else{
|
||||||
|
// make leaf if we didn't meet requirement
|
||||||
|
this->make_leaf( tsk, rsum_grad, rsum_hess, false );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
private:
|
||||||
|
// initialize the tasks
|
||||||
|
inline void init_tasks( size_t ngrads ){
|
||||||
|
// add group partition if necessary
|
||||||
|
if( group_id.size() == 0 ){
|
||||||
|
if( param.subsample > 1.0f - 1e-6f ){
|
||||||
|
idset.resize( 0 );
|
||||||
|
for( size_t i = 0; i < ngrads; i ++ ){
|
||||||
|
if( hess[i] < 0.0f ) continue;
|
||||||
|
idset.push_back( (unsigned)i );
|
||||||
|
}
|
||||||
|
}else{
|
||||||
|
idset.resize( 0 );
|
||||||
|
for( size_t i = 0; i < ngrads; i ++ ){
|
||||||
|
if( random::SampleBinary( param.subsample ) != 0 ){
|
||||||
|
idset.push_back( (unsigned)i );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
this->add_task( Task( 0, &idset[0], idset.size() ) ); return;
|
||||||
|
}
|
||||||
|
|
||||||
|
utils::Assert( group_id.size() == ngrads, "number of groups must be exact" );
|
||||||
|
{// new method for grouping, use CSR builder
|
||||||
|
std::vector<size_t> rptr;
|
||||||
|
utils::SparseCSRMBuilder<unsigned> builder( rptr, idset );
|
||||||
|
builder.InitBudget( tree.param.num_roots );
|
||||||
|
for( size_t i = 0; i < group_id.size(); i ++ ){
|
||||||
|
// drop invalid elements
|
||||||
|
if( hess[ i ] < 0.0f ) continue;
|
||||||
|
utils::Assert( group_id[ i ] < (unsigned)tree.param.num_roots,
|
||||||
|
"group id exceed number of roots" );
|
||||||
|
builder.AddBudget( group_id[ i ] );
|
||||||
|
}
|
||||||
|
builder.InitStorage();
|
||||||
|
for( size_t i = 0; i < group_id.size(); i ++ ){
|
||||||
|
// drop invalid elements
|
||||||
|
if( hess[ i ] < 0.0f ) continue;
|
||||||
|
builder.PushElem( group_id[ i ], static_cast<unsigned>(i) );
|
||||||
|
}
|
||||||
|
for( size_t i = 1; i < rptr.size(); i ++ ){
|
||||||
|
const size_t start = rptr[ i - 1 ], end = rptr[ i ];
|
||||||
|
if( start < end ){
|
||||||
|
this->add_task( Task( i - 1, &idset[ start ], end - start ) );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
public:
|
||||||
|
RTreeUpdater( const TreeParamTrain &pparam,
|
||||||
|
RTree &ptree,
|
||||||
|
std::vector<float> &pgrad,
|
||||||
|
std::vector<float> &phess,
|
||||||
|
const FMatrixS::Image &psmat,
|
||||||
|
const std::vector<unsigned> &pgroup_id ):
|
||||||
|
param( pparam ), tree( ptree ), grad( pgrad ), hess( phess ),
|
||||||
|
smat( psmat ), group_id( pgroup_id ){
|
||||||
|
}
|
||||||
|
inline int do_boost( int &num_pruned ){
|
||||||
|
this->init_tasks( grad.size() );
|
||||||
|
this->max_depth = 0;
|
||||||
|
this->num_pruned = 0;
|
||||||
|
Task tsk;
|
||||||
|
while( this->next_task( tsk ) ){
|
||||||
|
this->expand( tsk );
|
||||||
|
}
|
||||||
|
num_pruned = this->num_pruned;
|
||||||
|
return max_depth;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class RTreeTrainer : public IBooster{
|
||||||
|
private:
|
||||||
|
int silent;
|
||||||
|
// tree of current shape
|
||||||
|
RTree tree;
|
||||||
|
TreeParamTrain param;
|
||||||
|
private:
|
||||||
|
std::vector<float> tmp_feat;
|
||||||
|
std::vector<bool> tmp_funknown;
|
||||||
|
inline void init_tmpfeat( void ){
|
||||||
|
if( tmp_feat.size() != (size_t)tree.param.num_feature ){
|
||||||
|
tmp_feat.resize( tree.param.num_feature );
|
||||||
|
tmp_funknown.resize( tree.param.num_feature );
|
||||||
|
std::fill( tmp_funknown.begin(), tmp_funknown.end(), true );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
public:
|
||||||
|
virtual void SetParam( const char *name, const char *val ){
|
||||||
|
if( !strcmp( name, "silent") ) silent = atoi( val );
|
||||||
|
param.SetParam( name, val );
|
||||||
|
tree.param.SetParam( name, val );
|
||||||
|
}
|
||||||
|
virtual void LoadModel( utils::IStream &fi ){
|
||||||
|
tree.LoadModel( fi );
|
||||||
|
}
|
||||||
|
virtual void SaveModel( utils::IStream &fo ) const{
|
||||||
|
tree.SaveModel( fo );
|
||||||
|
}
|
||||||
|
virtual void InitModel( void ){
|
||||||
|
tree.InitModel();
|
||||||
|
}
|
||||||
|
private:
|
||||||
|
inline int get_next( int pid, float fvalue, bool is_unknown ){
|
||||||
|
float split_value = tree[ pid ].split_cond();
|
||||||
|
if( is_unknown ){
|
||||||
|
if( tree[ pid ].default_left() ) return tree[ pid ].cleft();
|
||||||
|
else return tree[ pid ].cright();
|
||||||
|
}else{
|
||||||
|
if( fvalue < split_value ) return tree[ pid ].cleft();
|
||||||
|
else return tree[ pid ].cright();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
public:
|
||||||
|
virtual void DoBoost( std::vector<float> &grad,
|
||||||
|
std::vector<float> &hess,
|
||||||
|
const FMatrixS::Image &smat,
|
||||||
|
const std::vector<unsigned> &group_id ){
|
||||||
|
utils::Assert( grad.size() < UINT_MAX, "number of instance exceed what we can handle" );
|
||||||
|
if( !silent ){
|
||||||
|
printf( "\nbuild GBRT with %u instances\n", (unsigned)grad.size() );
|
||||||
|
}
|
||||||
|
// start with a id set
|
||||||
|
RTreeUpdater updater( param, tree, grad, hess, smat, group_id );
|
||||||
|
int num_pruned;
|
||||||
|
tree.param.max_depth = updater.do_boost( num_pruned );
|
||||||
|
|
||||||
|
if( !silent ){
|
||||||
|
printf( "tree train end, %d roots, %d extra nodes, %d pruned nodes ,max_depth=%d\n",
|
||||||
|
tree.param.num_roots, tree.num_extra_nodes(), num_pruned, tree.param.max_depth );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
virtual int GetLeafIndex( const std::vector<float> &feat,
|
||||||
|
const std::vector<bool> &funknown,
|
||||||
|
unsigned gid = 0 ){
|
||||||
|
// start from groups that belongs to current data
|
||||||
|
int pid = (int)gid;
|
||||||
|
// tranverse tree
|
||||||
|
while( !tree[ pid ].is_leaf() ){
|
||||||
|
unsigned split_index = tree[ pid ].split_index();
|
||||||
|
pid = this->get_next( pid, feat[ split_index ], funknown[ split_index ] );
|
||||||
|
}
|
||||||
|
return pid;
|
||||||
|
}
|
||||||
|
virtual float Predict( const FMatrixS::Line &feat, unsigned gid = 0 ){
|
||||||
|
this->init_tmpfeat();
|
||||||
|
for( unsigned i = 0; i < feat.len; i ++ ){
|
||||||
|
utils::Assert( feat.findex[i] < (unsigned)tmp_funknown.size() , "input feature execeed bound" );
|
||||||
|
tmp_funknown[ feat.findex[i] ] = false;
|
||||||
|
tmp_feat[ feat.findex[i] ] = feat.fvalue[i];
|
||||||
|
}
|
||||||
|
int pid = this->GetLeafIndex( tmp_feat, tmp_funknown, gid );
|
||||||
|
// set back
|
||||||
|
for( unsigned i = 0; i < feat.len; i ++ ){
|
||||||
|
tmp_funknown[ feat.findex[i] ] = true;
|
||||||
|
}
|
||||||
|
return tree[ pid ].leaf_value();
|
||||||
|
}
|
||||||
|
virtual float Predict( const std::vector<float> &feat,
|
||||||
|
const std::vector<bool> &funknown,
|
||||||
|
unsigned gid = 0 ){
|
||||||
|
utils::Assert( feat.size() >= (size_t)tree.param.num_feature,
|
||||||
|
"input data smaller than num feature" );
|
||||||
|
int pid = this->GetLeafIndex( feat, funknown, gid );
|
||||||
|
return tree[ pid ].leaf_value();
|
||||||
|
}
|
||||||
|
public:
|
||||||
|
RTreeTrainer( void ){ silent = 0; }
|
||||||
|
virtual ~RTreeTrainer( void ){}
|
||||||
|
};
|
||||||
|
};
|
||||||
|
};
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
415
booster/tree/xgboost_tree_model.h
Normal file
415
booster/tree/xgboost_tree_model.h
Normal file
@ -0,0 +1,415 @@
|
|||||||
|
#ifndef _XGBOOST_TREE_MODEL_H_
|
||||||
|
#define _XGBOOST_TREE_MODEL_H_
|
||||||
|
/*!
|
||||||
|
* \file xgboost_tree_model.h
|
||||||
|
* \brief generic definition of model structure used in tree models
|
||||||
|
* used to support learning of boosting tree
|
||||||
|
* \author Tianqi Chen: tianqi.tchen@gmail.com
|
||||||
|
*/
|
||||||
|
#include <cstring>
|
||||||
|
#include "../../utils/xgboost_utils.h"
|
||||||
|
#include "../../utils/xgboost_stream.h"
|
||||||
|
|
||||||
|
namespace xgboost{
|
||||||
|
namespace booster{
|
||||||
|
/*!
|
||||||
|
* \brief template class of TreeModel
|
||||||
|
* \tparam TSplitCond data type to indicate split condition
|
||||||
|
* \tparam TNodeStat auxiliary statistics of node to help tree building
|
||||||
|
*/
|
||||||
|
template<typename TSplitCond,typename TNodeStat>
|
||||||
|
class TreeModel{
|
||||||
|
public:
|
||||||
|
/*! \brief auxiliary statistics of node to help tree building */
|
||||||
|
typedef TNodeStat NodeStat;
|
||||||
|
/*! \brief data type to indicate split condition */
|
||||||
|
typedef TSplitCond SplitCond;
|
||||||
|
public:
|
||||||
|
/*!
 * \brief parameters of the tree
 *  this struct is read and written as raw bytes by LoadModel/SaveModel,
 *  so the order and type of the fields must not change
 */
struct Param{
    /*! \brief number of start root */
    int num_roots;
    /*! \brief total number of nodes */
    int num_nodes;
    /*!\brief number of deleted nodes */
    int num_deleted;
    /*! \brief maximum depth, this is a statistics of the tree */
    int max_depth;
    /*! \brief number of features used for tree construction */
    int num_feature;
    /*! \brief reserved part */
    int reserved[ 32 ];
    /*! \brief constructor, every field gets a defined value: one root, one node, nothing else */
    Param( void ){
        num_roots   = 1;
        num_nodes   = 1;
        num_deleted = 0;
        max_depth   = 0;
        num_feature = 0;
        memset( reserved, 0, sizeof( reserved ) );
    }
    /*!
     * \brief set parameters from outside, only "num_roots" and "num_feature" are recognized,
     *  any other name is ignored
     * \param name name of the parameter
     * \param val value of the parameter
     */
    inline void SetParam( const char *name, const char *val ){
        if( !strcmp("num_roots", name ) ) num_roots = atoi( val );
        if( !strcmp("num_feature", name ) ) num_feature = atoi( val );
    }
};
|
||||||
|
/*! \brief tree node */
|
||||||
|
class Node{
|
||||||
|
private:
|
||||||
|
friend class TreeModel<TSplitCond,TNodeStat>;
|
||||||
|
/*!
|
||||||
|
* \brief in leaf node, we have weights, in non-leaf nodes,
|
||||||
|
* we have split condition
|
||||||
|
*/
|
||||||
|
union Info{
|
||||||
|
float leaf_value;
|
||||||
|
TSplitCond split_cond;
|
||||||
|
};
|
||||||
|
private:
|
||||||
|
// pointer to parent, highest bit is used to indicate whether it's a left child or not
|
||||||
|
int sparent;
|
||||||
|
// pointer to left, right
|
||||||
|
int left, right;
|
||||||
|
// split feature index, left split or right split depends on the highest bit
|
||||||
|
unsigned sindex;
|
||||||
|
// extra info
|
||||||
|
Info info;
|
||||||
|
private:
|
||||||
|
inline void set_parent( int pidx, bool is_left_child = true ){
|
||||||
|
if( is_left_child ) pidx |= (1U << 31);
|
||||||
|
this->sparent = pidx;
|
||||||
|
}
|
||||||
|
public:
|
||||||
|
/*! \brief index of left child */
|
||||||
|
inline int cleft( void ) const{
|
||||||
|
return this->left;
|
||||||
|
}
|
||||||
|
/*! \brief index of right child */
|
||||||
|
inline int cright( void ) const{
|
||||||
|
return this->right;
|
||||||
|
}
|
||||||
|
/*! \brief feature index of split condition */
|
||||||
|
inline unsigned split_index( void ) const{
|
||||||
|
return sindex & ( (1U<<31) - 1U );
|
||||||
|
}
|
||||||
|
/*! \brief when feature is unknown, whether goes to left child */
|
||||||
|
inline bool default_left( void ) const{
|
||||||
|
return (sindex >> 31) != 0;
|
||||||
|
}
|
||||||
|
/*! \brief whether current node is leaf node */
|
||||||
|
inline bool is_leaf( void ) const{
|
||||||
|
return left == -1;
|
||||||
|
}
|
||||||
|
/*! \brief get leaf value of leaf node */
|
||||||
|
inline float leaf_value( void ) const{
|
||||||
|
return (this->info).leaf_value;
|
||||||
|
}
|
||||||
|
/*! \brief get split condition of the node */
|
||||||
|
inline TSplitCond split_cond( void ) const{
|
||||||
|
return (this->info).split_cond;
|
||||||
|
}
|
||||||
|
/*! \brief get parent of the node */
|
||||||
|
inline int parent( void ) const{
|
||||||
|
return sparent & ( (1U << 31) - 1 );
|
||||||
|
}
|
||||||
|
/*! \brief whether current node is left child */
|
||||||
|
inline bool is_left_child( void ) const{
|
||||||
|
return ( sparent & (1U << 31)) != 0;
|
||||||
|
}
|
||||||
|
/*! \brief whether current node is root */
|
||||||
|
inline bool is_root( void ) const{
|
||||||
|
return sparent == -1;
|
||||||
|
}
|
||||||
|
/*!
|
||||||
|
* \brief set the right child
|
||||||
|
* \param nide node id to right child
|
||||||
|
*/
|
||||||
|
inline void set_right_child( int nid ){
|
||||||
|
this->right = nid;
|
||||||
|
}
|
||||||
|
/*!
|
||||||
|
* \brief set split condition of current node
|
||||||
|
* \param split_index feature index to split
|
||||||
|
* \param split_cond split condition
|
||||||
|
* \param default_left the default direction when feature is unknown
|
||||||
|
*/
|
||||||
|
inline void set_split( unsigned split_index, TSplitCond split_cond, bool default_left = false ){
|
||||||
|
if( default_left ) split_index |= (1U << 31);
|
||||||
|
this->sindex = split_index;
|
||||||
|
(this->info).split_cond = split_cond;
|
||||||
|
}
|
||||||
|
/*!
|
||||||
|
* \brief set the leaf value of the node
|
||||||
|
* \param value leaf value
|
||||||
|
* \param right right index, could be used to store
|
||||||
|
* additional information
|
||||||
|
*/
|
||||||
|
inline void set_leaf( float value, int right = -1 ){
|
||||||
|
(this->info).leaf_value = value;
|
||||||
|
this->left = -1;
|
||||||
|
this->right = right;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
protected:
|
||||||
|
// vector of nodes
|
||||||
|
std::vector<Node> nodes;
|
||||||
|
// stats of nodes
|
||||||
|
std::vector<TNodeStat> stats;
|
||||||
|
protected:
|
||||||
|
// free node space, used during training process
|
||||||
|
std::vector<int> deleted_nodes;
|
||||||
|
// allocate a new node,
|
||||||
|
// !!!!!! NOTE: may cause BUG here, nodes.resize
|
||||||
|
inline int AllocNode( void ){
|
||||||
|
if( param.num_deleted != 0 ){
|
||||||
|
int nd = deleted_nodes.back();
|
||||||
|
deleted_nodes.pop_back();
|
||||||
|
param.num_deleted --;
|
||||||
|
return nd;
|
||||||
|
}
|
||||||
|
int nd = param.num_nodes ++;
|
||||||
|
nodes.resize( param.num_nodes );
|
||||||
|
stats.resize( param.num_nodes );
|
||||||
|
return nd;
|
||||||
|
}
|
||||||
|
// delete a tree node
|
||||||
|
inline void DeleteNode( int nid ){
|
||||||
|
utils::Assert( nid >= param.num_roots, "can not delete root");
|
||||||
|
deleted_nodes.push_back( nid );
|
||||||
|
nodes[ nid ].set_parent( -1 );
|
||||||
|
param.num_deleted ++;
|
||||||
|
}
|
||||||
|
public:
|
||||||
|
/*!
|
||||||
|
* \brief change a non leaf node to a leaf node, delete its children
|
||||||
|
* \param rid node id of the node
|
||||||
|
* \param new leaf value
|
||||||
|
*/
|
||||||
|
inline void ChangeToLeaf( int rid, float value ){
|
||||||
|
utils::Assert( nodes[ nodes[rid].left ].is_leaf(), "can not delete a non termial child");
|
||||||
|
utils::Assert( nodes[ nodes[rid].right ].is_leaf(), "can not delete a non termial child");
|
||||||
|
this->DeleteNode( nodes[ rid ].left );
|
||||||
|
this->DeleteNode( nodes[ rid ].right );
|
||||||
|
nodes[ rid ].set_leaf( value );
|
||||||
|
}
|
||||||
|
public:
|
||||||
|
/*! \brief model parameter */
|
||||||
|
Param param;
|
||||||
|
public:
|
||||||
|
/*! \brief constructor */
|
||||||
|
TreeModel( void ){
|
||||||
|
param.num_nodes = 1;
|
||||||
|
param.num_roots = 1;
|
||||||
|
param.num_deleted = 0;
|
||||||
|
nodes.resize( 1 );
|
||||||
|
}
|
||||||
|
/*! \brief get node given nid */
|
||||||
|
inline Node &operator[]( int nid ){
|
||||||
|
return nodes[ nid ];
|
||||||
|
}
|
||||||
|
/*! \brief get node statistics given nid */
|
||||||
|
inline NodeStat &stat( int nid ){
|
||||||
|
return stats[ nid ];
|
||||||
|
}
|
||||||
|
/*! \brief initialize the model */
|
||||||
|
inline void InitModel( void ){
|
||||||
|
param.num_nodes = param.num_roots;
|
||||||
|
nodes.resize( param.num_nodes );
|
||||||
|
stats.resize( param.num_nodes );
|
||||||
|
for( int i = 0; i < param.num_nodes; i ++ ){
|
||||||
|
nodes[i].set_leaf( 0.0f );
|
||||||
|
nodes[i].set_parent( -1 );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
/*!
|
||||||
|
* \brief load model from stream
|
||||||
|
* \param fi input stream
|
||||||
|
*/
|
||||||
|
inline void LoadModel( utils::IStream &fi ){
|
||||||
|
utils::Assert( fi.Read( ¶m, sizeof(Param) ) > 0, "TreeModel" );
|
||||||
|
nodes.resize( param.num_nodes );
|
||||||
|
utils::Assert( fi.Read( &nodes[0], sizeof(Node) * nodes.size() ) > 0, "TreeModel::Node" );
|
||||||
|
|
||||||
|
deleted_nodes.resize( 0 );
|
||||||
|
for( int i = param.num_roots; i < param.num_nodes; i ++ ){
|
||||||
|
if( nodes[i].is_root() ) deleted_nodes.push_back( i );
|
||||||
|
}
|
||||||
|
utils::Assert( (int)deleted_nodes.size() == param.num_deleted, "number of deleted nodes do not match" );
|
||||||
|
}
|
||||||
|
/*!
|
||||||
|
* \brief save model to stream
|
||||||
|
* \param fo output stream
|
||||||
|
*/
|
||||||
|
inline void SaveModel( utils::IStream &fo ) const{
|
||||||
|
utils::Assert( param.num_nodes == (int)nodes.size() );
|
||||||
|
fo.Write( ¶m, sizeof(Param) );
|
||||||
|
fo.Write( &nodes[0], sizeof(Node) * nodes.size() );
|
||||||
|
}
|
||||||
|
/*!
|
||||||
|
* \brief add child nodes to node
|
||||||
|
* \param nid node id to add childs
|
||||||
|
*/
|
||||||
|
inline void AddChilds( int nid ){
|
||||||
|
int pleft = this->AllocNode();
|
||||||
|
int pright = this->AllocNode();
|
||||||
|
nodes[ nid ].left = pleft;
|
||||||
|
nodes[ nid ].right = pright;
|
||||||
|
nodes[ nodes[ nid ].left ].set_parent( nid, true );
|
||||||
|
nodes[ nodes[ nid ].right ].set_parent( nid, false );
|
||||||
|
}
|
||||||
|
/*!
|
||||||
|
* \brief only add a right child to a leaf node
|
||||||
|
* \param node id to add right child
|
||||||
|
*/
|
||||||
|
inline void AddRightChild( int nid ){
|
||||||
|
int pright = this->AllocNode();
|
||||||
|
nodes[ nid ].right = pright;
|
||||||
|
nodes[ nodes[ nid ].right ].set_parent( nid, false );
|
||||||
|
}
|
||||||
|
/*!
|
||||||
|
* \brief get current depth
|
||||||
|
* \param nid node id
|
||||||
|
* \param pass_rchild whether right child is not counted in depth
|
||||||
|
*/
|
||||||
|
inline int GetDepth( int nid, bool pass_rchild = false ) const{
|
||||||
|
int depth = 0;
|
||||||
|
while( !nodes[ nid ].is_root() ){
|
||||||
|
if( !pass_rchild || nodes[ nid ].is_left_child() ) depth ++;
|
||||||
|
nid = nodes[ nid ].parent();
|
||||||
|
}
|
||||||
|
return depth;
|
||||||
|
}
|
||||||
|
/*! \brief number of extra nodes besides the root */
|
||||||
|
inline int num_extra_nodes( void ) const {
|
||||||
|
return param.num_nodes - param.num_roots - param.num_deleted;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
namespace booster{
|
||||||
|
/*! \brief training parameters for regression tree */
struct TreeParamTrain{
    // learning step size for a time
    float learning_rate;
    // minimum loss change required for a split
    float min_split_loss;
    // maximum depth of a tree
    int max_depth;
    //----- the rest parameters are less important ----
    // minimum amount of hessian(weight) allowed in a child
    float min_child_weight;
    // weight decay parameter used to control leaf fitting
    float reg_lambda;
    // regularization method: 1 = L1, 2 = L2, 3 = elastic net, any other value = no regularization
    int reg_method;
    // default direction choice: 0 = learn, 1 = left, 2 = right
    int default_direction;
    // subsample setting, defaults to 1.0f
    // NOTE(review): presumably the fraction of instances used per tree -- confirm against the trainer
    float subsample;
    // whether to use layerwise aware regularization
    int use_layerwise;
    /*! \brief constructor, gives every parameter a defined default value */
    TreeParamTrain( void ){
        learning_rate = 0.3f;
        // must be initialized: need_prune() reads it even when "gamma"/"min_split_loss" is never set
        min_split_loss = 0.0f;
        min_child_weight = 1.0f;
        max_depth = 6;
        reg_lambda = 1.0f;
        reg_method = 2;
        default_direction = 0;
        subsample = 1.0f;
        use_layerwise = 0;
    }
    /*!
     * \brief set parameters from outside, unknown names are ignored
     * \param name name of the parameter
     * \param val value of the parameter
     */
    inline void SetParam( const char *name, const char *val ){
        // sync-names
        if( !strcmp( name, "gamma") ) min_split_loss = (float)atof( val );
        if( !strcmp( name, "eta") ) learning_rate = (float)atof( val );
        if( !strcmp( name, "lambda") ) reg_lambda = (float)atof( val );
        // normal tree parameters
        if( !strcmp( name, "learning_rate") ) learning_rate = (float)atof( val );
        if( !strcmp( name, "min_child_weight") ) min_child_weight = (float)atof( val );
        if( !strcmp( name, "min_split_loss") ) min_split_loss = (float)atof( val );
        if( !strcmp( name, "max_depth") ) max_depth = atoi( val );
        if( !strcmp( name, "reg_lambda") ) reg_lambda = (float)atof( val );
        // reg_method is an integer selector, parse it like the other integer parameters
        if( !strcmp( name, "reg_method") ) reg_method = atoi( val );
        if( !strcmp( name, "subsample") ) subsample = (float)atof( val );
        if( !strcmp( name, "use_layerwise") ) use_layerwise = atoi( val );
        if( !strcmp( name, "default_direction") ) {
            if( !strcmp( val, "learn") ) default_direction = 0;
            if( !strcmp( val, "left") ) default_direction = 1;
            if( !strcmp( val, "right") ) default_direction = 2;
        }
    }
protected:
    // functions for L1 cost
    /*! \brief soft thresholding: shrink w towards zero by lambda, values within [-lambda,lambda] become 0 */
    static inline double ThresholdL1( double w, double lambda ){
        if( w > +lambda ) return w - lambda;
        if( w < -lambda ) return w + lambda;
        return 0.0;
    }
    /*!
     * \brief calculate the weight given the statistics, without layerwise correction
     * \param sum_grad sum of gradient
     * \param sum_hess sum of hessian
     * \return 0 when sum_hess is below min_child_weight, otherwise the weight chosen by reg_method
     */
    inline double CalcWeight( double sum_grad, double sum_hess )const{
        if( sum_hess < min_child_weight ){
            return 0.0;
        }else{
            switch( reg_method ){
            case 1: return - ThresholdL1( sum_grad, reg_lambda ) / sum_hess;
            case 2: return - sum_grad / ( sum_hess + reg_lambda );
            // elastic net
            case 3: return - ThresholdL1( sum_grad, 0.5 * reg_lambda ) / ( sum_hess + 0.5 * reg_lambda );
            default: return - sum_grad / sum_hess;
            }
        }
    }
private:
    /*! \brief square of a number */
    inline static double Sqr( double a ){
        return a * a;
    }
public:
    /*!
     * \brief calculate the cost of loss function
     * \param sum_grad sum of gradient
     * \param sum_hess sum of hessian
     * \return 0 when sum_hess is below min_child_weight, otherwise the cost chosen by reg_method
     */
    inline double CalcCost( double sum_grad, double sum_hess ) const{
        if( sum_hess < min_child_weight ){
            return 0.0;
        }
        switch( reg_method ){
        case 1 : return Sqr( ThresholdL1( sum_grad, reg_lambda ) ) / sum_hess;
        case 2 : return Sqr( sum_grad ) / ( sum_hess + reg_lambda );
        // elastic net
        case 3 : return Sqr( ThresholdL1( sum_grad, 0.5 * reg_lambda ) ) / ( sum_hess + 0.5 * reg_lambda );
        default: return Sqr( sum_grad ) / sum_hess;
        }
    }
    // KEY:layerwise
    /*! \brief calculate cost of root, which is 0 when layerwise mode is on */
    inline double CalcRootCost( double sum_grad, double sum_hess ) const{
        if( use_layerwise == 0 ) return this->CalcCost( sum_grad, sum_hess );
        else return 0.0;
    }
    // KEY:layerwise
    /*!
     * \brief calculate the cost after split
     * \param sum_grad sum of gradient
     * \param sum_hess sum of hessian
     * \param base_weight the base_weight of parent, only used in layerwise mode
     */
    inline double CalcCost( double sum_grad, double sum_hess, double base_weight ) const{
        if( use_layerwise == 0 ) return this->CalcCost( sum_grad, sum_hess );
        else return this->CalcCost( sum_grad + sum_hess * base_weight, sum_hess );
    }
    /*!
     * \brief calculate the weight of leaf
     * \param sum_grad sum of gradient
     * \param sum_hess sum of hessian
     * \param parent_base_weight base weight of parent, only used in layerwise mode
     */
    inline double CalcWeight( double sum_grad, double sum_hess, double parent_base_weight )const{
        if( use_layerwise == 0 ) return CalcWeight( sum_grad, sum_hess );
        else return parent_base_weight + CalcWeight( sum_grad + parent_base_weight * sum_hess, sum_hess );
    }
    /*! \brief given the loss change, whether we need to invoke pruning; depth is currently unused */
    inline bool need_prune( double loss_chg, int depth ) const{
        return loss_chg < min_split_loss;
    }
    /*! \brief whether we can split with current hessian; depth is currently unused */
    inline bool cannot_split( double sum_hess, int depth ) const{
        return sum_hess < min_child_weight * 2.0;
    }
};
|
||||||
|
};
|
||||||
|
};
|
||||||
|
#endif
|
||||||
@ -10,6 +10,9 @@
|
|||||||
#include "xgboost.h"
|
#include "xgboost.h"
|
||||||
#include "../utils/xgboost_utils.h"
|
#include "../utils/xgboost_utils.h"
|
||||||
#include "xgboost_gbmbase.h"
|
#include "xgboost_gbmbase.h"
|
||||||
|
// implementations of boosters
|
||||||
|
#include "tree/xgboost_svdf_tree.hpp"
|
||||||
|
|
||||||
namespace xgboost{
|
namespace xgboost{
|
||||||
namespace booster{
|
namespace booster{
|
||||||
/*!
|
/*!
|
||||||
|
|||||||
@ -3,6 +3,7 @@
|
|||||||
/*!
|
/*!
|
||||||
* \file xgboost.h
|
* \file xgboost.h
|
||||||
* \brief the general gradient boosting interface
|
* \brief the general gradient boosting interface
|
||||||
|
*
|
||||||
* \author Tianqi Chen: tianqi.tchen@gmail.com
|
* \author Tianqi Chen: tianqi.tchen@gmail.com
|
||||||
*/
|
*/
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|||||||
@ -53,7 +53,7 @@ namespace xgboost{
|
|||||||
/*! \brief type of tree used */
|
/*! \brief type of tree used */
|
||||||
int booster_type;
|
int booster_type;
|
||||||
/*! \brief number of root: default 0, means single tree */
|
/*! \brief number of root: default 0, means single tree */
|
||||||
int num_root;
|
int num_roots;
|
||||||
/*! \brief number of features to be used by boosters */
|
/*! \brief number of features to be used by boosters */
|
||||||
int num_feature;
|
int num_feature;
|
||||||
/*! \brief size of predicton buffer allocated for buffering boosting computation */
|
/*! \brief size of predicton buffer allocated for buffering boosting computation */
|
||||||
@ -69,7 +69,7 @@ namespace xgboost{
|
|||||||
Param( void ){
|
Param( void ){
|
||||||
num_boosters = 0;
|
num_boosters = 0;
|
||||||
booster_type = 0;
|
booster_type = 0;
|
||||||
num_root = num_feature = 0;
|
num_roots = num_feature = 0;
|
||||||
do_reboost = 0;
|
do_reboost = 0;
|
||||||
num_pbuffer = 0;
|
num_pbuffer = 0;
|
||||||
memset( reserved, 0, sizeof( reserved ) );
|
memset( reserved, 0, sizeof( reserved ) );
|
||||||
@ -83,7 +83,7 @@ namespace xgboost{
|
|||||||
if( !strcmp("booster_type", name ) ) booster_type = atoi( val );
|
if( !strcmp("booster_type", name ) ) booster_type = atoi( val );
|
||||||
if( !strcmp("num_pbuffer", name ) ) num_pbuffer = atoi( val );
|
if( !strcmp("num_pbuffer", name ) ) num_pbuffer = atoi( val );
|
||||||
if( !strcmp("do_reboost", name ) ) do_reboost = atoi( val );
|
if( !strcmp("do_reboost", name ) ) do_reboost = atoi( val );
|
||||||
if( !strcmp("bst:num_root", name ) ) num_root = atoi( val );
|
if( !strcmp("bst:num_roots", name ) ) num_roots = atoi( val );
|
||||||
if( !strcmp("bst:num_feature", name ) ) num_feature = atoi( val );
|
if( !strcmp("bst:num_feature", name ) ) num_feature = atoi( val );
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
@ -98,9 +98,12 @@ namespace xgboost{
|
|||||||
* \param val value of the parameter
|
* \param val value of the parameter
|
||||||
*/
|
*/
|
||||||
inline void SetParam( const char *name, const char *val ){
|
inline void SetParam( const char *name, const char *val ){
|
||||||
if( strncmp( name, "bst:", 4 ) == 0 ){
|
if( !strncmp( name, "bst:", 4 ) ){
|
||||||
cfg.PushBack( name + 4, val );
|
cfg.PushBack( name + 4, val );
|
||||||
}
|
}
|
||||||
|
if( !strcmp( name, "silent") ){
|
||||||
|
cfg.PushBack( name, val );
|
||||||
|
}
|
||||||
if( boosters.size() == 0 ) param.SetParam( name, val );
|
if( boosters.size() == 0 ) param.SetParam( name, val );
|
||||||
}
|
}
|
||||||
/*!
|
/*!
|
||||||
|
|||||||
152
utils/xgboost_matrix_csr.h
Normal file
152
utils/xgboost_matrix_csr.h
Normal file
@ -0,0 +1,152 @@
|
|||||||
|
/*!
|
||||||
|
* \file xgboost_matrix_csr.h
|
||||||
|
* \brief this file defines some easy to use STL based class for in memory sparse CSR matrix
|
||||||
|
* \author Tianqi Chen: tianqi.tchen@gmail.com
|
||||||
|
*/
|
||||||
|
#ifndef _XGBOOST_MATRIX_CSR_H_
|
||||||
|
#define _XGBOOST_MATRIX_CSR_H_
|
||||||
|
#include <vector>
|
||||||
|
#include <algorithm>
|
||||||
|
#include "xgboost_utils.h"
|
||||||
|
|
||||||
|
namespace xgboost{
|
||||||
|
namespace utils{
|
||||||
|
/*!
|
||||||
|
* \brief a class used to help construct CSR format matrix,
|
||||||
|
* can be used to convert row major CSR to column major CSR
|
||||||
|
* \tparam IndexType type of index used to store the index position, usually unsigned or size_t
|
||||||
|
* \tparam whether enabling the usage of aclist, this option must be enabled manually
|
||||||
|
*/
|
||||||
|
template<typename IndexType,bool UseAcList = false>
|
||||||
|
struct SparseCSRMBuilder{
|
||||||
|
private:
|
||||||
|
/*! \brief dummy variable used in the indicator matrix construction */
|
||||||
|
std::vector<size_t> dummy_aclist;
|
||||||
|
/*! \brief pointer to each of the row */
|
||||||
|
std::vector<size_t> &rptr;
|
||||||
|
/*! \brief index of nonzero entries in each row */
|
||||||
|
std::vector<IndexType> &findex;
|
||||||
|
/*! \brief a list of active rows, used when many rows are empty */
|
||||||
|
std::vector<size_t> &aclist;
|
||||||
|
public:
|
||||||
|
SparseCSRMBuilder( std::vector<size_t> &p_rptr,
|
||||||
|
std::vector<IndexType> &p_findex )
|
||||||
|
:rptr(p_rptr), findex( p_findex ), aclist( dummy_aclist ){
|
||||||
|
Assert( !UseAcList, "enabling bug" );
|
||||||
|
}
|
||||||
|
/*! \brief use with caution! rptr must be cleaned before use */
|
||||||
|
SparseCSRMBuilder( std::vector<size_t> &p_rptr,
|
||||||
|
std::vector<IndexType> &p_findex,
|
||||||
|
std::vector<size_t> &p_aclist )
|
||||||
|
:rptr(p_rptr), findex( p_findex ), aclist( p_aclist ){
|
||||||
|
Assert( UseAcList, "must manually enable the option use aclist" );
|
||||||
|
}
|
||||||
|
public:
|
||||||
|
/*!
|
||||||
|
* \brief step 1: initialize the number of rows in the data
|
||||||
|
* \nrows number of rows in the matrix
|
||||||
|
*/
|
||||||
|
inline void InitBudget( size_t nrows ){
|
||||||
|
if( !UseAcList ){
|
||||||
|
rptr.resize( nrows + 1 );
|
||||||
|
std::fill( rptr.begin(), rptr.end(), 0 );
|
||||||
|
}else{
|
||||||
|
Assert( nrows + 1 == rptr.size(), "rptr must be initialized already" );
|
||||||
|
this->Cleanup();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
/*!
|
||||||
|
* \brief step 2: add budget to each rows, this function is called when aclist is used
|
||||||
|
* \param row_id the id of the row
|
||||||
|
* \param nelem number of element budget add to this row
|
||||||
|
*/
|
||||||
|
inline void AddBudget( size_t row_id, size_t nelem = 1 ){
|
||||||
|
if( UseAcList ){
|
||||||
|
if( rptr[ row_id + 1 ] == 0 ) aclist.push_back( row_id );
|
||||||
|
}
|
||||||
|
rptr[ row_id + 1 ] += nelem;
|
||||||
|
}
|
||||||
|
/*! \brief step 3: initialize the necessary storage */
|
||||||
|
inline void InitStorage( void ){
|
||||||
|
// initialize rptr to be beginning of each segment
|
||||||
|
size_t start = 0;
|
||||||
|
if( !UseAcList ){
|
||||||
|
for( size_t i = 1; i < rptr.size(); i ++ ){
|
||||||
|
size_t rlen = rptr[ i ];
|
||||||
|
rptr[ i ] = start;
|
||||||
|
start += rlen;
|
||||||
|
}
|
||||||
|
}else{
|
||||||
|
// case with active list
|
||||||
|
std::sort( aclist.begin(), aclist.end() );
|
||||||
|
|
||||||
|
for( size_t i = 0; i < aclist.size(); i ++ ){
|
||||||
|
size_t ridx = aclist[ i ];
|
||||||
|
size_t rlen = rptr[ ridx + 1 ];
|
||||||
|
rptr[ ridx + 1 ] = start;
|
||||||
|
// set previous rptr to right position if previous feature is not active
|
||||||
|
if( i == 0 || ridx != aclist[i-1] + 1 ) rptr[ ridx ] = start;
|
||||||
|
start += rlen;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
findex.resize( start );
|
||||||
|
}
|
||||||
|
/*!
|
||||||
|
* \brief step 4:
|
||||||
|
* used in indicator matrix construction, add new
|
||||||
|
* element to each row, the number of calls shall be exactly same as add_budget
|
||||||
|
*/
|
||||||
|
inline void PushElem( size_t row_id, IndexType col_id ){
|
||||||
|
size_t &rp = rptr[ row_id + 1 ];
|
||||||
|
findex[ rp ++ ] = col_id;
|
||||||
|
}
|
||||||
|
/*!
|
||||||
|
* \brief step 5: only needed when aclist is used
|
||||||
|
* clean up the rptr for next usage
|
||||||
|
*/
|
||||||
|
inline void Cleanup( void ){
|
||||||
|
Assert( UseAcList, "this function can only be called use AcList" );
|
||||||
|
for( size_t i = 0; i < aclist.size(); i ++ ){
|
||||||
|
const size_t ridx = aclist[i];
|
||||||
|
rptr[ ridx ] = 0; rptr[ ridx + 1 ] = 0;
|
||||||
|
}
|
||||||
|
aclist.clear();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
namespace utils{
|
||||||
|
/*!
|
||||||
|
* \brief simple sparse matrix container
|
||||||
|
* \tparam IndexType type of index used to store the index position, usually unsigned or size_t
|
||||||
|
*/
|
||||||
|
template<typename IndexType>
|
||||||
|
struct SparseCSRMat{
|
||||||
|
private:
|
||||||
|
/*! \brief pointer to each of the row */
|
||||||
|
std::vector<size_t> rptr;
|
||||||
|
/*! \brief index of nonzero entries in each row */
|
||||||
|
std::vector<IndexType> findex;
|
||||||
|
public:
|
||||||
|
/*! \brief matrix builder*/
|
||||||
|
SparseCSRMBuilder<IndexType> builder;
|
||||||
|
public:
|
||||||
|
SparseCSRMat( void ):builder( rptr, findex ){
|
||||||
|
}
|
||||||
|
public:
|
||||||
|
/*! \return number of rows in the matrx */
|
||||||
|
inline size_t NumRow( void ) const{
|
||||||
|
return rptr.size() - 1;
|
||||||
|
}
|
||||||
|
/*! \return number of elements r-th row */
|
||||||
|
inline size_t NumElem( size_t r ) const{
|
||||||
|
return rptr[ r + 1 ] - rptr[ r ];
|
||||||
|
}
|
||||||
|
/*! \return r-th row */
|
||||||
|
inline const IndexType *operator[]( size_t r ) const{
|
||||||
|
return &findex[ rptr[r] ];
|
||||||
|
}
|
||||||
|
};
|
||||||
|
};
|
||||||
|
};
|
||||||
|
#endif
|
||||||
131
utils/xgboost_random.h
Normal file
131
utils/xgboost_random.h
Normal file
@ -0,0 +1,131 @@
|
|||||||
|
#ifndef _XGBOOST_RANDOM_H_
|
||||||
|
#define _XGBOOST_RANDOM_H_
|
||||||
|
/*!
|
||||||
|
* \file xgboost_random.h
|
||||||
|
* \brief PRNG to support random number generation
|
||||||
|
* \author Tianqi Chen: tianqi.tchen@gmail.com
|
||||||
|
*
|
||||||
|
* Use standard PRNG from stdlib
|
||||||
|
*/
|
||||||
|
#include <cmath>
|
||||||
|
#include <cstdlib>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
#ifdef _MSC_VER
|
||||||
|
typedef unsigned char uint8_t;
|
||||||
|
typedef unsigned short int uint16_t;
|
||||||
|
typedef unsigned int uint32_t;
|
||||||
|
#else
|
||||||
|
#include <inttypes.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/*! namespace of PRNG */
|
||||||
|
namespace xgboost{
    namespace random{
        /*! \brief seed the PRNG, forwards to srand */
        inline void Seed( uint32_t seed ){
            srand( seed );
        }

        /*! \brief return a real number uniform in [0,1) */
        inline double NextDouble(){
            return static_cast<double>( rand() ) / (static_cast<double>( RAND_MAX )+1.0);
        }
        /*! \brief return a real number uniform in (0,1) */
        inline double NextDouble2(){
            return (static_cast<double>( rand() ) + 1.0 ) / (static_cast<double>(RAND_MAX) + 2.0);
        }
    };

    namespace random{
        /*! \brief return a random number, the raw value of rand() */
        inline uint32_t NextUInt32( void ){
            return (uint32_t)rand();
        }
        /*! \brief return a random number in [0,n) */
        inline uint32_t NextUInt32( uint32_t n ){
            return (uint32_t) floor( NextDouble() * n ) ;
        }
        /*! \brief return x~N(0,1) */
        inline double SampleNormal(){
            double x,y,s;
            // draw points in the square until one falls strictly inside the unit circle, excluding the origin
            do{
                x = 2 * NextDouble2() - 1.0;
                y = 2 * NextDouble2() - 1.0;
                s = x*x + y*y;
            }while( s >= 1.0 || s == 0.0 );

            return x * sqrt( -2.0 * log(s) / s ) ;
        }

        /*! \brief return iid x,y ~N(0,1) */
        inline void SampleNormal2D( double &xx, double &yy ){
            double x,y,s;
            do{
                x = 2 * NextDouble2() - 1.0;
                y = 2 * NextDouble2() - 1.0;
                s = x*x + y*y;
            }while( s >= 1.0 || s == 0.0 );
            double t = sqrt( -2.0 * log(s) / s ) ;
            xx = x * t;
            yy = y * t;
        }
        /*! \brief return x~N(mu,sigma^2) */
        inline double SampleNormal( double mu, double sigma ){
            return SampleNormal() * sigma + mu;
        }

        /*! \brief return 1 with probability p, coin flip */
        inline int SampleBinary( double p ){
            return NextDouble() < p;
        }

        /*!
         * \brief return a sample from Gamma( alpha, beta )
         *  the generated value is divided by beta before it is returned
         * \param alpha first parameter of the distribution
         * \param beta second parameter of the distribution, used as divisor of the result
         */
        inline double SampleGamma( double alpha, double beta ) {
            if ( alpha < 1.0 ) {
                // reduce to the case alpha + 1 and rescale with a nonzero uniform number
                double u;
                do {
                    u = NextDouble();
                } while (u == 0.0);
                return SampleGamma(alpha + 1.0, beta) * pow(u, 1.0 / alpha);
            } else {
                double d,c,x,v,u;
                d = alpha - 1.0/3.0;
                c = 1.0 / sqrt( 9.0 * d );
                // rejection loop: repeat until one of the two acceptance tests succeeds
                do {
                    do {
                        x = SampleNormal();
                        v = 1.0 + c*x;
                    } while ( v <= 0.0 );
                    v = v * v * v;
                    u = NextDouble();
                } while ( (u >= (1.0 - 0.0331 * (x*x) * (x*x)))
                          && (log(u) >= (0.5 * x * x + d * (1.0 - v + log(v)))) );
                return d * v / beta;
            }
        }

        /*! \brief exchange the content of a and b through a temporary copy */
        template<typename T>
        inline void Exchange( T &a, T &b ){
            T c;
            c = a;
            a = b;
            b = c;
        }

        /*!
         * \brief randomly shuffle an array in place, require PRNG
         * \param data pointer to the first element
         * \param sz number of elements, the loop index is a 32 bit unsigned integer
         */
        template<typename T>
        inline void Shuffle( T *data, size_t sz ){
            if( sz == 0 ) return;
            for( uint32_t i = (uint32_t)sz - 1; i > 0; i-- ){
                Exchange( data[i], data[ NextUInt32( i+1 ) ] );
            }
        }
        /*! \brief randomly shuffle the data inside, require PRNG */
        template<typename T>
        inline void Shuffle( std::vector<T> &data ){
            // &data[0] must not be evaluated on an empty vector
            if( data.empty() ) return;
            Shuffle( &data[0], data.size() );
        }
    };
};
|
||||||
|
|
||||||
|
#endif
|
||||||
Loading…
x
Reference in New Issue
Block a user