adapt svdfeature tree

2014-02-07 22:38:26 -08:00 · 2014-02-07 22:38:26 -08:00 · 5d052b9e14
commit 5d052b9e14
parent bf36374678
7 changed files with 1265 additions and 4 deletions
--- a/booster/tree/xgboost_svdf_tree.hpp
+++ b/booster/tree/xgboost_svdf_tree.hpp
@ -0,0 +1,556 @@
 #ifndef _XGBOOST_APEX_TREE_HPP_
 #define _XGBOOST_APEX_TREE_HPP_
 /*!
 * \file xgboost_svdf_tree.hpp
 * \brief implementation of regression tree, with layerwise support
 *        this file is adapted from GBRT implementation in SVDFeature project
 * \author Tianqi Chen: tqchen@apex.sjtu.edu.cn, tianqi.tchen@gmail.com 
 */
 #include <algorithm>
 #include "xgboost_tree_model.h"
 #include "../../utils/xgboost_random.h"
 #include "../../utils/xgboost_matrix_csr.h"
 namespace xgboost{
    namespace booster{
        const bool rt_debug = false;
        // whether to check bugs
        const bool check_bug = false;
        const float rt_eps = 1e-5f;
        const float rt_2eps = rt_eps * 2.0f;
        inline double sqr( double a ){
            return a * a;
        }
        inline void assert_sorted( unsigned *idset, int len ){
            if( !rt_debug || !check_bug ) return;
            for( int i = 1; i < len; i ++ ){
                utils::Assert( idset[i-1] < idset[i], "idset not sorted" );
            }
        }
    };
    namespace booster{        
        // node stat used in rtree
        struct RTreeNodeStat{
            // loss chg caused by current split
            float loss_chg;
            // weight of current node
            float base_weight;
            // number of child that is leaf node known up to now
            int  leaf_child_cnt;
        };
        // structure of Regression Tree
        class RTree: public TreeModel<float,RTreeNodeStat>{        
        };
        // selecter of rtree to find the suitable candidate
        class RTSelecter{
        public:
            struct Entry{
                float  loss_chg;
                size_t start;
                int    len;
                unsigned sindex;
                float    split_value;
                Entry(){}
                Entry( float loss_chg, size_t start, int len, unsigned split_index, float split_value, bool default_left ){
                    this->loss_chg = loss_chg;
                    this->start    = start;
                    this->len      = len;
                    if( default_left ) split_index |= (1U << 31);
                    this->sindex = split_index;
                    this->split_value = split_value;
                }
                inline unsigned split_index( void ) const{
                    return sindex & ( (1U<<31) - 1U );
                }
                inline bool default_left( void ) const{
                    return (sindex >> 31) != 0;
                }
            };
        private:
            Entry best_entry;
            const TreeParamTrain &param;
        public:
            RTSelecter( const TreeParamTrain &p ):param( p ){
                memset( &best_entry, 0, sizeof(best_entry) );
                best_entry.loss_chg = 0.0f;
            }
            inline void push_back( const Entry &e ){
                if( e.loss_chg > best_entry.loss_chg ) best_entry = e;
            }
            inline const Entry & select( void ){            
                return best_entry;                
            }
        };
        // updater of rtree, allows the parameters to be stored inside, key solver
        class RTreeUpdater{
        protected:
            // training task, element of single task
            struct Task{
                // node id in tree
                int nid;
                // idset pointer, instance id in [idset,idset+len)
                unsigned *idset;
                // length of idset
                unsigned len;            
                // base_weight of parent
                float parent_base_weight;
                Task(){}
                Task( int nid, unsigned *idset, unsigned len, float pweight = 0.0f ){
                    this->nid = nid;
                    this->idset = idset;
                    this->len = len;
                    this->parent_base_weight = pweight;
                }
            };
            // sparse column entry
            struct SCEntry{
                // feature value 
                float    fvalue;
                // row index in grad
                unsigned rindex;
                SCEntry(){}
                SCEntry( float fvalue, unsigned rindex ){
                    this->fvalue = fvalue; this->rindex = rindex;
                }
                inline bool operator<( const SCEntry &p ) const{
                    return fvalue < p.fvalue;
                }
            };
        private:
            // training parameter
            const TreeParamTrain &param;
            // parameters, reference
            RTree &tree;
            std::vector<float> &grad;
            std::vector<float> &hess;
            const FMatrixS::Image &smat;
            const std::vector<unsigned> &group_id;
        private:
            // maximum depth up to now
            int max_depth;
            // number of nodes being pruned
            int num_pruned;
            // stack to store current task
            std::vector<Task> task_stack;
            // temporal space for index set
            std::vector<unsigned> idset;
        private:
            // task management: NOTE DFS here
            inline void add_task( Task tsk ){
                task_stack.push_back( tsk );
            }
            inline bool next_task( Task &tsk ){
                if( task_stack.size() == 0 ) return false;
                tsk = task_stack.back(); 
                task_stack.pop_back();
                return true;
            } 
        private:
            // try to prune off current leaf, return true if successful
            inline void try_prune_leaf( int nid, int depth ){
                if( tree[ nid ].is_root() ) return;
                int pid = tree[ nid ].parent();
                RTree::NodeStat &s = tree.stat( pid );
                s.leaf_child_cnt ++;
                if( s.leaf_child_cnt >= 2 && param.need_prune( s.loss_chg, depth - 1 ) ){
                    // need to be pruned
                    tree.ChangeToLeaf( pid, param.learning_rate * s.base_weight );
                    // add statistics to number of nodes pruned
                    num_pruned += 2;
                    // tail recursion
                    this->try_prune_leaf( pid, depth - 1 );
                }
            }        
            // make leaf for current node :)
            inline void make_leaf( Task tsk, double sum_grad, double sum_hess, bool compute ){
                for( unsigned i = 0; i < tsk.len; i ++ ){
                    const unsigned ridx = tsk.idset[i];
                    if( compute ){
                        sum_grad += grad[ ridx ];
                        sum_hess += hess[ ridx ];
                }
                }
                tree[ tsk.nid ].set_leaf( param.learning_rate * param.CalcWeight( sum_grad, sum_hess, tsk.parent_base_weight ) );
                this->try_prune_leaf( tsk.nid, tree.GetDepth( tsk.nid ) );
            }
        private:
            // make split for current task, re-arrange positions in idset
            inline void make_split( Task tsk, const SCEntry *entry, int num, float loss_chg, double base_weight ){
                // before split, first prepare statistics
                RTree::NodeStat &s = tree.stat( tsk.nid );
                s.loss_chg = loss_chg; 
                s.leaf_child_cnt = 0;
                s.base_weight = static_cast<float>( base_weight );
                // add childs to current node
                tree.AddChilds( tsk.nid );
                // assert that idset is sorted
                assert_sorted( tsk.idset, tsk.len );
                // use merge sort style to get the solution
                std::vector<unsigned> qset;
                for( int i = 0; i < num; i ++ ){
                    qset.push_back( entry[i].rindex );
                }
                std::sort( qset.begin(), qset.end() );
                // do merge sort style, make the other set, remove elements in qset
                for( unsigned i = 0, top = 0; i < tsk.len; i ++ ){
                    if( top < qset.size() ){
                        if( tsk.idset[ i ] != qset[ top ] ){
                            tsk.idset[ i - top ] = tsk.idset[ i ];
                        }else{
                            top ++;
                        }
                    }else{
                        tsk.idset[ i - qset.size() ] = tsk.idset[ i ];
                    }
                }
                // get two parts 
                RTree::Node &n = tree[ tsk.nid ];
                Task def_part( n.default_left() ? n.cleft() : n.cright(), tsk.idset, tsk.len - qset.size(), s.base_weight );
                Task spl_part( n.default_left() ? n.cright(): n.cleft() , tsk.idset + def_part.len, qset.size(), s.base_weight );  
                // fill back split part
                for( unsigned i = 0; i < spl_part.len; i ++ ){
                    spl_part.idset[ i ] = qset[ i ];
                }
                // add tasks to the queue
                this->add_task( def_part ); 
                this->add_task( spl_part );
            }
            // enumerate split point of the tree
            inline void enumerate_split( RTSelecter &sglobal, int tlen,
                                         double rsum_grad, double rsum_hess, double root_cost,
                                         const SCEntry *entry, size_t start, size_t end, 
                                         int findex, float parent_base_weight ){
                // local selecter
                RTSelecter slocal( param );
                if( param.default_direction != 1 ){
                    // forward process, default right
                    double csum_grad = 0.0, csum_hess = 0.0;
                    for( size_t j = start; j < end; j ++ ){
                        const unsigned ridx = entry[ j ].rindex;
                        csum_grad += grad[ ridx ];
                        csum_hess += hess[ ridx ];
                        // check for split
                        if( j == end - 1 || entry[j].fvalue + rt_2eps < entry[ j + 1 ].fvalue ){
                            if( csum_hess < param.min_child_weight ) continue;
                            const double dsum_hess = rsum_hess - csum_hess;
                            if( dsum_hess < param.min_child_weight ) break;                        
                            // change of loss 
                            double loss_chg = 
                            param.CalcCost( csum_grad, csum_hess, parent_base_weight ) + 
                                param.CalcCost( rsum_grad - csum_grad, dsum_hess, parent_base_weight ) - root_cost;
                            const int clen = static_cast<int>( j + 1 - start );
                            // add candidate to selecter
                            slocal.push_back( RTSelecter::Entry( loss_chg, start, clen, findex, 
                                                                 j == end - 1 ? entry[j].fvalue + rt_eps : 0.5 * (entry[j].fvalue+entry[j+1].fvalue),
                                                                 false ) );
                        }
                    }
                }
                if( param.default_direction != 2 ){
                    // backward process, default left
                    double csum_grad = 0.0, csum_hess = 0.0;
                    for( size_t j = end; j > start; j -- ){
                        const unsigned ridx = entry[ j - 1 ].rindex;
                        csum_grad += grad[ ridx ];
                        csum_hess += hess[ ridx ];
                        // check for split
                        if( j == start + 1 || entry[ j - 2 ].fvalue + rt_2eps < entry[ j - 1 ].fvalue ){
                            if( csum_hess < param.min_child_weight ) continue;
                            const double dsum_hess = rsum_hess - csum_hess;
                            if( dsum_hess < param.min_child_weight ) break;
                            double loss_chg = param.CalcCost( csum_grad, csum_hess, parent_base_weight ) + 
                                param.CalcCost( rsum_grad - csum_grad, dsum_hess, parent_base_weight ) - root_cost;
                            const int clen = static_cast<int>( end - j + 1 );
                            // add candidate to selecter
                            slocal.push_back( RTSelecter::Entry( loss_chg, j - 1, clen, findex,
                                                                 j == start + 1 ? entry[j-1].fvalue - rt_eps : 0.5 * (entry[j-2].fvalue + entry[j-1].fvalue), 
                                                                 true ) );
                        }
                    }
                }
                sglobal.push_back( slocal.select() );
            }
        private:
            // temporal storage for expand column major
            std::vector<size_t>  tmp_rptr;        
            // find split for current task, another implementation of expand in column major manner
            // should be more memory frugal, avoid global sorting across feature       
            inline void expand( Task tsk ){
                // assert that idset is sorted
                // if reach maximum depth, make leaf from current node
                int depth = tree.GetDepth( tsk.nid );
                // update statistiss
                if( depth > max_depth ) max_depth = depth; 
                // if bigger than max depth
                if( depth >= param.max_depth ){
                    this->make_leaf( tsk, 0.0, 0.0, true ); return;
                }
                // convert to column major CSR format
                const int nrows = tree.param.num_feature;
                if( tmp_rptr.size() == 0 ){
                    // initialize tmp storage in first usage
                    tmp_rptr.resize( nrows + 1 ); 
                    std::fill( tmp_rptr.begin(), tmp_rptr.end(), 0 );
                }
                // records the columns
                std::vector<SCEntry> entry;
                // records the active features
                std::vector<size_t>  aclist;
                utils::SparseCSRMBuilder<SCEntry,true> builder( tmp_rptr, entry, aclist );
                builder.InitBudget( nrows );
                // statistics of root
                double rsum_grad = 0.0, rsum_hess = 0.0;            
                for( unsigned i = 0; i < tsk.len; i ++ ){
                    const unsigned ridx = tsk.idset[i];
                    rsum_grad  += grad[ ridx ];
                    rsum_hess  += hess[ ridx ];
                    FMatrixS::Line sp = smat[ ridx ];
                    for( unsigned j = 0; j < sp.len; j ++ ){
                        builder.AddBudget( sp.findex[j] );
                    }
                }
                // if minimum split weight is not meet
                if( param.cannot_split( rsum_hess, depth )  ){
                    this->make_leaf( tsk, rsum_grad, rsum_hess, false ); builder.Cleanup(); return; 
                }
                builder.InitStorage();
                for( unsigned i = 0; i < tsk.len; i ++ ){
                    const unsigned ridx = tsk.idset[i];
                    FMatrixS::Line sp = smat[ ridx ];
                    for( unsigned j = 0; j < sp.len; j ++ ){
                        builder.PushElem( sp.findex[j], SCEntry( sp.fvalue[j], ridx ) );
                }                
                }
                // --- end of building column major matrix ---
                // after this point, tmp_rptr and entry is ready to use
                // global selecter
                RTSelecter sglobal( param );
                // cost root 
                const double root_cost = param.CalcRootCost( rsum_grad, rsum_hess );
                // KEY: layerwise, weight of current node if it is leaf
                const double base_weight = param.CalcWeight( rsum_grad, rsum_hess, tsk.parent_base_weight );
                // enumerate feature index
                for( size_t i = 0; i < aclist.size(); i ++ ){
                    int findex = static_cast<int>( aclist[i] );                
                    size_t start = tmp_rptr[ findex ];
                    size_t end   = tmp_rptr[ findex + 1 ];
                    utils::Assert( start < end, "bug" );
                    // local sort can be faster when the features are sparse
                    std::sort( entry.begin() + start, entry.begin() + end );
                    // local selecter
                    this->enumerate_split( sglobal, tsk.len,
                                           rsum_grad, rsum_hess, root_cost,
                                           &entry[0], start, end, findex, base_weight );
                }
                // Cleanup tmp_rptr for next use
                builder.Cleanup();
                // get the best solution
                const RTSelecter::Entry &e = sglobal.select();
                // allowed to split
                if( e.loss_chg > rt_eps ){
                    // add splits
                    tree[ tsk.nid ].set_split( e.split_index(), e.split_value, e.default_left() );
                    // re-arrange idset, push tasks
                    this->make_split( tsk, &entry[ e.start ], e.len, e.loss_chg, base_weight ); 
                }else{
                    // make leaf if we didn't meet requirement
                    this->make_leaf( tsk, rsum_grad, rsum_hess, false );
                }
            }
        private:
            // initialize the tasks
            inline void init_tasks( size_t ngrads ){
                // add group partition if necessary
                if( group_id.size() == 0 ){       
                    if( param.subsample > 1.0f - 1e-6f ){ 
                        idset.resize( 0 );
                        for( size_t i = 0; i < ngrads; i ++ ){
                            if( hess[i] < 0.0f ) continue;
                            idset.push_back( (unsigned)i );
                        } 
                    }else{
                        idset.resize( 0 );
                        for( size_t i = 0; i < ngrads; i ++ ){
                            if( random::SampleBinary( param.subsample ) != 0 ){
                                idset.push_back( (unsigned)i );
                            }
                        } 
                    }
                    this->add_task( Task( 0, &idset[0], idset.size() ) ); return;
                }
                utils::Assert( group_id.size() == ngrads, "number of groups must be exact" );            
                {// new method for grouping, use CSR builder
                    std::vector<size_t>   rptr;
                    utils::SparseCSRMBuilder<unsigned> builder( rptr, idset );
                    builder.InitBudget( tree.param.num_roots );
                    for( size_t i = 0; i < group_id.size(); i ++ ){
                        // drop invalid elements
                        if( hess[ i ] < 0.0f ) continue;
                        utils::Assert( group_id[ i ] < (unsigned)tree.param.num_roots, 
                                       "group id exceed number of roots" );
                        builder.AddBudget( group_id[ i ] );
                    }
                    builder.InitStorage();
                    for( size_t i = 0; i < group_id.size(); i ++ ){
                        // drop invalid elements
                        if( hess[ i ] < 0.0f ) continue;
                        builder.PushElem( group_id[ i ], static_cast<unsigned>(i) );
                    }
                    for( size_t i = 1; i < rptr.size(); i ++ ){
                        const size_t start = rptr[ i - 1 ], end = rptr[ i ];
                        if( start < end ){
                            this->add_task( Task( i - 1, &idset[ start ], end - start ) );
                        }
                    }
                }
            }
        public:
            RTreeUpdater( const TreeParamTrain &pparam, 
                          RTree &ptree,
                          std::vector<float> &pgrad,
                          std::vector<float> &phess,
                          const FMatrixS::Image &psmat, 
                          const std::vector<unsigned> &pgroup_id ):
                param( pparam ), tree( ptree ), grad( pgrad ), hess( phess ),
                smat( psmat ), group_id( pgroup_id ){
            }
            inline int do_boost( int &num_pruned ){
                this->init_tasks( grad.size() );
                this->max_depth = 0;
                this->num_pruned = 0;
                Task tsk;
                while( this->next_task( tsk ) ){
                    this->expand( tsk );
                }
                num_pruned = this->num_pruned;
                return max_depth;
            }
        };
        class RTreeTrainer : public IBooster{
        private:
            int silent;
            // tree of current shape 
            RTree tree;
            TreeParamTrain param;
        private:
            std::vector<float> tmp_feat;
            std::vector<bool>  tmp_funknown;
            inline void init_tmpfeat( void ){
                if( tmp_feat.size() != (size_t)tree.param.num_feature ){
                    tmp_feat.resize( tree.param.num_feature );
                    tmp_funknown.resize( tree.param.num_feature );
                    std::fill( tmp_funknown.begin(), tmp_funknown.end(), true );
                }
            }
        public:
            virtual void SetParam( const char *name, const char *val ){
                if( !strcmp( name, "silent") )  silent = atoi( val );
                param.SetParam( name, val );
                tree.param.SetParam( name, val );
            }
            virtual void LoadModel( utils::IStream &fi ){
                tree.LoadModel( fi );
            }
            virtual void SaveModel( utils::IStream &fo ) const{
                tree.SaveModel( fo );
            }
            virtual void InitModel( void ){
                tree.InitModel();
            }
        private:
            inline int get_next( int pid, float fvalue, bool is_unknown ){
                float split_value = tree[ pid ].split_cond();
                if( is_unknown ){
                    if( tree[ pid ].default_left() ) return tree[ pid ].cleft();
                else return tree[ pid ].cright();
                }else{
                    if( fvalue < split_value ) return tree[ pid ].cleft();
                    else return tree[ pid ].cright();
                }
            }
        public:
            virtual void DoBoost( std::vector<float> &grad, 
                                  std::vector<float> &hess,
                                  const FMatrixS::Image &smat,
                                  const std::vector<unsigned> &group_id ){
                utils::Assert( grad.size() < UINT_MAX, "number of instance exceed what we can handle" );
                if( !silent ){
                    printf( "\nbuild GBRT with %u instances\n", (unsigned)grad.size() );
                }
                // start with a id set
                RTreeUpdater updater( param, tree, grad, hess, smat, group_id );
                int num_pruned;
                tree.param.max_depth = updater.do_boost( num_pruned );
                if( !silent ){
                    printf( "tree train end, %d roots, %d extra nodes, %d pruned nodes ,max_depth=%d\n", 
                            tree.param.num_roots, tree.num_extra_nodes(), num_pruned, tree.param.max_depth );
                }
            }
            virtual int GetLeafIndex( const std::vector<float> &feat,
                                      const std::vector<bool>  &funknown,
                                  unsigned gid = 0 ){
                // start from groups that belongs to current data
                int pid = (int)gid;
                // tranverse tree
                while( !tree[ pid ].is_leaf() ){
                    unsigned split_index = tree[ pid ].split_index();
                    pid = this->get_next( pid, feat[ split_index ], funknown[ split_index ] );
                }
                return pid;
            }
            virtual float Predict( const FMatrixS::Line &feat, unsigned gid = 0 ){
                this->init_tmpfeat();
                for( unsigned i = 0; i < feat.len; i ++ ){
                    utils::Assert( feat.findex[i] < (unsigned)tmp_funknown.size() , "input feature execeed bound" );
                    tmp_funknown[ feat.findex[i] ] = false;
                    tmp_feat[ feat.findex[i] ] = feat.fvalue[i];
                } 
                int pid = this->GetLeafIndex( tmp_feat, tmp_funknown, gid );
                // set back
                for( unsigned i = 0; i < feat.len; i ++ ){
                    tmp_funknown[ feat.findex[i] ] = true;
                }            
                return tree[ pid ].leaf_value();
            }
            virtual float Predict( const std::vector<float> &feat, 
                                   const std::vector<bool>  &funknown,
                                   unsigned gid = 0 ){
                utils::Assert( feat.size() >= (size_t)tree.param.num_feature,
                               "input data smaller than num feature" );
                int pid = this->GetLeafIndex( feat, funknown, gid );
                return tree[ pid ].leaf_value();
            }        
        public:
            RTreeTrainer( void ){ silent = 0; }
            virtual ~RTreeTrainer( void ){}
        };
    };
 };
 #endif
--- a/booster/tree/xgboost_tree_model.h
+++ b/booster/tree/xgboost_tree_model.h
@ -0,0 +1,415 @@
 #ifndef _XGBOOST_TREE_MODEL_H_
 #define _XGBOOST_TREE_MODEL_H_
 /*!
 * \file xgboost_tree_model.h
 * \brief generic definition of model structure used in tree models
 *        used to support learning of boosting tree
 * \author Tianqi Chen: tianqi.tchen@gmail.com
 */
 #include <cstring>
 #include "../../utils/xgboost_utils.h"
 #include "../../utils/xgboost_stream.h"
 namespace xgboost{
    namespace booster{
        /*!
         * \brief template class of TreeModel 
         * \tparam TSplitCond data type to indicate split condition
         * \tparam TNodeStat auxiliary statistics of node to help tree building
         */
        template<typename TSplitCond,typename TNodeStat>
        class TreeModel{
        public:
            /*! \brief data type to indicate split condition */
            typedef TNodeStat  NodeStat;
            /*! \brief auxiliary statistics of node to help tree building */
            typedef TSplitCond SplitCond;
        public:
            /*! \brief parameters of the tree */
            struct Param{
                /*! \brief number of start root */
                int num_roots;
                /*! \brief total number of nodes */
                int num_nodes;
                /*!\brief number of deleted nodes */
                int num_deleted;
                /*! \brief maximum depth, this is a statistics of the tree */
                int max_depth;
                /*! \brief  number of features used for tree construction */
                int num_feature;
                /*! \brief reserved part */
                int reserved[ 32 ];
                /*! \brief constructor */
                Param( void ){
                    max_depth = 0;
                    memset( reserved, 0, sizeof( reserved ) );
                }
                /*! 
                 * \brief set parameters from outside 
                 * \param name name of the parameter
                 * \param val  value of the parameter
                 */
                inline void SetParam( const char *name, const char *val ){
                    if( !strcmp("num_roots", name ) )    num_roots = atoi( val );
                    if( !strcmp("num_feature", name ) )  num_feature = atoi( val );
                }
            };
            /*! \brief tree node */
            class Node{
            private:
                friend class TreeModel<TSplitCond,TNodeStat>;
                /*! 
                 * \brief in leaf node, we have weights, in non-leaf nodes, 
                 *        we have split condition 
                 */
                union Info{
                    float leaf_value;
                    TSplitCond split_cond;
                };
            private:
                // pointer to parent, highest bit is used to indicate whether it's a left child or not 
                int sparent;
                // pointer to left, right
                int left, right;
                // split feature index, left split or right split depends on the highest bit
                unsigned sindex;            
                // extra info
                Info info;
            private:
                inline void set_parent( int pidx, bool is_left_child = true ){
                    if( is_left_child ) pidx |= (1U << 31);
                    this->sparent = pidx;
                }
            public:
                /*! \brief index of left child */
                inline int cleft( void ) const{
                    return this->left;
                }
                /*! \brief index of right child */
                inline int cright( void ) const{
                    return this->right;
                }
                /*! \brief feature index of split condition */
                inline unsigned split_index( void ) const{
                    return sindex & ( (1U<<31) - 1U );
                }
                /*! \brief when feature is unknown, whether goes to left child */
                inline bool default_left( void ) const{
                    return (sindex >> 31) != 0;
                } 
                /*! \brief whether current node is leaf node */
                inline bool is_leaf( void ) const{
                    return left == -1;
                }
                /*! \brief get leaf value of leaf node */
                inline float leaf_value( void ) const{
                    return (this->info).leaf_value;
                }
                /*! \brief get split condition of the node */
                inline TSplitCond split_cond( void ) const{
                    return (this->info).split_cond;
                }
                /*! \brief get parent of the node */
                inline int parent( void ) const{
                    return sparent & ( (1U << 31) - 1 );
                } 
                /*! \brief whether current node is left child */
                inline bool is_left_child( void ) const{
                    return ( sparent & (1U << 31)) != 0;
                }
                /*! \brief whether current node is root */
                inline bool is_root( void ) const{
                    return sparent == -1;
                }
                /*! 
                 * \brief set the right child 
                 * \param nide node id to right child
                 */
                inline void set_right_child( int nid ){
                    this->right = nid;
                }
                /*! 
                 * \brief set split condition of current node 
                 * \param split_index feature index to split
                 * \param split_cond  split condition
                 * \param default_left the default direction when feature is unknown
                 */
                inline void set_split( unsigned split_index, TSplitCond split_cond, bool default_left = false ){
                    if( default_left ) split_index |= (1U << 31);
                    this->sindex = split_index;
                    (this->info).split_cond = split_cond;
                }
                /*! 
                 * \brief set the leaf value of the node
                 * \param value leaf value
                 * \param right right index, could be used to store 
                 *        additional information
                 */
                inline void set_leaf( float value, int right = -1 ){
                    (this->info).leaf_value = value;
                    this->left = -1;
                    this->right = right;
                }
            };
        protected:
            // vector of nodes
            std::vector<Node> nodes;
            // stats of nodes
            std::vector<TNodeStat> stats;
        protected:
            // free node space, used during training process
            std::vector<int>  deleted_nodes;
            // allocate a new node, 
            // !!!!!! NOTE: may cause BUG here, nodes.resize
            inline int AllocNode( void ){
                if( param.num_deleted != 0 ){
                    int nd = deleted_nodes.back();
                    deleted_nodes.pop_back();
                    param.num_deleted --;
                    return nd;
                }
                int nd = param.num_nodes ++;
                nodes.resize( param.num_nodes );
                stats.resize( param.num_nodes );
                return nd;
            }
            // delete a tree node
            inline void DeleteNode( int nid ){
                utils::Assert( nid >= param.num_roots, "can not delete root");
                deleted_nodes.push_back( nid );
                nodes[ nid ].set_parent( -1 );
                param.num_deleted ++;
            }
        public:
            /*! 
             * \brief change a non leaf node to a leaf node, delete its children
             * \param rid node id of the node
             * \param new leaf value
             */
            inline void ChangeToLeaf( int rid, float value ){
                utils::Assert( nodes[ nodes[rid].left  ].is_leaf(), "can not delete a non termial child");
                utils::Assert( nodes[ nodes[rid].right ].is_leaf(), "can not delete a non termial child");
                this->DeleteNode( nodes[ rid ].left ); 
                this->DeleteNode( nodes[ rid ].right );
                nodes[ rid ].set_leaf( value );
            }
        public:
            /*! \brief model parameter */
            Param param;
        public:
            /*! \brief constructor */
            TreeModel( void ){
                param.num_nodes = 1;
                param.num_roots = 1;
                param.num_deleted = 0;
                nodes.resize( 1 );
            }
            /*! \brief get node given nid */
            inline Node &operator[]( int nid ){
                return nodes[ nid ];
            }
            /*! \brief get node statistics given nid */
            inline NodeStat &stat( int nid ){
                return stats[ nid ];
            }
            /*! \brief initialize the model */
            inline void InitModel( void ){
                param.num_nodes = param.num_roots;
                nodes.resize( param.num_nodes );
                stats.resize( param.num_nodes );
                for( int i = 0; i < param.num_nodes; i ++ ){
                    nodes[i].set_leaf( 0.0f );
                    nodes[i].set_parent( -1 );
                }
            }
            /*! 
             * \brief load model from stream
             * \param fi input stream
             */
            inline void LoadModel( utils::IStream &fi ){
                utils::Assert( fi.Read( &param, sizeof(Param) ) > 0, "TreeModel" );
                nodes.resize( param.num_nodes );
                utils::Assert( fi.Read( &nodes[0], sizeof(Node) * nodes.size() ) > 0, "TreeModel::Node" );
                deleted_nodes.resize( 0 );
                for( int i = param.num_roots; i < param.num_nodes; i ++ ){
                    if( nodes[i].is_root() ) deleted_nodes.push_back( i );
                }
                utils::Assert( (int)deleted_nodes.size() == param.num_deleted, "number of deleted nodes do not match" );
            }
            /*! 
             * \brief save model to stream
             * \param fo output stream
             */
            inline void SaveModel( utils::IStream &fo ) const{
                utils::Assert( param.num_nodes == (int)nodes.size() );
                fo.Write( &param, sizeof(Param) );
                fo.Write( &nodes[0], sizeof(Node) * nodes.size() );
            }
            /*! 
             * \brief add child nodes to node
             * \param nid node id to add childs
             */
            inline void AddChilds( int nid ){
                int pleft  = this->AllocNode();
                int pright = this->AllocNode();
                nodes[ nid ].left  = pleft;
                nodes[ nid ].right = pright;
                nodes[ nodes[ nid ].left  ].set_parent( nid, true );
                nodes[ nodes[ nid ].right ].set_parent( nid, false );
            }
            /*! 
             * \brief only add a right child to a leaf node 
             * \param node id to add right child
             */
            inline void AddRightChild( int nid ){
                int pright = this->AllocNode();
                nodes[ nid ].right  = pright;
                nodes[ nodes[ nid ].right  ].set_parent( nid, false );
            }
            /*!
             * \brief get current depth
             * \param nid node id
             * \param pass_rchild whether right child is not counted in depth
             */
            inline int GetDepth( int nid, bool pass_rchild = false ) const{
                int depth = 0;
                while( !nodes[ nid ].is_root() ){
                    if( !pass_rchild || nodes[ nid ].is_left_child() ) depth ++;
                    nid = nodes[ nid ].parent();
                }
                return depth;
            }
            /*! \brief number of extra nodes besides the root */
            inline int num_extra_nodes( void ) const {
                return param.num_nodes - param.num_roots - param.num_deleted;
            }
        };
    };
    namespace booster{
        /*! \brief training parameters for regression tree */
        struct TreeParamTrain{
            // learning step size for a time
            float learning_rate;
            // minimum loss change required for a split
            float min_split_loss;
            // maximum depth of a tree
            int   max_depth;
            //----- the rest parameters are less important ----
            // minimum amount of hessian(weight) allowed in a child
            float min_child_weight;
            // weight decay parameter used to control leaf fitting
            float reg_lambda;
            // reg method
            int   reg_method;
            // default direction choice
            int   default_direction;
            // whether we want to do subsample
            float subsample;
            // whether to use layerwise aware regularization
            int   use_layerwise;
            /*! \brief constructor */
            TreeParamTrain( void ){
                learning_rate = 0.3f;
                min_child_weight = 1.0f;
                max_depth = 6;
                reg_lambda = 1.0f;
                reg_method = 2;
                default_direction = 0;
                subsample = 1.0f;
                use_layerwise = 0;
            }
            /*! 
             * \brief set parameters from outside 
             * \param name name of the parameter
             * \param val  value of the parameter
             */            
            inline void SetParam( const char *name, const char *val ){
                // sync-names 
                if( !strcmp( name, "gamma") )  min_split_loss = (float)atof( val );
                if( !strcmp( name, "eta") )    learning_rate  = (float)atof( val );
                if( !strcmp( name, "lambda") ) reg_lambda = (float)atof( val );
                // normal tree prameters
                if( !strcmp( name, "learning_rate") )     learning_rate = (float)atof( val );
                if( !strcmp( name, "min_child_weight") )  min_child_weight = (float)atof( val );
                if( !strcmp( name, "min_split_loss") )    min_split_loss = (float)atof( val );
                if( !strcmp( name, "max_depth") )         max_depth = atoi( val );           
                if( !strcmp( name, "reg_lambda") )        reg_lambda = (float)atof( val );
                if( !strcmp( name, "reg_method") )        reg_method = (float)atof( val );
                if( !strcmp( name, "subsample") )         subsample  = (float)atof( val );
                if( !strcmp( name, "use_layerwise") )     use_layerwise = atoi( val );
                if( !strcmp( name, "default_direction") ) {
                    if( !strcmp( val, "learn") )  default_direction = 0;
                    if( !strcmp( val, "left") )   default_direction = 1;
                    if( !strcmp( val, "right") )  default_direction = 2;
                }
            }
        protected:
            // functions for L1 cost
            static inline double ThresholdL1( double w, double lambda ){
                if( w > +lambda ) return w - lambda;
                if( w < -lambda ) return w + lambda;
                return 0.0;
            }
            inline double CalcWeight( double sum_grad, double sum_hess )const{
                if( sum_hess < min_child_weight ){
                    return 0.0;
                }else{
                    switch( reg_method ){
                    case 1: return - ThresholdL1( sum_grad, reg_lambda ) / sum_hess;
                    case 2: return - sum_grad / ( sum_hess + reg_lambda );
                        // elstic net
                    case 3: return - ThresholdL1( sum_grad, 0.5 * reg_lambda ) / ( sum_hess + 0.5 * reg_lambda );
                    default: return - sum_grad / sum_hess;
                    }
                }
            }
        private:
            inline static double Sqr( double a ){
                return a * a;
            }
        public:
            // calculate the cost of loss function
            inline double CalcCost( double sum_grad, double sum_hess ) const{
                if( sum_hess < min_child_weight ){
                    return 0.0;
                }
                switch( reg_method ){
                case 1 : return Sqr( ThresholdL1( sum_grad, reg_lambda ) ) / sum_hess;
                case 2 : return Sqr( sum_grad ) / ( sum_hess + reg_lambda );
                    // elstic net
                case 3 : return Sqr( ThresholdL1( sum_grad, 0.5 * reg_lambda ) ) / ( sum_hess + 0.5 * reg_lambda );
                default: return Sqr( sum_grad ) / sum_hess;
                }        
            }
            // KEY:layerwise
            // calculate cost of root
            inline double CalcRootCost( double sum_grad, double sum_hess ) const{
                if( use_layerwise == 0 ) return this->CalcCost( sum_grad, sum_hess );
                else return 0.0;
            }
            // KEY:layerwise
            // calculate the cost after split
            // base_weight: the base_weight of parent           
            inline double CalcCost( double sum_grad, double sum_hess, double base_weight ) const{
                if( use_layerwise == 0 ) return this->CalcCost( sum_grad, sum_hess );
                else return this->CalcCost( sum_grad + sum_hess * base_weight, sum_hess );
            }
            // calculate the weight of leaf
            inline double CalcWeight( double sum_grad, double sum_hess, double parent_base_weight )const{
                if( use_layerwise == 0 ) return CalcWeight( sum_grad, sum_hess );
                else return parent_base_weight + CalcWeight( sum_grad + parent_base_weight * sum_hess, sum_hess );
            }           
            /*! \brief given the loss change, whether we need to invode prunning */
            inline bool need_prune( double loss_chg, int depth ) const{
                return loss_chg < min_split_loss;
            }
            /*! \brief whether we can split with current hessian */
            inline bool cannot_split( double sum_hess, int depth ) const{
                return sum_hess < min_child_weight * 2.0; 
            }
        };
    };
 };
 #endif
--- a/booster/xgboost.cpp
+++ b/booster/xgboost.cpp
@ -10,6 +10,9 @@
 #include "xgboost.h"
 #include "../utils/xgboost_utils.h"
 #include "xgboost_gbmbase.h"
 // implementations of boosters
 #include "tree/xgboost_svdf_tree.hpp"
 namespace xgboost{
    namespace booster{
        /*! 
--- a/booster/xgboost.h
+++ b/booster/xgboost.h
@ -3,6 +3,7 @@
 /*!
 * \file xgboost.h
 * \brief the general gradient boosting interface
 * 
 * \author Tianqi Chen: tianqi.tchen@gmail.com
 */
 #include <vector>
--- a/booster/xgboost_gbmbase.h
+++ b/booster/xgboost_gbmbase.h
@ -53,7 +53,7 @@ namespace xgboost{
                /*! \brief type of tree used */
                int booster_type;
                /*! \brief number of root: default 0, means single tree */
-                int num_root;
+                int num_roots;
                /*! \brief number of features to be used by boosters */
                int num_feature;
                /*! \brief size of predicton buffer allocated for buffering boosting computation */
@ -69,7 +69,7 @@ namespace xgboost{
                Param( void ){
                    num_boosters = 0; 
                    booster_type = 0;
-                    num_root = num_feature = 0;                    
+                    num_roots = num_feature = 0;                    
                    do_reboost = 0;
                    num_pbuffer = 0;
                    memset( reserved, 0, sizeof( reserved ) );     
@ -83,7 +83,7 @@ namespace xgboost{
                    if( !strcmp("booster_type", name ) )     booster_type = atoi( val );
                    if( !strcmp("num_pbuffer", name ) )      num_pbuffer = atoi( val );
                    if( !strcmp("do_reboost", name ) )       do_reboost  = atoi( val );
-                    if( !strcmp("bst:num_root", name ) )     num_root = atoi( val );
+                    if( !strcmp("bst:num_roots", name ) )    num_roots = atoi( val );
                    if( !strcmp("bst:num_feature", name ) )  num_feature = atoi( val );
                }
            };
@ -98,9 +98,12 @@ namespace xgboost{
             * \param val  value of the parameter
             */
            inline void SetParam( const char *name, const char *val ){
-                if( strncmp( name, "bst:", 4 ) == 0 ){
+                if( !strncmp( name, "bst:", 4 ) ){
                    cfg.PushBack( name + 4, val );
                }
                if( !strcmp( name, "silent") ){
                    cfg.PushBack( name, val );
                }
                if( boosters.size() == 0 ) param.SetParam( name, val );
            }
            /*! 
--- a/utils/xgboost_matrix_csr.h
+++ b/utils/xgboost_matrix_csr.h
@ -0,0 +1,152 @@
 /*!
 * \file xgboost_matrix_csr.h
 * \brief this file defines some easy to use STL based class for in memory sparse CSR matrix
 * \author Tianqi Chen: tianqi.tchen@gmail.com
 */
 #ifndef _XGBOOST_MATRIX_CSR_H_
 #define _XGBOOST_MATRIX_CSR_H_
 #include <vector>
 #include <algorithm>
 #include "xgboost_utils.h"
 namespace xgboost{
    namespace utils{
        /*! 
         * \brief a class used to help construct CSR format matrix, 
         *        can be used to convert row major CSR to column major CSR
         * \tparam IndexType type of index used to store the index position, usually unsigned or size_t
         * \tparam whether enabling the usage of aclist, this option must be enabled manually
         */
        template<typename IndexType,bool UseAcList = false>
        struct SparseCSRMBuilder{
        private:
            /*! \brief dummy variable used in the indicator matrix construction */
            std::vector<size_t> dummy_aclist;
            /*! \brief pointer to each of the row */
            std::vector<size_t>    &rptr;
            /*! \brief index of nonzero entries in each row */
            std::vector<IndexType> &findex;
            /*! \brief a list of active rows, used when many rows are empty */
            std::vector<size_t>    &aclist;
        public:
            SparseCSRMBuilder( std::vector<size_t> &p_rptr,
                               std::vector<IndexType> &p_findex )
                :rptr(p_rptr), findex( p_findex ), aclist( dummy_aclist ){
                Assert( !UseAcList, "enabling bug" );
            }  
            /*! \brief use with caution! rptr must be cleaned before use */     
            SparseCSRMBuilder( std::vector<size_t> &p_rptr,
                               std::vector<IndexType> &p_findex,
                               std::vector<size_t> &p_aclist )
                :rptr(p_rptr), findex( p_findex ), aclist( p_aclist ){
                Assert( UseAcList, "must manually enable the option use aclist" );
            }
        public:
            /*! 
             * \brief step 1: initialize the number of rows in the data
             * \nrows number of rows in the matrix 
             */
            inline void InitBudget( size_t nrows ){
                if( !UseAcList ){
                    rptr.resize( nrows + 1 );
                    std::fill( rptr.begin(), rptr.end(), 0 );
                }else{
                    Assert( nrows + 1 == rptr.size(), "rptr must be initialized already" );
                    this->Cleanup();
                }
            }
            /*! 
             * \brief step 2: add budget to each rows, this function is called when aclist is used
             * \param row_id the id of the row
             * \param nelem  number of element budget add to this row
             */
            inline void AddBudget( size_t row_id, size_t nelem = 1 ){
                if( UseAcList ){
                    if( rptr[ row_id + 1 ] == 0 ) aclist.push_back( row_id );
                }
                rptr[ row_id + 1 ] += nelem;
            }
            /*! \brief step 3: initialize the necessary storage */
            inline void InitStorage( void ){
                // initialize rptr to be beginning of each segment
                size_t start = 0;
                if( !UseAcList ){
                    for( size_t i = 1; i < rptr.size(); i ++ ){
                        size_t rlen = rptr[ i ];
                        rptr[ i ] = start;
                        start += rlen;
                    }
                }else{
                    // case with active list
                    std::sort( aclist.begin(), aclist.end() );
                    for( size_t i = 0; i < aclist.size(); i ++ ){
                        size_t ridx = aclist[ i ];
                        size_t rlen = rptr[ ridx + 1 ];
                        rptr[ ridx + 1 ] = start;
                        // set previous rptr to right position if previous feature is not active
                        if( i == 0 || ridx != aclist[i-1] + 1 ) rptr[ ridx ] = start;
                        start += rlen;
                    }
                }
                findex.resize( start );
            }
            /*! 
             * \brief step 4: 
             * used in indicator matrix construction, add new 
             * element to each row, the number of calls shall be exactly same as add_budget 
             */
            inline void PushElem( size_t row_id, IndexType col_id ){ 
                size_t &rp = rptr[ row_id + 1 ];
                findex[ rp ++ ] = col_id;
            }
            /*! 
             * \brief step 5: only needed when aclist is used
             * clean up the rptr for next usage
             */        
            inline void Cleanup( void ){
                Assert( UseAcList, "this function can only be called use AcList" );
                for( size_t i = 0; i < aclist.size(); i ++ ){
                    const size_t ridx = aclist[i];
                    rptr[ ridx ] = 0; rptr[ ridx + 1 ] = 0;
                }
                aclist.clear();
            }
        };
    };
    namespace utils{
        /*! 
         * \brief simple sparse matrix container
         * \tparam IndexType type of index used to store the index position, usually unsigned or size_t
         */    
        template<typename IndexType>
        struct SparseCSRMat{
        private:
            /*! \brief pointer to each of the row */
            std::vector<size_t>    rptr;
            /*! \brief index of nonzero entries in each row */
            std::vector<IndexType> findex;
        public:
            /*! \brief matrix builder*/
            SparseCSRMBuilder<IndexType> builder;
        public:
            SparseCSRMat( void ):builder( rptr, findex ){
            }  
        public:
            /*! \return number of rows in the matrx */
            inline size_t NumRow( void ) const{
                return rptr.size() - 1;
            }
            /*! \return number of elements r-th row */
            inline size_t NumElem( size_t r ) const{
                return rptr[ r + 1 ] - rptr[ r ];
            }
            /*! \return r-th row */        
            inline const IndexType *operator[]( size_t r ) const{
                return &findex[ rptr[r] ];
            } 
        };    
    };
 };
 #endif
--- a/utils/xgboost_random.h
+++ b/utils/xgboost_random.h
@ -0,0 +1,131 @@
 #ifndef _XGBOOST_RANDOM_H_
 #define _XGBOOST_RANDOM_H_
 /*!
 * \file xgboost_random.h
 * \brief PRNG to support random number generation
 * \author Tianqi Chen: tianqi.tchen@gmail.com
 *
 * Use standard PRNG from stdlib
 */
 #include <cmath>
 #include <cstdlib>
 #include <vector>
 #ifdef _MSC_VER
 typedef unsigned char uint8_t;
 typedef unsigned short int uint16_t;
 typedef unsigned int  uint32_t;
 #else
 #include <inttypes.h>
 #endif
 /*! namespace of PRNG */
 namespace xgboost{
    namespace random{
        /*! \brief seed the PRNG */
        inline void Seed( uint32_t seed ){
            srand( seed );
        }
        /*! \brief return a real number uniform in [0,1) */
        inline double NextDouble(){
            return static_cast<double>( rand() ) / (static_cast<double>( RAND_MAX )+1.0);
        }
        /*! \brief return a real numer uniform in (0,1) */
        inline double NextDouble2(){
            return (static_cast<double>( rand() ) + 1.0 ) / (static_cast<double>(RAND_MAX) + 2.0);
        }
    };
    namespace random{
        /*! \brief return a random number */
        inline uint32_t NextUInt32( void ){
            return (uint32_t)rand();
        }
        /*! \brief return a random number in n */
        inline uint32_t NextUInt32( uint32_t n ){
            return (uint32_t) floor( NextDouble() * n ) ;
        }  
        /*! \brief return  x~N(0,1) */
        inline double SampleNormal(){
            double x,y,s;
            do{
                x = 2 * NextDouble2() - 1.0;
                y = 2 * NextDouble2() - 1.0;
                s = x*x + y*y;
            }while( s >= 1.0 || s == 0.0 );
            return x * sqrt( -2.0 * log(s) / s ) ;
        }
        /*! \brief return iid x,y ~N(0,1) */
        inline void SampleNormal2D( double &xx, double &yy ){
            double x,y,s;
            do{
                x = 2 * NextDouble2() - 1.0;
                y = 2 * NextDouble2() - 1.0;
                s = x*x + y*y;
            }while( s >= 1.0 || s == 0.0 );
            double t = sqrt( -2.0 * log(s) / s ) ;
            xx = x * t; 
            yy = y * t;
        }
        /*! \brief return  x~N(mu,sigma^2) */
        inline double SampleNormal( double mu, double sigma ){
            return SampleNormal() * sigma + mu;
        }
        /*! \brief  return 1 with probability p, coin flip */
        inline int SampleBinary( double p ){
            return NextDouble() <  p;  
        }
        /*! \brief  return distribution from Gamma( alpha, beta ) */
        inline double SampleGamma( double alpha, double beta ) {
            if ( alpha < 1.0 ) {
                double u;
                do {
                    u = NextDouble();
                } while (u == 0.0);
                return SampleGamma(alpha + 1.0, beta) * pow(u, 1.0 / alpha);
            } else {
                double d,c,x,v,u;
                d = alpha - 1.0/3.0;
                c = 1.0 / sqrt( 9.0 * d );
                do {
                    do {
                        x = SampleNormal();
                        v = 1.0 + c*x;
                    } while ( v <= 0.0 );
                    v = v * v * v;
                    u = NextDouble();
                } while ( (u >= (1.0 - 0.0331 * (x*x) * (x*x)))
                          && (log(u) >= (0.5 * x * x + d * (1.0 - v + log(v)))) );
                return d * v / beta;
            }
        }
        template<typename T>
        inline void Exchange( T &a, T &b ){
            T c;
            c = a;
            a = b;
            b = c;
        }
        template<typename T>
        inline void Shuffle( T *data, size_t sz ){
            if( sz == 0 ) return;
            for( uint32_t i = (uint32_t)sz - 1; i > 0; i-- ){
                Exchange( data[i], data[ NextUInt32( i+1 ) ] );
            } 
        }
        // random shuffle the data inside, require PRNG 
        template<typename T>
        inline void Shuffle( std::vector<T> &data ){
            Shuffle( &data[0], data.size() );
        }
    };
 };
 #endif