add linear booster

tqchen 2014-02-08 11:24:35 -08:00
parent d656d9df2c
commit 33acaaa3ae
6 changed files with 306 additions and 7 deletions

View File

@@ -4,9 +4,9 @@ Creator: Tianqi Chen: tianqi.tchen AT gmail
 General Purpose Gradient Boosting Library
-Intention: A stand-alone efficient library to do machine learning in functional space
+Goal: A stand-alone efficient library to do learning via boosting in functional space
-Planned key components (TODO):
+Planned key components:
 (1) Gradient boosting models:
 - regression tree
@@ -17,11 +17,11 @@ Planned key components (TODO):
 - ranking
 - matrix factorization
 - structured prediction
-(3) OpenMP support for parallelization(optional)
+(3) OpenMP implementation (optional)
 File extension convention:
 (1) .h are interface, utils and data structures, with detailed comment;
 (2) .cpp are implementations that will be compiled, with less comment;
 (3) .hpp are implementations that will be included by .cpp, with less comment
-Parameters Usage: see wiki in github
+Parameters Usage: https://github.com/tqchen/xgboost/wiki

View File

@@ -0,0 +1,258 @@
#ifndef _XGBOOST_LINEAR_HPP_
#define _XGBOOST_LINEAR_HPP_
/*!
* \file xgboost_linear.hpp
* \brief implementation of the linear booster, with L1/L2 regularization (elastic net);
*        the update rule is coordinate descent
* \author Tianqi Chen: tianqi.tchen@gmail.com
*/
#include <vector>
#include <algorithm>
#include <cstring>
#include <cstdlib>
#include <climits>
#include "../xgboost.h"
#include "../../utils/xgboost_utils.h"
#include "../../utils/xgboost_matrix_csr.h"
namespace xgboost{
namespace booster{
/*! \brief linear model, with L1/L2 regularization */
class LinearBooster : public IBooster{
public:
LinearBooster( void ){ silent = 0;}
virtual ~LinearBooster( void ){}
public:
virtual void SetParam( const char *name, const char *val ){
if( !strcmp( name, "silent") ) silent = atoi( val );
if( model.weight.size() == 0 ) model.param.SetParam( name, val );
param.SetParam( name, val );
}
virtual void LoadModel( utils::IStream &fi ){
model.LoadModel( fi );
}
virtual void SaveModel( utils::IStream &fo ) const{
model.SaveModel( fo );
}
virtual void InitModel( void ){
model.InitModel();
}
public:
virtual void DoBoost( std::vector<float> &grad,
std::vector<float> &hess,
const FMatrixS::Image &smat,
const std::vector<unsigned> &root_index ){
utils::Assert( grad.size() < UINT_MAX, "number of instances exceeds what we can handle" );
this->Update( smat, grad, hess );
}
virtual float Predict( const FMatrixS::Line &sp, unsigned rid = 0 ){
float sum = model.bias();
for( unsigned i = 0; i < sp.len; i ++ ){
sum += model.weight[ sp.findex[i] ] * sp.fvalue[i];
}
return sum;
}
virtual float Predict( const std::vector<float> &feat,
const std::vector<bool> &funknown,
unsigned rid = 0 ){
float sum = model.bias();
for( size_t i = 0; i < feat.size(); i ++ ){
if( funknown[i] ) continue;
sum += model.weight[ i ] * feat[ i ];
}
return sum;
}
protected:
// training parameter
struct ParamTrain{
/*! \brief learning_rate */
float learning_rate;
/*! \brief regularization weight for L2 norm */
float reg_lambda;
/*! \brief regularization weight for L1 norm */
float reg_alpha;
/*! \brief regularization weight for L2 norm in bias */
float reg_lambda_bias;
ParamTrain( void ){
reg_alpha = 0.0f; reg_lambda = 0.0f; reg_lambda_bias = 0.0f;
learning_rate = 1.0f;
}
inline void SetParam( const char *name, const char *val ){
// alias names
if( !strcmp( "eta", name ) ) learning_rate = (float)atof( val );
if( !strcmp( "lambda", name ) ) reg_lambda = (float)atof( val );
if( !strcmp( "alpha", name ) ) reg_alpha = (float)atof( val );
if( !strcmp( "lambda_bias", name ) ) reg_lambda_bias = (float)atof( val );
// real names
if( !strcmp( "learning_rate", name ) ) learning_rate = (float)atof( val );
if( !strcmp( "reg_lambda", name ) ) reg_lambda = (float)atof( val );
if( !strcmp( "reg_alpha", name ) ) reg_alpha = (float)atof( val );
if( !strcmp( "reg_lambda_bias", name ) ) reg_lambda_bias = (float)atof( val );
}
// given original weight calculate delta
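// this is a Newton step on the one-coordinate elastic-net objective
//   obj(dw) = sum_grad*dw + 0.5*sum_hess*dw^2
//           + 0.5*reg_lambda*(w+dw)^2 + reg_alpha*|w+dw|
// the L1 term is handled soft-threshold style: take the L2-only step first,
// pick the subgradient for the side the tentative weight lands on,
// and clamp dw at -w so the weight is truncated to zero instead of crossing it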
inline double CalcDelta( double sum_grad, double sum_hess, double w ){
if( sum_hess < 1e-5f ) return 0.0f;
double tmp = w - ( sum_grad + reg_lambda*w )/( sum_hess + reg_lambda );
if ( tmp >=0 ){
return std::max(-( sum_grad + reg_lambda*w + reg_alpha)/(sum_hess+reg_lambda),-w);
}else{
return std::min(-( sum_grad + reg_lambda*w - reg_alpha)/(sum_hess+reg_lambda),-w);
}
}
// given original weight calculate delta bias
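// a plain Newton step with L2 regularization only: the bias carries no L1 penalty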
inline double CalcDeltaBias( double sum_grad, double sum_hess, double w ){
return - (sum_grad + reg_lambda_bias*w) / (sum_hess + reg_lambda_bias );
}
};
// model for linear booster
class Model{
public:
// model parameter
struct Param{
// number of feature dimension
int num_feature;
// reserved field
int reserved[ 32 ];
// constructor
Param( void ){
num_feature = 0;
memset( reserved, 0, sizeof(reserved) );
}
inline void SetParam( const char *name, const char *val ){
if( !strcmp( name, "num_feature" ) ) num_feature = atoi( val );
}
};
public:
Param param;
// weight for each of feature, bias is the last one
std::vector<float> weight;
public:
// initialize the model parameter
inline void InitModel( void ){
// bias is the last weight
weight.resize( param.num_feature + 1 );
std::fill( weight.begin(), weight.end(), 0.0f );
}
// save the model to file
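// binary layout: the raw Param struct followed by the num_feature+1 weights (bias last)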
inline void SaveModel( utils::IStream &fo ) const{
fo.Write( &param, sizeof(Param) );
fo.Write( &weight[0], sizeof(float) * weight.size() );
}
// load model from file
inline void LoadModel( utils::IStream &fi ){
utils::Assert( fi.Read( &param, sizeof(Param) ) != 0, "Load LinearBooster" );
weight.resize( param.num_feature + 1 );
utils::Assert( fi.Read( &weight[0], sizeof(float) * weight.size() ) != 0, "Load LinearBooster" );
}
// model bias
inline float &bias( void ){
return weight.back();
}
};
/*! \brief array entry for column based feature construction */
struct SCEntry{
/*! \brief feature value */
float fvalue;
/*! \brief row index related to each row */
unsigned rindex;
/*! \brief default constructor */
SCEntry( void ){}
/*! \brief constructor using entry */
SCEntry( float fvalue, unsigned rindex ){
this->fvalue = fvalue; this->rindex = rindex;
}
};
private:
int silent;
protected:
Model model;
ParamTrain param;
protected:
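// a single coordinate-descent pass: first the bias, then every feature;
// after each weight change dw the buffered gradients are refreshed via
// grad[i] += hess[i] * feature_value * dw, so later coordinates see
// statistics consistent with the updated model (this is why DoBoost
// is allowed to modify grad in place)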
inline void UpdateWeights( std::vector<float> &grad,
const std::vector<float> &hess,
const std::vector<size_t> &rptr,
const std::vector<SCEntry> &entry ){
{// optimize bias
double sum_grad = 0.0, sum_hess = 0.0;
for( size_t i = 0; i < grad.size(); i ++ ){
sum_grad += grad[ i ]; sum_hess += hess[ i ];
}
// remove bias effect
double dw = param.learning_rate * param.CalcDeltaBias( sum_grad, sum_hess, model.bias() );
model.bias() += dw;
// update grad value
for( size_t i = 0; i < grad.size(); i ++ ){
grad[ i ] += dw * hess[ i ];
}
}
// optimize weight
const int nfeat = model.param.num_feature;
for( int i = 0; i < nfeat; i ++ ){
size_t start = rptr[i];
size_t end = rptr[i+1];
if( start >= end ) continue;
double sum_grad = 0.0, sum_hess = 0.0;
for( size_t j = start; j < end; j ++ ){
const float v = entry[j].fvalue;
sum_grad += grad[ entry[j].rindex ] * v;
sum_hess += hess[ entry[j].rindex ] * v * v;
}
float w = model.weight[ i ];
double dw = param.learning_rate * param.CalcDelta( sum_grad, sum_hess, w );
model.weight[ i ] += dw;
// update grad value
for( size_t j = start; j < end; j ++ ){
const float v = entry[j].fvalue;
grad[ entry[j].rindex ] += hess[ entry[j].rindex ] * v * dw;
}
}
}
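// rebuild the row-major batch in column-major (feature-major) form with a
// two-pass CSR builder: pass one counts the entries per feature (budget),
// pass two fills the storage; rows with negative hessian are treated as
// deleted and skipped in both passes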
inline void MakeCmajor( std::vector<size_t> &rptr,
std::vector<SCEntry> &entry,
const std::vector<float> &hess,
const FMatrixS::Image &smat ){
// transform to column order first
const int nfeat = model.param.num_feature;
// build CSR column major format data
utils::SparseCSRMBuilder<SCEntry> builder( rptr, entry );
builder.InitBudget( nfeat );
for( unsigned i = 0; i < (unsigned)hess.size(); i ++ ){
// skip deleted entries
if( hess[i] < 0.0f ) continue;
// add sparse part budget
FMatrixS::Line sp = smat[ i ];
for( unsigned j = 0; j < sp.len; j ++ ){
if( j == 0 || sp.findex[j-1] != sp.findex[j] ){
builder.AddBudget( sp.findex[j] );
}
}
}
builder.InitStorage();
for( unsigned i = 0; i < (unsigned)hess.size(); i ++ ){
// skip deleted entries
if( hess[i] < 0.0f ) continue;
// add sparse part budget
FMatrixS::Line sp = smat[ i ];
for( unsigned j = 0; j < sp.len; j ++ ){
// skip duplicated terms
if( j == 0 || sp.findex[j-1] != sp.findex[j] ){
builder.PushElem( sp.findex[j], SCEntry( sp.fvalue[j], i ) );
}
}
}
}
protected:
virtual void Update( const FMatrixS::Image &smat,
std::vector<float> &grad,
const std::vector<float> &hess ){
std::vector<size_t> rptr;
std::vector<SCEntry> entry;
this->MakeCmajor( rptr, entry, hess, smat );
this->UpdateWeights( grad, hess, rptr, entry );
}
};
};
};
#endif

View File

@@ -12,6 +12,7 @@
#include "xgboost_gbmbase.h"
// implementations of boosters
#include "tree/xgboost_svdf_tree.hpp"
+#include "linear/xgboost_linear.hpp"
namespace xgboost{
namespace booster{
@@ -21,7 +22,11 @@ namespace xgboost{
* \return the pointer to the gradient booster created
*/
IBooster *CreateBooster( int booster_type ){
-return new RTreeTrainer();
+switch( booster_type ){
+case 0: return new RTreeTrainer();
+case 1: return new LinearBooster();
+default: utils::Error("unknown booster_type"); return NULL;
+}
}
};
};
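With this factory in place, booster_type = 1 selects the linear booster. A minimal usage sketch, assuming only the interface shown in this commit (the grad, hess, smat and root_index variables stand for whatever the surrounding trainer prepares; the parameter values are illustrative):

booster::IBooster *bst = booster::CreateBooster( 1 ); // 1 = LinearBooster
bst->SetParam( "num_feature", "128" ); // must be set before InitModel
bst->SetParam( "eta", "0.5" );         // learning rate
bst->SetParam( "lambda", "1.0" );      // L2 regularization weight
bst->SetParam( "alpha", "0.1" );       // L1 regularization weight
bst->InitModel();
bst->DoBoost( grad, hess, smat, root_index ); // one coordinate-descent pass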

View File

@@ -51,7 +51,8 @@ namespace xgboost{
virtual void InitModel( void ) = 0;
public:
/*!
-* \brief do gradient boost training for one step, using the information given
+* \brief do gradient boost training for one step, using the information given,
+* Note: content of grad and hess can change after DoBoost
* \param grad first order gradient of each instance
* \param hess second order gradient of each instance
* \param feats features of each instance
@@ -64,7 +65,7 @@ namespace xgboost{
const std::vector<unsigned> &root_index ) = 0;
/*!
* \brief predict values for given sparse feature vector
-* NOTE: in tree implementation, this is not threadsafe
+* NOTE: in the tree implementation this is not threadsafe; use the dense version to ensure thread safety
* \param feat vector in sparse format
* \param rid root id of current instance, default = 0
* \return prediction
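Because grad and hess may be rewritten in place, callers should recompute both from the current predictions before every round rather than reusing last round's vectors. A rough sketch of that loop; GetGrad and GetHess are placeholder names for the caller's loss derivatives, not part of this interface:

for( int r = 0; r < num_round; r ++ ){
    for( size_t i = 0; i < pred.size(); i ++ ){
        grad[ i ] = GetGrad( pred[ i ], label[ i ] ); // first order statistics
        hess[ i ] = GetHess( pred[ i ], label[ i ] ); // second order statistics
    }
    // safe: DoBoost may modify grad/hess, but they are rebuilt above each round
    booster->DoBoost( grad, hess, smat, root_index );
}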

View File

@@ -129,6 +129,40 @@ namespace xgboost{
sp.fvalue = &fvalue[ row_ptr[ sidx ] ];
return sp;
}
public:
/*!
* \brief save data to binary stream
* note: since we have size_t in row_ptr,
* the function is not consistent between 64-bit and 32-bit machines
* \param fo output stream
*/
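// stream layout: nrow, then the nrow+1 row_ptr offsets, then the findex
// and fvalue arrays, each of length row_ptr.back()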
inline void SaveBinary( utils::IStream &fo ) const{
size_t nrow = this->NumRow();
fo.Write( &nrow, sizeof(size_t) );
fo.Write( &row_ptr[0], row_ptr.size() * sizeof(size_t) );
if( findex.size() != 0 ){
fo.Write( &findex[0] , findex.size() * sizeof(bst_uint) );
fo.Write( &fvalue[0] , fvalue.size() * sizeof(bst_float) );
}
}
/*!
* \brief load data from binary stream
* note: since we have size_t in row_ptr,
* the function is not consistent between 64-bit and 32-bit machines
* \param fi input stream
*/
inline void LoadBinary( utils::IStream &fi ){
size_t nrow;
utils::Assert( fi.Read( &nrow, sizeof(size_t) ) != 0, "Load FMatrixS" );
row_ptr.resize( nrow + 1 );
utils::Assert( fi.Read( &row_ptr[0], row_ptr.size() * sizeof(size_t) ) != 0, "Load FMatrixS" );
findex.resize( row_ptr.back() ); fvalue.resize( row_ptr.back() );
if( findex.size() != 0 ){
utils::Assert( fi.Read( &findex[0] , findex.size() * sizeof(bst_uint) ) , "Load FMatrixS" );
utils::Assert( fi.Read( &fvalue[0] , fvalue.size() * sizeof(bst_float) ), "Load FMatrixS" );
}
}
};
};
};

View File

@@ -172,6 +172,7 @@
public:
/*!
* \brief do gradient boost training for one step, using the information given
+* Note: content of grad and hess can change after DoBoost
* \param grad first order gradient of each instance
* \param hess second order gradient of each instance
* \param feats features of each instance