diff --git a/README.md b/README.md
index 9b9fe1cb8..4f0518f58 100644
--- a/README.md
+++ b/README.md
@@ -4,9 +4,9 @@ Creater: Tianqi Chen: tianqi.tchen AT gmail
 
 General Purpose Gradient Boosting Library
 
-Intention: A stand-alone efficient library to do machine learning in functional space
+Goal: A stand-alone, efficient library for learning via boosting in functional space
 
-Planned key components (TODO):
+Planned key components:
 (1) Gradient boosting models:
     - regression tree
@@ -17,11 +17,11 @@ Planned key components (TODO):
     - ranking
     - matrix factorization
     - structured prediction
-(3) OpenMP support for parallelization(optional)
+(3) OpenMP implementation (optional)
 
 File extension convention:
 (1) .h are interface, utils anddata structures, with detailed comment;
 (2) .cpp are implementations that will be compiled, with less comment;
 (3) .hpp are implementations that will be included by .cpp, with less comment
 
-Parameters Usage: see wiki in github
+Parameters Usage: https://github.com/tqchen/xgboost/wiki
diff --git a/booster/linear/xgboost_linear.hpp b/booster/linear/xgboost_linear.hpp
new file mode 100644
index 000000000..852bbc7fe
--- /dev/null
+++ b/booster/linear/xgboost_linear.hpp
@@ -0,0 +1,258 @@
+#ifndef _XGBOOST_LINEAR_HPP_
+#define _XGBOOST_LINEAR_HPP_
+/*!
+ * \file xgboost_linear.hpp
+ * \brief Implementation of the linear booster, with L1/L2 regularization (elastic net);
+ *        the update rule is coordinate descent
+ * \author Tianqi Chen: tianqi.tchen@gmail.com
+ */
+#include <vector>
+#include <algorithm>
+
+#include "../xgboost.h"
+#include "../../utils/xgboost_utils.h"
+#include "../../utils/xgboost_matrix_csr.h"
+
+namespace xgboost{
+    namespace booster{
+        /*! \brief linear model, with L1/L2 regularization */
+        class LinearBooster : public IBooster{
+        public:
+            LinearBooster( void ){ silent = 0; }
+            virtual ~LinearBooster( void ){}
+        public:
+            virtual void SetParam( const char *name, const char *val ){
+                if( !strcmp( name, "silent") ) silent = atoi( val );
+                if( model.weight.size() == 0 ) model.param.SetParam( name, val );
+                param.SetParam( name, val );
+            }
+            virtual void LoadModel( utils::IStream &fi ){
+                model.LoadModel( fi );
+            }
+            virtual void SaveModel( utils::IStream &fo ) const{
+                model.SaveModel( fo );
+            }
+            virtual void InitModel( void ){
+                model.InitModel();
+            }
+        public:
+            virtual void DoBoost( std::vector<float> &grad,
+                                  std::vector<float> &hess,
+                                  const FMatrixS::Image &smat,
+                                  const std::vector<unsigned> &root_index ){
+                utils::Assert( grad.size() < UINT_MAX, "number of instances exceeds what we can handle" );
+                this->Update( smat, grad, hess );
+            }
+            virtual float Predict( const FMatrixS::Line &sp, unsigned rid = 0 ){
+                float sum = model.bias();
+                for( unsigned i = 0; i < sp.len; i ++ ){
+                    sum += model.weight[ sp.findex[i] ] * sp.fvalue[i];
+                }
+                return sum;
+            }
+            virtual float Predict( const std::vector<float> &feat,
+                                   const std::vector<bool> &funknown,
+                                   unsigned rid = 0 ){
+                float sum = model.bias();
+                for( size_t i = 0; i < feat.size(); i ++ ){
+                    if( funknown[i] ) continue;
+                    sum += model.weight[ i ] * feat[ i ];
+                }
+                return sum;
+            }
+        protected:
+            // training parameter
+            struct ParamTrain{
+                /*! \brief learning_rate */
+                float learning_rate;
+                /*! \brief regularization weight for L2 norm */
+                float reg_lambda;
+                /*! \brief regularization weight for L1 norm */
+                float reg_alpha;
+                /*! \brief regularization weight for L2 norm in bias */
+                float reg_lambda_bias;
+
+                ParamTrain( void ){
+                    reg_alpha = 0.0f; reg_lambda = 0.0f; reg_lambda_bias = 0.0f;
+                    learning_rate = 1.0f;
+                }
+                inline void SetParam( const char *name, const char *val ){
+                    // alias names
+                    if( !strcmp( "eta", name ) )         learning_rate = (float)atof( val );
+                    if( !strcmp( "lambda", name ) )      reg_lambda = (float)atof( val );
+                    if( !strcmp( "alpha", name ) )       reg_alpha = (float)atof( val );
+                    if( !strcmp( "lambda_bias", name ) ) reg_lambda_bias = (float)atof( val );
+                    // real names
+                    if( !strcmp( "learning_rate", name ) )   learning_rate = (float)atof( val );
+                    if( !strcmp( "reg_lambda", name ) )      reg_lambda = (float)atof( val );
+                    if( !strcmp( "reg_alpha", name ) )       reg_alpha = (float)atof( val );
+                    if( !strcmp( "reg_lambda_bias", name ) ) reg_lambda_bias = (float)atof( val );
+                }
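+                // The coordinate update below minimizes the second-order approximation
+                //   sum_grad * d + 0.5 * sum_hess * d^2
+                //     + 0.5 * reg_lambda * (w + d)^2 + reg_alpha * |w + d|
+                // in the step d, where sum_grad/sum_hess are the gradient/hessian statistics
+                // accumulated for this coordinate (see UpdateWeights). The two branches apply
+                // L1 soft-thresholding for the positive/negative case, and the step is
+                // clipped at -w so a weight is set to zero rather than flipping sign.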
+                // given original weight calculate delta
+                inline double CalcDelta( double sum_grad, double sum_hess, double w ){
+                    if( sum_hess < 1e-5f ) return 0.0f;
+                    double tmp = w - ( sum_grad + reg_lambda*w ) / ( sum_hess + reg_lambda );
+                    if( tmp >= 0 ){
+                        return std::max( -( sum_grad + reg_lambda*w + reg_alpha ) / ( sum_hess + reg_lambda ), -w );
+                    }else{
+                        return std::min( -( sum_grad + reg_lambda*w - reg_alpha ) / ( sum_hess + reg_lambda ), -w );
+                    }
+                }
+                // given original weight calculate delta for the bias
+                inline double CalcDeltaBias( double sum_grad, double sum_hess, double w ){
+                    return - ( sum_grad + reg_lambda_bias*w ) / ( sum_hess + reg_lambda_bias );
+                }
+            };
+
+            // model for linear booster
+            class Model{
+            public:
+                // model parameter
+                struct Param{
+                    // number of feature dimensions
+                    int num_feature;
+                    // reserved field
+                    int reserved[ 32 ];
+                    // constructor
+                    Param( void ){
+                        num_feature = 0;
+                        memset( reserved, 0, sizeof(reserved) );
+                    }
+                    inline void SetParam( const char *name, const char *val ){
+                        if( !strcmp( name, "num_feature" ) ) num_feature = atoi( val );
+                    }
+                };
+            public:
+                Param param;
+                // weight for each feature; the bias is the last entry
+                std::vector<float> weight;
+            public:
+                // initialize the model parameter
+                inline void InitModel( void ){
+                    // bias is the last weight
+                    weight.resize( param.num_feature + 1 );
+                    std::fill( weight.begin(), weight.end(), 0.0f );
+                }
+                // save the model to file
+                inline void SaveModel( utils::IStream &fo ) const{
+                    fo.Write( &param, sizeof(Param) );
+                    fo.Write( &weight[0], sizeof(float) * weight.size() );
+                }
+                // load model from file
+                inline void LoadModel( utils::IStream &fi ){
+                    utils::Assert( fi.Read( &param, sizeof(Param) ) != 0, "Load LinearBooster" );
+                    weight.resize( param.num_feature + 1 );
+                    utils::Assert( fi.Read( &weight[0], sizeof(float) * weight.size() ) != 0, "Load LinearBooster" );
+                }
+                // model bias
+                inline float &bias( void ){
+                    return weight.back();
+                }
+            };
+            /*! \brief array entry for column based feature construction */
+            struct SCEntry{
+                /*! \brief feature value */
+                float fvalue;
+                /*! \brief index of the row this entry comes from */
+                unsigned rindex;
+                /*! \brief default constructor */
+                SCEntry( void ){}
+                /*! \brief constructor using entry */
+                SCEntry( float fvalue, unsigned rindex ){
+                    this->fvalue = fvalue; this->rindex = rindex;
+                }
+            };
+        private:
+            int silent;
+        protected:
+            Model model;
+            ParamTrain param;
+        protected:
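+            // One sweep of coordinate descent over the model:
+            // the bias is updated first from the plain sums of grad/hess, then each feature
+            // column j contributes sum_i grad[i]*x_ij and sum_i hess[i]*x_ij^2 to its update.
+            // After every weight change dw, the per-instance gradients are corrected
+            // (grad[i] += dw * hess[i] * x_ij), so later coordinates in the same sweep
+            // see statistics that already reflect the updated weights.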
+            inline void UpdateWeights( std::vector<float> &grad,
+                                       const std::vector<float> &hess,
+                                       const std::vector<size_t> &rptr,
+                                       const std::vector<SCEntry> &entry ){
+                {// optimize bias
+                    double sum_grad = 0.0, sum_hess = 0.0;
+                    for( size_t i = 0; i < grad.size(); i ++ ){
+                        sum_grad += grad[ i ]; sum_hess += hess[ i ];
+                    }
+                    // remove bias effect
+                    double dw = param.learning_rate * param.CalcDeltaBias( sum_grad, sum_hess, model.bias() );
+                    model.bias() += dw;
+                    // update grad value
+                    for( size_t i = 0; i < grad.size(); i ++ ){
+                        grad[ i ] += dw * hess[ i ];
+                    }
+                }
+
+                // optimize weight
+                const int nfeat = model.param.num_feature;
+                for( int i = 0; i < nfeat; i ++ ){
+                    size_t start = rptr[i];
+                    size_t end   = rptr[i+1];
+                    if( start >= end ) continue;
+                    double sum_grad = 0.0, sum_hess = 0.0;
+                    for( size_t j = start; j < end; j ++ ){
+                        const float v = entry[j].fvalue;
+                        sum_grad += grad[ entry[j].rindex ] * v;
+                        sum_hess += hess[ entry[j].rindex ] * v * v;
+                    }
+                    float w = model.weight[ i ];
+                    double dw = param.learning_rate * param.CalcDelta( sum_grad, sum_hess, w );
+                    model.weight[ i ] += dw;
+                    // update grad value
+                    for( size_t j = start; j < end; j ++ ){
+                        const float v = entry[j].fvalue;
+                        grad[ entry[j].rindex ] += hess[ entry[j].rindex ] * v * dw;
+                    }
+                }
+            }
+
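+            // Build a column-major view (CSR over feature indices) of the row-major input:
+            // the first pass counts, per feature, how many usable rows touch it (AddBudget),
+            // the second pass fills in the entries (PushElem). Rows with hess < 0 are treated
+            // as deleted and skipped, and consecutive duplicate feature indices within a row
+            // are only counted/stored once.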
+            inline void MakeCmajor( std::vector<size_t> &rptr,
+                                    std::vector<SCEntry> &entry,
+                                    const std::vector<float> &hess,
+                                    const FMatrixS::Image &smat ){
+                // transform to column order first
+                const int nfeat = model.param.num_feature;
+                // build CSR column major format data
+                utils::SparseCSRMBuilder<SCEntry> builder( rptr, entry );
+                builder.InitBudget( nfeat );
+                for( unsigned i = 0; i < (unsigned)hess.size(); i ++ ){
+                    // skip deleted entries
+                    if( hess[i] < 0.0f ) continue;
+                    // add sparse part budget
+                    FMatrixS::Line sp = smat[ i ];
+                    for( unsigned j = 0; j < sp.len; j ++ ){
+                        if( j == 0 || sp.findex[j-1] != sp.findex[j] ){
+                            builder.AddBudget( sp.findex[j] );
+                        }
+                    }
+                }
+                builder.InitStorage();
+                for( unsigned i = 0; i < (unsigned)hess.size(); i ++ ){
+                    // skip deleted entries
+                    if( hess[i] < 0.0f ) continue;
+                    // push the sparse part into the column storage
+                    FMatrixS::Line sp = smat[ i ];
+                    for( unsigned j = 0; j < sp.len; j ++ ){
+                        // skip duplicated terms
+                        if( j == 0 || sp.findex[j-1] != sp.findex[j] ){
+                            builder.PushElem( sp.findex[j], SCEntry( sp.fvalue[j], i ) );
+                        }
+                    }
+                }
+            }
+        protected:
+            virtual void Update( const FMatrixS::Image &smat,
+                                 std::vector<float> &grad,
+                                 const std::vector<float> &hess ){
+                std::vector<size_t>  rptr;
+                std::vector<SCEntry> entry;
+                this->MakeCmajor( rptr, entry, hess, smat );
+                this->UpdateWeights( grad, hess, rptr, entry );
+            }
+        };
+    };
+};
+#endif
diff --git a/booster/xgboost.cpp b/booster/xgboost.cpp
index 5c445dabf..f60d2b2f8 100644
--- a/booster/xgboost.cpp
+++ b/booster/xgboost.cpp
@@ -12,6 +12,7 @@
 #include "xgboost_gbmbase.h"
 // implementations of boosters
 #include "tree/xgboost_svdf_tree.hpp"
+#include "linear/xgboost_linear.hpp"
 
 namespace xgboost{
     namespace booster{
@@ -21,7 +22,11 @@ namespace xgboost{
          * \return the pointer to the gradient booster created
          */
         IBooster *CreateBooster( int booster_type ){
-            return new RTreeTrainer();
+            switch( booster_type ){
+            case 0: return new RTreeTrainer();
+            case 1: return new LinearBooster();
+            default: utils::Error("unknown booster_type"); return NULL;
+            }
         }
     };
 };
diff --git a/booster/xgboost.h b/booster/xgboost.h
index 5e7bd47f6..207e589f6 100644
--- a/booster/xgboost.h
+++ b/booster/xgboost.h
@@ -51,7 +51,8 @@ namespace xgboost{
         virtual void InitModel( void ) = 0;
     public:
         /*!
-         * \brief do gradient boost training for one step, using the information given
+         * \brief do gradient boost training for one step, using the information given,
+         *        Note: contents of grad and hess may be modified by DoBoost
          * \param grad first order gradient of each instance
          * \param hess second order gradient of each instance
          * \param feats features of each instance
@@ -64,7 +65,7 @@ namespace xgboost{
                               const std::vector<unsigned> &root_index ) = 0;
         /*!
          * \brief predict values for given sparse feature vector
-         * NOTE: in tree implementation, this is not threadsafe
+         * NOTE: in the tree implementation this is not threadsafe; use the dense version to ensure thread safety
          * \param feat vector in sparse format
          * \param rid root id of current instance, default = 0
          * \return prediction
diff --git a/booster/xgboost_data.h b/booster/xgboost_data.h
index 289f2e056..f56a5bc8f 100644
--- a/booster/xgboost_data.h
+++ b/booster/xgboost_data.h
@@ -129,6 +129,40 @@ namespace xgboost{
             sp.fvalue = &fvalue[ row_ptr[ sidx ] ];
             return sp;
         }
+    public:
+        /*!
+         * \brief save data to binary stream
+         *        note: since row_ptr stores size_t, the format is not
+         *        consistent between 64-bit and 32-bit machines
+         * \param fo output stream
+         */
+        inline void SaveBinary( utils::IStream &fo ) const{
+            size_t nrow = this->NumRow();
+            fo.Write( &nrow, sizeof(size_t) );
+            fo.Write( &row_ptr[0], row_ptr.size() * sizeof(size_t) );
+            if( findex.size() != 0 ){
+                fo.Write( &findex[0] , findex.size() * sizeof(bst_uint) );
+                fo.Write( &fvalue[0] , fvalue.size() * sizeof(bst_float) );
+            }
+        }
+        /*!
+         * \brief load data from binary stream
+         *        note: since row_ptr stores size_t, the format is not
+         *        consistent between 64-bit and 32-bit machines
+         * \param fi input stream
+         */
+        inline void LoadBinary( utils::IStream &fi ){
+            size_t nrow;
+            utils::Assert( fi.Read( &nrow, sizeof(size_t) ) != 0, "Load FMatrixS" );
+            row_ptr.resize( nrow + 1 );
+            utils::Assert( fi.Read( &row_ptr[0], row_ptr.size() * sizeof(size_t) ) != 0, "Load FMatrixS" );
+
+            findex.resize( row_ptr.back() ); fvalue.resize( row_ptr.back() );
+            if( findex.size() != 0 ){
+                utils::Assert( fi.Read( &findex[0] , findex.size() * sizeof(bst_uint) ) != 0 , "Load FMatrixS" );
+                utils::Assert( fi.Read( &fvalue[0] , fvalue.size() * sizeof(bst_float) ) != 0, "Load FMatrixS" );
+            }
+        }
     };
 };
};
diff --git a/booster/xgboost_gbmbase.h b/booster/xgboost_gbmbase.h
index c43f79871..7ce8d844d 100644
--- a/booster/xgboost_gbmbase.h
+++ b/booster/xgboost_gbmbase.h
@@ -172,6 +172,7 @@ namespace xgboost{
     public:
         /*!
         * \brief do gradient boost training for one step, using the information given
+        *        Note: contents of grad and hess may be modified by DoBoost
        * \param grad first order gradient of each instance
        * \param hess second order gradient of each instance
        * \param feats features of each instance
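
Usage sketch (illustrative only; it assumes the CreateBooster factory and the SetParam
aliases shown in the diff above, and the include path, num_feature value, and per-round
gradient/hessian computation are placeholders for the example):

    #include <vector>
    #include "booster/xgboost.h"   // assumed include path from the repository root

    int main( void ){
        using namespace xgboost::booster;
        IBooster *bst = CreateBooster( 1 );     // booster_type 1 selects LinearBooster
        bst->SetParam( "num_feature", "127" );  // must be set before InitModel
        bst->SetParam( "eta", "0.5" );          // learning rate (alias of learning_rate)
        bst->SetParam( "lambda", "1.0" );       // L2 regularization (alias of reg_lambda)
        bst->SetParam( "alpha", "0.1" );        // L1 regularization (alias of reg_alpha)
        bst->InitModel();
        // each boosting round: fill grad/hess from the objective, then call
        //   bst->DoBoost( grad, hess, smat, root_index );
        // where smat is an FMatrixS::Image of the training data
        delete bst;
        return 0;
    }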