diff --git a/README.md b/README.md
index 9b9fe1cb8..4f0518f58 100644
--- a/README.md
+++ b/README.md
@@ -4,9 +4,9 @@ Creater: Tianqi Chen: tianqi.tchen AT gmail
 
 General Purpose Gradient Boosting Library
 
-Intention: A stand-alone efficient library to do machine learning in functional space
+Goal: A stand-alone, efficient library for learning via boosting in functional space
 
-Planned key components (TODO):
+Planned key components:
 (1) Gradient boosting models:
     - regression tree
@@ -17,11 +17,11 @@ Planned key components (TODO):
     - ranking
     - matrix factorization
     - structured prediction
-(3) OpenMP support for parallelization(optional)
+(3) OpenMP implementation (optional)
 
 File extension convention:
 (1) .h are interface, utils anddata structures, with detailed comment;
 (2) .cpp are implementations that will be compiled, with less comment;
 (3) .hpp are implementations that will be included by .cpp, with less comment
 
-Parameters Usage: see wiki in github
+Parameters Usage: https://github.com/tqchen/xgboost/wiki
diff --git a/booster/linear/xgboost_linear.hpp b/booster/linear/xgboost_linear.hpp
new file mode 100644
index 000000000..852bbc7fe
--- /dev/null
+++ b/booster/linear/xgboost_linear.hpp
@@ -0,0 +1,258 @@
+#ifndef _XGBOOST_LINEAR_HPP_
+#define _XGBOOST_LINEAR_HPP_
+/*!
+ * \file xgboost_linear.hpp
+ * \brief Implementation of the linear booster, with L1/L2 regularization (elastic net);
+ *        the update rule is coordinate descent
+ * \author Tianqi Chen: tianqi.tchen@gmail.com
+ */
+#include <vector>
+#include <algorithm>
+
+#include "../xgboost.h"
+#include "../../utils/xgboost_utils.h"
+#include "../../utils/xgboost_matrix_csr.h"
+
+namespace xgboost{
+    namespace booster{
+        /*! \brief linear model, with L1/L2 regularization */
+        class LinearBooster : public IBooster{
+        public:
+            LinearBooster( void ){ silent = 0; }
+            virtual ~LinearBooster( void ){}
+        public:
+            virtual void SetParam( const char *name, const char *val ){
+                if( !strcmp( name, "silent") ) silent = atoi( val );
+                if( model.weight.size() == 0 ) model.param.SetParam( name, val );
+                param.SetParam( name, val );
+            }
+            virtual void LoadModel( utils::IStream &fi ){
+                model.LoadModel( fi );
+            }
+            virtual void SaveModel( utils::IStream &fo ) const{
+                model.SaveModel( fo );
+            }
+            virtual void InitModel( void ){
+                model.InitModel();
+            }
+        public:
+            virtual void DoBoost( std::vector<float> &grad,
+                                  std::vector<float> &hess,
+                                  const FMatrixS::Image &smat,
+                                  const std::vector<unsigned> &root_index ){
+                utils::Assert( grad.size() < UINT_MAX, "number of instances exceeds what we can handle" );
+                this->Update( smat, grad, hess );
+            }
+            virtual float Predict( const FMatrixS::Line &sp, unsigned rid = 0 ){
+                float sum = model.bias();
+                for( unsigned i = 0; i < sp.len; i ++ ){
+                    sum += model.weight[ sp.findex[i] ] * sp.fvalue[i];
+                }
+                return sum;
+            }
+            virtual float Predict( const std::vector<float> &feat,
+                                   const std::vector<bool> &funknown,
+                                   unsigned rid = 0 ){
+                float sum = model.bias();
+                for( size_t i = 0; i < feat.size(); i ++ ){
+                    if( funknown[i] ) continue;
+                    sum += model.weight[ i ] * feat[ i ];
+                }
+                return sum;
+            }
+        protected:
+            // training parameter
+            struct ParamTrain{
+                /*! \brief learning_rate */
+                float learning_rate;
+                /*! \brief regularization weight for L2 norm */
+                float reg_lambda;
+                /*! \brief regularization weight for L1 norm */
+                float reg_alpha;
+                /*! \brief regularization weight for L2 norm in bias */
+                float reg_lambda_bias;
+
+                ParamTrain( void ){
+                    reg_alpha = 0.0f; reg_lambda = 0.0f; reg_lambda_bias = 0.0f;
+                    learning_rate = 1.0f;
+                }
+                inline void SetParam( const char *name, const char *val ){
+                    // alias names
+                    if( !strcmp( "eta", name ) )         learning_rate = (float)atof( val );
+                    if( !strcmp( "lambda", name ) )      reg_lambda = (float)atof( val );
+                    if( !strcmp( "alpha", name ) )       reg_alpha = (float)atof( val );
+                    if( !strcmp( "lambda_bias", name ) ) reg_lambda_bias = (float)atof( val );
+                    // real names
+                    if( !strcmp( "learning_rate", name ) )   learning_rate = (float)atof( val );
+                    if( !strcmp( "reg_lambda", name ) )      reg_lambda = (float)atof( val );
+                    if( !strcmp( "reg_alpha", name ) )       reg_alpha = (float)atof( val );
+                    if( !strcmp( "reg_lambda_bias", name ) ) reg_lambda_bias = (float)atof( val );
+                }
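+                // The coordinate update below minimizes the second-order approximation
+                //   sum_grad * d + 0.5 * sum_hess * d^2
+                //     + 0.5 * reg_lambda * (w + d)^2 + reg_alpha * |w + d|
+                // in the step d, where sum_grad/sum_hess are the gradient/hessian statistics
+                // accumulated for this coordinate (see UpdateWeights). The two branches apply
+                // L1 soft-thresholding for the positive/negative case, and the step is
+                // clipped at -w so a weight is set to zero rather than flipping sign.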
+                // given original weight calculate delta
+                inline double CalcDelta( double sum_grad, double sum_hess, double w ){
+                    if( sum_hess < 1e-5f ) return 0.0f;
+                    double tmp = w - ( sum_grad + reg_lambda*w ) / ( sum_hess + reg_lambda );
+                    if( tmp >= 0 ){
+                        return std::max( -( sum_grad + reg_lambda*w + reg_alpha ) / ( sum_hess + reg_lambda ), -w );
+                    }else{
+                        return std::min( -( sum_grad + reg_lambda*w - reg_alpha ) / ( sum_hess + reg_lambda ), -w );
+                    }
+                }
+                // given original weight calculate delta for the bias
+                inline double CalcDeltaBias( double sum_grad, double sum_hess, double w ){
+                    return - ( sum_grad + reg_lambda_bias*w ) / ( sum_hess + reg_lambda_bias );
+                }
+            };
+
+            // model for linear booster
+            class Model{
+            public:
+                // model parameter
+                struct Param{
+                    // number of feature dimensions
+                    int num_feature;
+                    // reserved field
+                    int reserved[ 32 ];
+                    // constructor
+                    Param( void ){
+                        num_feature = 0;
+                        memset( reserved, 0, sizeof(reserved) );
+                    }
+                    inline void SetParam( const char *name, const char *val ){
+                        if( !strcmp( name, "num_feature" ) ) num_feature = atoi( val );
+                    }
+                };
+            public:
+                Param param;
+                // weight for each feature; the bias is the last entry
+                std::vector<float> weight;
+            public:
+                // initialize the model parameter
+                inline void InitModel( void ){
+                    // bias is the last weight
+                    weight.resize( param.num_feature + 1 );
+                    std::fill( weight.begin(), weight.end(), 0.0f );
+                }
+                // save the model to file
+                inline void SaveModel( utils::IStream &fo ) const{
+                    fo.Write( &param, sizeof(Param) );
+                    fo.Write( &weight[0], sizeof(float) * weight.size() );
+                }
+                // load model from file
+                inline void LoadModel( utils::IStream &fi ){
+                    utils::Assert( fi.Read( &param, sizeof(Param) ) != 0, "Load LinearBooster" );
+                    weight.resize( param.num_feature + 1 );
+                    utils::Assert( fi.Read( &weight[0], sizeof(float) * weight.size() ) != 0, "Load LinearBooster" );
+                }
+                // model bias
+                inline float &bias( void ){
+                    return weight.back();
+                }
+            };
+            /*! \brief array entry for column based feature construction */
+            struct SCEntry{
+                /*! \brief feature value */
+                float fvalue;
+                /*! \brief index of the row this entry comes from */
+                unsigned rindex;
+                /*! \brief default constructor */
+                SCEntry( void ){}
+                /*! \brief constructor using entry */
+                SCEntry( float fvalue, unsigned rindex ){
+                    this->fvalue = fvalue; this->rindex = rindex;
+                }
+            };
+        private:
+            int silent;
+        protected:
+            Model model;
+            ParamTrain param;
+        protected:
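+            // One sweep of coordinate descent over the model:
+            // the bias is updated first from the plain sums of grad/hess, then each feature
+            // column j contributes sum_i grad[i]*x_ij and sum_i hess[i]*x_ij^2 to its update.
+            // After every weight change dw, the per-instance gradients are corrected
+            // (grad[i] += dw * hess[i] * x_ij), so later coordinates in the same sweep
+            // see statistics that already reflect the updated weights.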
+            inline void UpdateWeights( std::vector<float> &grad,
+                                       const std::vector<float> &hess,
+                                       const std::vector<size_t> &rptr,
+                                       const std::vector<SCEntry> &entry ){
+                {// optimize bias
+                    double sum_grad = 0.0, sum_hess = 0.0;
+                    for( size_t i = 0; i < grad.size(); i ++ ){
+                        sum_grad += grad[ i ]; sum_hess += hess[ i ];
+                    }
+                    // remove bias effect
+                    double dw = param.learning_rate * param.CalcDeltaBias( sum_grad, sum_hess, model.bias() );
+                    model.bias() += dw;
+                    // update grad value
+                    for( size_t i = 0; i < grad.size(); i ++ ){
+                        grad[ i ] += dw * hess[ i ];
+                    }
+                }
+
+                // optimize weight
+                const int nfeat = model.param.num_feature;
+                for( int i = 0; i < nfeat; i ++ ){
+                    size_t start = rptr[i];
+                    size_t end   = rptr[i+1];
+                    if( start >= end ) continue;
+                    double sum_grad = 0.0, sum_hess = 0.0;
+                    for( size_t j = start; j < end; j ++ ){
+                        const float v = entry[j].fvalue;
+                        sum_grad += grad[ entry[j].rindex ] * v;
+                        sum_hess += hess[ entry[j].rindex ] * v * v;
+                    }
+                    float w = model.weight[ i ];
+                    double dw = param.learning_rate * param.CalcDelta( sum_grad, sum_hess, w );
+                    model.weight[ i ] += dw;
+                    // update grad value
+                    for( size_t j = start; j < end; j ++ ){
+                        const float v = entry[j].fvalue;
+                        grad[ entry[j].rindex ] += hess[ entry[j].rindex ] * v * dw;
+                    }
+                }
+            }
+
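+            // Build a column-major view (CSR over feature indices) of the row-major input:
+            // the first pass counts, per feature, how many usable rows touch it (AddBudget),
+            // the second pass fills in the entries (PushElem). Rows with hess < 0 are treated
+            // as deleted and skipped, and consecutive duplicate feature indices within a row
+            // are only counted/stored once.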
+            inline void MakeCmajor( std::vector<size_t> &rptr,
+                                    std::vector<SCEntry> &entry,
+                                    const std::vector<float> &hess,
+                                    const FMatrixS::Image &smat ){
+                // transform to column order first
+                const int nfeat = model.param.num_feature;
+                // build CSR column major format data
+                utils::SparseCSRMBuilder<SCEntry> builder( rptr, entry );
+                builder.InitBudget( nfeat );
+                for( unsigned i = 0; i < (unsigned)hess.size(); i ++ ){
+                    // skip deleted entries
+                    if( hess[i] < 0.0f ) continue;
+                    // add sparse part budget
+                    FMatrixS::Line sp = smat[ i ];
+                    for( unsigned j = 0; j < sp.len; j ++ ){
+                        if( j == 0 || sp.findex[j-1] != sp.findex[j] ){
+                            builder.AddBudget( sp.findex[j] );
+                        }
+                    }
+                }
+                builder.InitStorage();
+                for( unsigned i = 0; i < (unsigned)hess.size(); i ++ ){
+                    // skip deleted entries
+                    if( hess[i] < 0.0f ) continue;
+                    // push the sparse part into the column storage
+                    FMatrixS::Line sp = smat[ i ];
+                    for( unsigned j = 0; j < sp.len; j ++ ){
+                        // skip duplicated terms
+                        if( j == 0 || sp.findex[j-1] != sp.findex[j] ){
+                            builder.PushElem( sp.findex[j], SCEntry( sp.fvalue[j], i ) );
+                        }
+                    }
+                }
+            }
+        protected:
+            virtual void Update( const FMatrixS::Image &smat,
+                                 std::vector<float> &grad,
+                                 const std::vector<float> &hess ){
+                std::vector<size_t>  rptr;
+                std::vector<SCEntry> entry;
+                this->MakeCmajor( rptr, entry, hess, smat );
+                this->UpdateWeights( grad, hess, rptr, entry );
+            }
+        };
+    };
+};
+#endif
diff --git a/booster/xgboost.cpp b/booster/xgboost.cpp
index 5c445dabf..f60d2b2f8 100644
--- a/booster/xgboost.cpp
+++ b/booster/xgboost.cpp
@@ -12,6 +12,7 @@
 #include "xgboost_gbmbase.h"
 // implementations of boosters
 #include "tree/xgboost_svdf_tree.hpp"
+#include "linear/xgboost_linear.hpp"
 
 namespace xgboost{
     namespace booster{
@@ -21,7 +22,11 @@ namespace xgboost{
          * \return the pointer to the gradient booster created
          */
         IBooster *CreateBooster( int booster_type ){
-            return new RTreeTrainer();
+            switch( booster_type ){
+            case 0: return new RTreeTrainer();
+            case 1: return new LinearBooster();
+            default: utils::Error("unknown booster_type"); return NULL;
+            }
         }
     };
 };
diff --git a/booster/xgboost.h b/booster/xgboost.h
index 5e7bd47f6..207e589f6 100644
--- a/booster/xgboost.h
+++ b/booster/xgboost.h
@@ -51,7 +51,8 @@ namespace xgboost{
         virtual void InitModel( void ) = 0;
     public:
         /*!
-         * \brief do gradient boost training for one step, using the information given
+         * \brief do gradient boost training for one step, using the information given,
+         *        Note: contents of grad and hess may be modified by DoBoost
          * \param grad first order gradient of each instance
          * \param hess second order gradient of each instance
          * \param feats features of each instance
@@ -64,7 +65,7 @@ namespace xgboost{
                               const std::vector<unsigned> &root_index ) = 0;
         /*!
          * \brief predict values for given sparse feature vector
-         * NOTE: in tree implementation, this is not threadsafe
+         * NOTE: in the tree implementation this is not threadsafe; use the dense version to ensure thread safety
          * \param feat vector in sparse format
          * \param rid root id of current instance, default = 0
          * \return prediction
diff --git a/booster/xgboost_data.h b/booster/xgboost_data.h
index 289f2e056..f56a5bc8f 100644
--- a/booster/xgboost_data.h
+++ b/booster/xgboost_data.h
@@ -129,6 +129,40 @@ namespace xgboost{
             sp.fvalue = &fvalue[ row_ptr[ sidx ] ];
             return sp;
         }
+    public:
+        /*!
+         * \brief save data to binary stream
+         *        note: since row_ptr stores size_t, the format is not
+         *        consistent between 64-bit and 32-bit machines
+         * \param fo output stream
+         */
+        inline void SaveBinary( utils::IStream &fo ) const{
+            size_t nrow = this->NumRow();
+            fo.Write( &nrow, sizeof(size_t) );
+            fo.Write( &row_ptr[0], row_ptr.size() * sizeof(size_t) );
+            if( findex.size() != 0 ){
+                fo.Write( &findex[0] , findex.size() * sizeof(bst_uint) );
+                fo.Write( &fvalue[0] , fvalue.size() * sizeof(bst_float) );
+            }
+        }
+        /*!
+         * \brief load data from binary stream
+         *        note: since row_ptr stores size_t, the format is not
+         *        consistent between 64-bit and 32-bit machines
+         * \param fi input stream
+         */
+        inline void LoadBinary( utils::IStream &fi ){
+            size_t nrow;
+            utils::Assert( fi.Read( &nrow, sizeof(size_t) ) != 0, "Load FMatrixS" );
+            row_ptr.resize( nrow + 1 );
+            utils::Assert( fi.Read( &row_ptr[0], row_ptr.size() * sizeof(size_t) ) != 0, "Load FMatrixS" );
+
+            findex.resize( row_ptr.back() ); fvalue.resize( row_ptr.back() );
+            if( findex.size() != 0 ){
+                utils::Assert( fi.Read( &findex[0] , findex.size() * sizeof(bst_uint) ) != 0 , "Load FMatrixS" );
+                utils::Assert( fi.Read( &fvalue[0] , fvalue.size() * sizeof(bst_float) ) != 0, "Load FMatrixS" );
+            }
+        }
     };
 };
};
diff --git a/booster/xgboost_gbmbase.h b/booster/xgboost_gbmbase.h
index c43f79871..7ce8d844d 100644
--- a/booster/xgboost_gbmbase.h
+++ b/booster/xgboost_gbmbase.h
@@ -172,6 +172,7 @@ namespace xgboost{
     public:
         /*!
         * \brief do gradient boost training for one step, using the information given
+        *        Note: contents of grad and hess may be modified by DoBoost
        * \param grad first order gradient of each instance
        * \param hess second order gradient of each instance
        * \param feats features of each instance
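
Usage sketch (illustrative only; it assumes the CreateBooster factory and the SetParam
aliases shown in the diff above, and the include path, num_feature value, and per-round
gradient/hessian computation are placeholders for the example):

    #include <vector>
    #include "booster/xgboost.h"   // assumed include path from the repository root

    int main( void ){
        using namespace xgboost::booster;
        IBooster *bst = CreateBooster( 1 );     // booster_type 1 selects LinearBooster
        bst->SetParam( "num_feature", "127" );  // must be set before InitModel
        bst->SetParam( "eta", "0.5" );          // learning rate (alias of learning_rate)
        bst->SetParam( "lambda", "1.0" );       // L2 regularization (alias of reg_lambda)
        bst->SetParam( "alpha", "0.1" );        // L1 regularization (alias of reg_alpha)
        bst->InitModel();
        // each boosting round: fill grad/hess from the objective, then call
        //   bst->DoBoost( grad, hess, smat, root_index );
        // where smat is an FMatrixS::Image of the training data
        delete bst;
        return 0;
    }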