From 2a92c82b92e12abdb36f0fa819b2ef6a9b74b8ab Mon Sep 17 00:00:00 2001 From: tqchen Date: Fri, 15 Aug 2014 20:15:58 -0700 Subject: [PATCH 01/52] start unity refactor --- Makefile | 12 +- README.md | 45 +- booster/linear/xgboost_linear.hpp | 200 --------- booster/tree/xgboost_base_treemaker.hpp | 147 ------- booster/tree/xgboost_col_treemaker.hpp | 335 -------------- booster/tree/xgboost_row_treemaker.hpp | 386 ----------------- booster/tree/xgboost_svdf_tree.hpp | 429 ------------------ booster/tree/xgboost_tree.hpp | 268 ------------ booster/tree/xgboost_tree_model.h | 554 ------------------------ booster/xgboost-inl.hpp | 39 -- booster/xgboost.h | 157 ------- booster/xgboost_data.h | 396 ----------------- booster/xgboost_gbmbase.h | 429 ------------------ data.h | 293 +++++++++++++ gbm/gbm.h | 82 ++++ gbm/gbtree-inl.hpp | 365 ++++++++++++++++ learner/dmatrix.h | 84 ++++ learner/evaluation-inl.hpp | 346 +++++++++++++++ learner/evaluation.h | 82 ++++ learner/helper_utils.h | 50 +++ learner/learner-inl.hpp | 296 +++++++++++++ learner/objective-inl.hpp | 137 ++++++ learner/objective.h | 80 ++++ regrank/xgboost_regrank.h | 401 ----------------- regrank/xgboost_regrank_data.h | 260 ----------- regrank/xgboost_regrank_eval.h | 375 ---------------- regrank/xgboost_regrank_main.cpp | 303 ------------- regrank/xgboost_regrank_obj.h | 131 ------ regrank/xgboost_regrank_obj.hpp | 353 --------------- regrank/xgboost_regrank_utils.h | 45 -- tree/model.h | 492 +++++++++++++++++++++ tree/param.h | 262 +++++++++++ tree/updater.h | 70 +++ tree/updater_colmaker-inl.hpp | 357 +++++++++++++++ tree/updater_prune-inl.hpp | 67 +++ utils/{xgboost_config.h => config.h} | 0 utils/fmap.h | 80 ++++ utils/io.h | 104 +++++ utils/iterator.h | 40 ++ utils/matrix_csr.h | 123 ++++++ utils/{xgboost_omp.h => omp.h} | 12 +- utils/random.h | 102 +++++ utils/utils.h | 94 ++++ utils/xgboost_fmap.h | 123 ------ utils/xgboost_matrix_csr.h | 157 ------- utils/xgboost_random.h | 148 ------- utils/xgboost_stream.h | 54 --- utils/xgboost_utils.h | 70 --- xgunity.cpp | 27 ++ 49 files changed, 3659 insertions(+), 5803 deletions(-) delete mode 100644 booster/linear/xgboost_linear.hpp delete mode 100644 booster/tree/xgboost_base_treemaker.hpp delete mode 100644 booster/tree/xgboost_col_treemaker.hpp delete mode 100644 booster/tree/xgboost_row_treemaker.hpp delete mode 100644 booster/tree/xgboost_svdf_tree.hpp delete mode 100644 booster/tree/xgboost_tree.hpp delete mode 100644 booster/tree/xgboost_tree_model.h delete mode 100644 booster/xgboost-inl.hpp delete mode 100644 booster/xgboost.h delete mode 100644 booster/xgboost_data.h delete mode 100644 booster/xgboost_gbmbase.h create mode 100644 data.h create mode 100644 gbm/gbm.h create mode 100644 gbm/gbtree-inl.hpp create mode 100644 learner/dmatrix.h create mode 100644 learner/evaluation-inl.hpp create mode 100644 learner/evaluation.h create mode 100644 learner/helper_utils.h create mode 100644 learner/learner-inl.hpp create mode 100644 learner/objective-inl.hpp create mode 100644 learner/objective.h delete mode 100644 regrank/xgboost_regrank.h delete mode 100644 regrank/xgboost_regrank_data.h delete mode 100644 regrank/xgboost_regrank_eval.h delete mode 100644 regrank/xgboost_regrank_main.cpp delete mode 100644 regrank/xgboost_regrank_obj.h delete mode 100644 regrank/xgboost_regrank_obj.hpp delete mode 100644 regrank/xgboost_regrank_utils.h create mode 100644 tree/model.h create mode 100644 tree/param.h create mode 100644 tree/updater.h create mode 100644 tree/updater_colmaker-inl.hpp 
create mode 100644 tree/updater_prune-inl.hpp rename utils/{xgboost_config.h => config.h} (100%) create mode 100644 utils/fmap.h create mode 100644 utils/io.h create mode 100644 utils/iterator.h create mode 100644 utils/matrix_csr.h rename utils/{xgboost_omp.h => omp.h} (71%) create mode 100644 utils/random.h create mode 100644 utils/utils.h delete mode 100644 utils/xgboost_fmap.h delete mode 100644 utils/xgboost_matrix_csr.h delete mode 100644 utils/xgboost_random.h delete mode 100644 utils/xgboost_stream.h delete mode 100644 utils/xgboost_utils.h create mode 100644 xgunity.cpp diff --git a/Makefile b/Makefile index 780066aca..3a816b78e 100644 --- a/Makefile +++ b/Makefile @@ -1,16 +1,16 @@ -export CC = gcc -export CXX = g++ -export CFLAGS = -Wall -O3 -msse2 -Wno-unknown-pragmas -fopenmp +export CC = clang +export CXX = clang++ +export CFLAGS = -Wall -O3 -msse2 -Wno-unknown-pragmas # specify tensor path -BIN = xgboost +BIN = xgunity.exe OBJ = .PHONY: clean all all: $(BIN) $(OBJ) export LDFLAGS= -pthread -lm -xgboost: regrank/xgboost_regrank_main.cpp regrank/*.h regrank/*.hpp booster/*.h booster/*/*.hpp booster/*.hpp +xgunity.exe: xgunity.cpp $(BIN) : @@ -23,4 +23,4 @@ install: cp -f -r $(BIN) $(INSTALL_PATH) clean: - $(RM) $(OBJ) $(BIN) *~ + $(RM) $(OBJ) $(BIN) *~ */*~ diff --git a/README.md b/README.md index 7459aa585..1d0a836b1 100644 --- a/README.md +++ b/README.md @@ -8,33 +8,9 @@ Tutorial and Documentation: https://github.com/tqchen/xgboost/wiki Questions and Issues: [https://github.com/tqchen/xgboost/issues](https://github.com/tqchen/xgboost/issues?q=is%3Aissue+label%3Aquestion) -Features +xgboost-unity ======= -* Sparse feature format: - - Sparse feature format allows easy handling of missing values, and improve computation efficiency. -* Push the limit on single machine: - - Efficient implementation that optimizes memory and computation. -* Speed: XGBoost is very fast - - IN [demo/higgs/speedtest.py](demo/kaggle-higgs/speedtest.py), kaggle higgs data it is faster(on our machine 20 times faster using 4 threads) than sklearn.ensemble.GradientBoostingClassifier -* Layout of gradient boosting algorithm to support user defined objective -* Python interface, works with numpy and scipy.sparse matrix - -Supported key components ======= -* Gradient boosting models: - - regression tree (GBRT) - - linear model/lasso -* Objectives to support tasks: - - regression - - classification -* OpenMP implementation - -Planned components ======= -* More objective to support tasks: - - ranking - - matrix factorization - - structured prediction +experimental branch: a refactor of xgboost with cleaner code and more flexibility Build ====== * If your compiler does not come with OpenMP support, it will fire a warning telling you that the code will compile in single-thread mode, and you will get a single-threaded xgboost - You may get an error: -lgomp is not found; you can remove the -fopenmp flag in the Makefile to get a single-threaded xgboost, or upgrade your compiler to compile the multi-threaded version -File extension convention ======= -* .h are interface, utils and data structures, with detailed comment; -* .cpp are implementations that will be compiled, with less comment; -* .hpp are implementations that will be included by .cpp, with less comment +Project Logical Layout ======= +* Dependency order: learner->gbm->tree +* tree contains the implementations of the tree construction algorithms. +* gbm is the gradient boosting interface, which takes trees and other base learners to do boosting.
+ - gbm only takes the gradient as sufficient statistics; it does not compute the gradient (see the interface sketch below). +* learner is the learning module that computes the gradient for a specific objective and passes it to gbm + +File Naming Convention +======= +* The project is templatized to make it easy to adjust the input data structure. +* .h files contain the data structures and interfaces that are needed to use the functions in that layer. +* -inl.hpp files are implementations of the interfaces, like the .cpp files in most projects. + - You only need to understand the interface files to understand the usage of that layer diff --git a/booster/linear/xgboost_linear.hpp b/booster/linear/xgboost_linear.hpp deleted file mode 100644 index 8979bee72..000000000 --- a/booster/linear/xgboost_linear.hpp +++ /dev/null @@ -1,200 +0,0 @@ -#ifndef XGBOOST_LINEAR_HPP -#define XGBOOST_LINEAR_HPP -/*! - * \file xgboost_linear.h - * \brief Implementation of Linear booster, with L1/L2 regularization: Elastic Net - * the update rule is coordinate descent, require column major format - * \author Tianqi Chen: tianqi.tchen@gmail.com - */ -#include -#include - -#include "../xgboost.h" -#include "../../utils/xgboost_utils.h" - -namespace xgboost{ - namespace booster{ - /*! \brief linear model, with L1/L2 regularization */ - template - class LinearBooster : public InterfaceBooster{ - public: - LinearBooster( void ){ silent = 0;} - virtual ~LinearBooster( void ){} - public: - virtual void SetParam( const char *name, const char *val ){ - if( !strcmp( name, "silent") ) silent = atoi( val ); - if( model.weight.size() == 0 ) model.param.SetParam( name, val ); - param.SetParam( name, val ); - } - virtual void LoadModel( utils::IStream &fi ){ - model.LoadModel( fi ); - } - virtual void SaveModel( utils::IStream &fo ) const{ - model.SaveModel( fo ); - } - virtual void InitModel( void ){ - model.InitModel(); - } - public: - virtual void DoBoost( std::vector &grad, - std::vector &hess, - const FMatrix &fmat, - const std::vector &root_index ){ - utils::Assert( grad.size() < UINT_MAX, "number of instance exceed what we can handle" ); - this->UpdateWeights( grad, hess, fmat ); - } - inline float Predict( const FMatrix &fmat, bst_uint ridx, unsigned root_index ){ - float sum = model.bias(); - for( typename FMatrix::RowIter it = fmat.GetRow(ridx); it.Next(); ){ - sum += model.weight[ it.findex() ] * it.fvalue(); - } - return sum; - } - virtual float Predict( const std::vector &feat, - const std::vector &funknown, - unsigned rid = 0 ){ - float sum = model.bias(); - for( size_t i = 0; i < feat.size(); i ++ ){ - if( funknown[i] ) continue; - sum += model.weight[ i ] * feat[ i ]; - } - return sum; - } - - protected: - // training parameter - struct ParamTrain{ - /*! \brief learning_rate */ - float learning_rate; - /*! \brief regularization weight for L2 norm */ - float reg_lambda; - /*! \brief regularization weight for L1 norm */ - float reg_alpha; - /*!
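To make the learner->gbm->tree dependency order concrete, here is a minimal C++ sketch of the layering; the class and method names (IGradBooster, IObjFunction, Learner) are hypothetical stand-ins, not the actual interfaces introduced by this patch:

// hypothetical sketch: the learner computes gradients from an objective,
// the gbm layer only consumes them; tree updaters plug in below gbm
#include <vector>

struct IGradBooster {
  // gbm layer: takes gradient/hessian statistics as input,
  // it never computes them itself
  virtual void DoBoost(std::vector<float> &grad, std::vector<float> &hess) = 0;
  virtual ~IGradBooster(void) {}
};

struct IObjFunction {
  // maps predictions and labels to gradient statistics
  virtual void GetGradient(const std::vector<float> &pred,
                           const std::vector<float> &label,
                           std::vector<float> *grad,
                           std::vector<float> *hess) = 0;
  virtual ~IObjFunction(void) {}
};

class Learner {
 public:
  // one boosting iteration: objective -> gradients -> gbm
  void UpdateOneIter(const std::vector<float> &pred,
                     const std::vector<float> &label) {
    std::vector<float> grad, hess;
    obj_->GetGradient(pred, label, &grad, &hess);
    gbm_->DoBoost(grad, hess);
  }
 private:
  IObjFunction *obj_;
  IGradBooster *gbm_;
};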
\brief regularization weight for L2 norm in bias */ - float reg_lambda_bias; - - ParamTrain( void ){ - reg_alpha = 0.0f; reg_lambda = 0.0f; reg_lambda_bias = 0.0f; - learning_rate = 1.0f; - } - inline void SetParam( const char *name, const char *val ){ - // sync-names - if( !strcmp( "eta", name ) ) learning_rate = (float)atof( val ); - if( !strcmp( "lambda", name ) ) reg_lambda = (float)atof( val ); - if( !strcmp( "alpha", name ) ) reg_alpha = (float)atof( val ); - if( !strcmp( "lambda_bias", name ) ) reg_lambda_bias = (float)atof( val ); - // real names - if( !strcmp( "learning_rate", name ) ) learning_rate = (float)atof( val ); - if( !strcmp( "reg_lambda", name ) ) reg_lambda = (float)atof( val ); - if( !strcmp( "reg_alpha", name ) ) reg_alpha = (float)atof( val ); - if( !strcmp( "reg_lambda_bias", name ) ) reg_lambda_bias = (float)atof( val ); - } - // given original weight calculate delta - inline double CalcDelta( double sum_grad, double sum_hess, double w ){ - if( sum_hess < 1e-5f ) return 0.0f; - double tmp = w - ( sum_grad + reg_lambda*w )/( sum_hess + reg_lambda ); - if ( tmp >=0 ){ - return std::max(-( sum_grad + reg_lambda*w + reg_alpha)/(sum_hess+reg_lambda),-w); - }else{ - return std::min(-( sum_grad + reg_lambda*w - reg_alpha)/(sum_hess+reg_lambda),-w); - } - } - // given original weight calculate delta bias - inline double CalcDeltaBias( double sum_grad, double sum_hess, double w ){ - return - (sum_grad + reg_lambda_bias*w) / (sum_hess + reg_lambda_bias ); - } - }; - - // model for linear booster - class Model{ - public: - // model parameter - struct Param{ - // number of feature dimension - int num_feature; - // reserved field - int reserved[ 32 ]; - // constructor - Param( void ){ - num_feature = 0; - memset( reserved, 0, sizeof(reserved) ); - } - inline void SetParam( const char *name, const char *val ){ - if( !strcmp( name, "num_feature" ) ) num_feature = atoi( val ); - } - }; - public: - Param param; - // weight for each of feature, bias is the last one - std::vector weight; - public: - // initialize the model parameter - inline void InitModel( void ){ - // bias is the last weight - weight.resize( param.num_feature + 1 ); - std::fill( weight.begin(), weight.end(), 0.0f ); - } - // save the model to file - inline void SaveModel( utils::IStream &fo ) const{ - fo.Write( ¶m, sizeof(Param) ); - fo.Write( &weight[0], sizeof(float) * weight.size() ); - } - // load model from file - inline void LoadModel( utils::IStream &fi ){ - utils::Assert( fi.Read( ¶m, sizeof(Param) ) != 0, "Load LinearBooster" ); - weight.resize( param.num_feature + 1 ); - utils::Assert( fi.Read( &weight[0], sizeof(float) * weight.size() ) != 0, "Load LinearBooster" ); - } - // model bias - inline float &bias( void ){ - return weight.back(); - } - }; - private: - int silent; - protected: - Model model; - ParamTrain param; - protected: - // update weights, should work for any FMatrix - inline void UpdateWeights( std::vector &grad, - const std::vector &hess, - const FMatrix &smat ){ - {// optimize bias - double sum_grad = 0.0, sum_hess = 0.0; - for( size_t i = 0; i < grad.size(); i ++ ){ - sum_grad += grad[ i ]; sum_hess += hess[ i ]; - } - // remove bias effect - double dw = param.learning_rate * param.CalcDeltaBias( sum_grad, sum_hess, model.bias() ); - model.bias() += dw; - // update grad value - for( size_t i = 0; i < grad.size(); i ++ ){ - grad[ i ] += dw * hess[ i ]; - } - } - - // optimize weight - const unsigned nfeat= (unsigned)smat.NumCol(); - for( unsigned i = 0; i < nfeat; i ++ ){ - if( 
!smat.GetSortedCol( i ).Next() ) continue; - double sum_grad = 0.0, sum_hess = 0.0; - for( typename FMatrix::ColIter it = smat.GetSortedCol(i); it.Next(); ){ - const float v = it.fvalue(); - sum_grad += grad[ it.rindex() ] * v; - sum_hess += hess[ it.rindex() ] * v * v; - } - float w = model.weight[ i ]; - double dw = param.learning_rate * param.CalcDelta( sum_grad, sum_hess, w ); - model.weight[ i ] += dw; - // update grad value - for( typename FMatrix::ColIter it = smat.GetSortedCol(i); it.Next(); ){ - const float v = it.fvalue(); - grad[ it.rindex() ] += hess[ it.rindex() ] * v * dw; - } - } - } - }; - }; -}; -#endif diff --git a/booster/tree/xgboost_base_treemaker.hpp b/booster/tree/xgboost_base_treemaker.hpp deleted file mode 100644 index 17c094336..000000000 --- a/booster/tree/xgboost_base_treemaker.hpp +++ /dev/null @@ -1,147 +0,0 @@ -#ifndef XGBOOST_BASE_TREEMAKER_HPP -#define XGBOOST_BASE_TREEMAKER_HPP -/*! - * \file xgboost_base_treemaker.hpp - * \brief implementation of base data structure for regression tree maker, - * gives common operations of tree construction steps template - * - * \author Tianqi Chen: tianqi.tchen@gmail.com - */ -#include -#include "xgboost_tree_model.h" - -namespace xgboost{ - namespace booster{ - class BaseTreeMaker{ - protected: - BaseTreeMaker( RegTree &tree, - const TreeParamTrain ¶m ) - : tree( tree ), param( param ){} - protected: - // statistics that is helpful to decide a split - struct SplitEntry{ - /*! \brief loss change after split this node */ - float loss_chg; - /*! \brief split index */ - unsigned sindex; - /*! \brief split value */ - float split_value; - /*! \brief constructor */ - SplitEntry( void ){ - loss_chg = 0.0f; - split_value = 0.0f; sindex = 0; - } - // This function gives better priority to lower index when loss_chg equals - // not the best way, but helps to give consistent result during multi-thread execution - inline bool NeedReplace( float loss_chg, unsigned split_index ) const{ - if( this->split_index() <= split_index ){ - return loss_chg > this->loss_chg; - }else{ - return !(this->loss_chg > loss_chg); - } - } - inline bool Update( const SplitEntry &e ){ - if( this->NeedReplace( e.loss_chg, e.split_index() ) ){ - this->loss_chg = e.loss_chg; - this->sindex = e.sindex; - this->split_value = e.split_value; - return true; - } else{ - return false; - } - } - inline bool Update( float loss_chg, unsigned split_index, float split_value, bool default_left ){ - if( this->NeedReplace( loss_chg, split_index ) ){ - this->loss_chg = loss_chg; - if( default_left ) split_index |= (1U << 31); - this->sindex = split_index; - this->split_value = split_value; - return true; - }else{ - return false; - } - } - inline unsigned split_index( void ) const{ - return sindex & ( (1U<<31) - 1U ); - } - inline bool default_left( void ) const{ - return (sindex >> 31) != 0; - } - }; - struct NodeEntry{ - /*! \brief sum gradient statistics */ - double sum_grad; - /*! \brief sum hessian statistics */ - double sum_hess; - /*! \brief loss of this node, without split */ - float root_gain; - /*! \brief weight calculated related to current data */ - float weight; - /*! 
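The CalcDelta routine above is the elastic-net coordinate descent step: the unconstrained update is the Newton step -(G + lambda*w)/(H + lambda), the L1 weight alpha pulls it toward zero, and the result is clipped at -w so a weight cannot cross zero within one update. A self-contained sketch with a worked number, assuming only the formula shown above:

// sketch of the elastic-net coordinate update in CalcDelta above
#include <algorithm>
#include <cstdio>

double CalcDelta(double G, double H, double w, double lambda, double alpha) {
  if (H < 1e-5) return 0.0;                       // too little curvature, skip
  if (w - (G + lambda * w) / (H + lambda) >= 0.0) // sign of the L2-only update
    return std::max(-(G + lambda * w + alpha) / (H + lambda), -w);
  return std::min(-(G + lambda * w - alpha) / (H + lambda), -w);
}

int main(void) {
  // with alpha large enough the step is clipped at -w, driving the
  // weight exactly to zero: here it prints -0.1000 for w = 0.1
  std::printf("%.4f\n", CalcDelta(0.3, 2.0, 0.1, 1.0, 1.0));
  return 0;
}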
\brief current best solution */ - SplitEntry best; - NodeEntry( void ){ - sum_grad = sum_hess = 0.0; - weight = root_gain = 0.0f; - } - }; - private: - // try to prune off current leaf, return true if successful - inline void TryPruneLeaf( int nid, int depth ){ - if( tree[ nid ].is_root() ) return; - int pid = tree[ nid ].parent(); - RegTree::NodeStat &s = tree.stat( pid ); - ++ s.leaf_child_cnt; - - if( s.leaf_child_cnt >= 2 && param.need_prune( s.loss_chg, depth - 1 ) ){ - this->stat_num_pruned += 2; - // need to be pruned - tree.ChangeToLeaf( pid, param.learning_rate * s.base_weight ); - // tail recursion - this->TryPruneLeaf( pid, depth - 1 ); - } - } - protected: - /*! \brief do prunning of a tree */ - inline int DoPrune( void ){ - this->stat_num_pruned = 0; - // initialize auxiliary statistics - for( int nid = 0; nid < tree.param.num_nodes; ++ nid ){ - tree.stat( nid ).leaf_child_cnt = 0; - tree.stat( nid ).loss_chg = snode[ nid ].best.loss_chg; - tree.stat( nid ).sum_hess = static_cast( snode[ nid ].sum_hess ); - } - for( int nid = 0; nid < tree.param.num_nodes; ++ nid ){ - if( tree[ nid ].is_leaf() ) this->TryPruneLeaf( nid, tree.GetDepth(nid) ); - } - return this->stat_num_pruned; - } - protected: - /*! \brief update queue expand add in new leaves */ - inline void UpdateQueueExpand( std::vector &qexpand ){ - std::vector newnodes; - for( size_t i = 0; i < qexpand.size(); ++ i ){ - const int nid = qexpand[i]; - if( !tree[ nid ].is_leaf() ){ - newnodes.push_back( tree[nid].cleft() ); - newnodes.push_back( tree[nid].cright() ); - } - } - // use new nodes for qexpand - qexpand = newnodes; - } - protected: - // local helper tmp data structure - // statistics - int stat_num_pruned; - /*! \brief queue of nodes to be expanded */ - std::vector qexpand; - /*! \brief TreeNode Data: statistics for each constructed node, the derived class must maintain this */ - std::vector snode; - protected: - // original data that supports tree construction - RegTree &tree; - const TreeParamTrain ¶m; - }; - }; // namespace booster -}; // namespace xgboost -#endif // XGBOOST_BASE_TREEMAKER_HPP diff --git a/booster/tree/xgboost_col_treemaker.hpp b/booster/tree/xgboost_col_treemaker.hpp deleted file mode 100644 index 865439b57..000000000 --- a/booster/tree/xgboost_col_treemaker.hpp +++ /dev/null @@ -1,335 +0,0 @@ -#ifndef XGBOOST_COL_TREEMAKER_HPP -#define XGBOOST_COL_TREEMAKER_HPP -/*! 
- * \file xgboost_col_treemaker.hpp - * \brief implementation of regression tree maker, - * use a column based approach, with OpenMP - * \author Tianqi Chen: tianqi.tchen@gmail.com - */ -// use openmp -#include -#include "xgboost_tree_model.h" -#include "../../utils/xgboost_omp.h" -#include "../../utils/xgboost_random.h" -#include "../../utils/xgboost_fmap.h" -#include "xgboost_base_treemaker.hpp" - -namespace xgboost{ - namespace booster{ - template - class ColTreeMaker : protected BaseTreeMaker{ - public: - ColTreeMaker( RegTree &tree, - const TreeParamTrain ¶m, - const std::vector &grad, - const std::vector &hess, - const FMatrix &smat, - const std::vector &root_index, - const utils::FeatConstrain &constrain ) - : BaseTreeMaker( tree, param ), - grad(grad), hess(hess), - smat(smat), root_index(root_index), constrain(constrain) { - utils::Assert( grad.size() == hess.size(), "booster:invalid input" ); - utils::Assert( smat.NumRow() == hess.size(), "booster:invalid input" ); - utils::Assert( root_index.size() == 0 || root_index.size() == hess.size(), "booster:invalid input" ); - utils::Assert( smat.HaveColAccess(), "ColTreeMaker: need column access matrix" ); - } - inline void Make( int& stat_max_depth, int& stat_num_pruned ){ - this->InitData(); - this->InitNewNode( this->qexpand ); - stat_max_depth = 0; - - for( int depth = 0; depth < param.max_depth; ++ depth ){ - this->FindSplit( depth ); - this->UpdateQueueExpand( this->qexpand ); - this->InitNewNode( this->qexpand ); - // if nothing left to be expand, break - if( qexpand.size() == 0 ) break; - stat_max_depth = depth + 1; - } - // set all the rest expanding nodes to leaf - for( size_t i = 0; i < qexpand.size(); ++ i ){ - const int nid = qexpand[i]; - tree[ nid ].set_leaf( snode[nid].weight * param.learning_rate ); - } - // start prunning the tree - stat_num_pruned = this->DoPrune(); - } - private: - /*! \brief per thread x per node entry to store tmp data */ - struct ThreadEntry{ - /*! \brief sum gradient statistics */ - double sum_grad; - /*! \brief sum hessian statistics */ - double sum_hess; - /*! \brief last feature value scanned */ - float last_fvalue; - /*! \brief current best solution */ - SplitEntry best; - /*! \brief constructor */ - ThreadEntry( void ){ - this->ClearStats(); - } - /*! 
\brief clear statistics */ - inline void ClearStats( void ){ - sum_grad = sum_hess = 0.0; - } - }; - private: - // make leaf nodes for all qexpand, update node statistics, mark leaf value - inline void InitNewNode( const std::vector &qexpand ){ - {// setup statistics space for each tree node - for( size_t i = 0; i < stemp.size(); ++ i ){ - stemp[i].resize( tree.param.num_nodes, ThreadEntry() ); - } - snode.resize( tree.param.num_nodes, NodeEntry() ); - } - - const unsigned ndata = static_cast( position.size() ); - - #pragma omp parallel for schedule( static ) - for( unsigned i = 0; i < ndata; ++ i ){ - const int tid = omp_get_thread_num(); - if( position[i] < 0 ) continue; - stemp[tid][ position[i] ].sum_grad += grad[i]; - stemp[tid][ position[i] ].sum_hess += hess[i]; - } - - for( size_t j = 0; j < qexpand.size(); ++ j ){ - const int nid = qexpand[ j ]; - double sum_grad = 0.0, sum_hess = 0.0; - for( size_t tid = 0; tid < stemp.size(); tid ++ ){ - sum_grad += stemp[tid][nid].sum_grad; - sum_hess += stemp[tid][nid].sum_hess; - } - // update node statistics - snode[nid].sum_grad = sum_grad; - snode[nid].sum_hess = sum_hess; - snode[nid].root_gain = param.CalcRootGain( sum_grad, sum_hess ); - if( !tree[nid].is_root() ){ - snode[nid].weight = param.CalcWeight( sum_grad, sum_hess, tree.stat( tree[nid].parent() ).base_weight ); - tree.stat(nid).base_weight = snode[nid].weight; - }else{ - snode[nid].weight = param.CalcWeight( sum_grad, sum_hess, 0.0f ); - tree.stat(nid).base_weight = snode[nid].weight; - } - } - } - private: - // enumerate the split values of specific feature - template - inline void EnumerateSplit( Iter it, const unsigned fid, std::vector &temp, bool is_forward_search ){ - // clear all the temp statistics - for( size_t j = 0; j < qexpand.size(); ++ j ){ - temp[ qexpand[j] ].ClearStats(); - } - - while( it.Next() ){ - const bst_uint ridx = it.rindex(); - const int nid = position[ ridx ]; - if( nid < 0 ) continue; - - const float fvalue = it.fvalue(); - ThreadEntry &e = temp[ nid ]; - - // test if first hit, this is fine, because we set 0 during init - if( e.sum_hess == 0.0 ){ - e.sum_grad = grad[ ridx ]; - e.sum_hess = hess[ ridx ]; - e.last_fvalue = fvalue; - }else{ - // try to find a split - if( fabsf(fvalue - e.last_fvalue) > rt_2eps && e.sum_hess >= param.min_child_weight ){ - const double csum_hess = snode[ nid ].sum_hess - e.sum_hess; - if( csum_hess >= param.min_child_weight ){ - const double csum_grad = snode[nid].sum_grad - e.sum_grad; - const double loss_chg = - + param.CalcGain( e.sum_grad, e.sum_hess, snode[nid].weight ) - + param.CalcGain( csum_grad , csum_hess , snode[nid].weight ) - - snode[nid].root_gain; - e.best.Update( loss_chg, fid, (fvalue + e.last_fvalue) * 0.5f, !is_forward_search ); - } - } - // update the statistics - e.sum_grad += grad[ ridx ]; - e.sum_hess += hess[ ridx ]; - e.last_fvalue = fvalue; - } - } - // finish updating all statistics, check if it is possible to include all sum statistics - for( size_t i = 0; i < qexpand.size(); ++ i ){ - const int nid = qexpand[ i ]; - ThreadEntry &e = temp[ nid ]; - const double csum_hess = snode[nid].sum_hess - e.sum_hess; - - if( e.sum_hess >= param.min_child_weight && csum_hess >= param.min_child_weight ){ - const double csum_grad = snode[nid].sum_grad - e.sum_grad; - const double loss_chg = - + param.CalcGain( e.sum_grad, e.sum_hess, snode[nid].weight ) - + param.CalcGain( csum_grad, csum_hess, snode[nid].weight ) - - snode[nid].root_gain; - const float delta = is_forward_search ? 
rt_eps:-rt_eps; - e.best.Update( loss_chg, fid, e.last_fvalue + delta, !is_forward_search ); - } - } - } - - // find splits at current level - inline void FindSplit( int depth ){ - const unsigned nsize = static_cast( feat_index.size() ); - - #pragma omp parallel for schedule( dynamic, 1 ) - for( unsigned i = 0; i < nsize; ++ i ){ - const unsigned fid = feat_index[i]; - const int tid = omp_get_thread_num(); - if( param.need_forward_search() ){ - this->EnumerateSplit( smat.GetSortedCol(fid), fid, stemp[tid], true ); - } - if( param.need_backward_search() ){ - this->EnumerateSplit( smat.GetReverseSortedCol(fid), fid, stemp[tid], false ); - } - } - - // after this each thread's stemp will get the best candidates, aggregate results - for( size_t i = 0; i < qexpand.size(); ++ i ){ - const int nid = qexpand[ i ]; - NodeEntry &e = snode[ nid ]; - for( int tid = 0; tid < this->nthread; ++ tid ){ - e.best.Update( stemp[ tid ][ nid ].best ); - } - - // now we know the solution in snode[ nid ], set split - if( e.best.loss_chg > rt_eps ){ - tree.AddChilds( nid ); - tree[ nid ].set_split( e.best.split_index(), e.best.split_value, e.best.default_left() ); - } else{ - tree[ nid ].set_leaf( e.weight * param.learning_rate ); - } - } - - {// reset position - // step 1, set default direct nodes to default, and leaf nodes to -1, - const unsigned ndata = static_cast( position.size() ); - #pragma omp parallel for schedule( static ) - for( unsigned i = 0; i < ndata; ++ i ){ - const int nid = position[i]; - if( nid >= 0 ){ - if( tree[ nid ].is_leaf() ){ - position[i] = -1; - }else{ - // push to default branch, correct latter - position[i] = tree[nid].default_left() ? tree[nid].cleft(): tree[nid].cright(); - } - } - } - - // step 2, classify the non-default data into right places - std::vector fsplits; - - for( size_t i = 0; i < qexpand.size(); ++ i ){ - const int nid = qexpand[i]; - if( !tree[nid].is_leaf() ) fsplits.push_back( tree[nid].split_index() ); - } - std::sort( fsplits.begin(), fsplits.end() ); - fsplits.resize( std::unique( fsplits.begin(), fsplits.end() ) - fsplits.begin() ); - - const unsigned nfeats = static_cast( fsplits.size() ); - #pragma omp parallel for schedule( dynamic, 1 ) - for( unsigned i = 0; i < nfeats; ++ i ){ - const unsigned fid = fsplits[i]; - for( typename FMatrix::ColIter it = smat.GetSortedCol( fid ); it.Next(); ){ - const bst_uint ridx = it.rindex(); - int nid = position[ ridx ]; - if( nid == -1 ) continue; - // go back to parent, correct those who are not default - nid = tree[ nid ].parent(); - if( tree[ nid ].split_index() == fid ){ - if( it.fvalue() < tree[nid].split_cond() ){ - position[ ridx ] = tree[ nid ].cleft(); - }else{ - position[ ridx ] = tree[ nid ].cright(); - } - } - } - } - } - } - private: - // initialize temp data structure - inline void InitData( void ){ - { - position.resize( grad.size() ); - if( root_index.size() == 0 ){ - std::fill( position.begin(), position.end(), 0 ); - }else{ - for( size_t i = 0; i < root_index.size(); ++ i ){ - position[i] = root_index[i]; - utils::Assert( root_index[i] < (unsigned)tree.param.num_roots, "root index exceed setting" ); - } - } - // mark delete for the deleted datas - for( size_t i = 0; i < grad.size(); ++ i ){ - if( hess[i] < 0.0f ) position[i] = -1; - } - if( param.subsample < 1.0f - 1e-6f ){ - for( size_t i = 0; i < grad.size(); ++ i ){ - if( hess[i] < 0.0f ) continue; - if( random::SampleBinary( param.subsample) == 0 ){ - position[ i ] = -1; - } - } - } - } - - {// initialize feature index - int ncol = static_cast( 
smat.NumCol() ); - for( int i = 0; i < ncol; i ++ ){ - if( smat.GetSortedCol(i).Next() && constrain.NotBanned(i) ){ - feat_index.push_back( i ); - } - } - random::Shuffle( feat_index ); - } - {// setup temp space for each thread - if( param.nthread != 0 ){ - omp_set_num_threads( param.nthread ); - } - #pragma omp parallel - { - this->nthread = omp_get_num_threads(); - } - - // reserve a small space - stemp.resize( this->nthread, std::vector() ); - for( size_t i = 0; i < stemp.size(); ++ i ){ - stemp[i].reserve( 256 ); - } - snode.reserve( 256 ); - } - - {// expand query - qexpand.reserve( 256 ); qexpand.clear(); - for( int i = 0; i < tree.param.num_roots; ++ i ){ - qexpand.push_back( i ); - } - } - } - private: - // number of omp thread used during training - int nthread; - // Per feature: shuffle index of each feature index - std::vector feat_index; - // Instance Data: current node position in the tree of each instance - std::vector position; - // PerThread x PerTreeNode: statistics for per thread construction - std::vector< std::vector > stemp; - private: - const std::vector &grad; - const std::vector &hess; - const FMatrix &smat; - const std::vector &root_index; - const utils::FeatConstrain &constrain; - }; - }; -}; -#endif diff --git a/booster/tree/xgboost_row_treemaker.hpp b/booster/tree/xgboost_row_treemaker.hpp deleted file mode 100644 index e9b005b79..000000000 --- a/booster/tree/xgboost_row_treemaker.hpp +++ /dev/null @@ -1,386 +0,0 @@ -#ifndef XGBOOST_ROW_TREEMAKER_HPP -#define XGBOOST_ROW_TREEMAKER_HPP -/*! - * \file xgboost_row_treemaker.hpp - * \brief implementation of regression tree maker, - * use a row based approach - * \author Tianqi Chen: tianqi.tchen@gmail.com - */ -// use openmp -#include -#include "xgboost_tree_model.h" -#include "../../utils/xgboost_omp.h" -#include "../../utils/xgboost_random.h" -#include "../../utils/xgboost_fmap.h" -#include "xgboost_base_treemaker.hpp" - -namespace xgboost{ - namespace booster{ - template - class RowTreeMaker : protected BaseTreeMaker{ - public: - RowTreeMaker( RegTree &tree, - const TreeParamTrain ¶m, - const std::vector &grad, - const std::vector &hess, - const FMatrix &smat, - const std::vector &root_index, - const utils::FeatConstrain &constrain ) - : BaseTreeMaker( tree, param ), - grad(grad), hess(hess), - smat(smat), root_index(root_index), constrain(constrain) { - utils::Assert( grad.size() == hess.size(), "booster:invalid input" ); - utils::Assert( smat.NumRow() == hess.size(), "booster:invalid input" ); - utils::Assert( root_index.size() == 0 || root_index.size() == hess.size(), "booster:invalid input" ); - {// setup temp space for each thread - if( param.nthread != 0 ){ - omp_set_num_threads( param.nthread ); - } - #pragma omp parallel - { - this->nthread = omp_get_num_threads(); - } - tmp_rptr.resize( this->nthread, std::vector() ); - snode.reserve( 256 ); - } - } - inline void Make( int& stat_max_depth, int& stat_num_pruned ){ - this->InitData(); - this->InitNewNode( this->qexpand ); - stat_max_depth = 0; - - for( int depth = 0; depth < param.max_depth; ++ depth ){ - this->FindSplit( this->qexpand, depth ); - this->UpdateQueueExpand( this->qexpand ); - this->InitNewNode( this->qexpand ); - // if nothing left to be expand, break - if( qexpand.size() == 0 ) break; - stat_max_depth = depth + 1; - } - // set all the rest expanding nodes to leaf - for( size_t i = 0; i < qexpand.size(); ++ i ){ - const int nid = qexpand[i]; - tree[ nid ].set_leaf( snode[nid].weight * param.learning_rate ); - } - // start prunning the tree 
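Both tree makers score a candidate split the same way: loss_chg = CalcGain(left) + CalcGain(right) - root_gain, and the split is kept only when loss_chg exceeds rt_eps. A standalone sketch of that computation, assuming the plain L2-regularized objective; the CalcGain in this code base additionally takes the parent base weight for the layer-wise variant, which is omitted here:

// standalone sketch of the split gain maximized by the tree makers
#include <cstdio>

inline double CalcGain(double sum_grad, double sum_hess, double reg_lambda) {
  return (sum_grad * sum_grad) / (sum_hess + reg_lambda);
}

int main(void) {
  // gradient/hessian sums of the two children of a candidate split
  const double gl = -4.0, hl = 5.0, gr = 2.0, hr = 3.0, lambda = 1.0;
  const double root_gain = CalcGain(gl + gr, hl + hr, lambda);
  const double loss_chg =
      CalcGain(gl, hl, lambda) + CalcGain(gr, hr, lambda) - root_gain;
  // optimal leaf weight under the same objective: w* = -G / (H + lambda)
  std::printf("loss_chg=%.3f wl=%.3f wr=%.3f\n",
              loss_chg, -gl / (hl + lambda), -gr / (hr + lambda));
  return 0; // prints loss_chg=3.222, so this split would be accepted
}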
- stat_num_pruned = this->DoPrune(); - } - // expand a specific node - inline bool Expand( const std::vector &valid_index, int nid ){ - if( valid_index.size() == 0 ) return false; - this->InitDataExpand( valid_index, nid ); - this->InitNewNode( this->qexpand ); - this->FindSplit( nid, tmp_rptr[0] ); - - // update node statistics - for( size_t i = 0; i < qexpand.size(); ++ i ){ - const int nid = qexpand[i]; - tree.stat( nid ).loss_chg = snode[ nid ].best.loss_chg; - tree.stat( nid ).sum_hess = static_cast( snode[ nid ].sum_hess ); - } - // change the leaf - this->UpdateQueueExpand( this->qexpand ); - this->InitNewNode( this->qexpand ); - - // set all the rest expanding nodes to leaf - for( size_t i = 0; i < qexpand.size(); ++ i ){ - const int nid = qexpand[i]; - - tree[ nid ].set_leaf( snode[nid].weight * param.learning_rate ); - tree.stat( nid ).loss_chg = 0.0f; - tree.stat( nid ).sum_hess = static_cast( snode[ nid ].sum_hess ); - tree.param.max_depth = std::max( tree.param.max_depth, tree.GetDepth( nid ) ); - } - if( qexpand.size() != 0 ) { - return true; - }else{ - return false; - } - } - // collapse specific node - inline void Collapse( const std::vector &valid_index, int nid ){ - if( valid_index.size() == 0 ) return; - this->InitDataExpand( valid_index, nid ); - this->InitNewNode( this->qexpand ); - tree.stat( nid ).loss_chg = 0.0f; - tree.stat( nid ).sum_hess = static_cast( snode[ nid ].sum_hess ); - tree.CollapseToLeaf( nid, snode[nid].weight * param.learning_rate ); - } - private: - // make leaf nodes for all qexpand, update node statistics, mark leaf value - inline void InitNewNode( const std::vector &qexpand ){ - snode.resize( tree.param.num_nodes, NodeEntry() ); - - for( size_t j = 0; j < qexpand.size(); ++j ){ - const int nid = qexpand[ j ]; - double sum_grad = 0.0, sum_hess = 0.0; - - for( bst_uint i = node_bound[nid].first; i < node_bound[nid].second; ++i ){ - const bst_uint ridx = row_index_set[i]; - sum_grad += grad[ridx]; sum_hess += hess[ridx]; - } - // update node statistics - snode[nid].sum_grad = sum_grad; - snode[nid].sum_hess = sum_hess; - - snode[nid].root_gain = param.CalcRootGain( sum_grad, sum_hess ); - if( !tree[nid].is_root() ){ - snode[nid].weight = param.CalcWeight( sum_grad, sum_hess, tree.stat( tree[nid].parent() ).base_weight ); - tree.stat(nid).base_weight = snode[nid].weight; - }else{ - snode[nid].weight = param.CalcWeight( sum_grad, sum_hess, 0.0f ); - tree.stat(nid).base_weight = snode[nid].weight; - } - } - } - private: - // enumerate the split values of specific feature - template - inline void EnumerateSplit( Iter it, SplitEntry &best, const int nid, const unsigned fid, bool is_forward_search ){ - float last_fvalue = 0.0f; - double sum_hess = 0.0, sum_grad = 0.0; - const NodeEntry enode = snode[ nid ]; - - while( it.Next() ){ - const bst_uint ridx = it.rindex(); - const float fvalue = it.fvalue(); - - if( sum_hess == 0.0 ){ - sum_grad = grad[ ridx ]; - sum_hess = hess[ ridx ]; - last_fvalue = fvalue; - }else{ - // try to find a split - if( fabsf(fvalue - last_fvalue) > rt_2eps && sum_hess >= param.min_child_weight ){ - const double csum_hess = enode.sum_hess - sum_hess; - if( csum_hess >= param.min_child_weight ){ - const double csum_grad = enode.sum_grad - sum_grad; - const double loss_chg = - + param.CalcGain( sum_grad, sum_hess, enode.weight ) - + param.CalcGain( csum_grad, csum_hess, enode.weight ) - - enode.root_gain; - best.Update( loss_chg, fid, (fvalue + last_fvalue) * 0.5f, !is_forward_search ); - }else{ - // the rest part doesn't meet split 
condition anyway, return - return; - } - } - // update the statistics - sum_grad += grad[ ridx ]; - sum_hess += hess[ ridx ]; - last_fvalue = fvalue; - } - } - - const double csum_hess = enode.sum_hess - sum_hess; - if( sum_hess >= param.min_child_weight && csum_hess >= param.min_child_weight ){ - const double csum_grad = enode.sum_grad - sum_grad; - const double loss_chg = - + param.CalcGain( sum_grad, sum_hess, enode.weight ) - + param.CalcGain( csum_grad, csum_hess, enode.weight ) - - snode[nid].root_gain; - const float delta = is_forward_search ? rt_eps:-rt_eps; - best.Update( loss_chg, fid, last_fvalue + delta, !is_forward_search ); - } - } - private: - inline void FindSplit( const std::vector &qexpand, int depth ){ - int nexpand = (int)qexpand.size(); - if( depth < 3 ){ - for( int i = 0; i < nexpand; ++ i ){ - this->FindSplit( qexpand[i], tmp_rptr[0] ); - } - }else{ - // if get to enough depth, parallelize over node - #pragma omp parallel for schedule(dynamic,1) - for( int i = 0; i < nexpand; ++ i ){ - const int tid = omp_get_thread_num(); - utils::Assert( tid < (int)tmp_rptr.size(), "BUG: FindSplit, tid exceed tmp_rptr size" ); - this->FindSplit( qexpand[i], tmp_rptr[tid] ); - } - } - } - private: - inline void MakeSplit( int nid, unsigned gid ){ - node_bound.resize( tree.param.num_nodes ); - // re-organize the row_index_set after split on nid - const unsigned split_index = tree[nid].split_index(); - const float split_value = tree[nid].split_cond(); - - std::vector right; - bst_uint top = node_bound[nid].first; - for( bst_uint i = node_bound[ nid ].first; i < node_bound[ nid ].second; ++i ){ - const bst_uint ridx = row_index_set[i]; - bool goleft = tree[ nid ].default_left(); - for( typename FMatrix::RowIter it = smat.GetRow(ridx,gid); it.Next(); ){ - if( it.findex() == split_index ){ - if( it.fvalue() < split_value ){ - goleft = true; break; - }else{ - goleft = false; break; - } - } - } - if( goleft ) { - row_index_set[ top ++ ] = ridx; - }else{ - right.push_back( ridx ); - } - } - node_bound[ tree[nid].cleft() ] = std::make_pair( node_bound[nid].first, top ); - node_bound[ tree[nid].cright() ] = std::make_pair( top, node_bound[nid].second ); - - utils::Assert( node_bound[nid].second - top == (bst_uint)right.size(), "BUG:MakeSplit" ); - for( size_t i = 0; i < right.size(); ++ i ){ - row_index_set[ top ++ ] = right[ i ]; - } - } - - // find splits at current level - inline void FindSplit( int nid, std::vector &tmp_rptr ){ - if( tmp_rptr.size() == 0 ){ - tmp_rptr.resize( tree.param.num_feature + 1, 0 ); - } - const bst_uint begin = node_bound[ nid ].first; - const bst_uint end = node_bound[ nid ].second; - const unsigned ncgroup = smat.NumColGroup(); - unsigned best_group = 0; - - for( unsigned gid = 0; gid < ncgroup; ++gid ){ - // records the columns - std::vector centry; - // records the active features - std::vector aclist; - utils::SparseCSRMBuilder builder( tmp_rptr, centry, aclist ); - builder.InitBudget( tree.param.num_feature ); - for( bst_uint i = begin; i < end; ++i ){ - const bst_uint ridx = row_index_set[i]; - for( typename FMatrix::RowIter it = smat.GetRow(ridx,gid); it.Next(); ){ - const bst_uint findex = it.findex(); - if( constrain.NotBanned( findex ) ) builder.AddBudget( findex ); - } - } - builder.InitStorage(); - for( bst_uint i = begin; i < end; ++i ){ - const bst_uint ridx = row_index_set[i]; - for( typename FMatrix::RowIter it = smat.GetRow(ridx,gid); it.Next(); ){ - const bst_uint findex = it.findex(); - if( constrain.NotBanned( findex ) ) { - builder.PushElem( 
findex, FMatrixS::REntry( ridx, it.fvalue() ) ); - } - } - } - // --- end of building column major matrix --- - // after this point, tmp_rptr and entry is ready to use - int naclist = (int)aclist.size(); - // best entry for each thread - SplitEntry nbest, tbest; - #pragma omp parallel private(tbest) - { - #pragma omp for schedule(dynamic,1) - for( int j = 0; j < naclist; ++j ){ - bst_uint findex = static_cast( aclist[j] ); - // local sort can be faster when the features are sparse - std::sort( centry.begin() + tmp_rptr[findex], centry.begin() + tmp_rptr[findex+1], FMatrixS::REntry::cmp_fvalue ); - if( param.need_forward_search() ){ - this->EnumerateSplit( FMatrixS::ColIter( ¢ry[tmp_rptr[findex]]-1, ¢ry[tmp_rptr[findex+1]] - 1 ), - tbest, nid, findex, true ); - } - if( param.need_backward_search() ){ - this->EnumerateSplit( FMatrixS::ColBackIter( ¢ry[tmp_rptr[findex+1]], ¢ry[tmp_rptr[findex]] ), - tbest, nid, findex, false ); - } - } - #pragma omp critical - { - nbest.Update( tbest ); - } - } - // if current solution gives the best - if( snode[nid].best.Update( nbest ) ){ - best_group = gid; - } - // cleanup tmp_rptr for next usage - builder.Cleanup(); - } - - // at this point, we already know the best split - if( snode[nid].best.loss_chg > rt_eps ){ - const SplitEntry &e = snode[nid].best; - tree.AddChilds( nid ); - tree[ nid ].set_split( e.split_index(), e.split_value, e.default_left() ); - this->MakeSplit( nid, best_group ); - }else{ - tree[ nid ].set_leaf( snode[nid].weight * param.learning_rate ); - } - } - private: - // initialize temp data structure - inline void InitData( void ){ - std::vector valid_index; - for( size_t i = 0; i < grad.size(); ++i ){ - if( hess[ i ] < 0.0f ) continue; - if( param.subsample > 1.0f-1e-6f || random::SampleBinary( param.subsample ) != 0 ){ - valid_index.push_back( static_cast(i) ); - } - } - node_bound.resize( tree.param.num_roots ); - - if( root_index.size() == 0 ){ - row_index_set = valid_index; - // set bound of root node - node_bound[0] = std::make_pair( 0, (bst_uint)row_index_set.size() ); - }else{ - std::vector rptr; - utils::SparseCSRMBuilder builder( rptr, row_index_set ); - builder.InitBudget( tree.param.num_roots ); - for( size_t i = 0; i < valid_index.size(); ++i ){ - const bst_uint rid = valid_index[ i ]; - utils::Assert( root_index[ rid ] < (unsigned)tree.param.num_roots, "root id exceed number of roots" ); - builder.AddBudget( root_index[ rid ] ); - } - builder.InitStorage(); - for( size_t i = 0; i < valid_index.size(); ++i ){ - const bst_uint rid = valid_index[ i ]; - builder.PushElem( root_index[ rid ], rid ); - } - for( size_t i = 1; i < rptr.size(); ++ i ){ - node_bound[i-1] = std::make_pair( rptr[ i - 1 ], rptr[ i ] ); - } - } - - {// expand query - qexpand.reserve( 256 ); qexpand.clear(); - for( int i = 0; i < tree.param.num_roots; ++ i ){ - qexpand.push_back( i ); - } - } - } - - // initialize temp data structure - inline void InitDataExpand( const std::vector &valid_index, int nid ){ - row_index_set = valid_index; - node_bound.resize( tree.param.num_nodes ); - node_bound[ nid ] = std::make_pair( 0, (bst_uint)row_index_set.size() ); - - qexpand.clear(); qexpand.push_back( nid ); - } - private: - // number of omp thread used during training - int nthread; - // tmp row pointer, per thread, used for tmp data construction - std::vector< std::vector > tmp_rptr; - // Instance row indexes corresponding to each node - std::vector row_index_set; - // lower and upper bound of each nodes' row_index - std::vector< std::pair > node_bound; - 
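The node_bound bookkeeping above pairs with the MakeSplit routine: the rows of each node occupy a contiguous [first, second) range of row_index_set, and a split rewrites that range in place so the left child's rows come first, preserving their order. A simplified, hypothetical sketch of that partition step:

// simplified sketch of MakeSplit's in-place row partition; names hypothetical
#include <utility>
#include <vector>

typedef unsigned bst_uint;

void PartitionRows(std::vector<bst_uint> &row_index_set,
                   std::pair<bst_uint, bst_uint> bound,   // parent's range
                   const std::vector<bool> &goes_left,    // decision per row id
                   std::pair<bst_uint, bst_uint> *left,
                   std::pair<bst_uint, bst_uint> *right) {
  std::vector<bst_uint> right_rows;
  bst_uint top = bound.first;
  for (bst_uint i = bound.first; i < bound.second; ++i) {
    const bst_uint ridx = row_index_set[i];
    if (goes_left[ridx]) row_index_set[top++] = ridx;  // compact left rows
    else right_rows.push_back(ridx);                   // buffer right rows
  }
  *left = std::make_pair(bound.first, top);
  *right = std::make_pair(top, bound.second);
  for (size_t i = 0; i < right_rows.size(); ++i)       // fill right half back
    row_index_set[top++] = right_rows[i];
}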
private: - const std::vector &grad; - const std::vector &hess; - const FMatrix &smat; - const std::vector &root_index; - const utils::FeatConstrain &constrain; - }; - }; -}; -#endif diff --git a/booster/tree/xgboost_svdf_tree.hpp b/booster/tree/xgboost_svdf_tree.hpp deleted file mode 100644 index c4306bcb7..000000000 --- a/booster/tree/xgboost_svdf_tree.hpp +++ /dev/null @@ -1,429 +0,0 @@ -#ifndef XGBOOST_APEX_TREE_HPP -#define XGBOOST_APEX_TREE_HPP -/*! - * \file xgboost_svdf_tree.hpp - * \brief implementation of regression tree constructor, with layerwise support - * this file is adapted from GBRT implementation in SVDFeature project - * \author Tianqi Chen: tqchen@apex.sjtu.edu.cn, tianqi.tchen@gmail.com - */ -#include -#include "xgboost_tree_model.h" -#include "../../utils/xgboost_random.h" -#include "../../utils/xgboost_matrix_csr.h" - -namespace xgboost{ - namespace booster{ - inline void assert_sorted( unsigned *idset, int len ){ - if( !rt_debug || !check_bug ) return; - for( int i = 1; i < len; i ++ ){ - utils::Assert( idset[i-1] < idset[i], "idset not sorted" ); - } - } - }; - - namespace booster{ - // selecter of rtree to find the suitable candidate - class RTSelecter{ - public: - struct Entry{ - float loss_chg; - size_t start; - int len; - unsigned sindex; - float split_value; - Entry(){} - Entry( float loss_chg, size_t start, int len, unsigned split_index, float split_value, bool default_left ){ - this->loss_chg = loss_chg; - this->start = start; - this->len = len; - if( default_left ) split_index |= (1U << 31); - this->sindex = split_index; - this->split_value = split_value; - } - inline unsigned split_index( void ) const{ - return sindex & ( (1U<<31) - 1U ); - } - inline bool default_left( void ) const{ - return (sindex >> 31) != 0; - } - }; - private: - Entry best_entry; - public: - RTSelecter( void ){ - memset( &best_entry, 0, sizeof(best_entry) ); - best_entry.loss_chg = 0.0f; - } - inline void push_back( const Entry &e ){ - if( e.loss_chg > best_entry.loss_chg ) best_entry = e; - } - inline const Entry & select( void ){ - return best_entry; - } - }; - - - // updater of rtree, allows the parameters to be stored inside, key solver - template - class RTreeUpdater{ - protected: - // training task, element of single task - struct Task{ - // node id in tree - int nid; - // idset pointer, instance id in [idset,idset+len) - unsigned *idset; - // length of idset - unsigned len; - // base_weight of parent - float parent_base_weight; - Task(){} - Task( int nid, unsigned *idset, unsigned len, float pweight = 0.0f ){ - this->nid = nid; - this->idset = idset; - this->len = len; - this->parent_base_weight = pweight; - } - }; - - // sparse column entry - struct SCEntry{ - // feature value - float fvalue; - // row index in grad - unsigned rindex; - SCEntry(){} - SCEntry( float fvalue, unsigned rindex ){ - this->fvalue = fvalue; this->rindex = rindex; - } - inline bool operator<( const SCEntry &p ) const{ - return fvalue < p.fvalue; - } - }; - private: - // training parameter - const TreeParamTrain ¶m; - // parameters, reference - RegTree &tree; - std::vector &grad; - std::vector &hess; - const FMatrix &smat; - const std::vector &group_id; - private: - // maximum depth up to now - int max_depth; - // number of nodes being pruned - int num_pruned; - // stack to store current task - std::vector task_stack; - // temporal space for index set - std::vector idset; - private: - // task management: NOTE DFS here - inline void add_task( Task tsk ){ - task_stack.push_back( tsk ); - } - inline bool 
next_task( Task &tsk ){ - if( task_stack.size() == 0 ) return false; - tsk = task_stack.back(); - task_stack.pop_back(); - return true; - } - private: - // try to prune off current leaf, return true if successful - inline void try_prune_leaf( int nid, int depth ){ - if( tree[ nid ].is_root() ) return; - int pid = tree[ nid ].parent(); - RegTree::NodeStat &s = tree.stat( pid ); - s.leaf_child_cnt ++; - - if( s.leaf_child_cnt >= 2 && param.need_prune( s.loss_chg, depth - 1 ) ){ - // need to be pruned - tree.ChangeToLeaf( pid, param.learning_rate * s.base_weight ); - // add statistics to number of nodes pruned - num_pruned += 2; - // tail recursion - this->try_prune_leaf( pid, depth - 1 ); - } - } - // make leaf for current node :) - inline void make_leaf( Task tsk, double sum_grad, double sum_hess, bool compute ){ - for( unsigned i = 0; i < tsk.len; i ++ ){ - const unsigned ridx = tsk.idset[i]; - if( compute ){ - sum_grad += grad[ ridx ]; - sum_hess += hess[ ridx ]; - } - } - tree.stat( tsk.nid ).sum_hess = static_cast( sum_hess ); - tree[ tsk.nid ].set_leaf( param.learning_rate * param.CalcWeight( sum_grad, sum_hess, tsk.parent_base_weight ) ); - this->try_prune_leaf( tsk.nid, tree.GetDepth( tsk.nid ) ); - } - private: - // make split for current task, re-arrange positions in idset - inline void make_split( Task tsk, const SCEntry *entry, int num, float loss_chg, double sum_hess, double base_weight ){ - // before split, first prepare statistics - RegTree::NodeStat &s = tree.stat( tsk.nid ); - s.loss_chg = loss_chg; - s.leaf_child_cnt = 0; - s.sum_hess = static_cast( sum_hess ); - s.base_weight = static_cast( base_weight ); - - // add childs to current node - tree.AddChilds( tsk.nid ); - // assert that idset is sorted - assert_sorted( tsk.idset, tsk.len ); - // use merge sort style to get the solution - std::vector qset; - for( int i = 0; i < num; i ++ ){ - qset.push_back( entry[i].rindex ); - } - std::sort( qset.begin(), qset.end() ); - // do merge sort style, make the other set, remove elements in qset - for( unsigned i = 0, top = 0; i < tsk.len; i ++ ){ - if( top < qset.size() ){ - if( tsk.idset[ i ] != qset[ top ] ){ - tsk.idset[ i - top ] = tsk.idset[ i ]; - }else{ - top ++; - } - }else{ - tsk.idset[ i - qset.size() ] = tsk.idset[ i ]; - } - } - // get two parts - RegTree::Node &n = tree[ tsk.nid ]; - Task def_part( n.default_left() ? n.cleft() : n.cright(), tsk.idset, tsk.len - qset.size(), s.base_weight ); - Task spl_part( n.default_left() ? 
n.cright(): n.cleft() , tsk.idset + def_part.len, qset.size(), s.base_weight ); - // fill back split part - for( unsigned i = 0; i < spl_part.len; i ++ ){ - spl_part.idset[ i ] = qset[ i ]; - } - // add tasks to the queue - this->add_task( def_part ); - this->add_task( spl_part ); - } - - // enumerate split point of the tree - inline void enumerate_split( RTSelecter &sglobal, int tlen, - double rsum_grad, double rsum_hess, double root_gain, - const SCEntry *entry, size_t start, size_t end, - int findex, float parent_base_weight ){ - // local selecter - RTSelecter slocal; - - if( param.need_forward_search() ){ - // forward process, default right - double csum_grad = 0.0, csum_hess = 0.0; - for( size_t j = start; j < end; j ++ ){ - const unsigned ridx = entry[ j ].rindex; - csum_grad += grad[ ridx ]; - csum_hess += hess[ ridx ]; - // check for split - if( j == end - 1 || entry[j].fvalue + rt_2eps < entry[ j + 1 ].fvalue ){ - if( csum_hess < param.min_child_weight ) continue; - const double dsum_hess = rsum_hess - csum_hess; - if( dsum_hess < param.min_child_weight ) break; - // change of loss - double loss_chg = - param.CalcGain( csum_grad, csum_hess, parent_base_weight ) + - param.CalcGain( rsum_grad - csum_grad, dsum_hess, parent_base_weight ) - root_gain; - - const int clen = static_cast( j + 1 - start ); - // add candidate to selecter - slocal.push_back( RTSelecter::Entry( loss_chg, start, clen, findex, - j == end - 1 ? entry[j].fvalue + rt_eps : 0.5 * (entry[j].fvalue+entry[j+1].fvalue), - false ) ); - } - } - } - - if( param.need_backward_search() ){ - // backward process, default left - double csum_grad = 0.0, csum_hess = 0.0; - for( size_t j = end; j > start; j -- ){ - const unsigned ridx = entry[ j - 1 ].rindex; - csum_grad += grad[ ridx ]; - csum_hess += hess[ ridx ]; - // check for split - if( j == start + 1 || entry[ j - 2 ].fvalue + rt_2eps < entry[ j - 1 ].fvalue ){ - if( csum_hess < param.min_child_weight ) continue; - const double dsum_hess = rsum_hess - csum_hess; - if( dsum_hess < param.min_child_weight ) break; - double loss_chg = param.CalcGain( csum_grad, csum_hess, parent_base_weight ) + - param.CalcGain( rsum_grad - csum_grad, dsum_hess, parent_base_weight ) - root_gain; - const int clen = static_cast( end - j + 1 ); - // add candidate to selecter - slocal.push_back( RTSelecter::Entry( loss_chg, j - 1, clen, findex, - j == start + 1 ? 
entry[j-1].fvalue - rt_eps : 0.5 * (entry[j-2].fvalue + entry[j-1].fvalue), - true ) ); - } - } - } - sglobal.push_back( slocal.select() ); - } - - private: - // temporal storage for expand column major - std::vector tmp_rptr; - // find split for current task, another implementation of expand in column major manner - // should be more memory frugal, avoid global sorting across feature - inline void expand( Task tsk ){ - // assert that idset is sorted - // if reach maximum depth, make leaf from current node - int depth = tree.GetDepth( tsk.nid ); - // update statistiss - if( depth > max_depth ) max_depth = depth; - // if bigger than max depth - if( depth >= param.max_depth ){ - this->make_leaf( tsk, 0.0, 0.0, true ); return; - } - // convert to column major CSR format - const int nrows = tree.param.num_feature; - if( tmp_rptr.size() == 0 ){ - // initialize tmp storage in first usage - tmp_rptr.resize( nrows + 1 ); - std::fill( tmp_rptr.begin(), tmp_rptr.end(), 0 ); - } - // records the columns - std::vector entry; - // records the active features - std::vector aclist; - utils::SparseCSRMBuilder builder( tmp_rptr, entry, aclist ); - builder.InitBudget( nrows ); - // statistics of root - double rsum_grad = 0.0, rsum_hess = 0.0; - for( unsigned i = 0; i < tsk.len; i ++ ){ - const unsigned ridx = tsk.idset[i]; - rsum_grad += grad[ ridx ]; - rsum_hess += hess[ ridx ]; - - for( typename FMatrix::RowIter it = smat.GetRow(ridx); it.Next(); ){ - builder.AddBudget( it.findex() ); - } - } - - // if minimum split weight is not meet - if( param.cannot_split( rsum_hess, depth ) ){ - this->make_leaf( tsk, rsum_grad, rsum_hess, false ); builder.Cleanup(); return; - } - - builder.InitStorage(); - for( unsigned i = 0; i < tsk.len; i ++ ){ - const unsigned ridx = tsk.idset[i]; - for( typename FMatrix::RowIter it = smat.GetRow(ridx); it.Next(); ){ - builder.PushElem( it.findex(), SCEntry( it.fvalue(), ridx ) ); - } - } - // --- end of building column major matrix --- - // after this point, tmp_rptr and entry is ready to use - - // global selecter - RTSelecter sglobal; - // gain root - const double root_gain = param.CalcRootGain( rsum_grad, rsum_hess ); - // KEY: layerwise, weight of current node if it is leaf - const double base_weight = param.CalcWeight( rsum_grad, rsum_hess, tsk.parent_base_weight ); - // enumerate feature index - for( size_t i = 0; i < aclist.size(); i ++ ){ - int findex = static_cast( aclist[i] ); - size_t start = tmp_rptr[ findex ]; - size_t end = tmp_rptr[ findex + 1 ]; - utils::Assert( start < end, "bug" ); - // local sort can be faster when the features are sparse - std::sort( entry.begin() + start, entry.begin() + end ); - // local selecter - this->enumerate_split( sglobal, tsk.len, - rsum_grad, rsum_hess, root_gain, - &entry[0], start, end, findex, base_weight ); - } - // Cleanup tmp_rptr for next use - builder.Cleanup(); - // get the best solution - const RTSelecter::Entry &e = sglobal.select(); - // allowed to split - if( e.loss_chg > rt_eps ){ - // add splits - tree[ tsk.nid ].set_split( e.split_index(), e.split_value, e.default_left() ); - // re-arrange idset, push tasks - this->make_split( tsk, &entry[ e.start ], e.len, e.loss_chg, rsum_hess, base_weight ); - }else{ - // make leaf if we didn't meet requirement - this->make_leaf( tsk, rsum_grad, rsum_hess, false ); - } - } - private: - // initialize the tasks - inline void init_tasks( size_t ngrads ){ - // add group partition if necessary - if( group_id.size() == 0 ){ - if( param.subsample > 1.0f - 1e-6f ){ - idset.resize( 0 ); - 
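The SparseCSRMBuilder used in expand above (and again for root grouping in init_tasks below) follows a two-pass bucket-build pattern: count a budget per bucket, turn the counts into offsets, then push each element into its bucket's next slot. A self-contained sketch of that pattern, with hypothetical names:

// two-pass CSR bucket build, the pattern behind SparseCSRMBuilder
#include <vector>

void BuildCSR(const std::vector<unsigned> &bucket_of,  // bucket id per element
              std::vector<size_t> *rptr,               // bucket begin offsets
              std::vector<unsigned> *data) {           // elements grouped by bucket
  size_t nbucket = 0;
  for (size_t i = 0; i < bucket_of.size(); ++i)
    if (bucket_of[i] + 1 > nbucket) nbucket = bucket_of[i] + 1;
  rptr->assign(nbucket + 1, 0);
  for (size_t i = 0; i < bucket_of.size(); ++i)
    (*rptr)[bucket_of[i] + 1] += 1;                    // pass 1: budgets
  for (size_t b = 0; b < nbucket; ++b)
    (*rptr)[b + 1] += (*rptr)[b];                      // counts -> offsets
  data->resize(bucket_of.size());
  std::vector<size_t> top(rptr->begin(), rptr->end() - 1);
  for (size_t i = 0; i < bucket_of.size(); ++i)        // pass 2: place elements
    (*data)[top[bucket_of[i]]++] = static_cast<unsigned>(i);
}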
for( size_t i = 0; i < ngrads; i ++ ){ - if( hess[i] < 0.0f ) continue; - idset.push_back( (unsigned)i ); - } - }else{ - idset.resize( 0 ); - for( size_t i = 0; i < ngrads; i ++ ){ - if( random::SampleBinary( param.subsample ) != 0 ){ - idset.push_back( (unsigned)i ); - } - } - } - this->add_task( Task( 0, &idset[0], idset.size() ) ); return; - } - - utils::Assert( group_id.size() == ngrads, "number of groups must be exact" ); - {// new method for grouping, use CSR builder - std::vector rptr; - utils::SparseCSRMBuilder builder( rptr, idset ); - builder.InitBudget( tree.param.num_roots ); - for( size_t i = 0; i < group_id.size(); i ++ ){ - // drop invalid elements - if( hess[ i ] < 0.0f ) continue; - utils::Assert( group_id[ i ] < (unsigned)tree.param.num_roots, - "group id exceed number of roots" ); - builder.AddBudget( group_id[ i ] ); - } - builder.InitStorage(); - for( size_t i = 0; i < group_id.size(); i ++ ){ - // drop invalid elements - if( hess[ i ] < 0.0f ) continue; - builder.PushElem( group_id[ i ], static_cast(i) ); - } - for( size_t i = 1; i < rptr.size(); i ++ ){ - const size_t start = rptr[ i - 1 ], end = rptr[ i ]; - if( start < end ){ - this->add_task( Task( i - 1, &idset[ start ], end - start ) ); - } - } - } - } - public: - RTreeUpdater( const TreeParamTrain &pparam, - RegTree &ptree, - std::vector &pgrad, - std::vector &phess, - const FMatrix &psmat, - const std::vector &pgroup_id ): - param( pparam ), tree( ptree ), grad( pgrad ), hess( phess ), - smat( psmat ), group_id( pgroup_id ){ - } - inline int do_boost( int &num_pruned ){ - this->init_tasks( grad.size() ); - this->max_depth = 0; - this->num_pruned = 0; - Task tsk; - while( this->next_task( tsk ) ){ - this->expand( tsk ); - } - num_pruned = this->num_pruned; - return max_depth; - } - }; - }; -}; -#endif - - diff --git a/booster/tree/xgboost_tree.hpp b/booster/tree/xgboost_tree.hpp deleted file mode 100644 index 7c4f740cc..000000000 --- a/booster/tree/xgboost_tree.hpp +++ /dev/null @@ -1,268 +0,0 @@ -#ifndef XGBOOST_TREE_HPP -#define XGBOOST_TREE_HPP -/*! 
- * \file xgboost_tree.hpp - * \brief implementation of regression tree - * \author Tianqi Chen: tianqi.tchen@gmail.com - */ -#include "xgboost_tree_model.h" - -namespace xgboost{ - namespace booster{ - const bool rt_debug = false; - // whether to check bugs - const bool check_bug = false; - - const float rt_eps = 1e-5f; - const float rt_2eps = rt_eps * 2.0f; - - inline double sqr( double a ){ - return a * a; - } - }; -}; -#include "../../utils/xgboost_fmap.h" -#include "xgboost_svdf_tree.hpp" -#include "xgboost_col_treemaker.hpp" -#include "xgboost_row_treemaker.hpp" - -namespace xgboost{ - namespace booster{ - // regression tree, construction algorithm is seperated from this class - // see RegTreeUpdater - template - class RegTreeTrainer : public InterfaceBooster{ - public: - RegTreeTrainer( void ){ - silent = 0; tree_maker = 1; - // interact mode - interact_type = 0; - interact_node = 0; - // normally we won't have more than 64 OpenMP threads - threadtemp.resize( 64, ThreadEntry() ); - } - virtual ~RegTreeTrainer( void ){} - public: - virtual void SetParam( const char *name, const char *val ){ - if( !strcmp( name, "silent") ) silent = atoi( val ); - if( !strcmp( name, "tree_maker") ) tree_maker = atoi( val ); - if( !strncmp( name, "interact:", 9) ){ - const char *ename = name + 9; - interact_node = atoi( val ); - if( !strcmp( ename, "expand") ) { - interact_type = 1; - } - if( !strcmp( ename, "remove") ) { - interact_type = 2; - } - } - param.SetParam( name, val ); - constrain.SetParam( name, val ); - tree.param.SetParam( name, val ); - } - virtual void LoadModel( utils::IStream &fi ){ - tree.LoadModel( fi ); - } - virtual void SaveModel( utils::IStream &fo ) const{ - tree.SaveModel( fo ); - } - virtual void InitModel( void ){ - tree.InitModel(); - } - public: - virtual void DoBoost( std::vector &grad, - std::vector &hess, - const FMatrix &smat, - const std::vector &root_index ){ - utils::Assert( grad.size() < UINT_MAX, "number of instance exceed what we can handle" ); - - // interactive update - if( interact_type != 0 ){ - switch( interact_type ){ - case 1: this->ExpandNode( grad, hess, smat, root_index, interact_node ); return; - case 2: this->CollapseNode( grad, hess, smat, root_index, interact_node ); return; - default: utils::Error("unknown interact type"); - } - } - - if( !silent ){ - printf( "\nbuild GBRT with %u instances\n", (unsigned)grad.size() ); - } - int num_pruned; - switch( tree_maker ){ - case 0: { - utils::Assert( !constrain.HasConstrain(), "tree maker 0 does not support constrain" ); - RTreeUpdater updater( param, tree, grad, hess, smat, root_index ); - tree.param.max_depth = updater.do_boost( num_pruned ); - break; - } - case 1:{ - ColTreeMaker maker( tree, param, grad, hess, smat, root_index, constrain ); - maker.Make( tree.param.max_depth, num_pruned ); - break; - } - case 2:{ - RowTreeMaker maker( tree, param, grad, hess, smat, root_index, constrain ); - maker.Make( tree.param.max_depth, num_pruned ); - break; - } - default: utils::Error("unknown tree maker"); - } - if( !silent ){ - printf( "tree train end, %d roots, %d extra nodes, %d pruned nodes ,max_depth=%d\n", - tree.param.num_roots, tree.num_extra_nodes(), num_pruned, tree.MaxDepth() ); - } - } - virtual float Predict( const FMatrix &fmat, bst_uint ridx, unsigned gid = 0 ){ - ThreadEntry &e = this->InitTmp(); - this->PrepareTmp( fmat.GetRow(ridx), e ); - int pid = this->GetLeafIndex( e.feat, e.funknown, gid ); - this->DropTmp( fmat.GetRow(ridx), e ); - return tree[ pid ].leaf_value(); - } - virtual int 
GetLeafIndex( const std::vector &feat, - const std::vector &funknown, - unsigned gid = 0 ){ - // start from groups that belongs to current data - int pid = (int)gid; - // tranverse tree - while( !tree[ pid ].is_leaf() ){ - unsigned split_index = tree[ pid ].split_index(); - pid = this->GetNext( pid, feat[ split_index ], funknown[ split_index ] ); - } - return pid; - } - - virtual void PredPath( std::vector &path, const FMatrix &fmat, bst_uint ridx, unsigned gid = 0 ){ - path.clear(); - ThreadEntry &e = this->InitTmp(); - this->PrepareTmp( fmat.GetRow(ridx), e ); - - int pid = (int)gid; - path.push_back( pid ); - // tranverse tree - while( !tree[ pid ].is_leaf() ){ - unsigned split_index = tree[ pid ].split_index(); - pid = this->GetNext( pid, e.feat[ split_index ], e.funknown[ split_index ] ); - path.push_back( pid ); - } - this->DropTmp( fmat.GetRow(ridx), e ); - } - virtual float Predict( const std::vector &feat, - const std::vector &funknown, - unsigned gid = 0 ){ - utils::Assert( feat.size() >= (size_t)tree.param.num_feature, - "input data smaller than num feature" ); - int pid = this->GetLeafIndex( feat, funknown, gid ); - return tree[ pid ].leaf_value(); - } - virtual void DumpModel( FILE *fo, const utils::FeatMap &fmap, bool with_stats ){ - tree.DumpModel( fo, fmap, with_stats ); - } - private: - inline void CollapseNode( std::vector &grad, - std::vector &hess, - const FMatrix &fmat, - const std::vector &root_index, - int nid ){ - std::vector valid_index; - for( size_t i = 0; i < grad.size(); i ++ ){ - ThreadEntry &e = this->InitTmp(); - this->PrepareTmp( fmat.GetRow(i), e ); - int pid = root_index.size() == 0 ? 0 : (int)root_index[i]; - // tranverse tree - while( !tree[ pid ].is_leaf() ){ - unsigned split_index = tree[ pid ].split_index(); - pid = this->GetNext( pid, e.feat[ split_index ], e.funknown[ split_index ] ); - if( pid == nid ){ - valid_index.push_back( static_cast(i) ); break; - } - } - this->DropTmp( fmat.GetRow(i), e ); - } - RowTreeMaker maker( tree, param, grad, hess, fmat, root_index, constrain ); - maker.Collapse( valid_index, nid ); - if( !silent ){ - printf( "tree collapse end, max_depth=%d\n", tree.param.max_depth ); - } - } - inline void ExpandNode( std::vector &grad, - std::vector &hess, - const FMatrix &fmat, - const std::vector &root_index, - int nid ){ - std::vector valid_index; - for( size_t i = 0; i < grad.size(); i ++ ){ - ThreadEntry &e = this->InitTmp(); - this->PrepareTmp( fmat.GetRow(i), e ); - unsigned rtidx = root_index.size() == 0 ? 
0 : root_index[i]; - int pid = this->GetLeafIndex( e.feat, e.funknown, rtidx ); - this->DropTmp( fmat.GetRow(i), e ); - if( pid == nid ) valid_index.push_back( static_cast(i) ); - } - RowTreeMaker maker( tree, param, grad, hess, fmat, root_index, constrain ); - bool success = maker.Expand( valid_index, nid ); - if( !silent ){ - printf( "tree expand end, success=%d, max_depth=%d\n", (int)success, tree.MaxDepth() ); - } - } - private: - // silent - int silent; - RegTree tree; - TreeParamTrain param; - private: - // some training parameters - // tree maker - int tree_maker; - // interaction - int interact_type; - int interact_node; - // feature constrain - utils::FeatConstrain constrain; - private: - struct ThreadEntry{ - std::vector feat; - std::vector funknown; - }; - std::vector threadtemp; - private: - inline ThreadEntry& InitTmp( void ){ - const int tid = omp_get_thread_num(); - utils::Assert( tid < (int)threadtemp.size(), "RTreeUpdater: threadtemp pool is too small" ); - ThreadEntry &e = threadtemp[ tid ]; - if( e.feat.size() != (size_t)tree.param.num_feature ){ - e.feat.resize( tree.param.num_feature ); - e.funknown.resize( tree.param.num_feature ); - std::fill( e.funknown.begin(), e.funknown.end(), true ); - } - return e; - } - inline void PrepareTmp( typename FMatrix::RowIter it, ThreadEntry &e ){ - while( it.Next() ){ - const bst_uint findex = it.findex(); - utils::Assert( findex < (unsigned)tree.param.num_feature , "input feature execeed bound" ); - e.funknown[ findex ] = false; - e.feat[ findex ] = it.fvalue(); - } - } - inline void DropTmp( typename FMatrix::RowIter it, ThreadEntry &e ){ - while( it.Next() ){ - e.funknown[ it.findex() ] = true; - } - } - - inline int GetNext( int pid, float fvalue, bool is_unknown ){ - float split_value = tree[ pid ].split_cond(); - if( is_unknown ){ - return tree[ pid ].cdefault(); - }else{ - if( fvalue < split_value ) return tree[ pid ].cleft(); - else return tree[ pid ].cright(); - } - } - }; - }; -}; - -#endif diff --git a/booster/tree/xgboost_tree_model.h b/booster/tree/xgboost_tree_model.h deleted file mode 100644 index 2b2b636e7..000000000 --- a/booster/tree/xgboost_tree_model.h +++ /dev/null @@ -1,554 +0,0 @@ -#ifndef XGBOOST_TREE_MODEL_H -#define XGBOOST_TREE_MODEL_H -/*! - * \file xgboost_tree_model.h - * \brief generic definition of model structure used in tree models - * used to support learning of boosting tree - * \author Tianqi Chen: tianqi.tchen@gmail.com - */ -#include -#include "../../utils/xgboost_utils.h" -#include "../../utils/xgboost_stream.h" - -namespace xgboost{ - namespace booster{ - /*! - * \brief template class of TreeModel - * \tparam TSplitCond data type to indicate split condition - * \tparam TNodeStat auxiliary statistics of node to help tree building - */ - template - class TreeModel{ - public: - /*! \brief data type to indicate split condition */ - typedef TNodeStat NodeStat; - /*! \brief auxiliary statistics of node to help tree building */ - typedef TSplitCond SplitCond; - public: - /*! \brief parameters of the tree */ - struct Param{ - /*! \brief number of start root */ - int num_roots; - /*! \brief total number of nodes */ - int num_nodes; - /*!\brief number of deleted nodes */ - int num_deleted; - /*! \brief maximum depth, this is a statistics of the tree */ - int max_depth; - /*! \brief number of features used for tree construction */ - int num_feature; - /*! \brief reserved part */ - int reserved[ 32 ]; - /*! 
\brief constructor */ - Param( void ){ - max_depth = 0; - memset( reserved, 0, sizeof( reserved ) ); - } - /*! - * \brief set parameters from outside - * \param name name of the parameter - * \param val value of the parameter - */ - inline void SetParam( const char *name, const char *val ){ - if( !strcmp("num_roots", name ) ) num_roots = atoi( val ); - if( !strcmp("num_feature", name ) ) num_feature = atoi( val ); - } - }; - /*! \brief tree node */ - class Node{ - private: - friend class TreeModel; - /*! - * \brief in leaf node, we have weights, in non-leaf nodes, - * we have split condition - */ - union Info{ - float leaf_value; - TSplitCond split_cond; - }; - private: - // pointer to parent, highest bit is used to indicate whether it's a left child or not - int parent_; - // pointer to left, right - int cleft_, cright_; - // split feature index, left split or right split depends on the highest bit - unsigned sindex_; - // extra info - Info info_; - private: - inline void set_parent( int pidx, bool is_left_child = true ){ - if( is_left_child ) pidx |= (1U << 31); - this->parent_ = pidx; - } - public: - /*! \brief index of left child */ - inline int cleft( void ) const{ - return this->cleft_; - } - /*! \brief index of right child */ - inline int cright( void ) const{ - return this->cright_; - } - /*! \brief index of default child when feature is missing */ - inline int cdefault( void ) const{ - return this->default_left() ? this->cleft() : this->cright(); - } - /*! \brief feature index of split condition */ - inline unsigned split_index( void ) const{ - return sindex_ & ( (1U<<31) - 1U ); - } - /*! \brief when feature is unknown, whether goes to left child */ - inline bool default_left( void ) const{ - return (sindex_ >> 31) != 0; - } - /*! \brief whether current node is leaf node */ - inline bool is_leaf( void ) const{ - return cleft_ == -1; - } - /*! \brief get leaf value of leaf node */ - inline float leaf_value( void ) const{ - return (this->info_).leaf_value; - } - /*! \brief get split condition of the node */ - inline TSplitCond split_cond( void ) const{ - return (this->info_).split_cond; - } - /*! \brief get parent of the node */ - inline int parent( void ) const{ - return parent_ & ( (1U << 31) - 1 ); - } - /*! \brief whether current node is left child */ - inline bool is_left_child( void ) const{ - return ( parent_ & (1U << 31)) != 0; - } - /*! \brief whether current node is root */ - inline bool is_root( void ) const{ - return parent_ == -1; - } - /*! - * \brief set the right child - * \param nide node id to right child - */ - inline void set_right_child( int nid ){ - this->cright_ = nid; - } - /*! - * \brief set split condition of current node - * \param split_index feature index to split - * \param split_cond split condition - * \param default_left the default direction when feature is unknown - */ - inline void set_split( unsigned split_index, TSplitCond split_cond, bool default_left = false ){ - if( default_left ) split_index |= (1U << 31); - this->sindex_ = split_index; - (this->info_).split_cond = split_cond; - } - /*! 
- * \brief set the leaf value of the node - * \param value leaf value - * \param right right index, could be used to store - * additional information - */ - inline void set_leaf( float value, int right = -1 ){ - (this->info_).leaf_value = value; - this->cleft_ = -1; - this->cright_ = right; - } - }; - protected: - // vector of nodes - std::vector nodes; - // stats of nodes - std::vector stats; - protected: - // free node space, used during training process - std::vector deleted_nodes; - // allocate a new node, - // !!!!!! NOTE: may cause BUG here, nodes.resize - inline int AllocNode( void ){ - if( param.num_deleted != 0 ){ - int nd = deleted_nodes.back(); - deleted_nodes.pop_back(); - param.num_deleted --; - return nd; - } - int nd = param.num_nodes ++; - nodes.resize( param.num_nodes ); - stats.resize( param.num_nodes ); - return nd; - } - // delete a tree node - inline void DeleteNode( int nid ){ - utils::Assert( nid >= param.num_roots, "can not delete root"); - deleted_nodes.push_back( nid ); - nodes[ nid ].set_parent( -1 ); - param.num_deleted ++; - } - public: - /*! - * \brief change a non leaf node to a leaf node, delete its children - * \param rid node id of the node - * \param new leaf value - */ - inline void ChangeToLeaf( int rid, float value ){ - utils::Assert( nodes[ nodes[rid].cleft() ].is_leaf(), "can not delete a non termial child"); - utils::Assert( nodes[ nodes[rid].cright() ].is_leaf(), "can not delete a non termial child"); - this->DeleteNode( nodes[ rid ].cleft() ); - this->DeleteNode( nodes[ rid ].cright() ); - nodes[ rid ].set_leaf( value ); - } - /*! - * \brief collapse a non leaf node to a leaf node, delete its children - * \param rid node id of the node - * \param new leaf value - */ - inline void CollapseToLeaf( int rid, float value ){ - if( nodes[rid].is_leaf() ) return; - if( !nodes[ nodes[rid].cleft() ].is_leaf() ){ - CollapseToLeaf( nodes[rid].cleft(), 0.0f ); - } - if( !nodes[ nodes[rid].cright() ].is_leaf() ){ - CollapseToLeaf( nodes[rid].cright(), 0.0f ); - } - this->ChangeToLeaf( rid, value ); - } - public: - /*! \brief model parameter */ - Param param; - public: - /*! \brief constructor */ - TreeModel( void ){ - param.num_nodes = 1; - param.num_roots = 1; - param.num_deleted = 0; - nodes.resize( 1 ); - } - /*! \brief get node given nid */ - inline Node &operator[]( int nid ){ - return nodes[ nid ]; - } - /*! \brief get node statistics given nid */ - inline NodeStat &stat( int nid ){ - return stats[ nid ]; - } - /*! \brief initialize the model */ - inline void InitModel( void ){ - param.num_nodes = param.num_roots; - nodes.resize( param.num_nodes ); - stats.resize( param.num_nodes ); - for( int i = 0; i < param.num_nodes; i ++ ){ - nodes[i].set_leaf( 0.0f ); - nodes[i].set_parent( -1 ); - } - } - /*! - * \brief load model from stream - * \param fi input stream - */ - inline void LoadModel( utils::IStream &fi ){ - utils::Assert( fi.Read( ¶m, sizeof(Param) ) > 0, "TreeModel" ); - nodes.resize( param.num_nodes ); stats.resize( param.num_nodes ); - utils::Assert( fi.Read( &nodes[0], sizeof(Node) * nodes.size() ) > 0, "TreeModel::Node" ); - utils::Assert( fi.Read( &stats[0], sizeof(NodeStat) * stats.size() ) > 0, "TreeModel::Node" ); - - deleted_nodes.resize( 0 ); - for( int i = param.num_roots; i < param.num_nodes; i ++ ){ - if( nodes[i].is_root() ) deleted_nodes.push_back( i ); - } - utils::Assert( (int)deleted_nodes.size() == param.num_deleted, "number of deleted nodes do not match" ); - } - /*! 
- * \brief save model to stream - * \param fo output stream - */ - inline void SaveModel( utils::IStream &fo ) const{ - utils::Assert( param.num_nodes == (int)nodes.size() ); - utils::Assert( param.num_nodes == (int)stats.size() ); - fo.Write( ¶m, sizeof(Param) ); - fo.Write( &nodes[0], sizeof(Node) * nodes.size() ); - fo.Write( &stats[0], sizeof(NodeStat) * nodes.size() ); - } - /*! - * \brief add child nodes to node - * \param nid node id to add childs - */ - inline void AddChilds( int nid ){ - int pleft = this->AllocNode(); - int pright = this->AllocNode(); - nodes[ nid ].cleft_ = pleft; - nodes[ nid ].cright_ = pright; - nodes[ nodes[ nid ].cleft() ].set_parent( nid, true ); - nodes[ nodes[ nid ].cright() ].set_parent( nid, false ); - } - /*! - * \brief only add a right child to a leaf node - * \param node id to add right child - */ - inline void AddRightChild( int nid ){ - int pright = this->AllocNode(); - nodes[ nid ].right = pright; - nodes[ nodes[ nid ].right ].set_parent( nid, false ); - } - /*! - * \brief get current depth - * \param nid node id - * \param pass_rchild whether right child is not counted in depth - */ - inline int GetDepth( int nid, bool pass_rchild = false ) const{ - int depth = 0; - while( !nodes[ nid ].is_root() ){ - if( !pass_rchild || nodes[ nid ].is_left_child() ) depth ++; - nid = nodes[ nid ].parent(); - } - return depth; - } - /*! - * \brief get maximum depth - * \param nid node id - */ - inline int MaxDepth( int nid ) const{ - if( nodes[nid].is_leaf() ) return 0; - return std::max( MaxDepth( nodes[nid].cleft() )+1, - MaxDepth( nodes[nid].cright() )+1 ); - } - /*! - * \brief get maximum depth - */ - inline int MaxDepth( void ){ - int maxd = 0; - for( int i = 0; i < param.num_roots; ++ i ){ - maxd = std::max( maxd, MaxDepth( i ) ); - } - return maxd; - } - /*! \brief number of extra nodes besides the root */ - inline int num_extra_nodes( void ) const { - return param.num_nodes - param.num_roots - param.num_deleted; - } - /*! 
\brief dump model to text file */ - inline void DumpModel( FILE *fo, const utils::FeatMap& fmap, bool with_stats ){ - this->Dump( 0, fo, fmap, 0, with_stats ); - } - private: - void Dump( int nid, FILE *fo, const utils::FeatMap& fmap, int depth, bool with_stats ){ - for( int i = 0; i < depth; ++ i ){ - fprintf( fo, "\t" ); - } - if( nodes[ nid ].is_leaf() ){ - fprintf( fo, "%d:leaf=%f ", nid, nodes[ nid ].leaf_value() ); - if( with_stats ){ - stat( nid ).Print( fo, true ); - } - fprintf( fo, "\n" ); - }else{ - // right then left, - TSplitCond cond = nodes[ nid ].split_cond(); - const unsigned split_index = nodes[ nid ].split_index(); - - if( split_index < fmap.size() ){ - switch( fmap.type(split_index) ){ - case utils::FeatMap::kIndicator:{ - int nyes = nodes[ nid ].default_left()?nodes[nid].cright():nodes[nid].cleft(); - fprintf( fo, "%d:[%s] yes=%d,no=%d", - nid, fmap.name( split_index ), - nyes, nodes[nid].cdefault() ); - break; - } - case utils::FeatMap::kInteger:{ - fprintf( fo, "%d:[%s<%d] yes=%d,no=%d,missing=%d", - nid, fmap.name(split_index), int( float(cond)+1.0f), - nodes[ nid ].cleft(), nodes[ nid ].cright(), - nodes[ nid ].cdefault() ); - break; - } - case utils::FeatMap::kFloat: - case utils::FeatMap::kQuantitive:{ - fprintf( fo, "%d:[%s<%f] yes=%d,no=%d,missing=%d", - nid, fmap.name(split_index), float(cond), - nodes[ nid ].cleft(), nodes[ nid ].cright(), - nodes[ nid ].cdefault() ); - break; - } - default: utils::Error("unknown fmap type"); - } - }else{ - fprintf( fo, "%d:[f%u<%f] yes=%d,no=%d,missing=%d", - nid, split_index, float(cond), - nodes[ nid ].cleft(), nodes[ nid ].cright(), - nodes[ nid ].cdefault() ); - } - if( with_stats ){ - fprintf( fo, " "); - stat( nid ).Print( fo, false ); - } - fprintf( fo, "\n" ); - this->Dump( nodes[ nid ].cleft(), fo, fmap, depth+1, with_stats ); - this->Dump( nodes[ nid ].cright(), fo, fmap, depth+1, with_stats ); - } - } - }; - }; - - namespace booster{ - /*! \brief training parameters for regression tree */ - struct TreeParamTrain{ - // learning step size for a time - float learning_rate; - // minimum loss change required for a split - float min_split_loss; - // maximum depth of a tree - int max_depth; - //----- the rest parameters are less important ---- - // minimum amount of hessian(weight) allowed in a child - float min_child_weight; - // weight decay parameter used to control leaf fitting - float reg_lambda; - // reg method - int reg_method; - // default direction choice - int default_direction; - // whether we want to do subsample - float subsample; - // whether to use layerwise aware regularization - int use_layerwise; - // number of threads to be used for tree construction, if OpenMP is enabled, if equals 0, use system default - int nthread; - /*! \brief constructor */ - TreeParamTrain( void ){ - learning_rate = 0.3f; - min_child_weight = 1.0f; - max_depth = 6; - reg_lambda = 1.0f; - reg_method = 2; - default_direction = 0; - subsample = 1.0f; - use_layerwise = 0; - nthread = 0; - } - /*! 
- * \brief set parameters from outside - * \param name name of the parameter - * \param val value of the parameter - */ - inline void SetParam( const char *name, const char *val ){ - // sync-names - if( !strcmp( name, "gamma") ) min_split_loss = (float)atof( val ); - if( !strcmp( name, "eta") ) learning_rate = (float)atof( val ); - if( !strcmp( name, "lambda") ) reg_lambda = (float)atof( val ); - // normal tree prameters - if( !strcmp( name, "learning_rate") ) learning_rate = (float)atof( val ); - if( !strcmp( name, "min_child_weight") ) min_child_weight = (float)atof( val ); - if( !strcmp( name, "min_split_loss") ) min_split_loss = (float)atof( val ); - if( !strcmp( name, "max_depth") ) max_depth = atoi( val ); - if( !strcmp( name, "reg_lambda") ) reg_lambda = (float)atof( val ); - if( !strcmp( name, "reg_method") ) reg_method = (float)atof( val ); - if( !strcmp( name, "subsample") ) subsample = (float)atof( val ); - if( !strcmp( name, "use_layerwise") ) use_layerwise = atoi( val ); - if( !strcmp( name, "nthread") ) nthread = atoi( val ); - if( !strcmp( name, "default_direction") ) { - if( !strcmp( val, "learn") ) default_direction = 0; - if( !strcmp( val, "left") ) default_direction = 1; - if( !strcmp( val, "right") ) default_direction = 2; - } - } - protected: - // functions for L1 cost - static inline double ThresholdL1( double w, double lambda ){ - if( w > +lambda ) return w - lambda; - if( w < -lambda ) return w + lambda; - return 0.0; - } - inline double CalcWeight( double sum_grad, double sum_hess )const{ - if( sum_hess < min_child_weight ){ - return 0.0; - }else{ - switch( reg_method ){ - case 1: return - ThresholdL1( sum_grad, reg_lambda ) / sum_hess; - case 2: return - sum_grad / ( sum_hess + reg_lambda ); - // elstic net - case 3: return - ThresholdL1( sum_grad, 0.5 * reg_lambda ) / ( sum_hess + 0.5 * reg_lambda ); - default: return - sum_grad / sum_hess; - } - } - } - private: - inline static double Sqr( double a ){ - return a * a; - } - public: - // calculate the cost of loss function - inline double CalcGain( double sum_grad, double sum_hess ) const{ - if( sum_hess < min_child_weight ){ - return 0.0; - } - switch( reg_method ){ - case 1 : return Sqr( ThresholdL1( sum_grad, reg_lambda ) ) / sum_hess; - case 2 : return Sqr( sum_grad ) / ( sum_hess + reg_lambda ); - // elstic net - case 3 : return Sqr( ThresholdL1( sum_grad, 0.5 * reg_lambda ) ) / ( sum_hess + 0.5 * reg_lambda ); - default: return Sqr( sum_grad ) / sum_hess; - } - } - // KEY:layerwise - // calculate cost of root - inline double CalcRootGain( double sum_grad, double sum_hess ) const{ - if( use_layerwise == 0 ) return this->CalcGain( sum_grad, sum_hess ); - else return 0.0; - } - // KEY:layerwise - // calculate the cost after split - // base_weight: the base_weight of parent - inline double CalcGain( double sum_grad, double sum_hess, double base_weight ) const{ - if( use_layerwise == 0 ) return this->CalcGain( sum_grad, sum_hess ); - else return this->CalcGain( sum_grad + sum_hess * base_weight, sum_hess ); - } - // calculate the weight of leaf - inline double CalcWeight( double sum_grad, double sum_hess, double parent_base_weight )const{ - if( use_layerwise == 0 ) return CalcWeight( sum_grad, sum_hess ); - else return parent_base_weight + CalcWeight( sum_grad + parent_base_weight * sum_hess, sum_hess ); - } - /*! \brief whether need forward small to big search: default right */ - inline bool need_forward_search( void ) const{ - return this->default_direction != 1; - } - /*! 
\brief whether need forward big to small search: default left */ - inline bool need_backward_search( void ) const{ - return this->default_direction != 2; - } - /*! \brief given the loss change, whether we need to invode prunning */ - inline bool need_prune( double loss_chg, int depth ) const{ - return loss_chg < this->min_split_loss; - } - /*! \brief whether we can split with current hessian */ - inline bool cannot_split( double sum_hess, int depth ) const{ - return sum_hess < this->min_child_weight * 2.0; - } - }; - }; - - namespace booster{ - /*! \brief node statistics used in regression tree */ - struct RTreeNodeStat{ - /*! \brief loss chg caused by current split */ - float loss_chg; - /*! \brief sum of hessian values, used to measure coverage of data */ - float sum_hess; - /*! \brief weight of current node */ - float base_weight; - /*! \brief number of child that is leaf node known up to now */ - int leaf_child_cnt; - /*! \brief print information of current stats to fo */ - inline void Print( FILE *fo, bool is_leaf ) const{ - if( !is_leaf ){ - fprintf( fo, "gain=%f,cover=%f", loss_chg, sum_hess ); - }else{ - fprintf( fo, "cover=%f", sum_hess ); - } - } - }; - /*! \brief most comment structure of regression tree */ - class RegTree: public TreeModel{ - }; - }; -}; -#endif diff --git a/booster/xgboost-inl.hpp b/booster/xgboost-inl.hpp deleted file mode 100644 index 95ba90d15..000000000 --- a/booster/xgboost-inl.hpp +++ /dev/null @@ -1,39 +0,0 @@ -#ifndef XGBOOST_INL_HPP -#define XGBOOST_INL_HPP -/*! - * \file xgboost-inl.hpp - * \brief bootser implementations - * \author Tianqi Chen: tianqi.tchen@gmail.com - */ -// implementation of boosters go to here - -// A good design should have minimum functions defined interface, user should only operate on interface -// I break it a bit, by using template and let user 'see' the implementation -// The user should pretend that they only can use the interface, and we are all cool -// I find this is the only way so far I can think of to make boosters invariant of data structure, -// while keep everything fast -#include "xgboost.h" -#include "../utils/xgboost_utils.h" -#include "tree/xgboost_tree.hpp" -#include "linear/xgboost_linear.hpp" - -namespace xgboost{ - namespace booster{ - /*! - * \brief create a gradient booster, given type of booster - * \param booster_type type of gradient booster, can be used to specify implements - * \tparam FMatrix input data type for booster - * \return the pointer to the gradient booster created - */ - template - inline InterfaceBooster *CreateBooster(int booster_type){ - switch (booster_type){ - case 0: return new RegTreeTrainer(); - case 1: return new LinearBooster(); - default: utils::Error("unknown booster_type"); return NULL; - } - } - }; // namespace booster -}; // namespace xgboost - -#endif // XGBOOST_INL_HPP diff --git a/booster/xgboost.h b/booster/xgboost.h deleted file mode 100644 index 11d79b410..000000000 --- a/booster/xgboost.h +++ /dev/null @@ -1,157 +0,0 @@ -#ifndef XGBOOST_H -#define XGBOOST_H -/*! - * \file xgboost.h - * \brief the general gradient boosting interface - * - * common practice of this header: use IBooster and CreateBooster - * - * \author Tianqi Chen: tianqi.tchen@gmail.com - */ -#include -#include "../utils/xgboost_utils.h" -#include "../utils/xgboost_fmap.h" -#include "../utils/xgboost_stream.h" -#include "../utils/xgboost_config.h" -#include "xgboost_data.h" - -/*! \brief namespace for xboost package */ -namespace xgboost{ - /*! 
\brief namespace for boosters */ - namespace booster{ - /*! - * \brief interface of a gradient boosting learner - * \tparam FMatrix the feature matrix format that the booster takes - */ - template - class InterfaceBooster{ - public: - // interface for model setting and loading - // calling procedure: - // (1) booster->SetParam to setting necessary parameters - // (2) if it is first time usage of the model: - // call booster->InitModel - // else: - // call booster->LoadModel - // (3) booster->DoBoost to update the model - // (4) booster->Predict to get new prediction - /*! - * \brief set parameters from outside - * \param name name of the parameter - * \param val value of the parameter - */ - virtual void SetParam(const char *name, const char *val) = 0; - /*! - * \brief load model from stream - * \param fi input stream - */ - virtual void LoadModel(utils::IStream &fi) = 0; - /*! - * \brief save model to stream - * \param fo output stream - */ - virtual void SaveModel(utils::IStream &fo) const = 0; - /*! - * \brief initialize solver before training, called before training - * this function is reserved for solver to allocate necessary space and do other preparation - */ - virtual void InitModel(void) = 0; - public: - /*! - * \brief do gradient boost training for one step, using the information given, - * Note: content of grad and hess can change after DoBoost - * \param grad first order gradient of each instance - * \param hess second order gradient of each instance - * \param feats features of each instance - * \param root_index pre-partitioned root index of each instance, - * root_index.size() can be 0 which indicates that no pre-partition involved - */ - virtual void DoBoost(std::vector &grad, - std::vector &hess, - const FMatrix &feats, - const std::vector &root_index) = 0; - /*! - * \brief predict the path ids along a trees, for given sparse feature vector. When booster is a tree - * \param path the result of path - * \param feats feature matrix - * \param row_index row index in the feature matrix - * \param root_index root id of current instance, default = 0 - */ - virtual void PredPath(std::vector &path, const FMatrix &feats, - bst_uint row_index, unsigned root_index = 0){ - utils::Error("not implemented"); - } - /*! - * \brief predict values for given sparse feature vector - * - * NOTE: in tree implementation, Sparse Predict is OpenMP threadsafe, but not threadsafe in general, - * dense version of Predict to ensures threadsafety - * \param feats feature matrix - * \param row_index row index in the feature matrix - * \param root_index root id of current instance, default = 0 - * \return prediction - */ - virtual float Predict(const FMatrix &feats, bst_uint row_index, unsigned root_index = 0){ - utils::Error("not implemented"); - return 0.0f; - } - /*! - * \brief predict values for given dense feature vector - * \param feat feature vector in dense format - * \param funknown indicator that the feature is missing - * \param rid root id of current instance, default = 0 - * \return prediction - */ - virtual float Predict(const std::vector &feat, - const std::vector &funknown, - unsigned rid = 0){ - utils::Error("not implemented"); - return 0.0f; - } - /*! - * \brief print information - * \param fo output stream - */ - virtual void PrintInfo(FILE *fo){} - /*! 
- * \brief dump model into text file - * \param fo output stream - * \param fmap feature map that may help give interpretations of feature - * \param with_stats whether print statistics - */ - virtual void DumpModel(FILE *fo, const utils::FeatMap& fmap, bool with_stats = false){ - utils::Error("not implemented"); - } - public: - /*! \brief virtual destructor */ - virtual ~InterfaceBooster(void){} - }; - }; - namespace booster{ - /*! - * \brief this will is the most commonly used booster interface - * we try to make booster invariant of data structures, but most cases, FMatrixS is what we wnat - */ - typedef InterfaceBooster IBooster; - }; -}; - -namespace xgboost{ - namespace booster{ - /*! - * \brief create a gradient booster, given type of booster - * normally we use FMatrixS, by calling CreateBooster - * \param booster_type type of gradient booster, can be used to specify implements - * \tparam FMatrix input data type for booster - * \return the pointer to the gradient booster created - */ - template - inline InterfaceBooster *CreateBooster(int booster_type); - }; -}; - -// this file includes the template implementations of all boosters -// the cost of using template is that the user can 'see' all the implementations, which is OK -// ignore implementations and focus on the interface:) -#include "xgboost-inl.hpp" -#endif diff --git a/booster/xgboost_data.h b/booster/xgboost_data.h deleted file mode 100644 index 0f79833b5..000000000 --- a/booster/xgboost_data.h +++ /dev/null @@ -1,396 +0,0 @@ -#ifndef XGBOOST_DATA_H -#define XGBOOST_DATA_H - -/*! - * \file xgboost_data.h - * \brief the input data structure for gradient boosting - * \author Tianqi Chen: tianqi.tchen@gmail.com - */ - -#include -#include -#include "../utils/xgboost_utils.h" -#include "../utils/xgboost_stream.h" -#include "../utils/xgboost_matrix_csr.h" - -namespace xgboost{ - namespace booster{ - /*! \brief interger type used in boost */ - typedef int bst_int; - /*! \brief unsigned interger type used in boost */ - typedef unsigned bst_uint; - /*! \brief float type used in boost */ - typedef float bst_float; - /*! \brief debug option for booster */ - const bool bst_debug = false; - }; -}; - -namespace xgboost{ - namespace booster{ - /** - * \brief This is a interface, defining the way to access features, - * by column or by row. This interface is used to make implementation - * of booster does not depend on how feature is stored. - * - * Why template instead of virtual class: for efficiency - * feature matrix is going to be used by most inner loop of the algorithm - * - * \tparam Derived type of actual implementation - * \sa FMatrixS: most of time FMatrixS is sufficient, refer to it if you find it confusing - */ - template - struct FMatrix{ - public: - /*! \brief exmaple iterator over one row */ - struct RowIter{ - /*! - * \brief move to next position - * \return whether there is element in next position - */ - - inline bool Next(void); - /*! \return feature index in current position */ - inline bst_uint findex(void) const; - /*! \return feature value in current position */ - inline bst_float fvalue(void) const; - }; - /*! \brief example iterator over one column */ - struct ColIter{ - /*! - * \brief move to next position - * \return whether there is element in next position - */ - inline bool Next(void); - /*! \return row index of current position */ - inline bst_uint rindex(void) const; - /*! \return feature value in current position */ - inline bst_float fvalue(void) const; - }; - /*! 
\brief backward iterator over column */ - struct ColBackIter : public ColIter {}; - public: - /*! - * \brief get number of rows - * \return number of rows - */ - inline size_t NumRow(void) const; - /*! - * \brief get number of columns - * \return number of columns - */ - inline size_t NumCol(void) const; - /*! - * \brief get row iterator - * \param ridx row index - * \return row iterator - */ - inline RowIter GetRow(size_t ridx) const; - /*! - * \brief get number of column groups, this ise used together with GetRow( ridx, gid ) - * \return number of column group - */ - inline unsigned NumColGroup(void) const{ - return 1; - } - /*! - * \brief get row iterator, return iterator of specific column group - * \param ridx row index - * \param gid colmun group id - * \return row iterator, only iterates over features of specified column group - */ - inline RowIter GetRow(size_t ridx, unsigned gid) const; - - /*! \return whether column access is enabled */ - inline bool HaveColAccess(void) const; - /*! - * \brief get column iterator, the columns must be sorted by feature value - * \param ridx column index - * \return column iterator - */ - inline ColIter GetSortedCol(size_t ridx) const; - /*! - * \brief get column backward iterator, starts from biggest fvalue, and iterator back - * \param ridx column index - * \return reverse column iterator - */ - inline ColBackIter GetReverseSortedCol(size_t ridx) const; - }; - }; -}; - -namespace xgboost{ - namespace booster{ - /*! - * \brief feature matrix to store training instance, in sparse CSR format - */ - class FMatrixS : public FMatrix{ - public: - /*! \brief one entry in a row */ - struct REntry{ - /*! \brief feature index */ - bst_uint findex; - /*! \brief feature value */ - bst_float fvalue; - /*! \brief constructor */ - REntry(void){} - /*! \brief constructor */ - REntry(bst_uint findex, bst_float fvalue) : findex(findex), fvalue(fvalue){} - inline static bool cmp_fvalue(const REntry &a, const REntry &b){ - return a.fvalue < b.fvalue; - } - }; - /*! \brief one row of sparse feature matrix */ - struct Line{ - /*! \brief array of feature index */ - const REntry *data_; - /*! \brief size of the data */ - bst_uint len; - /*! \brief get k-th element */ - inline const REntry& operator[](unsigned i) const{ - return data_[i]; - } - }; - /*! \brief row iterator */ - struct RowIter{ - const REntry *dptr_, *end_; - RowIter(const REntry* dptr, const REntry* end) - :dptr_(dptr), end_(end){} - inline bool Next(void){ - if (dptr_ == end_) return false; - else{ - ++dptr_; return true; - } - } - inline bst_uint findex(void) const{ - return dptr_->findex; - } - inline bst_float fvalue(void) const{ - return dptr_->fvalue; - } - }; - /*! \brief column iterator */ - struct ColIter : public RowIter{ - ColIter(const REntry* dptr, const REntry* end) - :RowIter(dptr, end){} - inline bst_uint rindex(void) const{ - return this->findex(); - } - }; - /*! \brief reverse column iterator */ - struct ColBackIter : public ColIter{ - ColBackIter(const REntry* dptr, const REntry* end) - :ColIter(dptr, end){} - // shadows RowIter::Next - inline bool Next(void){ - if (dptr_ == end_) return false; - else{ - --dptr_; return true; - } - } - }; - public: - /*! \brief constructor */ - FMatrixS(void){ this->Clear(); } - /*! \brief get number of rows */ - inline size_t NumRow(void) const{ - return row_ptr_.size() - 1; - } - /*! - * \brief get number of nonzero entries - * \return number of nonzero entries - */ - inline size_t NumEntry(void) const{ - return row_data_.size(); - } - /*! 
\brief clear the storage */ - inline void Clear(void){ - row_ptr_.clear(); - row_ptr_.push_back(0); - row_data_.clear(); - col_ptr_.clear(); - col_data_.clear(); - } - /*! \brief get sparse part of current row */ - inline Line operator[](size_t sidx) const{ - Line sp; - utils::Assert(!bst_debug || sidx < this->NumRow(), "row id exceed bound"); - sp.len = static_cast(row_ptr_[sidx + 1] - row_ptr_[sidx]); - sp.data_ = &row_data_[row_ptr_[sidx]]; - return sp; - } - /*! - * \brief add a row to the matrix, with data stored in STL container - * \param findex feature index - * \param fvalue feature value - * \param fstart start bound of feature - * \param fend end bound range of feature - * \return the row id added line - */ - inline size_t AddRow(const std::vector &findex, - const std::vector &fvalue, - unsigned fstart = 0, unsigned fend = UINT_MAX){ - utils::Assert(findex.size() == fvalue.size()); - unsigned cnt = 0; - for (size_t i = 0; i < findex.size(); i++){ - if (findex[i] < fstart || findex[i] >= fend) continue; - row_data_.push_back(REntry(findex[i], fvalue[i])); - cnt++; - } - row_ptr_.push_back(row_ptr_.back() + cnt); - return row_ptr_.size() - 2; - } - /*! \brief get row iterator*/ - inline RowIter GetRow(size_t ridx) const{ - utils::Assert(!bst_debug || ridx < this->NumRow(), "row id exceed bound"); - return RowIter(&row_data_[row_ptr_[ridx]] - 1, &row_data_[row_ptr_[ridx + 1]] - 1); - } - /*! \brief get row iterator*/ - inline RowIter GetRow(size_t ridx, unsigned gid) const{ - utils::Assert(gid == 0, "FMatrixS only have 1 column group"); - return FMatrixS::GetRow(ridx); - } - public: - /*! \return whether column access is enabled */ - inline bool HaveColAccess(void) const{ - return col_ptr_.size() != 0 && col_data_.size() == row_data_.size(); - } - /*! \brief get number of colmuns */ - inline size_t NumCol(void) const{ - utils::Assert(this->HaveColAccess()); - return col_ptr_.size() - 1; - } - /*! \brief get col iterator*/ - inline ColIter GetSortedCol(size_t cidx) const{ - utils::Assert(!bst_debug || cidx < this->NumCol(), "col id exceed bound"); - return ColIter(&col_data_[col_ptr_[cidx]] - 1, &col_data_[col_ptr_[cidx + 1]] - 1); - } - /*! \brief get col iterator */ - inline ColBackIter GetReverseSortedCol(size_t cidx) const{ - utils::Assert(!bst_debug || cidx < this->NumCol(), "col id exceed bound"); - return ColBackIter(&col_data_[col_ptr_[cidx + 1]], &col_data_[col_ptr_[cidx]]); - } - /*! - * \brief intialize the data so that we have both column and row major - * access, call this whenever we need column access - */ - inline void InitData(void){ - utils::SparseCSRMBuilder builder(col_ptr_, col_data_); - builder.InitBudget(0); - for (size_t i = 0; i < this->NumRow(); i++){ - for (RowIter it = this->GetRow(i); it.Next();){ - builder.AddBudget(it.findex()); - } - } - builder.InitStorage(); - for (size_t i = 0; i < this->NumRow(); i++){ - for (RowIter it = this->GetRow(i); it.Next();){ - builder.PushElem(it.findex(), REntry((bst_uint)i, it.fvalue())); - } - } - // sort columns - unsigned ncol = static_cast(this->NumCol()); - #pragma omp parallel for schedule(static) - for (unsigned i = 0; i < ncol; i++){ - std::sort(&col_data_[col_ptr_[i]], &col_data_[col_ptr_[i + 1]], REntry::cmp_fvalue); - } - } - /*! 
- * \brief save data to binary stream - * note: since we have size_t in ptr, - * the function is not consistent between 64bit and 32bit machine - * \param fo output stream - */ - inline void SaveBinary(utils::IStream &fo) const{ - FMatrixS::SaveBinary(fo, row_ptr_, row_data_); - int col_access = this->HaveColAccess() ? 1 : 0; - fo.Write(&col_access, sizeof(int)); - if (col_access != 0){ - FMatrixS::SaveBinary(fo, col_ptr_, col_data_); - } - } - /*! - * \brief load data from binary stream - * note: since we have size_t in ptr, - * the function is not consistent between 64bit and 32bit machin - * \param fi input stream - */ - inline void LoadBinary(utils::IStream &fi){ - FMatrixS::LoadBinary(fi, row_ptr_, row_data_); - int col_access; - fi.Read(&col_access, sizeof(int)); - if (col_access != 0){ - FMatrixS::LoadBinary(fi, col_ptr_, col_data_); - }else{ - this->InitData(); - } - } - /*! - * \brief load from text file - * \param fi input file pointer - */ - inline void LoadText(FILE *fi){ - this->Clear(); - int ninst; - while (fscanf(fi, "%d", &ninst) == 1){ - std::vector findex; - std::vector fvalue; - while (ninst--){ - unsigned index; float value; - utils::Assert(fscanf(fi, "%u:%f", &index, &value) == 2, "load Text"); - findex.push_back(index); fvalue.push_back(value); - } - this->AddRow(findex, fvalue); - } - // initialize column support as well - this->InitData(); - } - private: - /*! - * \brief save data to binary stream - * \param fo output stream - * \param ptr pointer data - * \param data data content - */ - inline static void SaveBinary(utils::IStream &fo, - const std::vector &ptr, - const std::vector &data){ - size_t nrow = ptr.size() - 1; - fo.Write(&nrow, sizeof(size_t)); - fo.Write(&ptr[0], ptr.size() * sizeof(size_t)); - if (data.size() != 0){ - fo.Write(&data[0], data.size() * sizeof(REntry)); - } - } - /*! - * \brief load data from binary stream - * \param fi input stream - * \param ptr pointer data - * \param data data content - */ - inline static void LoadBinary(utils::IStream &fi, - std::vector &ptr, - std::vector &data){ - size_t nrow; - utils::Assert(fi.Read(&nrow, sizeof(size_t)) != 0, "Load FMatrixS"); - ptr.resize(nrow + 1); - utils::Assert(fi.Read(&ptr[0], ptr.size() * sizeof(size_t)) != 0, "Load FMatrixS"); - - data.resize(ptr.back()); - if (data.size() != 0){ - utils::Assert(fi.Read(&data[0], data.size() * sizeof(REntry)) != 0, "Load FMatrixS"); - } - } - public: - /*! \brief row pointer of CSR sparse storage */ - std::vector row_ptr_; - /*! \brief data in the row */ - std::vector row_data_; - /*! \brief column pointer of CSC format */ - std::vector col_ptr_; - /*! \brief column datas */ - std::vector col_data_; - }; - }; -}; -#endif diff --git a/booster/xgboost_gbmbase.h b/booster/xgboost_gbmbase.h deleted file mode 100644 index c96e22af3..000000000 --- a/booster/xgboost_gbmbase.h +++ /dev/null @@ -1,429 +0,0 @@ -#ifndef XGBOOST_GBMBASE_H -#define XGBOOST_GBMBASE_H - -#include -#include "xgboost.h" -#include "xgboost_data.h" -#include "../utils/xgboost_omp.h" -#include "../utils/xgboost_config.h" -/*! - * \file xgboost_gbmbase.h - * \brief a base model class, - * that assembles the ensembles of booster together and do model update - * this class can be used as base code to create booster variants - * - * The detailed implementation of boosters should start by using the class - * provided by this file - * - * \author Tianqi Chen: tianqi.tchen@gmail.com - */ -namespace xgboost{ - namespace booster{ - /*! 
- * \brief a base model class, - * that assembles the ensembles of booster together and provide single routines to do prediction buffer and update - * this class can be used as base code to create booster variants - * * - * relation to xgboost.h: - * (1) xgboost.h provides a interface to a single booster(e.g. a single regression tree ) - * while GBMBaseModel builds upon IBooster to build a class that - * ensembls the boosters together; - * (2) GBMBaseModel provides prediction buffering scheme to speedup training; - * (3) Summary: GBMBaseModel is a standard wrapper for boosting ensembles; - * - * Usage of this class, the number index gives calling dependencies: - * (1) model.SetParam to set the parameters - * (2) model.LoadModel to load old models or model.InitModel to create a new model - * (3) model.InitTrainer before calling model.Predict and model.DoBoost - * (4) model.Predict to get predictions given a instance - * (4) model.DoBoost to update the ensembles, add new booster to the model - * (4) model.SaveModel to save learned results - * - * Bufferring: each instance comes with a buffer_index in Predict. - * when mparam.num_pbuffer != 0, a unique buffer index can be - * assigned to each instance to buffer previous results of boosters, - * this helps to speedup training, so consider assign buffer_index - * for each training instances, if buffer_index = -1, the code - * recalculate things from scratch and will still works correctly - */ - class GBMBase{ - public: - /*! \brief number of thread used */ - GBMBase(void){} - /*! \brief destructor */ - virtual ~GBMBase(void){ - this->FreeSpace(); - } - /*! - * \brief set parameters from outside - * \param name name of the parameter - * \param val value of the parameter - */ - inline void SetParam(const char *name, const char *val){ - if (!strncmp(name, "bst:", 4)){ - cfg.PushBack(name + 4, val); - } - if (!strcmp(name, "silent")){ - cfg.PushBack(name, val); - } - tparam.SetParam(name, val); - if (boosters.size() == 0) mparam.SetParam(name, val); - } - /*! - * \brief load model from stream - * \param fi input stream - */ - inline void LoadModel(utils::IStream &fi){ - if (boosters.size() != 0) this->FreeSpace(); - utils::Assert(fi.Read(&mparam, sizeof(ModelParam)) != 0); - boosters.resize(mparam.num_boosters); - for (size_t i = 0; i < boosters.size(); i++){ - boosters[i] = booster::CreateBooster(mparam.booster_type); - boosters[i]->LoadModel(fi); - } - {// load info - booster_info.resize(mparam.num_boosters); - if (mparam.num_boosters != 0){ - utils::Assert(fi.Read(&booster_info[0], sizeof(int)*mparam.num_boosters) != 0); - } - } - if (mparam.num_pbuffer != 0){ - pred_buffer.resize(mparam.PredBufferSize()); - pred_counter.resize(mparam.PredBufferSize()); - utils::Assert(fi.Read(&pred_buffer[0], pred_buffer.size()*sizeof(float)) != 0); - utils::Assert(fi.Read(&pred_counter[0], pred_counter.size()*sizeof(unsigned)) != 0); - } - } - /*! - * \brief save model to stream - * \param fo output stream - */ - inline void SaveModel(utils::IStream &fo) const { - utils::Assert(mparam.num_boosters == (int)boosters.size()); - fo.Write(&mparam, sizeof(ModelParam)); - for (size_t i = 0; i < boosters.size(); i++){ - boosters[i]->SaveModel(fo); - } - if (booster_info.size() != 0){ - fo.Write(&booster_info[0], sizeof(int)* booster_info.size()); - } - if (mparam.num_pbuffer != 0){ - fo.Write(&pred_buffer[0], pred_buffer.size()*sizeof(float)); - fo.Write(&pred_counter[0], pred_counter.size()*sizeof(unsigned)); - } - } - /*! 
- * \brief initialize the current data storage for model, if the model is used first time, call this function - */ - inline void InitModel(void){ - pred_buffer.clear(); pred_counter.clear(); - pred_buffer.resize(mparam.PredBufferSize(), 0.0); - pred_counter.resize(mparam.PredBufferSize(), 0); - utils::Assert(mparam.num_boosters == 0); - utils::Assert(boosters.size() == 0); - } - /*! - * \brief initialize solver before training, called before training - * this function is reserved for solver to allocate necessary space and do other preparation - */ - inline void InitTrainer(void){ - if (tparam.nthread != 0){ - omp_set_num_threads(tparam.nthread); - } - if (mparam.num_booster_group == 0) mparam.num_booster_group = 1; - // make sure all the boosters get the latest parameters - for (size_t i = 0; i < this->boosters.size(); i++){ - this->ConfigBooster(this->boosters[i]); - } - } - /*! - * \brief DumpModel - * \param fo text file - * \param fmap feature map that may help give interpretations of feature - * \param with_stats whether print statistics - */ - inline void DumpModel(FILE *fo, const utils::FeatMap& fmap, bool with_stats){ - for (size_t i = 0; i < boosters.size(); i++){ - fprintf(fo, "booster[%d]\n", (int)i); - boosters[i]->DumpModel(fo, fmap, with_stats); - } - } - /*! - * \brief Dump path of all trees - * \param fo text file - * \param data input data - */ - inline void DumpPath(FILE *fo, const FMatrixS &data){ - for (size_t i = 0; i < data.NumRow(); ++i){ - for (size_t j = 0; j < boosters.size(); ++j){ - if (j != 0) fprintf(fo, "\t"); - std::vector path; - boosters[j]->PredPath(path, data, i); - fprintf(fo, "%d", path[0]); - for (size_t k = 1; k < path.size(); ++k){ - fprintf(fo, ",%d", path[k]); - } - } - fprintf(fo, "\n"); - } - } - public: - /*! - * \brief do gradient boost training for one step, using the information given - * Note: content of grad and hess can change after DoBoost - * \param grad first order gradient of each instance - * \param hess second order gradient of each instance - * \param feats features of each instance - * \param root_index pre-partitioned root index of each instance, - * root_index.size() can be 0 which indicates that no pre-partition involved - * \param bst_group which booster group it belongs to, by default, we only have 1 booster group, and leave this parameter as default - */ - inline void DoBoost(std::vector &grad, - std::vector &hess, - const booster::FMatrixS &feats, - const std::vector &root_index, - int bst_group = 0 ) { - booster::IBooster *bst = this->GetUpdateBooster( bst_group ); - bst->DoBoost(grad, hess, feats, root_index); - } - /*! 
- * \brief predict values for given sparse feature vector - * NOTE: in tree implementation, this is only OpenMP threadsafe, but not threadsafe - * \param feats feature matrix - * \param row_index row index in the feature matrix - * \param buffer_index the buffer index of the current feature line, default -1 means no buffer assigned - * \param root_index root id of current instance, default = 0 - * \param bst_group booster group index - * \return prediction - */ - inline float Predict(const FMatrixS &feats, bst_uint row_index, - int buffer_index = -1, unsigned root_index = 0, int bst_group = 0 ){ - size_t itop = 0; - float psum = 0.0f; - const int bid = mparam.BufferOffset(buffer_index, bst_group); - - // load buffered results if any - if (mparam.do_reboost == 0 && bid >= 0){ - itop = this->pred_counter[bid]; - psum = this->pred_buffer[bid]; - } - - for (size_t i = itop; i < this->boosters.size(); ++i ){ - if( booster_info[i] == bst_group ){ - psum += this->boosters[i]->Predict(feats, row_index, root_index); - } - } - // updated the buffered results - if (mparam.do_reboost == 0 && bid >= 0){ - this->pred_counter[bid] = static_cast(boosters.size()); - this->pred_buffer[bid] = psum; - } - return psum; - } - /*! \return number of boosters so far */ - inline int NumBoosters(void) const{ - return mparam.num_boosters; - } - /*! \return number of booster groups */ - inline int NumBoosterGroup(void) const{ - if( mparam.num_booster_group == 0 ) return 1; - return mparam.num_booster_group; - } - public: - //--------trial code for interactive update an existing booster------ - //-------- usually not needed, ignore this region --------- - /*! - * \brief same as Predict, but removes the prediction of booster to be updated - * this function must be called once and only once for every data with pbuffer - */ - inline float InteractPredict(const FMatrixS &feats, bst_uint row_index, - int buffer_index = -1, unsigned root_index = 0, int bst_group = 0){ - float psum = this->Predict(feats, row_index, buffer_index, root_index); - if (tparam.reupdate_booster != -1){ - const int bid = tparam.reupdate_booster; - utils::Assert(bid >= 0 && bid < (int)boosters.size(), "interact:booster_index exceed existing bound"); - if( bst_group == booster_info[bid] ){ - psum -= boosters[bid]->Predict(feats, row_index, root_index); - } - if (mparam.do_reboost == 0 && buffer_index >= 0){ - this->pred_buffer[mparam.BufferOffset(buffer_index,bst_group)] = psum; - } - } - return psum; - } - /*! \brief delete the specified booster */ - inline void DelteBooster(void){ - const int bid = tparam.reupdate_booster; - utils::Assert(bid >= 0 && bid < mparam.num_boosters, "must specify booster index for deletion"); - delete boosters[bid]; - for (int i = bid + 1; i < mparam.num_boosters; ++i){ - boosters[i - 1] = boosters[i]; - booster_info[i - 1] = booster_info[i]; - } - boosters.resize(mparam.num_boosters -= 1); - booster_info.resize(boosters.size()); - // update pred counter - for( size_t i = 0; i < pred_counter.size(); ++ i ){ - if( pred_counter[i] > (unsigned)bid ) pred_counter[i] -= 1; - } - } - /*! 
\brief update the prediction buffer, after booster have been updated */ - inline void InteractRePredict(const FMatrixS &feats, bst_uint row_index, - int buffer_index = -1, unsigned root_index = 0, int bst_group = 0 ){ - if (tparam.reupdate_booster != -1){ - const int bid = tparam.reupdate_booster; - if( booster_info[bid] != bst_group ) return; - utils::Assert(bid >= 0 && bid < (int)boosters.size(), "interact:booster_index exceed existing bound"); - if (mparam.do_reboost == 0 && buffer_index >= 0){ - this->pred_buffer[mparam.BufferOffset(buffer_index,bst_group)] += boosters[bid]->Predict(feats, row_index, root_index); - } - } - } - //-----------non public fields afterwards------------- - protected: - /*! \brief free space of the model */ - inline void FreeSpace(void){ - for (size_t i = 0; i < boosters.size(); i++){ - delete boosters[i]; - } - boosters.clear(); booster_info.clear(); mparam.num_boosters = 0; - } - /*! \brief configure a booster */ - inline void ConfigBooster(booster::IBooster *bst){ - cfg.BeforeFirst(); - while (cfg.Next()){ - bst->SetParam(cfg.name(), cfg.val()); - } - } - /*! - * \brief get a booster to update - * \return the booster created - */ - inline booster::IBooster *GetUpdateBooster(int bst_group){ - if (tparam.reupdate_booster != -1){ - const int bid = tparam.reupdate_booster; - utils::Assert(bid >= 0 && bid < (int)boosters.size(), "interact:booster_index exceed existing bound"); - this->ConfigBooster(boosters[bid]); - utils::Assert( bst_group == booster_info[bid], "booster group must match existing reupdate booster"); - return boosters[bid]; - } - - if (mparam.do_reboost == 0 || boosters.size() == 0){ - mparam.num_boosters += 1; - boosters.push_back(booster::CreateBooster(mparam.booster_type)); - booster_info.push_back(bst_group); - this->ConfigBooster(boosters.back()); - boosters.back()->InitModel(); - } - else{ - this->ConfigBooster(boosters.back()); - } - return boosters.back(); - } - protected: - /*! \brief model parameters */ - struct ModelParam{ - /*! \brief number of boosters */ - int num_boosters; - /*! \brief type of tree used */ - int booster_type; - /*! \brief number of root: default 0, means single tree */ - int num_roots; - /*! \brief number of features to be used by boosters */ - int num_feature; - /*! \brief size of predicton buffer allocated for buffering boosting computation */ - int num_pbuffer; - /*! - * \brief whether we repeatly update a single booster each round: default 0 - * set to 1 for linear booster, so that regularization term can be considered - */ - int do_reboost; - /*! - * \brief number of booster group, how many predictions a single - * input instance could corresponds to - */ - int num_booster_group; - /*! \brief reserved parameters */ - int reserved[31]; - /*! \brief constructor */ - ModelParam(void){ - num_boosters = 0; - booster_type = 0; - num_roots = num_feature = 0; - do_reboost = 0; - num_pbuffer = 0; - num_booster_group = 1; - memset(reserved, 0, sizeof(reserved)); - } - /*! 
-             * \brief set parameters from outside
-             * \param name name of the parameter
-             * \param val value of the parameter
-             */
-            inline void SetParam(const char *name, const char *val){
-                if (!strcmp("booster_type", name)){
-                    booster_type = atoi(val);
-                    // linear boost automatically set do reboost
-                    if (booster_type == 1) do_reboost = 1;
-                }
-                if (!strcmp("num_pbuffer", name)) num_pbuffer = atoi(val);
-                if (!strcmp("do_reboost", name)) do_reboost = atoi(val);
-                if (!strcmp("num_booster_group", name)) num_booster_group = atoi(val);
-                if (!strcmp("bst:num_roots", name)) num_roots = atoi(val);
-                if (!strcmp("bst:num_feature", name)) num_feature = atoi(val);
-            }
-            inline int PredBufferSize(void) const{
-                if (num_booster_group == 0) return num_pbuffer;
-                else return num_booster_group * num_pbuffer;
-            }
-            inline int BufferOffset( int buffer_index, int bst_group ) const{
-                if( buffer_index < 0 ) return -1;
-                utils::Assert( buffer_index < num_pbuffer, "buffer_indexexceed num_pbuffer" );
-                return buffer_index + num_pbuffer * bst_group;
-
-            }
-        };
-        /*! \brief training parameters */
-        struct TrainParam{
-            /*! \brief number of OpenMP threads */
-            int nthread;
-            /*!
-             * \brief index of specific booster to be re-updated, default = -1: update new booster
-             *        parameter this is part of trial interactive update mode
-             */
-            int reupdate_booster;
-            /*! \brief constructor */
-            TrainParam(void) {
-                nthread = 1;
-                reupdate_booster = -1;
-            }
-            /*!
-             * \brief set parameters from outside
-             * \param name name of the parameter
-             * \param val value of the parameter
-             */
-            inline void SetParam(const char *name, const char *val){
-                if (!strcmp("nthread", name)) nthread = atoi(val);
-                if (!strcmp("interact:booster_index", name)) reupdate_booster = atoi(val);
-            }
-        };
-    protected:
-        /*! \brief model parameters */
-        ModelParam mparam;
-        /*! \brief training parameters */
-        TrainParam tparam;
-    protected:
-        /*! \brief component boosters */
-        std::vector<booster::IBooster*> boosters;
-        /*! \brief some information indicator of the booster, reserved */
-        std::vector<int> booster_info;
-        /*! \brief prediction buffer */
-        std::vector<float> pred_buffer;
-        /*! \brief prediction buffer counter, record the progress so fart of the buffer */
-        std::vector<unsigned> pred_counter;
-        /*! \brief configurations saved for each booster */
-        utils::ConfigSaver cfg;
-    };
-    };
-};
-#endif
diff --git a/data.h b/data.h
new file mode 100644
index 000000000..468482446
--- /dev/null
+++ b/data.h
@@ -0,0 +1,293 @@
+#ifndef XGBOOST_UNITY_DATA_H
+#define XGBOOST_UNITY_DATA_H
+/*!
+ * \file data.h
+ * \brief the input data structure for gradient boosting
+ * \author Tianqi Chen
+ */
+#include <cstdio>
+#include <vector>
+#include <limits>
+#include "utils/io.h"
+#include "utils/utils.h"
+#include "utils/iterator.h"
+#include "utils/matrix_csr.h"
+
+namespace xgboost {
+/*!
+ * \brief unsigned integer type used in boost,
+ *        used for feature index and row index
+ */
+typedef unsigned bst_uint;
+/*! \brief float type, used for storing statistics */
+typedef float bst_float;
+const float rt_eps = 1e-5f;
+// min gap between feature values to allow a split to happen
+const float rt_2eps = rt_eps * 2.0f;
+
+/*! \brief gradient statistics pair usually needed in gradient boosting */
+struct bst_gpair{
+  /*! \brief gradient statistics */
+  bst_float grad;
+  /*! \brief second order gradient statistics */
+  bst_float hess;
+  bst_gpair(void) {}
+  bst_gpair(bst_float grad, bst_float hess) : grad(grad), hess(hess) {}
+};
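The bst_gpair pair above is the contract between the objective function and the boosters: for each instance the objective supplies the first and second order gradient of the loss at the current prediction, and the objective implementations under learner/ are expected to fill a std::vector<bst_gpair> like this once per boosting round. A minimal sketch of what that looks like for squared error (SquaredErrorStats is an illustrative name, not part of this patch):

    #include "data.h"

    // Sketch only: gradient statistics of squared error l = 0.5 * (pred - label)^2.
    // First order gradient: pred - label; second order gradient: constant 1.
    inline xgboost::bst_gpair SquaredErrorStats(float pred, float label) {
      return xgboost::bst_gpair(pred - label, 1.0f);
    }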
\brief an entry of sparse vector */ + struct Entry { + /*! \brief feature index */ + bst_uint findex; + /*! \brief feature value */ + bst_float fvalue; + // default constructor + Entry(void) {} + Entry(bst_uint findex, bst_float fvalue) : findex(findex), fvalue(fvalue) {} + /*! \brief reversely compare feature values */ + inline static bool CmpValue(const Entry &a, const Entry &b) { + return a.fvalue < b.fvalue; + } + }; + /*! \brief an instance of sparse vector in the batch */ + struct Inst { + /*! \brief pointer to the elements*/ + const Entry *data; + /*! \brief length of the instance */ + const bst_uint length; + /*! \brief constructor */ + Inst(const Entry *data, bst_uint length) : data(data), length(length) {} + /*! \brief get i-th pair in the sparse vector*/ + inline const Entry& operator[](size_t i) const { + return data[i]; + } + }; + /*! \brief batch size */ + size_t size; + /*! \brief the offset of rowid of this batch */ + size_t base_rowid; + /*! \brief array[size+1], row pointer of each of the elements */ + const size_t *row_ptr; + /*! \brief array[row_ptr.back()], content of the sparse element */ + const Entry *data_ptr; + /*! \brief get i-th row from the batch */ + inline Inst operator[](size_t i) const { + return Inst(data_ptr + row_ptr[i], row_ptr[i+1] - row_ptr[i]); + } +}; + +/** + * \brief This is a interface convention via template, defining the way to access features, + * column access rule is defined by template, for efficiency purpose, + * row access is defined by iterator of sparse batches + * \tparam Derived type of actual implementation + */ +template +class FMatrixInterface { + public: + /*! \brief example iterator over one column */ + struct ColIter{ + /*! + * \brief move to next position + * \return whether there is element in next position + */ + inline bool Next(void); + /*! \return row index of current position */ + inline bst_uint rindex(void) const; + /*! \return feature value in current position */ + inline bst_float fvalue(void) const; + }; + /*! \brief backward iterator over column */ + struct ColBackIter : public ColIter {}; + public: + // column access is needed by some of tree construction algorithms + /*! + * \brief get column iterator, the columns must be sorted by feature value + * \param cidx column index + * \return column iterator + */ + inline ColIter GetSortedCol(size_t cidx) const; + /*! + * \brief get column backward iterator, starts from biggest fvalue, and iterator back + * \param cidx column index + * \return reverse column iterator + */ + inline ColBackIter GetReverseSortedCol(size_t cidx) const; + /*! + * \brief get number of columns + * \return number of columns + */ + inline size_t NumCol(void) const; + /*! + * \brief check if column access is supported, if not, initialize column access + * \param max_rows maximum number of rows allowed in constructor + */ + inline void InitColAccess(void); + /*! \return whether column access is enabled */ + inline bool HaveColAccess(void) const; + /*! \breif return #entries-in-col */ + inline size_t GetColSize(size_t cidx) const; + /*! + * \breif return #entries-in-col / #rows + * \param cidx column index + * this function is used to help speedup, + * doese not necessarily implement it if not sure, return 0.0; + * \return column density + */ + inline float GetColDensity(size_t cidx) const; + /*! \brief get the row iterator associated with FMatrix */ + virtual utils::IIterator* RowIterator(void) const = 0; +}; + +/*! 
+ * \brief sparse matrix that support column access, CSC + */ +class FMatrixS : public FMatrixInterface{ + public: + typedef SparseBatch::Entry Entry; + /*! \brief row iterator */ + struct ColIter{ + const Entry *dptr_, *end_; + ColIter(const Entry* begin, const Entry* end) + :dptr_(begin), end_(end) {} + inline bool Next(void) { + if (dptr_ == end_) { + return false; + } else { + ++dptr_; return true; + } + } + inline bst_uint rindex(void) const { + return dptr_->findex; + } + inline bst_float fvalue(void) const { + return dptr_->fvalue; + } + }; + /*! \brief reverse column iterator */ + struct ColBackIter : public ColIter { + ColBackIter(const Entry* dptr, const Entry* end) : ColIter(dptr, end) {} + // shadows ColIter::Next + inline bool Next(void) { + if (dptr_ == end_) { + return false; + } else { + --dptr_; return true; + } + } + }; + /*! \brief constructor */ + explicit FMatrixS(utils::IIterator *base_iter) + : iter_(base_iter) {} + // destructor + virtual ~FMatrixS(void) { + delete iter_; + } + /*! \return whether column access is enabled */ + inline bool HaveColAccess(void) const { + return col_ptr_.size() != 0; + } + /*! \brief get number of colmuns */ + inline size_t NumCol(void) const { + utils::Check(this->HaveColAccess(), "NumCol:need column access"); + return col_ptr_.size() - 1; + } + /*! \brief get col sorted iterator */ + inline ColIter GetSortedCol(size_t cidx) const { + utils::Assert(cidx < this->NumCol(), "col id exceed bound"); + return ColIter(&col_data_[col_ptr_[cidx]] - 1, + &col_data_[col_ptr_[cidx + 1]] - 1); + } + /*! + * \brief get reversed col iterator, + * this function will be deprecated at some point + */ + inline ColBackIter GetReverseSortedCol(size_t cidx) const { + utils::Assert(cidx < this->NumCol(), "col id exceed bound"); + return ColBackIter(&col_data_[col_ptr_[cidx + 1]], + &col_data_[col_ptr_[cidx]]); + } + /*! \brief get col size */ + inline size_t GetColSize(size_t cidx) const { + return col_ptr_[cidx+1] - col_ptr_[cidx]; + } + /*! \brief get column density */ + inline float GetColDensity(size_t cidx) const { + size_t nmiss = num_buffered_row_ - (col_ptr_[cidx+1] - col_ptr_[cidx]); + return 1.0f - (static_cast(nmiss)) / num_buffered_row_; + } + virtual void InitColAccess(void) { + if (this->HaveColAccess()) return; + const size_t max_nrow = std::numeric_limits::max(); + this->InitColData(max_nrow); + } + /*! \brief get the row iterator associated with FMatrix */ + virtual utils::IIterator* RowIterator(void) const { + return iter_; + } + + protected: + /*! 
+   * \brief initialize column data
+   * \param max_nrow maximum number of rows supported
+   */
+  inline void InitColData(size_t max_nrow) {
+    // note: this part of the code is serial; TODO: parallelize this transformer
+    utils::SparseCSRMBuilder<Entry> builder(col_ptr_, col_data_);
+    builder.InitBudget(0);
+    // start working
+    iter_->BeforeFirst();
+    num_buffered_row_ = 0;
+    while (iter_->Next()) {
+      const SparseBatch &batch = iter_->Value();
+      if (batch.base_rowid >= max_nrow) break;
+      const size_t nbatch = std::min(batch.size, max_nrow - batch.base_rowid);
+      for (size_t i = 0; i < nbatch; ++i, ++num_buffered_row_) {
+        SparseBatch::Inst inst = batch[i];
+        // iterate over the entries of this instance, not over batch.size
+        for (bst_uint j = 0; j < inst.length; ++j) {
+          builder.AddBudget(inst[j].findex);
+        }
+      }
+    }
+
+    builder.InitStorage();
+
+    iter_->BeforeFirst();
+    while (iter_->Next()) {
+      const SparseBatch &batch = iter_->Value();
+      if (batch.base_rowid >= max_nrow) break;
+      const size_t nbatch = std::min(batch.size, max_nrow - batch.base_rowid);
+      for (size_t i = 0; i < nbatch; ++i) {
+        SparseBatch::Inst inst = batch[i];
+        for (bst_uint j = 0; j < inst.length; ++j) {
+          // the CSC entry stores the row index of this instance
+          builder.PushElem(inst[j].findex,
+                           Entry((bst_uint)(batch.base_rowid + i),
+                                 inst[j].fvalue));
+        }
+      }
+    }
+
+    // sort each column by feature value
+    unsigned ncol = static_cast<unsigned>(this->NumCol());
+    #pragma omp parallel for schedule(static)
+    for (unsigned i = 0; i < ncol; ++i) {
+      std::sort(&col_data_[col_ptr_[i]],
+                &col_data_[col_ptr_[i + 1]], Entry::CmpValue);
+    }
+  }
+
+ private:
+  // --- data structures used to support InitColAccess ---
+  utils::IIterator<SparseBatch> *iter_;
+  /*! \brief number of buffered rows */
+  size_t num_buffered_row_;
+  /*! \brief column pointer of CSC format */
+  std::vector<size_t> col_ptr_;
+  /*! \brief column data in CSC format */
+  std::vector<Entry> col_data_;
+};
+}  // namespace xgboost
+#endif
diff --git a/gbm/gbm.h b/gbm/gbm.h
new file mode 100644
index 000000000..640bcbafc
--- /dev/null
+++ b/gbm/gbm.h
@@ -0,0 +1,82 @@
+#ifndef XGBOOST_GBM_GBM_H_
+#define XGBOOST_GBM_GBM_H_
+/*!
+ * \file gbm.h
+ * \brief interface of gradient booster that learns through gradient statistics
+ * \author Tianqi Chen
+ */
+#include <vector>
+#include "../data.h"
+
+namespace xgboost {
+/*! \brief namespace for gradient booster */
+namespace gbm {
+/*!
+ * \brief interface of gradient boosting model
+ * \tparam FMatrix the type of feature matrix the updaters take
+ */
+template<typename FMatrix>
+class IGradBooster {
+ public:
+  /*!
+   * \brief set parameters from outside
+   * \param name name of the parameter
+   * \param val value of the parameter
+   */
+  virtual void SetParam(const char *name, const char *val) = 0;
+  /*!
+   * \brief load model from stream
+   * \param fi input stream
+   */
+  virtual void LoadModel(utils::IStream &fi) = 0;
+  /*!
+   * \brief save model to stream
+   * \param fo output stream
+   */
+  virtual void SaveModel(utils::IStream &fo) const = 0;
+  /*!
+   * \brief initialize the model
+   */
+  virtual void InitModel(void) = 0;
+  /*!
+   * \brief perform update to the model (boosting)
+   * \param gpair the gradient pair statistics of the data
+   * \param fmat feature matrix that provides access to features
+   * \param root_index pre-partitioned root index of each instance;
+   *        root_index.size() can be 0, which indicates that no pre-partition is involved
+   */
+  virtual void DoBoost(const std::vector<bst_gpair> &gpair,
+                       FMatrix &fmat,
+                       const std::vector<unsigned> &root_index) = 0;
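+  // Illustrative call sequence (a sketch, not a contract of this interface):
+  // a learner typically drives one boosting round as below, where `booster`,
+  // `gpair`, `fmat`, `root_index` and `preds` are assumed to live in the caller:
+  //
+  //   booster->DoBoost(gpair, fmat, root_index);       // add new base learners
+  //   booster->Predict(fmat, -1, root_index, &preds);  // re-predict without buffer
+  /*!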
+ * \brief generate predictions for given feature matrix + * \param fmat feature matrix + * \param buffer_offset buffer index offset of these instances, if equals -1 + * this means we do not have buffer index allocated to the gbm + * a buffer index is assigned to each instance that requires repeative prediction + * the size of buffer is set by convention using IGradBooster.SetParam("num_pbuffer","size") + * \param root_index pre-partitioned root_index of each instance, + * root_index.size() can be 0 which indicates that no pre-partition involved + * \param out_preds output vector to hold the predictions + */ + virtual void Predict(const FMatrix &fmat, + int64_t buffer_offset, + const std::vector &root_index, + std::vector *out_preds) = 0; + // destrcutor + virtual ~IGradBooster(void){} +}; +} // namespace gbm +} // namespace xgboost +#include "gbtree-inl.hpp" +namespace xgboost { +namespace gbm { +template +inline IGradBooster* CreateGradBooster(const char *name) { + if (!strcmp("gbtree", name)) return new GBTree(); + utils::Error("unknown booster type: %s", name); + return NULL; +} +} // namespace gbm +} // namespace xgboost +#endif // XGBOOST_GBM_GBM_H_ diff --git a/gbm/gbtree-inl.hpp b/gbm/gbtree-inl.hpp new file mode 100644 index 000000000..d610ce5ad --- /dev/null +++ b/gbm/gbtree-inl.hpp @@ -0,0 +1,365 @@ +#ifndef XGBOOST_GBM_GBTREE_INL_HPP_ +#define XGBOOST_GBM_GBTREE_INL_HPP_ +/*! + * \file gbtree-inl.hpp + * \brief gradient boosted tree implementation + * \author Tianqi Chen + */ +#include +#include +#include +#include "./gbm.h" +#include "../tree/updater.h" + +namespace xgboost { +namespace gbm { +/*! + * \brief gradient boosted tree + * \tparam FMatrix the data type updater taking + */ +template +class GBTree : public IGradBooster { + public: + virtual ~GBTree(void) { + this->Clear(); + } + virtual void SetParam(const char *name, const char *val) { + if (!strncmp(name, "bst:", 4)) { + cfg.push_back(std::make_pair(std::string(name+4), std::string(val))); + // set into updaters, if already intialized + for (size_t i = 0; i < updaters.size(); ++i) { + updaters[i]->SetParam(name+4, val); + } + } + if (!strcmp(name, "silent")) { + this->SetParam("bst:silent", val); + } + tparam.SetParam(name, val); + if (trees.size() == 0) mparam.SetParam(name, val); + } + virtual void LoadModel(utils::IStream &fi) { + this->Clear(); + utils::Check(fi.Read(&mparam, sizeof(ModelParam)) != 0, + "GBTree: invalid model file"); + trees.resize(mparam.num_trees); + for (size_t i = 0; i < trees.size(); ++i) { + trees[i] = new tree::RegTree(); + trees[i]->LoadModel(fi); + } + tree_info.resize(mparam.num_trees); + if (mparam.num_trees != 0) { + utils::Check(fi.Read(&tree_info[0], sizeof(int) * mparam.num_trees) != 0, + "GBTree: invalid model file"); + } + if (mparam.num_pbuffer != 0) { + pred_buffer.resize(mparam.PredBufferSize()); + pred_counter.resize(mparam.PredBufferSize()); + utils::Check(fi.Read(&pred_buffer[0], pred_buffer.size() * sizeof(float)) != 0, + "GBTree: invalid model file"); + utils::Check(fi.Read(&pred_counter[0], pred_counter.size() * sizeof(unsigned)) != 0, + "GBTree: invalid model file"); + } + } + virtual void SaveModel(utils::IStream &fo) const { + utils::Assert(mparam.num_trees == static_cast(trees.size()), "GBTree"); + fo.Write(&mparam, sizeof(ModelParam)); + for (size_t i = 0; i < trees.size(); ++i) { + trees[i]->SaveModel(fo); + } + if (tree_info.size() != 0) { + fo.Write(&tree_info[0], sizeof(int) * tree_info.size()); + } + if (mparam.num_pbuffer != 0) { + fo.Write(&pred_buffer[0], 
pred_buffer.size() * sizeof(float));
+      fo.Write(&pred_counter[0], pred_counter.size() * sizeof(unsigned));
+    }
+  }
+  // initialize the prediction buffer
+  virtual void InitModel(void) {
+    pred_buffer.clear(); pred_counter.clear();
+    pred_buffer.resize(mparam.PredBufferSize(), 0.0f);
+    pred_counter.resize(mparam.PredBufferSize(), 0);
+    utils::Assert(mparam.num_trees == 0, "GBTree: model already initialized");
+    utils::Assert(trees.size() == 0, "GBTree: model already initialized");
+  }
+  virtual void DoBoost(const std::vector<bst_gpair> &gpair,
+                       FMatrix &fmat,
+                       const std::vector<unsigned> &root_index) {
+    if (mparam.num_output_group == 1) {
+      this->BoostNewTrees(gpair, fmat, root_index, 0);
+    } else {
+      const int ngroup = mparam.num_output_group;
+      utils::Check(gpair.size() % ngroup == 0,
+                   "must have exactly ngroup*nrow gpairs");
+      std::vector<bst_gpair> tmp(gpair.size()/ngroup);
+      for (int gid = 0; gid < ngroup; ++gid) {
+        // gather the gradient statistics belonging to this group
+        const unsigned nsize = static_cast<unsigned>(tmp.size());
+        #pragma omp parallel for schedule(static)
+        for (unsigned i = 0; i < nsize; ++i) {
+          tmp[i] = gpair[i * ngroup + gid];
+        }
+        this->BoostNewTrees(tmp, fmat, root_index, gid);
+      }
+    }
+  }
+  virtual void Predict(const FMatrix &fmat,
+                       int64_t buffer_offset,
+                       const std::vector<unsigned> &root_index,
+                       std::vector<float> *out_preds) {
+    int nthread;
+    #pragma omp parallel
+    {
+      nthread = omp_get_num_threads();
+    }
+    this->InitThreadTemp(nthread);
+    std::vector<float> &preds = *out_preds;
+    preds.resize(0);
+    // start collecting the predictions
+    utils::IIterator<SparseBatch> *iter = fmat.RowIterator();
+    iter->BeforeFirst();
+    while (iter->Next()) {
+      const SparseBatch &batch = iter->Value();
+      utils::Assert(batch.base_rowid * mparam.num_output_group == preds.size(),
+                    "base_rowid is not set correctly");
+      // output convention: nrow * k, where nrow is number of rows
+      // and k is number of output groups
+      preds.resize(preds.size() + batch.size * mparam.num_output_group);
+      // parallel over local batch
+      const unsigned nsize = static_cast<unsigned>(batch.size);
+      #pragma omp parallel for schedule(static)
+      for (unsigned i = 0; i < nsize; ++i) {
+        const int tid = omp_get_thread_num();
+        std::vector<float> &feats = thread_temp[tid];
+        const size_t ridx = batch.base_rowid + i;
+        const unsigned root_idx = root_index.size() == 0 ? 0 : root_index[ridx];
+        // loop over output groups
+        for (int gid = 0; gid < mparam.num_output_group; ++gid) {
+          preds[ridx * mparam.num_output_group + gid] =
+              this->Pred(batch[i],
+                         buffer_offset < 0 ?
-1 : buffer_offset+ridx, + gid, root_idx, &feats); + } + } + } + } + + protected: + // clear the model + inline void Clear(void) { + for (size_t i = 0; i < trees.size(); ++i) { + delete trees[i]; + } + trees.clear(); + pred_buffer.clear(); + pred_counter.clear(); + } + // initialize updater before using them + inline void InitUpdater(void) { + if (tparam.updater_initialized != 0) return; + for (size_t i = 0; i < updaters.size(); ++i) { + delete updaters[i]; + } + updaters.clear(); + std::string tval = tparam.updater_seq; + char *saveptr, *pstr; + pstr = strtok_r(&tval[0], ",", &saveptr); + while (pstr != NULL) { + updaters.push_back(tree::CreateUpdater(pstr)); + for (size_t j = 0; j < cfg.size(); ++j) { + // set parameters + updaters.back()->SetParam(cfg[j].first.c_str(), cfg[j].second.c_str()); + } + pstr = strtok_r(NULL, ",", &saveptr); + } + tparam.updater_initialized = 1; + } + // do group specific group + inline void BoostNewTrees(const std::vector &gpair, + FMatrix &fmat, + const std::vector &root_index, + int bst_group) { + this->InitUpdater(); + // create the trees + std::vector new_trees; + for (int i = 0; i < tparam.num_parallel_tree; ++i) { + new_trees.push_back(new tree::RegTree()); + for (size_t j = 0; j < cfg.size(); ++j) { + new_trees.back()->param.SetParam(cfg[j].first.c_str(), cfg[j].second.c_str()); + } + new_trees.back()->InitModel(); + } + // update the trees + for (size_t i = 0; i < updaters.size(); ++i) { + updaters[i]->Update(gpair, fmat, root_index, new_trees); + } + // push back to model + for (size_t i = 0; i < new_trees.size(); ++i) { + trees.push_back(new_trees[i]); + tree_info.push_back(bst_group); + } + mparam.num_trees += tparam.num_parallel_tree; + } + // make a prediction for a single instance + inline float Pred(const SparseBatch::Inst &inst, + int64_t buffer_index, + int bst_group, + unsigned root_index, + std::vector *p_feats) { + size_t itop = 0; + float psum = 0.0f; + const int bid = mparam.BufferOffset(buffer_index, bst_group); + // load buffered results if any + if (bid >= 0) { + itop = pred_counter[bid]; + psum = pred_buffer[bid]; + } + if (itop != trees.size()) { + FillThreadTemp(inst, p_feats); + for (size_t i = itop; i < trees.size(); ++i) { + if (tree_info[i] == bst_group) { + psum += trees[i]->Predict(*p_feats, root_index); + } + } + DropThreadTemp(inst, p_feats); + } + // updated the buffered results + if (bid >= 0) { + pred_counter[bid] = static_cast(trees.size()); + pred_buffer[bid] = psum; + } + return psum; + } + // initialize thread local space for prediction + inline void InitThreadTemp(int nthread) { + thread_temp.resize(nthread); + for (size_t i = 0; i < thread_temp.size(); ++i) { + thread_temp[i].resize(mparam.num_feature); + std::fill(thread_temp[i].begin(), thread_temp[i].end(), NAN); + } + } + // fill in a thread local dense vector using a sparse instance + inline static void FillThreadTemp(const SparseBatch::Inst &inst, + std::vector *p_feats) { + std::vector &feats = *p_feats; + for (bst_uint i = 0; i < inst.length; ++i) { + feats[inst[i].findex] = inst[i].fvalue; + } + } + // clear up a thread local dense vector + inline static void DropThreadTemp(const SparseBatch::Inst &inst, + std::vector *p_feats) { + std::vector &feats = *p_feats; + for (bst_uint i = 0; i < inst.length; ++i) { + feats[inst[i].findex] = NAN; + } + } + // --- data structure --- + /*! \brief training parameters */ + struct TrainParam { + /*! \brief number of threads */ + int nthread; + /*! 
+   * \brief number of parallel trees constructed each iteration,
+   *        use this option to support boosted random forest
+   */
+  int num_parallel_tree;
+  /*! \brief whether the updaters are already initialized */
+  int updater_initialized;
+  /*! \brief tree updater sequence */
+  std::string updater_seq;
+  // construction
+  TrainParam(void) {
+    nthread = 0;
+    updater_seq = "grow_colmaker,prune";
+    num_parallel_tree = 1;
+    updater_initialized = 0;
+  }
+  inline void SetParam(const char *name, const char *val){
+    if (!strcmp(name, "updater") &&
+        strcmp(updater_seq.c_str(), val) != 0) {
+      updater_seq = val;
+      updater_initialized = 0;
+    }
+    if (!strcmp(name, "nthread")) {
+      // parse the value first, then apply it to OpenMP
+      nthread = atoi(val);
+      omp_set_num_threads(nthread);
+    }
+    if (!strcmp(name, "num_parallel_tree")) {
+      num_parallel_tree = atoi(val);
+    }
+  }
+};
+/*! \brief model parameters */
+struct ModelParam {
+  /*! \brief number of trees */
+  int num_trees;
+  /*! \brief number of roots: default 0, means single tree */
+  int num_roots;
+  /*! \brief number of features to be used by trees */
+  int num_feature;
+  /*! \brief size of prediction buffer allocated for buffering */
+  int64_t num_pbuffer;
+  /*!
+   * \brief how many output groups a single instance can produce;
+   *        this affects the number of outputs we have:
+   *        suppose we have n instances and k groups, the output will be k*n
+   */
+  int num_output_group;
+  /*! \brief reserved parameters */
+  int reserved[32];
+  /*! \brief constructor */
+  ModelParam(void) {
+    num_trees = 0;
+    num_roots = num_feature = 0;
+    num_pbuffer = 0;
+    num_output_group = 1;
+    memset(reserved, 0, sizeof(reserved));
+  }
+  /*!
+   * \brief set parameters from outside
+   * \param name name of the parameter
+   * \param val value of the parameter
+   */
+  inline void SetParam(const char *name, const char *val) {
+    if (!strcmp("num_pbuffer", name)) num_pbuffer = atol(val);
+    if (!strcmp("num_output_group", name)) num_output_group = atol(val);
+    if (!strcmp("bst:num_roots", name)) num_roots = atoi(val);
+    if (!strcmp("bst:num_feature", name)) num_feature = atoi(val);
+  }
+  /*! \return size of prediction buffer actually needed */
+  inline size_t PredBufferSize(void) const {
+    return num_output_group * num_pbuffer;
+  }
+  /*!
+   * \brief get the buffer offset given a buffer index and group id
+   * \return calculated buffer offset, or -1 if no buffer is used
+   */
+  inline int64_t BufferOffset(int64_t buffer_index, int bst_group) const {
+    if (buffer_index < 0) return -1;
+    utils::Check(buffer_index < num_pbuffer, "buffer_index exceed num_pbuffer");
+    return buffer_index + num_pbuffer * bst_group;
+  }
+};
+// training parameter
+TrainParam tparam;
+// model parameter
+ModelParam mparam;
+/*! \brief vector of trees stored in the model */
+std::vector<tree::RegTree*> trees;
+/*! \brief some information indicator of the tree, reserved */
+std::vector<int> tree_info;
+/*! \brief prediction buffer */
+std::vector<float> pred_buffer;
+/*!
\brief prediction buffer counter, remember the prediction */ + std::vector pred_counter; + // ----training fields---- + // configurations for tree + std::vector< std::pair > cfg; + // temporal storage for per thread + std::vector< std::vector > thread_temp; + // the updaters that can be applied to each of tree + std::vector< tree::IUpdater* > updaters; +}; + +} // namespace gbm +} // namespace xgboost +#endif // XGBOOST_GBM_GBTREE_INL_HPP_ diff --git a/learner/dmatrix.h b/learner/dmatrix.h new file mode 100644 index 000000000..522be9b95 --- /dev/null +++ b/learner/dmatrix.h @@ -0,0 +1,84 @@ +#ifndef XGBOOST_LEARNER_DMATRIX_H_ +#define XGBOOST_LEARNER_DMATRIX_H_ +/*! + * \file dmatrix.h + * \brief meta data and template data structure + * used for regression/classification/ranking + * \author Tianqi Chen + */ +#include "../data.h" + +namespace xgboost { +namespace learner { +/*! + * \brief meta information needed in training, including label, weight + */ +struct MetaInfo { + /*! \brief label of each instance */ + std::vector labels; + /*! + * \brief the index of begin and end of a group + * needed when the learning task is ranking + */ + std::vector group_ptr; + /*! \brief weights of each instance, optional */ + std::vector weights; + /*! + * \brief specified root index of each instance, + * can be used for multi task setting + */ + std::vector root_index; + /*! \brief get weight of each instances */ + inline float GetWeight(size_t i) const { + if(weights.size() != 0) { + return weights[i]; + } else { + return 1.0f; + } + } + /*! \brief get root index of i-th instance */ + inline float GetRoot(size_t i) const { + if(root_index.size() != 0) { + return static_cast(root_index[i]); + } else { + return 0; + } + } + inline void SaveBinary(utils::IStream &fo) { + fo.Write(labels); + fo.Write(group_ptr); + fo.Write(weights); + fo.Write(root_index); + } + inline void LoadBinary(utils::IStream &fi) { + utils::Check(fi.Read(&labels), "MetaInfo: invalid format"); + utils::Check(fi.Read(&group_ptr), "MetaInfo: invalid format"); + utils::Check(fi.Read(&weights), "MetaInfo: invalid format"); + utils::Check(fi.Read(&root_index), "MetaInfo: invalid format"); + } +}; + +/*! + * \brief data object used for learning, + * \tparam FMatrix type of feature data source + */ +template +struct DMatrix { + /*! \brief meta information about the dataset */ + MetaInfo info; + /*! \brief number of rows in the DMatrix */ + size_t num_row; + /*! \brief feature matrix about data content */ + FMatrix fmat; + /*! + * \brief cache pointer to verify if the data structure is cached in some learner + * used to verify if DMatrix is cached + */ + void *cache_learner_ptr_; + /*! \brief default constructor */ + DMatrix(void) : cache_learner_ptr_(NULL) {} +}; + +} // namespace learner +} // namespace xgboost +#endif // XGBOOST_LEARNER_DMATRIX_H_ diff --git a/learner/evaluation-inl.hpp b/learner/evaluation-inl.hpp new file mode 100644 index 000000000..a4ac1e462 --- /dev/null +++ b/learner/evaluation-inl.hpp @@ -0,0 +1,346 @@ +#ifndef XGBOOST_LEARNER_EVALUATION_INL_HPP_ +#define XGBOOST_LEARNER_EVALUATION_INL_HPP_ +/*! +* \file xgboost_evaluation-inl.hpp +* \brief evaluation metrics for regression and classification and rank +* \author Kailong Chen, Tianqi Chen +*/ +#include +#include +#include +#include +#include +#include "./evaluation.h" +#include "./helper_utils.h" + +namespace xgboost { +namespace learner { +/*! 
+ * \brief base class of elementwise evaluation
+ * \tparam Derived the name of the subclass
+ */
+template<typename Derived>
+struct EvalEWiseBase : public IEvaluator {
+  virtual float Eval(const std::vector<float> &preds,
+                     const MetaInfo &info) const {
+    utils::Check(preds.size() == info.labels.size(),
+                 "label and prediction size not match");
+    const unsigned ndata = static_cast<unsigned>(preds.size());
+    float sum = 0.0f, wsum = 0.0f;
+    #pragma omp parallel for reduction(+:sum, wsum) schedule(static)
+    for (unsigned i = 0; i < ndata; ++i) {
+      const float wt = info.GetWeight(i);
+      sum += Derived::EvalRow(info.labels[i], preds[i]) * wt;
+      wsum += wt;
+    }
+    return Derived::GetFinal(sum, wsum);
+  }
+  /*!
+   * \brief to be implemented by the subclass,
+   *        get evaluation result from one row
+   * \param label label of current instance
+   * \param pred prediction value of current instance
+   */
+  inline static float EvalRow(float label, float pred);
+  /*!
+   * \brief to be overridden by the subclass, final transformation
+   * \param esum the sum statistic returned by EvalRow
+   * \param wsum sum of weights
+   */
+  inline static float GetFinal(float esum, float wsum) {
+    return esum / wsum;
+  }
+};
+
+/*! \brief RMSE */
+struct EvalRMSE : public EvalEWiseBase<EvalRMSE> {
+  virtual const char *Name(void) const {
+    return "rmse";
+  }
+  inline static float EvalRow(float label, float pred) {
+    float diff = label - pred;
+    return diff * diff;
+  }
+  inline static float GetFinal(float esum, float wsum) {
+    return std::sqrt(esum / wsum);
+  }
+};
+
+/*! \brief logloss */
+struct EvalLogLoss : public EvalEWiseBase<EvalLogLoss> {
+  virtual const char *Name(void) const {
+    return "logloss";
+  }
+  inline static float EvalRow(float y, float py) {
+    return - y * std::log(py) - (1.0f - y) * std::log(1.0f - py);
+  }
+};
+
+/*! \brief error */
+struct EvalError : public EvalEWiseBase<EvalError> {
+  virtual const char *Name(void) const {
+    return "error";
+  }
+  inline static float EvalRow(float label, float pred) {
+    // assume label is in [0,1]
+    return pred > 0.5f ? 1.0f - label : label;
+  }
+};
+
+/*! \brief match error */
+struct EvalMatchError : public EvalEWiseBase<EvalMatchError> {
+  virtual const char *Name(void) const {
+    return "merror";
+  }
+  inline static float EvalRow(float label, float pred) {
+    return static_cast<int>(pred) != static_cast<int>(label);
+  }
+};
+
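+// The CRTP base above makes new elementwise metrics cheap to define. As a
+// minimal sketch (illustrative only, EvalMAE is not part of this patch),
+// mean absolute error would look like:
+//
+//   struct EvalMAE : public EvalEWiseBase<EvalMAE> {
+//     virtual const char *Name(void) const { return "mae"; }
+//     inline static float EvalRow(float label, float pred) {
+//       return std::fabs(label - pred);
+//     }
+//   };
+//
+// and would be registered in CreateEvaluator in evaluation.h.
+
+/*!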
\brief AMS: also records best threshold */ +struct EvalAMS : public IEvaluator { + public: + explicit EvalAMS(const char *name) { + name_ = name; + // note: ams@0 will automatically select which ratio to go + utils::Check(sscanf(name, "ams@%f", &ratio_) == 1, "invalid ams format"); + } + virtual float Eval(const std::vector &preds, + const MetaInfo &info) const { + const unsigned ndata = static_cast(preds.size()); + utils::Check(info.weights.size() == ndata, "we need weight to evaluate ams"); + std::vector< std::pair > rec(ndata); + + #pragma omp parallel for schedule(static) + for (unsigned i = 0; i < ndata; ++i) { + rec[i] = std::make_pair(preds[i], i); + } + std::sort(rec.begin(), rec.end(), CmpFirst); + unsigned ntop = static_cast(ratio_ * ndata); + if (ntop == 0) ntop = ndata; + const double br = 10.0; + unsigned thresindex = 0; + double s_tp = 0.0, b_fp = 0.0, tams = 0.0; + for (unsigned i = 0; i < ndata-1 && i < ntop; ++i) { + const unsigned ridx = rec[i].second; + const float wt = info.weights[ridx]; + if (info.labels[ridx] > 0.5f) { + s_tp += wt; + } else { + b_fp += wt; + } + if (rec[i].first != rec[i+1].first) { + double ams = sqrtf(2*((s_tp+b_fp+br) * log(1.0 + s_tp/(b_fp+br)) - s_tp)); + if (tams < ams) { + thresindex = i; + tams = ams; + } + } + } + if (ntop == ndata) { + fprintf(stderr, "\tams-ratio=%g", static_cast(thresindex) / ndata); + return tams; + } else { + return sqrtf(2*((s_tp+b_fp+br) * log(1.0 + s_tp/(b_fp+br)) - s_tp)); + } + } + virtual const char *Name(void) const { + return name_.c_str(); + } + + private: + std::string name_; + float ratio_; +}; + +/*! \brief Area under curve, for both classification and rank */ +struct EvalAuc : public IEvaluator { + virtual float Eval(const std::vector &preds, + const MetaInfo &info) const { + utils::Check(preds.size() == info.labels.size(), "label size predict size not match"); + std::vector tgptr(2, 0); tgptr[1] = preds.size(); + const std::vector &gptr = info.group_ptr.size() == 0 ? 
tgptr : info.group_ptr; + utils::Check(gptr.back() == preds.size(), + "EvalAuc: group structure must match number of prediction"); + const unsigned ngroup = static_cast(gptr.size() - 1); + // sum statictis + double sum_auc = 0.0f; + #pragma omp parallel reduction(+:sum_auc) + { + // each thread takes a local rec + std::vector< std::pair > rec; + #pragma omp for schedule(static) + for (unsigned k = 0; k < ngroup; ++k) { + rec.clear(); + for (unsigned j = gptr[k]; j < gptr[k + 1]; ++j) { + rec.push_back(std::make_pair(preds[j], j)); + } + std::sort(rec.begin(), rec.end(), CmpFirst); + // calculate AUC + double sum_pospair = 0.0; + double sum_npos = 0.0, sum_nneg = 0.0, buf_pos = 0.0, buf_neg = 0.0; + for (size_t j = 0; j < rec.size(); ++j) { + const float wt = info.GetWeight(rec[j].second); + const float ctr = info.labels[rec[j].second]; + // keep bucketing predictions in same bucket + if (j != 0 && rec[j].first != rec[j - 1].first) { + sum_pospair += buf_neg * (sum_npos + buf_pos *0.5); + sum_npos += buf_pos; sum_nneg += buf_neg; + buf_neg = buf_pos = 0.0f; + } + buf_pos += ctr * wt; buf_neg += (1.0f - ctr) * wt; + } + sum_pospair += buf_neg * (sum_npos + buf_pos *0.5); + sum_npos += buf_pos; sum_nneg += buf_neg; + // check weird conditions + utils::Check(sum_npos > 0.0 && sum_nneg > 0.0, + "AUC: the dataset only contains pos or neg samples"); + // this is the AUC + sum_auc += sum_pospair / (sum_npos*sum_nneg); + } + } + // return average AUC over list + return static_cast(sum_auc) / ngroup; + } + virtual const char *Name(void) const { + return "auc"; + } +}; + +/*! \brief Evaluate rank list */ +struct EvalRankList : public IEvaluator { + public: + virtual float Eval(const std::vector &preds, + const MetaInfo &info) const { + utils::Check(preds.size() == info.labels.size(), + "label size predict size not match"); + const std::vector &gptr = info.group_ptr; + utils::Assert(gptr.size() != 0, "must specify group when constructing rank file"); + utils::Assert(gptr.back() == preds.size(), + "EvalRanklist: group structure must match number of prediction"); + const unsigned ngroup = static_cast(gptr.size() - 1); + // sum statistics + double sum_metric = 0.0f; + #pragma omp parallel reduction(+:sum_metric) + { + // each thread takes a local rec + std::vector< std::pair > rec; + #pragma omp for schedule(static) + for (unsigned k = 0; k < ngroup; ++k) { + rec.clear(); + for (unsigned j = gptr[k]; j < gptr[k + 1]; ++j) { + rec.push_back(std::make_pair(preds[j], static_cast(info.labels[j]))); + } + sum_metric += this->EvalMetric(rec); + } + } + return static_cast(sum_metric) / ngroup; + } + virtual const char *Name(void) const { + return name_.c_str(); + } + + protected: + explicit EvalRankList(const char *name) { + name_ = name; + minus_ = false; + if (sscanf(name, "%*[^@]@%u[-]?", &topn_) != 1) { + topn_ = UINT_MAX; + } + if (name[strlen(name) - 1] == '-') { + minus_ = true; + } + } + /*! \return evaluation metric, given the pair_sort record, (pred,label) */ + virtual float EvalMetric(std::vector< std::pair > &pair_sort) const = 0; + + protected: + unsigned topn_; + std::string name_; + bool minus_; +}; + +/*! 
\brief Precison at N, for both classification and rank */ +struct EvalPrecision : public EvalRankList{ + public: + explicit EvalPrecision(const char *name) : EvalRankList(name) {} + + protected: + virtual float EvalMetric(std::vector< std::pair > &rec) const { + // calculate Preicsion + std::sort(rec.begin(), rec.end(), CmpFirst); + unsigned nhit = 0; + for (size_t j = 0; j < rec.size() && j < this->topn_; ++j) { + nhit += (rec[j].second != 0); + } + return static_cast(nhit) / topn_; + } +}; + +/*! \brief NDCG */ +struct EvalNDCG : public EvalRankList{ + public: + explicit EvalNDCG(const char *name) : EvalRankList(name) {} + + protected: + inline float CalcDCG(const std::vector< std::pair > &rec) const { + double sumdcg = 0.0; + for (size_t i = 0; i < rec.size() && i < this->topn_; ++i) { + const unsigned rel = rec[i].second; + if (rel != 0) { + sumdcg += ((1 << rel) - 1) / logf(i + 2); + } + } + return static_cast(sumdcg); + } + virtual float EvalMetric(std::vector< std::pair > &rec) const { + std::stable_sort(rec.begin(), rec.end(), CmpFirst); + float dcg = this->CalcDCG(rec); + std::stable_sort(rec.begin(), rec.end(), CmpSecond); + float idcg = this->CalcDCG(rec); + if (idcg == 0.0f) { + if (minus_) { + return 0.0f; + } else { + return 1.0f; + } + } + return dcg/idcg; + } +}; + +/*! \brief Precison at N, for both classification and rank */ +struct EvalMAP : public EvalRankList { + public: + explicit EvalMAP(const char *name) : EvalRankList(name) {} + + protected: + virtual float EvalMetric(std::vector< std::pair > &rec) const { + std::sort(rec.begin(), rec.end(), CmpFirst); + unsigned nhits = 0; + double sumap = 0.0; + for (size_t i = 0; i < rec.size(); ++i) { + if (rec[i].second != 0) { + nhits += 1; + if (i < this->topn_) { + sumap += static_cast(nhits) / (i+1); + } + } + } + if (nhits != 0) { + sumap /= nhits; + return static_cast(sumap); + } else { + if (minus_) { + return 0.0f; + } else { + return 1.0f; + } + } + } +}; + +} // namespace learner +} // namespace xgboost +#endif // XGBOOST_LEARNER_EVALUATION_INL_HPP_ diff --git a/learner/evaluation.h b/learner/evaluation.h new file mode 100644 index 000000000..d51e5b767 --- /dev/null +++ b/learner/evaluation.h @@ -0,0 +1,82 @@ +#ifndef XGBOOST_LEARNER_EVALUATION_H_ +#define XGBOOST_LEARNER_EVALUATION_H_ +/*! + * \file evaluation.h + * \brief interface of evaluation function supported in xgboost + * \author Tianqi Chen, Kailong Chen + */ +#include +#include +#include "../utils/utils.h" + +namespace xgboost { +namespace learner { +/*! \brief evaluator that evaluates the loss metrics */ +struct IEvaluator{ + /*! + * \brief evaluate a specific metric + * \param preds prediction + * \param info information, including label etc. + */ + virtual float Eval(const std::vector &preds, + const MetaInfo &info) const = 0; + /*! \return name of metric */ + virtual const char *Name(void) const = 0; + /*! 
\brief virtual destructor */
+  virtual ~IEvaluator(void) {}
+};
+}  // namespace learner
+}  // namespace xgboost
+
+// include implementations of evaluation functions
+#include "evaluation-inl.hpp"
+// factory function
+namespace xgboost {
+namespace learner {
+inline IEvaluator* CreateEvaluator(const char *name) {
+  if (!strcmp(name, "rmse")) return new EvalRMSE();
+  if (!strcmp(name, "error")) return new EvalError();
+  if (!strcmp(name, "merror")) return new EvalMatchError();
+  if (!strcmp(name, "logloss")) return new EvalLogLoss();
+  if (!strcmp(name, "auc")) return new EvalAuc();
+  if (!strncmp(name, "ams@", 4)) return new EvalAMS(name);
+  if (!strncmp(name, "pre@", 4)) return new EvalPrecision(name);
+  if (!strncmp(name, "map", 3)) return new EvalMAP(name);
+  if (!strncmp(name, "ndcg", 3)) return new EvalNDCG(name);
+  utils::Error("unknown evaluation metric type: %s", name);
+  return NULL;
+}
+
+/*! \brief a set of evaluators */
+class EvalSet{
+ public:
+  inline void AddEval(const char *name) {
+    for (size_t i = 0; i < evals_.size(); ++i) {
+      if (!strcmp(name, evals_[i]->Name())) return;
+    }
+    evals_.push_back(CreateEvaluator(name));
+  }
+  ~EvalSet(void) {
+    for (size_t i = 0; i < evals_.size(); ++i) {
+      delete evals_[i];
+    }
+  }
+  inline std::string Eval(const char *evname,
+                          const std::vector<float> &preds,
+                          const MetaInfo &info) const {
+    std::string result = "";
+    for (size_t i = 0; i < evals_.size(); ++i) {
+      float res = evals_[i]->Eval(preds, info);
+      char tmp[1024];
+      snprintf(tmp, sizeof(tmp), "\t%s-%s:%f", evname, evals_[i]->Name(), res);
+      result += tmp;
+    }
+    return result;
+  }
+
+ private:
+  std::vector<IEvaluator*> evals_;
+};
+}  // namespace learner
+}  // namespace xgboost
+#endif  // XGBOOST_LEARNER_EVALUATION_H_
diff --git a/learner/helper_utils.h b/learner/helper_utils.h
new file mode 100644
index 000000000..e2f8a3574
--- /dev/null
+++ b/learner/helper_utils.h
@@ -0,0 +1,50 @@
+#ifndef XGBOOST_LEARNER_HELPER_UTILS_H_
+#define XGBOOST_LEARNER_HELPER_UTILS_H_
+/*!
+ * \file helper_utils.h
+ * \brief useful helper functions
+ * \author Tianqi Chen, Kailong Chen
+ */
+#include <vector>
+#include <cmath>
+#include <algorithm>
+namespace xgboost {
+namespace learner {
+// simple helper function to do softmax
+inline static void Softmax(std::vector<float>* p_rec) {
+  std::vector<float> &rec = *p_rec;
+  float wmax = rec[0];
+  for (size_t i = 1; i < rec.size(); ++i) {
+    wmax = std::max(rec[i], wmax);
+  }
+  double wsum = 0.0;
+  for (size_t i = 0; i < rec.size(); ++i) {
+    rec[i] = std::exp(rec[i] - wmax);
+    wsum += rec[i];
+  }
+  for (size_t i = 0; i < rec.size(); ++i) {
+    rec[i] /= static_cast<float>(wsum);
+  }
+}
+// simple helper function to find the index of the maximum value
+inline static int FindMaxIndex(const std::vector<float>& rec) {
+  size_t mxid = 0;
+  for (size_t i = 1; i < rec.size(); ++i) {
+    if (rec[i] > rec[mxid] + 1e-6f) {
+      mxid = i;
+    }
+  }
+  return static_cast<int>(mxid);
+}
+
+inline static bool CmpFirst(const std::pair<float, unsigned> &a,
+                            const std::pair<float, unsigned> &b) {
+  return a.first > b.first;
+}
+inline static bool CmpSecond(const std::pair<float, unsigned> &a,
+                             const std::pair<float, unsigned> &b) {
+  return a.second > b.second;
+}
+}  // namespace learner
+}  // namespace xgboost
+#endif  // XGBOOST_LEARNER_HELPER_UTILS_H_
diff --git a/learner/learner-inl.hpp b/learner/learner-inl.hpp
new file mode 100644
index 000000000..62f852a12
--- /dev/null
+++ b/learner/learner-inl.hpp
@@ -0,0 +1,296 @@
+#ifndef XGBOOST_LEARNER_LEARNER_INL_HPP_
+#define XGBOOST_LEARNER_LEARNER_INL_HPP_
+/*!
+ * \file learner-inl.hpp
+ * \brief learning algorithm
+ * \author Tianqi Chen
+ */
+#include <string>
+#include <vector>
+#include <utility>
+#include <cstring>
+#include "./objective.h"
+#include "./evaluation.h"
+#include "../gbm/gbm.h"
+
+namespace xgboost {
+/*! \brief namespace for learning algorithm */
+namespace learner {
+/*!
+ * \brief learner that performs gradient boosting on a specific objective function,
+ *        and does training and prediction
+ */
+template<typename FMatrix>
+class BoostLearner {
+ public:
+  BoostLearner(void) {
+    silent = 0;
+    obj_ = NULL;
+    gbm_ = NULL;
+    name_obj_ = "reg:linear";
+    name_gbm_ = "gbtree";
+  }
+  ~BoostLearner(void) {
+    if (obj_ != NULL) delete obj_;
+    if (gbm_ != NULL) delete gbm_;
+  }
+  /*!
+   * \brief add internal cache space for mat, this can speed up prediction for matrix,
+   *        please cache the prediction for training and eval data
+   *    warning: if the model is loaded from a file of some previous training history,
+   *             SetCacheData must be called with exactly the SAME
+   *             data matrices to continue training, otherwise it will cause an error
+   * \param mats array of pointers to matrices whose prediction results need to be cached
+   */
+  inline void SetCacheData(const std::vector<DMatrix<FMatrix>*>& mats) {
+    // estimate feature bound
+    unsigned num_feature = 0;
+    // assign buffer index
+    size_t buffer_size = 0;
+    utils::Assert(cache_.size() == 0, "can only call cache data once");
+    for (size_t i = 0; i < mats.size(); ++i) {
+      bool duplicate = false;
+      for (size_t j = 0; j < i; ++j) {
+        if (mats[i] == mats[j]) duplicate = true;
+      }
+      if (duplicate) continue;
+      // set mats[i]'s cache learner pointer to this
+      mats[i]->cache_learner_ptr_ = this;
+      cache_.push_back(CacheEntry(mats[i], buffer_size, mats[i]->num_row));
+      buffer_size += mats[i]->num_row;
+      num_feature = std::max(num_feature,
+                             static_cast<unsigned>(mats[i]->fmat.NumCol()));
+    }
+    char str_temp[25];
+    if (num_feature > mparam.num_feature) {
+      snprintf(str_temp, sizeof(str_temp), "%u", num_feature);
+      this->SetParam("bst:num_feature", str_temp);
+    }
+    snprintf(str_temp, sizeof(str_temp), "%lu",
+             static_cast<unsigned long>(buffer_size));
+    this->SetParam("num_pbuffer", str_temp);
+    if (!silent) {
+      printf("buffer_size=%lu\n", static_cast<unsigned long>(buffer_size));
+    }
+  }
+  /*!
+   * \brief set parameters from outside
+   * \param name name of the parameter
+   * \param val value of the parameter
+   */
+  inline void SetParam(const char *name, const char *val) {
+    if (!strcmp(name, "silent")) silent = atoi(val);
+    if (!strcmp(name, "eval_metric")) evaluator_.AddEval(val);
+    if (gbm_ == NULL) {
+      if (!strcmp(name, "objective")) name_obj_ = val;
+      if (!strcmp(name, "booster")) name_gbm_ = val;
+      mparam.SetParam(name, val);
+    }
+    cfg_.push_back(std::make_pair(std::string(name), std::string(val)));
+  }
+  /*!
+   * \brief initialize the model
+   */
+  inline void InitModel(void) {
+    this->InitObjGBM();
+    // adapt the base score to the objective's margin space
+    mparam.base_score = obj_->ProbToMargin(mparam.base_score);
+    gbm_->InitModel();
+  }
+  /*!
+   * \brief load model from stream
+   * \param fi input stream
+   */
+  inline void LoadModel(utils::IStream &fi) {
+    utils::Check(fi.Read(&mparam, sizeof(ModelParam)) != 0,
+                 "BoostLearner: wrong model format");
+    utils::Check(fi.Read(&name_obj_), "BoostLearner: wrong model format");
+    utils::Check(fi.Read(&name_gbm_), "BoostLearner: wrong model format");
+    // delete existing objective/gbm if any, and reset the pointers
+    // so that InitObjGBM recreates them
+    if (obj_ != NULL) { delete obj_; obj_ = NULL; }
+    if (gbm_ != NULL) { delete gbm_; gbm_ = NULL; }
+    this->InitObjGBM();
+    gbm_->LoadModel(fi);
+  }
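+  // Typical driver loop (an illustrative sketch, not part of this class):
+  // assuming a DMatrix<FMatrixS> `train` was loaded elsewhere and `num_round`
+  // is a placeholder chosen by the caller:
+  //
+  //   BoostLearner<FMatrixS> learner;
+  //   std::vector<DMatrix<FMatrixS>*> cache(1, &train);
+  //   learner.SetCacheData(cache);
+  //   learner.SetParam("objective", "binary:logistic");
+  //   learner.InitModel();
+  //   for (int i = 0; i < num_round; ++i) {
+  //     learner.UpdateOneIter(i, &train);
+  //   }
+  /*!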
+   * \brief load model from file
+   * \param fname file name
+   */
+  inline void LoadModel(const char *fname) {
+    utils::FileStream fi(utils::FopenCheck(fname, "rb"));
+    this->LoadModel(fi);
+    fi.Close();
+  }
+  inline void SaveModel(utils::IStream &fo) const {
+    fo.Write(&mparam, sizeof(ModelParam));
+    fo.Write(&name_obj_);
+    fo.Write(&name_gbm_);
+    gbm_->SaveModel(fo);
+  }
+  /*!
+   * \brief save model into file
+   * \param fname file name
+   */
+  inline void SaveModel(const char *fname) const {
+    utils::FileStream fo(utils::FopenCheck(fname, "wb"));
+    this->SaveModel(fo);
+    fo.Close();
+  }
+  /*!
+   * \brief update the model for one iteration
+   * \param iter current iteration number
+   * \param p_train pointer to the data matrix
+   */
+  inline void UpdateOneIter(int iter, DMatrix<FMatrix> *p_train) {
+    this->PredictRaw(*p_train, &preds_);
+    obj_->GetGradient(preds_, p_train->info, iter, &gpair_);
+    gbm_->DoBoost(gpair_, p_train->fmat, p_train->info.root_index);
+  }
+  /*!
+   * \brief evaluate the model for a specific iteration
+   * \param iter iteration number
+   * \param evals data sets to evaluate
+   * \param evname name of each data set
+   * \return a string corresponding to the evaluation result
+   */
+  inline std::string EvalOneIter(int iter,
+                                 const std::vector<DMatrix<FMatrix>*> &evals,
+                                 const std::vector<std::string> &evname) {
+    std::string res;
+    char tmp[256];
+    snprintf(tmp, sizeof(tmp), "[%d]", iter);
+    res = tmp;
+    for (size_t i = 0; i < evals.size(); ++i) {
+      this->PredictRaw(*evals[i], &preds_);
+      obj_->EvalTransform(&preds_);
+      res += evaluator_.Eval(evname[i].c_str(), preds_, evals[i]->info);
+    }
+    return res;
+  }
+  /*!
+   * \brief simple evaluation function, with a specified metric
+   * \param data input data
+   * \param metric name of the metric
+   * \return a pair of <name of metric, result of evaluation>
+   */
+  std::pair<std::string, float> Evaluate(const DMatrix<FMatrix> &data, std::string metric) {
+    if (metric == "auto") metric = obj_->DefaultEvalMetric();
+    IEvaluator *ev = CreateEvaluator(metric.c_str());
+    this->PredictRaw(data, &preds_);
+    obj_->EvalTransform(&preds_);
+    float res = ev->Eval(preds_, data.info);
+    delete ev;
+    return std::make_pair(metric, res);
+  }
+  /*!
+   * \brief get prediction
+   * \param data input data
+   * \param out_preds output vector that stores the prediction
+   */
+  inline void Predict(const DMatrix<FMatrix> &data,
+                      std::vector<float> *out_preds) {
+    this->PredictRaw(data, out_preds);
+    obj_->PredTransform(out_preds);
+  }
+
+ protected:
+  /*!
+   * \brief initialize the objective function and GBM,
+   *        if they have not been initialized yet
+   */
+  inline void InitObjGBM(void) {
+    if (obj_ != NULL) return;
+    utils::Assert(gbm_ == NULL, "GBM and obj should be NULL");
+    obj_ = CreateObjFunction(name_obj_.c_str());
+    gbm_ = gbm::CreateGradBooster<FMatrix>(name_gbm_.c_str());
+    for (size_t i = 0; i < cfg_.size(); ++i) {
+      obj_->SetParam(cfg_[i].first.c_str(), cfg_[i].second.c_str());
+      gbm_->SetParam(cfg_[i].first.c_str(), cfg_[i].second.c_str());
+    }
+    evaluator_.AddEval(obj_->DefaultEvalMetric());
+  }
+  /*!
+   * \brief get un-transformed prediction
+   * \param data training data matrix
+   * \param out_preds output vector that stores the prediction
+   */
+  inline void PredictRaw(const DMatrix<FMatrix> &data,
+                         std::vector<float> *out_preds) {
+    gbm_->Predict(data.fmat, this->FindBufferOffset(data),
+                  data.info.root_index, out_preds);
+  }
+
+  /*! \brief training parameter for regression */
+  struct ModelParam{
+    /*! \brief global bias */
+    float base_score;
+    /*! \brief number of features */
+    unsigned num_feature;
+    /*! \brief number of classes, if it is a multi-class classification task */
+    int num_class;
+    /*!
\brief reserved field */ + int reserved[32]; + /*! \brief constructor */ + ModelParam(void) { + base_score = 0.5f; + num_feature = 0; + num_class = 0; + memset(reserved, 0, sizeof(reserved)); + } + /*! + * \brief set parameters from outside + * \param name name of the parameter + * \param val value of the parameter + */ + inline void SetParam(const char *name, const char *val) { + if (!strcmp("base_score", name)) base_score = static_cast(atof(val)); + if (!strcmp("num_class", name)) num_class = atoi(val); + if (!strcmp("bst:num_feature", name)) num_feature = atoi(val); + } + }; + // data fields + // silent during training + int silent; + // evaluation set + EvalSet evaluator_; + // model parameter + ModelParam mparam; + // gbm model that back everything + gbm::IGradBooster *gbm_; + // name of gbm model used for training + std::string name_gbm_; + // objective fnction + IObjFunction *obj_; + // name of objective function + std::string name_obj_; + // configurations + std::vector< std::pair > cfg_; + // temporal storages for prediciton + std::vector preds_; + // gradient pairs + std::vector gpair_; + + private: + // cache entry object that helps handle feature caching + struct CacheEntry { + const DMatrix *mat_; + size_t buffer_offset_; + size_t num_row_; + CacheEntry(const DMatrix *mat, size_t buffer_offset, size_t num_row) + :mat_(mat), buffer_offset_(buffer_offset), num_row_(num_row) {} + }; + // find internal bufer offset for certain matrix, if not exist, return -1 + inline int64_t FindBufferOffset(const DMatrix &mat) const { + for (size_t i = 0; i < cache_.size(); ++i) { + if (cache_[i].mat_ == &mat && mat.cache_learner_ptr_ == this) { + if (cache_[i].num_row_ == mat.num_row) { + return cache_[i].buffer_offset_; + } + } + } + return -1; + } + // data structure field + /*! \brief the entries indicates that we have internal prediction cache */ + std::vector cache_; +}; +} // namespace learner +} // namespace xgboost +#endif // XGBOOST_LEARNER_LEARNER_INL_HPP_ diff --git a/learner/objective-inl.hpp b/learner/objective-inl.hpp new file mode 100644 index 000000000..7aa11d338 --- /dev/null +++ b/learner/objective-inl.hpp @@ -0,0 +1,137 @@ +#ifndef XGBOOST_LEARNER_OBJECTIVE_INL_HPP_ +#define XGBOOST_LEARNER_OBJECTIVE_INL_HPP_ +/*! + * \file objective-inl.hpp + * \brief objective function implementations + * \author Tianqi Chen, Kailong Chen + */ +#include +#include "./objective.h" + +namespace xgboost { +namespace learner { +/*! \brief defines functions to calculate some commonly used functions */ +struct LossType { + /*! \brief indicate which type we are using */ + int loss_type; + // list of constants + static const int kLinearSquare = 0; + static const int kLogisticNeglik = 1; + static const int kLogisticClassify = 2; + static const int kLogisticRaw = 3; + /*! + * \brief transform the linear sum to prediction + * \param x linear sum of boosting ensemble + * \return transformed prediction + */ + inline float PredTransform(float x) const { + switch (loss_type) { + case kLogisticRaw: + case kLinearSquare: return x; + case kLogisticClassify: + case kLogisticNeglik: return 1.0f / (1.0f + expf(-x)); + default: utils::Error("unknown loss_type"); return 0.0f; + } + } + /*! 
+   * \brief calculate first order gradient of loss, given transformed prediction
+   * \param predt transformed prediction
+   * \param label true label
+   * \return first order gradient
+   */
+  inline float FirstOrderGradient(float predt, float label) const {
+    switch (loss_type) {
+      case kLinearSquare: return predt - label;
+      // kLogisticRaw first maps the margin to a probability,
+      // then falls through to the logistic gradient below
+      case kLogisticRaw: predt = 1.0f / (1.0f + expf(-predt));
+      case kLogisticClassify:
+      case kLogisticNeglik: return predt - label;
+      default: utils::Error("unknown loss_type"); return 0.0f;
+    }
+  }
+  /*!
+   * \brief calculate second order gradient of loss, given transformed prediction
+   * \param predt transformed prediction
+   * \param label true label
+   * \return second order gradient
+   */
+  inline float SecondOrderGradient(float predt, float label) const {
+    switch (loss_type) {
+      case kLinearSquare: return 1.0f;
+      // kLogisticRaw first maps the margin to a probability, then falls through
+      case kLogisticRaw: predt = 1.0f / (1.0f + expf(-predt));
+      case kLogisticClassify:
+      case kLogisticNeglik: return predt * (1.0f - predt);
+      default: utils::Error("unknown loss_type"); return 0.0f;
+    }
+  }
+  /*!
+   * \brief transform probability value back to margin
+   */
+  inline float ProbToMargin(float base_score) const {
+    if (loss_type == kLogisticRaw ||
+        loss_type == kLogisticClassify ||
+        loss_type == kLogisticNeglik) {
+      utils::Check(base_score > 0.0f && base_score < 1.0f,
+                   "base_score must be in (0,1) for logistic loss");
+      base_score = -logf(1.0f / base_score - 1.0f);
+    }
+    return base_score;
+  }
+  /*! \brief get default evaluation metric for the objective */
+  inline const char *DefaultEvalMetric(void) const {
+    if (loss_type == kLogisticClassify) return "error";
+    if (loss_type == kLogisticRaw) return "auc";
+    return "rmse";
+  }
+};
+
+/*! \brief objective function that only needs to define elementwise gradient */
+class RegLossObj : public IObjFunction{
+ public:
+  explicit RegLossObj(int loss_type) {
+    loss.loss_type = loss_type;
+    scale_pos_weight = 1.0f;
+  }
+  virtual ~RegLossObj(void) {}
+  virtual void SetParam(const char *name, const char *val) {
+    if (!strcmp("scale_pos_weight", name)) {
+      scale_pos_weight = static_cast<float>(atof(val));
+    }
+  }
+  virtual void GetGradient(const std::vector<float>& preds,
+                           const MetaInfo &info,
+                           int iter,
+                           std::vector<bst_gpair> *out_gpair) {
+    utils::Check(preds.size() == info.labels.size(),
+                 "labels are not correctly provided");
+    std::vector<bst_gpair> &gpair = *out_gpair;
+    gpair.resize(preds.size());
+    // start calculating gradient
+    const unsigned ndata = static_cast<unsigned>(preds.size());
+    #pragma omp parallel for schedule(static)
+    for (unsigned j = 0; j < ndata; ++j) {
+      float p = loss.PredTransform(preds[j]);
+      float w = info.GetWeight(j);
+      if (info.labels[j] == 1.0f) w *= scale_pos_weight;
+      gpair[j] = bst_gpair(loss.FirstOrderGradient(p, info.labels[j]) * w,
+                           loss.SecondOrderGradient(p, info.labels[j]) * w);
+    }
+  }
+  virtual const char* DefaultEvalMetric(void) {
+    return loss.DefaultEvalMetric();
+  }
+  virtual void PredTransform(std::vector<float> *io_preds) {
+    std::vector<float> &preds = *io_preds;
+    const unsigned ndata = static_cast<unsigned>(preds.size());
+    #pragma omp parallel for schedule(static)
+    for (unsigned j = 0; j < ndata; ++j) {
+      preds[j] = loss.PredTransform(preds[j]);
+    }
+  }
+
+ protected:
+  float scale_pos_weight;
+  LossType loss;
+};
+}  // namespace learner
+}  // namespace xgboost
+#endif  // XGBOOST_LEARNER_OBJECTIVE_INL_HPP_
diff --git a/learner/objective.h b/learner/objective.h
new file mode 100644
index 000000000..2ae5b7d3e
--- /dev/null
+++ b/learner/objective.h
@@ -0,0 +1,80 @@
+#ifndef XGBOOST_LEARNER_OBJECTIVE_H_
+#define XGBOOST_LEARNER_OBJECTIVE_H_
+/*!
+ * \file objective.h + * \brief interface of objective function used for gradient boosting + * \author Tianqi Chen, Kailong Chen + */ +#include "dmatrix.h" + +namespace xgboost { +namespace learner { +/*! \brief interface of objective function */ +class IObjFunction{ + public: + /*! \brief virtual destructor */ + virtual ~IObjFunction(void){} + /*! + * \brief set parameters from outside + * \param name name of the parameter + * \param val value of the parameter + */ + virtual void SetParam(const char *name, const char *val) = 0; + /*! + * \brief get gradient over each of predictions, given existing information + * \param preds prediction of current round + * \param info information about labels, weights, groups in rank + * \param iter current iteration number + * \param out_gpair output of get gradient, saves gradient and second order gradient in + */ + virtual void GetGradient(const std::vector& preds, + const MetaInfo &info, + int iter, + std::vector *out_gpair) = 0; + /*! \return the default evaluation metric for the objective */ + virtual const char* DefaultEvalMetric(void) = 0; + // the following functions are optional, most of time default implementation is good enough + /*! + * \brief transform prediction values, this is only called when Prediction is called + * \param io_preds prediction values, saves to this vector as well + */ + virtual void PredTransform(std::vector *io_preds){} + /*! + * \brief transform prediction values, this is only called when Eval is called, + * usually it redirect to PredTransform + * \param io_preds prediction values, saves to this vector as well + */ + virtual void EvalTransform(std::vector *io_preds) { + this->PredTransform(io_preds); + } + /*! + * \brief transform probability value back to margin + * this is used to transform user-set base_score back to margin + * used by gradient boosting + * \return transformed value + */ + virtual float ProbToMargin(float base_score) { + return base_score; + } +}; + +} // namespace learner +} // namespace xgboost + +// this are implementations of objective functions +#include "objective-inl.hpp" +// factory function +namespace xgboost { +namespace learner { +/*! \brief factory funciton to create objective function by name */ +inline IObjFunction* CreateObjFunction(const char *name) { + if (!strcmp("reg:linear", name)) return new RegLossObj( LossType::kLinearSquare ); + if (!strcmp("reg:logistic", name)) return new RegLossObj( LossType::kLogisticNeglik ); + if (!strcmp("binary:logistic", name)) return new RegLossObj( LossType::kLogisticClassify ); + if (!strcmp("binary:logitraw", name)) return new RegLossObj( LossType::kLogisticRaw ); + utils::Error("unknown objective function type: %s", name); + return NULL; +} +} // namespace learner +} // namespace xgboost +#endif // XGBOOST_LEARNER_OBJECTIVE_H_ diff --git a/regrank/xgboost_regrank.h b/regrank/xgboost_regrank.h deleted file mode 100644 index 7ddabdfae..000000000 --- a/regrank/xgboost_regrank.h +++ /dev/null @@ -1,401 +0,0 @@ -#ifndef XGBOOST_REGRANK_H -#define XGBOOST_REGRANK_H -/*! 
-* \file xgboost_regrank.h -* \brief class for gradient boosted regression and ranking -* \author Kailong Chen: chenkl198812@gmail.com, Tianqi Chen: tianqi.tchen@gmail.com -*/ -#include -#include -#include -#include "xgboost_regrank_data.h" -#include "xgboost_regrank_eval.h" -#include "xgboost_regrank_obj.h" -#include "../utils/xgboost_omp.h" -#include "../booster/xgboost_gbmbase.h" -#include "../utils/xgboost_utils.h" -#include "../utils/xgboost_stream.h" - -namespace xgboost{ - namespace regrank{ - /*! \brief class for gradient boosted regression and ranking */ - class RegRankBoostLearner{ - public: - /*! \brief constructor */ - RegRankBoostLearner(void){ - silent = 0; - obj_ = NULL; - name_obj_ = "reg:linear"; - } - /*! \brief destructor */ - ~RegRankBoostLearner(void){ - if( obj_ != NULL ) delete obj_; - } - /*! - * \brief a regression booter associated with training and evaluating data - * \param mats array of pointers to matrix whose prediction result need to be cached - */ - RegRankBoostLearner(const std::vector& mats){ - silent = 0; - obj_ = NULL; - name_obj_ = "reg:linear"; - this->SetCacheData(mats); - } - /*! - * \brief add internal cache space for mat, this can speedup prediction for matrix, - * please cache prediction for training and eval data - * warning: if the model is loaded from file from some previous training history - * set cache data must be called with exactly SAME - * data matrices to continue training otherwise it will cause error - * \param mats array of pointers to matrix whose prediction result need to be cached - */ - inline void SetCacheData(const std::vector& mats){ - // estimate feature bound - int num_feature = 0; - // assign buffer index - unsigned buffer_size = 0; - - utils::Assert( cache_.size() == 0, "can only call cache data once" ); - for( size_t i = 0; i < mats.size(); ++i ){ - bool dupilicate = false; - for( size_t j = 0; j < i; ++ j ){ - if( mats[i] == mats[j] ) dupilicate = true; - } - if( dupilicate ) continue; - // set mats[i]'s cache learner pointer to this - mats[i]->cache_learner_ptr_ = this; - cache_.push_back( CacheEntry( mats[i], buffer_size, mats[i]->Size() ) ); - buffer_size += static_cast(mats[i]->Size()); - num_feature = std::max(num_feature, (int)(mats[i]->data.NumCol())); - } - - char str_temp[25]; - if (num_feature > mparam.num_feature){ - mparam.num_feature = num_feature; - sprintf(str_temp, "%d", num_feature); - base_gbm.SetParam("bst:num_feature", str_temp); - } - - sprintf(str_temp, "%u", buffer_size); - base_gbm.SetParam("num_pbuffer", str_temp); - if (!silent){ - printf("buffer_size=%u\n", buffer_size); - } - } - - /*! - * \brief set parameters from outside - * \param name name of the parameter - * \param val value of the parameter - */ - inline void SetParam(const char *name, const char *val){ - if (!strcmp(name, "silent")) silent = atoi(val); - if (!strcmp(name, "eval_metric")) evaluator_.AddEval(val); - if (!strcmp(name, "objective") ) name_obj_ = val; - if (!strcmp(name, "num_class") ) base_gbm.SetParam("num_booster_group", val ); - mparam.SetParam(name, val); - base_gbm.SetParam(name, val); - cfg_.push_back( std::make_pair( std::string(name), std::string(val) ) ); - } - /*! 
- * \brief initialize solver before training, called before training - * this function is reserved for solver to allocate necessary space and do other preparation - */ - inline void InitTrainer(void){ - if( mparam.num_class != 0 ){ - if( name_obj_ != "multi:softmax" && name_obj_ != "multi:softprob"){ - name_obj_ = "multi:softmax"; - printf("auto select objective=softmax to support multi-class classification\n" ); - } - } - base_gbm.InitTrainer(); - obj_ = CreateObjFunction( name_obj_.c_str() ); - for( size_t i = 0; i < cfg_.size(); ++ i ){ - obj_->SetParam( cfg_[i].first.c_str(), cfg_[i].second.c_str() ); - } - evaluator_.AddEval( obj_->DefaultEvalMetric() ); - } - /*! - * \brief initialize the current data storage for model, if the model is used first time, call this function - */ - inline void InitModel(void){ - base_gbm.InitModel(); - mparam.AdjustBase(name_obj_.c_str()); - } - /*! - * \brief load model from file - * \param fname file name - */ - inline void LoadModel(const char *fname){ - utils::FileStream fi(utils::FopenCheck(fname, "rb")); - this->LoadModel(fi); - fi.Close(); - } - /*! - * \brief load model from stream - * \param fi input stream - */ - inline void LoadModel(utils::IStream &fi){ - base_gbm.LoadModel(fi); - utils::Assert(fi.Read(&mparam, sizeof(ModelParam)) != 0); - // save name obj - size_t len; - if( fi.Read(&len, sizeof(len)) != 0 ){ - name_obj_.resize( len ); - if( len != 0 ){ - utils::Assert( fi.Read(&name_obj_[0], len*sizeof(char)) != 0 ); - } - } - } - /*! - * \brief DumpModel - * \param fo text file - * \param fmap feature map that may help give interpretations of feature - * \param with_stats whether print statistics as well - */ - inline void DumpModel(FILE *fo, const utils::FeatMap& fmap, bool with_stats){ - base_gbm.DumpModel(fo, fmap, with_stats); - } - /*! - * \brief Dump path of all trees - * \param fo text file - * \param data input data - */ - inline void DumpPath(FILE *fo, const DMatrix &data){ - base_gbm.DumpPath(fo, data.data); - } - /*! - * \brief save model to stream - * \param fo output stream - */ - inline void SaveModel(utils::IStream &fo) const{ - base_gbm.SaveModel(fo); - fo.Write(&mparam, sizeof(ModelParam)); - // save name obj - size_t len = name_obj_.length(); - fo.Write(&len, sizeof(len)); - fo.Write(&name_obj_[0], len*sizeof(char)); - } - /*! - * \brief save model into file - * \param fname file name - */ - inline void SaveModel(const char *fname) const{ - utils::FileStream fo(utils::FopenCheck(fname, "wb")); - this->SaveModel(fo); - fo.Close(); - } - /*! - * \brief update the model for one iteration - */ - inline void UpdateOneIter(const DMatrix &train){ - this->PredictRaw(preds_, train); - obj_->GetGradient(preds_, train.info, base_gbm.NumBoosters(), grad_, hess_); - if( grad_.size() == train.Size() ){ - base_gbm.DoBoost(grad_, hess_, train.data, train.info.root_index); - }else{ - int ngroup = base_gbm.NumBoosterGroup(); - utils::Assert( grad_.size() == train.Size() * (size_t)ngroup, "BUG: UpdateOneIter: mclass" ); - std::vector tgrad( train.Size() ), thess( train.Size() ); - for( int g = 0; g < ngroup; ++ g ){ - memcpy( &tgrad[0], &grad_[g*tgrad.size()], sizeof(float)*tgrad.size() ); - memcpy( &thess[0], &hess_[g*tgrad.size()], sizeof(float)*tgrad.size() ); - base_gbm.DoBoost(tgrad, thess, train.data, train.info.root_index, g ); - } - } - } - /*! 
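- * Editor's note: a rough usage sketch of this (now removed) learner, pieced
- * together from the methods above and the driver in xgboost_regrank_main.cpp
- * further below; variable names are illustrative only:
- * \code
- *   RegRankBoostLearner learner;
- *   std::vector<DMatrix*> cache(1, &train);
- *   learner.SetCacheData(cache);        // must cover train + eval matrices
- *   learner.SetParam("objective", "reg:linear");
- *   learner.InitModel();                // fresh model (else LoadModel)
- *   learner.InitTrainer();
- *   for (int i = 0; i < num_round; ++i) {
- *     learner.UpdateOneIter(train);
- *     learner.EvalOneIter(i, evals, eval_names);
- *   }
- * \endcode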
- * \brief evaluate the model for specific iteration - * \param iter iteration number - * \param evals datas i want to evaluate - * \param evname name of each dataset - * \param fo file to output log - */ - inline void EvalOneIter(int iter, - const std::vector &evals, - const std::vector &evname, - FILE *fo=stderr ){ - fprintf(fo, "[%d]", iter); - for (size_t i = 0; i < evals.size(); ++i){ - this->PredictRaw(preds_, *evals[i]); - obj_->EvalTransform(preds_); - evaluator_.Eval(fo, evname[i].c_str(), preds_, evals[i]->info); - } - fprintf(fo, "\n"); - fflush(fo); - } - /*! - * \brief get prediction - * \param storage to store prediction - * \param data input data - * \param bst_group booster group we are in - */ - inline void Predict(std::vector &preds, const DMatrix &data, int bst_group = -1){ - this->PredictRaw( preds, data, bst_group ); - obj_->PredTransform( preds ); - } - public: - /*! - * \brief interactive update - * \param action action type - * \parma train training data - */ - inline void UpdateInteract(std::string action, const DMatrix& train){ - for(size_t i = 0; i < cache_.size(); ++i){ - this->InteractPredict(preds_, *cache_[i].mat_); - } - - if (action == "remove"){ - base_gbm.DelteBooster(); return; - } - - obj_->GetGradient(preds_, train.info, base_gbm.NumBoosters(), grad_, hess_); - std::vector root_index; - base_gbm.DoBoost(grad_, hess_, train.data, root_index); - - for(size_t i = 0; i < cache_.size(); ++i){ - this->InteractRePredict(*cache_[i].mat_); - } - } - private: - /*! \brief get the transformed predictions, given data */ - inline void InteractPredict(std::vector &preds, const DMatrix &data){ - int buffer_offset = this->FindBufferOffset(data); - utils::Assert( buffer_offset >=0, "interact mode must cache training data" ); - preds.resize(data.Size()); - const unsigned ndata = static_cast(data.Size()); - #pragma omp parallel for schedule( static ) - for (unsigned j = 0; j < ndata; ++j){ - preds[j] = mparam.base_score + base_gbm.InteractPredict(data.data, j, buffer_offset + j); - } - obj_->PredTransform( preds ); - } - /*! \brief repredict trial */ - inline void InteractRePredict(const DMatrix &data){ - int buffer_offset = this->FindBufferOffset(data); - utils::Assert( buffer_offset >=0, "interact mode must cache training data" ); - const unsigned ndata = static_cast(data.Size()); - #pragma omp parallel for schedule( static ) - for (unsigned j = 0; j < ndata; ++j){ - base_gbm.InteractRePredict(data.data, j, buffer_offset + j); - } - } - /*! \brief get un-transformed prediction*/ - inline void PredictRaw(std::vector &preds, const DMatrix &data, int bst_group = -1 ){ - int buffer_offset = this->FindBufferOffset(data); - if( bst_group < 0 ){ - int ngroup = base_gbm.NumBoosterGroup(); - preds.resize( data.Size() * ngroup ); - for( int g = 0; g < ngroup; ++ g ){ - this->PredictBuffer(&preds[ data.Size() * g ], data, buffer_offset, g ); - } - }else{ - preds.resize( data.Size() ); - this->PredictBuffer(&preds[0], data, buffer_offset, bst_group ); - } - } - /*! 
\brief get the un-transformed predictions, given data */ - inline void PredictBuffer(float *preds, const DMatrix &data, int buffer_offset, int bst_group ){ - const unsigned ndata = static_cast(data.Size()); - if( buffer_offset >= 0 ){ - #pragma omp parallel for schedule( static ) - for (unsigned j = 0; j < ndata; ++j){ - preds[j] = mparam.base_score + base_gbm.Predict(data.data, j, buffer_offset + j, data.info.GetRoot(j), bst_group ); - - } - }else - #pragma omp parallel for schedule( static ) - for (unsigned j = 0; j < ndata; ++j){ - preds[j] = mparam.base_score + base_gbm.Predict(data.data, j, -1, data.info.GetRoot(j), bst_group ); - }{ - } - } - private: - /*! \brief training parameter for regression */ - struct ModelParam{ - /* \brief global bias */ - float base_score; - /* \brief type of loss function */ - int loss_type; - /* \brief number of features */ - int num_feature; - /* \brief number of class, if it is multi-class classification */ - int num_class; - /*! \brief reserved field */ - int reserved[15]; - /*! \brief constructor */ - ModelParam(void){ - base_score = 0.5f; - loss_type = -1; - num_feature = 0; - num_class = 0; - memset(reserved, 0, sizeof(reserved)); - } - /*! - * \brief set parameters from outside - * \param name name of the parameter - * \param val value of the parameter - */ - inline void SetParam(const char *name, const char *val){ - if (!strcmp("base_score", name)) base_score = (float)atof(val); - if (!strcmp("num_class", name)) num_class = atoi(val); - if (!strcmp("loss_type", name)) loss_type = atoi(val); - if (!strcmp("bst:num_feature", name)) num_feature = atoi(val); - } - /*! - * \brief adjust base_score based on loss type and objective function - */ - inline void AdjustBase(const char *obj){ - // some tweaks for loss type - if( loss_type == -1 ){ - loss_type = 1; - if( !strcmp("reg:linear", obj ) ) loss_type = 0; - } - if (loss_type == 1 || loss_type == 2|| loss_type == 3){ - utils::Assert(base_score > 0.0f && base_score < 1.0f, "sigmoid range constrain"); - base_score = -logf(1.0f / base_score - 1.0f); - } - } - }; - private: - struct CacheEntry{ - const DMatrix *mat_; - int buffer_offset_; - size_t num_row_; - CacheEntry(const DMatrix *mat, int buffer_offset, size_t num_row) - :mat_(mat), buffer_offset_(buffer_offset), num_row_(num_row){} - }; - /*! \brief the entries indicates that we have internal prediction cache */ - std::vector cache_; - private: - // find internal bufer offset for certain matrix, if not exist, return -1 - inline int FindBufferOffset(const DMatrix &mat){ - for(size_t i = 0; i < cache_.size(); ++i){ - if( cache_[i].mat_ == &mat && mat.cache_learner_ptr_ == this ) { - if( cache_[i].num_row_ == mat.Size() ){ - return cache_[i].buffer_offset_; - }else{ - fprintf( stderr, "warning: number of rows in input matrix changed as remembered in cachelist, ignore cached results\n" ); - fflush( stderr ); - } - } - } - return -1; - } - protected: - int silent; - EvalSet evaluator_; - booster::GBMBase base_gbm; - ModelParam mparam; - // objective fnction - IObjFunction *obj_; - // name of objective function - std::string name_obj_; - std::vector< std::pair > cfg_; - protected: - std::vector grad_, hess_, preds_; - }; - } -}; - -#endif diff --git a/regrank/xgboost_regrank_data.h b/regrank/xgboost_regrank_data.h deleted file mode 100644 index 7c3138089..000000000 --- a/regrank/xgboost_regrank_data.h +++ /dev/null @@ -1,260 +0,0 @@ -#ifndef XGBOOST_REGRANK_DATA_H -#define XGBOOST_REGRANK_DATA_H - -/*! 
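- * Editor's note on ModelParam::AdjustBase in the removed learner above: for
- * the logistic loss types it converts the user-set base_score, a probability
- * in (0,1), into a margin via the logit,
- * \f[ \mathrm{base\_score} \leftarrow
- *     -\ln\!\left(\frac{1}{\mathrm{base\_score}} - 1\right), \f]
- * the same role ProbToMargin plays in the new objective.h interface.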
- * \file xgboost_regrank_data.h - * \brief input data structure for regression, binary classification, and rankning. - * Format: - * The data should contain each data instance in each line. - * The format of line data is as below: - * label [feature index:feature value]+ - * When using rank, an addtional group file with suffix group must be provided, giving the number of instances in each group - * When using weighted aware classification(regression), an addtional weight file must be provided, giving the weight of each instance - * - * \author Kailong Chen: chenkl198812@gmail.com, Tianqi Chen: tianqi.tchen@gmail.com - */ -#include -#include -#include -#include -#include "../booster/xgboost_data.h" -#include "../utils/xgboost_utils.h" -#include "../utils/xgboost_stream.h" - -namespace xgboost{ - /*! \brief namespace to handle regression and rank */ - namespace regrank{ - /*! \brief data matrix for regression content */ - struct DMatrix{ - public: - /*! \brief data information besides the features */ - struct Info{ - /*! \brief label of each instance */ - std::vector labels; - /*! \brief the index of begin and end of a groupneeded when the learning task is ranking */ - std::vector group_ptr; - /*! \brief weights of each instance, optional */ - std::vector weights; - /*! \brief specified root index of each instance, can be used for multi task setting*/ - std::vector root_index; - /*! \brief get weight of each instances */ - inline float GetWeight( size_t i ) const{ - if( weights.size() != 0 ) return weights[i]; - else return 1.0f; - } - inline float GetRoot( size_t i ) const{ - if( root_index.size() != 0 ) return static_cast(root_index[i]); - else return 0; - } - }; - public: - /*! \brief feature data content */ - booster::FMatrixS data; - /*! \brief information fields */ - Info info; - /*! - * \brief cache pointer to verify if the data structure is cached in some learner - * this is a bit ugly, we need to have double check verification, so if one side get deleted, - * and some strange re-allocation gets the same pointer we will still be fine - */ - void *cache_learner_ptr_; - public: - /*! \brief default constructor */ - DMatrix(void):cache_learner_ptr_(NULL){} - /*! \brief get the number of instances */ - inline size_t Size() const{ - return data.NumRow(); - } - /*! - * \brief load from text file - * \param fname name of text data - * \param silent whether print information or not - */ - inline void LoadText(const char* fname, bool silent = false){ - data.Clear(); - FILE* file = utils::FopenCheck(fname, "r"); - float label; bool init = true; - char tmp[1024]; - std::vector findex; - std::vector fvalue; - - while (fscanf(file, "%s", tmp) == 1){ - unsigned index; float value; - if (sscanf(tmp, "%u:%f", &index, &value) == 2){ - findex.push_back(index); fvalue.push_back(value); - } - else{ - if (!init){ - info.labels.push_back(label); - data.AddRow(findex, fvalue); - } - findex.clear(); fvalue.clear(); - utils::Assert(sscanf(tmp, "%f", &label) == 1, "invalid format"); - init = false; - } - } - - info.labels.push_back(label); - data.AddRow(findex, fvalue); - // initialize column support as well - data.InitData(); - - if (!silent){ - printf("%ux%u matrix with %lu entries is loaded from %s\n", - (unsigned)data.NumRow(), (unsigned)data.NumCol(), (unsigned long)data.NumEntry(), fname); - } - fclose(file); - this->TryLoadGroup(fname, silent); - this->TryLoadWeight(fname, silent); - } - /*! 
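- * Editor's example of the text format described in the file header above
- * (hypothetical contents): each line is "label [findex:fvalue ...]", and for
- * ranking a companion .group file gives the number of instances per group:
- * \code
- *   train.txt:          train.txt.group:
- *     1 0:1.5 3:2.0       2
- *     0 1:0.3             1
- *     1 0:0.7 2:1.1
- * \endcode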
- * \brief load from binary file - * \param fname name of binary data - * \param silent whether print information or not - * \return whether loading is success - */ - inline bool LoadBinary(const char* fname, bool silent = false){ - FILE *fp = fopen64(fname, "rb"); - if (fp == NULL) return false; - utils::FileStream fs(fp); - data.LoadBinary(fs); - info.labels.resize(data.NumRow()); - utils::Assert(fs.Read(&info.labels[0], sizeof(float)* data.NumRow()) != 0, "DMatrix LoadBinary"); - {// load in group ptr - unsigned ngptr; - if( fs.Read(&ngptr, sizeof(unsigned) ) != 0 ){ - info.group_ptr.resize( ngptr ); - if( ngptr != 0 ){ - utils::Assert( fs.Read(&info.group_ptr[0], sizeof(unsigned) * ngptr) != 0, "Load group file"); - utils::Assert( info.group_ptr.back() == data.NumRow(), "number of group must match number of record" ); - } - } - } - {// load in weight - unsigned nwt; - if( fs.Read(&nwt, sizeof(unsigned) ) != 0 ){ - utils::Assert( nwt == 0 || nwt == data.NumRow(), "invalid weight" ); - info.weights.resize( nwt ); - if( nwt != 0 ){ - utils::Assert( fs.Read(&info.weights[0], sizeof(unsigned) * nwt) != 0, "Load weight file"); - } - } - } - fs.Close(); - - if (!silent){ - printf("%ux%u matrix with %lu entries is loaded from %s\n", - (unsigned)data.NumRow(), (unsigned)data.NumCol(), (unsigned long)data.NumEntry(), fname); - if( info.group_ptr.size() != 0 ){ - printf("data contains %u groups\n", (unsigned)info.group_ptr.size()-1 ); - } - } - return true; - } - /*! - * \brief save to binary file - * \param fname name of binary data - * \param silent whether print information or not - */ - inline void SaveBinary(const char* fname, bool silent = false){ - // initialize column support as well - data.InitData(); - - utils::FileStream fs(utils::FopenCheck(fname, "wb")); - data.SaveBinary(fs); - utils::Assert( info.labels.size() == data.NumRow(), "label size is not consistent with feature matrix size" ); - fs.Write(&info.labels[0], sizeof(float) * data.NumRow()); - {// write out group ptr - unsigned ngptr = static_cast( info.group_ptr.size() ); - fs.Write(&ngptr, sizeof(unsigned) ); - if( ngptr != 0 ){ - fs.Write(&info.group_ptr[0], sizeof(unsigned) * ngptr); - } - } - {// write out weight - unsigned nwt = static_cast( info.weights.size() ); - fs.Write( &nwt, sizeof(unsigned) ); - if( nwt != 0 ){ - fs.Write(&info.weights[0], sizeof(float) * nwt); - } - } - fs.Close(); - if (!silent){ - printf("%ux%u matrix with %lu entries is saved to %s\n", - (unsigned)data.NumRow(), (unsigned)data.NumCol(), (unsigned long)data.NumEntry(), fname); - if( info.group_ptr.size() != 0 ){ - printf("data contains %u groups\n", (unsigned)info.group_ptr.size()-1 ); - } - } - } - /*! 
- * \brief cache load data given a file name, if filename ends with .buffer, direct load binary - * otherwise the function will first check if fname + '.buffer' exists, - * if binary buffer exists, it will reads from binary buffer, otherwise, it will load from text file, - * and try to create a buffer file - * \param fname name of binary data - * \param silent whether print information or not - * \param savebuffer whether do save binary buffer if it is text - */ - inline void CacheLoad(const char *fname, bool silent = false, bool savebuffer = true){ - int len = strlen(fname); - if (len > 8 && !strcmp(fname + len - 7, ".buffer")){ - if( !this->LoadBinary(fname, silent) ){ - fprintf(stderr,"can not open file \"%s\"", fname); - utils::Error("DMatrix::CacheLoad failed"); - } - return; - } - char bname[1024]; - sprintf(bname, "%s.buffer", fname); - if (!this->LoadBinary(bname, silent)){ - this->LoadText(fname, silent); - if (savebuffer) this->SaveBinary(bname, silent); - } - } - private: - inline bool TryLoadGroup(const char* fname, bool silent = false){ - std::string name = fname; - if (name.length() > 8 && !strcmp(fname + name.length() - 7, ".buffer")){ - name.resize( name.length() - 7 ); - } - name += ".group"; - //if exists group data load it in - FILE *fi = fopen64(name.c_str(), "r"); - if (fi == NULL) return false; - info.group_ptr.push_back(0); - unsigned nline; - while (fscanf(fi, "%u", &nline) == 1){ - info.group_ptr.push_back(info.group_ptr.back()+nline); - } - if(!silent){ - printf("%lu groups are loaded from %s\n", info.group_ptr.size()-1, name.c_str()); - } - fclose(fi); - utils::Assert( info.group_ptr.back() == data.NumRow(), "DMatrix: group data does not match the number of rows in feature matrix" ); - return true; - } - inline bool TryLoadWeight(const char* fname, bool silent = false){ - std::string name = fname; - if (name.length() > 8 && !strcmp(fname + name.length() - 7, ".buffer")){ - name.resize( name.length() - 7 ); - } - name += ".weight"; - //if exists group data load it in - FILE *fi = fopen64(name.c_str(), "r"); - if (fi == NULL) return false; - float wt; - while (fscanf(fi, "%f", &wt) == 1){ - info.weights.push_back( wt ); - } - if(!silent){ - printf("loading weight from %s\n", name.c_str()); - } - fclose(fi); - utils::Assert( info.weights.size() == data.NumRow(), "DMatrix: weight data does not match the number of rows in feature matrix" ); - return true; - } - }; - }; -}; -#endif diff --git a/regrank/xgboost_regrank_eval.h b/regrank/xgboost_regrank_eval.h deleted file mode 100644 index 740b6ec5b..000000000 --- a/regrank/xgboost_regrank_eval.h +++ /dev/null @@ -1,375 +0,0 @@ -#ifndef XGBOOST_REGRANK_EVAL_H -#define XGBOOST_REGRANK_EVAL_H -/*! -* \file xgboost_regrank_eval.h -* \brief evaluation metrics for regression and classification and rank -* \author Kailong Chen: chenkl198812@gmail.com, Tianqi Chen: tianqi.tchen@gmail.com -*/ - -#include -#include -#include -#include "../utils/xgboost_utils.h" -#include "../utils/xgboost_omp.h" -#include "../utils/xgboost_random.h" -#include "xgboost_regrank_data.h" -#include "xgboost_regrank_utils.h" - -namespace xgboost{ - namespace regrank{ - /*! \brief evaluator that evaluates the loss metrics */ - struct IEvaluator{ - /*! - * \brief evaluate a specific metric - * \param preds prediction - * \param info information, including label etc. - */ - virtual float Eval(const std::vector &preds, - const DMatrix::Info &info) const = 0; - /*! \return name of metric */ - virtual const char *Name(void) const = 0; - /*! 
\brief virtual destructor */ - virtual ~IEvaluator(void){} - }; - - /*! \brief RMSE */ - struct EvalRMSE : public IEvaluator{ - virtual float Eval(const std::vector &preds, - const DMatrix::Info &info) const { - utils::Assert( preds.size() == info.labels.size(), "label size predict size not match" ); - const unsigned ndata = static_cast(preds.size()); - float sum = 0.0, wsum = 0.0; - #pragma omp parallel for reduction(+:sum,wsum) schedule( static ) - for (unsigned i = 0; i < ndata; ++i){ - const float wt = info.GetWeight(i); - const float diff = info.labels[i] - preds[i]; - sum += diff*diff * wt; - wsum += wt; - } - return sqrtf(sum / wsum); - } - virtual const char *Name(void) const{ - return "rmse"; - } - }; - - /*! \brief Error */ - struct EvalLogLoss : public IEvaluator{ - virtual float Eval(const std::vector &preds, - const DMatrix::Info &info) const { - utils::Assert( preds.size() == info.labels.size(), "label size predict size not match" ); - const unsigned ndata = static_cast(preds.size()); - float sum = 0.0f, wsum = 0.0f; - #pragma omp parallel for reduction(+:sum,wsum) schedule( static ) - for (unsigned i = 0; i < ndata; ++i){ - const float y = info.labels[i]; - const float py = preds[i]; - const float wt = info.GetWeight(i); - sum -= wt * (y * std::log(py) + (1.0f - y)*std::log(1 - py)); - wsum += wt; - } - return sum / wsum; - } - virtual const char *Name(void) const{ - return "negllik"; - } - }; - - /*! \brief Error */ - struct EvalError : public IEvaluator{ - virtual float Eval(const std::vector &preds, - const DMatrix::Info &info) const { - const unsigned ndata = static_cast(preds.size()); - float sum = 0.0f, wsum = 0.0f; - #pragma omp parallel for reduction(+:sum,wsum) schedule( static ) - for (unsigned i = 0; i < ndata; ++i){ - const float wt = info.GetWeight(i); - if (preds[i] > 0.5f){ - if (info.labels[i] < 0.5f) sum += wt; - } - else{ - if (info.labels[i] >= 0.5f) sum += wt; - } - wsum += wt; - } - return sum / wsum; - } - virtual const char *Name(void) const{ - return "error"; - } - }; - - /*! 
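- * Editor's note: the weighted metrics above compute
- * \f[ \mathrm{rmse} = \sqrt{\frac{\sum_i w_i\,(y_i - p_i)^2}{\sum_i w_i}},
- *     \qquad
- *     \mathrm{negllik} = -\frac{\sum_i w_i\,\bigl(y_i \ln p_i +
- *     (1-y_i)\ln(1-p_i)\bigr)}{\sum_i w_i}, \f]
- * with \f$ w_i = 1 \f$ when no weight file is supplied.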
\brief AMS: also records best threshold */ - struct EvalAMS : public IEvaluator{ - public: - EvalAMS(const char *name){ - name_ = name; - // note: ams@0 will automatically select which ratio to go - utils::Assert( sscanf(name, "ams@%f", &ratio_ ) == 1, "invalid ams format" ); - } - virtual float Eval(const std::vector &preds, - const DMatrix::Info &info) const { - const unsigned ndata = static_cast(preds.size()); - utils::Assert( info.weights.size() == ndata, "we need weight to evaluate ams"); - std::vector< std::pair > rec(ndata); - - #pragma omp parallel for schedule( static ) - for (unsigned i = 0; i < ndata; ++i){ - rec[i] = std::make_pair( preds[i], i ); - } - std::sort( rec.begin(), rec.end(), CmpFirst ); - unsigned ntop = static_cast( ratio_ * ndata ); - if( ntop == 0 ) ntop = ndata; - const double br = 10.0; - unsigned thresindex = 0; - double s_tp = 0.0, b_fp = 0.0, tams = 0.0; - for (unsigned i = 0; i < ndata-1 && i < ntop; ++i){ - const unsigned ridx = rec[i].second; - const float wt = info.weights[ridx]; - if( info.labels[ridx] > 0.5f ){ - s_tp += wt; - }else{ - b_fp += wt; - } - if( rec[i].first != rec[i+1].first ){ - double ams = sqrtf( 2*((s_tp+b_fp+br) * log( 1.0 + s_tp/(b_fp+br) ) - s_tp) ); - if( tams < ams ){ - thresindex = i; - tams = ams; - } - } - } - if( ntop == ndata ){ - fprintf( stderr, "\tams-ratio=%g", float(thresindex)/ndata ); - return tams; - }else{ - return sqrtf( 2*((s_tp+b_fp+br) * log( 1.0 + s_tp/(b_fp+br) ) - s_tp) ); - } - } - virtual const char *Name(void) const{ - return name_.c_str(); - } - private: - std::string name_; - float ratio_; - }; - - /*! \brief Error for multi-class classification, need exact match */ - struct EvalMatchError : public IEvaluator{ - public: - virtual float Eval(const std::vector &preds, - const DMatrix::Info &info) const { - const unsigned ndata = static_cast(preds.size()); - float sum = 0.0f, wsum = 0.0f; - #pragma omp parallel for reduction(+:sum,wsum) schedule( static ) - for (unsigned i = 0; i < ndata; ++i){ - const float wt = info.GetWeight(i); - int label = static_cast(info.labels[i]); - if (static_cast(preds[i]) != label ) sum += wt; - wsum += wt; - } - return sum / wsum; - } - virtual const char *Name(void) const{ - return "merror"; - } - }; - - /*! \brief Area under curve, for both classification and rank */ - struct EvalAuc : public IEvaluator{ - virtual float Eval(const std::vector &preds, - const DMatrix::Info &info) const { - utils::Assert( preds.size() == info.labels.size(), "label size predict size not match" ); - std::vector tgptr(2, 0); tgptr[1] = preds.size(); - const std::vector &gptr = info.group_ptr.size() == 0 ? 
tgptr : info.group_ptr; - utils::Assert(gptr.back() == preds.size(), "EvalAuc: group structure must match number of prediction"); - const unsigned ngroup = static_cast(gptr.size() - 1); - - double sum_auc = 0.0f; - #pragma omp parallel reduction(+:sum_auc) - { - // each thread takes a local rec - std::vector< std::pair > rec; - #pragma omp for schedule(static) - for (unsigned k = 0; k < ngroup; ++k){ - rec.clear(); - for (unsigned j = gptr[k]; j < gptr[k + 1]; ++j){ - rec.push_back(std::make_pair(preds[j], j)); - } - std::sort(rec.begin(), rec.end(), CmpFirst); - // calculate AUC - double sum_pospair = 0.0; - double sum_npos = 0.0, sum_nneg = 0.0, buf_pos = 0.0, buf_neg = 0.0; - for (size_t j = 0; j < rec.size(); ++j){ - const float wt = info.GetWeight(rec[j].second); - const float ctr = info.labels[rec[j].second]; - // keep bucketing predictions in same bucket - if (j != 0 && rec[j].first != rec[j - 1].first){ - sum_pospair += buf_neg * (sum_npos + buf_pos *0.5); - sum_npos += buf_pos; sum_nneg += buf_neg; - buf_neg = buf_pos = 0.0f; - } - buf_pos += ctr * wt; buf_neg += (1.0f - ctr) * wt; - } - sum_pospair += buf_neg * (sum_npos + buf_pos *0.5); - sum_npos += buf_pos; sum_nneg += buf_neg; - // - utils::Assert(sum_npos > 0.0 && sum_nneg > 0.0, "the dataset only contains pos or neg samples"); - // this is the AUC - sum_auc += sum_pospair / (sum_npos*sum_nneg); - } - } - // return average AUC over list - return static_cast(sum_auc) / ngroup; - } - virtual const char *Name(void) const{ - return "auc"; - } - }; - - /*! \brief Evaluate rank list */ - struct EvalRankList : public IEvaluator{ - public: - virtual float Eval(const std::vector &preds, - const DMatrix::Info &info) const { - utils::Assert( preds.size() == info.labels.size(), "label size predict size not match" ); - const std::vector &gptr = info.group_ptr; - utils::Assert(gptr.size() != 0, "must specify group when constructing rank file"); - utils::Assert( gptr.back() == preds.size(), "EvalRanklist: group structure must match number of prediction"); - const unsigned ngroup = static_cast(gptr.size() - 1); - - double sum_metric = 0.0f; - #pragma omp parallel reduction(+:sum_metric) - { - // each thread takes a local rec - std::vector< std::pair > rec; - #pragma omp for schedule(static) - for (unsigned k = 0; k < ngroup; ++k){ - rec.clear(); - for (unsigned j = gptr[k]; j < gptr[k + 1]; ++j){ - rec.push_back(std::make_pair(preds[j], (int)info.labels[j])); - } - sum_metric += this->EvalMetric( rec ); - } - } - return static_cast(sum_metric) / ngroup; - } - virtual const char *Name(void) const{ - return name_.c_str(); - } - protected: - EvalRankList(const char *name){ - name_ = name; - if( sscanf(name, "%*[^@]@%u", &topn_) != 1 ){ - topn_ = UINT_MAX; - } - } - /*! \return evaluation metric, given the pair_sort record, (pred,label) */ - virtual float EvalMetric( std::vector< std::pair > &pair_sort ) const = 0; - protected: - unsigned topn_; - std::string name_; - }; - - /*! \brief Precison at N, for both classification and rank */ - struct EvalPrecision : public EvalRankList{ - public: - EvalPrecision(const char *name):EvalRankList(name){} - protected: - virtual float EvalMetric( std::vector< std::pair > &rec ) const { - // calculate Preicsion - std::sort(rec.begin(), rec.end(), CmpFirst); - unsigned nhit = 0; - for (size_t j = 0; j < rec.size() && j < this->topn_; ++j){ - nhit += (rec[j].second != 0 ); - } - return static_cast( nhit ) / topn_; - } - }; - - - /*! 
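- * Editor's note: CalcDCG below implements the exponential-gain DCG truncated
- * at topn_,
- * \f[ \mathrm{DCG@n} = \sum_{i=0}^{n-1} \frac{2^{rel_i} - 1}{\log_2(i+2)}, \f]
- * and EvalMetric returns it normalized by the ideal DCG obtained by sorting
- * on the true labels.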
\brief NDCG */ - struct EvalNDCG : public EvalRankList{ - public: - EvalNDCG(const char *name):EvalRankList(name){} - protected: - inline float CalcDCG( const std::vector< std::pair > &rec ) const { - double sumdcg = 0.0; - for( size_t i = 0; i < rec.size() && i < this->topn_; i ++ ){ - const unsigned rel = rec[i].second; - if( rel != 0 ){ - sumdcg += logf(2.0f) * ((1<(sumdcg); - } - virtual float EvalMetric( std::vector< std::pair > &rec ) const { - std::sort(rec.begin(), rec.end(), CmpSecond); - float idcg = this->CalcDCG(rec); - std::sort(rec.begin(), rec.end(), CmpFirst); - float dcg = this->CalcDCG(rec); - if( idcg == 0.0f ) return 0.0f; - else return dcg/idcg; - } - }; - - /*! \brief Precison at N, for both classification and rank */ - struct EvalMAP : public EvalRankList{ - public: - EvalMAP(const char *name):EvalRankList(name){} - protected: - virtual float EvalMetric( std::vector< std::pair > &rec ) const { - std::sort(rec.begin(), rec.end(), CmpFirst); - unsigned nhits = 0; - double sumap = 0.0; - for( size_t i = 0; i < rec.size(); ++i){ - if( rec[i].second != 0 ){ - nhits += 1; - if( i < this->topn_ ){ - sumap += static_cast(nhits) / (i+1); - } - } - } - if (nhits != 0) sumap /= nhits; - return static_cast(sumap); - } - }; - }; - - namespace regrank{ - /*! \brief a set of evaluators */ - struct EvalSet{ - public: - inline void AddEval(const char *name){ - for (size_t i = 0; i < evals_.size(); ++i){ - if (!strcmp(name, evals_[i]->Name())) return; - } - if (!strcmp(name, "rmse")) evals_.push_back(new EvalRMSE()); - if (!strcmp(name, "error")) evals_.push_back(new EvalError()); - if (!strcmp(name, "merror")) evals_.push_back(new EvalMatchError()); - if (!strcmp(name, "logloss")) evals_.push_back(new EvalLogLoss()); - if (!strcmp(name, "auc")) evals_.push_back(new EvalAuc()); - if (!strncmp(name, "ams@",4)) evals_.push_back(new EvalAMS(name)); - if (!strncmp(name, "pre@", 4)) evals_.push_back(new EvalPrecision(name)); - if (!strncmp(name, "map", 3)) evals_.push_back(new EvalMAP(name)); - if (!strncmp(name, "ndcg", 3)) evals_.push_back(new EvalNDCG(name)); - } - ~EvalSet(){ - for (size_t i = 0; i < evals_.size(); ++i){ - delete evals_[i]; - } - } - inline void Eval(FILE *fo, const char *evname, - const std::vector &preds, - const DMatrix::Info &info) const{ - for (size_t i = 0; i < evals_.size(); ++i){ - float res = evals_[i]->Eval(preds, info); - fprintf(fo, "\t%s-%s:%f", evname, evals_[i]->Name(), res); - } - } - private: - std::vector evals_; - }; - }; -}; -#endif diff --git a/regrank/xgboost_regrank_main.cpp b/regrank/xgboost_regrank_main.cpp deleted file mode 100644 index 4e8a59564..000000000 --- a/regrank/xgboost_regrank_main.cpp +++ /dev/null @@ -1,303 +0,0 @@ -#define _CRT_SECURE_NO_WARNINGS -#define _CRT_SECURE_NO_DEPRECATE - -#include -#include -#include -#include "xgboost_regrank.h" -#include "../utils/xgboost_fmap.h" -#include "../utils/xgboost_random.h" -#include "../utils/xgboost_config.h" - -namespace xgboost{ - namespace regrank{ - /*! 
- * \brief wrapping the training process of the gradient boosting regression model, - * given the configuation - * \author Kailong Chen: chenkl198812@gmail.com, Tianqi Chen: tianqi.chen@gmail.com - */ - class RegBoostTask{ - public: - inline int Run(int argc, char *argv[]){ - if (argc < 2){ - printf("Usage: \n"); - return 0; - } - utils::ConfigIterator itr(argv[1]); - while (itr.Next()){ - this->SetParam(itr.name(), itr.val()); - } - for (int i = 2; i < argc; i++){ - char name[256], val[256]; - if (sscanf(argv[i], "%[^=]=%s", name, val) == 2){ - this->SetParam(name, val); - } - } - this->InitData(); - this->InitLearner(); - if (task == "dump"){ - this->TaskDump(); - return 0; - } - if (task == "interact"){ - this->TaskInteractive(); return 0; - } - if (task == "dumppath"){ - this->TaskDumpPath(); return 0; - } - if (task == "eval"){ - this->TaskEval(); return 0; - } - if (task == "pred"){ - this->TaskPred(); - } - else{ - this->TaskTrain(); - } - return 0; - } - inline void SetParam(const char *name, const char *val){ - if (!strcmp("silent", name)) silent = atoi(val); - if (!strcmp("use_buffer", name)) use_buffer = atoi(val); - if (!strcmp("seed", name)) random::Seed(atoi(val)); - if (!strcmp("num_round", name)) num_round = atoi(val); - if (!strcmp("save_period", name)) save_period = atoi(val); - if (!strcmp("eval_train", name)) eval_train = atoi(val); - if (!strcmp("task", name)) task = val; - if (!strcmp("data", name)) train_path = val; - if (!strcmp("test:data", name)) test_path = val; - if (!strcmp("model_in", name)) model_in = val; - if (!strcmp("model_out", name)) model_out = val; - if (!strcmp("model_dir", name)) model_dir_path = val; - if (!strcmp("fmap", name)) name_fmap = val; - if (!strcmp("name_dump", name)) name_dump = val; - if (!strcmp("name_dumppath", name)) name_dumppath = val; - if (!strcmp("name_pred", name)) name_pred = val; - if (!strcmp("dump_stats", name)) dump_model_stats = atoi(val); - if (!strcmp("interact:action", name)) interact_action = val; - if (!strncmp("batch:", name, 6)){ - cfg_batch.PushBack(name + 6, val); - } - if (!strncmp("eval[", name, 5)) { - char evname[256]; - utils::Assert(sscanf(name, "eval[%[^]]", evname) == 1, "must specify evaluation name for display"); - eval_data_names.push_back(std::string(evname)); - eval_data_paths.push_back(std::string(val)); - } - cfg.PushBack(name, val); - } - public: - RegBoostTask(void){ - // default parameters - silent = 0; - use_buffer = 1; - num_round = 10; - save_period = 0; - eval_train = 0; - dump_model_stats = 0; - task = "train"; - model_in = "NULL"; - model_out = "NULL"; - name_fmap = "NULL"; - name_pred = "pred.txt"; - name_dump = "dump.txt"; - name_dumppath = "dump.path.txt"; - model_dir_path = "./"; - interact_action = "update"; - } - ~RegBoostTask(void){ - for (size_t i = 0; i < deval.size(); i++){ - delete deval[i]; - } - } - private: - inline void InitData(void){ - if (name_fmap != "NULL") fmap.LoadText(name_fmap.c_str()); - if (task == "dump") return; - if (task == "pred" || task == "dumppath"){ - data.CacheLoad(test_path.c_str(), silent != 0, use_buffer != 0); - } - else{ - // training - data.CacheLoad(train_path.c_str(), silent != 0, use_buffer != 0); - utils::Assert(eval_data_names.size() == eval_data_paths.size()); - for (size_t i = 0; i < eval_data_names.size(); ++i){ - deval.push_back(new DMatrix()); - deval.back()->CacheLoad(eval_data_paths[i].c_str(), silent != 0, use_buffer != 0); - devalall.push_back(deval.back()); - } - std::vector dcache(1, &data); - for( size_t i = 0; i < deval.size(); 
++ i){ - dcache.push_back( deval[i] ); - } - // set cache data to be all training and evaluation data - learner.SetCacheData(dcache); - - // add training set to evaluation set if needed - if( eval_train != 0 ){ - devalall.push_back( &data ); - eval_data_names.push_back( std::string("train") ); - } - - } - } - inline void InitLearner(void){ - cfg.BeforeFirst(); - while (cfg.Next()){ - learner.SetParam(cfg.name(), cfg.val()); - } - if (model_in != "NULL"){ - utils::FileStream fi(utils::FopenCheck(model_in.c_str(), "rb")); - learner.LoadModel(fi); - fi.Close(); - } - else{ - utils::Assert(task == "train", "model_in not specified"); - learner.InitModel(); - } - learner.InitTrainer(); - } - inline void TaskTrain(void){ - const time_t start = time(NULL); - unsigned long elapsed = 0; - for (int i = 0; i < num_round; ++i){ - elapsed = (unsigned long)(time(NULL) - start); - if (!silent) printf("boosting round %d, %lu sec elapsed\n", i, elapsed); - learner.UpdateOneIter(data); - learner.EvalOneIter(i, devalall, eval_data_names); - if (save_period != 0 && (i + 1) % save_period == 0){ - this->SaveModel(i); - } - elapsed = (unsigned long)(time(NULL) - start); - } - // always save final round - if ((save_period == 0 || num_round % save_period != 0) && model_out != "NONE"){ - if (model_out == "NULL"){ - this->SaveModel(num_round - 1); - } - else{ - this->SaveModel(model_out.c_str()); - } - } - if (!silent){ - printf("\nupdating end, %lu sec in all\n", elapsed); - } - } - inline void TaskEval(void){ - learner.EvalOneIter(0, devalall, eval_data_names); - } - inline void TaskInteractive(void){ - const time_t start = time(NULL); - unsigned long elapsed = 0; - int batch_action = 0; - - cfg_batch.BeforeFirst(); - while (cfg_batch.Next()){ - if (!strcmp(cfg_batch.name(), "run")){ - learner.UpdateInteract(interact_action, data); - batch_action += 1; - } - else{ - learner.SetParam(cfg_batch.name(), cfg_batch.val()); - } - } - - if (batch_action == 0){ - learner.UpdateInteract(interact_action, data); - } - utils::Assert(model_out != "NULL", "interactive mode must specify model_out"); - this->SaveModel(model_out.c_str()); - elapsed = (unsigned long)(time(NULL) - start); - - if (!silent){ - printf("\ninteractive update, %d batch actions, %lu sec in all\n", batch_action, elapsed); - } - } - - inline void TaskDump(void){ - FILE *fo = utils::FopenCheck(name_dump.c_str(), "w"); - learner.DumpModel(fo, fmap, dump_model_stats != 0); - fclose(fo); - } - inline void TaskDumpPath(void){ - FILE *fo = utils::FopenCheck(name_dumppath.c_str(), "w"); - learner.DumpPath(fo, data); - fclose(fo); - } - inline void SaveModel(const char *fname) const{ - utils::FileStream fo(utils::FopenCheck(fname, "wb")); - learner.SaveModel(fo); - fo.Close(); - } - inline void SaveModel(int i) const{ - char fname[256]; - sprintf(fname, "%s/%04d.model", model_dir_path.c_str(), i + 1); - this->SaveModel(fname); - } - inline void TaskPred(void){ - std::vector preds; - if (!silent) printf("start prediction...\n"); - learner.Predict(preds, data); - if (!silent) printf("writing prediction to %s\n", name_pred.c_str()); - FILE *fo = utils::FopenCheck(name_pred.c_str(), "w"); - for (size_t i = 0; i < preds.size(); i++){ - fprintf(fo, "%f\n", preds[i]); - } - fclose(fo); - } - private: - /* \brief whether silent */ - int silent; - /* \brief whether use auto binary buffer */ - int use_buffer; - /* \brief whether evaluate training statistics */ - int eval_train; - /* \brief number of boosting iterations */ - int num_round; - /* \brief the period to save the 
model, 0 means only save the final round model */ - int save_period; - /*! \brief interfact action */ - std::string interact_action; - /* \brief the path of training/test data set */ - std::string train_path, test_path; - /* \brief the path of test model file, or file to restart training */ - std::string model_in; - /* \brief the path of final model file, to be saved */ - std::string model_out; - /* \brief the path of directory containing the saved models */ - std::string model_dir_path; - /* \brief task to perform */ - std::string task; - /* \brief name of predict file */ - std::string name_pred; - /* \brief whether dump statistics along with model */ - int dump_model_stats; - /* \brief name of feature map */ - std::string name_fmap; - /* \brief name of dump file */ - std::string name_dump; - /* \brief name of dump path file */ - std::string name_dumppath; - /* \brief the paths of validation data sets */ - std::vector eval_data_paths; - /* \brief the names of the evaluation data used in output log */ - std::vector eval_data_names; - /*! \brief saves configurations */ - utils::ConfigSaver cfg; - /*! \brief batch configurations */ - utils::ConfigSaver cfg_batch; - private: - DMatrix data; - std::vector deval; - std::vector devalall; - utils::FeatMap fmap; - RegRankBoostLearner learner; - }; - }; -}; - -int main( int argc, char *argv[] ){ - xgboost::random::Seed( 0 ); - xgboost::regrank::RegBoostTask tsk; - return tsk.Run( argc, argv ); -} diff --git a/regrank/xgboost_regrank_obj.h b/regrank/xgboost_regrank_obj.h deleted file mode 100644 index 09b447a15..000000000 --- a/regrank/xgboost_regrank_obj.h +++ /dev/null @@ -1,131 +0,0 @@ -#ifndef XGBOOST_REGRANK_OBJ_H -#define XGBOOST_REGRANK_OBJ_H -/*! - * \file xgboost_regrank_obj.h - * \brief defines objective function interface used in xgboost for regression and rank - * \author Tianqi Chen, Kailong Chen - */ -#include "xgboost_regrank_data.h" - -namespace xgboost{ - namespace regrank{ - /*! \brief interface of objective function */ - class IObjFunction{ - public: - /*! \brief virtual destructor */ - virtual ~IObjFunction(void){} - /*! - * \brief set parameters from outside - * \param name name of the parameter - * \param val value of the parameter - */ - virtual void SetParam(const char *name, const char *val) = 0; - - /*! - * \brief get gradient over each of predictions, given existing information - * \param preds prediction of current round - * \param info information about labels, weights, groups in rank - * \param iter current iteration number - * \param grad gradient over each preds - * \param hess second order gradient over each preds - */ - virtual void GetGradient(const std::vector& preds, - const DMatrix::Info &info, - int iter, - std::vector &grad, - std::vector &hess ) = 0; - /*! \return the default evaluation metric for the problem */ - virtual const char* DefaultEvalMetric(void) = 0; - /*! - * \brief transform prediction values, this is only called when Prediction is called - * \param preds prediction values, saves to this vector as well - */ - virtual void PredTransform(std::vector &preds){} - /*! - * \brief transform prediction values, this is only called when Eval is called, usually it redirect to PredTransform - * \param preds prediction values, saves to this vector as well - */ - virtual void EvalTransform(std::vector &preds){ this->PredTransform(preds); } - }; - }; - - namespace regrank{ - /*! 
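- * Editor's note: the removed RegBoostTask driver above was invoked with a
- * config file plus optional name=value overrides, e.g. (file names are
- * hypothetical):
- * \code
- *   ./xgboost mushroom.conf num_round=20 model_out=final.model
- * \endcode
- * Every override flows through the same SetParam path as the config entries.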
\brief defines functions to calculate some commonly used functions */ - struct LossType{ - public: - const static int kLinearSquare = 0; - const static int kLogisticNeglik = 1; - const static int kLogisticClassify = 2; - const static int kLogisticRaw = 3; - public: - /*! \brief indicate which type we are using */ - int loss_type; - public: - /*! - * \brief transform the linear sum to prediction - * \param x linear sum of boosting ensemble - * \return transformed prediction - */ - inline float PredTransform(float x){ - switch (loss_type){ - case kLogisticRaw: - case kLinearSquare: return x; - case kLogisticClassify: - case kLogisticNeglik: return 1.0f / (1.0f + expf(-x)); - default: utils::Error("unknown loss_type"); return 0.0f; - } - } - - /*! - * \brief calculate first order gradient of loss, given transformed prediction - * \param predt transformed prediction - * \param label true label - * \return first order gradient - */ - inline float FirstOrderGradient(float predt, float label) const{ - switch (loss_type){ - case kLinearSquare: return predt - label; - case kLogisticRaw: predt = 1.0f / (1.0f + expf(-predt)); - case kLogisticClassify: - case kLogisticNeglik: return predt - label; - default: utils::Error("unknown loss_type"); return 0.0f; - } - } - /*! - * \brief calculate second order gradient of loss, given transformed prediction - * \param predt transformed prediction - * \param label true label - * \return second order gradient - */ - inline float SecondOrderGradient(float predt, float label) const{ - switch (loss_type){ - case kLinearSquare: return 1.0f; - case kLogisticRaw: predt = 1.0f / (1.0f + expf(-predt)); - case kLogisticClassify: - case kLogisticNeglik: return predt * (1 - predt); - default: utils::Error("unknown loss_type"); return 0.0f; - } - } - }; - }; -}; - -#include "xgboost_regrank_obj.hpp" - -namespace xgboost{ - namespace regrank{ - inline IObjFunction* CreateObjFunction( const char *name ){ - if( !strcmp("reg:linear", name ) ) return new RegressionObj( LossType::kLinearSquare ); - if( !strcmp("reg:logistic", name ) ) return new RegressionObj( LossType::kLogisticNeglik ); - if( !strcmp("binary:logistic", name ) ) return new RegressionObj( LossType::kLogisticClassify ); - if( !strcmp("binary:logitraw", name ) ) return new RegressionObj( LossType::kLogisticRaw ); - if( !strcmp("multi:softmax", name ) ) return new SoftmaxMultiClassObj(0); - if( !strcmp("multi:softprob", name ) ) return new SoftmaxMultiClassObj(1); - if( !strcmp("rank:pairwise", name ) ) return new PairwiseRankObj(); - if( !strcmp("rank:softmax", name ) ) return new SoftmaxRankObj(); - utils::Error("unknown objective function type"); - return NULL; - } - }; -}; -#endif diff --git a/regrank/xgboost_regrank_obj.hpp b/regrank/xgboost_regrank_obj.hpp deleted file mode 100644 index b73c03c0c..000000000 --- a/regrank/xgboost_regrank_obj.hpp +++ /dev/null @@ -1,353 +0,0 @@ -#ifndef XGBOOST_REGRANK_OBJ_HPP -#define XGBOOST_REGRANK_OBJ_HPP -/*! 
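- * Editor's note: for the logistic loss types in LossType above, with
- * \f$ p = 1/(1+e^{-x}) \f$ the transformed prediction, the gradients are
- * \f[ \frac{\partial \ell}{\partial x} = p - y, \qquad
- *     \frac{\partial^2 \ell}{\partial x^2} = p\,(1-p), \f]
- * while linear square loss uses gradient \f$ p - y \f$ and constant
- * hessian 1.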
- * \file xgboost_regrank_obj.hpp - * \brief implementation of objective functions - * \author Tianqi Chen, Kailong Chen - */ -//#include "xgboost_regrank_sample.h" -#include -#include -#include "xgboost_regrank_utils.h" - -namespace xgboost{ - namespace regrank{ - class RegressionObj : public IObjFunction{ - public: - RegressionObj( int loss_type ){ - loss.loss_type = loss_type; - scale_pos_weight = 1.0f; - } - virtual ~RegressionObj(){} - virtual void SetParam(const char *name, const char *val){ - if( !strcmp( "loss_type", name ) ) loss.loss_type = atoi( val ); - if( !strcmp( "scale_pos_weight", name ) ) scale_pos_weight = (float)atof( val ); - } - virtual void GetGradient(const std::vector& preds, - const DMatrix::Info &info, - int iter, - std::vector &grad, - std::vector &hess ) { - utils::Assert( preds.size() == info.labels.size(), "label size predict size not match" ); - grad.resize(preds.size()); hess.resize(preds.size()); - - const unsigned ndata = static_cast(preds.size()); - #pragma omp parallel for schedule( static ) - for (unsigned j = 0; j < ndata; ++j){ - float p = loss.PredTransform(preds[j]); - float w = info.GetWeight(j); - if( info.labels[j] == 1.0f ) w *= scale_pos_weight; - grad[j] = loss.FirstOrderGradient(p, info.labels[j]) * w; - hess[j] = loss.SecondOrderGradient(p, info.labels[j]) * w; - } - } - virtual const char* DefaultEvalMetric(void) { - if( loss.loss_type == LossType::kLogisticClassify ) return "error"; - if( loss.loss_type == LossType::kLogisticRaw ) return "auc"; - return "rmse"; - } - virtual void PredTransform(std::vector &preds){ - const unsigned ndata = static_cast(preds.size()); - #pragma omp parallel for schedule( static ) - for (unsigned j = 0; j < ndata; ++j){ - preds[j] = loss.PredTransform( preds[j] ); - } - } - private: - float scale_pos_weight; - LossType loss; - }; - }; - - namespace regrank{ - // simple softmax rak - class SoftmaxRankObj : public IObjFunction{ - public: - SoftmaxRankObj(void){ - } - virtual ~SoftmaxRankObj(){} - virtual void SetParam(const char *name, const char *val){ - } - virtual void GetGradient(const std::vector& preds, - const DMatrix::Info &info, - int iter, - std::vector &grad, - std::vector &hess ) { - utils::Assert( preds.size() == info.labels.size(), "label size predict size not match" ); - grad.resize(preds.size()); hess.resize(preds.size()); - const std::vector &gptr = info.group_ptr; - utils::Assert( gptr.size() != 0 && gptr.back() == preds.size(), "rank loss must have group file" ); - const unsigned ngroup = static_cast( gptr.size() - 1 ); - - #pragma omp parallel - { - std::vector< float > rec; - #pragma omp for schedule(static) - for (unsigned k = 0; k < ngroup; ++k){ - rec.clear(); - int nhit = 0; - for(unsigned j = gptr[k]; j < gptr[k+1]; ++j ){ - rec.push_back( preds[j] ); - grad[j] = hess[j] = 0.0f; - nhit += info.labels[j]; - } - Softmax( rec ); - if( nhit == 1 ){ - for(unsigned j = gptr[k]; j < gptr[k+1]; ++j ){ - float p = rec[ j - gptr[k] ]; - grad[j] = p - info.labels[j]; - hess[j] = 2.0f * p * ( 1.0f - p ); - } - }else{ - utils::Assert( nhit == 0, "softmax does not allow multiple labels" ); - } - } - } - } - virtual const char* DefaultEvalMetric(void) { - return "pre@1"; - } - }; - - // simple softmax multi-class classification - class SoftmaxMultiClassObj : public IObjFunction{ - public: - SoftmaxMultiClassObj(int output_prob):output_prob(output_prob){ - nclass = 0; - } - virtual ~SoftmaxMultiClassObj(){} - virtual void SetParam(const char *name, const char *val){ - if( !strcmp( "num_class", name ) 
) nclass = atoi(val); - } - virtual void GetGradient(const std::vector& preds, - const DMatrix::Info &info, - int iter, - std::vector &grad, - std::vector &hess ) { - utils::Assert( nclass != 0, "must set num_class to use softmax" ); - utils::Assert( preds.size() == (size_t)nclass * info.labels.size(), "SoftmaxMultiClassObj: label size and pred size does not match" ); - grad.resize(preds.size()); hess.resize(preds.size()); - - const unsigned ndata = static_cast(info.labels.size()); - #pragma omp parallel - { - std::vector rec(nclass); - #pragma omp for schedule(static) - for (unsigned j = 0; j < ndata; ++j){ - for( int k = 0; k < nclass; ++ k ){ - rec[k] = preds[j + k * ndata]; - } - Softmax( rec ); - int label = static_cast(info.labels[j]); - if( label < 0 ){ - label = -label - 1; - } - utils::Assert( label < nclass, "SoftmaxMultiClassObj: label exceed num_class" ); - for( int k = 0; k < nclass; ++ k ){ - float p = rec[ k ]; - if( label == k ){ - grad[j+k*ndata] = p - 1.0f; - }else{ - grad[j+k*ndata] = p; - } - hess[j+k*ndata] = 2.0f * p * ( 1.0f - p ); - } - } - } - } - virtual void PredTransform(std::vector &preds){ - this->Transform(preds, output_prob); - } - virtual void EvalTransform(std::vector &preds){ - this->Transform(preds, 0); - } - private: - inline void Transform(std::vector &preds, int prob){ - utils::Assert( nclass != 0, "must set num_class to use softmax" ); - utils::Assert( preds.size() % nclass == 0, "SoftmaxMultiClassObj: label size and pred size does not match" ); - const unsigned ndata = static_cast(preds.size()/nclass); - - #pragma omp parallel - { - std::vector rec(nclass); - #pragma omp for schedule(static) - for (unsigned j = 0; j < ndata; ++j){ - for( int k = 0; k < nclass; ++ k ){ - rec[k] = preds[j + k * ndata]; - } - if( prob == 0 ){ - preds[j] = FindMaxIndex( rec ); - }else{ - Softmax( rec ); - for( int k = 0; k < nclass; ++ k ){ - preds[j + k * ndata] = rec[k]; - } - } - } - } - if( prob == 0 ){ - preds.resize( ndata ); - } - } - virtual const char* DefaultEvalMetric(void) { - return "merror"; - } - private: - int nclass; - int output_prob; - }; - }; - - - namespace regrank{ - /*! 
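- * Editor's note: SoftmaxMultiClassObj above uses, for softmax probabilities
- * \f$ p_k \f$ and integer label \f$ y \f$,
- * \f[ g_k = p_k - \mathbf{1}\{k = y\}, \qquad h_k = 2\,p_k\,(1 - p_k), \f]
- * i.e. the standard softmax gradient with the hessian scaled by 2 as a
- * conservative approximation.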
\brief objective for lambda rank */ - class LambdaRankObj : public IObjFunction{ - public: - LambdaRankObj(void){ - loss.loss_type = LossType::kLogisticRaw; - fix_list_weight = 0.0f; - num_pairsample = 1; - } - virtual ~LambdaRankObj(){} - virtual void SetParam(const char *name, const char *val){ - if( !strcmp( "loss_type", name ) ) loss.loss_type = atoi( val ); - if( !strcmp( "fix_list_weight", name ) ) fix_list_weight = (float)atof( val ); - if( !strcmp( "num_pairsample", name ) ) num_pairsample = atoi( val ); - } - public: - virtual void GetGradient(const std::vector& preds, - const DMatrix::Info &info, - int iter, - std::vector &grad, - std::vector &hess ) { - utils::Assert( preds.size() == info.labels.size(), "label size predict size not match" ); - grad.resize(preds.size()); hess.resize(preds.size()); - const std::vector &gptr = info.group_ptr; - utils::Assert( gptr.size() != 0 && gptr.back() == preds.size(), "rank loss must have group file" ); - const unsigned ngroup = static_cast( gptr.size() - 1 ); - - #pragma omp parallel - { - // parall construct, declare random number generator here, so that each - // thread use its own random number generator, seed by thread id and current iteration - random::Random rnd; rnd.Seed( iter * 1111 + omp_get_thread_num() ); - std::vector pairs; - std::vector lst; - std::vector< std::pair > rec; - - #pragma omp for schedule(static) - for (unsigned k = 0; k < ngroup; ++k){ - lst.clear(); pairs.clear(); - for(unsigned j = gptr[k]; j < gptr[k+1]; ++j ){ - lst.push_back( ListEntry(preds[j], info.labels[j], j ) ); - grad[j] = hess[j] = 0.0f; - } - std::sort( lst.begin(), lst.end(), ListEntry::CmpPred ); - rec.resize( lst.size() ); - for( unsigned i = 0; i < lst.size(); ++i ){ - rec[i] = std::make_pair( lst[i].label, i ); - } - std::sort( rec.begin(), rec.end(), CmpFirst ); - // enumerate buckets with same label, for each item in the lst, grab another sample randomly - for( unsigned i = 0; i < rec.size(); ){ - unsigned j = i + 1; - while( j < rec.size() && rec[j].first == rec[i].first ) ++ j; - // bucket in [i,j), get a sample outside bucket - unsigned nleft = i, nright = rec.size() - j; - if( nleft + nright != 0 ){ - int nsample = num_pairsample; - while( nsample -- ){ - for( unsigned pid = i; pid < j; ++ pid ){ - unsigned ridx = static_cast( rnd.RandDouble() * (nleft+nright) ); - if( ridx < nleft ){ - pairs.push_back( LambdaPair( rec[ridx].second, rec[pid].second ) ); - }else{ - pairs.push_back( LambdaPair( rec[pid].second, rec[ridx+j-i].second ) ); - } - } - } - } - i = j; - } - // get lambda weight for the pairs - this->GetLambdaWeight( lst, pairs ); - // rescale each gradient and hessian so that the lst have constant weighted - float scale = 1.0f / num_pairsample; - if( fix_list_weight != 0.0f ){ - scale *= fix_list_weight / (gptr[k+1] - gptr[k]); - } - for( size_t i = 0; i < pairs.size(); ++ i ){ - const ListEntry &pos = lst[ pairs[i].pos_index ]; - const ListEntry &neg = lst[ pairs[i].neg_index ]; - const float w = pairs[i].weight * scale; - float p = loss.PredTransform( pos.pred - neg.pred ); - float g = loss.FirstOrderGradient( p, 1.0f ); - float h = loss.SecondOrderGradient( p, 1.0f ); - // accumulate gradient and hessian in both pid, and nid, - grad[ pos.rindex ] += g * w; - grad[ neg.rindex ] -= g * w; - // take conservative update, scale hessian by 2 - hess[ pos.rindex ] += 2.0f * h * w; - hess[ neg.rindex ] += 2.0f * h * w; - } - } - } - } - virtual const char* DefaultEvalMetric(void) { - return "map"; - } - private: - // loss function - 
LossType loss; - // number of samples peformed for each instance - int num_pairsample; - // fix weight of each elements in list - float fix_list_weight; - protected: - /*! \brief helper information in a list */ - struct ListEntry{ - /*! \brief the predict score we in the data */ - float pred; - /*! \brief the actual label of the entry */ - float label; - /*! \brief row index in the data matrix */ - unsigned rindex; - // constructor - ListEntry(float pred, float label, unsigned rindex): pred(pred),label(label),rindex(rindex){} - // comparator by prediction - inline static bool CmpPred(const ListEntry &a, const ListEntry &b){ - return a.pred > b.pred; - } - // comparator by label - inline static bool CmpLabel(const ListEntry &a, const ListEntry &b){ - return a.label > b.label; - } - }; - /*! \brief a pair in the lambda rank */ - struct LambdaPair{ - /*! \brief positive index: this is a position in the list */ - unsigned pos_index; - /*! \brief negative index: this is a position in the list */ - unsigned neg_index; - /*! \brief weight to be filled in */ - float weight; - LambdaPair( unsigned pos_index, unsigned neg_index ):pos_index(pos_index),neg_index(neg_index),weight(1.0f){} - }; - /*! - * \brief get lambda weight for existing pairs - * \param list a list that is sorted by pred score - * \param pairs record of pairs, containing the pairs to fill in weights - */ - virtual void GetLambdaWeight( const std::vector &sorted_list, std::vector &pairs ) = 0; - }; - }; - - namespace regrank{ - class PairwiseRankObj: public LambdaRankObj{ - public: - virtual ~PairwiseRankObj(void){} - virtual void GetLambdaWeight( const std::vector &sorted_list, std::vector &pairs ){} - }; - }; -}; -#endif diff --git a/regrank/xgboost_regrank_utils.h b/regrank/xgboost_regrank_utils.h deleted file mode 100644 index 49ab715d5..000000000 --- a/regrank/xgboost_regrank_utils.h +++ /dev/null @@ -1,45 +0,0 @@ -#ifndef XGBOOST_REGRANK_UTILS_H -#define XGBOOST_REGRANK_UTILS_H -/*! - * \file xgboost_regrank_utils.h - * \brief useful helper functions - * \author Tianqi Chen, Kailong Chen - */ -namespace xgboost{ - namespace regrank{ - // simple helper function to do softmax - inline static void Softmax( std::vector& rec ){ - float wmax = rec[0]; - for( size_t i = 1; i < rec.size(); ++ i ){ - wmax = std::max( rec[i], wmax ); - } - double wsum = 0.0f; - for( size_t i = 0; i < rec.size(); ++ i ){ - rec[i] = expf(rec[i]-wmax); - wsum += rec[i]; - } - for( size_t i = 0; i < rec.size(); ++ i ){ - rec[i] /= static_cast(wsum); - } - } - // simple helper function to do softmax - inline static int FindMaxIndex( std::vector& rec ){ - size_t mxid = 0; - for( size_t i = 1; i < rec.size(); ++ i ){ - if( rec[i] > rec[mxid]+1e-6f ){ - mxid = i; - } - } - return (int)mxid; - } - inline static bool CmpFirst(const std::pair &a, const std::pair &b){ - return a.first > b.first; - } - inline static bool CmpSecond(const std::pair &a, const std::pair &b){ - return a.second > b.second; - } - }; -}; - -#endif - diff --git a/tree/model.h b/tree/model.h new file mode 100644 index 000000000..aa84d265d --- /dev/null +++ b/tree/model.h @@ -0,0 +1,492 @@ +#ifndef XGBOOST_TREE_MODEL_H_ +#define XGBOOST_TREE_MODEL_H_ +/*! + * \file model.h + * \brief model structure for tree + * \author Tianqi Chen + */ +#include +#include +#include +#include +#include +#include +#include +#include "../utils/io.h" +#include "../utils/fmap.h" +#include "../utils/utils.h" + +namespace xgboost { +namespace tree { +/*! 
+ * \brief template class of TreeModel
+ * \tparam TSplitCond data type to indicate split condition
+ * \tparam TNodeStat auxiliary statistics of node to help tree building
+ */
+template<typename TSplitCond, typename TNodeStat>
+class TreeModel {
+ public:
+  /*! \brief auxiliary statistics of node to help tree building */
+  typedef TNodeStat NodeStat;
+  /*! \brief data type to indicate split condition */
+  typedef TSplitCond SplitCond;
+  /*! \brief parameters of the tree */
+  struct Param{
+    /*! \brief number of start root */
+    int num_roots;
+    /*! \brief total number of nodes */
+    int num_nodes;
+    /*!\brief number of deleted nodes */
+    int num_deleted;
+    /*! \brief maximum depth, this is a statistics of the tree */
+    int max_depth;
+    /*! \brief number of features used for tree construction */
+    int num_feature;
+    /*! \brief reserved part */
+    int reserved[32];
+    /*! \brief constructor */
+    Param(void) {
+      max_depth = 0;
+      memset(reserved, 0, sizeof(reserved));
+    }
+    /*!
+     * \brief set parameters from outside
+     * \param name name of the parameter
+     * \param val value of the parameter
+     */
+    inline void SetParam(const char *name, const char *val) {
+      if (!strcmp("num_roots", name)) num_roots = atoi(val);
+      if (!strcmp("num_feature", name)) num_feature = atoi(val);
+    }
+  };
+  /*! \brief tree node */
+  class Node{
+   public:
+    /*! \brief index of left child */
+    inline int cleft(void) const {
+      return this->cleft_;
+    }
+    /*! \brief index of right child */
+    inline int cright(void) const {
+      return this->cright_;
+    }
+    /*! \brief index of default child when feature is missing */
+    inline int cdefault(void) const {
+      return this->default_left() ? this->cleft() : this->cright();
+    }
+    /*! \brief feature index of split condition */
+    inline unsigned split_index(void) const {
+      return sindex_ & ((1U << 31) - 1U);
+    }
+    /*! \brief when feature is unknown, whether goes to left child */
+    inline bool default_left(void) const {
+      return (sindex_ >> 31) != 0;
+    }
+    /*! \brief whether current node is leaf node */
+    inline bool is_leaf(void) const {
+      return cleft_ == -1;
+    }
+    /*! \brief get leaf value of leaf node */
+    inline float leaf_value(void) const {
+      return (this->info_).leaf_value;
+    }
+    /*! \brief get split condition of the node */
+    inline TSplitCond split_cond(void) const {
+      return (this->info_).split_cond;
+    }
+    /*! \brief get parent of the node */
+    inline int parent(void) const {
+      return parent_ & ((1U << 31) - 1);
+    }
+    /*! \brief whether current node is left child */
+    inline bool is_left_child(void) const {
+      return (parent_ & (1U << 31)) != 0;
+    }
+    /*! \brief whether current node is root */
+    inline bool is_root(void) const {
+      return parent_ == -1;
+    }
+    /*!
+     * \brief set the right child
+     * \param nid node id of the right child
+     */
+    inline void set_right_child(int nid) {
+      this->cright_ = nid;
+    }
+    /*!
+     * \brief set split condition of current node
+     * \param split_index feature index to split
+     * \param split_cond split condition
+     * \param default_left the default direction when feature is unknown
+     */
+    inline void set_split(unsigned split_index, TSplitCond split_cond,
+                          bool default_left = false) {
+      if (default_left) split_index |= (1U << 31);
+      this->sindex_ = split_index;
+      (this->info_).split_cond = split_cond;
+    }
+    /*!
+     * \brief set the leaf value of the node
+     * \param value leaf value
+     * \param right right index, could be used to store
+     *        additional information
+     */
+    inline void set_leaf(float value, int right = -1) {
+      (this->info_).leaf_value = value;
+      this->cleft_ = -1;
+      this->cright_ = right;
+    }
+
+   private:
+    friend class TreeModel;
+    /*!
+     * \brief in leaf node, we have weights, in non-leaf nodes,
+     *        we have split condition
+     */
+    union Info{
+      float leaf_value;
+      TSplitCond split_cond;
+    };
+    // pointer to parent, highest bit is used to
+    // indicate whether it's a left child or not
+    int parent_;
+    // pointer to left, right
+    int cleft_, cright_;
+    // split feature index, left split or right split depends on the highest bit
+    unsigned sindex_;
+    // extra info
+    Info info_;
+    // set parent
+    inline void set_parent(int pidx, bool is_left_child = true) {
+      if (is_left_child) pidx |= (1U << 31);
+      this->parent_ = pidx;
+    }
+  };
+
+ protected:
+  // vector of nodes
+  std::vector<Node> nodes;
+  // stats of nodes
+  std::vector<NodeStat> stats;
+  // free node space, used during training process
+  std::vector<int> deleted_nodes;
+  // allocate a new node,
+  // !!!!!! NOTE: may cause BUG here, nodes.resize
+  inline int AllocNode(void) {
+    if (param.num_deleted != 0) {
+      int nd = deleted_nodes.back();
+      deleted_nodes.pop_back();
+      --param.num_deleted;
+      return nd;
+    }
+    int nd = param.num_nodes++;
+    utils::Check(param.num_nodes < std::numeric_limits<int>::max(),
+                 "number of nodes in the tree exceed 2^31");
+    nodes.resize(param.num_nodes);
+    stats.resize(param.num_nodes);
+    return nd;
+  }
+  // delete a tree node
+  inline void DeleteNode(int nid) {
+    utils::Assert(nid >= param.num_roots, "can not delete root");
+    deleted_nodes.push_back(nid);
+    nodes[nid].set_parent(-1);
+    ++param.num_deleted;
+  }
+
+ public:
+  /*!
+   * \brief change a non-leaf node to a leaf node, delete its children
+   * \param rid node id of the node
+   * \param value new leaf value
+   */
+  inline void ChangeToLeaf(int rid, float value) {
+    utils::Assert(nodes[nodes[rid].cleft() ].is_leaf(),
+                  "can not delete a non terminal child");
+    utils::Assert(nodes[nodes[rid].cright()].is_leaf(),
+                  "can not delete a non terminal child");
+    this->DeleteNode(nodes[rid].cleft());
+    this->DeleteNode(nodes[rid].cright());
+    nodes[rid].set_leaf(value);
+  }
+  /*!
+   * \brief collapse a non-leaf node to a leaf node, delete its children
+   * \param rid node id of the node
+   * \param value new leaf value
+   */
+  inline void CollapseToLeaf(int rid, float value) {
+    if (nodes[rid].is_leaf()) return;
+    if (!nodes[nodes[rid].cleft() ].is_leaf()) {
+      CollapseToLeaf(nodes[rid].cleft(), 0.0f);
+    }
+    if (!nodes[nodes[rid].cright() ].is_leaf()) {
+      CollapseToLeaf(nodes[rid].cright(), 0.0f);
+    }
+    this->ChangeToLeaf(rid, value);
+  }
+
+ public:
+  /*! \brief model parameter */
+  Param param;
+  /*! \brief constructor */
+  TreeModel(void) {
+    param.num_nodes = 1;
+    param.num_roots = 1;
+    param.num_deleted = 0;
+    nodes.resize(1);
+  }
+  /*! \brief get node given nid */
+  inline Node &operator[](int nid) {
+    return nodes[nid];
+  }
+  /*! \brief get node given nid */
+  inline const Node &operator[](int nid) const {
+    return nodes[nid];
+  }
+  /*! \brief get node statistics given nid */
+  inline NodeStat &stat(int nid) {
+    return stats[nid];
+  }
+  /*! \brief initialize the model */
+  inline void InitModel(void) {
+    param.num_nodes = param.num_roots;
+    nodes.resize(param.num_nodes);
+    stats.resize(param.num_nodes);
+    for (int i = 0; i < param.num_nodes; i ++) {
+      nodes[i].set_leaf(0.0f);
+      nodes[i].set_parent(-1);
+    }
+  }
+  /*!
+   * \brief load model from stream
+   * \param fi input stream
+   */
+  inline void LoadModel(utils::IStream &fi) {
+    utils::Check(fi.Read(&param, sizeof(Param)) > 0,
+                 "TreeModel: wrong format");
+    nodes.resize(param.num_nodes); stats.resize(param.num_nodes);
+    utils::Check(fi.Read(&nodes[0], sizeof(Node) * nodes.size()) > 0,
+                 "TreeModel: wrong format");
+    utils::Check(fi.Read(&stats[0], sizeof(NodeStat) * stats.size()) > 0,
+                 "TreeModel: wrong format");
+    // chg deleted nodes
+    deleted_nodes.resize(0);
+    for (int i = param.num_roots; i < param.num_nodes; i ++) {
+      if (nodes[i].is_root()) deleted_nodes.push_back(i);
+    }
+    utils::Assert(static_cast<int>(deleted_nodes.size()) == param.num_deleted,
+                  "number of deleted nodes do not match");
+  }
+  /*!
+   * \brief save model to stream
+   * \param fo output stream
+   */
+  inline void SaveModel(utils::IStream &fo) const {
+    utils::Assert(param.num_nodes == static_cast<int>(nodes.size()),
+                  "Tree::SaveModel");
+    utils::Assert(param.num_nodes == static_cast<int>(stats.size()),
+                  "Tree::SaveModel");
+    fo.Write(&param, sizeof(Param));
+    fo.Write(&nodes[0], sizeof(Node) * nodes.size());
+    fo.Write(&stats[0], sizeof(NodeStat) * nodes.size());
+  }
+  /*!
+   * \brief add child nodes to node
+   * \param nid node id to add children to
+   */
+  inline void AddChilds(int nid) {
+    int pleft  = this->AllocNode();
+    int pright = this->AllocNode();
+    nodes[nid].cleft_  = pleft;
+    nodes[nid].cright_ = pright;
+    nodes[nodes[nid].cleft() ].set_parent(nid, true);
+    nodes[nodes[nid].cright()].set_parent(nid, false);
+  }
+  /*!
+   * \brief only add a right child to a leaf node
+   * \param nid node id to add the right child to
+   */
+  inline void AddRightChild(int nid) {
+    int pright = this->AllocNode();
+    nodes[nid].cright_ = pright;
+    nodes[nodes[nid].cright()].set_parent(nid, false);
+  }
+  /*!
+   * \brief get current depth
+   * \param nid node id
+   * \param pass_rchild whether right child is not counted in depth
+   */
+  inline int GetDepth(int nid, bool pass_rchild = false) const {
+    int depth = 0;
+    while (!nodes[nid].is_root()) {
+      if (!pass_rchild || nodes[nid].is_left_child()) ++depth;
+      nid = nodes[nid].parent();
+    }
+    return depth;
+  }
+  /*!
+   * \brief get maximum depth
+   * \param nid node id
+   */
+  inline int MaxDepth(int nid) const {
+    if (nodes[nid].is_leaf()) return 0;
+    return std::max(MaxDepth(nodes[nid].cleft())+1,
+                    MaxDepth(nodes[nid].cright())+1);
+  }
+  /*!
+   * \brief get maximum depth
+   */
+  inline int MaxDepth(void) {
+    int maxd = 0;
+    for (int i = 0; i < param.num_roots; ++i) {
+      maxd = std::max(maxd, MaxDepth(i));
+    }
+    return maxd;
+  }
+  /*! \brief number of extra nodes besides the root */
+  inline int num_extra_nodes(void) const {
+    return param.num_nodes - param.num_roots - param.num_deleted;
+  }
+  /*!
+   * \brief dump model to text string
+   * \param fmap feature map of feature types
+   * \param with_stats whether dump out statistics as well
+   * \return the string of dumped model
+   */
+  inline std::string DumpModel(const utils::FeatMap& fmap, bool with_stats) {
+    std::stringstream fo("");
+    for (int i = 0; i < param.num_roots; ++i) {
+      this->Dump(i, fo, fmap, 0, with_stats);
+    }
+    return fo.str();
+  }
+
+ private:
+  void Dump(int nid, std::stringstream &fo,
+            const utils::FeatMap& fmap, int depth, bool with_stats) {
+    for (int i = 0; i < depth; ++i) {
+      fo << '\t';
+    }
+    if (nodes[nid].is_leaf()) {
+      fo << nid << ":leaf=" << nodes[nid].leaf_value();
+      if (with_stats) {
+        stat(nid).Print(fo, true);
+      }
+      fo << '\n';
+    } else {
+      // right then left,
+      TSplitCond cond = nodes[nid].split_cond();
+      const unsigned split_index = nodes[nid].split_index();
+      if (split_index < fmap.size()) {
+        switch (fmap.type(split_index)) {
+          case utils::FeatMap::kIndicator: {
+            int nyes = nodes[nid].default_left() ?
+                nodes[nid].cright() : nodes[nid].cleft();
+            fo << nid << ":[" << fmap.name(split_index) << "] yes=" << nyes
+               << ",no=" << nodes[nid].cdefault();
+            break;
+          }
+          case utils::FeatMap::kInteger: {
+            fo << nid << ":[" << fmap.name(split_index) << "<"
+               << int(float(cond)+1.0f)
+               << "] yes=" << nodes[nid].cleft()
+               << ",no=" << nodes[nid].cright()
+               << ",missing=" << nodes[nid].cdefault();
+            break;
+          }
+          case utils::FeatMap::kFloat:
+          case utils::FeatMap::kQuantitive: {
+            fo << nid << ":[" << fmap.name(split_index) << "<"<< float(cond)
+               << "] yes=" << nodes[nid].cleft()
+               << ",no=" << nodes[nid].cright()
+               << ",missing=" << nodes[nid].cdefault();
+            break;
+          }
+          default: utils::Error("unknown fmap type");
+        }
+      } else {
+        fo << nid << ":[f" << split_index << "<"<< float(cond)
+           << "] yes=" << nodes[nid].cleft()
+           << ",no=" << nodes[nid].cright()
+           << ",missing=" << nodes[nid].cdefault();
+      }
+      if (with_stats) {
+        fo << ' ';
+        stat(nid).Print(fo, false);
+      }
+      fo << '\n';
+      this->Dump(nodes[nid].cleft(), fo, fmap, depth+1, with_stats);
+      this->Dump(nodes[nid].cright(), fo, fmap, depth+1, with_stats);
+    }
+  }
+};
+
+/*! \brief node statistics used in regression tree */
+struct RTreeNodeStat{
+  /*! \brief loss chg caused by current split */
+  float loss_chg;
+  /*! \brief sum of hessian values, used to measure coverage of data */
+  float sum_hess;
+  /*! \brief weight of current node */
+  float base_weight;
+  /*! \brief number of child that is leaf node known up to now */
+  int leaf_child_cnt;
+  /*! \brief print information of current stats to fo */
+  inline void Print(std::stringstream &fo, bool is_leaf) const {
+    if (!is_leaf) {
+      fo << "gain=" << loss_chg << ",cover=" << sum_hess;
+    } else {
+      fo << "cover=" << sum_hess;
+    }
+  }
+};
+
+/*! \brief define regression tree to be the most common tree model */
+class RegTree: public TreeModel<bst_float, RTreeNodeStat>{
+ public:
+  /*!
+   * \brief get the leaf index
+   * \param feat dense feature vector, if the feature is missing the field is set to NaN
+   * \param root_id starting root index of the instance
+   * \return the leaf index of the given feature
+   */
+  inline int GetLeafIndex(const std::vector<float> &feat, unsigned root_id = 0) const {
+    // start from groups that belongs to current data
+    int pid = static_cast<int>(root_id);
+    // traverse tree
+    while (!(*this)[ pid ].is_leaf()) {
+      unsigned split_index = (*this)[pid].split_index();
+      const float fvalue = feat[split_index];
+      pid = this->GetNext(pid, fvalue, std::isnan(fvalue));
+    }
+    return pid;
+  }
+  /*!
+   * \brief get the prediction of regression tree, only accepts dense feature vector
+   * \param feat dense feature vector, if the feature is missing the field is set to NaN
+   * \param root_id starting root index of the instance
+   * \return the predicted value for the given feature vector
+   */
+  inline float Predict(const std::vector<float> &feat, unsigned root_id = 0) const {
+    int pid = this->GetLeafIndex(feat, root_id);
+    return (*this)[pid].leaf_value();
+  }
+ private:
+  /*! \brief get next position of the tree given current pid */
+  inline int GetNext(int pid, float fvalue, bool is_unknown) const {
+    float split_value = (*this)[pid].split_cond();
+    if (is_unknown) {
+      return (*this)[pid].cdefault();
+    } else {
+      if (fvalue < split_value) {
+        return (*this)[pid].cleft();
+      } else {
+        return (*this)[pid].cright();
+      }
+    }
+  }
+};
+
+} // namespace tree
+} // namespace xgboost
+#endif // XGBOOST_TREE_MODEL_H_
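To make the node API above concrete, here is a minimal hedged sketch (all values invented) that hand-builds a depth-1 RegTree with the members defined in model.h and runs a prediction; it is illustrative only, not part of the patch:

    // illustrative only: build a one-split stump and query it
    xgboost::tree::RegTree tree;
    tree.InitModel();                    // single root leaf, value 0
    tree.AddChilds(0);                   // allocate nodes 1 (left) and 2 (right)
    tree[0].set_split(3, 0.5f, true);    // split on feature 3, missing value goes left
    tree[1].set_leaf(-0.4f);
    tree[2].set_leaf(0.9f);
    std::vector<float> feat(8, 0.0f);
    feat[3] = 0.7f;                      // 0.7 >= 0.5, so GetNext goes right
    float pred = tree.Predict(feat);     // yields 0.9

Missing features take the default direction encoded in the split bit, which is what the default_direction logic in param.h below controls during training.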
diff --git a/tree/param.h b/tree/param.h
new file mode 100644
index 000000000..a19eb2d82
--- /dev/null
+++ b/tree/param.h
@@ -0,0 +1,262 @@
+#ifndef XGBOOST_TREE_PARAM_H_
+#define XGBOOST_TREE_PARAM_H_
+/*!
+ * \file param.h
+ * \brief training parameters, statistics used to support tree construction
+ * \author Tianqi Chen
+ */
+#include <cstring>
+#include "../data.h"
+
+namespace xgboost {
+namespace tree {
+
+/*! \brief core statistics used for tree construction */
+struct GradStats {
+  /*! \brief sum gradient statistics */
+  double sum_grad;
+  /*! \brief sum hessian statistics */
+  double sum_hess;
+  /*! \brief constructor */
+  GradStats(void) {
+    this->Clear();
+  }
+  /*! \brief clear the statistics */
+  inline void Clear(void) {
+    sum_grad = sum_hess = 0.0f;
+  }
+  /*! \brief add statistics to the data */
+  inline void Add(double grad, double hess) {
+    sum_grad += grad; sum_hess += hess;
+  }
+  /*! \brief add statistics to the data */
+  inline void Add(const bst_gpair& b) {
+    this->Add(b.grad, b.hess);
+  }
+  /*! \brief add statistics to the data */
+  inline void Add(const GradStats &b) {
+    this->Add(b.sum_grad, b.sum_hess);
+  }
+  /*! \brief substract the statistics by b */
+  inline GradStats Substract(const GradStats &b) const {
+    GradStats res;
+    res.sum_grad = this->sum_grad - b.sum_grad;
+    res.sum_hess = this->sum_hess - b.sum_hess;
+    return res;
+  }
+  /*! \return whether the statistics is not used yet */
+  inline bool Empty(void) const {
+    return sum_hess == 0.0;
+  }
+};
+
+/*! \brief training parameters for regression tree */
+struct TrainParam{
+  // learning step size for a time
+  float learning_rate;
+  // minimum loss change required for a split
+  float min_split_loss;
+  // maximum depth of a tree
+  int max_depth;
+  //----- the rest parameters are less important ----
+  // minimum amount of hessian(weight) allowed in a child
+  float min_child_weight;
+  // weight decay parameter used to control leaf fitting
+  float reg_lambda;
+  // reg method
+  int reg_method;
+  // default direction choice
+  int default_direction;
+  // whether we want to do subsample
+  float subsample;
+  // whether to subsample columns each split, in each level
+  float colsample_bylevel;
+  // whether to subsample columns during tree construction
+  float colsample_bytree;
+  // speed optimization for dense column
+  float opt_dense_col;
+  // number of threads to be used for tree construction,
+  // if OpenMP is enabled, if equals 0, use system default
+  int nthread;
+  /*! \brief constructor */
+  TrainParam(void) {
+    learning_rate = 0.3f;
+    min_child_weight = 1.0f;
+    max_depth = 6;
+    reg_lambda = 1.0f;
+    reg_method = 2;
+    default_direction = 0;
+    subsample = 1.0f;
+    colsample_bytree = 1.0f;
+    colsample_bylevel = 1.0f;
+    opt_dense_col = 1.0f;
+    nthread = 0;
+  }
+  /*!
+   * \brief set parameters from outside
+   * \param name name of the parameter
+   * \param val value of the parameter
+   */
+  inline void SetParam(const char *name, const char *val) {
+    // sync-names
+    if (!strcmp(name, "gamma")) min_split_loss = static_cast<float>(atof(val));
+    if (!strcmp(name, "eta")) learning_rate = static_cast<float>(atof(val));
+    if (!strcmp(name, "lambda")) reg_lambda = static_cast<float>(atof(val));
+    if (!strcmp(name, "learning_rate")) learning_rate = static_cast<float>(atof(val));
+    if (!strcmp(name, "min_child_weight")) min_child_weight = static_cast<float>(atof(val));
+    if (!strcmp(name, "min_split_loss")) min_split_loss = static_cast<float>(atof(val));
+    if (!strcmp(name, "reg_lambda")) reg_lambda = static_cast<float>(atof(val));
+    if (!strcmp(name, "reg_method")) reg_method = static_cast<int>(atof(val));
+    if (!strcmp(name, "subsample")) subsample = static_cast<float>(atof(val));
+    if (!strcmp(name, "colsample_bylevel")) colsample_bylevel = static_cast<float>(atof(val));
+    if (!strcmp(name, "colsample_bytree")) colsample_bytree = static_cast<float>(atof(val));
+    if (!strcmp(name, "opt_dense_col")) opt_dense_col = static_cast<float>(atof(val));
+    if (!strcmp(name, "max_depth")) max_depth = atoi(val);
+    if (!strcmp(name, "nthread")) nthread = atoi(val);
+    if (!strcmp(name, "default_direction")) {
+      if (!strcmp(val, "learn")) default_direction = 0;
+      if (!strcmp(val, "left")) default_direction = 1;
+      if (!strcmp(val, "right")) default_direction = 2;
+    }
+  }
+  // calculate the cost of loss function
+  inline double CalcGain(double sum_grad, double sum_hess) const {
+    if (sum_hess < min_child_weight) {
+      return 0.0;
+    }
+    switch (reg_method) {
+      case 1 : return Sqr(ThresholdL1(sum_grad, reg_lambda)) / sum_hess;
+      case 2 : return Sqr(sum_grad) / (sum_hess + reg_lambda);
+      case 3 : return
+          Sqr(ThresholdL1(sum_grad, 0.5 * reg_lambda)) /
+          (sum_hess + 0.5 * reg_lambda);
+      default: return Sqr(sum_grad) / sum_hess;
+    }
+  }
+  // calculate weight given the statistics
+  inline double CalcWeight(double sum_grad, double sum_hess) const {
+    if (sum_hess < min_child_weight) {
+      return 0.0;
+    } else {
+      switch (reg_method) {
+        case 1: return - ThresholdL1(sum_grad, reg_lambda) / sum_hess;
+        case 2: return - sum_grad / (sum_hess + reg_lambda);
+        case 3: return
+            - ThresholdL1(sum_grad, 0.5 * reg_lambda) /
+            (sum_hess + 0.5 * reg_lambda);
+        default: return - sum_grad / sum_hess;
+      }
+    }
+  }
+  /*! \brief whether need forward small to big search: default right */
+  inline bool need_forward_search(float col_density = 0.0f) const {
+    return this->default_direction == 2 ||
+        (default_direction == 0 && (col_density < opt_dense_col));
+  }
+  /*! \brief whether need backward big to small search: default left */
+  inline bool need_backward_search(float col_density = 0.0f) const {
+    return this->default_direction != 2;
+  }
+  /*! \brief given the loss change, whether we need to invoke pruning */
+  inline bool need_prune(double loss_chg, int depth) const {
+    return loss_chg < this->min_split_loss;
+  }
+  /*! \brief whether the hessian is too small to allow splitting this node */
+  inline bool cannot_split(double sum_hess, int depth) const {
+    return sum_hess < this->min_child_weight * 2.0;
+  }
+  // code support for template data
+  inline double CalcWeight(const GradStats &d) const {
+    return this->CalcWeight(d.sum_grad, d.sum_hess);
+  }
+  inline double CalcGain(const GradStats &d) const {
+    return this->CalcGain(d.sum_grad, d.sum_hess);
+  }
+
+ protected:
+  // functions for L1 cost
+  inline static double ThresholdL1(double w, double lambda) {
+    if (w > +lambda) return w - lambda;
+    if (w < -lambda) return w + lambda;
+    return 0.0;
+  }
+  inline static double Sqr(double a) {
+    return a * a;
+  }
+};
+
+/*!
+ * \brief statistics that is helpful to store
+ *        and represent a split solution for the tree
+ */
+struct SplitEntry{
+  /*! \brief loss change after split this node */
+  bst_float loss_chg;
+  /*! \brief split index */
+  unsigned sindex;
+  /*! \brief split value */
+  float split_value;
+  /*! \brief constructor */
+  SplitEntry(void) : loss_chg(0.0f), sindex(0), split_value(0.0f) {}
+  /*!
+   * \brief decides whether we can replace the current entry with the given statistics;
+   *        when loss_chg is equal, the lower split index gets priority;
+   *        not the best way, but it helps to give consistent results during multi-threaded execution
+   * \param loss_chg the loss reduction obtained through the split
+   * \param split_index the feature index where the split is on
+   */
+  inline bool NeedReplace(bst_float loss_chg, unsigned split_index) const {
+    if (this->split_index() <= split_index) {
+      return loss_chg > this->loss_chg;
+    } else {
+      return !(this->loss_chg > loss_chg);
+    }
+  }
+  /*!
+   * \brief update the split entry, replace it if e is better
+   * \param e candidate split solution
+   * \return whether the proposed split is better and can replace current split
+   */
+  inline bool Update(const SplitEntry &e) {
+    if (this->NeedReplace(e.loss_chg, e.split_index())) {
+      this->loss_chg = e.loss_chg;
+      this->sindex = e.sindex;
+      this->split_value = e.split_value;
+      return true;
+    } else {
+      return false;
+    }
+  }
+  /*!
+   * \brief update the split entry, replace it if e is better
+   * \param loss_chg loss reduction of new candidate
+   * \param split_index feature index to split on
+   * \param split_value the split point
+   * \param default_left whether the missing value goes to left
+   * \return whether the proposed split is better and can replace current split
+   */
+  inline bool Update(bst_float loss_chg, unsigned split_index,
+                     float split_value, bool default_left) {
+    if (this->NeedReplace(loss_chg, split_index)) {
+      this->loss_chg = loss_chg;
+      if (default_left) split_index |= (1U << 31);
+      this->sindex = split_index;
+      this->split_value = split_value;
+      return true;
+    } else {
+      return false;
+    }
+  }
+  /*!\return feature index to split on */
+  inline unsigned split_index(void) const {
+    return sindex & ((1U << 31) - 1U);
+  }
+  /*!\return whether missing value goes to left branch */
+  inline bool default_left(void) const {
+    return (sindex >> 31) != 0;
+  }
+};
+
+} // namespace tree
+} // namespace xgboost
+#endif // XGBOOST_TREE_PARAM_H_
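As a quick sanity check on the formulas above (for the default reg_method = 2: gain = G^2 / (H + lambda), weight = -G / (H + lambda)), here is a hedged sketch with invented gradient sums; it mirrors the loss_chg computation the column maker below performs for each candidate split:

    // illustrative only: score one candidate split under the default L2 settings
    xgboost::tree::TrainParam param;            // defaults: reg_lambda = 1, reg_method = 2
    xgboost::tree::GradStats parent, left;
    parent.Add(-10.0, 8.0);                     // G = -10, H = 8
    left.Add(-8.0, 5.0);
    xgboost::tree::GradStats right = parent.Substract(left);  // G = -2, H = 3
    double loss_chg = param.CalcGain(left) + param.CalcGain(right)
                    - param.CalcGain(parent);   // ~10.67 + 1.00 - 11.11 = ~0.56
    double w_left = param.CalcWeight(left);     // -G/(H+1) = 8/6 = ~1.33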
diff --git a/tree/updater.h b/tree/updater.h
new file mode 100644
index 000000000..50b30e69f
--- /dev/null
+++ b/tree/updater.h
@@ -0,0 +1,70 @@
+#ifndef XGBOOST_TREE_UPDATER_H_
+#define XGBOOST_TREE_UPDATER_H_
+/*!
+ * \file updater.h
+ * \brief interface to update the tree
+ * \author Tianqi Chen
+ */
+#include <vector>
+
+#include "../data.h"
+#include "./model.h"
+
+namespace xgboost {
+namespace tree {
+/*!
+ * \brief interface of tree update module, that performs update of a tree
+ * \tparam FMatrix the data type updater taking
+ */
+template<typename FMatrix>
+class IUpdater {
+ public:
+  /*!
+   * \brief set parameters from outside
+   * \param name name of the parameter
+   * \param val value of the parameter
+   */
+  virtual void SetParam(const char *name, const char *val) = 0;
+  /*!
+   * \brief perform update to the tree models
+   * \param gpair the gradient pair statistics of the data
+   * \param fmat feature matrix that provide access to features
+   * \param root_index pre-partitioned root_index of each instance,
+   *        root_index.size() can be 0 which indicates that no pre-partition involved
+   * \param trees pointers to the trees to be updated, updater will change the content of the tree
+   *        note: all the trees in the vector are updated, with the same statistics,
+   *        but maybe different random seeds, usually one tree is passed in at a time,
+   *        there can be multiple trees when we train random forest style model
+   */
+  virtual void Update(const std::vector<bst_gpair> &gpair,
+                      FMatrix &fmat,
+                      const std::vector<unsigned> &root_index,
+                      const std::vector<RegTree*> &trees) = 0;
+  // destructor
+  virtual ~IUpdater(void) {}
+};
+
+} // namespace tree
+} // namespace xgboost
+
+#include "./updater_prune-inl.hpp"
+#include "./updater_colmaker-inl.hpp"
+
+namespace xgboost {
+namespace tree {
+/*!
+ * \brief create a updater based on name
+ * \param name name of updater
+ * \return return the updater instance
+ */
+template<typename FMatrix>
+inline IUpdater<FMatrix>* CreateUpdater(const char *name) {
+  if (!strcmp(name, "prune")) return new TreePruner<FMatrix>();
+  if (!strcmp(name, "grow_colmaker")) return new ColMaker<FMatrix>();
+  utils::Error("unknown updater:%s", name);
+  return NULL;
+}
+
+} // namespace tree
+} // namespace xgboost
+#endif // XGBOOST_TREE_UPDATER_H_
diff --git a/tree/updater_colmaker-inl.hpp b/tree/updater_colmaker-inl.hpp
new file mode 100644
index 000000000..3223eec6a
--- /dev/null
+++ b/tree/updater_colmaker-inl.hpp
@@ -0,0 +1,357 @@
+#ifndef XGBOOST_TREE_UPDATER_COLMAKER_INL_HPP_
+#define XGBOOST_TREE_UPDATER_COLMAKER_INL_HPP_
+/*!
+ * \file updater_colmaker-inl.hpp
+ * \brief use columnwise update to construct a tree
+ * \author Tianqi Chen
+ */
+#include <vector>
+#include <algorithm>
+#include "./param.h"
+#include "./updater.h"
+#include "../utils/omp.h"
+#include "../utils/random.h"
+
+namespace xgboost {
+namespace tree {
+/*! \brief tree maker that grows a tree by columnwise enumeration of split candidates */
+template<typename FMatrix, typename TStats = GradStats>
+class ColMaker: public IUpdater<FMatrix> {
+ public:
+  virtual ~ColMaker(void) {}
+  // set training parameter
+  virtual void SetParam(const char *name, const char *val) {
+    param.SetParam(name, val);
+  }
+  virtual void Update(const std::vector<bst_gpair> &gpair,
+                      FMatrix &fmat,
+                      const std::vector<unsigned> &root_index,
+                      const std::vector<RegTree*> &trees) {
+    fmat.InitColAccess();
+    for (size_t i = 0; i < trees.size(); ++i) {
+      Builder builder(param);
+      builder.Update(gpair, fmat, root_index, trees[i]);
+    }
+  }
+
+ private:
+  // training parameter
+  TrainParam param;
+  // data structure
+  /*! \brief per thread x per node entry to store tmp data */
+  struct ThreadEntry {
+    /*! \brief statistics of data */
+    TStats stats;
+    /*! \brief last feature value scanned */
+    float last_fvalue;
+    /*! \brief current best solution */
+    SplitEntry best;
+    // constructor
+    ThreadEntry(void) {
+      stats.Clear();
+    }
+  };
+  struct NodeEntry {
+    /*! \brief statistics for node entry */
+    TStats stats;
+    /*! \brief loss of this node, without split */
+    bst_float root_gain;
+    /*! \brief weight calculated related to current data */
+    float weight;
+    /*! \brief current best solution */
+    SplitEntry best;
+    // constructor
+    NodeEntry(void) : root_gain(0.0f), weight(0.0f){
+      stats.Clear();
+    }
+  };
+  // actual builder that runs the algorithm
+  struct Builder{
+   public:
+    // constructor
+    explicit Builder(const TrainParam &param) : param(param) {}
+    // update one tree, growing
+    virtual void Update(const std::vector<bst_gpair> &gpair, FMatrix &fmat,
+                        const std::vector<unsigned> &root_index,
+                        RegTree *p_tree) {
+      this->InitData(gpair, fmat, root_index, *p_tree);
+      this->InitNewNode(qexpand, gpair, *p_tree);
+
+      for (int depth = 0; depth < param.max_depth; ++depth) {
+        this->FindSplit(depth, this->qexpand, gpair, fmat, p_tree);
+        this->ResetPosition(this->qexpand, fmat, *p_tree);
+        this->UpdateQueueExpand(*p_tree, &this->qexpand);
+        this->InitNewNode(qexpand, gpair, *p_tree);
+        // if nothing left to be expanded, break
+        if (qexpand.size() == 0) break;
+      }
+      // set all the rest expanding nodes to leaf
+      for (size_t i = 0; i < qexpand.size(); ++i) {
+        const int nid = qexpand[i];
+        (*p_tree)[nid].set_leaf(snode[nid].weight * param.learning_rate);
+      }
+      // remember auxiliary statistics in the tree node
+      for (int nid = 0; nid < p_tree->param.num_nodes; ++nid) {
+        p_tree->stat(nid).loss_chg = snode[nid].best.loss_chg;
+        p_tree->stat(nid).base_weight = snode[nid].weight;
+        p_tree->stat(nid).sum_hess = static_cast<float>(snode[nid].stats.sum_hess);
+      }
+    }
+
+   private:
+    // initialize temp data structure
+    inline void InitData(const std::vector<bst_gpair> &gpair, FMatrix &fmat,
+                         const std::vector<unsigned> &root_index, const RegTree &tree) {
+      utils::Assert(tree.param.num_nodes == tree.param.num_roots, "ColMaker: can only grow new tree");
+      {// setup position
+        position.resize(gpair.size());
+        if (root_index.size() == 0) {
+          std::fill(position.begin(), position.end(), 0);
+        } else {
+          for (size_t i = 0; i < root_index.size(); ++i) {
+            position[i] = root_index[i];
+            utils::Assert(root_index[i] < (unsigned)tree.param.num_roots, "root index exceed setting");
+          }
+        }
+        // mark delete for the deleted data
+        for (size_t i = 0; i < gpair.size(); ++i) {
+          if (gpair[i].hess < 0.0f) position[i] = -1;
+        }
+        // mark subsample
+        if (param.subsample < 1.0f) {
+          for (size_t i = 0; i < gpair.size(); ++i) {
+            if (gpair[i].hess < 0.0f) continue;
+            if (random::SampleBinary(param.subsample) == 0) position[i] = -1;
+          }
+        }
+      }
+      {
+        // initialize feature index
+        unsigned ncol = static_cast<unsigned>(fmat.NumCol());
+        for (unsigned i = 0; i < ncol; ++i) {
+          if (fmat.GetColSize(i) != 0) feat_index.push_back(i);
+        }
+        unsigned n = static_cast<unsigned>(param.colsample_bytree * feat_index.size());
+        random::Shuffle(feat_index);
+        utils::Check(n > 0, "colsample_bytree is too small that no feature can be included");
+        feat_index.resize(n);
+      }
+      {// setup temp space for each thread
+        #pragma omp parallel
+        {
+          this->nthread = omp_get_num_threads();
+        }
+        // reserve a small space
+        stemp.clear();
+        stemp.resize(this->nthread, std::vector<ThreadEntry>());
+        for (size_t i = 0; i < stemp.size(); ++i) {
+          stemp[i].clear(); stemp[i].reserve(256);
+        }
+        snode.reserve(256);
+      }
+      {// expand query
+        qexpand.reserve(256); qexpand.clear();
+        for (int i = 0; i < tree.param.num_roots; ++i) {
+          qexpand.push_back(i);
+        }
+      }
+    }
+    /*! \brief initialize the base_weight, root_gain, and NodeEntry for all the new nodes in qexpand */
+    inline void InitNewNode(const std::vector<int> &qexpand,
+                            const std::vector<bst_gpair> &gpair,
+                            const RegTree &tree) {
+      {// setup statistics space for each tree node
+        for (size_t i = 0; i < stemp.size(); ++i) {
+          stemp[i].resize(tree.param.num_nodes, ThreadEntry());
+        }
+        snode.resize(tree.param.num_nodes, NodeEntry());
+      }
+      // setup position
+      const unsigned ndata = static_cast<unsigned>(position.size());
+      #pragma omp parallel for schedule(static)
+      for (unsigned i = 0; i < ndata; ++i) {
+        const int tid = omp_get_thread_num();
+        if (position[i] < 0) continue;
+        stemp[tid][position[i]].stats.Add(gpair[i]);
+      }
+      // sum the per thread statistics together
+      for (size_t j = 0; j < qexpand.size(); ++j) {
+        const int nid = qexpand[j];
+        TStats stats; stats.Clear();
+        for (size_t tid = 0; tid < stemp.size(); ++tid) {
+          stats.Add(stemp[tid][nid].stats);
+        }
+        // update node statistics
+        snode[nid].stats = stats;
+        snode[nid].root_gain = param.CalcGain(stats);
+        snode[nid].weight = param.CalcWeight(stats);
+      }
+    }
+    /*! \brief update queue expand, add in new leaves */
+    inline void UpdateQueueExpand(const RegTree &tree, std::vector<int> *p_qexpand) {
+      std::vector<int> &qexpand = *p_qexpand;
+      std::vector<int> newnodes;
+      for (size_t i = 0; i < qexpand.size(); ++i) {
+        const int nid = qexpand[i];
+        if (!tree[ nid ].is_leaf()) {
+          newnodes.push_back(tree[nid].cleft());
+          newnodes.push_back(tree[nid].cright());
+        }
+      }
+      // use new nodes for qexpand
+      qexpand = newnodes;
+    }
+    // enumerate the split values of specific feature
+    template<typename Iter>
+    inline void EnumerateSplit(Iter it, unsigned fid,
+                               const std::vector<bst_gpair> &gpair,
+                               std::vector<ThreadEntry> &temp,
+                               bool is_forward_search) {
+      // clear all the temp statistics
+      for (size_t j = 0; j < qexpand.size(); ++j) {
+        temp[qexpand[j]].stats.Clear();
+      }
+      while (it.Next()) {
+        const bst_uint ridx = it.rindex();
+        const int nid = position[ridx];
+        if (nid < 0) continue;
+        // start working
+        const float fvalue = it.fvalue();
+        // get the statistics of nid
+        ThreadEntry &e = temp[nid];
+        // test if first hit, this is fine, because we set 0 during init
+        if (e.stats.Empty()) {
+          e.stats.Add(gpair[ridx]);
+          e.last_fvalue = fvalue;
+        } else {
+          // try to find a split
+          if (fabsf(fvalue - e.last_fvalue) > rt_2eps && e.stats.sum_hess >= param.min_child_weight) {
+            TStats c = snode[nid].stats.Substract(e.stats);
+            if (c.sum_hess >= param.min_child_weight) {
+              double loss_chg = param.CalcGain(e.stats) + param.CalcGain(c) - snode[nid].root_gain;
+              e.best.Update(loss_chg, fid, (fvalue + e.last_fvalue) * 0.5f, !is_forward_search);
+            }
+          }
+          // update the statistics
+          e.stats.Add(gpair[ridx]);
+          e.last_fvalue = fvalue;
+        }
+      }
+      // finish updating all statistics, check if it is possible to include all sum statistics
+      for (size_t i = 0; i < qexpand.size(); ++i) {
+        const int nid = qexpand[i];
+        ThreadEntry &e = temp[nid];
+        TStats c = snode[nid].stats.Substract(e.stats);
+        if (e.stats.sum_hess >= param.min_child_weight && c.sum_hess >= param.min_child_weight) {
+          const double loss_chg = param.CalcGain(e.stats) + param.CalcGain(c) - snode[nid].root_gain;
+          const float delta = is_forward_search ? rt_eps : -rt_eps;
+          e.best.Update(loss_chg, fid, e.last_fvalue + delta, !is_forward_search);
+        }
+      }
+    }
+    // find splits at current level, do split per level
+    inline void FindSplit(int depth, const std::vector<int> &qexpand,
+                          const std::vector<bst_gpair> &gpair, const FMatrix &fmat,
+                          RegTree *p_tree) {
+      std::vector<unsigned> feat_set = feat_index;
+      if (param.colsample_bylevel != 1.0f) {
+        random::Shuffle(feat_set);
+        unsigned n = static_cast<unsigned>(param.colsample_bylevel * feat_index.size());
+        utils::Check(n > 0, "colsample_bylevel is too small that no feature can be included");
+        feat_set.resize(n);
+      }
+      // start enumeration
+      const unsigned nsize = static_cast<unsigned>(feat_set.size());
+      #pragma omp parallel for schedule(dynamic, 1)
+      for (unsigned i = 0; i < nsize; ++i) {
+        const unsigned fid = feat_set[i];
+        const int tid = omp_get_thread_num();
+        if (param.need_forward_search(fmat.GetColDensity(fid))) {
+          this->EnumerateSplit(fmat.GetSortedCol(fid), fid, gpair, stemp[tid], true);
+        }
+        if (param.need_backward_search(fmat.GetColDensity(fid))) {
+          this->EnumerateSplit(fmat.GetReverseSortedCol(fid), fid, gpair, stemp[tid], false);
+        }
+      }
+      // after this each thread's stemp will get the best candidates, aggregate results
+      for (size_t i = 0; i < qexpand.size(); ++i) {
+        const int nid = qexpand[i];
+        NodeEntry &e = snode[nid];
+        for (int tid = 0; tid < this->nthread; ++tid) {
+          e.best.Update(stemp[tid][nid].best);
+        }
+        // now we know the solution in snode[nid], set split
+        if (e.best.loss_chg > rt_eps) {
+          p_tree->AddChilds(nid);
+          (*p_tree)[nid].set_split(e.best.split_index(), e.best.split_value, e.best.default_left());
+        } else {
+          (*p_tree)[nid].set_leaf(e.weight * param.learning_rate);
+        }
+      }
+    }
+    // reset position of each data point after split is created in the tree
+    inline void ResetPosition(const std::vector<int> &qexpand, const FMatrix &fmat, const RegTree &tree) {
+      // step 1, set default direct nodes to default, and leaf nodes to -1
+      const unsigned ndata = static_cast<unsigned>(position.size());
+      #pragma omp parallel for schedule(static)
+      for (unsigned i = 0; i < ndata; ++i) {
+        const int nid = position[i];
+        if (nid >= 0) {
+          if (tree[nid].is_leaf()) {
+            position[i] = -1;
+          } else {
+            // push to default branch, correct later
+            position[i] = tree[nid].default_left() ? tree[nid].cleft() : tree[nid].cright();
+          }
+        }
+      }
+      // step 2, classify the non-default data into right places
+      std::vector<unsigned> fsplits;
+      for (size_t i = 0; i < qexpand.size(); ++i) {
+        const int nid = qexpand[i];
+        if (!tree[nid].is_leaf()) fsplits.push_back(tree[nid].split_index());
+      }
+      std::sort(fsplits.begin(), fsplits.end());
+      fsplits.resize(std::unique(fsplits.begin(), fsplits.end()) - fsplits.begin());
+      // start put things into right place
+      const unsigned nfeats = static_cast<unsigned>(fsplits.size());
+      #pragma omp parallel for schedule(dynamic, 1)
+      for (unsigned i = 0; i < nfeats; ++i) {
+        const unsigned fid = fsplits[i];
+        for (typename FMatrix::ColIter it = fmat.GetSortedCol(fid); it.Next();) {
+          const bst_uint ridx = it.rindex();
+          int nid = position[ridx];
+          if (nid == -1) continue;
+          // go back to parent, correct those who are not default
+          nid = tree[nid].parent();
+          if (tree[nid].split_index() == fid) {
+            if (it.fvalue() < tree[nid].split_cond()) {
+              position[ridx] = tree[nid].cleft();
+            } else {
+              position[ridx] = tree[nid].cright();
+            }
+          }
+        }
+      }
+    }
+    //--data fields--
+    const TrainParam &param;
+    // number of omp thread used during training
+    int nthread;
+    // Per feature: shuffle index of each feature index
+    std::vector<unsigned> feat_index;
+    // Instance Data: current node position in the tree of each instance
+    std::vector<int> position;
+    // PerThread x PerTreeNode: statistics for per thread construction
+    std::vector< std::vector<ThreadEntry> > stemp;
+    /*! \brief TreeNode Data: statistics for each constructed node */
+    std::vector<NodeEntry> snode;
+    /*! \brief queue of nodes to be expanded */
+    std::vector<int> qexpand;
+  };
+};
+
+} // namespace tree
+} // namespace xgboost
+#endif // XGBOOST_TREE_UPDATER_COLMAKER_INL_HPP_
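CreateUpdater in updater.h registers exactly two updaters, which suggests the intended per-iteration flow: grow with grow_colmaker, then prune against the same gamma. A hedged sketch of that chaining, against the interfaces as reconstructed above (the FMatrix type and the calling learner code are assumed, not part of this patch):

    // illustrative only: grow new trees, then prune them, via the factory above
    template<typename FMatrix>
    void GrowAndPrune(const std::vector<xgboost::bst_gpair> &gpair,
                      FMatrix &fmat,
                      const std::vector<xgboost::tree::RegTree*> &trees) {
      using namespace xgboost::tree;
      IUpdater<FMatrix> *grower = CreateUpdater<FMatrix>("grow_colmaker");
      IUpdater<FMatrix> *pruner = CreateUpdater<FMatrix>("prune");
      pruner->SetParam("min_split_loss", "1.0");  // same gamma used for growing
      std::vector<unsigned> root_index;           // empty: every instance starts at root 0
      grower->Update(gpair, fmat, root_index, trees);
      pruner->Update(gpair, fmat, root_index, trees);
      delete grower; delete pruner;
    }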
diff --git a/tree/updater_prune-inl.hpp b/tree/updater_prune-inl.hpp
new file mode 100644
index 000000000..e96951684
--- /dev/null
+++ b/tree/updater_prune-inl.hpp
@@ -0,0 +1,67 @@
+#ifndef XGBOOST_TREE_UPDATER_PRUNE_INL_HPP_
+#define XGBOOST_TREE_UPDATER_PRUNE_INL_HPP_
+/*!
+ * \file updater_prune-inl.hpp
+ * \brief prune a tree given the statistics
+ * \author Tianqi Chen
+ */
+#include <vector>
+#include "./param.h"
+#include "./updater.h"
+
+namespace xgboost {
+namespace tree {
+/*! \brief pruner that prunes a tree after growing finishes */
+template<typename FMatrix>
+class TreePruner: public IUpdater<FMatrix> {
+ public:
+  virtual ~TreePruner(void) {}
+  // set training parameter
+  virtual void SetParam(const char *name, const char *val) {
+    param.SetParam(name, val);
+  }
+  // update the tree, do pruning
+  virtual void Update(const std::vector<bst_gpair> &gpair, FMatrix &fmat,
+                      const std::vector<unsigned> &root_index,
+                      const std::vector<RegTree*> &trees) {
+    for (size_t i = 0; i < trees.size(); ++i) {
+      this->DoPrune(*trees[i]);
+    }
+  }
+
+ private:
+  // try to prune off current leaf
+  inline void TryPruneLeaf(RegTree &tree, int nid, int depth) {
+    if (tree[nid].is_root()) return;
+    int pid = tree[nid].parent();
+    RegTree::NodeStat &s = tree.stat(pid);
+    ++s.leaf_child_cnt;
+
+    if (s.leaf_child_cnt >= 2 && param.need_prune(s.loss_chg, depth - 1)) {
+      // need to be pruned
+      tree.ChangeToLeaf(pid, param.learning_rate * s.base_weight);
+      // tail recursion
+      this->TryPruneLeaf(tree, pid, depth - 1);
+    }
+  }
+  /*! \brief do pruning of a tree */
+  inline void DoPrune(RegTree &tree) {
+    // initialize auxiliary statistics
+    for (int nid = 0; nid < tree.param.num_nodes; ++nid) {
+      tree.stat(nid).leaf_child_cnt = 0;
+    }
+    for (int nid = 0; nid < tree.param.num_nodes; ++nid) {
+      if (tree[nid].is_leaf()) {
+        this->TryPruneLeaf(tree, nid, tree.GetDepth(nid));
+      }
+    }
+  }
+
+ private:
+  // training parameter
+  TrainParam param;
+};
+
+} // namespace tree
+} // namespace xgboost
+#endif // XGBOOST_TREE_UPDATER_PRUNE_INL_HPP_
diff --git a/utils/xgboost_config.h b/utils/config.h
similarity index 100%
rename from utils/xgboost_config.h
rename to utils/config.h
diff --git a/utils/fmap.h b/utils/fmap.h
new file mode 100644
index 000000000..f9437cc6c
--- /dev/null
+++ b/utils/fmap.h
@@ -0,0 +1,80 @@
+#ifndef XGBOOST_UTILS_FMAP_H_
+#define XGBOOST_UTILS_FMAP_H_
+/*!
+ * \file fmap.h
+ * \brief helper class that holds the feature names and interpretations
+ * \author Tianqi Chen
+ */
+#include <vector>
+#include <string>
+#include <cstring>
+#include "./utils.h"
+
+namespace xgboost {
+namespace utils {
+/*! \brief helper class that holds the feature names and interpretations */
+class FeatMap {
+ public:
+  enum Type {
+    kIndicator = 0,
+    kQuantitive = 1,
+    kInteger = 2,
+    kFloat = 3
+  };
+  // function definitions
+  /*! \brief load feature map from text format */
+  inline void LoadText(const char *fname) {
+    FILE *fi = utils::FopenCheck(fname, "r");
+    this->LoadText(fi);
+    fclose(fi);
+  }
+  /*! \brief load feature map from text format */
+  inline void LoadText(FILE *fi) {
+    int fid;
+    char fname[1256], ftype[1256];
+    while (fscanf(fi, "%d\t%[^\t]\t%s\n", &fid, fname, ftype) == 3) {
+      this->PushBack(fid, fname, ftype);
+    }
+  }
+  /*!\brief push back feature map */
+  inline void PushBack(int fid, const char *fname, const char *ftype) {
+    utils::Check(fid == static_cast<int>(names_.size()), "invalid fmap format");
+    names_.push_back(std::string(fname));
+    types_.push_back(GetType(ftype));
+  }
+  inline void Clear(void) {
+    names_.clear(); types_.clear();
+  }
+  /*! \brief number of known features */
+  size_t size(void) const {
+    return names_.size();
+  }
+  /*! \brief return name of specific feature */
+  const char* name(size_t idx) const {
+    utils::Assert(idx < names_.size(), "utils::FMap::name feature index exceed bound");
+    return names_[idx].c_str();
+  }
+  /*! \brief return type of specific feature */
+  const Type& type(size_t idx) const {
+    utils::Assert(idx < names_.size(), "utils::FMap::name feature index exceed bound");
+    return types_[idx];
+  }
+
+ private:
+  inline static Type GetType(const char *tname) {
+    if (!strcmp("i", tname)) return kIndicator;
+    if (!strcmp("q", tname)) return kQuantitive;
+    if (!strcmp("int", tname)) return kInteger;
+    if (!strcmp("float", tname)) return kFloat;
+    utils::Error("unknown feature type, use i for indicator and q for quantity");
+    return kIndicator;
+  }
+  /*! \brief name of the feature */
+  std::vector<std::string> names_;
+  /*! \brief type of the feature */
+  std::vector<Type> types_;
+};
+
+} // namespace utils
+} // namespace xgboost
+#endif // XGBOOST_UTILS_FMAP_H_
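For reference, the text format consumed by FeatMap::LoadText is one fid<TAB>name<TAB>type line per feature, with the type codes decoded in GetType above. A hedged sketch of the equivalent programmatic use (feature names and file name invented):

    // illustrative only: register three features with the type codes from GetType
    xgboost::utils::FeatMap fmap;
    fmap.PushBack(0, "is_male", "i");    // kIndicator
    fmap.PushBack(1, "age", "int");      // kInteger
    fmap.PushBack(2, "income", "q");     // kQuantitive
    // fmap.LoadText("featmap.txt") would accept the same three lines from disk
    printf("%s\n", fmap.name(2));        // prints: income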
diff --git a/utils/io.h b/utils/io.h
new file mode 100644
index 000000000..b52acf764
--- /dev/null
+++ b/utils/io.h
@@ -0,0 +1,104 @@
+#ifndef XGBOOST_UTILS_IO_H
+#define XGBOOST_UTILS_IO_H
+#include <cstdio>
+#include <vector>
+#include <string>
+#include "./utils.h"
+/*!
+ * \file io.h
+ * \brief general stream interface for serialization, I/O
+ * \author Tianqi Chen
+ */
+namespace xgboost {
+namespace utils {
+/*!
+ * \brief interface of stream I/O, used to serialize model
+ */
+class IStream {
+ public:
+  /*!
+   * \brief read data from stream
+   * \param ptr pointer to memory buffer
+   * \param size size of block
+   * \return usually is the size of data read
+   */
+  virtual size_t Read(void *ptr, size_t size) = 0;
+  /*!
+   * \brief write data to stream
+   * \param ptr pointer to memory buffer
+   * \param size size of block
+   */
+  virtual void Write(const void *ptr, size_t size) = 0;
+  /*! \brief virtual destructor */
+  virtual ~IStream(void) {}
+
+ public:
+  // helper functions to write various of data structures
+  /*!
+   * \brief binary serialize a vector
+   * \param vec vector to be serialized
+   */
+  template<typename T>
+  inline void Write(const std::vector<T> &vec) {
+    uint64_t sz = vec.size();
+    this->Write(&sz, sizeof(sz));
+    this->Write(&vec[0], sizeof(T) * sz);
+  }
+  /*!
+   * \brief binary load a vector
+   * \param out_vec vector to be loaded
+   * \return whether load is successful
+   */
+  template<typename T>
+  inline bool Read(std::vector<T> *out_vec) {
+    uint64_t sz;
+    if (this->Read(&sz, sizeof(sz)) == 0) return false;
+    out_vec->resize(sz);
+    if (this->Read(&(*out_vec)[0], sizeof(T) * sz) == 0) return false;
+    return true;
+  }
+  /*!
+   * \brief binary serialize a string
+   * \param str the string to be serialized
+   */
+  inline void Write(const std::string &str) {
+    uint64_t sz = str.length();
+    this->Write(&sz, sizeof(sz));
+    this->Write(&str[0], sizeof(char) * sz);
+  }
+  /*!
+   * \brief binary load a string
+   * \param out_str string to be loaded
+   * \return whether load is successful
+   */
+  inline bool Read(std::string *out_str) {
+    uint64_t sz;
+    if (this->Read(&sz, sizeof(sz)) == 0) return false;
+    out_str->resize(sz);
+    if (this->Read(&(*out_str)[0], sizeof(char) * sz) == 0) return false;
+    return true;
+  }
+};
+
+/*! \brief implementation of file i/o stream */
+class FileStream : public IStream {
+ private:
+  FILE *fp;
+ public:
+  explicit FileStream(FILE *fp) {
+    this->fp = fp;
+  }
+  virtual size_t Read(void *ptr, size_t size) {
+    return fread(ptr, size, 1, fp);
+  }
+  virtual void Write(const void *ptr, size_t size) {
+    fwrite(ptr, size, 1, fp);
+  }
+  inline void Close(void) {
+    fclose(fp);
+  }
+};
+
+} // namespace utils
+} // namespace xgboost
+#endif
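The Write/Read helpers above frame a vector as a uint64_t length followed by the raw payload. A hedged round-trip sketch using FileStream and the FopenCheck/Check helpers from utils.h (file name invented):

    // illustrative only: serialize a vector, then load it back
    using namespace xgboost::utils;
    std::vector<float> weights(16, 0.5f);
    FileStream fo(FopenCheck("weights.bin", "wb"));
    fo.Write(weights);                   // length prefix + raw float payload
    fo.Close();
    std::vector<float> loaded;
    FileStream fi(FopenCheck("weights.bin", "rb"));
    Check(fi.Read(&loaded), "bad weight file");
    fi.Close();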
diff --git a/utils/iterator.h b/utils/iterator.h
new file mode 100644
index 000000000..32ab64aa9
--- /dev/null
+++ b/utils/iterator.h
@@ -0,0 +1,40 @@
+#ifndef XGBOOST_UTILS_ITERATOR_H
+#define XGBOOST_UTILS_ITERATOR_H
+#include <cstdio>
+/*!
+ * \file iterator.h
+ * \brief iterator interface
+ * \author Tianqi Chen
+ */
+namespace xgboost {
+namespace utils {
+/*!
+ * \brief iterator interface
+ * \tparam DType data type
+ */
+template<typename DType>
+class IIterator {
+ public:
+  /*!
+   * \brief set the parameter
+   * \param name name of parameter
+   * \param val value of parameter
+   */
+  virtual void SetParam(const char *name, const char *val) = 0;
+  /*! \brief initialize the iterator so that we can use the iterator */
+  virtual void Init(void) = 0;
+  /*! \brief set before first of the item */
+  virtual void BeforeFirst(void) = 0;
+  /*! \brief move to next item */
+  virtual bool Next(void) = 0;
+  /*! \brief get current data */
+  virtual const DType &Value(void) const = 0;
+ public:
+  /*! \brief destructor */
+  virtual ~IIterator(void) {}
+};
+
+} // namespace utils
+} // namespace xgboost
+#endif
+
diff --git a/utils/matrix_csr.h b/utils/matrix_csr.h
new file mode 100644
index 000000000..31022553b
--- /dev/null
+++ b/utils/matrix_csr.h
@@ -0,0 +1,123 @@
+#ifndef XGBOOST_UTILS_MATRIX_CSR_H_
+#define XGBOOST_UTILS_MATRIX_CSR_H_
+/*!
+ * \file matrix_csr.h
+ * \brief this file defines some easy to use STL based class for in memory sparse CSR matrix
+ * \author Tianqi Chen
+ */
+#include <vector>
+#include <algorithm>
+#include "./utils.h"
+
+namespace xgboost {
+namespace utils {
+/*!
+ * \brief a class used to help construct CSR format matrix,
+ *        can be used to convert row major CSR to column major CSR
+ * \tparam IndexType type of index used to store the index position, usually unsigned or size_t
+ * \tparam UseAcList whether enabling the usage of aclist, this option must be enabled manually
+ */
+template<typename IndexType, bool UseAcList = false>
+struct SparseCSRMBuilder {
+ private:
+  /*! \brief dummy variable used in the indicator matrix construction */
+  std::vector<size_t> dummy_aclist;
+  /*! \brief pointer to each of the row */
+  std::vector<size_t> &rptr;
+  /*! \brief index of nonzero entries in each row */
+  std::vector<IndexType> &findex;
+  /*! \brief a list of active rows, used when many rows are empty */
+  std::vector<size_t> &aclist;
+
+ public:
+  SparseCSRMBuilder(std::vector<size_t> &p_rptr,
+                    std::vector<IndexType> &p_findex)
+      :rptr(p_rptr), findex(p_findex), aclist(dummy_aclist) {
+    Assert(!UseAcList, "enabling bug");
+  }
+  /*! \brief use with caution! rptr must be cleaned before use */
+  SparseCSRMBuilder(std::vector<size_t> &p_rptr,
+                    std::vector<IndexType> &p_findex,
+                    std::vector<size_t> &p_aclist)
+      :rptr(p_rptr), findex(p_findex), aclist(p_aclist) {
+    Assert(UseAcList, "must manually enable the option use aclist");
+  }
+
+ public:
+  /*!
+   * \brief step 1: initialize the number of rows in the data, not necessarily exact
+   * \param nrows number of rows in the matrix, can be smaller than expected
+   */
+  inline void InitBudget(size_t nrows = 0) {
+    if (!UseAcList) {
+      rptr.clear();
+      rptr.resize(nrows + 1, 0);
+    } else {
+      Assert(nrows + 1 == rptr.size(), "rptr must be initialized already");
+      this->Cleanup();
+    }
+  }
+  /*!
+   * \brief step 2: add budget to each rows, this function is called when aclist is used
+   * \param row_id the id of the row
+   * \param nelem number of element budget add to this row
+   */
+  inline void AddBudget(size_t row_id, size_t nelem = 1) {
+    if (rptr.size() < row_id + 2) {
+      rptr.resize(row_id + 2, 0);
+    }
+    if (UseAcList) {
+      if (rptr[row_id + 1] == 0) aclist.push_back(row_id);
+    }
+    rptr[row_id + 1] += nelem;
+  }
+  /*! \brief step 3: initialize the necessary storage */
+  inline void InitStorage(void) {
+    // initialize rptr to be beginning of each segment
+    size_t start = 0;
+    if (!UseAcList) {
+      for (size_t i = 1; i < rptr.size(); i++) {
+        size_t rlen = rptr[i];
+        rptr[i] = start;
+        start += rlen;
+      }
+    } else {
+      // case with active list
+      std::sort(aclist.begin(), aclist.end());
+      for (size_t i = 0; i < aclist.size(); i++) {
+        size_t ridx = aclist[i];
+        size_t rlen = rptr[ridx + 1];
+        rptr[ridx + 1] = start;
+        // set previous rptr to right position if previous feature is not active
+        if (i == 0 || ridx != aclist[i - 1] + 1) rptr[ridx] = start;
+        start += rlen;
+      }
+    }
+    findex.resize(start);
+  }
+  /*!
+   * \brief step 4:
+   *        used in indicator matrix construction, add new
+   *        element to each row, the number of calls shall be exactly same as add_budget
+   */
+  inline void PushElem(size_t row_id, IndexType col_id) {
+    size_t &rp = rptr[row_id + 1];
+    findex[rp++] = col_id;
+  }
+  /*!
+   * \brief step 5: only needed when aclist is used
+   *        clean up the rptr for next usage
+   */
+  inline void Cleanup(void) {
+    Assert(UseAcList, "this function can only be called use AcList");
+    for (size_t i = 0; i < aclist.size(); i++) {
+      const size_t ridx = aclist[i];
+      rptr[ridx] = 0; rptr[ridx + 1] = 0;
+    }
+    aclist.clear();
+  }
+};
+
+} // namespace utils
+} // namespace xgboost
+#endif
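The five-step protocol above is easiest to see end to end. A hedged sketch that transposes a tiny row-major matrix with nonzeros at (0,0), (0,2) and (1,0) into column-major CSR; note the builder's "rows" are the output columns here:

    // illustrative only: row-major entries to column-major CSR via the builder above
    std::vector<size_t> cptr;                 // per-column offsets after the build
    std::vector<unsigned> rindex;             // row indices grouped by column
    xgboost::utils::SparseCSRMBuilder<unsigned> builder(cptr, rindex);
    builder.InitBudget(3);                    // step 1: three output columns
    builder.AddBudget(0);                     // step 2: one call per nonzero's column
    builder.AddBudget(2);
    builder.AddBudget(0);
    builder.InitStorage();                    // step 3: counts become offsets
    builder.PushElem(0, 0);                   // step 4: entry (row 0, col 0) into column 0
    builder.PushElem(2, 0);                   // entry (row 0, col 2) into column 2
    builder.PushElem(0, 1);                   // entry (row 1, col 0) into column 0
    // step 5 (Cleanup) only applies to the UseAcList variant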
diff --git a/utils/xgboost_omp.h b/utils/omp.h
similarity index 71%
rename from utils/xgboost_omp.h
rename to utils/omp.h
index ea1e7173c..46127f631 100644
--- a/utils/xgboost_omp.h
+++ b/utils/omp.h
@@ -1,12 +1,10 @@
-#ifndef XGBOOST_OMP_H
-#define XGBOOST_OMP_H
+#ifndef XGBOOST_UTILS_OMP_H_
+#define XGBOOST_UTILS_OMP_H_
 /*!
- * \file xgboost_omp.h
+ * \file omp.h
  * \brief header to handle OpenMP compatibility issues
- *
- * \author Tianqi Chen: tianqi.tchen@gmail.com
+ * \author Tianqi Chen
  */
-
 #if defined(_OPENMP)
 #include <omp.h>
 #else
@@ -15,4 +13,4 @@ inline int omp_get_thread_num() { return 0; }
 inline int omp_get_num_threads() { return 1; }
 inline void omp_set_num_threads(int nthread) {}
 #endif
-#endif
+#endif // XGBOOST_UTILS_OMP_H_
diff --git a/utils/random.h b/utils/random.h
new file mode 100644
index 000000000..18e40baff
--- /dev/null
+++ b/utils/random.h
@@ -0,0 +1,102 @@
+#ifndef XGBOOST_UTILS_RANDOM_H_
+#define XGBOOST_UTILS_RANDOM_H_
+/*!
+ * \file random.h
+ * \brief PRNG to support random number generation
+ * \author Tianqi Chen: tianqi.tchen@gmail.com
+ *
+ * Use standard PRNG from stdlib
+ */
+#include <cstdlib>
+#include <cmath>
+#include <vector>
+#include <algorithm>
+#include "./utils.h"
+
+/*! namespace of PRNG */
+namespace xgboost {
+namespace random {
+
+/*! \brief seed the PRNG */
+inline void Seed(uint32_t seed) {
+  srand(seed);
+}
+/*! \brief return a real number uniform in [0,1) */
+inline double NextDouble(void) {
+  return static_cast<double>(rand()) / (static_cast<double>(RAND_MAX)+1.0);
+}
+/*! \brief return a real number uniform in (0,1) */
+inline double NextDouble2(void) {
+  return (static_cast<double>(rand()) + 1.0) / (static_cast<double>(RAND_MAX)+2.0);
+}
+
+/*! \brief return a random number */
+inline uint32_t NextUInt32(void) {
+  return (uint32_t)rand();
+}
+/*! \brief return a random number in n */
+inline uint32_t NextUInt32(uint32_t n) {
+  return (uint32_t)floor(NextDouble() * n);
+}
+/*! \brief return x~N(0,1) */
+inline double SampleNormal() {
+  double x, y, s;
+  do {
+    x = 2 * NextDouble2() - 1.0;
+    y = 2 * NextDouble2() - 1.0;
+    s = x*x + y*y;
+  } while (s >= 1.0 || s == 0.0);
+
+  return x * sqrt(-2.0 * log(s) / s);
+}
+
+/*! \brief return iid x,y ~N(0,1) */
+inline void SampleNormal2D(double &xx, double &yy) {
+  double x, y, s;
+  do {
+    x = 2 * NextDouble2() - 1.0;
+    y = 2 * NextDouble2() - 1.0;
+    s = x*x + y*y;
+  } while (s >= 1.0 || s == 0.0);
+  double t = sqrt(-2.0 * log(s) / s);
+  xx = x * t;
+  yy = y * t;
+}
+/*! \brief return x~N(mu,sigma^2) */
+inline double SampleNormal(double mu, double sigma) {
+  return SampleNormal() * sigma + mu;
+}
+/*! \brief return 1 with probability p, coin flip */
+inline int SampleBinary(double p) {
+  return NextDouble() < p;
+}
+
+template<typename T>
+inline void Shuffle(T *data, size_t sz) {
+  if (sz == 0) return;
+  for (uint32_t i = (uint32_t)sz - 1; i > 0; i--){
+    std::swap(data[i], data[NextUInt32(i + 1)]);
+  }
+}
+// random shuffle the data inside, require PRNG
+template<typename T>
+inline void Shuffle(std::vector<T> &data) {
+  Shuffle(&data[0], data.size());
+}
+
+/*! \brief random number generator with independent random number seed */
+struct Random{
+  /*! \brief set random number seed */
+  inline void Seed(unsigned sd) {
+    this->rseed = sd;
+  }
+  /*! \brief return a real number uniform in [0,1) */
+  inline double RandDouble(void) {
+    return static_cast<double>( rand_r( &rseed ) ) / (static_cast<double>( RAND_MAX )+1.0);
+  }
+  // random number seed
+  unsigned rseed;
+};
+} // namespace random
+} // namespace xgboost
+#endif // XGBOOST_UTILS_RANDOM_H_
diff --git a/utils/utils.h b/utils/utils.h
new file mode 100644
index 000000000..5244478f1
--- /dev/null
+++ b/utils/utils.h
@@ -0,0 +1,94 @@
+#ifndef XGBOOST_UTILS_UTILS_H_
+#define XGBOOST_UTILS_UTILS_H_
+/*!
+ * \file utils.h
+ * \brief simple utils to support the code
+ * \author Tianqi Chen
+ */
+#define _CRT_SECURE_NO_WARNINGS
+#ifdef _MSC_VER
+#define fopen64 fopen
+#else
+#ifdef _FILE_OFFSET_BITS
+#if _FILE_OFFSET_BITS == 32
+#warning "FILE OFFSET BITS defined to be 32 bit"
+#endif
+#endif
+
+#ifdef __APPLE__
+#define off64_t off_t
+#define fopen64 fopen
+#endif
+
+#define _FILE_OFFSET_BITS 64
+extern "C" {
+#include <sys/types.h>
+};
+#endif
+
+#ifdef _MSC_VER
+typedef unsigned char uint8_t;
+typedef unsigned short int uint16_t;
+typedef unsigned int uint32_t;
+typedef unsigned long uint64_t;
+typedef long int64_t;
+#else
+#include <inttypes.h>
+#endif
+
+#include <cstdio>
+#include <cstdarg>
+#include <cstdlib>
+
+namespace xgboost {
+/*! \brief namespace for helper utils of the project */
+namespace utils {
+
+/*! \brief assert a condition is true, use this to handle debug information */
+inline void Assert(bool exp, const char *fmt, ...) {
+  if (!exp) {
+    va_list args;
+    va_start(args, fmt);
+    fprintf(stderr, "AssertError:");
+    vfprintf(stderr, fmt, args);
+    va_end(args);
+    fprintf(stderr, "\n");
+    exit(-1);
+  }
+}
+
+/*!\brief same as assert, but this is intended to be used as message for user */
+inline void Check(bool exp, const char *fmt, ...) {
+  if (!exp) {
+    va_list args;
+    va_start(args, fmt);
+    vfprintf(stderr, fmt, args);
+    va_end(args);
+    fprintf(stderr, "\n");
+    exit(-1);
+  }
+}
+
+/*! \brief report error message, same as check */
+inline void Error(const char *fmt, ...) {
+  {
+    va_list args;
+    va_start(args, fmt);
+    vfprintf(stderr, fmt, args);
+    va_end(args);
+    fprintf(stderr, "\n");
+    exit(-1);
+  }
+}
+
+/*! \brief replace fopen, report error when the file open fails */
+inline FILE *FopenCheck(const char *fname, const char *flag) {
+  FILE *fp = fopen64(fname, flag);
+  Check(fp != NULL, "can not open file \"%s\"\n", fname);
+  return fp;
+}
+
+} // namespace utils
+} // namespace xgboost
+#endif // XGBOOST_UTILS_UTILS_H_
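The process-global helpers in random.h wrap srand/rand, while struct Random carries its own rand_r seed so each OpenMP thread can draw independently (as the column maker and the deleted lambda-rank objective do). A small hedged sketch of the subsampling idioms used elsewhere in this patch:

    // illustrative only: shuffle/coin-flip idioms for column and row subsampling
    xgboost::random::Seed(10);
    std::vector<unsigned> feat_index;
    for (unsigned i = 0; i < 100; ++i) feat_index.push_back(i);
    xgboost::random::Shuffle(feat_index);     // column subsampling keeps a prefix of this
    int kept = 0;
    for (int i = 0; i < 1000; ++i) {
      kept += xgboost::random::SampleBinary(0.5);  // row subsampling keeps ~half
    }
    xgboost::utils::Check(kept > 0, "expected to keep some rows");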
diff --git a/utils/xgboost_fmap.h b/utils/xgboost_fmap.h
deleted file mode 100644
index e549c4d7f..000000000
--- a/utils/xgboost_fmap.h
+++ /dev/null
@@ -1,123 +0,0 @@
-#ifndef XGBOOST_FMAP_H
-#define XGBOOST_FMAP_H
-/*!
- * \file xgboost_fmap.h
- * \brief helper class that holds the feature names and interpretations
- * \author Tianqi Chen: tianqi.tchen@gmail.com
- */
-#include <vector>
-#include <string>
-#include <cstring>
-#include "xgboost_utils.h"
-
-namespace xgboost{
-    namespace utils{
-        /*! \brief helper class that holds the feature names and interpretations */
-        class FeatMap{
-        public:
-            enum Type{
-                kIndicator = 0,
-                kQuantitive = 1,
-                kInteger = 2,
-                kFloat = 3
-            };
-        public:
-            /*! \brief load feature map from text format */
-            inline void LoadText(const char *fname){
-                FILE *fi = utils::FopenCheck(fname, "r");
-                this->LoadText(fi);
-                fclose(fi);
-            }
-            /*! \brief load feature map from text format */
-            inline void LoadText(FILE *fi){
-                int fid;
-                char fname[1256], ftype[1256];
-                while (fscanf(fi, "%d\t%[^\t]\t%s\n", &fid, fname, ftype) == 3){
-                    utils::Assert(fid == (int)names_.size(), "invalid fmap format");
-                    names_.push_back(std::string(fname));
-                    types_.push_back(GetType(ftype));
-                }
-            }
-            /*! \brief number of known features */
-            size_t size(void) const{
-                return names_.size();
-            }
-            /*! \brief return name of specific feature */
-            const char* name(size_t idx) const{
-                utils::Assert(idx < names_.size(), "utils::FMap::name feature index exceed bound");
-                return names_[idx].c_str();
-            }
-            /*! \brief return type of specific feature */
-            const Type& type(size_t idx) const{
-                utils::Assert(idx < names_.size(), "utils::FMap::name feature index exceed bound");
-                return types_[idx];
-            }
-        private:
-            inline static Type GetType(const char *tname){
-                if (!strcmp("i", tname)) return kIndicator;
-                if (!strcmp("q", tname)) return kQuantitive;
-                if (!strcmp("int", tname)) return kInteger;
-                if (!strcmp("float", tname)) return kFloat;
-                utils::Error("unknown feature type, use i for indicator and q for quantity");
-                return kIndicator;
-            }
-        private:
-            /*! \brief name of the feature */
-            std::vector<std::string> names_;
-            /*! \brief type of the feature */
-            std::vector<Type> types_;
-        };
-    }; // namespace utils
-
-    namespace utils{
-        /*! \brief feature constraint, allow or disallow some feature during training */
-        class FeatConstrain{
-        public:
-            FeatConstrain(void){
-                default_state_ = +1;
-            }
-            /*!\brief set parameters */
-            inline void SetParam(const char *name, const char *val){
-                int a, b;
-                if (!strcmp(name, "fban")){
-                    this->ParseRange(val, a, b);
-                    this->SetRange(a, b, -1);
-                }
-                if (!strcmp(name, "fpass")){
-                    this->ParseRange(val, a, b);
-                    this->SetRange(a, b, +1);
-                }
-                if (!strcmp(name, "fdefault")){
-                    default_state_ = atoi(val);
-                }
-            }
-            /*! \brief whether constrain is specified */
-            inline bool HasConstrain(void) const {
-                return state_.size() != 0 && default_state_ == 1;
-            }
-            /*! \brief whether a feature index is banned or not */
-            inline bool NotBanned(unsigned index) const{
-                int rt = index < state_.size() ? state_[index] : default_state_;
-                if (rt == 0) rt = default_state_;
-                return rt == 1;
-            }
-        private:
-            inline void SetRange(int a, int b, int st){
-                if (b > (int)state_.size()) state_.resize(b, 0);
-                for (int i = a; i < b; ++i){
-                    state_[i] = st;
-                }
-            }
-            inline void ParseRange(const char *val, int &a, int &b){
-                if (sscanf(val, "%d-%d", &a, &b) == 2) return;
-                utils::Assert(sscanf(val, "%d", &a) == 1);
-                b = a + 1;
-            }
-            /*! \brief default state */
-            int default_state_;
-            /*! \brief whether the state here is, +1:pass, -1: ban, 0:default */
-            std::vector<int> state_;
-        };
-    }; // namespace utils
-}; // namespace xgboost
-#endif // XGBOOST_FMAP_H
diff --git a/utils/xgboost_matrix_csr.h b/utils/xgboost_matrix_csr.h
deleted file mode 100644
index 7ac9a30b6..000000000
--- a/utils/xgboost_matrix_csr.h
+++ /dev/null
@@ -1,157 +0,0 @@
-/*!
- * \file xgboost_matrix_csr.h
- * \brief this file defines some easy to use STL based class for in memory sparse CSR matrix
- * \author Tianqi Chen: tianqi.tchen@gmail.com
- */
-#ifndef XGBOOST_MATRIX_CSR_H
-#define XGBOOST_MATRIX_CSR_H
-#include <vector>
-#include <algorithm>
-#include "xgboost_utils.h"
-
-namespace xgboost{
-    namespace utils{
- * \brief a class used to help construct CSR format matrix, - * can be used to convert row major CSR to column major CSR - * \tparam IndexType type of index used to store the index position, usually unsigned or size_t - * \tparam whether enabling the usage of aclist, this option must be enabled manually - */ - template - struct SparseCSRMBuilder{ - private: - /*! \brief dummy variable used in the indicator matrix construction */ - std::vector dummy_aclist; - /*! \brief pointer to each of the row */ - std::vector &rptr; - /*! \brief index of nonzero entries in each row */ - std::vector &findex; - /*! \brief a list of active rows, used when many rows are empty */ - std::vector &aclist; - public: - SparseCSRMBuilder(std::vector &p_rptr, - std::vector &p_findex) - :rptr(p_rptr), findex(p_findex), aclist(dummy_aclist){ - Assert(!UseAcList, "enabling bug"); - } - /*! \brief use with caution! rptr must be cleaned before use */ - SparseCSRMBuilder(std::vector &p_rptr, - std::vector &p_findex, - std::vector &p_aclist) - :rptr(p_rptr), findex(p_findex), aclist(p_aclist){ - Assert(UseAcList, "must manually enable the option use aclist"); - } - public: - /*! - * \brief step 1: initialize the number of rows in the data, not necessary exact - * \nrows number of rows in the matrix, can be smaller than expected - */ - inline void InitBudget(size_t nrows = 0){ - if (!UseAcList){ - rptr.clear(); - rptr.resize(nrows + 1, 0); - } - else{ - Assert(nrows + 1 == rptr.size(), "rptr must be initialized already"); - this->Cleanup(); - } - } - /*! - * \brief step 2: add budget to each rows, this function is called when aclist is used - * \param row_id the id of the row - * \param nelem number of element budget add to this row - */ - inline void AddBudget(size_t row_id, size_t nelem = 1){ - if (rptr.size() < row_id + 2){ - rptr.resize(row_id + 2, 0); - } - if (UseAcList){ - if (rptr[row_id + 1] == 0) aclist.push_back(row_id); - } - rptr[row_id + 1] += nelem; - } - /*! \brief step 3: initialize the necessary storage */ - inline void InitStorage(void){ - // initialize rptr to be beginning of each segment - size_t start = 0; - if (!UseAcList){ - for (size_t i = 1; i < rptr.size(); i++){ - size_t rlen = rptr[i]; - rptr[i] = start; - start += rlen; - } - } - else{ - // case with active list - std::sort(aclist.begin(), aclist.end()); - - for (size_t i = 0; i < aclist.size(); i++){ - size_t ridx = aclist[i]; - size_t rlen = rptr[ridx + 1]; - rptr[ridx + 1] = start; - // set previous rptr to right position if previous feature is not active - if (i == 0 || ridx != aclist[i - 1] + 1) rptr[ridx] = start; - start += rlen; - } - } - findex.resize(start); - } - /*! - * \brief step 4: - * used in indicator matrix construction, add new - * element to each row, the number of calls shall be exactly same as add_budget - */ - inline void PushElem(size_t row_id, IndexType col_id){ - size_t &rp = rptr[row_id + 1]; - findex[rp++] = col_id; - } - /*! - * \brief step 5: only needed when aclist is used - * clean up the rptr for next usage - */ - inline void Cleanup(void){ - Assert(UseAcList, "this function can only be called use AcList"); - for (size_t i = 0; i < aclist.size(); i++){ - const size_t ridx = aclist[i]; - rptr[ridx] = 0; rptr[ridx + 1] = 0; - } - aclist.clear(); - } - }; - }; - - namespace utils{ - /*! - * \brief simple sparse matrix container - * \tparam IndexType type of index used to store the index position, usually unsigned or size_t - */ - template - struct SparseCSRMat{ - private: - /*! 
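
SparseCSRMBuilder above is a two-pass builder: budgets are counted first, turned into row offsets, then filled with exactly one PushElem per AddBudget. A sketch of the intended call sequence for the plain (non-aclist) variant, assuming the same interface carries over to the new utils/matrix_csr.h and that the stripped template parameters were <IndexType, bool UseAcList = false>:

#include <vector>
#include "utils/matrix_csr.h"

int main(void) {
  // three (row, column) entries of a 2-row indicator matrix
  const size_t row[] = {0, 1, 1};
  const unsigned col[] = {2, 0, 3};
  std::vector<size_t> rptr;
  std::vector<unsigned> findex;
  xgboost::utils::SparseCSRMBuilder<unsigned> builder(rptr, findex);
  builder.InitBudget(2);                                        // step 1: (rough) number of rows
  for (int i = 0; i < 3; ++i) builder.AddBudget(row[i]);        // step 2: count entries per row
  builder.InitStorage();                                        // step 3: budgets become offsets
  for (int i = 0; i < 3; ++i) builder.PushElem(row[i], col[i]); // step 4: one push per budget
  // rptr is now {0, 1, 3} and findex {2, 0, 3}: a valid CSR layout
  return 0;
}
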
\brief pointer to each of the row */ - std::vector rptr; - /*! \brief index of nonzero entries in each row */ - std::vector findex; - public: - /*! \brief matrix builder*/ - SparseCSRMBuilder builder; - public: - SparseCSRMat(void) :builder(rptr, findex){ - } - public: - /*! \return number of rows in the matrx */ - inline size_t NumRow(void) const{ - return rptr.size() - 1; - } - /*! \return number of elements r-th row */ - inline size_t NumElem(size_t r) const{ - return rptr[r + 1] - rptr[r]; - } - /*! \return r-th row */ - inline const IndexType *operator[](size_t r) const{ - return &findex[rptr[r]]; - } - }; - }; -}; -#endif diff --git a/utils/xgboost_random.h b/utils/xgboost_random.h deleted file mode 100644 index c4a4e763e..000000000 --- a/utils/xgboost_random.h +++ /dev/null @@ -1,148 +0,0 @@ -#ifndef XGBOOST_RANDOM_H -#define XGBOOST_RANDOM_H -/*! - * \file xgboost_random.h - * \brief PRNG to support random number generation - * \author Tianqi Chen: tianqi.tchen@gmail.com - * - * Use standard PRNG from stdlib - */ -#include -#include -#include - -#ifdef _MSC_VER -typedef unsigned char uint8_t; -typedef unsigned short int uint16_t; -typedef unsigned int uint32_t; -#else -#include -#endif - -/*! namespace of PRNG */ -namespace xgboost{ - namespace random{ - /*! \brief seed the PRNG */ - inline void Seed(uint32_t seed){ - srand(seed); - } - - /*! \brief return a real number uniform in [0,1) */ - inline double NextDouble(){ - return static_cast(rand()) / (static_cast(RAND_MAX)+1.0); - } - /*! \brief return a real numer uniform in (0,1) */ - inline double NextDouble2(){ - return (static_cast(rand()) + 1.0) / (static_cast(RAND_MAX)+2.0); - } - }; - - namespace random{ - /*! \brief return a random number */ - inline uint32_t NextUInt32(void){ - return (uint32_t)rand(); - } - /*! \brief return a random number in n */ - inline uint32_t NextUInt32(uint32_t n){ - return (uint32_t)floor(NextDouble() * n); - } - /*! \brief return x~N(0,1) */ - inline double SampleNormal(){ - double x, y, s; - do{ - x = 2 * NextDouble2() - 1.0; - y = 2 * NextDouble2() - 1.0; - s = x*x + y*y; - } while (s >= 1.0 || s == 0.0); - - return x * sqrt(-2.0 * log(s) / s); - } - - /*! \brief return iid x,y ~N(0,1) */ - inline void SampleNormal2D(double &xx, double &yy){ - double x, y, s; - do{ - x = 2 * NextDouble2() - 1.0; - y = 2 * NextDouble2() - 1.0; - s = x*x + y*y; - } while (s >= 1.0 || s == 0.0); - double t = sqrt(-2.0 * log(s) / s); - xx = x * t; - yy = y * t; - } - /*! \brief return x~N(mu,sigma^2) */ - inline double SampleNormal(double mu, double sigma){ - return SampleNormal() * sigma + mu; - } - - /*! \brief return 1 with probability p, coin flip */ - inline int SampleBinary(double p){ - return NextDouble() < p; - } - - /*! 
\brief return distribution from Gamma( alpha, beta ) */ - inline double SampleGamma(double alpha, double beta) { - if (alpha < 1.0) { - double u; - do { - u = NextDouble(); - } while (u == 0.0); - return SampleGamma(alpha + 1.0, beta) * pow(u, 1.0 / alpha); - } - else { - double d, c, x, v, u; - d = alpha - 1.0 / 3.0; - c = 1.0 / sqrt(9.0 * d); - do { - do { - x = SampleNormal(); - v = 1.0 + c*x; - } while (v <= 0.0); - v = v * v * v; - u = NextDouble(); - } while ((u >= (1.0 - 0.0331 * (x*x) * (x*x))) - && (log(u) >= (0.5 * x * x + d * (1.0 - v + log(v))))); - return d * v / beta; - } - } - - template - inline void Exchange(T &a, T &b){ - T c; - c = a; - a = b; - b = c; - } - - template - inline void Shuffle(T *data, size_t sz){ - if (sz == 0) return; - for (uint32_t i = (uint32_t)sz - 1; i > 0; i--){ - Exchange(data[i], data[NextUInt32(i + 1)]); - } - } - // random shuffle the data inside, require PRNG - template - inline void Shuffle(std::vector &data){ - Shuffle(&data[0], data.size()); - } - }; - - namespace random{ - /*! \brief random number generator with independent random number seed*/ - struct Random{ - /*! \brief set random number seed */ - inline void Seed( unsigned sd ){ - this->rseed = sd; - } - /*! \brief return a real number uniform in [0,1) */ - inline double RandDouble( void ){ - return static_cast( rand_r( &rseed ) ) / (static_cast( RAND_MAX )+1.0); - } - // random number seed - unsigned rseed; - }; - }; -}; - -#endif diff --git a/utils/xgboost_stream.h b/utils/xgboost_stream.h deleted file mode 100644 index b7b513d18..000000000 --- a/utils/xgboost_stream.h +++ /dev/null @@ -1,54 +0,0 @@ -#ifndef XGBOOST_STREAM_H -#define XGBOOST_STREAM_H - -#include -/*! - * \file xgboost_stream.h - * \brief general stream interface for serialization - * \author Tianqi Chen: tianqi.tchen@gmail.com - */ -namespace xgboost{ - namespace utils{ - /*! - * \brief interface of stream I/O, used to serialize model - */ - class IStream{ - public: - /*! - * \brief read data from stream - * \param ptr pointer to memory buffer - * \param size size of block - * \return usually is the size of data readed - */ - virtual size_t Read(void *ptr, size_t size) = 0; - /*! - * \brief write data to stream - * \param ptr pointer to memory buffer - * \param size size of block - */ - virtual void Write(const void *ptr, size_t size) = 0; - /*! \brief virtual destructor */ - virtual ~IStream(void){} - }; - - /*! \brief implementation of file i/o stream */ - class FileStream : public IStream{ - private: - FILE *fp; - public: - FileStream(FILE *fp){ - this->fp = fp; - } - virtual size_t Read(void *ptr, size_t size){ - return fread(ptr, size, 1, fp); - } - virtual void Write(const void *ptr, size_t size){ - fwrite(ptr, size, 1, fp); - } - inline void Close(void){ - fclose(fp); - } - }; - }; -}; -#endif diff --git a/utils/xgboost_utils.h b/utils/xgboost_utils.h deleted file mode 100644 index e7746a881..000000000 --- a/utils/xgboost_utils.h +++ /dev/null @@ -1,70 +0,0 @@ -#ifndef XGBOOST_UTILS_H -#define XGBOOST_UTILS_H -/*! 
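
The Random struct above, kept essentially unchanged in the new utils/random.h, differs from the global Seed/NextDouble helpers in one important way: the seed lives in the object and is advanced through rand_r, so each OpenMP thread can own a generator without racing on shared PRNG state. A sketch of that pattern, assuming utils/omp.h keeps providing the usual omp_* entry points (with single-thread stubs when OpenMP is absent):

#include <cstdio>
#include <vector>
#include "utils/omp.h"
#include "utils/random.h"

int main(void) {
  const int nthread = 4;  // illustrative thread count
  std::vector<xgboost::random::Random> rnd(nthread);
  for (int i = 0; i < nthread; ++i) rnd[i].Seed(i + 1);  // distinct seed per thread
  double sum = 0.0;
  #pragma omp parallel for reduction(+:sum) num_threads(nthread)
  for (int i = 0; i < 1000; ++i) {
    // each thread draws only from its own generator; no shared rand() state
    sum += rnd[omp_get_thread_num()].RandDouble();
  }
  printf("mean = %g\n", sum / 1000.0);
  return 0;
}
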
- * \file xgboost_utils.h - * \brief simple utils to support the code - * \author Tianqi Chen: tianqi.tchen@gmail.com - */ - -#define _CRT_SECURE_NO_WARNINGS -#ifdef _MSC_VER -#define fopen64 fopen -#else - -// use 64 bit offset, either to include this header in the beginning, or -#ifdef _FILE_OFFSET_BITS -#if _FILE_OFFSET_BITS == 32 -#warning "FILE OFFSET BITS defined to be 32 bit" -#endif -#endif - -#ifdef __APPLE__ -#define off64_t off_t -#define fopen64 fopen -#endif - -#define _FILE_OFFSET_BITS 64 -extern "C"{ -#include -}; -#include -#endif - -#include -#include - -namespace xgboost{ - /*! \brief namespace for helper utils of the project */ - namespace utils{ - inline void Error(const char *msg){ - fprintf(stderr, "Error:%s\n", msg); - fflush(stderr); - exit(-1); - } - - inline void Assert(bool exp){ - if (!exp) Error("AssertError"); - } - - inline void Assert(bool exp, const char *msg){ - if (!exp) Error(msg); - } - - inline void Warning(const char *msg){ - fprintf(stderr, "warning:%s\n", msg); - } - - /*! \brief replace fopen, report error when the file open fails */ - inline FILE *FopenCheck(const char *fname, const char *flag){ - FILE *fp = fopen64(fname, flag); - if (fp == NULL){ - fprintf(stderr, "can not open file \"%s\" \n", fname); - fflush(stderr); - exit(-1); - } - return fp; - } - }; -}; - -#endif diff --git a/xgunity.cpp b/xgunity.cpp new file mode 100644 index 000000000..9f5afdd19 --- /dev/null +++ b/xgunity.cpp @@ -0,0 +1,27 @@ +#include "tree/updater.h" +#include "gbm/gbm.h" +#include "utils/omp.h" +#include "utils/utils.h" +#include "utils/random.h" +#include "learner/objective.h" +#include "learner/learner-inl.hpp" + +// pass compile flag + +using namespace xgboost; +int main(void){ + + FMatrixS fmat(NULL); + tree::RegTree tree; + tree::TrainParam param; + std::vector gpair; + std::vector roots; + tree::IUpdater *up = tree::CreateUpdater("prune"); + gbm::IGradBooster *gbm = new gbm::GBTree(); + std::vector trees; + learner::IObjFunction *func = learner::CreateObjFunction("reg:linear"); + learner::BoostLearner *learner= new learner::BoostLearner(); + up->Update(gpair, fmat, roots, trees); + + return 0; +} From dafa44753a2299d606ad07f126aa7abc94f7fe64 Mon Sep 17 00:00:00 2001 From: tqchen Date: Fri, 15 Aug 2014 20:22:54 -0700 Subject: [PATCH 02/52] chg readme --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 1d0a836b1..5eae58052 100644 --- a/README.md +++ b/README.md @@ -10,7 +10,7 @@ Questions and Issues: [https://github.com/tqchen/xgboost/issues](https://github. xgboost-unity ======= -experimental branch: refactor xgboost, cleaner code, more flexibility +experimental branch(not usable yet): refactor xgboost, cleaner code, more flexibility Build ====== From 3589e8252f45f93df2d3cea178b2c8531d5ff30a Mon Sep 17 00:00:00 2001 From: tqchen Date: Fri, 15 Aug 2014 21:02:33 -0700 Subject: [PATCH 03/52] refactor config --- learner/objective.h | 9 +- tree/updater_colmaker-inl.hpp | 2 +- utils/config.h | 391 ++++++++++++++++------------------ 3 files changed, 189 insertions(+), 213 deletions(-) diff --git a/learner/objective.h b/learner/objective.h index 2ae5b7d3e..e38f7cfe4 100644 --- a/learner/objective.h +++ b/learner/objective.h @@ -57,7 +57,6 @@ class IObjFunction{ return base_score; } }; - } // namespace learner } // namespace xgboost @@ -68,10 +67,10 @@ namespace xgboost { namespace learner { /*! 
\brief factory funciton to create objective function by name */ inline IObjFunction* CreateObjFunction(const char *name) { - if (!strcmp("reg:linear", name)) return new RegLossObj( LossType::kLinearSquare ); - if (!strcmp("reg:logistic", name)) return new RegLossObj( LossType::kLogisticNeglik ); - if (!strcmp("binary:logistic", name)) return new RegLossObj( LossType::kLogisticClassify ); - if (!strcmp("binary:logitraw", name)) return new RegLossObj( LossType::kLogisticRaw ); + if (!strcmp("reg:linear", name)) return new RegLossObj(LossType::kLinearSquare); + if (!strcmp("reg:logistic", name)) return new RegLossObj(LossType::kLogisticNeglik); + if (!strcmp("binary:logistic", name)) return new RegLossObj(LossType::kLogisticClassify); + if (!strcmp("binary:logitraw", name)) return new RegLossObj(LossType::kLogisticRaw); utils::Error("unknown objective function type: %s", name); return NULL; } diff --git a/tree/updater_colmaker-inl.hpp b/tree/updater_colmaker-inl.hpp index 3223eec6a..483f0fda8 100644 --- a/tree/updater_colmaker-inl.hpp +++ b/tree/updater_colmaker-inl.hpp @@ -27,7 +27,7 @@ class ColMaker: public IUpdater { FMatrix &fmat, const std::vector &root_index, const std::vector &trees) { - fmat.InitColAccess(); + for (size_t i = 0; i < trees.size(); ++i) { Builder builder(param); builder.Update(gpair, fmat, root_index, trees[i]); diff --git a/utils/config.h b/utils/config.h index 22a343370..921d5f8e0 100644 --- a/utils/config.h +++ b/utils/config.h @@ -1,219 +1,196 @@ -#ifndef XGBOOST_CONFIG_H -#define XGBOOST_CONFIG_H +#ifndef XGBOOST_UTILS_CONFIG_H_ +#define XGBOOST_UTILS_CONFIG_H_ /*! - * \file xgboost_config.h + * \file config.h * \brief helper class to load in configures from file - * \author Tianqi Chen: tianqi.tchen@gmail.com + * \author Tianqi Chen */ -#define _CRT_SECURE_NO_WARNINGS #include #include #include -#include "xgboost_utils.h" -#include +#include +#include +#include "./utils.h" -namespace xgboost{ - namespace utils{ - /*! - * \brief an iterator that iterates over a configure file and gets the configures - */ - class ConfigIterator{ - public: - /*! - * \brief constructor - * \param fname name of configure file - */ - ConfigIterator(const char *fname){ - fi = FopenCheck(fname, "r"); - ch_buf = fgetc(fi); - } - /*! \brief destructor */ - ~ConfigIterator(){ - fclose(fi); - } - /*! - * \brief get current name, called after Next returns true - * \return current parameter name - */ - inline const char *name(void)const{ - return s_name; - } - /*! - * \brief get current value, called after Next returns true - * \return current parameter value - */ - inline const char *val(void) const{ - return s_val; - } - /*! - * \brief move iterator to next position - * \return true if there is value in next position - */ - inline bool Next(void){ - while (!feof(fi)){ - GetNextToken(s_name); - if (s_name[0] == '=') return false; - if (GetNextToken(s_buf) || s_buf[0] != '=') return false; - if (GetNextToken(s_val) || s_val[0] == '=') return false; - return true; - } - return false; - } - private: - FILE *fi; - char ch_buf; - char s_name[256], s_val[256], s_buf[246]; +namespace xgboost { +namespace utils { +/*! + * \brief base implementation of config reader + */ +class ConfigReaderBase { + public: + /*! + * \brief get current name, called after Next returns true + * \return current parameter name + */ + inline const char *name(void) const { + return s_name; + } + /*! 
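A note on the objective factory completed above: CreateObjFunction is the single registration point for losses, so training code resolves an objective by its configured name and never touches the concrete classes. A minimal sketch of the calling side (this assumes IObjFunction declares a virtual destructor, which is not shown in the excerpt):

#include "learner/objective.h"

int main(void) {
  using namespace xgboost::learner;
  // resolve the configured string to a concrete objective implementation
  IObjFunction *obj = CreateObjFunction("binary:logistic");
  // a name outside the registered set ends in utils::Error instead of returning NULL
  delete obj;
  return 0;
}
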
+ * \brief get current value, called after Next returns true + * \return current parameter value + */ + inline const char *val(void) const { + return s_val; + } + /*! + * \brief move iterator to next position + * \return true if there is value in next position + */ + inline bool Next(void) { + while (!this->IsEnd()) { + GetNextToken(s_name); + if (s_name[0] == '=') return false; + if (GetNextToken( s_buf ) || s_buf[0] != '=') return false; + if (GetNextToken( s_val ) || s_val[0] == '=') return false; + return true; + } + return false; + } + // called before usage + inline void Init(void) { + ch_buf = this->GetChar(); + } - inline void SkipLine(){ - do{ - ch_buf = fgetc(fi); - } while (ch_buf != EOF && ch_buf != '\n' && ch_buf != '\r'); - } + protected: + /*! + * \brief to be implemented by subclass, + * get next token, return EOF if end of file + */ + virtual char GetChar(void) = 0; + /*! \brief to be implemented by child, check if end of stream */ + virtual bool IsEnd(void) = 0; - inline void ParseStr(char tok[]){ - int i = 0; - while ((ch_buf = fgetc(fi)) != EOF){ - switch (ch_buf){ - case '\\': tok[i++] = fgetc(fi); break; - case '\"': tok[i++] = '\0'; - return; - case '\r': - case '\n': Error("unterminated string"); break; - default: tok[i++] = ch_buf; - } - } - Error("unterminated string"); - } - // return newline - inline bool GetNextToken(char tok[]){ - int i = 0; - bool new_line = false; - while (ch_buf != EOF){ - switch (ch_buf){ - case '#': SkipLine(); new_line = true; break; - case '\"': - if (i == 0){ - ParseStr(tok); ch_buf = fgetc(fi); return new_line; - } - else{ - Error("token followed directly by string"); - } - case '=': - if (i == 0) { - ch_buf = fgetc(fi); - tok[0] = '='; - tok[1] = '\0'; - } - else{ - tok[i] = '\0'; - } - return new_line; - case '\r': - case '\n': - if (i == 0) new_line = true; - case '\t': - case ' ': - ch_buf = fgetc(fi); - if (i > 0){ - tok[i] = '\0'; - return new_line; - } - break; - default: - tok[i++] = ch_buf; - ch_buf = fgetc(fi); - break; - } - } - return true; - } - }; - }; + private: + char ch_buf; + char s_name[100000], s_val[100000], s_buf[100000]; - namespace utils{ - /*! - * \brief a class that save parameter configurations - * temporally and allows to get them out later - * there are two kinds of priority in ConfigSaver - */ - class ConfigSaver{ - public: - /*! \brief constructor */ - ConfigSaver(void){ idx = 0; } - /*! \brief clear all saves */ - inline void Clear(void){ - idx = 0; - names.clear(); values.clear(); - names_high.clear(); values_high.clear(); - } - /*! - * \brief push back a parameter setting - * \param name name of parameter - * \param val value of parameter - * \param priority whether the setting has higher priority: high priority occurs - * latter when read from ConfigSaver, and can overwrite existing settings - */ - inline void PushBack(const char *name, const char *val, int priority = 0){ - if (priority == 0){ - names.push_back(std::string(name)); - values.push_back(std::string(val)); - } - else{ - names_high.push_back(std::string(name)); - values_high.push_back(std::string(val)); - } - } - /*! \brief set pointer to beginning of the ConfigSaver */ - inline void BeforeFirst(void){ - idx = 0; - } - /*! - * \brief move iterator to next position - * \return true if there is value in next position - */ - inline bool Next(void){ - if (idx >= names.size() + names_high.size()){ - return false; - } - idx++; - return true; - } - /*! 
- * \brief get current name, called after Next returns true - * \return current parameter name - */ - inline const char *name(void) const{ - Assert(idx > 0, "can't call name before first"); - size_t i = idx - 1; - if (i >= names.size()){ - return names_high[i - names.size()].c_str(); - } - else{ - return names[i].c_str(); - } - } - /*! - * \brief get current value, called after Next returns true - * \return current parameter value - */ - inline const char *val(void) const{ - Assert(idx > 0, "can't call name before first"); - size_t i = idx - 1; - if (i >= values.size()){ - return values_high[i - values.size()].c_str(); - } - else{ - return values[i].c_str(); - } - } - private: - std::vector names; - std::vector values; - std::vector names_high; - std::vector values_high; - size_t idx; - }; - }; + inline void SkipLine(void) { + do { + ch_buf = this->GetChar(); + } while (ch_buf != EOF && ch_buf != '\n' && ch_buf != '\r'); + } + + inline void ParseStr(char tok[]) { + int i = 0; + while ((ch_buf = this->GetChar()) != EOF) { + switch (ch_buf) { + case '\\': tok[i++] = this->GetChar(); break; + case '\"': tok[i++] = '\0'; return; + case '\r': + case '\n': Error("ConfigReader: unterminated string"); + default: tok[i++] = ch_buf; + } + } + Error("ConfigReader: unterminated string"); + } + inline void ParseStrML(char tok[]) { + int i = 0; + while ((ch_buf = this->GetChar()) != EOF) { + switch (ch_buf) { + case '\\': tok[i++] = this->GetChar(); break; + case '\'': tok[i++] = '\0'; return; + default: tok[i++] = ch_buf; + } + } + Error("unterminated string"); + } + // return newline + inline bool GetNextToken(char tok[]) { + int i = 0; + bool new_line = false; + while (ch_buf != EOF) { + switch (ch_buf) { + case '#' : SkipLine(); new_line = true; break; + case '\"': + if (i == 0) { + ParseStr(tok); ch_buf = this->GetChar(); return new_line; + } else { + Error("ConfigReader: token followed directly by string"); + } + case '\'': + if (i == 0) { + ParseStrML( tok ); ch_buf = this->GetChar(); return new_line; + } else { + Error("ConfigReader: token followed directly by string"); + } + case '=': + if (i == 0) { + ch_buf = this->GetChar(); + tok[0] = '='; + tok[1] = '\0'; + } else { + tok[i] = '\0'; + } + return new_line; + case '\r': + case '\n': + if (i == 0) new_line = true; + case '\t': + case ' ' : + ch_buf = this->GetChar(); + if (i > 0) { + tok[i] = '\0'; + return new_line; + } + break; + default: + tok[i++] = ch_buf; + ch_buf = this->GetChar(); + break; + } + } + return true; + } }; -#endif +/*! + * \brief an iterator use stream base, allows use all types of istream + */ +class ConfigStreamReader: public ConfigReaderBase { + public: + /*! + * \brief constructor + * \param istream input stream + */ + explicit ConfigStreamReader(std::istream &fin) : fin(fin) {} + protected: + virtual char GetChar(void) { + return fin.get(); + } + /*! \brief to be implemented by child, check if end of stream */ + virtual bool IsEnd(void) { + return fin.eof(); + } + + private: + std::istream &fin; +}; + +/*! + * \brief an iterator that iterates over a configure file and gets the configures + */ +class ConfigIterator: public ConfigStreamReader { + public: + /*! + * \brief constructor + * \param fname name of configure file + */ + explicit ConfigIterator(const char *fname) : ConfigStreamReader(fi) { + fi.open(fname); + if (fi.fail()) { + utils::Error("cannot open file %s", fname); + } + ConfigReaderBase::Init(); + } + /*! 
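
The refactor above splits tokenizing (ConfigReaderBase) from the character source, so the same name = value grammar, '#' comments, and double/single-quoted strings now work over any std::istream; ConfigIterator merely binds the reader to a std::ifstream. A minimal parse loop against the interface shown above (the file name is hypothetical):

#include <cstdio>
#include "utils/config.h"

int main(void) {
  xgboost::utils::ConfigIterator itr("train.conf");  // exits via utils::Error if the file is missing
  while (itr.Next()) {
    printf("%s = %s\n", itr.name(), itr.val());
  }
  return 0;
}
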
\brief destructor */ + ~ConfigIterator(void) { + fi.close(); + } + + private: + std::ifstream fi; +}; +} // namespace utils +} // namespace xgboost +#endif // XGBOOST_UTILS_CONFIG_H_ From 34dd409c5b2d820c9e120d0e195680e04e4c5a00 Mon Sep 17 00:00:00 2001 From: tqchen Date: Fri, 15 Aug 2014 21:04:23 -0700 Subject: [PATCH 04/52] mv code into src --- Makefile | 2 +- data.h => src/data.h | 0 {gbm => src/gbm}/gbm.h | 0 {gbm => src/gbm}/gbtree-inl.hpp | 0 {learner => src/learner}/dmatrix.h | 0 {learner => src/learner}/evaluation-inl.hpp | 0 {learner => src/learner}/evaluation.h | 0 {learner => src/learner}/helper_utils.h | 0 {learner => src/learner}/learner-inl.hpp | 0 {learner => src/learner}/objective-inl.hpp | 0 {learner => src/learner}/objective.h | 0 {tree => src/tree}/model.h | 0 {tree => src/tree}/param.h | 0 {tree => src/tree}/updater.h | 0 {tree => src/tree}/updater_colmaker-inl.hpp | 0 {tree => src/tree}/updater_prune-inl.hpp | 0 {utils => src/utils}/config.h | 0 {utils => src/utils}/fmap.h | 0 {utils => src/utils}/io.h | 0 {utils => src/utils}/iterator.h | 0 {utils => src/utils}/matrix_csr.h | 0 {utils => src/utils}/omp.h | 0 {utils => src/utils}/random.h | 0 {utils => src/utils}/utils.h | 0 xgunity.cpp | 27 --------------------- 25 files changed, 1 insertion(+), 28 deletions(-) rename data.h => src/data.h (100%) rename {gbm => src/gbm}/gbm.h (100%) rename {gbm => src/gbm}/gbtree-inl.hpp (100%) rename {learner => src/learner}/dmatrix.h (100%) rename {learner => src/learner}/evaluation-inl.hpp (100%) rename {learner => src/learner}/evaluation.h (100%) rename {learner => src/learner}/helper_utils.h (100%) rename {learner => src/learner}/learner-inl.hpp (100%) rename {learner => src/learner}/objective-inl.hpp (100%) rename {learner => src/learner}/objective.h (100%) rename {tree => src/tree}/model.h (100%) rename {tree => src/tree}/param.h (100%) rename {tree => src/tree}/updater.h (100%) rename {tree => src/tree}/updater_colmaker-inl.hpp (100%) rename {tree => src/tree}/updater_prune-inl.hpp (100%) rename {utils => src/utils}/config.h (100%) rename {utils => src/utils}/fmap.h (100%) rename {utils => src/utils}/io.h (100%) rename {utils => src/utils}/iterator.h (100%) rename {utils => src/utils}/matrix_csr.h (100%) rename {utils => src/utils}/omp.h (100%) rename {utils => src/utils}/random.h (100%) rename {utils => src/utils}/utils.h (100%) delete mode 100644 xgunity.cpp diff --git a/Makefile b/Makefile index 3a816b78e..61d3f6de0 100644 --- a/Makefile +++ b/Makefile @@ -10,7 +10,7 @@ OBJ = all: $(BIN) $(OBJ) export LDFLAGS= -pthread -lm -xgunity.exe: xgunity.cpp +xgunity.exe: src/xgunity.cpp $(BIN) : diff --git a/data.h b/src/data.h similarity index 100% rename from data.h rename to src/data.h diff --git a/gbm/gbm.h b/src/gbm/gbm.h similarity index 100% rename from gbm/gbm.h rename to src/gbm/gbm.h diff --git a/gbm/gbtree-inl.hpp b/src/gbm/gbtree-inl.hpp similarity index 100% rename from gbm/gbtree-inl.hpp rename to src/gbm/gbtree-inl.hpp diff --git a/learner/dmatrix.h b/src/learner/dmatrix.h similarity index 100% rename from learner/dmatrix.h rename to src/learner/dmatrix.h diff --git a/learner/evaluation-inl.hpp b/src/learner/evaluation-inl.hpp similarity index 100% rename from learner/evaluation-inl.hpp rename to src/learner/evaluation-inl.hpp diff --git a/learner/evaluation.h b/src/learner/evaluation.h similarity index 100% rename from learner/evaluation.h rename to src/learner/evaluation.h diff --git a/learner/helper_utils.h b/src/learner/helper_utils.h similarity index 100% rename 
from learner/helper_utils.h rename to src/learner/helper_utils.h diff --git a/learner/learner-inl.hpp b/src/learner/learner-inl.hpp similarity index 100% rename from learner/learner-inl.hpp rename to src/learner/learner-inl.hpp diff --git a/learner/objective-inl.hpp b/src/learner/objective-inl.hpp similarity index 100% rename from learner/objective-inl.hpp rename to src/learner/objective-inl.hpp diff --git a/learner/objective.h b/src/learner/objective.h similarity index 100% rename from learner/objective.h rename to src/learner/objective.h diff --git a/tree/model.h b/src/tree/model.h similarity index 100% rename from tree/model.h rename to src/tree/model.h diff --git a/tree/param.h b/src/tree/param.h similarity index 100% rename from tree/param.h rename to src/tree/param.h diff --git a/tree/updater.h b/src/tree/updater.h similarity index 100% rename from tree/updater.h rename to src/tree/updater.h diff --git a/tree/updater_colmaker-inl.hpp b/src/tree/updater_colmaker-inl.hpp similarity index 100% rename from tree/updater_colmaker-inl.hpp rename to src/tree/updater_colmaker-inl.hpp diff --git a/tree/updater_prune-inl.hpp b/src/tree/updater_prune-inl.hpp similarity index 100% rename from tree/updater_prune-inl.hpp rename to src/tree/updater_prune-inl.hpp diff --git a/utils/config.h b/src/utils/config.h similarity index 100% rename from utils/config.h rename to src/utils/config.h diff --git a/utils/fmap.h b/src/utils/fmap.h similarity index 100% rename from utils/fmap.h rename to src/utils/fmap.h diff --git a/utils/io.h b/src/utils/io.h similarity index 100% rename from utils/io.h rename to src/utils/io.h diff --git a/utils/iterator.h b/src/utils/iterator.h similarity index 100% rename from utils/iterator.h rename to src/utils/iterator.h diff --git a/utils/matrix_csr.h b/src/utils/matrix_csr.h similarity index 100% rename from utils/matrix_csr.h rename to src/utils/matrix_csr.h diff --git a/utils/omp.h b/src/utils/omp.h similarity index 100% rename from utils/omp.h rename to src/utils/omp.h diff --git a/utils/random.h b/src/utils/random.h similarity index 100% rename from utils/random.h rename to src/utils/random.h diff --git a/utils/utils.h b/src/utils/utils.h similarity index 100% rename from utils/utils.h rename to src/utils/utils.h diff --git a/xgunity.cpp b/xgunity.cpp deleted file mode 100644 index 9f5afdd19..000000000 --- a/xgunity.cpp +++ /dev/null @@ -1,27 +0,0 @@ -#include "tree/updater.h" -#include "gbm/gbm.h" -#include "utils/omp.h" -#include "utils/utils.h" -#include "utils/random.h" -#include "learner/objective.h" -#include "learner/learner-inl.hpp" - -// pass compile flag - -using namespace xgboost; -int main(void){ - - FMatrixS fmat(NULL); - tree::RegTree tree; - tree::TrainParam param; - std::vector gpair; - std::vector roots; - tree::IUpdater *up = tree::CreateUpdater("prune"); - gbm::IGradBooster *gbm = new gbm::GBTree(); - std::vector trees; - learner::IObjFunction *func = learner::CreateObjFunction("reg:linear"); - learner::BoostLearner *learner= new learner::BoostLearner(); - up->Update(gpair, fmat, roots, trees); - - return 0; -} From d9dbd1efc621593cccb9966a116ce8a666220148 Mon Sep 17 00:00:00 2001 From: tqchen Date: Fri, 15 Aug 2014 21:06:44 -0700 Subject: [PATCH 05/52] modify readme --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 5eae58052..9a0a77a17 100644 --- a/README.md +++ b/README.md @@ -16,7 +16,7 @@ Build ====== * Simply type make * If your compiler does not come with OpenMP support, it will fire an 
warning telling you that the code will compile into single thread mode, and you will get single thread xgboost
-  - You may get a error: -lgomp is not found, you can remove -fopenmp flag in Makefile to get single thread xgboost, or upgrade your compiler to compile multi-thread version
+* You may get an error: -lgomp is not found; you can remove the -fopenmp flag in the Makefile to get single-thread xgboost, or upgrade your compiler to compile the multi-threaded version
 
 Project Logical Layout
 =======

From ac1cc15b905cb4ce22c63530d096d2da0376737f Mon Sep 17 00:00:00 2001
From: tqchen <tianqi.tchen@gmail.com>
Date: Fri, 15 Aug 2014 21:24:23 -0700
Subject: [PATCH 06/52] pass fmatrix as const

---
 src/gbm/gbm.h                     | 2 +-
 src/gbm/gbtree-inl.hpp            | 4 ++--
 src/tree/updater.h                | 2 +-
 src/tree/updater_colmaker-inl.hpp | 8 +++++---
 src/tree/updater_prune-inl.hpp    | 3 ++-
 5 files changed, 11 insertions(+), 8 deletions(-)

diff --git a/src/gbm/gbm.h b/src/gbm/gbm.h
index 640bcbafc..5a9a3af98 100644
--- a/src/gbm/gbm.h
+++ b/src/gbm/gbm.h
@@ -46,7 +46,7 @@ class IGradBooster {
    * root_index.size() can be 0 which indicates that no pre-partition involved
    */
   virtual void DoBoost(const std::vector<bst_gpair> &gpair,
-                       FMatrix &fmat,
+                       const FMatrix &fmat,
                        const std::vector<unsigned> &root_index) = 0;
   /*!
    * \brief generate predictions for given feature matrix
diff --git a/src/gbm/gbtree-inl.hpp b/src/gbm/gbtree-inl.hpp
index d610ce5ad..5ccbcd1f1 100644
--- a/src/gbm/gbtree-inl.hpp
+++ b/src/gbm/gbtree-inl.hpp
@@ -83,7 +83,7 @@ class GBTree : public IGradBooster {
     utils::Assert(trees.size() == 0, "GBTree: model already initialized");
   }
   virtual void DoBoost(const std::vector<bst_gpair> &gpair,
-                       FMatrix &fmat,
+                       const FMatrix &fmat,
                        const std::vector<unsigned> &root_index) {
     if (mparam.num_output_group == 1) {
       this->BoostNewTrees(gpair, fmat, root_index, 0);
@@ -174,7 +174,7 @@ class GBTree : public IGradBooster {
   }
   // do group specific group
   inline void BoostNewTrees(const std::vector<bst_gpair> &gpair,
-                            FMatrix &fmat,
+                            const FMatrix &fmat,
                             const std::vector<unsigned> &root_index,
                             int bst_group) {
     this->InitUpdater();
diff --git a/src/tree/updater.h b/src/tree/updater.h
index 50b30e69f..5c4075b65 100644
--- a/src/tree/updater.h
+++ b/src/tree/updater.h
@@ -37,7 +37,7 @@ class IUpdater {
    * there can be multiple trees when we train random forest style model
    */
   virtual void Update(const std::vector<bst_gpair> &gpair,
-                      FMatrix &fmat,
+                      const FMatrix &fmat,
                       const std::vector<unsigned> &root_index,
                       const std::vector<RegTree*> &trees) = 0;
   // destructor
diff --git a/src/tree/updater_colmaker-inl.hpp b/src/tree/updater_colmaker-inl.hpp
index 483f0fda8..1868f8f41 100644
--- a/src/tree/updater_colmaker-inl.hpp
+++ b/src/tree/updater_colmaker-inl.hpp
@@ -24,7 +24,7 @@ class ColMaker: public IUpdater {
     param.SetParam(name, val);
   }
   virtual void Update(const std::vector<bst_gpair> &gpair,
-                      FMatrix &fmat,
+                      const FMatrix &fmat,
                       const std::vector<unsigned> &root_index,
                       const std::vector<RegTree*> &trees) {
@@ -71,7 +71,8 @@ class ColMaker: public IUpdater {
     // constructor
     explicit Builder(const TrainParam &param) : param(param) {}
     // update one tree, growing
-    virtual void Update(const std::vector<bst_gpair> &gpair, FMatrix &fmat,
+    virtual void Update(const std::vector<bst_gpair> &gpair,
+                        const FMatrix &fmat,
                         const std::vector<unsigned> &root_index,
                         RegTree *p_tree) {
       this->InitData(gpair, fmat, root_index, *p_tree);
@@ -100,7 +101,8 @@ class ColMaker: public IUpdater {
 
    private:
     // initialize temp data structure
-    inline void InitData(const std::vector<bst_gpair> &gpair, FMatrix &fmat,
+    inline void InitData(const std::vector<bst_gpair> &gpair,
+                         const FMatrix &fmat,
                          const std::vector<unsigned> &root_index,
                          const RegTree &tree) {
utils::Assert(tree.param.num_nodes == tree.param.num_roots, "ColMaker: can only grow new tree"); {// setup position diff --git a/src/tree/updater_prune-inl.hpp b/src/tree/updater_prune-inl.hpp index e96951684..bfb71b727 100644 --- a/src/tree/updater_prune-inl.hpp +++ b/src/tree/updater_prune-inl.hpp @@ -21,7 +21,8 @@ class TreePruner: public IUpdater { param.SetParam(name, val); } // update the tree, do pruning - virtual void Update(const std::vector &gpair, FMatrix &fmat, + virtual void Update(const std::vector &gpair, + const FMatrix &fmat, const std::vector &root_index, const std::vector &trees) { for (size_t i = 0; i < trees.size(); ++i) { From c4acb4fe0122f54ba728f2c5e82586e945685558 Mon Sep 17 00:00:00 2001 From: tqchen Date: Sat, 16 Aug 2014 14:06:31 -0700 Subject: [PATCH 07/52] check in io module --- Makefile | 7 +- README.md | 3 +- src/data.h | 92 ++++++++++++--- src/io/io.cpp | 16 +++ src/io/io.h | 34 ++++++ src/io/simple_dmatrix-inl.hpp | 216 ++++++++++++++++++++++++++++++++++ src/learner/dmatrix.h | 54 ++++++++- src/learner/learner-inl.hpp | 6 +- src/utils/io.h | 16 ++- src/utils/iterator.h | 6 +- 10 files changed, 417 insertions(+), 33 deletions(-) create mode 100644 src/io/io.cpp create mode 100644 src/io/io.h create mode 100644 src/io/simple_dmatrix-inl.hpp diff --git a/Makefile b/Makefile index 61d3f6de0..a8eb89c12 100644 --- a/Makefile +++ b/Makefile @@ -4,14 +4,14 @@ export CFLAGS = -Wall -O3 -msse2 -Wno-unknown-pragmas # specify tensor path BIN = xgunity.exe -OBJ = +OBJ = io.o .PHONY: clean all all: $(BIN) $(OBJ) export LDFLAGS= -pthread -lm xgunity.exe: src/xgunity.cpp - +io.o: src/io/io.cpp $(BIN) : $(CXX) $(CFLAGS) $(LDFLAGS) -o $@ $(filter %.cpp %.o %.c, $^) @@ -23,4 +23,5 @@ install: cp -f -r $(BIN) $(INSTALL_PATH) clean: - $(RM) $(OBJ) $(BIN) *~ */*~ + $(RM) $(OBJ) $(BIN) *~ */*~ */*/*~ + diff --git a/README.md b/README.md index 9a0a77a17..f5b64b78a 100644 --- a/README.md +++ b/README.md @@ -20,7 +20,8 @@ Build Project Logical Layout ======= -* Dependency order: learner->gbm->tree +* Dependency order: io->learner->gbm->tree + - All module depends on data.h * tree are implementations of tree construction algorithms. * gbm is gradient boosting interface, that takes trees and other base learner to do boosting. - gbm only takes gradient as sufficient statistics, it does not compute the gradient. diff --git a/src/data.h b/src/data.h index 468482446..e37565a20 100644 --- a/src/data.h +++ b/src/data.h @@ -1,5 +1,5 @@ -#ifndef XGBOOST_UNITY_DATA_H -#define XGBOOST_UNITY_DATA_H +#ifndef XGBOOST_DATA_H +#define XGBOOST_DATA_H /*! * \file data.h * \brief the input data structure for gradient boosting @@ -8,6 +8,7 @@ #include #include #include +#include #include #include "utils/io.h" #include "utils/utils.h" @@ -27,7 +28,7 @@ const float rt_eps = 1e-5f; const float rt_2eps = rt_eps * 2.0f; /*! \brief gradient statistics pair usually needed in gradient boosting */ -struct bst_gpair{ +struct bst_gpair { /*! \brief gradient statistics */ bst_float grad; /*! \brief second order gradient statistics */ @@ -139,7 +140,7 @@ class FMatrixInterface { */ inline float GetColDensity(size_t cidx) const; /*! \brief get the row iterator associated with FMatrix */ - virtual utils::IIterator* RowIterator(void) const = 0; + inline utils::IIterator* RowIterator(void) const; }; /*! @@ -180,11 +181,13 @@ class FMatrixS : public FMatrixInterface{ } }; /*! 
\brief constructor */ - explicit FMatrixS(utils::IIterator *base_iter) - : iter_(base_iter) {} + FMatrixS(void) { + iter_ = NULL; + num_buffered_row_ = 0; + } // destructor - virtual ~FMatrixS(void) { - delete iter_; + ~FMatrixS(void) { + if (iter_ != NULL) delete iter_; } /*! \return whether column access is enabled */ inline bool HaveColAccess(void) const { @@ -219,15 +222,75 @@ class FMatrixS : public FMatrixInterface{ size_t nmiss = num_buffered_row_ - (col_ptr_[cidx+1] - col_ptr_[cidx]); return 1.0f - (static_cast(nmiss)) / num_buffered_row_; } - virtual void InitColAccess(void) { + inline void InitColAccess(size_t max_nrow = ULONG_MAX) { if (this->HaveColAccess()) return; - const size_t max_nrow = std::numeric_limits::max(); this->InitColData(max_nrow); } /*! \brief get the row iterator associated with FMatrix */ - virtual utils::IIterator* RowIterator(void) const { + inline utils::IIterator* RowIterator(void) const { return iter_; } + /*! \brief set iterator */ + inline void set_iter(utils::IIterator *iter) { + this->iter_ = iter; + } + /*! + * \brief save column access data into stream + * \param fo output stream to save to + */ + inline void SaveColAccess(utils::IStream &fo) { + fo.Write(&num_buffered_row_, sizeof(num_buffered_row_)); + if (num_buffered_row_ != 0) { + SaveBinary(fo, col_ptr_, col_data_); + } + } + /*! + * \brief load column access data from stream + * \param fo output stream to load from + */ + inline void LoadColAccess(utils::IStream &fi) { + utils::Check(fi.Read(&num_buffered_row_, sizeof(num_buffered_row_)) != 0, + "invalid input file format"); + if (num_buffered_row_ != 0) { + LoadBinary(fi, &col_ptr_, &col_data_); + } + } + /*! + * \brief save data to binary stream + * \param fo output stream + * \param ptr pointer data + * \param data data content + */ + inline static void SaveBinary(utils::IStream &fo, + const std::vector &ptr, + const std::vector &data) { + size_t nrow = ptr.size() - 1; + fo.Write(&nrow, sizeof(size_t)); + fo.Write(&ptr[0], ptr.size() * sizeof(size_t)); + if (data.size() != 0) { + fo.Write(&data[0], data.size() * sizeof(SparseBatch::Entry)); + } + } + /*! + * \brief load data from binary stream + * \param fi input stream + * \param out_ptr pointer data + * \param out_data data content + */ + inline static void LoadBinary(utils::IStream &fi, + std::vector *out_ptr, + std::vector *out_data) { + size_t nrow; + utils::Check(fi.Read(&nrow, sizeof(size_t)) != 0, "invalid input file format"); + out_ptr->resize(nrow + 1); + utils::Check(fi.Read(&(*out_ptr)[0], out_ptr->size() * sizeof(size_t)) != 0, + "invalid input file format"); + out_data->resize(out_ptr->back()); + if (out_data->size() != 0) { + utils::Assert(fi.Read(&(*out_data)[0], out_data->size() * sizeof(SparseBatch::Entry)) != 0, + "invalid input file format"); + } + } protected: /*! @@ -278,16 +341,15 @@ class FMatrixS : public FMatrixInterface{ &col_data_[col_ptr_[i + 1]], Entry::CmpValue); } } - private: // --- data structure used to support InitColAccess -- utils::IIterator *iter_; /*! \brief number */ size_t num_buffered_row_; /*! \brief column pointer of CSC format */ - std::vector col_ptr_; + std::vector col_ptr_; /*! 
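
SaveBinary/LoadBinary above serialize one CSR block as a row count, the offset array, and the entry array, and the same static helpers are reused for both the row data and the transposed column data. A round-trip sketch using utils::FileStream (the file name is illustrative):

#include <vector>
#include "data.h"
#include "utils/io.h"
#include "utils/utils.h"

int main(void) {
  using namespace xgboost;
  // a 2-row CSR fragment: row 0 holds one entry, row 1 holds two
  std::vector<size_t> ptr;
  std::vector<SparseBatch::Entry> data(3);
  ptr.push_back(0); ptr.push_back(1); ptr.push_back(3);
  data[0].findex = 0; data[0].fvalue = 1.0f;
  data[1].findex = 2; data[1].fvalue = 0.5f;
  data[2].findex = 3; data[2].fvalue = 2.0f;
  utils::FileStream fo(utils::FopenCheck("csr.bin", "wb"));
  FMatrixS::SaveBinary(fo, ptr, data);
  fo.Close();
  std::vector<size_t> ptr2;
  std::vector<SparseBatch::Entry> data2;
  utils::FileStream fi(utils::FopenCheck("csr.bin", "rb"));
  FMatrixS::LoadBinary(fi, &ptr2, &data2);
  fi.Close();
  utils::Check(ptr2.size() == 3 && data2.size() == 3, "CSR round trip failed");
  return 0;
}
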
\brief column datas in CSC format */ - std::vector col_data_; + std::vector col_data_; }; } // namespace xgboost -#endif +#endif // XGBOOST_DATA_H diff --git a/src/io/io.cpp b/src/io/io.cpp new file mode 100644 index 000000000..93d91a61c --- /dev/null +++ b/src/io/io.cpp @@ -0,0 +1,16 @@ +#define _CRT_SECURE_NO_WARNINGS +#define _CRT_SECURE_NO_DEPRECATE +#include +#include "./io.h" +#include "simple_dmatrix-inl.hpp" +// implements data loads using dmatrix simple for now + +namespace xgboost { +namespace io { +DataMatrix* LoadDataMatrix(const char *fname) { + DMatrixSimple *dmat = new DMatrixSimple(); + dmat->CacheLoad(fname); + return dmat; +} +} // namespace io +} // namespace xgboost diff --git a/src/io/io.h b/src/io/io.h new file mode 100644 index 000000000..81f89de89 --- /dev/null +++ b/src/io/io.h @@ -0,0 +1,34 @@ +#ifndef XGBOOST_IO_IO_H_ +#define XGBOOST_IO_IO_H_ +/*! + * \file io.h + * \brief handles input data format of xgboost + * I/O module handles a specific DMatrix format + * \author Tianqi Chen + */ +#include "../data.h" +#include "../learner/dmatrix.h" + +namespace xgboost { +/*! \brief namespace related to data format */ +namespace io { +/*! \brief DMatrix object that I/O module support save/load */ +typedef learner::DMatrix DataMatrix; +/*! + * \brief load DataMatrix from stream + * \param fname file name to be loaded + * \return a loaded DMatrix + */ +DataMatrix* LoadDataMatrix(const char *fname); +/*! + * \brief save DataMatrix into stream, + * note: the saved dmatrix format may not be in exactly same as input + * SaveDMatrix will choose the best way to materialize the dmatrix. + * \param dmat the dmatrix to be saved + * \param fname file name to be savd + */ +void SaveDMatrix(const DataMatrix &dmat, const char *fname); + +} // namespace io +} // namespace xgboost +#endif // XGBOOST_IO_IO_H_ diff --git a/src/io/simple_dmatrix-inl.hpp b/src/io/simple_dmatrix-inl.hpp new file mode 100644 index 000000000..5da6d1c0b --- /dev/null +++ b/src/io/simple_dmatrix-inl.hpp @@ -0,0 +1,216 @@ +#ifndef XGBOOST_IO_SIMPLE_DMATRIX_INL_HPP_ +#define XGBOOST_IO_SIMPLE_DMATRIX_INL_HPP_ +/*! + * \file simple_dmatrix-inl.hpp + * \brief simple implementation of DMatrixS that can be used + * the data format of xgboost is templatized, which means it can accept + * any data structure that implements the function defined by FMatrix + * this file is a specific implementation of input data structure that can be used by BoostLearner + * \author Tianqi Chen + */ +#include +#include +#include +#include +#include "../data.h" +#include "../utils/utils.h" +#include "../learner/dmatrix.h" +#include "./io.h" + +namespace xgboost { +namespace io { +/*! \brief implementation of DataMatrix, in CSR format */ +class DMatrixSimple : public DataMatrix { + public: + // constructor + DMatrixSimple(void) { + this->fmat.set_iter(new OneBatchIter(this)); + this->Clear(); + } + // virtual destructor + virtual ~DMatrixSimple(void) {} + /*! \brief clear the storage */ + inline void Clear(void) { + row_ptr_.clear(); + row_ptr_.push_back(0); + row_data_.clear(); + info.Clear(); + } + /*! + * \brief add a row to the matrix + * \param feats features + * \return the index of added row + */ + inline size_t AddRow(const std::vector &feats) { + for (size_t i = 0; i < feats.size(); ++i) { + row_data_.push_back(feats[i]); + info.num_col = std::max(info.num_col, static_cast(feats[i].findex+1)); + } + row_ptr_.push_back(row_ptr_.back() + feats.size()); + info.num_row += 1; + return row_ptr_.size() - 2; + } + /*! 
+ * \brief load from text file + * \param fname name of text data + * \param silent whether print information or not + */ + inline void LoadText(const char* fname, bool silent = false) { + this->Clear(); + FILE* file = utils::FopenCheck(fname, "r"); + float label; bool init = true; + char tmp[1024]; + std::vector feats; + while (fscanf(file, "%s", tmp) == 1) { + SparseBatch::Entry e; + if (sscanf(tmp, "%u:%f", &e.findex, &e.fvalue) == 2) { + feats.push_back(e); + } else { + if (!init) { + info.labels.push_back(label); + this->AddRow(feats); + } + feats.clear(); + utils::Check(sscanf(tmp, "%f", &label) == 1, "invalid LibSVM format"); + init = false; + } + } + + info.labels.push_back(label); + this->AddRow(feats); + + if (!silent) { + printf("%lux%lu matrix with %lu entries is loaded from %s\n", + info.num_row, info.num_col, row_data_.size(), fname); + } + fclose(file); + // try to load in additional file + std::string name = fname; + std::string gname = name + ".group"; + if (info.TryLoadGroup(gname.c_str(), silent)) { + utils::Check(info.group_ptr.back() == info.num_row, + "DMatrix: group data does not match the number of rows in features"); + } + std::string wname = name + ".weight"; + if (info.TryLoadWeight(wname.c_str(), silent)) { + utils::Check(info.weights.size() == info.num_row, + "DMatrix: weight data does not match the number of rows in features"); + } + } + /*! + * \brief load from binary file + * \param fname name of binary data + * \param silent whether print information or not + * \return whether loading is success + */ + inline bool LoadBinary(const char* fname, bool silent = false) { + FILE *fp = fopen64(fname, "rb"); + if (fp == NULL) return false; + utils::FileStream fs(fp); + int magic; + utils::Check(fs.Read(&magic, sizeof(magic)) != 0, "invalid input file format"); + utils::Check(magic == kMagic, "invalid format,magic number mismatch"); + + info.LoadBinary(fs); + FMatrixS::LoadBinary(fs, &row_ptr_, &row_data_); + fmat.LoadColAccess(fs); + fs.Close(); + + if (!silent) { + printf("%lux%lu matrix with %lu entries is loaded from %s\n", + info.num_row, info.num_col, row_data_.size(), fname); + if (info.group_ptr.size() != 0) { + printf("data contains %u groups\n", (unsigned)info.group_ptr.size()-1); + } + } + return true; + } + /*! + * \brief save to binary file + * \param fname name of binary data + * \param silent whether print information or not + */ + inline void SaveBinary(const char* fname, bool silent = false) { + utils::FileStream fs(utils::FopenCheck(fname, "wb")); + int magic = kMagic; + fs.Write(&magic, sizeof(magic)); + + info.SaveBinary(fs); + FMatrixS::SaveBinary(fs, row_ptr_, row_data_); + fmat.SaveColAccess(fs); + fs.Close(); + + if (!silent) { + printf("%lux%lu matrix with %lu entries is saved to %s\n", + info.num_row, info.num_col, row_data_.size(), fname); + if (info.group_ptr.size() != 0) { + printf("data contains %lu groups\n", info.group_ptr.size()-1); + } + } + } + /*! 
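
Besides parsing LibSVM text, DMatrixSimple can be filled programmatically through AddRow; writing the result with SaveBinary produces exactly the image that CacheLoad later resolves through the .buffer convention. A small sketch (the file name is illustrative):

#include <vector>
#include "io/simple_dmatrix-inl.hpp"

int main(void) {
  using namespace xgboost;
  io::DMatrixSimple mat;
  // one row with the single feature 0:1.5, labeled positive
  std::vector<SparseBatch::Entry> feats(1);
  feats[0].findex = 0; feats[0].fvalue = 1.5f;
  mat.info.labels.push_back(1.0f);
  mat.AddRow(feats);                   // keeps info.num_row/num_col bookkeeping in sync
  mat.SaveBinary("train.txt.buffer");  // what CacheLoad("train.txt") would find next time
  return 0;
}
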
+ * \brief cache load data given a file name, if filename ends with .buffer, direct load binary + * otherwise the function will first check if fname + '.buffer' exists, + * if binary buffer exists, it will reads from binary buffer, otherwise, it will load from text file, + * and try to create a buffer file + * \param fname name of binary data + * \param silent whether print information or not + * \param savebuffer whether do save binary buffer if it is text + */ + inline void CacheLoad(const char *fname, bool silent = false, bool savebuffer = true) { + int len = strlen(fname); + if (len > 8 && !strcmp(fname + len - 7, ".buffer")) { + if (!this->LoadBinary(fname, silent)) { + utils::Error("can not open file \"%s\"", fname); + } + return; + } + char bname[1024]; + snprintf(bname, sizeof(bname), "%s.buffer", fname); + if (!this->LoadBinary(bname, silent)) { + this->LoadText(fname, silent); + if (savebuffer) this->SaveBinary(bname, silent); + } + } + // data fields + /*! \brief row pointer of CSR sparse storage */ + std::vector row_ptr_; + /*! \brief data in the row */ + std::vector row_data_; + /*! \brief magic number used to identify DMatrix */ + static const int kMagic = 0xff01; + + protected: + // one batch iterator that return content in the matrix + struct OneBatchIter: utils::IIterator { + OneBatchIter(DMatrixSimple *parent) + : at_first_(true), parent_(parent) {} + virtual ~OneBatchIter(void) {} + virtual void BeforeFirst(void) { + at_first_ = true; + } + virtual bool Next(void) { + if (!at_first_) return false; + at_first_ = false; + batch_.size = parent_->row_ptr_.size() - 1; + batch_.base_rowid = 0; + batch_.row_ptr = &parent_->row_ptr_[0]; + batch_.data_ptr = &parent_->row_data_[0]; + return true; + } + virtual const SparseBatch &Value(void) const { + return batch_; + } + + private: + // whether is at first + bool at_first_; + // pointer to parient + DMatrixSimple *parent_; + // temporal space for batch + SparseBatch batch_; + }; +}; +} // namespace io +} // namespace xgboost +#endif // namespace XGBOOST_IO_SIMPLE_DMATRIX_INL_HPP_ diff --git a/src/learner/dmatrix.h b/src/learner/dmatrix.h index 522be9b95..88a865399 100644 --- a/src/learner/dmatrix.h +++ b/src/learner/dmatrix.h @@ -10,10 +10,14 @@ namespace xgboost { namespace learner { -/*! +/*! * \brief meta information needed in training, including label, weight */ struct MetaInfo { + /*! \brief number of rows in the data */ + size_t num_row; + /*! \brief number of columns in the data */ + size_t num_col; /*! \brief label of each instance */ std::vector labels; /*! @@ -28,6 +32,15 @@ struct MetaInfo { * can be used for multi task setting */ std::vector root_index; + MetaInfo(void) : num_row(0), num_col(0) {} + /*! \brief clear all the information */ + inline void Clear(void) { + labels.clear(); + group_ptr.clear(); + weights.clear(); + root_index.clear(); + num_row = num_col = 0; + } /*! 
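
OneBatchIter above is the degenerate case of the iterator protocol: the whole matrix arrives as a single SparseBatch. Consumers are nevertheless written against the general streaming shape, so out-of-core sources can slot in later. The canonical read loop under that protocol (the file name is hypothetical):

#include <cstdio>
#include "io/io.h"

int main(void) {
  using namespace xgboost;
  io::DataMatrix *mat = io::LoadDataMatrix("train.txt");
  utils::IIterator<SparseBatch> *it = mat->fmat.RowIterator();
  it->BeforeFirst();
  while (it->Next()) {  // DMatrixSimple yields exactly one batch
    const SparseBatch &batch = it->Value();
    for (size_t i = 0; i < batch.size; ++i) {
      SparseBatch::Inst inst = batch[i];
      for (bst_uint j = 0; j < inst.length; ++j) {
        printf("row %lu: %u:%g\n",
               (unsigned long)(batch.base_rowid + i), inst[j].findex, inst[j].fvalue);
      }
    }
  }
  delete mat;
  return 0;
}
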
\brief get weight of each instances */ inline float GetWeight(size_t i) const { if(weights.size() != 0) { @@ -45,20 +58,53 @@ struct MetaInfo { } } inline void SaveBinary(utils::IStream &fo) { + fo.Write(&num_row, sizeof(num_row)); + fo.Write(&num_col, sizeof(num_col)); fo.Write(labels); fo.Write(group_ptr); fo.Write(weights); fo.Write(root_index); } inline void LoadBinary(utils::IStream &fi) { + utils::Check(fi.Read(&num_row, sizeof(num_row)), "MetaInfo: invalid format"); + utils::Check(fi.Read(&num_col, sizeof(num_col)), "MetaInfo: invalid format"); utils::Check(fi.Read(&labels), "MetaInfo: invalid format"); utils::Check(fi.Read(&group_ptr), "MetaInfo: invalid format"); utils::Check(fi.Read(&weights), "MetaInfo: invalid format"); utils::Check(fi.Read(&root_index), "MetaInfo: invalid format"); } + // try to load group information from file, if exists + inline bool TryLoadGroup(const char* fname, bool silent = false) { + FILE *fi = fopen64(fname, "r"); + if (fi == NULL) return false; + group_ptr.push_back(0); + unsigned nline; + while (fscanf(fi, "%u", &nline) == 1) { + group_ptr.push_back(group_ptr.back()+nline); + } + if (!silent) { + printf("%lu groups are loaded from %s\n", group_ptr.size()-1, fname); + } + fclose(fi); + return true; + } + // try to load weight information from file, if exists + inline bool TryLoadWeight(const char* fname, bool silent = false) { + FILE *fi = fopen64(fname, "r"); + if (fi == NULL) return false; + float wt; + while (fscanf(fi, "%f", &wt) == 1) { + weights.push_back(wt); + } + if (!silent) { + printf("loading weight from %s\n", fname); + } + fclose(fi); + return true; + } }; -/*! +/*! * \brief data object used for learning, * \tparam FMatrix type of feature data source */ @@ -66,8 +112,6 @@ template struct DMatrix { /*! \brief meta information about the dataset */ MetaInfo info; - /*! \brief number of rows in the DMatrix */ - size_t num_row; /*! \brief feature matrix about data content */ FMatrix fmat; /*! @@ -77,6 +121,8 @@ struct DMatrix { void *cache_learner_ptr_; /*! \brief default constructor */ DMatrix(void) : cache_learner_ptr_(NULL) {} + // virtual destructor + virtual ~DMatrix(void){} }; } // namespace learner diff --git a/src/learner/learner-inl.hpp b/src/learner/learner-inl.hpp index 62f852a12..e26f6a52d 100644 --- a/src/learner/learner-inl.hpp +++ b/src/learner/learner-inl.hpp @@ -55,9 +55,9 @@ class BoostLearner { if (dupilicate) continue; // set mats[i]'s cache learner pointer to this mats[i]->cache_learner_ptr_ = this; - cache_.push_back(CacheEntry(mats[i], buffer_size, mats[i]->num_row)); - buffer_size += mats[i]->num_row; - num_feature = std::max(num_feature, static_cast(mats[i]->num_col)); + cache_.push_back(CacheEntry(mats[i], buffer_size, mats[i]->info.num_row)); + buffer_size += mats[i]->info.num_row; + num_feature = std::max(num_feature, static_cast(mats[i]->info.num_col)); } char str_temp[25]; if (num_feature > mparam.num_feature) { diff --git a/src/utils/io.h b/src/utils/io.h index b52acf764..a18e6067a 100644 --- a/src/utils/io.h +++ b/src/utils/io.h @@ -42,7 +42,9 @@ class IStream { inline void Write(const std::vector &vec) { uint64_t sz = vec.size(); this->Write(&sz, sizeof(sz)); - this->Write(&vec[0], sizeof(T) * sz); + if (sz != 0) { + this->Write(&vec[0], sizeof(T) * sz); + } } /*! 
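
TryLoadGroup and TryLoadWeight above give MetaInfo its sidecar-file convention: next to train.txt, a train.txt.group holds one instance count per query group and is folded into the cumulative group_ptr, while a train.txt.weight holds one float per row. For example, a hypothetical 5-row ranking set whose .group file contains

3
2

yields group_ptr = {0, 3, 5}: rows 0-2 form the first query group and rows 3-4 the second, and the final boundary must equal num_row, which DMatrixSimple::LoadText checks after loading.
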
* \brief binary load a vector @@ -54,7 +56,9 @@ class IStream { uint64_t sz; if (this->Read(&sz, sizeof(sz)) == 0) return false; out_vec->resize(sz); - if (this->Read(&(*out_vec)[0], sizeof(T) * sz) == 0) return false; + if (sz != 0) { + if (this->Read(&(*out_vec)[0], sizeof(T) * sz) == 0) return false; + } return true; } /*! @@ -64,7 +68,9 @@ class IStream { inline void Write(const std::string &str) { uint64_t sz = str.length(); this->Write(&sz, sizeof(sz)); - this->Write(&str[0], sizeof(char) * sz); + if (sz != 0) { + this->Write(&str[0], sizeof(char) * sz); + } } /*! * \brief binary load a string @@ -75,7 +81,9 @@ class IStream { uint64_t sz; if (this->Read(&sz, sizeof(sz)) == 0) return false; out_str->resize(sz); - if (this->Read(&(*out_str)[0], sizeof(char) * sz) == 0) return false; + if (sz != 0) { + if (this->Read(&(*out_str)[0], sizeof(char) * sz) == 0) return false; + } return true; } }; diff --git a/src/utils/iterator.h b/src/utils/iterator.h index 32ab64aa9..3f5b23310 100644 --- a/src/utils/iterator.h +++ b/src/utils/iterator.h @@ -18,11 +18,11 @@ class IIterator { /*! * \brief set the parameter * \param name name of parameter - * \param val value of parameter + * \param val value of parameter */ - virtual void SetParam(const char *name, const char *val) = 0; + virtual void SetParam(const char *name, const char *val) {} /*! \brief initalize the iterator so that we can use the iterator */ - virtual void Init(void) = 0; + virtual void Init(void) {} /*! \brief set before first of the item */ virtual void BeforeFirst(void) = 0; /*! \brief move to next item */ From 2c969ecf145e04df1844f2a5e09ffe60270b2d6f Mon Sep 17 00:00:00 2001 From: tqchen Date: Sat, 16 Aug 2014 15:44:35 -0700 Subject: [PATCH 08/52] first version that reproduce binary classification demo --- Makefile | 7 +- src/data.h | 7 +- src/gbm/gbm.h | 8 + src/gbm/gbtree-inl.hpp | 7 + src/io/io.cpp | 4 +- src/io/io.h | 4 +- src/learner/evaluation-inl.hpp | 1 + src/learner/learner-inl.hpp | 18 ++- src/learner/objective-inl.hpp | 1 + src/tree/updater_colmaker-inl.hpp | 5 +- src/xgboost_main.cpp | 244 ++++++++++++++++++++++++++++++ 11 files changed, 286 insertions(+), 20 deletions(-) create mode 100644 src/xgboost_main.cpp diff --git a/Makefile b/Makefile index a8eb89c12..0a5fd7047 100644 --- a/Makefile +++ b/Makefile @@ -3,15 +3,15 @@ export CXX = clang++ export CFLAGS = -Wall -O3 -msse2 -Wno-unknown-pragmas # specify tensor path -BIN = xgunity.exe +BIN = xgboost OBJ = io.o .PHONY: clean all all: $(BIN) $(OBJ) export LDFLAGS= -pthread -lm -xgunity.exe: src/xgunity.cpp -io.o: src/io/io.cpp +xgboost: src/xgboost_main.cpp io.o src/data.h src/tree/*.h src/tree/*.hpp src/gbm/*.h src/gbm/*.hpp src/utils/*.h src/learner/*.h src/learner/*.hpp +io.o: src/io/io.cpp src/data.h src/utils/*.h $(BIN) : $(CXX) $(CFLAGS) $(LDFLAGS) -o $@ $(filter %.cpp %.o %.c, $^) @@ -24,4 +24,3 @@ install: clean: $(RM) $(OBJ) $(BIN) *~ */*~ */*/*~ - diff --git a/src/data.h b/src/data.h index e37565a20..fe81b4dad 100644 --- a/src/data.h +++ b/src/data.h @@ -310,12 +310,11 @@ class FMatrixS : public FMatrixInterface{ const size_t nbatch = std::min(batch.size, max_nrow - batch.base_rowid); for (size_t i = 0; i < nbatch; ++i, ++num_buffered_row_) { SparseBatch::Inst inst = batch[i]; - for (bst_uint j = 0; j < batch.size; ++j) { + for (bst_uint j = 0; j < inst.length; ++j) { builder.AddBudget(inst[j].findex); } } } - builder.InitStorage(); iter_->BeforeFirst(); @@ -325,9 +324,9 @@ class FMatrixS : public FMatrixInterface{ const size_t nbatch = 
std::min(batch.size, max_nrow - batch.base_rowid); for (size_t i = 0; i < nbatch; ++i) { SparseBatch::Inst inst = batch[i]; - for (bst_uint j = 0; j < batch.size; ++j) { + for (bst_uint j = 0; j < inst.length; ++j) { builder.PushElem(inst[j].findex, - Entry((bst_uint)(batch.base_rowid+j), + Entry((bst_uint)(batch.base_rowid+i), inst[j].fvalue)); } } diff --git a/src/gbm/gbm.h b/src/gbm/gbm.h index 5a9a3af98..dcc204868 100644 --- a/src/gbm/gbm.h +++ b/src/gbm/gbm.h @@ -7,6 +7,7 @@ */ #include #include "../data.h" +#include "../utils/fmap.h" namespace xgboost { /*! \brief namespace for gradient booster */ @@ -63,6 +64,13 @@ class IGradBooster { int64_t buffer_offset, const std::vector &root_index, std::vector *out_preds) = 0; + /*! + * \brief dump the model in text format + * \param fmap feature map that may help give interpretations of feature + * \param option extra option of the dumo model + * \return a vector of dump for boosters + */ + virtual std::vector DumpModel(const utils::FeatMap& fmap, int option) = 0; // destrcutor virtual ~IGradBooster(void){} }; diff --git a/src/gbm/gbtree-inl.hpp b/src/gbm/gbtree-inl.hpp index 5ccbcd1f1..1fc90e40c 100644 --- a/src/gbm/gbtree-inl.hpp +++ b/src/gbm/gbtree-inl.hpp @@ -141,6 +141,13 @@ class GBTree : public IGradBooster { } } } + virtual std::vector DumpModel(const utils::FeatMap& fmap, int option) { + std::vector dump; + for (size_t i = 0; i < trees.size(); i++) { + dump.push_back(trees[i]->DumpModel(fmap, option&1)); + } + return dump; + } protected: // clear the model diff --git a/src/io/io.cpp b/src/io/io.cpp index 93d91a61c..2cf42aadf 100644 --- a/src/io/io.cpp +++ b/src/io/io.cpp @@ -7,9 +7,9 @@ namespace xgboost { namespace io { -DataMatrix* LoadDataMatrix(const char *fname) { +DataMatrix* LoadDataMatrix(const char *fname, bool silent, bool savebuffer) { DMatrixSimple *dmat = new DMatrixSimple(); - dmat->CacheLoad(fname); + dmat->CacheLoad(fname, silent, savebuffer); return dmat; } } // namespace io diff --git a/src/io/io.h b/src/io/io.h index 81f89de89..d6d280d5e 100644 --- a/src/io/io.h +++ b/src/io/io.h @@ -17,9 +17,11 @@ typedef learner::DMatrix DataMatrix; /*! * \brief load DataMatrix from stream * \param fname file name to be loaded + * \param silent whether print message during loading + * \param savebuffer whether temporal buffer the file if the file is in text format * \return a loaded DMatrix */ -DataMatrix* LoadDataMatrix(const char *fname); +DataMatrix* LoadDataMatrix(const char *fname, bool silent = false, bool savebuffer = true); /*! * \brief save DataMatrix into stream, * note: the saved dmatrix format may not be in exactly same as input diff --git a/src/learner/evaluation-inl.hpp b/src/learner/evaluation-inl.hpp index a4ac1e462..184197d45 100644 --- a/src/learner/evaluation-inl.hpp +++ b/src/learner/evaluation-inl.hpp @@ -9,6 +9,7 @@ #include #include #include +#include #include #include "./evaluation.h" #include "./helper_utils.h" diff --git a/src/learner/learner-inl.hpp b/src/learner/learner-inl.hpp index e26f6a52d..3c04837c3 100644 --- a/src/learner/learner-inl.hpp +++ b/src/learner/learner-inl.hpp @@ -120,8 +120,8 @@ class BoostLearner { } inline void SaveModel(utils::IStream &fo) const { fo.Write(&mparam, sizeof(ModelParam)); - fo.Write(&name_obj_); - fo.Write(&name_gbm_); + fo.Write(name_obj_); + fo.Write(name_gbm_); gbm_->SaveModel(fo); } /*! 
@@ -139,7 +139,7 @@ class BoostLearner { * \param p_train pointer to the data matrix */ inline void UpdateOneIter(int iter, DMatrix *p_train) { - this->PredictRaw(preds_, *p_train); + this->PredictRaw(*p_train, &preds_); obj_->GetGradient(preds_, p_train->info, iter, &gpair_); gbm_->DoBoost(gpair_, p_train->fmat, p_train->info.root_index); } @@ -189,7 +189,11 @@ class BoostLearner { this->PredictRaw(data, out_preds); obj_->PredTransform(out_preds); } - + /*! \brief dump model out */ + inline std::vector DumpModel(const utils::FeatMap& fmap, int option) { + return gbm_->DumpModel(fmap, option); + } + protected: /*! * \brief initialize the objective function and GBM, @@ -212,9 +216,9 @@ class BoostLearner { * \param out_preds output vector that stores the prediction */ inline void PredictRaw(const DMatrix &data, - std::vector *out_preds) { + std::vector *out_preds) const { gbm_->Predict(data.fmat, this->FindBufferOffset(data), - data.info, out_preds); + data.info.root_index, out_preds); } /*! \brief training parameter for regression */ @@ -280,7 +284,7 @@ class BoostLearner { inline int64_t FindBufferOffset(const DMatrix &mat) const { for (size_t i = 0; i < cache_.size(); ++i) { if (cache_[i].mat_ == &mat && mat.cache_learner_ptr_ == this) { - if (cache_[i].num_row_ == mat.num_row) { + if (cache_[i].num_row_ == mat.info.num_row) { return cache_[i].buffer_offset_; } } diff --git a/src/learner/objective-inl.hpp b/src/learner/objective-inl.hpp index 7aa11d338..d5cc97fcf 100644 --- a/src/learner/objective-inl.hpp +++ b/src/learner/objective-inl.hpp @@ -6,6 +6,7 @@ * \author Tianqi Chen, Kailong Chen */ #include +#include #include "./objective.h" namespace xgboost { diff --git a/src/tree/updater_colmaker-inl.hpp b/src/tree/updater_colmaker-inl.hpp index 1868f8f41..f0624bdeb 100644 --- a/src/tree/updater_colmaker-inl.hpp +++ b/src/tree/updater_colmaker-inl.hpp @@ -27,7 +27,6 @@ class ColMaker: public IUpdater { const FMatrix &fmat, const std::vector &root_index, const std::vector &trees) { - for (size_t i = 0; i < trees.size(); ++i) { Builder builder(param); builder.Update(gpair, fmat, root_index, trees[i]); @@ -132,7 +131,9 @@ class ColMaker: public IUpdater { // initialize feature index unsigned ncol = static_cast(fmat.NumCol()); for (unsigned i = 0; i < ncol; ++i) { - if (fmat.GetColSize(i) != 0) feat_index.push_back(i); + if (fmat.GetColSize(i) != 0) { + feat_index.push_back(i); + } } unsigned n = static_cast(param.colsample_bytree * feat_index.size()); random::Shuffle(feat_index); diff --git a/src/xgboost_main.cpp b/src/xgboost_main.cpp new file mode 100644 index 000000000..16139f0d8 --- /dev/null +++ b/src/xgboost_main.cpp @@ -0,0 +1,244 @@ +#define _CRT_SECURE_NO_WARNINGS +#define _CRT_SECURE_NO_DEPRECATE + +#include +#include +#include +#include "io/io.h" +#include "utils/utils.h" +#include "utils/config.h" +#include "learner/learner-inl.hpp" + +namespace xgboost { +/*! 
+ * \brief wrapping the training process
+ */
+class BoostLearnTask{
+ public:
+  inline int Run(int argc, char *argv[]) {
+    if (argc < 2) {
+      printf("Usage: <config>\n");
+      return 0;
+    }
+    utils::ConfigIterator itr(argv[1]);
+    while (itr.Next()) {
+      this->SetParam(itr.name(), itr.val());
+    }
+    for (int i = 2; i < argc; ++i) {
+      char name[256], val[256];
+      if (sscanf(argv[i], "%[^=]=%s", name, val) == 2) {
+        this->SetParam(name, val);
+      }
+    }
+    this->InitData();
+    this->InitLearner();
+    if (task == "dump") {
+      this->TaskDump(); return 0;
+    }
+    if (task == "eval") {
+      this->TaskEval(); return 0;
+    }
+    if (task == "pred") {
+      this->TaskPred();
+    } else {
+      this->TaskTrain();
+    }
+    return 0;
+  }
+  inline void SetParam(const char *name, const char *val) {
+    if (!strcmp("silent", name)) silent = atoi(val);
+    if (!strcmp("use_buffer", name)) use_buffer = atoi(val);
+    if (!strcmp("seed", name)) random::Seed(atoi(val));
+    if (!strcmp("num_round", name)) num_round = atoi(val);
+    if (!strcmp("save_period", name)) save_period = atoi(val);
+    if (!strcmp("eval_train", name)) eval_train = atoi(val);
+    if (!strcmp("task", name)) task = val;
+    if (!strcmp("data", name)) train_path = val;
+    if (!strcmp("test:data", name)) test_path = val;
+    if (!strcmp("model_in", name)) model_in = val;
+    if (!strcmp("model_out", name)) model_out = val;
+    if (!strcmp("model_dir", name)) model_dir_path = val;
+    if (!strcmp("fmap", name)) name_fmap = val;
+    if (!strcmp("name_dump", name)) name_dump = val;
+    if (!strcmp("name_pred", name)) name_pred = val;
+    if (!strcmp("dump_stats", name)) dump_model_stats = atoi(val);
+    if (!strncmp("eval[", name, 5)) {
+      char evname[256];
+      utils::Assert(sscanf(name, "eval[%[^]]", evname) == 1, "must specify evaluation name for display");
+      eval_data_names.push_back(std::string(evname));
+      eval_data_paths.push_back(std::string(val));
+    }
+    learner.SetParam(name, val);
+  }
+ public:
+  BoostLearnTask(void) {
+    // default parameters
+    silent = 0;
+    use_buffer = 1;
+    num_round = 10;
+    save_period = 0;
+    eval_train = 0;
+    dump_model_stats = 0;
+    task = "train";
+    model_in = "NULL";
+    model_out = "NULL";
+    name_fmap = "NULL";
+    name_pred = "pred.txt";
+    name_dump = "dump.txt";
+    model_dir_path = "./";
+    data = NULL;
+  }
+  ~BoostLearnTask(void){
+    for (size_t i = 0; i < deval.size(); i++){
+      delete deval[i];
+    }
+    if (data != NULL) delete data;
+  }
+ private:
+  inline void InitData(void) {
+    if (name_fmap != "NULL") fmap.LoadText(name_fmap.c_str());
+    if (task == "dump") return;
+    if (task == "pred") {
+      data = io::LoadDataMatrix(test_path.c_str(), silent != 0, use_buffer != 0);
+    } else {
+      // training
+      data = io::LoadDataMatrix(train_path.c_str(), silent != 0, use_buffer != 0);
+      { // initialize column access
+        data->fmat.InitColAccess();
+      }
+      utils::Assert(eval_data_names.size() == eval_data_paths.size(), "BUG");
+      for (size_t i = 0; i < eval_data_names.size(); ++i) {
+        deval.push_back(io::LoadDataMatrix(eval_data_paths[i].c_str(), silent != 0, use_buffer != 0));
+        devalall.push_back(deval.back());
+      }
+
+      std::vector<io::DataMatrix*> dcache(1, data);
+      for (size_t i = 0; i < deval.size(); ++i) {
+        dcache.push_back(deval[i]);
+      }
+      // set cache data to be all training and evaluation data
+      learner.SetCacheData(dcache);
+
+      // add training set to evaluation set if needed
+      if (eval_train != 0) {
+        devalall.push_back(data);
+        eval_data_names.push_back(std::string("train"));
+      }
+    }
+  }
+  inline void InitLearner(void) {
+    if (model_in != "NULL"){
+      utils::FileStream fi(utils::FopenCheck(model_in.c_str(), "rb"));
+
learner.LoadModel(fi); + fi.Close(); + } else { + utils::Assert(task == "train", "model_in not specified"); + learner.InitModel(); + } + } + inline void TaskTrain(void) { + const time_t start = time(NULL); + unsigned long elapsed = 0; + for (int i = 0; i < num_round; ++i) { + elapsed = (unsigned long)(time(NULL) - start); + if (!silent) printf("boosting round %d, %lu sec elapsed\n", i, elapsed); + learner.UpdateOneIter(i,data); + std::string res = learner.EvalOneIter(i, devalall, eval_data_names); + fprintf(stderr, "%s\n", res.c_str()); + if (save_period != 0 && (i + 1) % save_period == 0) { + this->SaveModel(i); + } + elapsed = (unsigned long)(time(NULL) - start); + } + // always save final round + if ((save_period == 0 || num_round % save_period != 0) && model_out != "NONE") { + if (model_out == "NULL"){ + this->SaveModel(num_round - 1); + } else { + this->SaveModel(model_out.c_str()); + } + } + if (!silent){ + printf("\nupdating end, %lu sec in all\n", elapsed); + } + } + inline void TaskEval(void) { + learner.EvalOneIter(0, devalall, eval_data_names); + } + inline void TaskDump(void){ + FILE *fo = utils::FopenCheck(name_dump.c_str(), "w"); + std::vector dump = learner.DumpModel(fmap, dump_model_stats != 0); + for (size_t i = 0; i < dump.size(); ++ i) { + fprintf(fo,"booster[%lu]:\n", i); + fprintf(fo,"%s", dump[i].c_str()); + } + fclose(fo); + } + inline void SaveModel(const char *fname) const { + utils::FileStream fo(utils::FopenCheck(fname, "wb")); + learner.SaveModel(fo); + fo.Close(); + } + inline void SaveModel(int i) const { + char fname[256]; + sprintf(fname, "%s/%04d.model", model_dir_path.c_str(), i + 1); + this->SaveModel(fname); + } + inline void TaskPred(void) { + std::vector preds; + if (!silent) printf("start prediction...\n"); + learner.Predict(*data, &preds); + if (!silent) printf("writing prediction to %s\n", name_pred.c_str()); + FILE *fo = utils::FopenCheck(name_pred.c_str(), "w"); + for (size_t i = 0; i < preds.size(); i++) { + fprintf(fo, "%f\n", preds[i]); + } + fclose(fo); + } + private: + /* \brief whether silent */ + int silent; + /* \brief whether use auto binary buffer */ + int use_buffer; + /* \brief whether evaluate training statistics */ + int eval_train; + /* \brief number of boosting iterations */ + int num_round; + /* \brief the period to save the model, 0 means only save the final round model */ + int save_period; + /* \brief the path of training/test data set */ + std::string train_path, test_path; + /* \brief the path of test model file, or file to restart training */ + std::string model_in; + /* \brief the path of final model file, to be saved */ + std::string model_out; + /* \brief the path of directory containing the saved models */ + std::string model_dir_path; + /* \brief task to perform */ + std::string task; + /* \brief name of predict file */ + std::string name_pred; + /* \brief whether dump statistics along with model */ + int dump_model_stats; + /* \brief name of feature map */ + std::string name_fmap; + /* \brief name of dump file */ + std::string name_dump; + /* \brief the paths of validation data sets */ + std::vector eval_data_paths; + /* \brief the names of the evaluation data used in output log */ + std::vector eval_data_names; + private: + io::DataMatrix* data; + std::vector deval; + std::vector devalall; + utils::FeatMap fmap; + learner::BoostLearner learner; +}; +} + +int main(int argc, char *argv[]){ + xgboost::random::Seed(0); + xgboost::BoostLearnTask tsk; + return tsk.Run(argc, argv); +} From 
af100dd869ec970aee841d4965bfa56cd42dfd48 Mon Sep 17 00:00:00 2001 From: tqchen Date: Sun, 17 Aug 2014 17:43:46 -0700 Subject: [PATCH 09/52] remake the wrapper --- Makefile | 8 +- README.md | 18 +-- python/Makefile | 26 --- python/README.md | 2 + python/xgboost.py | 6 +- python/xgboost_python.cpp | 297 ---------------------------------- python/xgboost_python.h | 209 ------------------------ python/xgboost_wrapper.cpp | 240 +++++++++++++++++++++++++++ python/xgboost_wrapper.h | 182 +++++++++++++++++++++ src/README.md | 25 +++ src/data.h | 6 +- src/io/io.cpp | 6 + src/io/io.h | 3 +- src/io/simple_dmatrix-inl.hpp | 22 ++- src/learner/dmatrix.h | 14 +- src/learner/evaluation.h | 2 +- src/learner/learner-inl.hpp | 19 ++- src/xgboost_main.cpp | 7 +- 18 files changed, 520 insertions(+), 572 deletions(-) delete mode 100644 python/Makefile delete mode 100644 python/xgboost_python.cpp delete mode 100644 python/xgboost_python.h create mode 100644 python/xgboost_wrapper.cpp create mode 100644 python/xgboost_wrapper.h create mode 100644 src/README.md diff --git a/Makefile b/Makefile index 0a5fd7047..8c9980ac1 100644 --- a/Makefile +++ b/Makefile @@ -5,17 +5,23 @@ export CFLAGS = -Wall -O3 -msse2 -Wno-unknown-pragmas # specify tensor path BIN = xgboost OBJ = io.o +SLIB = python/libxgboostwrapper.so .PHONY: clean all -all: $(BIN) $(OBJ) +all: $(BIN) $(OBJ) $(SLIB) export LDFLAGS= -pthread -lm xgboost: src/xgboost_main.cpp io.o src/data.h src/tree/*.h src/tree/*.hpp src/gbm/*.h src/gbm/*.hpp src/utils/*.h src/learner/*.h src/learner/*.hpp io.o: src/io/io.cpp src/data.h src/utils/*.h +# now the wrapper takes in two files. io and wrapper part +python/libxgboostwrapper.so: python/xgboost_wrapper.cpp src/io/io.cpp src/*.h src/*/*.hpp src/*/*.h $(BIN) : $(CXX) $(CFLAGS) $(LDFLAGS) -o $@ $(filter %.cpp %.o %.c, $^) +$(SLIB) : + $(CXX) $(CFLAGS) -fPIC $(LDFLAGS) -shared -o $@ $(filter %.cpp %.o %.c, $^) + $(OBJ) : $(CXX) -c $(CFLAGS) -o $@ $(firstword $(filter %.cpp %.c, $^) ) diff --git a/README.md b/README.md index f5b64b78a..732e64d7f 100644 --- a/README.md +++ b/README.md @@ -17,19 +17,7 @@ Build * Simply type make * If your compiler does not come with OpenMP support, it will fire an warning telling you that the code will compile into single thread mode, and you will get single thread xgboost * You may get a error: -lgomp is not found, you can remove -fopenmp flag in Makefile to get single thread xgboost, or upgrade your compiler to compile multi-thread version +* Possible way to build using Visual Studio (not tested): + - In principle, you can put src/xgboost.cpp and src/io/io.cpp into the project, and build xgboost. + - For python module, you need python/xgboost_wrapper.cpp and src/io/io.cpp to build a dll. -Project Logical Layout -======= -* Dependency order: io->learner->gbm->tree - - All module depends on data.h -* tree are implementations of tree construction algorithms. -* gbm is gradient boosting interface, that takes trees and other base learner to do boosting. - - gbm only takes gradient as sufficient statistics, it does not compute the gradient. -* learner is learning module that computes gradient for specific object, and pass it to GBM - -File Naming Convention -======= -* The project is templatized, to make it easy to adjust input data structure. -* .h files are data structures and interface, which are needed to use functions in that layer. -* -inl.hpp files are implementations of interface, like cpp file in most project. 
- - You only need to understand the interface file to understand the usage of that layer diff --git a/python/Makefile b/python/Makefile deleted file mode 100644 index 76dfdcf01..000000000 --- a/python/Makefile +++ /dev/null @@ -1,26 +0,0 @@ -export CC = gcc -export CXX = g++ -export CFLAGS = -Wall -O3 -msse2 -Wno-unknown-pragmas -fopenmp - -# specify tensor path -SLIB = libxgboostpy.so -.PHONY: clean all - -all: $(SLIB) -export LDFLAGS= -pthread -lm - -libxgboostpy.so: xgboost_python.cpp ../regrank/*.h ../booster/*.h ../booster/*/*.hpp ../booster/*.hpp - -$(SLIB) : - $(CXX) $(CFLAGS) -fPIC $(LDFLAGS) -shared -o $@ $(filter %.cpp %.o %.c, $^) -$(BIN) : - $(CXX) $(CFLAGS) $(LDFLAGS) -o $@ $(filter %.cpp %.o %.c, $^) - -$(OBJ) : - $(CXX) -c $(CFLAGS) -o $@ $(firstword $(filter %.cpp %.c, $^) ) - -install: - cp -f -r $(BIN) $(INSTALL_PATH) - -clean: - $(RM) $(OBJ) $(BIN) $(SLIB) *~ diff --git a/python/README.md b/python/README.md index 19d33aa08..cf59ba9ab 100644 --- a/python/README.md +++ b/python/README.md @@ -1,3 +1,5 @@ python wrapper for xgboost using ctypes see example for usage + +to make the python module, type make in the root directory of project diff --git a/python/xgboost.py b/python/xgboost.py index 070fe6593..2e8deefa8 100644 --- a/python/xgboost.py +++ b/python/xgboost.py @@ -8,11 +8,7 @@ import numpy.ctypeslib import scipy.sparse as scp # set this line correctly -XGBOOST_PATH = os.path.dirname(__file__)+'/libxgboostpy.so' - -# entry type of sparse matrix -class REntry(ctypes.Structure): - _fields_ = [("findex", ctypes.c_uint), ("fvalue", ctypes.c_float) ] +XGBOOST_PATH = os.path.dirname(__file__)+'/libxgboostwrapper.so' # load in xgboost library xglib = ctypes.cdll.LoadLibrary(XGBOOST_PATH) diff --git a/python/xgboost_python.cpp b/python/xgboost_python.cpp deleted file mode 100644 index a325a20d4..000000000 --- a/python/xgboost_python.cpp +++ /dev/null @@ -1,297 +0,0 @@ -// implementations in ctypes -#include "xgboost_python.h" -#include "../regrank/xgboost_regrank.h" -#include "../regrank/xgboost_regrank_data.h" - -namespace xgboost{ - namespace python{ - class DMatrix: public regrank::DMatrix{ - public: - // whether column is initialized - bool init_col_; - public: - DMatrix(void){ - init_col_ = false; - } - ~DMatrix(void){} - public: - inline void Load(const char *fname, bool silent){ - this->CacheLoad(fname, silent); - init_col_ = this->data.HaveColAccess(); - } - inline void Clear( void ){ - this->data.Clear(); - this->info.labels.clear(); - this->info.weights.clear(); - this->info.group_ptr.clear(); - } - inline size_t NumRow( void ) const{ - return this->data.NumRow(); - } - inline void AddRow( const XGEntry *data, size_t len ){ - xgboost::booster::FMatrixS &mat = this->data; - mat.row_data_.resize( mat.row_ptr_.back() + len ); - memcpy( &mat.row_data_[mat.row_ptr_.back()], data, sizeof(XGEntry)*len ); - mat.row_ptr_.push_back( mat.row_ptr_.back() + len ); - init_col_ = false; - } - inline const XGEntry* GetRow(unsigned ridx, size_t* len) const{ - const xgboost::booster::FMatrixS &mat = this->data; - - *len = mat.row_ptr_[ridx+1] - mat.row_ptr_[ridx]; - return &mat.row_data_[ mat.row_ptr_[ridx] ]; - } - inline void ParseCSR( const size_t *indptr, - const unsigned *indices, - const float *data, - size_t nindptr, - size_t nelem ){ - xgboost::booster::FMatrixS &mat = this->data; - mat.row_ptr_.resize( nindptr ); - memcpy( &mat.row_ptr_[0], indptr, sizeof(size_t)*nindptr ); - mat.row_data_.resize( nelem ); - for( size_t i = 0; i < nelem; ++ i ){ - mat.row_data_[i] = 
XGEntry(indices[i], data[i]); - } - this->data.InitData(); - this->init_col_ = true; - } - - inline void ParseMat( const float *data, - size_t nrow, - size_t ncol, - float missing ){ - xgboost::booster::FMatrixS &mat = this->data; - mat.Clear(); - for( size_t i = 0; i < nrow; ++i, data += ncol ){ - size_t nelem = 0; - for( size_t j = 0; j < ncol; ++j ){ - if( data[j] != missing ){ - mat.row_data_.push_back( XGEntry(j, data[j]) ); - ++ nelem; - } - } - mat.row_ptr_.push_back( mat.row_ptr_.back() + nelem ); - } - this->data.InitData(); - this->init_col_ = true; - } - inline void SetLabel( const float *label, size_t len ){ - this->info.labels.resize( len ); - memcpy( &(this->info).labels[0], label, sizeof(float)*len ); - } - inline void SetGroup( const unsigned *group, size_t len ){ - this->info.group_ptr.resize( len + 1 ); - this->info.group_ptr[0] = 0; - for( size_t i = 0; i < len; ++ i ){ - this->info.group_ptr[i+1] = this->info.group_ptr[i]+group[i]; - } - } - inline void SetWeight( const float *weight, size_t len ){ - this->info.weights.resize( len ); - memcpy( &(this->info).weights[0], weight, sizeof(float)*len ); - } - inline const float* GetLabel( size_t* len ) const{ - *len = this->info.labels.size(); - return &(this->info.labels[0]); - } - inline const float* GetWeight( size_t* len ) const{ - *len = this->info.weights.size(); - return &(this->info.weights[0]); - } - inline void CheckInit(void){ - if(!init_col_){ - this->data.InitData(); - init_col_ = true; - } - utils::Assert( this->data.NumRow() == this->info.labels.size(), "DMatrix: number of labels must match number of rows in matrix"); - } - }; - - class Booster: public xgboost::regrank::RegRankBoostLearner{ - private: - bool init_trainer, init_model; - public: - Booster(const std::vector mats){ - silent = 1; - init_trainer = false; - init_model = false; - this->SetCacheData(mats); - } - inline void CheckInit(void){ - if( !init_trainer ){ - this->InitTrainer(); init_trainer = true; - } - if( !init_model ){ - this->InitModel(); init_model = true; - } - } - inline void LoadModel( const char *fname ){ - xgboost::regrank::RegRankBoostLearner::LoadModel(fname); - this->init_model = true; - } - inline void SetParam( const char *name, const char *val ){ - if( !strcmp( name, "seed" ) ) random::Seed(atoi(val)); - xgboost::regrank::RegRankBoostLearner::SetParam( name, val ); - } - const float *Pred( const DMatrix &dmat, size_t *len, int bst_group ){ - this->CheckInit(); - - this->Predict( this->preds_, dmat, bst_group ); - *len = this->preds_.size(); - return &this->preds_[0]; - } - inline void BoostOneIter( const DMatrix &train, - float *grad, float *hess, size_t len, int bst_group ){ - this->grad_.resize( len ); this->hess_.resize( len ); - memcpy( &this->grad_[0], grad, sizeof(float)*len ); - memcpy( &this->hess_[0], hess, sizeof(float)*len ); - - if( grad_.size() == train.Size() ){ - if( bst_group < 0 ) bst_group = 0; - base_gbm.DoBoost(grad_, hess_, train.data, train.info.root_index, bst_group); - }else{ - utils::Assert( bst_group == -1, "must set bst_group to -1 to support all group boosting" ); - int ngroup = base_gbm.NumBoosterGroup(); - utils::Assert( grad_.size() == train.Size() * (size_t)ngroup, "BUG: UpdateOneIter: mclass" ); - std::vector tgrad( train.Size() ), thess( train.Size() ); - for( int g = 0; g < ngroup; ++ g ){ - memcpy( &tgrad[0], &grad_[g*tgrad.size()], sizeof(float)*tgrad.size() ); - memcpy( &thess[0], &hess_[g*tgrad.size()], sizeof(float)*tgrad.size() ); - base_gbm.DoBoost(tgrad, thess, train.data, 
train.info.root_index, g ); - } - } - } - }; - }; -}; - -using namespace xgboost::python; - - -extern "C"{ - void* XGDMatrixCreate( void ){ - return new DMatrix(); - } - void XGDMatrixFree( void *handle ){ - delete static_cast(handle); - } - void XGDMatrixLoad( void *handle, const char *fname, int silent ){ - static_cast(handle)->Load(fname, silent!=0); - } - void XGDMatrixSaveBinary( void *handle, const char *fname, int silent ){ - static_cast(handle)->SaveBinary(fname, silent!=0); - } - void XGDMatrixParseCSR( void *handle, - const size_t *indptr, - const unsigned *indices, - const float *data, - size_t nindptr, - size_t nelem ){ - static_cast(handle)->ParseCSR(indptr, indices, data, nindptr, nelem); - } - void XGDMatrixParseMat( void *handle, - const float *data, - size_t nrow, - size_t ncol, - float missing ){ - static_cast(handle)->ParseMat(data, nrow, ncol, missing); - } - void XGDMatrixSetLabel( void *handle, const float *label, size_t len ){ - static_cast(handle)->SetLabel(label,len); - } - void XGDMatrixSetWeight( void *handle, const float *weight, size_t len ){ - static_cast(handle)->SetWeight(weight,len); - } - void XGDMatrixSetGroup( void *handle, const unsigned *group, size_t len ){ - static_cast(handle)->SetGroup(group,len); - } - const float* XGDMatrixGetLabel( const void *handle, size_t* len ){ - return static_cast(handle)->GetLabel(len); - } - const float* XGDMatrixGetWeight( const void *handle, size_t* len ){ - return static_cast(handle)->GetWeight(len); - } - void XGDMatrixClear(void *handle){ - static_cast(handle)->Clear(); - } - void XGDMatrixAddRow( void *handle, const XGEntry *data, size_t len ){ - static_cast(handle)->AddRow(data, len); - } - size_t XGDMatrixNumRow(const void *handle){ - return static_cast(handle)->NumRow(); - } - const XGEntry* XGDMatrixGetRow(void *handle, unsigned ridx, size_t* len){ - return static_cast(handle)->GetRow(ridx, len); - } - - // xgboost implementation - void *XGBoosterCreate( void *dmats[], size_t len ){ - std::vector mats; - for( size_t i = 0; i < len; ++i ){ - DMatrix *dtr = static_cast(dmats[i]); - dtr->CheckInit(); - mats.push_back( dtr ); - } - return new Booster( mats ); - } - void XGBoosterFree( void *handle ){ - delete static_cast(handle); - } - void XGBoosterSetParam( void *handle, const char *name, const char *value ){ - static_cast(handle)->SetParam( name, value ); - } - void XGBoosterUpdateOneIter( void *handle, void *dtrain ){ - Booster *bst = static_cast(handle); - DMatrix *dtr = static_cast(dtrain); - bst->CheckInit(); dtr->CheckInit(); - bst->UpdateOneIter( *dtr ); - } - void XGBoosterBoostOneIter( void *handle, void *dtrain, - float *grad, float *hess, size_t len, int bst_group ){ - Booster *bst = static_cast(handle); - DMatrix *dtr = static_cast(dtrain); - bst->CheckInit(); dtr->CheckInit(); - bst->BoostOneIter( *dtr, grad, hess, len, bst_group ); - } - void XGBoosterEvalOneIter( void *handle, int iter, void *dmats[], const char *evnames[], size_t len ){ - Booster *bst = static_cast(handle); - bst->CheckInit(); - - std::vector names; - std::vector mats; - for( size_t i = 0; i < len; ++i ){ - mats.push_back( static_cast(dmats[i]) ); - names.push_back( std::string( evnames[i]) ); - } - bst->EvalOneIter( iter, mats, names, stderr ); - } - const float *XGBoosterPredict( void *handle, void *dmat, size_t *len, int bst_group ){ - return static_cast(handle)->Pred( *static_cast(dmat), len, bst_group ); - } - void XGBoosterLoadModel( void *handle, const char *fname ){ - static_cast(handle)->LoadModel( fname ); - } - void 
XGBoosterSaveModel( const void *handle, const char *fname ){ - static_cast(handle)->SaveModel( fname ); - } - void XGBoosterDumpModel( void *handle, const char *fname, const char *fmap ){ - using namespace xgboost::utils; - FILE *fo = FopenCheck( fname, "w" ); - FeatMap featmap; - if( strlen(fmap) != 0 ){ - featmap.LoadText( fmap ); - } - static_cast(handle)->DumpModel( fo, featmap, false ); - fclose( fo ); - } - - void XGBoosterUpdateInteract( void *handle, void *dtrain, const char *action ){ - Booster *bst = static_cast(handle); - DMatrix *dtr = static_cast(dtrain); - bst->CheckInit(); dtr->CheckInit(); - std::string act( action ); - bst->UpdateInteract( act, *dtr ); - } -}; - diff --git a/python/xgboost_python.h b/python/xgboost_python.h deleted file mode 100644 index 6c113a108..000000000 --- a/python/xgboost_python.h +++ /dev/null @@ -1,209 +0,0 @@ -#ifndef XGBOOST_PYTHON_H -#define XGBOOST_PYTHON_H -/*! - * \file xgboost_python.h - * \author Tianqi Chen - * \brief python wrapper for xgboost, using ctypes, - * hides everything behind functions - * use c style interface - */ -#include "../booster/xgboost_data.h" -extern "C"{ - /*! \brief type of row entry */ - typedef xgboost::booster::FMatrixS::REntry XGEntry; - - /*! - * \brief create a data matrix - * \return a new data matrix - */ - void* XGDMatrixCreate(void); - /*! - * \brief free space in data matrix - */ - void XGDMatrixFree(void *handle); - /*! - * \brief load a data matrix from text file or buffer(if exists) - * \param handle a instance of data matrix - * \param fname file name - * \param silent print statistics when loading - */ - void XGDMatrixLoad(void *handle, const char *fname, int silent); - /*! - * \brief load a data matrix into binary file - * \param handle a instance of data matrix - * \param fname file name - * \param silent print statistics when saving - */ - void XGDMatrixSaveBinary(void *handle, const char *fname, int silent); - /*! - * \brief set matrix content from csr format - * \param handle a instance of data matrix - * \param indptr pointer to row headers - * \param indices findex - * \param data fvalue - * \param nindptr number of rows in the matix + 1 - * \param nelem number of nonzero elements in the matrix - */ - void XGDMatrixParseCSR( void *handle, - const size_t *indptr, - const unsigned *indices, - const float *data, - size_t nindptr, - size_t nelem ); - /*! - * \brief set matrix content from data content - * \param handle a instance of data matrix - * \param data pointer to the data space - * \param nrow number of rows - * \param ncol number columns - * \param missing which value to represent missing value - */ - void XGDMatrixParseMat( void *handle, - const float *data, - size_t nrow, - size_t ncol, - float missing ); - /*! - * \brief set label of the training matrix - * \param handle a instance of data matrix - * \param label pointer to label - * \param len length of array - */ - void XGDMatrixSetLabel( void *handle, const float *label, size_t len ); - /*! - * \brief set label of the training matrix - * \param handle a instance of data matrix - * \param group pointer to group size - * \param len length of array - */ - void XGDMatrixSetGroup( void *handle, const unsigned *group, size_t len ); - /*! - * \brief set weight of each instacne - * \param handle a instance of data matrix - * \param weight data pointer to weights - * \param len length of array - */ - void XGDMatrixSetWeight( void *handle, const float *weight, size_t len ); - /*! 
- * \brief get label set from matrix - * \param handle a instance of data matrix - * \param len used to set result length - * \return pointer to the label - */ - const float* XGDMatrixGetLabel( const void *handle, size_t* len ); - /*! - * \brief get weight set from matrix - * \param handle a instance of data matrix - * \param len used to set result length - * \return pointer to the weight - */ - const float* XGDMatrixGetWeight( const void *handle, size_t* len ); - /*! - * \brief clear all the records, including feature matrix and label - * \param handle a instance of data matrix - */ - void XGDMatrixClear(void *handle); - /*! - * \brief return number of rows - */ - size_t XGDMatrixNumRow(const void *handle); - /*! - * \brief add row - * \param handle a instance of data matrix - * \param data array of row content - * \param len length of array - */ - void XGDMatrixAddRow(void *handle, const XGEntry *data, size_t len); - /*! - * \brief get ridx-th row of sparse matrix - * \param handle handle - * \param ridx row index - * \param len used to set result length - * \reurn pointer to the row - */ - const XGEntry* XGDMatrixGetRow(void *handle, unsigned ridx, size_t* len); - - // --- start XGBoost class - /*! - * \brief create xgboost learner - * \param dmats matrices that are set to be cached - * \param create a booster - */ - void *XGBoosterCreate( void* dmats[], size_t len ); - /*! - * \brief free obj in handle - * \param handle handle to be freed - */ - void XGBoosterFree( void* handle ); - /*! - * \brief set parameters - * \param handle handle - * \param name parameter name - * \param val value of parameter - */ - void XGBoosterSetParam( void *handle, const char *name, const char *value ); - /*! - * \brief update the model in one round using dtrain - * \param handle handle - * \param dtrain training data - */ - void XGBoosterUpdateOneIter( void *handle, void *dtrain ); - - /*! - * \brief update the model, by directly specify gradient and second order gradient, - * this can be used to replace UpdateOneIter, to support customized loss function - * \param handle handle - * \param dtrain training data - * \param grad gradient statistics - * \param hess second order gradient statistics - * \param len length of grad/hess array - * \param bst_group boost group we are working at, default = -1 - */ - void XGBoosterBoostOneIter( void *handle, void *dtrain, - float *grad, float *hess, size_t len, int bst_group ); - /*! - * \brief print evaluation statistics to stdout for xgboost - * \param handle handle - * \param iter current iteration rounds - * \param dmats pointers to data to be evaluated - * \param evnames pointers to names of each data - * \param len length of dmats - */ - void XGBoosterEvalOneIter( void *handle, int iter, void *dmats[], const char *evnames[], size_t len ); - /*! - * \brief make prediction based on dmat - * \param handle handle - * \param dmat data matrix - * \param len used to store length of returning result - * \param bst_group booster group, if model contains multiple booster group, default = -1 means predict for all groups - */ - const float *XGBoosterPredict( void *handle, void *dmat, size_t *len, int bst_group ); - /*! - * \brief load model from existing file - * \param handle handle - * \param fname file name - */ - void XGBoosterLoadModel( void *handle, const char *fname ); - /*! - * \brief save model into existing file - * \param handle handle - * \param fname file name - */ - void XGBoosterSaveModel( const void *handle, const char *fname ); - /*! 
- * \brief dump model into text file - * \param handle handle - * \param fname file name - * \param fmap name to fmap can be empty string - */ - void XGBoosterDumpModel( void *handle, const char *fname, const char *fmap ); - /*! - * \brief interactively update model: beta - * \param handle handle - * \param dtrain training data - * \param action action name - */ - void XGBoosterUpdateInteract( void *handle, void *dtrain, const char* action ); -}; -#endif - diff --git a/python/xgboost_wrapper.cpp b/python/xgboost_wrapper.cpp new file mode 100644 index 000000000..e43095920 --- /dev/null +++ b/python/xgboost_wrapper.cpp @@ -0,0 +1,240 @@ +// implementations in ctypes +#include +#include +#include +#include +#include +#include "./xgboost_wrapper.h" +#include "../src/data.h" +#include "../src/learner/learner-inl.hpp" +#include "../src/io/io.h" +#include "../src/io/simple_dmatrix-inl.hpp" + +using namespace xgboost; +using namespace xgboost::io; + +namespace xgboost { +namespace wrapper { +// booster wrapper class +class Booster: public learner::BoostLearner { + public: + explicit Booster(const std::vector& mats) { + this->silent = 1; + this->SetCacheData(mats); + } + const float *Pred(const DataMatrix &dmat, size_t *len) { + this->Predict(dmat, &this->preds_); + *len = this->preds_.size(); + return &this->preds_[0]; + } + inline void BoostOneIter(const DataMatrix &train, + float *grad, float *hess, size_t len) { + this->gpair_.resize(len); + const unsigned ndata = static_cast(len); + #pragma omp parallel for schedule(static) + for (unsigned j = 0; j < ndata; ++j) { + gpair_[j] = bst_gpair(grad[j], hess[j]); + } + gbm_->DoBoost(gpair_, train.fmat, train.info.root_index); + } + inline const char** GetModelDump(const utils::FeatMap& fmap, bool with_stats, size_t *len) { + model_dump = this->DumpModel(fmap, with_stats); + model_dump_cptr.resize(model_dump.size()); + for (size_t i = 0; i < model_dump.size(); ++i) { + model_dump_cptr[i] = model_dump[i].c_str(); + } + *len = model_dump.size(); + return &model_dump_cptr[0]; + } + // temporal fields + // temporal data to save evaluation dump + std::string eval_str; + // temporal space to save model dump + std::vector model_dump; + std::vector model_dump_cptr; +}; +} // namespace wrapper +} // namespace xgboost + +using namespace xgboost::wrapper; + +extern "C"{ + void* XGDMatrixCreateFromFile(const char *fname, int silent) { + return LoadDataMatrix(fname, silent, false); + } + void* XGDMatrixCreateFromCSR(const size_t *indptr, + const unsigned *indices, + const float *data, + size_t nindptr, + size_t nelem) { + DMatrixSimple *p_mat = new DMatrixSimple(); + DMatrixSimple &mat = *p_mat; + mat.row_ptr_.resize(nindptr); + memcpy(&mat.row_ptr_[0], indptr, sizeof(size_t)*nindptr); + mat.row_data_.resize(nelem); + for (size_t i = 0; i < nelem; ++ i) { + mat.row_data_[i] = SparseBatch::Entry(indices[i], data[i]); + mat.info.num_col = std::max(mat.info.num_col, + static_cast(indices[i]+1)); + } + mat.info.num_row = nindptr - 1; + return p_mat; + } + void* XGDMatrixCreateFromMat(const float *data, + size_t nrow, + size_t ncol, + float missing) { + DMatrixSimple *p_mat = new DMatrixSimple(); + DMatrixSimple &mat = *p_mat; + mat.info.num_row = nrow; + mat.info.num_col = ncol; + for (size_t i = 0; i < nrow; ++i, data += ncol) { + size_t nelem = 0; + for (size_t j = 0; j < ncol; ++j) { + if (data[j] != missing) { + mat.row_data_.push_back(SparseBatch::Entry(j, data[j])); + ++nelem; + } + } + mat.row_ptr_.push_back(mat.row_ptr_.back() + nelem); + } + return p_mat; + } 
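// The function above mirrors the dense-input path of the old python wrapper.
// A minimal sketch of how a caller might drive this C API (illustrative only;
// the 2x2 matrix, the parameter value, and the round count are invented for
// the example, not taken from this patch):
//
//   const float data[] = {1.0f, 2.0f, 0.0f, 3.0f};
//   void *dmat = XGDMatrixCreateFromMat(data, 2, 2, 0.0f);  // 0.0f marks missing entries
//   const float labels[] = {0.0f, 1.0f};
//   XGDMatrixSetLabel(dmat, labels, 2);
//   void *dmats[] = {dmat};
//   void *bst = XGBoosterCreate(dmats, 1);
//   XGBoosterSetParam(bst, "objective", "binary:logistic");
//   for (int iter = 0; iter < 10; ++iter) {
//     XGBoosterUpdateOneIter(bst, iter, dmat);
//   }
//   size_t len;
//   const float *preds = XGBoosterPredict(bst, dmat, &len);  // len prediction values
//   XGBoosterFree(bst);
//   XGDMatrixFree(dmat);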
+  void* XGDMatrixSliceDMatrix(void *handle,
+                              const int *idxset,
+                              size_t len) {
+    DMatrixSimple tmp;
+    DataMatrix &dsrc = *static_cast<DataMatrix*>(handle);
+    if (dsrc.magic != DMatrixSimple::kMagic) {
+      tmp.CopyFrom(dsrc);
+    }
+    DataMatrix &src = (dsrc.magic == DMatrixSimple::kMagic ?
+                       *static_cast<DMatrixSimple*>(handle) : tmp);
+    DMatrixSimple *p_ret = new DMatrixSimple();
+    DMatrixSimple &ret = *p_ret;
+
+    utils::Check(src.info.group_ptr.size() == 0,
+                 "slice does not support group structure");
+    ret.Clear();
+    ret.info.num_row = len;
+    ret.info.num_col = src.info.num_col;
+
+    utils::IIterator<SparseBatch> *iter = src.fmat.RowIterator();
+    iter->BeforeFirst();
+    utils::Assert(iter->Next(), "slice");
+    const SparseBatch &batch = iter->Value();
+    for (size_t i = 0; i < len; ++i) {
+      const int ridx = idxset[i];
+      utils::Check(static_cast<size_t>(ridx) < batch.size, "slice index exceeds number of rows");
+      SparseBatch::Inst inst = batch[ridx];
+      ret.row_data_.resize(ret.row_data_.size() + inst.length);
+      memcpy(&ret.row_data_[ret.row_ptr_.back()], inst.data,
+             sizeof(SparseBatch::Entry) * inst.length);
+      ret.row_ptr_.push_back(ret.row_ptr_.back() + inst.length);
+      if (src.info.labels.size() != 0) {
+        ret.info.labels.push_back(src.info.labels[ridx]);
+      }
+      if (src.info.weights.size() != 0) {
+        ret.info.weights.push_back(src.info.weights[ridx]);
+      }
+      if (src.info.root_index.size() != 0) {
+        ret.info.root_index.push_back(src.info.root_index[ridx]);
+      }
+    }
+    return p_ret;
+  }
+  void XGDMatrixFree(void *handle) {
+    delete static_cast<DataMatrix*>(handle);
+  }
+  void XGDMatrixSaveBinary(void *handle, const char *fname, int silent) {
+    SaveDataMatrix(*static_cast<DataMatrix*>(handle), fname, silent);
+  }
+  void XGDMatrixSetLabel(void *handle, const float *label, size_t len) {
+    DataMatrix *pmat = static_cast<DataMatrix*>(handle);
+    pmat->info.labels.resize(len);
+    memcpy(&(pmat->info).labels[0], label, sizeof(float) * len);
+  }
+  void XGDMatrixSetWeight(void *handle, const float *weight, size_t len) {
+    DataMatrix *pmat = static_cast<DataMatrix*>(handle);
+    pmat->info.weights.resize(len);
+    memcpy(&(pmat->info).weights[0], weight, sizeof(float) * len);
+  }
+  void XGDMatrixSetGroup(void *handle, const unsigned *group, size_t len) {
+    DataMatrix *pmat = static_cast<DataMatrix*>(handle);
+    pmat->info.group_ptr.resize(len + 1);
+    pmat->info.group_ptr[0] = 0;
+    for (size_t i = 0; i < len; ++i) {
+      pmat->info.group_ptr[i+1] = pmat->info.group_ptr[i]+group[i];
+    }
+  }
+  const float* XGDMatrixGetLabel(const void *handle, size_t* len) {
+    const DataMatrix *pmat = static_cast<const DataMatrix*>(handle);
+    *len = pmat->info.labels.size();
+    return &(pmat->info.labels[0]);
+  }
+  const float* XGDMatrixGetWeight(const void *handle, size_t* len) {
+    const DataMatrix *pmat = static_cast<const DataMatrix*>(handle);
+    *len = pmat->info.weights.size();
+    return &(pmat->info.weights[0]);
+  }
+  size_t XGDMatrixNumRow(const void *handle) {
+    return static_cast<const DataMatrix*>(handle)->info.num_row;
+  }
+
+  // xgboost implementation
+  void *XGBoosterCreate(void *dmats[], size_t len) {
+    std::vector<DataMatrix*> mats;
+    for (size_t i = 0; i < len; ++i) {
+      DataMatrix *dtr = static_cast<DataMatrix*>(dmats[i]);
+      mats.push_back(dtr);
+    }
+    return new Booster(mats);
+  }
+  void XGBoosterFree(void *handle) {
+    delete static_cast<Booster*>(handle);
+  }
+  void XGBoosterSetParam(void *handle, const char *name, const char *value) {
+    static_cast<Booster*>(handle)->SetParam(name, value);
+  }
+  void XGBoosterUpdateOneIter(void *handle, int iter, void *dtrain) {
+    Booster *bst = static_cast<Booster*>(handle);
+    DataMatrix *dtr = static_cast<DataMatrix*>(dtrain);
+    bst->CheckInit(dtr);
+    bst->UpdateOneIter(iter, *dtr);
+  }
+  void XGBoosterBoostOneIter(void *handle, void *dtrain,
+                             float *grad, float *hess, size_t len) {
+    Booster *bst = static_cast<Booster*>(handle);
+    DataMatrix *dtr = static_cast<DataMatrix*>(dtrain);
+    bst->CheckInit(dtr);
+    bst->BoostOneIter(*dtr, grad, hess, len);
+  }
+  const char* XGBoosterEvalOneIter(void *handle, int iter, void *dmats[], const char *evnames[], size_t len) {
+    Booster *bst = static_cast<Booster*>(handle);
+    std::vector<std::string> names;
+    std::vector<DataMatrix*> mats;
+    for (size_t i = 0; i < len; ++i) {
+      mats.push_back(static_cast<DataMatrix*>(dmats[i]));
+      names.push_back(std::string(evnames[i]));
+    }
+    bst->eval_str = bst->EvalOneIter(iter, mats, names);
+    return bst->eval_str.c_str();
+  }
+  const float *XGBoosterPredict(void *handle, void *dmat, size_t *len) {
+    return static_cast<Booster*>(handle)->Pred(*static_cast<DataMatrix*>(dmat), len);
+  }
+  void XGBoosterLoadModel(void *handle, const char *fname) {
+    static_cast<Booster*>(handle)->LoadModel(fname);
+  }
+  void XGBoosterSaveModel(const void *handle, const char *fname) {
+    static_cast<const Booster*>(handle)->SaveModel(fname);
+  }
+  const char** XGBoosterDumpModel(void *handle, const char *fmap, size_t *len) {
+    using namespace xgboost::utils;
+    FeatMap featmap;
+    if (strlen(fmap) != 0) {
+      featmap.LoadText(fmap);
+    }
+    return static_cast<Booster*>(handle)->GetModelDump(featmap, false, len);
+  }
+};
diff --git a/python/xgboost_wrapper.h b/python/xgboost_wrapper.h new file mode 100644 index 000000000..16b8fecd7 --- /dev/null +++ b/python/xgboost_wrapper.h @@ -0,0 +1,182 @@
+#ifndef XGBOOST_WRAPPER_H_
+#define XGBOOST_WRAPPER_H_
+/*!
+ * \file xgboost_wrapper.h
+ * \author Tianqi Chen
+ * \brief a C style wrapper of xgboost
+ *  can be used to create wrappers for other languages
+ */
+#include
+
+extern "C" {
+  /*!
+   * \brief load a data matrix from file
+   * \param fname file name to be loaded
+   * \param silent whether to print messages during loading
+   * \return a loaded data matrix
+   */
+  void* XGDMatrixCreateFromFile(const char *fname, int silent);
+  /*!
+   * \brief create a matrix content from csr format
+   * \param indptr pointer to row headers
+   * \param indices feature index of each entry
+   * \param data feature value of each entry
+   * \param nindptr number of rows in the matrix + 1
+   * \param nelem number of nonzero elements in the matrix
+   * \return created dmatrix
+   */
+  void* XGDMatrixCreateFromCSR(const size_t *indptr,
+                               const unsigned *indices,
+                               const float *data,
+                               size_t nindptr,
+                               size_t nelem);
+  /*!
+   * \brief create matrix content from dense matrix
+   * \param data pointer to the data space
+   * \param nrow number of rows
+   * \param ncol number of columns
+   * \param missing which value to represent missing value
+   * \return created dmatrix
+   */
+  void* XGDMatrixCreateFromMat(const float *data,
+                               size_t nrow,
+                               size_t ncol,
+                               float missing);
+  /*!
+   * \brief create a new dmatrix from sliced content of existing matrix
+   * \param handle instance of data matrix to be sliced
+   * \param idxset index set
+   * \param len length of index set
+   * \return a sliced new matrix
+   */
+  void* XGDMatrixSliceDMatrix(void *handle,
+                              const int *idxset,
+                              size_t len);
+  /*!
+   * \brief free space in data matrix
+   */
+  void XGDMatrixFree(void *handle);
+  /*!
+   * \brief save a data matrix into binary file
+   * \param handle an instance of data matrix
+   * \param fname file name
+   * \param silent print statistics when saving
+   */
+  void XGDMatrixSaveBinary(void *handle, const char *fname, int silent);
+  /*!
+   * \brief set label of the training matrix
+   * \param handle an instance of data matrix
+   * \param label pointer to label
+   * \param len length of array
+   */
+  void XGDMatrixSetLabel(void *handle, const float *label, size_t len);
+  /*!
+   * \brief set weight of each instance
+   * \param handle an instance of data matrix
+   * \param weight data pointer to weights
+   * \param len length of array
+   */
+  void XGDMatrixSetWeight(void *handle, const float *weight, size_t len);
+  /*!
+   * \brief set group sizes of the training matrix, used for ranking
+   * \param handle an instance of data matrix
+   * \param group pointer to group size
+   * \param len length of array
+   */
+  void XGDMatrixSetGroup(void *handle, const unsigned *group, size_t len);
+  /*!
+   * \brief get label set from matrix
+   * \param handle an instance of data matrix
+   * \param out_len used to set result length
+   * \return pointer to the label
+   */
+  const float* XGDMatrixGetLabel(const void *handle, size_t* out_len);
+  /*!
+   * \brief get weight set from matrix
+   * \param handle an instance of data matrix
+   * \param out_len used to set result length
+   * \return pointer to the weight
+   */
+  const float* XGDMatrixGetWeight(const void *handle, size_t* out_len);
+  /*!
+   * \brief return number of rows
+   */
+  size_t XGDMatrixNumRow(const void *handle);
+  // --- start XGBoost class
+  /*!
+   * \brief create xgboost learner
+   * \param dmats matrices that are set to be cached
+   * \param len length of dmats
+   */
+  void *XGBoosterCreate(void* dmats[], size_t len);
+  /*!
+   * \brief free obj in handle
+   * \param handle handle to be freed
+   */
+  void XGBoosterFree(void* handle);
+  /*!
+   * \brief set parameters
+   * \param handle handle
+   * \param name parameter name
+   * \param value value of parameter
+   */
+  void XGBoosterSetParam(void *handle, const char *name, const char *value);
+  /*!
+   * \brief update the model in one round using dtrain
+   * \param handle handle
+   * \param iter current iteration rounds
+   * \param dtrain training data
+   */
+  void XGBoosterUpdateOneIter(void *handle, int iter, void *dtrain);
+  /*!
+   * \brief update the model by directly specifying the gradient and second order gradient,
+   *  this can be used to replace UpdateOneIter, to support customized loss functions
+   * \param handle handle
+   * \param dtrain training data
+   * \param grad gradient statistics
+   * \param hess second order gradient statistics
+   * \param len length of grad/hess array
+   */
+  void XGBoosterBoostOneIter(void *handle, void *dtrain,
+                             float *grad, float *hess, size_t len);
+  /*!
+   * \brief get evaluation statistics for xgboost
+   * \param handle handle
+   * \param iter current iteration rounds
+   * \param dmats pointers to data to be evaluated
+   * \param evnames pointers to names of each data
+   * \param len length of dmats
+   * \return the string containing evaluation statistics
+   */
+  const char *XGBoosterEvalOneIter(void *handle, int iter, void *dmats[],
+                                   const char *evnames[], size_t len);
+  /*!
+   * \brief make prediction based on dmat
+   * \param handle handle
+   * \param dmat data matrix
+   * \param len used to store length of returning result
+   */
+  const float *XGBoosterPredict(void *handle, void *dmat, size_t *len);
+  /*!
+   * \brief load model from existing file
+   * \param handle handle
+   * \param fname file name
+   */
+  void XGBoosterLoadModel(void *handle, const char *fname);
+  /*!
+   * \brief save model into existing file
+   * \param handle handle
+   * \param fname file name
+   */
+  void XGBoosterSaveModel(const void *handle, const char *fname);
+  /*!
+   * \brief dump model, return array of strings representing the model dump
+   * \param handle handle
+   * \param fmap name of the feature map file; can be an empty string
+   * \param out_len length of output array
+   * \return char *data[], representing the dump of each model
+   */
+  const char** XGBoosterDumpModel(void *handle, const char *fmap,
+                                  size_t *out_len);
+};
+#endif  // XGBOOST_WRAPPER_H_
diff --git a/src/README.md b/src/README.md new file mode 100644 index 000000000..35d9b08e8 --- /dev/null +++ b/src/README.md @@ -0,0 +1,25 @@
+Coding Guide
+======
+
+Project Logical Layout
+=======
+* Dependency order: io->learner->gbm->tree
+  - All modules depend on data.h
+* tree contains the implementations of tree construction algorithms.
+* gbm is the gradient boosting interface, which takes trees and other base learners to do boosting.
+  - gbm only takes the gradient as sufficient statistics; it does not compute the gradient.
+* learner is the learning module that computes the gradient for a specific objective and passes it to the gbm
+
+File Naming Convention
+=======
+* The project is templatized to make it easy to adjust the input data structure.
+* .h files are data structures and interfaces, which are needed to use the functions in that layer.
+* -inl.hpp files are implementations of the interfaces, like .cpp files in most projects.
+  - You only need to understand the interface file to understand the usage of that layer
+
+How to Hack the Code
+======
+* Add an objective function: add it to learner/objective-inl.hpp and register it in learner/objective.h ```CreateObjFunction```
+  - You can also do it directly in python
+* Add a new evaluation metric: add it to learner/evaluation-inl.hpp and register it in learner/evaluation.h ```CreateEvaluator```
+* Add a wrapper for a new language: most likely you can do it by taking the functions in python/xgboost_wrapper.h, which are purely C based, and calling these C functions to use xgboost
diff --git a/src/data.h b/src/data.h index fe81b4dad..c60b58b8a 100644 --- a/src/data.h +++ b/src/data.h @@ -226,8 +226,12 @@ class FMatrixS : public FMatrixInterface{
     if (this->HaveColAccess()) return;
     this->InitColData(max_nrow);
   }
-  /*! \brief get the row iterator associated with FMatrix */
+  /*!
+   * \brief get the row iterator associated with FMatrix
+   *  this function is not thread-safe; it returns the iterator stored in FMatrixS
+   */
   inline utils::IIterator<SparseBatch>* RowIterator(void) const {
+    iter_->BeforeFirst();
     return iter_;
   }
   /*! \brief set iterator */
diff --git a/src/io/io.cpp b/src/io/io.cpp index 2cf42aadf..4ddf61eb0 100644 --- a/src/io/io.cpp +++ b/src/io/io.cpp @@ -2,6 +2,7 @@
 #define _CRT_SECURE_NO_DEPRECATE
 #include
 #include "./io.h"
+#include "../utils/utils.h"
 #include "simple_dmatrix-inl.hpp"
 
 // implements data loads using dmatrix simple for now
@@ -12,5 +13,10 @@ DataMatrix* LoadDataMatrix(const char *fname, bool silent, bool savebuffer) {
   dmat->CacheLoad(fname, silent, savebuffer);
   return dmat;
 }
+
+void SaveDataMatrix(const DataMatrix &dmat, const char *fname, bool silent) {
+  utils::Error("not implemented");
+}
+
 }  // namespace io
 }  // namespace xgboost
diff --git a/src/io/io.h b/src/io/io.h index d6d280d5e..211893509 100644 --- a/src/io/io.h +++ b/src/io/io.h @@ -28,8 +28,9 @@ DataMatrix* LoadDataMatrix(const char *fname, bool silent = false, bool savebuff
  *  SaveDMatrix will choose the best way to materialize the dmatrix.
* \param dmat the dmatrix to be saved * \param fname file name to be savd + * \param silent whether print message during saving */ -void SaveDMatrix(const DataMatrix &dmat, const char *fname); +void SaveDataMatrix(const DataMatrix &dmat, const char *fname, bool silent = false); } // namespace io } // namespace xgboost diff --git a/src/io/simple_dmatrix-inl.hpp b/src/io/simple_dmatrix-inl.hpp index 5da6d1c0b..f996b8d8c 100644 --- a/src/io/simple_dmatrix-inl.hpp +++ b/src/io/simple_dmatrix-inl.hpp @@ -23,7 +23,7 @@ namespace io { class DMatrixSimple : public DataMatrix { public: // constructor - DMatrixSimple(void) { + DMatrixSimple(void) : DataMatrix(kMagic) { this->fmat.set_iter(new OneBatchIter(this)); this->Clear(); } @@ -36,6 +36,24 @@ class DMatrixSimple : public DataMatrix { row_data_.clear(); info.Clear(); } + /*! \brief copy content data from source matrix */ + inline void CopyFrom(const DataMatrix &src) { + this->info = src.info; + this->Clear(); + // clone data content in thos matrix + utils::IIterator *iter = src.fmat.RowIterator(); + iter->BeforeFirst(); + while (iter->Next()) { + const SparseBatch &batch = iter->Value(); + for (size_t i = 0; i < batch.size; ++i) { + SparseBatch::Inst inst = batch[i]; + row_data_.resize(row_data_.size() + inst.length); + memcpy(&row_data_[row_ptr_.back()], inst.data, + sizeof(SparseBatch::Entry) * inst.length); + row_ptr_.push_back(row_ptr_.back() + inst.length); + } + } + } /*! * \brief add a row to the matrix * \param feats features @@ -183,7 +201,7 @@ class DMatrixSimple : public DataMatrix { protected: // one batch iterator that return content in the matrix struct OneBatchIter: utils::IIterator { - OneBatchIter(DMatrixSimple *parent) + explicit OneBatchIter(DMatrixSimple *parent) : at_first_(true), parent_(parent) {} virtual ~OneBatchIter(void) {} virtual void BeforeFirst(void) { diff --git a/src/learner/dmatrix.h b/src/learner/dmatrix.h index 88a865399..b558b070b 100644 --- a/src/learner/dmatrix.h +++ b/src/learner/dmatrix.h @@ -6,6 +6,7 @@ * used for regression/classification/ranking * \author Tianqi Chen */ +#include #include "../data.h" namespace xgboost { @@ -43,7 +44,7 @@ struct MetaInfo { } /*! \brief get weight of each instances */ inline float GetWeight(size_t i) const { - if(weights.size() != 0) { + if (weights.size() != 0) { return weights[i]; } else { return 1.0f; @@ -51,7 +52,7 @@ struct MetaInfo { } /*! \brief get root index of i-th instance */ inline float GetRoot(size_t i) const { - if(root_index.size() != 0) { + if (root_index.size() != 0) { return static_cast(root_index[i]); } else { return 0; @@ -76,7 +77,7 @@ struct MetaInfo { // try to load group information from file, if exists inline bool TryLoadGroup(const char* fname, bool silent = false) { FILE *fi = fopen64(fname, "r"); - if (fi == NULL) return false; + if (fi == NULL) return false; group_ptr.push_back(0); unsigned nline; while (fscanf(fi, "%u", &nline) == 1) { @@ -110,6 +111,11 @@ struct MetaInfo { */ template struct DMatrix { + /*! + * \brief magic number associated with this object + * used to check if it is specific instance + */ + const int magic; /*! \brief meta information about the dataset */ MetaInfo info; /*! \brief feature matrix about data content */ @@ -120,7 +126,7 @@ struct DMatrix { */ void *cache_learner_ptr_; /*! 
\brief default constructor */
-  DMatrix(void) : cache_learner_ptr_(NULL) {}
+  explicit DMatrix(int magic) : magic(magic), cache_learner_ptr_(NULL) {}
   // virtual destructor
   virtual ~DMatrix(void){}
 };
diff --git a/src/learner/evaluation.h b/src/learner/evaluation.h index d51e5b767..fa25aa7d7 100644 --- a/src/learner/evaluation.h +++ b/src/learner/evaluation.h @@ -39,7 +39,7 @@ inline IEvaluator* CreateEvaluator(const char *name) {
   if (!strcmp(name, "merror")) return new EvalMatchError();
   if (!strcmp(name, "logloss")) return new EvalLogLoss();
   if (!strcmp(name, "auc")) return new EvalAuc();
-  if (!strncmp(name, "ams@",4)) return new EvalAMS(name);
+  if (!strncmp(name, "ams@", 4)) return new EvalAMS(name);
   if (!strncmp(name, "pre@", 4)) return new EvalPrecision(name);
   if (!strncmp(name, "map", 3)) return new EvalMAP(name);
   if (!strncmp(name, "ndcg", 3)) return new EvalNDCG(name);
diff --git a/src/learner/learner-inl.hpp b/src/learner/learner-inl.hpp index 3c04837c3..d7ad3f71d 100644 --- a/src/learner/learner-inl.hpp +++ b/src/learner/learner-inl.hpp @@ -78,6 +78,7 @@ class BoostLearner {
   inline void SetParam(const char *name, const char *val) {
     if (!strcmp(name, "silent")) silent = atoi(val);
     if (!strcmp(name, "eval_metric")) evaluator_.AddEval(val);
+    if (!strcmp("seed", name)) random::Seed(atoi(val));
     if (gbm_ == NULL) {
       if (!strcmp(name, "objective")) name_obj_ = val;
       if (!strcmp(name, "booster")) name_gbm_ = val;
@@ -132,16 +133,24 @@ class BoostLearner {
     utils::FileStream fo(utils::FopenCheck(fname, "wb"));
     this->SaveModel(fo);
     fo.Close();
-  }
+  }
+  /*!
+   * \brief check if data matrix is ready to be used by training,
+   *  if not, initialize it
+   * \param p_train pointer to the matrix used by training
+   */
+  inline void CheckInit(DMatrix *p_train) const {
+    p_train->fmat.InitColAccess();
+  }
   /*!
    * \brief update the model for one iteration
    * \param iter current iteration number
   * \param train reference to the data matrix
    */
-  inline void UpdateOneIter(int iter, DMatrix *p_train) {
-    this->PredictRaw(*p_train, &preds_);
-    obj_->GetGradient(preds_, p_train->info, iter, &gpair_);
-    gbm_->DoBoost(gpair_, p_train->fmat, p_train->info.root_index);
+  inline void UpdateOneIter(int iter, const DMatrix &train) {
+    this->PredictRaw(train, &preds_);
+    obj_->GetGradient(preds_, train.info, iter, &gpair_);
+    gbm_->DoBoost(gpair_, train.fmat, train.info.root_index);
   }
   /*!
* \brief evaluate the model for specific iteration diff --git a/src/xgboost_main.cpp b/src/xgboost_main.cpp index 16139f0d8..f3fc9201d 100644 --- a/src/xgboost_main.cpp +++ b/src/xgboost_main.cpp @@ -48,7 +48,6 @@ class BoostLearnTask{ inline void SetParam(const char *name, const char *val) { if (!strcmp("silent", name)) silent = atoi(val); if (!strcmp("use_buffer", name)) use_buffer = atoi(val); - if (!strcmp("seed", name)) random::Seed(atoi(val)); if (!strcmp("num_round", name)) num_round = atoi(val); if (!strcmp("save_period", name)) save_period = atoi(val); if (!strcmp("eval_train", name)) eval_train = atoi(val); @@ -103,9 +102,6 @@ class BoostLearnTask{ } else { // training data = io::LoadDataMatrix(train_path.c_str(), silent != 0, use_buffer != 0); - {// intialize column access - data->fmat.InitColAccess(); - } utils::Assert(eval_data_names.size() == eval_data_paths.size(), "BUG"); for (size_t i = 0; i < eval_data_names.size(); ++i) { deval.push_back(io::LoadDataMatrix(eval_data_paths[i].c_str(), silent != 0, use_buffer != 0)); @@ -139,10 +135,11 @@ class BoostLearnTask{ inline void TaskTrain(void) { const time_t start = time(NULL); unsigned long elapsed = 0; + learner.CheckInit(data); for (int i = 0; i < num_round; ++i) { elapsed = (unsigned long)(time(NULL) - start); if (!silent) printf("boosting round %d, %lu sec elapsed\n", i, elapsed); - learner.UpdateOneIter(i,data); + learner.UpdateOneIter(i, *data); std::string res = learner.EvalOneIter(i, devalall, eval_data_names); fprintf(stderr, "%s\n", res.c_str()); if (save_period != 0 && (i + 1) % save_period == 0) { From 301685e0a431e11fdf05286c95d7394733c651c3 Mon Sep 17 00:00:00 2001 From: tqchen Date: Sun, 17 Aug 2014 18:43:25 -0700 Subject: [PATCH 10/52] python module pass basic test --- python/example/demo.py | 49 +++----- python/xgboost.py | 212 ++++++++++++++++++++-------------- python/xgboost_wrapper.cpp | 17 +++ src/data.h | 2 +- src/io/io.cpp | 7 +- src/io/simple_dmatrix-inl.hpp | 2 +- src/learner/dmatrix.h | 2 +- 7 files changed, 170 insertions(+), 121 deletions(-) diff --git a/python/example/demo.py b/python/example/demo.py index 389f139ff..e14c806aa 100755 --- a/python/example/demo.py +++ b/python/example/demo.py @@ -17,36 +17,17 @@ param = {'bst:max_depth':2, 'bst:eta':1, 'silent':1, 'objective':'binary:logisti # specify validations set to watch performance evallist = [(dtest,'eval'), (dtrain,'train')] num_round = 2 -bst = xgb.train( param, dtrain, num_round, evallist ) +bst = xgb.train(param, dtrain, num_round, evallist) # this is prediction -preds = bst.predict( dtest ) +preds = bst.predict(dtest) labels = dtest.get_label() print ('error=%f' % ( sum(1 for i in range(len(preds)) if int(preds[i]>0.5)!=labels[i]) /float(len(preds)))) bst.save_model('0001.model') # dump model bst.dump_model('dump.raw.txt') # dump model with feature map -bst.dump_model('dump.raw.txt','featmap.txt') - -### -# build dmatrix in python iteratively -# -print ('start running example of build DMatrix in python') -dtrain = xgb.DMatrix() -labels = [] -for l in open('agaricus.txt.train'): - arr = l.split() - labels.append( int(arr[0])) - feats = [] - for it in arr[1:]: - k,v = it.split(':') - feats.append( (int(k), float(v)) ) - dtrain.add_row( feats ) -dtrain.set_label( labels ) -evallist = [(dtest,'eval'), (dtrain,'train')] - -bst = xgb.train( param, dtrain, num_round, evallist ) +bst.dump_model('dump.nice.txt','featmap.txt') ### # build dmatrix from scipy.sparse @@ -61,7 +42,6 @@ for l in open('agaricus.txt.train'): k,v = it.split(':') 
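# each libsvm entry "k:v" yields one (row, col, value) triplet below;
# scipy assembles the collected triplets into a CSR matrix after the loop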
         row.append(i); col.append(int(k)); dat.append(float(v))
     i += 1
-
 csr = scipy.sparse.csr_matrix( (dat, (row,col)) )
 dtrain = xgb.DMatrix( csr )
 dtrain.set_label(labels)
@@ -71,7 +51,7 @@ bst = xgb.train( param, dtrain, num_round, evallist )
 print ('start running example of build DMatrix from numpy array')
 # NOTE: npymat is a numpy array; we will convert it into scipy.sparse.csr_matrix internally, then convert to DMatrix
 npymat = csr.todense()
-dtrain = xgb.DMatrix( npymat )
+dtrain = xgb.DMatrix( npymat)
 dtrain.set_label(labels)
 evallist = [(dtest,'eval'), (dtrain,'train')]
 bst = xgb.train( param, dtrain, num_round, evallist )
@@ -81,16 +61,25 @@ bst = xgb.train( param, dtrain, num_round, evallist )
 #
 print ('start running example of using a customized objective function')
-# note: set objective=binary:logistic means the prediction will get logistic transformed
-# in most cases, we may want to leave it as default
-param = {'bst:max_depth':2, 'bst:eta':1, 'silent':1, 'objective':'binary:logistic' }
+# note: for a customized objective function, we leave objective as default
+# note: what we are getting is the margin value in prediction
+# you must know what you are doing
+param = {'bst:max_depth':2, 'bst:eta':1, 'silent':1 }
 # user defined objective function: given prediction, return gradient and second order gradient
-def logregobj( preds, dtrain ):
+# this is log likelihood loss
+def logregobj(preds, dtrain):
     labels = dtrain.get_label()
+    preds = 1.0 / (1.0 + np.exp(-preds))
     grad = preds - labels
     hess = preds * (1.0-preds)
     return grad, hess
-# training with customized objective, we can also do step by step training, simply look at xgboost.py's implementation of train
-bst = xgb.train( param, dtrain, num_round, evallist, logregobj )
+# user defined evaluation function: return a pair (metric_name, result)
+def evalerror(preds, dtrain):
+    labels = dtrain.get_label()
+    return 'error', float(sum(labels != (preds > 0.0))) / len(labels)
+
+# training with customized objective; we can also do step by step training,
+# simply look at xgboost.py's implementation of train
+bst = xgb.train(param, dtrain, num_round, evallist, logregobj, evalerror)
diff --git a/python/xgboost.py b/python/xgboost.py
index 2e8deefa8..c7a04d4c3 100644
--- a/python/xgboost.py
+++ b/python/xgboost.py
@@ -4,6 +4,7 @@
 import ctypes
 import os
 # optionally have scipy sparse, though not necessary
 import numpy
+import sys
 import numpy.ctypeslib
 import scipy.sparse as scp
@@ -13,33 +14,39 @@
 XGBOOST_PATH = os.path.dirname(__file__)+'/libxgboostwrapper.so'
 # load in xgboost library
 xglib = ctypes.cdll.LoadLibrary(XGBOOST_PATH)
-xglib.XGDMatrixCreate.restype = ctypes.c_void_p
+xglib.XGDMatrixCreateFromFile.restype = ctypes.c_void_p
+xglib.XGDMatrixCreateFromCSR.restype = ctypes.c_void_p
+xglib.XGDMatrixCreateFromMat.restype = ctypes.c_void_p
+xglib.XGDMatrixSliceDMatrix.restype = ctypes.c_void_p
+xglib.XGDMatrixGetLabel.restype = ctypes.POINTER(ctypes.c_float)
+xglib.XGDMatrixGetWeight.restype = ctypes.POINTER(ctypes.c_float)
 xglib.XGDMatrixNumRow.restype = ctypes.c_ulong
-xglib.XGDMatrixGetLabel.restype = ctypes.POINTER( ctypes.c_float )
-xglib.XGDMatrixGetWeight.restype = ctypes.POINTER( ctypes.c_float )
-xglib.XGDMatrixGetRow.restype = ctypes.POINTER( REntry )
-xglib.XGBoosterCreate.restype = ctypes.c_void_p
-xglib.XGBoosterPredict.restype = ctypes.POINTER( ctypes.c_float )
+xglib.XGBoosterCreate.restype = ctypes.c_void_p
+xglib.XGBoosterPredict.restype = ctypes.POINTER(ctypes.c_float)
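These restype declarations are what let Python interpret the raw pointers coming back from the C++ wrapper: ctypes otherwise assumes every foreign function returns a C int, which silently truncates 64-bit pointers. The conversion back to a numpy array then follows the ctypes2numpy pattern defined just below. A minimal self-contained sketch of that round trip (libfoo.so and get_scores are hypothetical names, used only for illustration):

    import ctypes
    import numpy
    lib = ctypes.cdll.LoadLibrary('./libfoo.so')             # hypothetical library
    lib.get_scores.restype = ctypes.POINTER(ctypes.c_float)  # declare before the first call
    length = ctypes.c_ulong()
    cptr = lib.get_scores(ctypes.byref(length))              # C side fills in the length
    res = numpy.zeros(length.value, dtype='float32')
    # copy the C-owned buffer into memory owned by numpy, as ctypes2numpy does
    ctypes.memmove(res.ctypes.data, cptr, length.value * res.strides[0])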
+xglib.XGBoosterEvalOneIter.restype = ctypes.c_char_p +xglib.XGBoosterDumpModel.restype = ctypes.POINTER(ctypes.c_char_p) + + +def ctypes2numpy(cptr, length): # convert a ctypes pointer array to numpy - assert isinstance( cptr, ctypes.POINTER( ctypes.c_float ) ) - res = numpy.zeros( length, dtype='float32' ) - assert ctypes.memmove( res.ctypes.data, cptr, length * res.strides[0] ) + assert isinstance(cptr, ctypes.POINTER(ctypes.c_float)) + res = numpy.zeros(length, dtype='float32') + assert ctypes.memmove(res.ctypes.data, cptr, length * res.strides[0]) return res # data matrix used in xgboost class DMatrix: # constructor - def __init__(self, data=None, label=None, missing=0.0, weight = None): + def __init__(self, data, label=None, missing=0.0, weight = None): # force into void_p, mac need to pass things in as void_p - self.handle = ctypes.c_void_p( xglib.XGDMatrixCreate() ) if data == None: + self.handle = None return - if isinstance(data,str): - xglib.XGDMatrixLoad(self.handle, ctypes.c_char_p(data.encode('utf-8')), 1) - elif isinstance(data,scp.csr_matrix): - self.__init_from_csr(data) + if isinstance(data, str): + self.handle = xglib.XGDMatrixCreateFromFile(ctypes.c_char_p(data.encode('utf-8')), 1) + elif isinstance(data, scp.csr_matrix): + self.__init_from_csr(data) elif isinstance(data, numpy.ndarray) and len(data.shape) == 2: self.__init_from_npy2d(data, missing) else: @@ -52,77 +59,68 @@ class DMatrix: self.set_label(label) if weight !=None: self.set_weight(weight) - # convert data from csr matrix - def __init_from_csr(self,csr): + def __init_from_csr(self, csr): assert len(csr.indices) == len(csr.data) - xglib.XGDMatrixParseCSR( self.handle, - ( ctypes.c_ulong * len(csr.indptr) )(*csr.indptr), - ( ctypes.c_uint * len(csr.indices) )(*csr.indices), - ( ctypes.c_float * len(csr.data) )(*csr.data), - len(csr.indptr), len(csr.data) ) + self.handle = xglib.XGDMatrixCreateFromCSR( + (ctypes.c_ulong * len(csr.indptr))(*csr.indptr), + (ctypes.c_uint * len(csr.indices))(*csr.indices), + (ctypes.c_float * len(csr.data))(*csr.data), + len(csr.indptr), len(csr.data)) # convert data from numpy matrix def __init_from_npy2d(self,mat,missing): - data = numpy.array( mat.reshape(mat.size), dtype='float32' ) - xglib.XGDMatrixParseMat( self.handle, - data.ctypes.data_as(ctypes.POINTER(ctypes.c_float)), - mat.shape[0], mat.shape[1], ctypes.c_float(missing) ) + data = numpy.array(mat.reshape(mat.size), dtype='float32') + self.handle = xglib.XGDMatrixCreateFromMat( + data.ctypes.data_as(ctypes.POINTER(ctypes.c_float)), + mat.shape[0], mat.shape[1], ctypes.c_float(missing)) # destructor def __del__(self): - xglib.XGDMatrixFree(self.handle) - # load data from file - def load(self, fname, silent=True): - xglib.XGDMatrixLoad(self.handle, ctypes.c_char_p(fname.encode('utf-8')), int(silent)) + xglib.XGDMatrixFree(self.handle) # load data from file def save_binary(self, fname, silent=True): xglib.XGDMatrixSaveBinary(self.handle, ctypes.c_char_p(fname.encode('utf-8')), int(silent)) # set label of dmatrix def set_label(self, label): - xglib.XGDMatrixSetLabel(self.handle, (ctypes.c_float*len(label))(*label), len(label) ) + xglib.XGDMatrixSetLabel(self.handle, (ctypes.c_float*len(label))(*label), len(label)) # set group size of dmatrix, used for rank def set_group(self, group): - xglib.XGDMatrixSetGroup(self.handle, (ctypes.c_uint*len(group))(*group), len(group) ) + xglib.XGDMatrixSetGroup(self.handle, (ctypes.c_uint*len(group))(*group), len(group)) # set weight of each instances def set_weight(self, weight): - 
xglib.XGDMatrixSetWeight(self.handle, (ctypes.c_float*len(weight))(*weight), len(weight) ) + xglib.XGDMatrixSetWeight(self.handle, (ctypes.c_float*len(weight))(*weight), len(weight)) # get label from dmatrix def get_label(self): length = ctypes.c_ulong() labels = xglib.XGDMatrixGetLabel(self.handle, ctypes.byref(length)) - return ctypes2numpy( labels, length.value ); + return ctypes2numpy(labels, length.value) # get weight from dmatrix def get_weight(self): length = ctypes.c_ulong() weights = xglib.XGDMatrixGetWeight(self.handle, ctypes.byref(length)) - return ctypes2numpy( weights, length.value ); - # clear everything - def clear(self): - xglib.XGDMatrixClear(self.handle) + return ctypes2numpy(weights, length.value) def num_row(self): return xglib.XGDMatrixNumRow(self.handle) - # append a row to DMatrix - def add_row(self, row): - xglib.XGDMatrixAddRow(self.handle, (REntry*len(row))(*row), len(row) ) - # get n-throw from DMatrix - def __getitem__(self, ridx): - length = ctypes.c_ulong() - row = xglib.XGDMatrixGetRow(self.handle, ridx, ctypes.byref(length) ); - return [ (int(row[i].findex),row[i].fvalue) for i in range(length.value) ] + # slice the DMatrix to return a new DMatrix that only contains rindex + def slice(self, rindex): + res = DMatrix(None) + res.handle = xglib.XGDMatrixSliceDMatrix( + self.handle, (ctypes.c_int*len(rindex))(*rindex), len(rindex)) + return res class Booster: """learner class """ def __init__(self, params={}, cache=[]): """ constructor, param: """ for d in cache: - assert isinstance(d,DMatrix) - dmats = ( ctypes.c_void_p * len(cache) )(*[ d.handle for d in cache]) - self.handle = ctypes.c_void_p( xglib.XGBoosterCreate( dmats, len(cache) ) ) - self.set_param( {'seed':0} ) - self.set_param( params ) + assert isinstance(d, DMatrix) + dmats = (ctypes.c_void_p * len(cache))(*[ d.handle for d in cache]) + self.handle = ctypes.c_void_p(xglib.XGBoosterCreate(dmats, len(cache))) + self.set_param({'seed':0}) + self.set_param(params) def __del__(self): xglib.XGBoosterFree(self.handle) def set_param(self, params, pv=None): - if isinstance(params,dict): + if isinstance(params, dict): for k, v in params.items(): xglib.XGBoosterSetParam( self.handle, ctypes.c_char_p(k.encode('utf-8')), @@ -130,72 +128,112 @@ class Booster: elif isinstance(params,str) and pv != None: xglib.XGBoosterSetParam( self.handle, ctypes.c_char_p(params.encode('utf-8')), - ctypes.c_char_p(str(pv).encode('utf-8')) ) + ctypes.c_char_p(str(pv).encode('utf-8'))) else: for k, v in params: xglib.XGBoosterSetParam( self.handle, ctypes.c_char_p(k.encode('utf-8')), - ctypes.c_char_p(str(v).encode('utf-8')) ) - def update(self, dtrain): + ctypes.c_char_p(str(v).encode('utf-8'))) + def update(self, dtrain, it): """ update """ assert isinstance(dtrain, DMatrix) - xglib.XGBoosterUpdateOneIter( self.handle, dtrain.handle ) - def boost(self, dtrain, grad, hess, bst_group = -1): + xglib.XGBoosterUpdateOneIter(self.handle, it, dtrain.handle) + def boost(self, dtrain, grad, hess): """ update """ assert len(grad) == len(hess) assert isinstance(dtrain, DMatrix) - xglib.XGBoosterBoostOneIter( self.handle, dtrain.handle, - (ctypes.c_float*len(grad))(*grad), - (ctypes.c_float*len(hess))(*hess), - len(grad), bst_group ) - def update_interact(self, dtrain, action, booster_index=None): - """ beta: update with specified action""" - assert isinstance(dtrain, DMatrix) - if booster_index != None: - self.set_param('interact:booster_index', str(booster_index)) - xglib.XGBoosterUpdateInteract( - self.handle, dtrain.handle, 
ctypes.c_char_p(str(action)) ) + xglib.XGBoosterBoostOneIter(self.handle, dtrain.handle, + (ctypes.c_float*len(grad))(*grad), + (ctypes.c_float*len(hess))(*hess), + len(grad)) def eval_set(self, evals, it = 0): for d in evals: assert isinstance(d[0], DMatrix) assert isinstance(d[1], str) - dmats = ( ctypes.c_void_p * len(evals) )(*[ d[0].handle for d in evals]) - evnames = ( ctypes.c_char_p * len(evals) )( - *[ctypes.c_char_p(d[1].encode('utf-8')) for d in evals]) - xglib.XGBoosterEvalOneIter( self.handle, it, dmats, evnames, len(evals) ) - def eval(self, mat, name = 'eval', it = 0 ): - self.eval_set( [(mat,name)], it) - def predict(self, data, bst_group = -1): + dmats = (ctypes.c_void_p * len(evals) )(*[ d[0].handle for d in evals]) + evnames = (ctypes.c_char_p * len(evals))( + * [ctypes.c_char_p(d[1].encode('utf-8')) for d in evals]) + return xglib.XGBoosterEvalOneIter(self.handle, it, dmats, evnames, len(evals)) + def eval(self, mat, name = 'eval', it = 0): + return self.eval_set( [(mat,name)], it) + def predict(self, data): length = ctypes.c_ulong() - preds = xglib.XGBoosterPredict( self.handle, data.handle, ctypes.byref(length), bst_group) - return ctypes2numpy( preds, length.value ) + preds = xglib.XGBoosterPredict(self.handle, data.handle, ctypes.byref(length)) + return ctypes2numpy(preds, length.value) def save_model(self, fname): """ save model to file """ xglib.XGBoosterSaveModel(self.handle, ctypes.c_char_p(fname.encode('utf-8'))) def load_model(self, fname): """load model from file""" xglib.XGBoosterLoadModel( self.handle, ctypes.c_char_p(fname.encode('utf-8')) ) - def dump_model(self, fname, fmap=''): + def dump_model(self, fo, fmap=''): """dump model into text file""" - xglib.XGBoosterDumpModel( - self.handle, ctypes.c_char_p(fname.encode('utf-8')), - ctypes.c_char_p(fmap.encode('utf-8'))) + if isinstance(fo,str): + fo = open(fo,'w') + need_close = True + else: + need_close = False + ret = self.get_dump(fmap) + for i in range(len(ret)): + fo.write('booster[%d]:\n' %i) + fo.write( ret[i] ) + if need_close: + fo.close() + def get_dump(self, fmap=''): + """get dump of model as list of strings """ + length = ctypes.c_ulong() + sarr = xglib.XGBoosterDumpModel(self.handle, ctypes.c_char_p(fmap.encode('utf-8')), ctypes.byref(length)) + res = [] + for i in range(length.value): + res.append( str(sarr[i]) ) + return res + def get_fscore(self, fmap=''): + """ get feature importance of each feature """ + trees = self.get_dump(fmap) + fmap = {} + for tree in trees: + print tree + for l in tree.split('\n'): + arr = l.split('[') + if len(arr) == 1: + continue + fid = arr[1].split(']')[0] + fid = fid.split('<')[0] + if fid not in fmap: + fmap[fid] = 1 + else: + fmap[fid]+= 1 + return fmap -def train(params, dtrain, num_boost_round = 10, evals = [], obj=None): +def evaluate(bst, evals, it, feval = None): + """evaluation on eval set""" + if feval != None: + res = '[%d]' % it + for dm, evname in evals: + name, val = feval(bst.predict(dm), dm) + res += '\t%s-%s:%f' % (evname, name, val) + else: + res = bst.eval_set(evals, it) + + return res + +def train(params, dtrain, num_boost_round = 10, evals = [], obj=None, feval=None): """ train a booster with given paramaters """ bst = Booster(params, [dtrain]+[ d[0] for d in evals ] ) if obj == None: for i in range(num_boost_round): - bst.update( dtrain ) + bst.update( dtrain, i ) if len(evals) != 0: - bst.eval_set( evals, i ) + sys.stderr.write(evaluate(bst, evals, i, feval)+'\n') else: + if len(evals) != 0 and feval == None: + print 'you need to 
provide your own evaluation function' + # try customized objective function for i in range(num_boost_round): pred = bst.predict( dtrain ) grad, hess = obj( pred, dtrain ) bst.boost( dtrain, grad, hess ) if len(evals) != 0: - bst.eval_set( evals, i ) + sys.stderr.write(evaluate(bst, evals, i, feval)+'\n') return bst - diff --git a/python/xgboost_wrapper.cpp b/python/xgboost_wrapper.cpp index e43095920..478d74936 100644 --- a/python/xgboost_wrapper.cpp +++ b/python/xgboost_wrapper.cpp @@ -20,9 +20,11 @@ class Booster: public learner::BoostLearner { public: explicit Booster(const std::vector& mats) { this->silent = 1; + this->init_model = false; this->SetCacheData(mats); } const float *Pred(const DataMatrix &dmat, size_t *len) { + this->CheckInitModel(); this->Predict(dmat, &this->preds_); *len = this->preds_.size(); return &this->preds_[0]; @@ -37,6 +39,15 @@ class Booster: public learner::BoostLearner { } gbm_->DoBoost(gpair_, train.fmat, train.info.root_index); } + inline void CheckInitModel(void) { + if (!init_model) { + this->InitModel(); init_model = true; + } + } + inline void LoadModel(const char *fname) { + learner::BoostLearner::LoadModel(fname); + this->init_model = true; + } inline const char** GetModelDump(const utils::FeatMap& fmap, bool with_stats, size_t *len) { model_dump = this->DumpModel(fmap, with_stats); model_dump_cptr.resize(model_dump.size()); @@ -52,6 +63,9 @@ class Booster: public learner::BoostLearner { // temporal space to save model dump std::vector model_dump; std::vector model_dump_cptr; + + private: + bool init_model; }; } // namespace wrapper } // namespace xgboost @@ -199,6 +213,7 @@ extern "C"{ void XGBoosterUpdateOneIter(void *handle, int iter, void *dtrain) { Booster *bst = static_cast(handle); DataMatrix *dtr = static_cast(dtrain); + bst->CheckInitModel(); bst->CheckInit(dtr); bst->UpdateOneIter(iter, *dtr); } @@ -206,6 +221,7 @@ extern "C"{ float *grad, float *hess, size_t len) { Booster *bst = static_cast(handle); DataMatrix *dtr = static_cast(dtrain); + bst->CheckInitModel(); bst->CheckInit(dtr); bst->BoostOneIter(*dtr, grad, hess, len); } @@ -217,6 +233,7 @@ extern "C"{ mats.push_back(static_cast(dmats[i])); names.push_back(std::string(evnames[i])); } + bst->CheckInitModel(); bst->eval_str = bst->EvalOneIter(iter, mats, names); return bst->eval_str.c_str(); } diff --git a/src/data.h b/src/data.h index c60b58b8a..df43551e3 100644 --- a/src/data.h +++ b/src/data.h @@ -242,7 +242,7 @@ class FMatrixS : public FMatrixInterface{ * \brief save column access data into stream * \param fo output stream to save to */ - inline void SaveColAccess(utils::IStream &fo) { + inline void SaveColAccess(utils::IStream &fo) const { fo.Write(&num_buffered_row_, sizeof(num_buffered_row_)); if (num_buffered_row_ != 0) { SaveBinary(fo, col_ptr_, col_data_); diff --git a/src/io/io.cpp b/src/io/io.cpp index 4ddf61eb0..7689a4560 100644 --- a/src/io/io.cpp +++ b/src/io/io.cpp @@ -15,7 +15,12 @@ DataMatrix* LoadDataMatrix(const char *fname, bool silent, bool savebuffer) { } void SaveDataMatrix(const DataMatrix &dmat, const char *fname, bool silent) { - utils::Error("not implemented"); + if (dmat.magic == DMatrixSimple::kMagic){ + const DMatrixSimple *p_dmat = static_cast(&dmat); + p_dmat->SaveBinary(fname, silent); + } else { + utils::Error("not implemented"); + } } } // namespace io diff --git a/src/io/simple_dmatrix-inl.hpp b/src/io/simple_dmatrix-inl.hpp index f996b8d8c..b8b15adce 100644 --- a/src/io/simple_dmatrix-inl.hpp +++ b/src/io/simple_dmatrix-inl.hpp @@ -148,7 +148,7 @@ 
class DMatrixSimple : public DataMatrix {
   * \param fname name of binary data
   * \param silent whether to print information or not
   */
-  inline void SaveBinary(const char* fname, bool silent = false) {
+  inline void SaveBinary(const char* fname, bool silent = false) const {
     utils::FileStream fs(utils::FopenCheck(fname, "wb"));
     int magic = kMagic;
     fs.Write(&magic, sizeof(magic));
diff --git a/src/learner/dmatrix.h b/src/learner/dmatrix.h
index b558b070b..144b1a44e 100644
--- a/src/learner/dmatrix.h
+++ b/src/learner/dmatrix.h
@@ -58,7 +58,7 @@ struct MetaInfo {
       return 0;
     }
   }
-  inline void SaveBinary(utils::IStream &fo) {
+  inline void SaveBinary(utils::IStream &fo) const {
     fo.Write(&num_row, sizeof(num_row));
     fo.Write(&num_col, sizeof(num_col));
     fo.Write(labels);

From e77df138157916f716845996f07d855e4f5c221c Mon Sep 17 00:00:00 2001
From: tqchen
Date: Sun, 17 Aug 2014 18:49:54 -0700
Subject: [PATCH 11/52] ok

---
 python/example/demo.py | 7 +++++++
 python/xgboost.py      | 3 ---
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/python/example/demo.py b/python/example/demo.py
index e14c806aa..a099f56bf 100755
--- a/python/example/demo.py
+++ b/python/example/demo.py
@@ -76,8 +76,15 @@ def logregobj(preds, dtrain):
     return grad, hess
 # user defined evaluation function, return a pair metric_name, result
+# NOTE: when you use a customized loss function, the default prediction value is the margin
+# this may make the built-in evaluation metrics not function properly
+# for example, with logistic loss the prediction is the score before logistic transformation
+# the built-in evaluation error assumes the input is after logistic transformation
+# keep this in mind when you use the customization; you may need to write a customized evaluation function
 def evalerror(preds, dtrain):
     labels = dtrain.get_label()
+    # return a pair metric_name, result
+    # since preds are margin values (before logistic transformation), cutoff at 0
     return 'error', float(sum(labels != (preds > 0.0))) / len(labels)
 # training with customized objective, we can also do step by step training
 # simply look at xgboost.py's implementation of train
diff --git a/python/xgboost.py b/python/xgboost.py
index c7a04d4c3..f47642898 100644
--- a/python/xgboost.py
+++ b/python/xgboost.py
@@ -226,9 +226,6 @@ def train(params, dtrain, num_boost_round = 10, evals = [], obj=None, feval=None
         if len(evals) != 0:
             sys.stderr.write(evaluate(bst, evals, i, feval)+'\n')
     else:
-        if len(evals) != 0 and feval == None:
-            print 'you need to provide your own evaluation function'
-
         # try customized objective function
         for i in range(num_boost_round):
             pred = bst.predict( dtrain )

From 9df8bb13973b6f8cd37902dd4c69ff3129e7f647 Mon Sep 17 00:00:00 2001
From: tqchen
Date: Sun, 17 Aug 2014 19:16:17 -0700
Subject: [PATCH 12/52] check in softmax multiclass

---
 demo/multiclass_classification/train.py |  7 +-
 python/xgboost_wrapper.cpp              | 38 +++++------
 src/learner/learner-inl.hpp             |  1 +
 src/learner/objective-inl.hpp           | 90 +++++++++++++++++++++++++
 src/learner/objective.h                 |  2 +
 5 files changed, 116 insertions(+), 22 deletions(-)

diff --git a/demo/multiclass_classification/train.py b/demo/multiclass_classification/train.py
index fabc43c45..69214a6c8 100755
--- a/demo/multiclass_classification/train.py
+++ b/demo/multiclass_classification/train.py
@@ -42,8 +42,9 @@ print ('predicting, classification error=%f' % (sum( int(pred[i]) != test_Y[i] f
 # do the same thing again, but output probabilities
 param['objective'] = 'multi:softprob'
 bst = xgb.train(param, xg_train, num_round, watchlist );
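The softprob predictions come back as one flat float array laid out row by row: each example contributes a contiguous block of nclass probabilities (the C++ side indexes it as preds[j * nclass + k]). That is why the reshape below changes from (nclass, ndata) to (ndata, nclass) and the argmax moves to axis=1. A quick sanity check of the layout, using made-up probabilities for 2 examples and 3 classes:

    import numpy as np
    flat = np.array([0.7, 0.2, 0.1,   # example 0: probabilities over 3 classes
                     0.1, 0.3, 0.6])  # example 1
    yprob = flat.reshape(2, 3)        # (ndata, nclass), row-major
    assert list(np.argmax(yprob, axis=1)) == [0, 2]

-# get prediction, this is in 1D array, need reshape to (nclass,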
ndata) -yprob = bst.predict( xg_test ).reshape( 6, test_Y.shape[0] ) -ylabel = np.argmax( yprob, axis=0) +# Note: this convention has been changed since xgboost-unity +# get prediction, this is in 1D array, need reshape to (ndata, nclass) +yprob = bst.predict( xg_test ).reshape( test_Y.shape[0], 6 ) +ylabel = np.argmax(yprob, axis=1) print ('predicting, classification error=%f' % (sum( int(ylabel[i]) != test_Y[i] for i in range(len(test_Y))) / float(len(test_Y)) )) diff --git a/python/xgboost_wrapper.cpp b/python/xgboost_wrapper.cpp index 478d74936..8b89d1d25 100644 --- a/python/xgboost_wrapper.cpp +++ b/python/xgboost_wrapper.cpp @@ -32,7 +32,7 @@ class Booster: public learner::BoostLearner { inline void BoostOneIter(const DataMatrix &train, float *grad, float *hess, size_t len) { this->gpair_.resize(len); - const unsigned ndata = static_cast(len); + const unsigned ndata = static_cast(len); #pragma omp parallel for schedule(static) for (unsigned j = 0; j < ndata; ++j) { gpair_[j] = bst_gpair(grad[j], hess[j]); @@ -42,7 +42,7 @@ class Booster: public learner::BoostLearner { inline void CheckInitModel(void) { if (!init_model) { this->InitModel(); init_model = true; - } + } } inline void LoadModel(const char *fname) { learner::BoostLearner::LoadModel(fname); @@ -50,7 +50,7 @@ class Booster: public learner::BoostLearner { } inline const char** GetModelDump(const utils::FeatMap& fmap, bool with_stats, size_t *len) { model_dump = this->DumpModel(fmap, with_stats); - model_dump_cptr.resize(model_dump.size()); + model_dump_cptr.resize(model_dump.size()); for (size_t i = 0; i < model_dump.size(); ++i) { model_dump_cptr[i] = model_dump[i].c_str(); } @@ -82,11 +82,11 @@ extern "C"{ size_t nindptr, size_t nelem) { DMatrixSimple *p_mat = new DMatrixSimple(); - DMatrixSimple &mat = *p_mat; + DMatrixSimple &mat = *p_mat; mat.row_ptr_.resize(nindptr); memcpy(&mat.row_ptr_[0], indptr, sizeof(size_t)*nindptr); mat.row_data_.resize(nelem); - for (size_t i = 0; i < nelem; ++ i) { + for (size_t i = 0; i < nelem; ++i) { mat.row_data_[i] = SparseBatch::Entry(indices[i], data[i]); mat.info.num_col = std::max(mat.info.num_col, static_cast(indices[i]+1)); @@ -133,15 +133,15 @@ extern "C"{ ret.info.num_row = len; ret.info.num_col = src.info.num_col; - utils::IIterator *iter = src.fmat.RowIterator(); + utils::IIterator *iter = src.fmat.RowIterator(); iter->BeforeFirst(); utils::Assert(iter->Next(), "slice"); const SparseBatch &batch = iter->Value(); - for(size_t i = 0; i < len; ++i) { + for (size_t i = 0; i < len; ++i) { const int ridx = idxset[i]; SparseBatch::Inst inst = batch[ridx]; utils::Check(ridx < batch.size, "slice index exceed number of rows"); - ret.row_data_.resize(ret.row_data_.size() + inst.length); + ret.row_data_.resize(ret.row_data_.size() + inst.length); memcpy(&ret.row_data_[ret.row_ptr_.back()], inst.data, sizeof(SparseBatch::Entry) * inst.length); ret.row_ptr_.push_back(ret.row_ptr_.back() + inst.length); @@ -160,9 +160,9 @@ extern "C"{ void XGDMatrixFree(void *handle) { delete static_cast(handle); } - void XGDMatrixSaveBinary(void *handle, const char *fname, int silent) { + void XGDMatrixSaveBinary(void *handle, const char *fname, int silent) { SaveDataMatrix(*static_cast(handle), fname, silent); - } + } void XGDMatrixSetLabel(void *handle, const float *label, size_t len) { DataMatrix *pmat = static_cast(handle); pmat->info.labels.resize(len); @@ -173,11 +173,11 @@ extern "C"{ pmat->info.weights.resize(len); memcpy(&(pmat->info).weights[0], weight, sizeof(float) * len); } - void 
XGDMatrixSetGroup(void *handle, const unsigned *group, size_t len){ + void XGDMatrixSetGroup(void *handle, const unsigned *group, size_t len) { DataMatrix *pmat = static_cast(handle); pmat->info.group_ptr.resize(len + 1); pmat->info.group_ptr[0] = 0; - for (size_t i = 0; i < len; ++ i) { + for (size_t i = 0; i < len; ++i) { pmat->info.group_ptr[i+1] = pmat->info.group_ptr[i]+group[i]; } } @@ -217,7 +217,7 @@ extern "C"{ bst->CheckInit(dtr); bst->UpdateOneIter(iter, *dtr); } - void XGBoosterBoostOneIter(void *handle, void *dtrain, + void XGBoosterBoostOneIter(void *handle, void *dtrain, float *grad, float *hess, size_t len) { Booster *bst = static_cast(handle); DataMatrix *dtr = static_cast(dtrain); @@ -225,8 +225,9 @@ extern "C"{ bst->CheckInit(dtr); bst->BoostOneIter(*dtr, grad, hess, len); } - const char* XGBoosterEvalOneIter(void *handle, int iter, void *dmats[], const char *evnames[], size_t len) { - Booster *bst = static_cast(handle); + const char* XGBoosterEvalOneIter(void *handle, int iter, void *dmats[], + const char *evnames[], size_t len) { + Booster *bst = static_cast(handle); std::vector names; std::vector mats; for (size_t i = 0; i < len; ++i) { @@ -243,13 +244,12 @@ extern "C"{ void XGBoosterLoadModel(void *handle, const char *fname) { static_cast(handle)->LoadModel(fname); } - void XGBoosterSaveModel( const void *handle, const char *fname) { + void XGBoosterSaveModel(const void *handle, const char *fname) { static_cast(handle)->SaveModel(fname); } const char** XGBoosterDumpModel(void *handle, const char *fmap, size_t *len){ - using namespace xgboost::utils; - FeatMap featmap; - if(strlen(fmap) != 0) { + utils::FeatMap featmap; + if (strlen(fmap) != 0) { featmap.LoadText(fmap); } return static_cast(handle)->GetModelDump(featmap, false, len); diff --git a/src/learner/learner-inl.hpp b/src/learner/learner-inl.hpp index d7ad3f71d..a183e904a 100644 --- a/src/learner/learner-inl.hpp +++ b/src/learner/learner-inl.hpp @@ -79,6 +79,7 @@ class BoostLearner { if (!strcmp(name, "silent")) silent = atoi(val); if (!strcmp(name, "eval_metric")) evaluator_.AddEval(val); if (!strcmp("seed", name)) random::Seed(atoi(val)); + if (!strcmp(name, "num_class")) this->SetParam("num_output_group", val); if (gbm_ == NULL) { if (!strcmp(name, "objective")) name_obj_ = val; if (!strcmp(name, "booster")) name_gbm_ = val; diff --git a/src/learner/objective-inl.hpp b/src/learner/objective-inl.hpp index d5cc97fcf..5f23e3b00 100644 --- a/src/learner/objective-inl.hpp +++ b/src/learner/objective-inl.hpp @@ -7,7 +7,9 @@ */ #include #include +#include "../data.h" #include "./objective.h" +#include "./helper_utils.h" namespace xgboost { namespace learner { @@ -133,6 +135,94 @@ class RegLossObj : public IObjFunction{ float scale_pos_weight; LossType loss; }; + +// softmax multi-class classification +class SoftmaxMultiClassObj : public IObjFunction { + public: + explicit SoftmaxMultiClassObj(int output_prob) + : output_prob(output_prob) { + nclass = 0; + } + virtual ~SoftmaxMultiClassObj(void) {} + virtual void SetParam(const char *name, const char *val) { + if (!strcmp( "num_class", name )) nclass = atoi(val); + } + virtual void GetGradient(const std::vector& preds, + const MetaInfo &info, + int iter, + std::vector *out_gpair) { + utils::Check(nclass != 0, "must set num_class to use softmax"); + utils::Check(preds.size() == static_cast(nclass) * info.labels.size(), + "SoftmaxMultiClassObj: label size and pred size does not match"); + std::vector &gpair = *out_gpair; + gpair.resize(preds.size()); + const unsigned 
ndata = static_cast(info.labels.size()); + #pragma omp parallel + { + std::vector rec(nclass); + #pragma omp for schedule(static) + for (unsigned j = 0; j < ndata; ++j) { + for (int k = 0; k < nclass; ++k) { + rec[k] = preds[j * nclass + k]; + } + Softmax(&rec); + unsigned label = static_cast(info.labels[j]); + utils::Check(label < nclass, "SoftmaxMultiClassObj: label exceed num_class"); + const float wt = info.GetWeight(j); + for (int k = 0; k < nclass; ++k) { + float p = rec[k]; + const float h = 2.0f * p * (1.0f - p) * wt; + if (label == k) { + gpair[j * nclass + k] = bst_gpair((p - 1.0f) * wt, h); + } else { + gpair[j * nclass + k] = bst_gpair(p* wt, h); + } + } + } + } + } + virtual void PredTransform(std::vector *io_preds) { + this->Transform(io_preds, output_prob); + } + virtual void EvalTransform(std::vector *io_preds) { + this->Transform(io_preds, 0); + } + virtual const char* DefaultEvalMetric(void) { + return "merror"; + } + + private: + inline void Transform(std::vector *io_preds, int prob) { + utils::Check(nclass != 0, "must set num_class to use softmax"); + std::vector &preds = *io_preds; + const unsigned ndata = static_cast(preds.size()/nclass); + #pragma omp parallel + { + std::vector rec(nclass); + #pragma omp for schedule(static) + for (unsigned j = 0; j < ndata; ++j) { + for (int k = 0; k < nclass; ++k) { + rec[k] = preds[j * nclass + k]; + } + if (prob == 0) { + preds[j] = FindMaxIndex(rec); + } else { + Softmax(&rec); + for (int k = 0; k < nclass; ++k) { + preds[j * nclass + k] = rec[k]; + } + } + } + } + if (prob == 0) { + preds.resize(ndata); + } + } + // data field + int nclass; + int output_prob; +}; + } // namespace learner } // namespace xgboost #endif // XGBOOST_LEARNER_OBJECTIVE_INL_HPP_ diff --git a/src/learner/objective.h b/src/learner/objective.h index e38f7cfe4..bca035854 100644 --- a/src/learner/objective.h +++ b/src/learner/objective.h @@ -71,6 +71,8 @@ inline IObjFunction* CreateObjFunction(const char *name) { if (!strcmp("reg:logistic", name)) return new RegLossObj(LossType::kLogisticNeglik); if (!strcmp("binary:logistic", name)) return new RegLossObj(LossType::kLogisticClassify); if (!strcmp("binary:logitraw", name)) return new RegLossObj(LossType::kLogisticRaw); + if (!strcmp("multi:softmax", name)) return new SoftmaxMultiClassObj(0); + if (!strcmp("multi:softprob", name)) return new SoftmaxMultiClassObj(1); utils::Error("unknown objective function type: %s", name); return NULL; } From 5a472145de531692916c2ba937c444fa05bebf0b Mon Sep 17 00:00:00 2001 From: tqchen Date: Sun, 17 Aug 2014 20:32:02 -0700 Subject: [PATCH 13/52] check in rank loss --- src/learner/evaluation-inl.hpp | 4 +- src/learner/objective-inl.hpp | 294 +++++++++++++++++++++++++++++++++ src/learner/objective.h | 3 + 3 files changed, 300 insertions(+), 1 deletion(-) diff --git a/src/learner/evaluation-inl.hpp b/src/learner/evaluation-inl.hpp index 184197d45..43fe48726 100644 --- a/src/learner/evaluation-inl.hpp +++ b/src/learner/evaluation-inl.hpp @@ -216,7 +216,9 @@ struct EvalRankList : public IEvaluator { const MetaInfo &info) const { utils::Check(preds.size() == info.labels.size(), "label size predict size not match"); - const std::vector &gptr = info.group_ptr; + // quick consistency when group is not available + std::vector tgptr(2, 0); tgptr[1] = preds.size(); + const std::vector &gptr = info.group_ptr.size() == 0 ? 
tgptr : info.group_ptr; utils::Assert(gptr.size() != 0, "must specify group when constructing rank file"); utils::Assert(gptr.back() == preds.size(), "EvalRanklist: group structure must match number of prediction"); diff --git a/src/learner/objective-inl.hpp b/src/learner/objective-inl.hpp index 5f23e3b00..137349975 100644 --- a/src/learner/objective-inl.hpp +++ b/src/learner/objective-inl.hpp @@ -7,9 +7,13 @@ */ #include #include +#include +#include #include "../data.h" #include "./objective.h" #include "./helper_utils.h" +#include "../utils/random.h" +#include "../utils/omp.h" namespace xgboost { namespace learner { @@ -223,6 +227,296 @@ class SoftmaxMultiClassObj : public IObjFunction { int output_prob; }; +/*! \brief objective for lambda rank */ +class LambdaRankObj : public IObjFunction { + public: + LambdaRankObj(void) { + loss.loss_type = LossType::kLogisticRaw; + fix_list_weight = 0.0f; + num_pairsample = 1; + } + virtual ~LambdaRankObj(void) {} + virtual void SetParam(const char *name, const char *val) { + if (!strcmp( "loss_type", name )) loss.loss_type = atoi(val); + if (!strcmp( "fix_list_weight", name)) fix_list_weight = static_cast(atof(val)); + if (!strcmp( "num_pairsample", name)) num_pairsample = atoi(val); + } + virtual void GetGradient(const std::vector& preds, + const MetaInfo &info, + int iter, + std::vector *out_gpair) { + utils::Assert(preds.size() == info.labels.size(), "label size predict size not match"); + std::vector &gpair = *out_gpair; + gpair.resize(preds.size()); + // quick consistency when group is not available + std::vector tgptr(2, 0); tgptr[1] = preds.size(); + const std::vector &gptr = info.group_ptr.size() == 0 ? tgptr : info.group_ptr; + utils::Check(gptr.size() != 0 && gptr.back() == preds.size(), + "group structure not consistent with #rows"); + const unsigned ngroup = static_cast(gptr.size() - 1); + + #pragma omp parallel + { + // parall construct, declare random number generator here, so that each + // thread use its own random number generator, seed by thread id and current iteration + random::Random rnd; rnd.Seed(iter* 1111 + omp_get_thread_num()); + std::vector pairs; + std::vector lst; + std::vector< std::pair > rec; + + #pragma omp for schedule(static) + for (unsigned k = 0; k < ngroup; ++k) { + lst.clear(); pairs.clear(); + for (unsigned j = gptr[k]; j < gptr[k+1]; ++j) { + lst.push_back(ListEntry(preds[j], info.labels[j], j)); + gpair[j] = bst_gpair(0.0f, 0.0f); + } + std::sort(lst.begin(), lst.end(), ListEntry::CmpPred); + rec.resize(lst.size()); + for (unsigned i = 0; i < lst.size(); ++i) { + rec[i] = std::make_pair(lst[i].label, i); + } + std::sort(rec.begin(), rec.end(), CmpFirst); + // enumerate buckets with same label, for each item in the lst, grab another sample randomly + for (unsigned i = 0; i < rec.size(); ) { + unsigned j = i + 1; + while (j < rec.size() && rec[j].first == rec[i].first) ++j; + // bucket in [i,j), get a sample outside bucket + unsigned nleft = i, nright = rec.size() - j; + if (nleft + nright != 0) { + int nsample = num_pairsample; + while (nsample --) { + for (unsigned pid = i; pid < j; ++pid) { + unsigned ridx = static_cast(rnd.RandDouble() * (nleft+nright)); + if (ridx < nleft) { + pairs.push_back(LambdaPair(rec[ridx].second, rec[pid].second)); + } else { + pairs.push_back(LambdaPair(rec[pid].second, rec[ridx+j-i].second)); + } + } + } + } + i = j; + } + // get lambda weight for the pairs + this->GetLambdaWeight(lst, &pairs); + // rescale each gradient and hessian so that the lst have constant weighted + float 
scale = 1.0f / num_pairsample; + if (fix_list_weight != 0.0f) { + scale *= fix_list_weight / (gptr[k+1] - gptr[k]); + } + for (size_t i = 0; i < pairs.size(); ++i) { + const ListEntry &pos = lst[pairs[i].pos_index]; + const ListEntry &neg = lst[pairs[i].neg_index]; + const float w = pairs[i].weight * scale; + float p = loss.PredTransform(pos.pred - neg.pred); + float g = loss.FirstOrderGradient(p, 1.0f); + float h = loss.SecondOrderGradient(p, 1.0f); + // accumulate gradient and hessian in both pid, and nid + gpair[pos.rindex].grad += g * w; + gpair[pos.rindex].hess += 2.0f * h; + gpair[neg.rindex].grad -= g * w; + gpair[neg.rindex].hess += 2.0f * h; + } + } + } + } + virtual const char* DefaultEvalMetric(void) { + return "map"; + } + + protected: + /*! \brief helper information in a list */ + struct ListEntry { + /*! \brief the predict score we in the data */ + float pred; + /*! \brief the actual label of the entry */ + float label; + /*! \brief row index in the data matrix */ + unsigned rindex; + // constructor + ListEntry(float pred, float label, unsigned rindex) + : pred(pred), label(label), rindex(rindex) {} + // comparator by prediction + inline static bool CmpPred(const ListEntry &a, const ListEntry &b) { + return a.pred > b.pred; + } + // comparator by label + inline static bool CmpLabel(const ListEntry &a, const ListEntry &b) { + return a.label > b.label; + } + }; + /*! \brief a pair in the lambda rank */ + struct LambdaPair { + /*! \brief positive index: this is a position in the list */ + unsigned pos_index; + /*! \brief negative index: this is a position in the list */ + unsigned neg_index; + /*! \brief weight to be filled in */ + float weight; + // constructor + LambdaPair(unsigned pos_index, unsigned neg_index) + : pos_index(pos_index), neg_index(neg_index), weight(1.0f) {} + }; + /*! 
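Spelling out what the pair loop above computes (a reading of the code, with the pairwise logistic loss; w is the lambda weight filled in by GetLambdaWeight below):

    p = \sigma(s_{pos} - s_{neg}), \qquad \sigma(x) = \frac{1}{1 + e^{-x}}
    g = \frac{\partial L}{\partial s_{pos}} = w\,(p - 1), \qquad
    \frac{\partial L}{\partial s_{neg}} = -w\,(p - 1), \qquad
    h = p\,(1 - p)

Each sampled pair therefore pushes the higher-labeled document up and the other down; the code adds 2h to both hessians, and the 1/num_pairsample scale (optionally fix_list_weight divided by the group size) keeps the total weight of a list roughly constant no matter how many pairs were sampled.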
+ * \brief get lambda weight for existing pairs + * \param list a list that is sorted by pred score + * \param io_pairs record of pairs, containing the pairs to fill in weights + */ + virtual void GetLambdaWeight(const std::vector &sorted_list, + std::vector *io_pairs) = 0; + + private: + // loss function + LossType loss; + // number of samples peformed for each instance + int num_pairsample; + // fix weight of each elements in list + float fix_list_weight; +}; + +class PairwiseRankObj: public LambdaRankObj{ + public: + virtual ~PairwiseRankObj(void){} + + protected: + virtual void GetLambdaWeight(const std::vector &sorted_list, + std::vector *io_pairs) {} +}; + +// beta version: NDCG lambda rank +class LambdaRankObjNDCG : public LambdaRankObj { + public: + virtual ~LambdaRankObjNDCG(void) {} + + protected: + virtual void GetLambdaWeight(const std::vector &sorted_list, + std::vector *io_pairs) { + std::vector &pairs = *io_pairs; + float IDCG; + { + std::vector labels(sorted_list.size()); + for (size_t i = 0; i < sorted_list.size(); ++i) { + labels[i] = sorted_list[i].label; + } + std::sort(labels.begin(), labels.end(), std::greater()); + IDCG = CalcDCG(labels); + } + + if (IDCG == 0.0) { + for (size_t i = 0; i < pairs.size(); ++i) { + pairs[i].weight = 0.0f; + } + } else { + IDCG = 1.0f / IDCG; + for (size_t i = 0; i < pairs.size(); ++i) { + unsigned pos_idx = pairs[i].pos_index; + unsigned neg_idx = pairs[i].neg_index; + float pos_loginv = 1.0f / logf(pos_idx+2.0f); + float neg_loginv = 1.0f / logf(neg_idx+2.0f); + int pos_label = static_cast(sorted_list[pos_idx].label); + int neg_label = static_cast(sorted_list[neg_idx].label); + float original = ((1< &labels) { + double sumdcg = 0.0; + for (size_t i = 0; i < labels.size(); ++i) { + const unsigned rel = labels[i]; + if (rel != 0) { + sumdcg += ((1<(sumdcg); + } +}; + +class LambdaRankObjMAP : public LambdaRankObj { + public: + virtual ~LambdaRankObjMAP(void) {} + + protected: + struct MAPStats { + /* \brief the accumulated precision */ + float ap_acc; + /* \brief the accumulated precision assuming a positive instance is missing */ + float ap_acc_miss; + /* \brief the accumulated precision assuming that one more positive instance is inserted ahead*/ + float ap_acc_add; + /* \brief the accumulated positive instance count */ + float hits; + MAPStats(void) {} + MAPStats(float ap_acc, float ap_acc_miss, float ap_acc_add, float hits) + : ap_acc(ap_acc), ap_acc_miss(ap_acc_miss), ap_acc_add(ap_acc_add), hits(hits) {} + }; + /* + * \brief Obtain the delta MAP if trying to switch the positions of instances in index1 or index2 + * in sorted triples + * \param sorted_list the list containing entry information + * \param index1,index2 the instances switched + * \param map_stats a vector containing the accumulated precisions for each position in a list + */ + inline float GetLambdaMAP(const std::vector &sorted_list, + int index1, int index2, + std::vector &map_stats){ + if (index1 == index2 || map_stats[map_stats.size() - 1].hits == 0) { + return 0.0f; + } + if (index1 > index2) std::swap(index1, index2); + float original = map_stats[index2].ap_acc; + if (index1 != 0) original -= map_stats[index1 - 1].ap_acc; + float changed = 0; + float label1 = sorted_list[index1].label > 0.0f ? 1.0f : 0.0f; + float label2 = sorted_list[index2].label > 0.0f ? 
1.0f : 0.0f; + if (label1 == label2) { + return 0.0; + } else if (label1 < label2) { + changed += map_stats[index2 - 1].ap_acc_add - map_stats[index1].ap_acc_add; + changed += (map_stats[index1].hits + 1.0f) / (index1 + 1); + } else { + changed += map_stats[index2 - 1].ap_acc_miss - map_stats[index1].ap_acc_miss; + changed += map_stats[index2].hits / (index2 + 1); + } + + float ans = (changed - original) / (map_stats[map_stats.size() - 1].hits); + if (ans < 0) ans = -ans; + return ans; + } + /* + * \brief obtain preprocessing results for calculating delta MAP + * \param sorted_list the list containing entry information + * \param map_stats a vector containing the accumulated precisions for each position in a list + */ + inline void GetMAPStats(const std::vector &sorted_list, + std::vector &map_acc){ + map_acc.resize(sorted_list.size()); + float hit = 0, acc1 = 0, acc2 = 0, acc3 = 0; + for (size_t i = 1; i <= sorted_list.size(); ++i) { + if (sorted_list[i - 1].label > 0.0f) { + hit++; + acc1 += hit / i; + acc2 += (hit - 1) / i; + acc3 += (hit + 1) / i; + } + map_acc[i - 1] = MAPStats(acc1,acc2,acc3,hit); + } + } + virtual void GetLambdaWeight(const std::vector &sorted_list, std::vector *io_pairs) { + std::vector &pairs = *io_pairs; + std::vector map_stats; + GetMAPStats(sorted_list, map_stats); + for (size_t i = 0; i < pairs.size(); ++i) { + pairs[i].weight = + GetLambdaMAP(sorted_list, pairs[i].pos_index, pairs[i].neg_index, map_stats); + } + } +}; + } // namespace learner } // namespace xgboost #endif // XGBOOST_LEARNER_OBJECTIVE_INL_HPP_ diff --git a/src/learner/objective.h b/src/learner/objective.h index bca035854..ff870c034 100644 --- a/src/learner/objective.h +++ b/src/learner/objective.h @@ -73,6 +73,9 @@ inline IObjFunction* CreateObjFunction(const char *name) { if (!strcmp("binary:logitraw", name)) return new RegLossObj(LossType::kLogisticRaw); if (!strcmp("multi:softmax", name)) return new SoftmaxMultiClassObj(0); if (!strcmp("multi:softprob", name)) return new SoftmaxMultiClassObj(1); + if (!strcmp("rank:pairwise", name )) return new PairwiseRankObj(); + if (!strcmp("rank:ndcg", name)) return new LambdaRankObjNDCG(); + if (!strcmp("rank:map", name)) return new LambdaRankObjMAP(); utils::Error("unknown objective function type: %s", name); return NULL; } From 4ed4b0814650707b3dfed648a5814b059a6f8cac Mon Sep 17 00:00:00 2001 From: tqchen Date: Sun, 17 Aug 2014 20:47:20 -0700 Subject: [PATCH 14/52] ok --- src/io/io.cpp | 2 +- src/io/io.h | 2 +- src/learner/learner-inl.hpp | 2 +- src/learner/objective-inl.hpp | 75 ++++++++++++++++++++--------------- 4 files changed, 45 insertions(+), 36 deletions(-) diff --git a/src/io/io.cpp b/src/io/io.cpp index 7689a4560..a3ea457ed 100644 --- a/src/io/io.cpp +++ b/src/io/io.cpp @@ -15,7 +15,7 @@ DataMatrix* LoadDataMatrix(const char *fname, bool silent, bool savebuffer) { } void SaveDataMatrix(const DataMatrix &dmat, const char *fname, bool silent) { - if (dmat.magic == DMatrixSimple::kMagic){ + if (dmat.magic == DMatrixSimple::kMagic) { const DMatrixSimple *p_dmat = static_cast(&dmat); p_dmat->SaveBinary(fname, silent); } else { diff --git a/src/io/io.h b/src/io/io.h index 211893509..8674a8c32 100644 --- a/src/io/io.h +++ b/src/io/io.h @@ -30,7 +30,7 @@ DataMatrix* LoadDataMatrix(const char *fname, bool silent = false, bool savebuff * \param fname file name to be savd * \param silent whether print message during saving */ -void SaveDataMatrix(const DataMatrix &dmat, const char *fname, bool silent = false); +void SaveDataMatrix(const DataMatrix 
&dmat, const char *fname, bool silent = false); } // namespace io } // namespace xgboost diff --git a/src/learner/learner-inl.hpp b/src/learner/learner-inl.hpp index a183e904a..fe6f1aa43 100644 --- a/src/learner/learner-inl.hpp +++ b/src/learner/learner-inl.hpp @@ -203,7 +203,7 @@ class BoostLearner { inline std::vector DumpModel(const utils::FeatMap& fmap, int option) { return gbm_->DumpModel(fmap, option); } - + protected: /*! * \brief initialize the objective function and GBM, diff --git a/src/learner/objective-inl.hpp b/src/learner/objective-inl.hpp index 137349975..29e6d3393 100644 --- a/src/learner/objective-inl.hpp +++ b/src/learner/objective-inl.hpp @@ -8,6 +8,7 @@ #include #include #include +#include #include #include "../data.h" #include "./objective.h" @@ -254,19 +255,17 @@ class LambdaRankObj : public IObjFunction { utils::Check(gptr.size() != 0 && gptr.back() == preds.size(), "group structure not consistent with #rows"); const unsigned ngroup = static_cast(gptr.size() - 1); - #pragma omp parallel { - // parall construct, declare random number generator here, so that each + // parall construct, declare random number generator here, so that each // thread use its own random number generator, seed by thread id and current iteration random::Random rnd; rnd.Seed(iter* 1111 + omp_get_thread_num()); std::vector pairs; std::vector lst; - std::vector< std::pair > rec; - + std::vector< std::pair > rec; #pragma omp for schedule(static) for (unsigned k = 0; k < ngroup; ++k) { - lst.clear(); pairs.clear(); + lst.clear(); pairs.clear(); for (unsigned j = gptr[k]; j < gptr[k+1]; ++j) { lst.push_back(ListEntry(preds[j], info.labels[j], j)); gpair[j] = bst_gpair(0.0f, 0.0f); @@ -313,8 +312,8 @@ class LambdaRankObj : public IObjFunction { float g = loss.FirstOrderGradient(p, 1.0f); float h = loss.SecondOrderGradient(p, 1.0f); // accumulate gradient and hessian in both pid, and nid - gpair[pos.rindex].grad += g * w; - gpair[pos.rindex].hess += 2.0f * h; + gpair[pos.rindex].grad += g * w; + gpair[pos.rindex].hess += 2.0f * h; gpair[neg.rindex].grad -= g * w; gpair[neg.rindex].hess += 2.0f * h; } @@ -332,7 +331,7 @@ class LambdaRankObj : public IObjFunction { float pred; /*! \brief the actual label of the entry */ float label; - /*! \brief row index in the data matrix */ + /*! 
\brief row index in the data matrix */ unsigned rindex; // constructor ListEntry(float pred, float label, unsigned rindex) @@ -370,14 +369,14 @@ class LambdaRankObj : public IObjFunction { // loss function LossType loss; // number of samples peformed for each instance - int num_pairsample; + int num_pairsample; // fix weight of each elements in list float fix_list_weight; }; class PairwiseRankObj: public LambdaRankObj{ public: - virtual ~PairwiseRankObj(void){} + virtual ~PairwiseRankObj(void) {} protected: virtual void GetLambdaWeight(const std::vector &sorted_list, @@ -402,7 +401,6 @@ class LambdaRankObjNDCG : public LambdaRankObj { std::sort(labels.begin(), labels.end(), std::greater()); IDCG = CalcDCG(labels); } - if (IDCG == 0.0) { for (size_t i = 0; i < pairs.size(); ++i) { pairs[i].weight = 0.0f; @@ -412,13 +410,15 @@ class LambdaRankObjNDCG : public LambdaRankObj { for (size_t i = 0; i < pairs.size(); ++i) { unsigned pos_idx = pairs[i].pos_index; unsigned neg_idx = pairs[i].neg_index; - float pos_loginv = 1.0f / logf(pos_idx+2.0f); - float neg_loginv = 1.0f / logf(neg_idx+2.0f); + float pos_loginv = 1.0f / logf(pos_idx + 2.0f); + float neg_loginv = 1.0f / logf(neg_idx + 2.0f); int pos_label = static_cast(sorted_list[pos_idx].label); int neg_label = static_cast(sorted_list[neg_idx].label); - float original = ((1<(sumdcg); } }; -class LambdaRankObjMAP : public LambdaRankObj { +class LambdaRankObjMAP : public LambdaRankObj { public: virtual ~LambdaRankObjMAP(void) {} protected: struct MAPStats { - /* \brief the accumulated precision */ + /*! \brief the accumulated precision */ float ap_acc; - /* \brief the accumulated precision assuming a positive instance is missing */ + /*! + * \brief the accumulated precision, + * assuming a positive instance is missing + */ float ap_acc_miss; - /* \brief the accumulated precision assuming that one more positive instance is inserted ahead*/ + /*! + * \brief the accumulated precision, + * assuming that one more positive instance is inserted ahead + */ float ap_acc_add; /* \brief the accumulated positive instance count */ float hits; @@ -454,7 +460,7 @@ class LambdaRankObjMAP : public LambdaRankObj { MAPStats(float ap_acc, float ap_acc_miss, float ap_acc_add, float hits) : ap_acc(ap_acc), ap_acc_miss(ap_acc_miss), ap_acc_add(ap_acc_add), hits(hits) {} }; - /* + /*! 
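To make the three accumulators concrete, here is a hand-worked pass of GetMAPStats over a sorted list with labels [1, 0, 1] (positions are 1-based, as in the loop):

    position 1 (label 1): hits=1, ap_acc = 1/1 = 1.000, ap_acc_miss = 0/1 = 0.000, ap_acc_add = 2/1 = 2.000
    position 2 (label 0): stats carried over unchanged: (1.000, 0.000, 2.000, hits=1)
    position 3 (label 1): hits=2, ap_acc = 1 + 2/3 = 1.667, ap_acc_miss = 0 + 1/3 = 0.333, ap_acc_add = 2 + 3/3 = 3.000

ap_acc accumulates the average-precision numerator as-is, while ap_acc_miss and ap_acc_add accumulate it as if one positive instance above the current position were removed or inserted; taking differences of these prefix sums is what lets GetLambdaMAP evaluate the effect of swapping two documents without rescanning the whole list.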
* \brief Obtain the delta MAP if trying to switch the positions of instances in index1 or index2 * in sorted triples * \param sorted_list the list containing entry information @@ -463,7 +469,8 @@ class LambdaRankObjMAP : public LambdaRankObj { */ inline float GetLambdaMAP(const std::vector &sorted_list, int index1, int index2, - std::vector &map_stats){ + std::vector *p_map_stats) { + std::vector &map_stats = *p_map_stats; if (index1 == index2 || map_stats[map_stats.size() - 1].hits == 0) { return 0.0f; } @@ -482,18 +489,18 @@ class LambdaRankObjMAP : public LambdaRankObj { changed += map_stats[index2 - 1].ap_acc_miss - map_stats[index1].ap_acc_miss; changed += map_stats[index2].hits / (index2 + 1); } - float ans = (changed - original) / (map_stats[map_stats.size() - 1].hits); if (ans < 0) ans = -ans; return ans; - } + } /* * \brief obtain preprocessing results for calculating delta MAP * \param sorted_list the list containing entry information * \param map_stats a vector containing the accumulated precisions for each position in a list */ inline void GetMAPStats(const std::vector &sorted_list, - std::vector &map_acc){ + std::vector *p_map_acc) { + std::vector &map_acc = *p_map_acc; map_acc.resize(sorted_list.size()); float hit = 0, acc1 = 0, acc2 = 0, acc3 = 0; for (size_t i = 1; i <= sorted_list.size(); ++i) { @@ -503,16 +510,18 @@ class LambdaRankObjMAP : public LambdaRankObj { acc2 += (hit - 1) / i; acc3 += (hit + 1) / i; } - map_acc[i - 1] = MAPStats(acc1,acc2,acc3,hit); + map_acc[i - 1] = MAPStats(acc1, acc2, acc3, hit); } - } - virtual void GetLambdaWeight(const std::vector &sorted_list, std::vector *io_pairs) { + } + virtual void GetLambdaWeight(const std::vector &sorted_list, + std::vector *io_pairs) { std::vector &pairs = *io_pairs; std::vector map_stats; - GetMAPStats(sorted_list, map_stats); + GetMAPStats(sorted_list, &map_stats); for (size_t i = 0; i < pairs.size(); ++i) { - pairs[i].weight = - GetLambdaMAP(sorted_list, pairs[i].pos_index, pairs[i].neg_index, map_stats); + pairs[i].weight = + GetLambdaMAP(sorted_list, pairs[i].pos_index, + pairs[i].neg_index, &map_stats); } } }; From 0d9a8c042c35e4963cac08964c17d1ddae089b34 Mon Sep 17 00:00:00 2001 From: tqchen Date: Sun, 17 Aug 2014 22:49:36 -0700 Subject: [PATCH 15/52] make xgcombine buffer work --- Makefile | 6 +- python/xgboost_wrapper.cpp | 2 +- src/learner/objective-inl.hpp | 2 +- tools/Makefile | 1 - tools/xgcombine_buffer.cpp | 401 +++++++++++++++++----------------- 5 files changed, 205 insertions(+), 207 deletions(-) diff --git a/Makefile b/Makefile index 8c9980ac1..83a927e8c 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ -export CC = clang -export CXX = clang++ -export CFLAGS = -Wall -O3 -msse2 -Wno-unknown-pragmas +export CC = gcc +export CXX = g++ +export CFLAGS = -Wall -O3 -msse2 -Wno-unknown-pragmas -fopenmp # specify tensor path BIN = xgboost diff --git a/python/xgboost_wrapper.cpp b/python/xgboost_wrapper.cpp index 8b89d1d25..edda96c29 100644 --- a/python/xgboost_wrapper.cpp +++ b/python/xgboost_wrapper.cpp @@ -140,7 +140,7 @@ extern "C"{ for (size_t i = 0; i < len; ++i) { const int ridx = idxset[i]; SparseBatch::Inst inst = batch[ridx]; - utils::Check(ridx < batch.size, "slice index exceed number of rows"); + utils::Check(static_cast(ridx) < batch.size, "slice index exceed number of rows"); ret.row_data_.resize(ret.row_data_.size() + inst.length); memcpy(&ret.row_data_[ret.row_ptr_.back()], inst.data, sizeof(SparseBatch::Entry) * inst.length); diff --git a/src/learner/objective-inl.hpp 
b/src/learner/objective-inl.hpp index 29e6d3393..163d0d283 100644 --- a/src/learner/objective-inl.hpp +++ b/src/learner/objective-inl.hpp @@ -171,7 +171,7 @@ class SoftmaxMultiClassObj : public IObjFunction { rec[k] = preds[j * nclass + k]; } Softmax(&rec); - unsigned label = static_cast(info.labels[j]); + int label = static_cast(info.labels[j]); utils::Check(label < nclass, "SoftmaxMultiClassObj: label exceed num_class"); const float wt = info.GetWeight(j); for (int k = 0; k < nclass; ++k) { diff --git a/tools/Makefile b/tools/Makefile index fec764b5a..b35277aff 100644 --- a/tools/Makefile +++ b/tools/Makefile @@ -12,7 +12,6 @@ export LDFLAGS= -pthread -lm xgcombine_buffer : xgcombine_buffer.cpp - $(BIN) : $(CXX) $(CFLAGS) $(LDFLAGS) -o $@ $(filter %.cpp %.o %.c, $^) diff --git a/tools/xgcombine_buffer.cpp b/tools/xgcombine_buffer.cpp index 16bc12894..84bc996a4 100644 --- a/tools/xgcombine_buffer.cpp +++ b/tools/xgcombine_buffer.cpp @@ -10,239 +10,238 @@ #include #include #include -#include "../regrank/xgboost_regrank_data.h" -#include "../utils/xgboost_utils.h" +#include "../src/io/simple_dmatrix-inl.hpp" +#include "../src/utils/utils.h" using namespace xgboost; -using namespace xgboost::booster; -using namespace xgboost::regrank; +using namespace xgboost::io; // header in dataset struct Header{ - FILE *fi; - int tmp_num; - int base; - int num_feat; - // whether it's dense format - bool is_dense; - bool warned; - - Header( void ){ this->warned = false; this->is_dense = false; } - - inline void CheckBase( unsigned findex ){ - if( findex >= (unsigned)num_feat && ! warned ) { - fprintf( stderr, "warning:some feature exceed bound, num_feat=%d\n", num_feat ); - warned = true; - } - } + FILE *fi; + int tmp_num; + int base; + int num_feat; + // whether it's dense format + bool is_dense; + bool warned; + + Header( void ){ this->warned = false; this->is_dense = false; } + + inline void CheckBase( unsigned findex ){ + if( findex >= (unsigned)num_feat && ! warned ) { + fprintf( stderr, "warning:some feature exceed bound, num_feat=%d\n", num_feat ); + warned = true; + } + } }; inline int norm( std::vector
&vec, int base = 0 ){ - int n = base; - for( size_t i = 0; i < vec.size(); i ++ ){ - if( vec[i].is_dense ) vec[i].num_feat = 1; - vec[i].base = n; n += vec[i].num_feat; - } - return n; + int n = base; + for( size_t i = 0; i < vec.size(); i ++ ){ + if( vec[i].is_dense ) vec[i].num_feat = 1; + vec[i].base = n; n += vec[i].num_feat; + } + return n; } inline void vclose( std::vector
&vec ){ - for( size_t i = 0; i < vec.size(); i ++ ){ - fclose( vec[i].fi ); - } + for( size_t i = 0; i < vec.size(); i ++ ){ + fclose( vec[i].fi ); + } } inline int readnum( std::vector
&vec ){ - int n = 0; - for( size_t i = 0; i < vec.size(); i ++ ){ - if( !vec[i].is_dense ){ - utils::Assert( fscanf( vec[i].fi, "%d", &vec[i].tmp_num ) == 1, "load num" ); - n += vec[i].tmp_num; - }else{ - n ++; - } + int n = 0; + for( size_t i = 0; i < vec.size(); i ++ ){ + if( !vec[i].is_dense ){ + utils::Assert( fscanf( vec[i].fi, "%d", &vec[i].tmp_num ) == 1, "load num" ); + n += vec[i].tmp_num; + }else{ + n ++; } - return n; + } + return n; } inline void vskip( std::vector
&vec ){ - for( size_t i = 0; i < vec.size(); i ++ ){ - if( !vec[i].is_dense ){ - utils::Assert( fscanf( vec[i].fi, "%*d%*[^\n]\n" ) >= 0 ); - }else{ - utils::Assert( fscanf( vec[i].fi, "%*f\n" ) >= 0 ); - } + for( size_t i = 0; i < vec.size(); i ++ ){ + if( !vec[i].is_dense ){ + utils::Assert( fscanf( vec[i].fi, "%*d%*[^\n]\n" ) >= 0, "sparse" ); + }else{ + utils::Assert( fscanf( vec[i].fi, "%*f\n" ) >= 0, "dense" ); } + } } -class DataLoader: public DMatrix{ -public: - // whether to do node and edge feature renormalization - int rescale; - int linelimit; -public: - FILE *fp, *fwlist, *fgroup, *fweight; - std::vector
fheader; - std::vector entry; - DataLoader( void ){ - rescale = 0; - linelimit = -1; - fp = NULL; fwlist = NULL; fgroup = NULL; fweight = NULL; +class DataLoader: public DMatrixSimple { + public: + // whether to do node and edge feature renormalization + int rescale; + int linelimit; + public: + FILE *fp, *fwlist, *fgroup, *fweight; + std::vector
fheader; + DataLoader( void ){ + rescale = 0; + linelimit = -1; + fp = NULL; fwlist = NULL; fgroup = NULL; fweight = NULL; + } + private: + inline void Load( std::vector &feats, std::vector
&vec ){ + SparseBatch::Entry e; + for( size_t i = 0; i < vec.size(); i ++ ){ + if( !vec[i].is_dense ) { + for( int j = 0; j < vec[i].tmp_num; j ++ ){ + utils::Assert( fscanf ( vec[i].fi, "%u:%f", &e.findex, &e.fvalue ) == 2, "Error when load feat" ); + vec[i].CheckBase( e.findex ); + e.findex += vec[i].base; + feats.push_back(e); + } + }else{ + utils::Assert( fscanf ( vec[i].fi, "%f", &e.fvalue ) == 1, "load feat" ); + e.findex = vec[i].base; + feats.push_back(e); + } } -private: - inline void Load( std::vector &findex, std::vector &fvalue, std::vector
-        unsigned fidx; float fv;
-        for( size_t i = 0; i < vec.size(); i ++ ){
-            if( !vec[i].is_dense ) {
-                for( int j = 0; j < vec[i].tmp_num; j ++ ){
-                    utils::Assert( fscanf ( vec[i].fi, "%u:%f", &fidx, &fv ) == 2, "Error when load feat" );
-                    vec[i].CheckBase( fidx );
-                    fidx += vec[i].base;
-                    findex.push_back( fidx ); fvalue.push_back( fv );
-                }
-            }else{
-                utils::Assert( fscanf ( vec[i].fi, "%f", &fv ) == 1, "load feat" );
-                fidx = vec[i].base;
-                findex.push_back( fidx ); fvalue.push_back( fv );
-            }
-        }
+  }
+  inline void DoRescale( std::vector<SparseBatch::Entry> &vec ){
+    double sum = 0.0;
+    for( size_t i = 0; i < vec.size(); i ++ ){
+      sum += vec[i].fvalue * vec[i].fvalue;
+    }
+    sum = sqrt( sum );
+    for( size_t i = 0; i < vec.size(); i ++ ){
+      vec[i].fvalue /= sum;
+    }
+  }
+ public:
+  // basically we are loading all the data inside
+  inline void Load( void ){
+    this->Clear();
+    float label, weight = 0.0f;
+
+    unsigned ngleft = 0, ngacc = 0;
+    if( fgroup != NULL ){
+      info.group_ptr.clear();
+      info.group_ptr.push_back(0);
     }
-    inline void DoRescale( std::vector<float> &vec ){
-        double sum = 0.0;
-        for( size_t i = 0; i < vec.size(); i ++ ){
-            sum += vec[i] * vec[i];
-        }
-        sum = sqrt( sum );
-        for( size_t i = 0; i < vec.size(); i ++ ){
-            vec[i] /= sum;
-        }
-    }
-public:
-    // basically we are loading all the data inside
-    inline void Load( void ){
-        this->data.Clear();
-        float label, weight = 0.0f;
-
-        unsigned ngleft = 0, ngacc = 0;
-        if( fgroup != NULL ){
-            info.group_ptr.clear();
-            info.group_ptr.push_back(0);
+
+    while( fscanf( fp, "%f", &label ) == 1 ){
+      if( ngleft == 0 && fgroup != NULL ){
+        utils::Assert( fscanf( fgroup, "%u", &ngleft ) == 1, "group" );
+      }
+      if( fweight != NULL ){
+        utils::Assert( fscanf( fweight, "%f", &weight ) == 1, "weight" );
+      }
+
+      ngleft -= 1; ngacc += 1;
+
+      int pass = 1;
+      if( fwlist != NULL ){
+        utils::Assert( fscanf( fwlist, "%u", &pass ) ==1, "pass" );
+      }
+      if( pass == 0 ){
+        vskip( fheader ); ngacc -= 1;
+      }else{
+        const int nfeat = readnum( fheader );
+
+        std::vector<SparseBatch::Entry> feats;
+
+        // pairs
+        this->Load( feats, fheader );
+        utils::Assert( feats.size() == (unsigned)nfeat, "nfeat" );
+        if( rescale != 0 ) this->DoRescale( feats );
+        // push back data :)
+        this->info.labels.push_back( label );
+        // push back weight if any
+        if( fweight != NULL ){
+          this->info.weights.push_back( weight );
        }
-
-        while( fscanf( fp, "%f", &label ) == 1 ){
-            if( ngleft == 0 && fgroup != NULL ){
-                utils::Assert( fscanf( fgroup, "%u", &ngleft ) == 1 );
-            }
-            if( fweight != NULL ){
-                utils::Assert( fscanf( fweight, "%f", &weight ) == 1 );
-            }
-
-            ngleft -= 1; ngacc += 1;
-
-            int pass = 1;
-            if( fwlist != NULL ){
-                utils::Assert( fscanf( fwlist, "%u", &pass ) ==1 );
-            }
-            if( pass == 0 ){
-                vskip( fheader ); ngacc -= 1;
-            }else{
-                const int nfeat = readnum( fheader );
-                std::vector<unsigned> findex;
-                std::vector<float> fvalue;
-                // pairs
-                this->Load( findex, fvalue, fheader );
-                utils::Assert( findex.size() == (unsigned)nfeat );
-                if( rescale != 0 ) this->DoRescale( fvalue );
-                // push back data :)
-                this->info.labels.push_back( label );
-                // push back weight if any
-                if( fweight != NULL ){
-                    this->info.weights.push_back( weight );
-                }
-                this->data.AddRow( findex, fvalue );
-            }
-            if( ngleft == 0 && fgroup != NULL && ngacc != 0 ){
-                info.group_ptr.push_back( info.group_ptr.back() + ngacc );
-                utils::Assert( info.group_ptr.back() == data.NumRow(), "group size must match num rows" );
-                ngacc = 0;
-            }
-            // linelimit
-            if( linelimit >= 0 ) {
-                if( -- linelimit <= 0 ) break;
-            }
-        }
-        if( ngleft == 0 && fgroup != NULL && ngacc != 0
){ - info.group_ptr.push_back( info.group_ptr.back() + ngacc ); - utils::Assert( info.group_ptr.back() == data.NumRow(), "group size must match num rows" ); - } - this->data.InitData(); + this->AddRow( feats ); + } + if( ngleft == 0 && fgroup != NULL && ngacc != 0 ){ + info.group_ptr.push_back( info.group_ptr.back() + ngacc ); + utils::Assert( info.group_ptr.back() == info.num_row, "group size must match num rows" ); + ngacc = 0; + } + // linelimit + if( linelimit >= 0 ) { + if( -- linelimit <= 0 ) break; + } } + if( ngleft == 0 && fgroup != NULL && ngacc != 0 ){ + info.group_ptr.push_back( info.group_ptr.back() + ngacc ); + utils::Assert( info.group_ptr.back() == info.num_row, "group size must match num rows" ); + } + } + }; const char *folder = "features"; int main( int argc, char *argv[] ){ - if( argc < 3 ){ - printf("Usage:xgcombine_buffer [options] -f [features] -fd [densefeatures]\n"\ - "options: -rescale -linelimit -fgroup -wlist \n"); - return 0; + if( argc < 3 ){ + printf("Usage:xgcombine_buffer [options] -f [features] -fd [densefeatures]\n" \ + "options: -rescale -linelimit -fgroup -wlist \n"); + return 0; + } + + DataLoader loader; + time_t start = time( NULL ); + + int mode = 0; + for( int i = 3; i < argc; i ++ ){ + if( !strcmp( argv[i], "-f") ){ + mode = 0; continue; } - - DataLoader loader; - time_t start = time( NULL ); - - int mode = 0; - for( int i = 3; i < argc; i ++ ){ - if( !strcmp( argv[i], "-f") ){ - mode = 0; continue; - } - if( !strcmp( argv[i], "-fd") ){ - mode = 2; continue; - } - if( !strcmp( argv[i], "-rescale") ){ - loader.rescale = 1; continue; - } - if( !strcmp( argv[i], "-wlist") ){ - loader.fwlist = utils::FopenCheck( argv[ ++i ], "r" ); continue; - } - if( !strcmp( argv[i], "-fgroup") ){ - loader.fgroup = utils::FopenCheck( argv[ ++i ], "r" ); continue; - } - if( !strcmp( argv[i], "-fweight") ){ - loader.fweight = utils::FopenCheck( argv[ ++i ], "r" ); continue; - } - if( !strcmp( argv[i], "-linelimit") ){ - loader.linelimit = atoi( argv[ ++i ] ); continue; - } - - char name[ 256 ]; - sprintf( name, "%s/%s.%s", folder, argv[1], argv[i] ); - Header h; - h.fi = utils::FopenCheck( name, "r" ); - - if( mode == 2 ){ - h.is_dense = true; h.num_feat = 1; - loader.fheader.push_back( h ); - }else{ - utils::Assert( fscanf( h.fi, "%d", &h.num_feat ) == 1, "num feat" ); - switch( mode ){ - case 0: loader.fheader.push_back( h ); break; - default: ; - } - } + if( !strcmp( argv[i], "-fd") ){ + mode = 2; continue; + } + if( !strcmp( argv[i], "-rescale") ){ + loader.rescale = 1; continue; + } + if( !strcmp( argv[i], "-wlist") ){ + loader.fwlist = utils::FopenCheck( argv[ ++i ], "r" ); continue; + } + if( !strcmp( argv[i], "-fgroup") ){ + loader.fgroup = utils::FopenCheck( argv[ ++i ], "r" ); continue; + } + if( !strcmp( argv[i], "-fweight") ){ + loader.fweight = utils::FopenCheck( argv[ ++i ], "r" ); continue; + } + if( !strcmp( argv[i], "-linelimit") ){ + loader.linelimit = atoi( argv[ ++i ] ); continue; } - loader.fp = utils::FopenCheck( argv[1], "r" ); - printf("num_features=%d\n", norm( loader.fheader ) ); - printf("start creating buffer...\n"); - loader.Load(); - loader.SaveBinary( argv[2] ); - // close files - fclose( loader.fp ); - if( loader.fwlist != NULL ) fclose( loader.fwlist ); - if( loader.fgroup != NULL ) fclose( loader.fgroup ); - vclose( loader.fheader ); - printf("all generation end, %lu sec used\n", (unsigned long)(time(NULL) - start) ); - return 0; + char name[ 256 ]; + sprintf( name, "%s/%s.%s", folder, argv[1], argv[i] ); + Header h; + h.fi = 
utils::FopenCheck( name, "r" ); + + if( mode == 2 ){ + h.is_dense = true; h.num_feat = 1; + loader.fheader.push_back( h ); + }else{ + utils::Assert( fscanf( h.fi, "%d", &h.num_feat ) == 1, "num feat" ); + switch( mode ){ + case 0: loader.fheader.push_back( h ); break; + default: ; + } + } + } + loader.fp = utils::FopenCheck( argv[1], "r" ); + + printf("num_features=%d\n", norm( loader.fheader ) ); + printf("start creating buffer...\n"); + loader.Load(); + loader.SaveBinary( argv[2] ); + // close files + fclose( loader.fp ); + if( loader.fwlist != NULL ) fclose( loader.fwlist ); + if( loader.fgroup != NULL ) fclose( loader.fgroup ); + vclose( loader.fheader ); + printf("all generation end, %lu sec used\n", (unsigned long)(time(NULL) - start) ); + return 0; } From b76853731c5956814fa47fc5ef6a85d513822b3b Mon Sep 17 00:00:00 2001 From: Bing Xu Date: Mon, 18 Aug 2014 02:10:54 -0400 Subject: [PATCH 16/52] make it compatible with old code --- python/xgboost.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/xgboost.py b/python/xgboost.py index f47642898..6ea1cd921 100644 --- a/python/xgboost.py +++ b/python/xgboost.py @@ -134,7 +134,7 @@ class Booster: xglib.XGBoosterSetParam( self.handle, ctypes.c_char_p(k.encode('utf-8')), ctypes.c_char_p(str(v).encode('utf-8'))) - def update(self, dtrain, it): + def update(self, dtrain, it=1): """ update """ assert isinstance(dtrain, DMatrix) xglib.XGBoosterUpdateOneIter(self.handle, it, dtrain.handle) From e912dd3364a6323266804ca7efeb67fcb99e3b38 Mon Sep 17 00:00:00 2001 From: tqchen Date: Mon, 18 Aug 2014 08:57:26 -0700 Subject: [PATCH 17/52] fix omp --- src/gbm/gbtree-inl.hpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/gbm/gbtree-inl.hpp b/src/gbm/gbtree-inl.hpp index 1fc90e40c..216240b74 100644 --- a/src/gbm/gbtree-inl.hpp +++ b/src/gbm/gbtree-inl.hpp @@ -287,8 +287,7 @@ class GBTree : public IGradBooster { updater_initialized = 0; } if (!strcmp(name, "nthread")) { - omp_set_num_threads(nthread); - nthread = atoi(val); + omp_set_num_threads(nthread = atoi(val)); } if (!strcmp(name, "num_parallel_tree")) { num_parallel_tree = atoi(val); From d3bfc31e6a45bf36fdf44547b90b0787eaa398e5 Mon Sep 17 00:00:00 2001 From: tqchen Date: Mon, 18 Aug 2014 09:00:23 -0700 Subject: [PATCH 18/52] enforce putting iteration numbers in train --- python/xgboost.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/python/xgboost.py b/python/xgboost.py index 6ea1cd921..6296d6263 100644 --- a/python/xgboost.py +++ b/python/xgboost.py @@ -134,8 +134,12 @@ class Booster: xglib.XGBoosterSetParam( self.handle, ctypes.c_char_p(k.encode('utf-8')), ctypes.c_char_p(str(v).encode('utf-8'))) - def update(self, dtrain, it=1): - """ update """ + def update(self, dtrain, it): + """ + update + dtrain: the training DMatrix + it: current iteration number + """ assert isinstance(dtrain, DMatrix) xglib.XGBoosterUpdateOneIter(self.handle, it, dtrain.handle) def boost(self, dtrain, grad, hess): From 7c068cbe460c775e6c153e64dd21c5b76121a835 Mon Sep 17 00:00:00 2001 From: "tqchen@graphlab.com" Date: Mon, 18 Aug 2014 10:14:34 -0700 Subject: [PATCH 19/52] fix mac --- Makefile | 2 +- python/xgboost.py | 15 ++++++++------- src/io/simple_dmatrix-inl.hpp | 2 +- 3 files changed, 10 insertions(+), 9 deletions(-) diff --git a/Makefile b/Makefile index 83a927e8c..2a30564e1 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ export CC = gcc export CXX = g++ -export CFLAGS = -Wall -O3 -msse2 -Wno-unknown-pragmas -fopenmp +export CFLAGS = 
-Wall -O3 -msse2 -Wno-unknown-pragmas
 
 # specify tensor path
 BIN = xgboost
diff --git a/python/xgboost.py b/python/xgboost.py
index 6296d6263..2e5aeceba 100644
--- a/python/xgboost.py
+++ b/python/xgboost.py
@@ -44,7 +44,8 @@ class DMatrix:
             self.handle = None
             return
         if isinstance(data, str):
-            self.handle = xglib.XGDMatrixCreateFromFile(ctypes.c_char_p(data.encode('utf-8')), 1)
+            self.handle = ctypes.c_void_p(
+                xglib.XGDMatrixCreateFromFile(ctypes.c_char_p(data.encode('utf-8')), 1))
         elif isinstance(data, scp.csr_matrix):
             self.__init_from_csr(data)
         elif isinstance(data, numpy.ndarray) and len(data.shape) == 2:
@@ -62,17 +63,17 @@ class DMatrix:
     # convert data from csr matrix
     def __init_from_csr(self, csr):
         assert len(csr.indices) == len(csr.data)
-        self.handle = xglib.XGDMatrixCreateFromCSR(
+        self.handle = ctypes.c_void_p(xglib.XGDMatrixCreateFromCSR(
             (ctypes.c_ulong * len(csr.indptr))(*csr.indptr),
             (ctypes.c_uint * len(csr.indices))(*csr.indices),
             (ctypes.c_float * len(csr.data))(*csr.data),
-            len(csr.indptr), len(csr.data))
+            len(csr.indptr), len(csr.data)))
     # convert data from numpy matrix
     def __init_from_npy2d(self,mat,missing):
         data = numpy.array(mat.reshape(mat.size), dtype='float32')
-        self.handle = xglib.XGDMatrixCreateFromMat(
+        self.handle = ctypes.c_void_p(xglib.XGDMatrixCreateFromMat(
             data.ctypes.data_as(ctypes.POINTER(ctypes.c_float)),
-            mat.shape[0], mat.shape[1], ctypes.c_float(missing))
+            mat.shape[0], mat.shape[1], ctypes.c_float(missing)))
     # destructor
     def __del__(self):
         xglib.XGDMatrixFree(self.handle)
@@ -103,8 +104,8 @@ class DMatrix:
     # slice the DMatrix to return a new DMatrix that only contains rindex
     def slice(self, rindex):
         res = DMatrix(None)
-        res.handle = xglib.XGDMatrixSliceDMatrix(
-            self.handle, (ctypes.c_int*len(rindex))(*rindex), len(rindex))
+        res.handle = ctypes.c_void_p(xglib.XGDMatrixSliceDMatrix(
+            self.handle, (ctypes.c_int*len(rindex))(*rindex), len(rindex)))
         return res
 
 class Booster:
diff --git a/src/io/simple_dmatrix-inl.hpp b/src/io/simple_dmatrix-inl.hpp
index b8b15adce..bc0e3c2bd 100644
--- a/src/io/simple_dmatrix-inl.hpp
+++ b/src/io/simple_dmatrix-inl.hpp
@@ -196,7 +196,7 @@ class DMatrixSimple : public DataMatrix {
   /*! \brief data in the row */
   std::vector<SparseBatch::Entry> row_data_;
   /*!
   \brief magic number used to identify DMatrix */
-  static const int kMagic = 0xff01;
+  static const int kMagic = 0xffffab01;
 
  protected:
   // one batch iterator that return content in the matrix

From 66ae3a75781c9780b78b64582a5cfc561e763d4a Mon Sep 17 00:00:00 2001
From: "tqchen@graphlab.com"
Date: Mon, 18 Aug 2014 10:17:49 -0700
Subject: [PATCH 20/52] add no omp flag

---
 Makefile | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/Makefile b/Makefile
index 2a30564e1..6fae8eb0a 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,12 @@
 export CC = gcc
 export CXX = g++
-export CFLAGS = -Wall -O3 -msse2 -Wno-unknown-pragmas
+export LDFLAGS= -pthread -lm
+
+ifeq ($(no_omp),1)
+	export CFLAGS = -Wall -O3 -msse2 -Wno-unknown-pragmas
+else
+	export CFLAGS = -Wall -O3 -msse2 -Wno-unknown-pragmas -fopenmp
+endif
 
 # specify tensor path
 BIN = xgboost
@@ -9,7 +15,6 @@ SLIB = python/libxgboostwrapper.so
 .PHONY: clean all
 
 all: $(BIN) $(OBJ) $(SLIB)
-export LDFLAGS= -pthread -lm
 
 xgboost: src/xgboost_main.cpp io.o src/data.h src/tree/*.h src/tree/*.hpp src/gbm/*.h src/gbm/*.hpp src/utils/*.h src/learner/*.h src/learner/*.hpp
 io.o: src/io/io.cpp src/data.h src/utils/*.h

From 04e04ec5a00ed5ff45c926892cd2af0ab76a6d33 Mon Sep 17 00:00:00 2001
From: "tqchen@graphlab.com"
Date: Mon, 18 Aug 2014 10:19:47 -0700
Subject: [PATCH 21/52] chg readme

---
 README.md | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index 732e64d7f..ac68a5cf4 100644
--- a/README.md
+++ b/README.md
@@ -16,8 +16,10 @@ Build
 ======
 * Simply type make
 * If your compiler does not come with OpenMP support, it will fire a warning telling you that the code will compile into single thread mode, and you will get single thread xgboost
-* You may get an error: -lgomp is not found; you can remove the -fopenmp flag in the Makefile to get single thread xgboost, or upgrade your compiler to compile the multi-thread version
+* You may get an error: -lgomp is not found
+  - You can type ```make no_omp=1```, which will get you single thread xgboost
+  - Alternatively, you can upgrade your compiler to compile the multi-thread version
 * Possible way to build using Visual Studio (not tested):
-  - In principle, you can put src/xgboost.cpp and src/io/io.cpp into the project, and build xgboost.
-  - For the python module, you need python/xgboost_wrapper.cpp and src/io/io.cpp to build a dll.
+  - In principle, you can put src/xgboost.cpp and src/io/io.cpp into the project, and build xgboost.
+  - For the python module, you need python/xgboost_wrapper.cpp and src/io/io.cpp to build a dll.
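A note on the kMagic bump in patch 19 above: it is also what makes binary buffer files from older versions unloadable, as the README later states. A minimal sketch of the version guard that such a magic number implies, reusing the utils::Check/IStream helpers from this tree (the message text and surrounding code are assumptions, not quoted from the patch):

```cpp
// Hypothetical sketch: reject buffer files written in the old binary format.
// fi is a utils::IStream opened on the buffer; kMagic = 0xffffab01.
int magic;
utils::Check(fi.Read(&magic, sizeof(magic)) != 0, "invalid buffer file");
utils::Check(magic == kMagic,
             "buffer file was created by an incompatible xgboost version, "
             "please delete it and re-create");
```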
From f6c763a2a7a9fcae6798246004f345bb808e475f Mon Sep 17 00:00:00 2001
From: "tqchen@graphlab.com"
Date: Mon, 18 Aug 2014 10:53:15 -0700
Subject: [PATCH 22/52] fix base score, and print message

---
 Makefile                       |  2 +-
 src/gbm/gbtree-inl.hpp         |  6 +++++-
 src/learner/learner-inl.hpp    |  9 ++++++++-
 src/learner/objective-inl.hpp  |  9 ++++++---
 src/learner/objective.h        |  4 ++--
 src/tree/updater_prune-inl.hpp | 21 +++++++++++++------
 src/utils/omp.h                |  2 ++
 7 files changed, 39 insertions(+), 14 deletions(-)

diff --git a/Makefile b/Makefile
index 6fae8eb0a..ca58f0eb3 100644
--- a/Makefile
+++ b/Makefile
@@ -3,7 +3,7 @@ export CXX = g++
 export LDFLAGS= -pthread -lm
 
 ifeq ($(no_omp),1)
-	export CFLAGS = -Wall -O3 -msse2 -Wno-unknown-pragmas
+	export CFLAGS = -Wall -O3 -msse2 -Wno-unknown-pragmas -DDISABLE_OPENMP
 else
 	export CFLAGS = -Wall -O3 -msse2 -Wno-unknown-pragmas -fopenmp
 endif
diff --git a/src/gbm/gbtree-inl.hpp b/src/gbm/gbtree-inl.hpp
index 216240b74..b0bd0f99a 100644
--- a/src/gbm/gbtree-inl.hpp
+++ b/src/gbm/gbtree-inl.hpp
@@ -233,7 +233,7 @@ class GBTree : public IGradBooster {
         pred_counter[bid] = static_cast<unsigned>(trees.size());
         pred_buffer[bid] = psum;
       }
-      return psum;
+      return psum + mparam.base_score;
     }
     // initialize thread local space for prediction
     inline void InitThreadTemp(int nthread) {
@@ -296,6 +296,8 @@ class GBTree : public IGradBooster {
   };
   /*! \brief model parameters */
   struct ModelParam {
+    /*! \brief base prediction score of everything */
+    float base_score;
     /*! \brief number of trees */
     int num_trees;
     /*! \brief number of root: default 0, means single tree */
@@ -314,6 +316,7 @@ class GBTree : public IGradBooster {
     int reserved[32];
     /*! \brief constructor */
     ModelParam(void) {
+      base_score = 0.0f;
       num_trees = 0;
       num_roots = num_feature = 0;
       num_pbuffer = 0;
@@ -326,6 +329,7 @@ class GBTree : public IGradBooster {
      * \param val value of the parameter
      */
     inline void SetParam(const char *name, const char *val) {
+      if (!strcmp("base_score", name)) base_score = static_cast<float>(atof(val));
       if (!strcmp("num_pbuffer", name)) num_pbuffer = atol(val);
       if (!strcmp("num_output_group", name)) num_output_group = atol(val);
       if (!strcmp("bst:num_roots", name)) num_roots = atoi(val);
diff --git a/src/learner/learner-inl.hpp b/src/learner/learner-inl.hpp
index fe6f1aa43..09167d8bf 100644
--- a/src/learner/learner-inl.hpp
+++ b/src/learner/learner-inl.hpp
@@ -85,15 +85,22 @@ class BoostLearner {
       if (!strcmp(name, "booster")) name_gbm_ = val;
       mparam.SetParam(name, val);
     }
+    if (gbm_ != NULL) gbm_->SetParam(name, val);
+    if (obj_ != NULL) obj_->SetParam(name, val);
     cfg_.push_back(std::make_pair(std::string(name), std::string(val)));
   }
   /*!
   * \brief initialize the model
   */
  inline void InitModel(void) {
+    // initialize model
     this->InitObjGBM();
-    // adapt the base score
+    // reset the base score
     mparam.base_score = obj_->ProbToMargin(mparam.base_score);
+    char tmp[32];
+    snprintf(tmp, sizeof(tmp), "%g", mparam.base_score);
+    this->SetParam("base_score", tmp);
+    // initialize GBM model
     gbm_->InitModel();
   }
   /*!
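Patch 22's InitModel converts base_score once from probability space into margin space via the objective's ProbToMargin, which the next hunks add to RegLossObj and IObjFunction. For binary:logistic this is just the inverse of the sigmoid; a sketch of what loss.ProbToMargin would compute under that assumption (the body below is inferred from the logistic link, not quoted from the patch):

```cpp
#include <cmath>
// Inverse of the logistic link: margin = log(p / (1 - p)).
// E.g. the default base_score = 0.5 maps to margin 0, so boosting
// starts from a zero margin unless the user configures otherwise.
inline float LogisticProbToMargin(float base_score) {
  return -std::log(1.0f / base_score - 1.0f);
}
```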
diff --git a/src/learner/objective-inl.hpp b/src/learner/objective-inl.hpp
index 163d0d283..e45250950 100644
--- a/src/learner/objective-inl.hpp
+++ b/src/learner/objective-inl.hpp
@@ -124,7 +124,7 @@ class RegLossObj : public IObjFunction{
           loss.SecondOrderGradient(p, info.labels[j]) * w);
     }
   }
-  virtual const char* DefaultEvalMetric(void) {
+  virtual const char* DefaultEvalMetric(void) const {
     return loss.DefaultEvalMetric();
   }
   virtual void PredTransform(std::vector<float> *io_preds) {
@@ -135,6 +135,9 @@ class RegLossObj : public IObjFunction{
       preds[j] = loss.PredTransform(preds[j]);
     }
   }
+  virtual float ProbToMargin(float base_score) const {
+    return loss.ProbToMargin(base_score);
+  }
 
  protected:
   float scale_pos_weight;
@@ -192,7 +195,7 @@ class SoftmaxMultiClassObj : public IObjFunction {
   virtual void EvalTransform(std::vector<float> *io_preds) {
     this->Transform(io_preds, 0);
   }
-  virtual const char* DefaultEvalMetric(void) {
+  virtual const char* DefaultEvalMetric(void) const {
     return "merror";
   }
 
@@ -320,7 +323,7 @@ class LambdaRankObj : public IObjFunction {
       }
     }
   }
-  virtual const char* DefaultEvalMetric(void) {
+  virtual const char* DefaultEvalMetric(void) const {
     return "map";
   }
 
diff --git a/src/learner/objective.h b/src/learner/objective.h
index ff870c034..513219093 100644
--- a/src/learner/objective.h
+++ b/src/learner/objective.h
@@ -32,7 +32,7 @@ class IObjFunction{
                        int iter,
                        std::vector<bst_gpair> *out_gpair) = 0;
   /*! \return the default evaluation metric for the objective */
-  virtual const char* DefaultEvalMetric(void) = 0;
+  virtual const char* DefaultEvalMetric(void) const = 0;
   // the following functions are optional, most of time default implementation is good enough
   /*!
   * \brief transform prediction values, this is only called when Prediction is called
@@ -53,7 +53,7 @@ class IObjFunction{
   * used by gradient boosting
   * \return transformed value
   */
-  virtual float ProbToMargin(float base_score) {
+  virtual float ProbToMargin(float base_score) const {
     return base_score;
   }
 };
diff --git a/src/tree/updater_prune-inl.hpp b/src/tree/updater_prune-inl.hpp
index bfb71b727..b5205080b 100644
--- a/src/tree/updater_prune-inl.hpp
+++ b/src/tree/updater_prune-inl.hpp
@@ -19,6 +19,7 @@ class TreePruner: public IUpdater {
   // set training parameter
   virtual void SetParam(const char *name, const char *val) {
     param.SetParam(name, val);
+    if (!strcmp(name, "silent")) silent = atoi(val);
   }
   // update the tree, do pruning
   virtual void Update(const std::vector<bst_gpair> &gpair,
@@ -32,33 +33,41 @@
 
  private:
   // try to prune off current leaf
-  inline void TryPruneLeaf(RegTree &tree, int nid, int depth) {
-    if (tree[nid].is_root()) return;
+  inline int TryPruneLeaf(RegTree &tree, int nid, int depth, int npruned) {
+    if (tree[nid].is_root()) return npruned;
     int pid = tree[nid].parent();
     RegTree::NodeStat &s = tree.stat(pid);
     ++s.leaf_child_cnt;
-
     if (s.leaf_child_cnt >= 2 && param.need_prune(s.loss_chg, depth - 1)) {
       // need to be pruned
       tree.ChangeToLeaf(pid, param.learning_rate * s.base_weight);
       // tail recursion
-      this->TryPruneLeaf(tree, pid, depth - 1);
-    }
+      return this->TryPruneLeaf(tree, pid, depth - 1, npruned+2);
+    } else {
+      return npruned;
+    }
   }
   /*!
   \brief do pruning of a tree */
   inline void DoPrune(RegTree &tree) {
+    int npruned = 0;
     // initialize auxiliary statistics
     for (int nid = 0; nid < tree.param.num_nodes; ++nid) {
       tree.stat(nid).leaf_child_cnt = 0;
     }
     for (int nid = 0; nid < tree.param.num_nodes; ++nid) {
       if (tree[nid].is_leaf()) {
-        this->TryPruneLeaf(tree, nid, tree.GetDepth(nid));
+        npruned = this->TryPruneLeaf(tree, nid, tree.GetDepth(nid), npruned);
       }
     }
+    if (silent == 0) {
+      printf("tree pruning end, %d roots, %d extra nodes, %d pruned nodes, max_depth=%d\n",
+             tree.param.num_roots, tree.num_extra_nodes(), npruned, tree.MaxDepth());
+    }
   }
 
  private:
+  // whether to suppress messages
+  int silent;
   // training parameter
   TrainParam param;
 };
diff --git a/src/utils/omp.h b/src/utils/omp.h
index 46127f631..0423448e2 100644
--- a/src/utils/omp.h
+++ b/src/utils/omp.h
@@ -8,7 +8,9 @@
 #if defined(_OPENMP)
 #include <omp.h>
 #else
+#ifndef DISABLE_OPENMP
 #warning "OpenMP is not available, compile to single thread code"
+#endif
 inline int omp_get_thread_num() { return 0; }
 inline int omp_get_num_threads() { return 1; }
 inline void omp_set_num_threads(int nthread) {}

From 46fed899ab63b3788e0345ebb985cb9bb50f04d3 Mon Sep 17 00:00:00 2001
From: "tqchen@graphlab.com"
Date: Mon, 18 Aug 2014 10:57:08 -0700
Subject: [PATCH 23/52] add more note

---
 README.md | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index ac68a5cf4..106757471 100644
--- a/README.md
+++ b/README.md
@@ -8,9 +8,22 @@ Tutorial and Documentation: https://github.com/tqchen/xgboost/wiki
 
 Questions and Issues: [https://github.com/tqchen/xgboost/issues](https://github.com/tqchen/xgboost/issues?q=is%3Aissue+label%3Aquestion)
 
+Features
+=======
+* Sparse feature format:
+  - Sparse feature format allows easy handling of missing values, and improves computation efficiency.
+* Push the limit on single machine:
+  - Efficient implementation that optimizes memory and computation.
+* Speed: XGBoost is very fast
+  - In [demo/kaggle-higgs/speedtest.py](demo/kaggle-higgs/speedtest.py), on the Kaggle Higgs data it is faster (on our machine, about 20 times faster using 4 threads) than sklearn.ensemble.GradientBoostingClassifier
+* Layout of gradient boosting algorithm to support user defined objective
+* Python interface, works with numpy and scipy.sparse matrix
+
 xgboost-unity
 =======
-experimental branch (not usable yet): refactor xgboost, cleaner code, more flexibility
+* experimental branch (not usable yet): refactor xgboost, cleaner code, more flexibility
+* This version of xgboost is not backward compatible with 0.2*, due to huge changes in the code structure
+  - This means the model and buffer files of previous versions cannot be loaded in xgboost-unity
 
 Build
 ======

From 9da2ced8a222d0271ee09cf035ffc06134e9291e Mon Sep 17 00:00:00 2001
From: "tqchen@graphlab.com"
Date: Mon, 18 Aug 2014 12:20:13 -0700
Subject: [PATCH 24/52] add base_margin

---
 Makefile                      |  7 ++---
 README.md                     |  4 +--
 demo/kaggle-higgs/README.md   |  2 +-
 python/example/demo.py        | 19 +++++++++++++
 python/xgboost.py             | 53 +++++++++++++++++++++++++----------
 python/xgboost_wrapper.cpp    | 36 +++++++++---------------
 python/xgboost_wrapper.h      | 29 ++++++-------------
 src/gbm/gbtree-inl.hpp        |  6 +---
 src/io/simple_dmatrix-inl.hpp |  5 +++-
 src/learner/dmatrix.h         | 31 ++++++++++++++++++--
 src/learner/learner-inl.hpp   | 25 ++++++++++++++---
 src/xgboost_main.cpp          | 38 ++++++++++++++-----------
 12 files changed, 162 insertions(+), 93 deletions(-)

diff --git a/Makefile b/Makefile
index ca58f0eb3..25e61ca7b 100644
--- a/Makefile
+++ b/Makefile
@@ -10,14 +10,13 @@ endif
 
 # specify tensor path
 BIN = xgboost
-OBJ = io.o
+OBJ =
 SLIB = python/libxgboostwrapper.so
 
 .PHONY: clean all
 
 all: $(BIN) $(OBJ) $(SLIB)
 
-xgboost: src/xgboost_main.cpp io.o src/data.h src/tree/*.h src/tree/*.hpp src/gbm/*.h src/gbm/*.hpp src/utils/*.h src/learner/*.h src/learner/*.hpp
-io.o: src/io/io.cpp src/data.h src/utils/*.h
+xgboost: src/xgboost_main.cpp src/io/io.cpp src/data.h src/tree/*.h src/tree/*.hpp src/gbm/*.h src/gbm/*.hpp src/utils/*.h src/learner/*.h src/learner/*.hpp
 
 # now the wrapper takes in two files. io and wrapper part
 python/libxgboostwrapper.so: python/xgboost_wrapper.cpp src/io/io.cpp src/*.h src/*/*.hpp src/*/*.h
@@ -34,4 +33,4 @@ install:
 	cp -f -r $(BIN) $(INSTALL_PATH)
 
 clean:
-	$(RM) $(OBJ) $(BIN) *~ */*~ */*/*~
+	$(RM) $(OBJ) $(BIN) $(SLIB) *~ */*~ */*/*~
diff --git a/README.md b/README.md
index 106757471..61472aa44 100644
--- a/README.md
+++ b/README.md
@@ -21,8 +21,8 @@ Features
 
 xgboost-unity
 =======
-* experimental branch (not usable yet): refactor xgboost, cleaner code, more flexibility
-* This version of xgboost is not backward compatible with 0.2*, due to huge changes in the code structure
+* Experimental branch (not usable yet): refactor xgboost, cleaner code, more flexibility
+* This version of xgboost is not compatible with 0.2x, due to the huge amount of changes in the code structure
   - This means the model and buffer files of previous versions cannot be loaded in xgboost-unity
 
 Build
diff --git a/demo/kaggle-higgs/README.md b/demo/kaggle-higgs/README.md
index 28472a848..9e535ef1e 100644
--- a/demo/kaggle-higgs/README.md
+++ b/demo/kaggle-higgs/README.md
@@ -7,7 +7,7 @@ This script will achieve about 3.600 AMS score on the public leaderboard. To get started:
 
 1. Compile the XGBoost python lib
 ```bash
-cd ../../python
+cd ../..
 make
 ```
 2.
Put training.csv and test.csv in the folder './data' (you can create a symbolic link)
diff --git a/python/example/demo.py b/python/example/demo.py
index a099f56bf..231640d91 100755
--- a/python/example/demo.py
+++ b/python/example/demo.py
@@ -90,3 +90,22 @@ def evalerror(preds, dtrain):
 # training with customized objective, we can also do step by step training
 # simply look at xgboost.py's implementation of train
 bst = xgb.train(param, dtrain, num_round, evallist, logregobj, evalerror)
+
+
+###
+# advanced: start from an initial base prediction
+#
+print ('start running example to start from an initial prediction')
+# specify parameters via map, definitions are the same as in the c++ version
+param = {'bst:max_depth':2, 'bst:eta':1, 'silent':1, 'objective':'binary:logistic' }
+# train xgboost for 1 round
+bst = xgb.train( param, dtrain, 1, evallist )
+# Note: we need the margin value instead of the transformed prediction in set_base_margin
+# predicting with output_margin=True will always give you margin values before the logistic transformation
+ptrain = bst.predict(dtrain, output_margin=True)
+ptest = bst.predict(dtest, output_margin=True)
+dtrain.set_base_margin(ptrain)
+dtest.set_base_margin(ptest)
+
+print ('this is the result of running from the initial prediction')
+bst = xgb.train( param, dtrain, 1, evallist )
diff --git a/python/xgboost.py b/python/xgboost.py
index 2e5aeceba..badeebed9 100644
--- a/python/xgboost.py
+++ b/python/xgboost.py
@@ -18,8 +18,7 @@ xglib.XGDMatrixCreateFromFile.restype = ctypes.c_void_p
 xglib.XGDMatrixCreateFromCSR.restype = ctypes.c_void_p
 xglib.XGDMatrixCreateFromMat.restype = ctypes.c_void_p
 xglib.XGDMatrixSliceDMatrix.restype = ctypes.c_void_p
-xglib.XGDMatrixGetLabel.restype = ctypes.POINTER(ctypes.c_float)
-xglib.XGDMatrixGetWeight.restype = ctypes.POINTER(ctypes.c_float)
+xglib.XGDMatrixGetFloatInfo.restype = ctypes.POINTER(ctypes.c_float)
 xglib.XGDMatrixNumRow.restype = ctypes.c_ulong
 
 xglib.XGBoosterCreate.restype = ctypes.c_void_p
@@ -77,28 +76,46 @@ class DMatrix:
     # destructor
     def __del__(self):
         xglib.XGDMatrixFree(self.handle)
-    # load data from file
+    def __get_float_info(self, field):
+        length = ctypes.c_ulong()
+        ret = xglib.XGDMatrixGetFloatInfo(self.handle, ctypes.c_char_p(field.encode('utf-8')),
+                                          ctypes.byref(length))
+        return ctypes2numpy(ret, length.value)
+    def __set_float_info(self, field, data):
+        xglib.XGDMatrixSetFloatInfo(self.handle, ctypes.c_char_p(field.encode('utf-8')),
+                                    (ctypes.c_float*len(data))(*data), len(data))
+    # save DMatrix to a binary file
     def save_binary(self, fname, silent=True):
         xglib.XGDMatrixSaveBinary(self.handle, ctypes.c_char_p(fname.encode('utf-8')), int(silent))
     # set label of dmatrix
     def set_label(self, label):
-        xglib.XGDMatrixSetLabel(self.handle, (ctypes.c_float*len(label))(*label), len(label))
+        self.__set_float_info('label', label)
+    # set weight of each instances
+    def set_weight(self, weight):
+        self.__set_float_info('weight', label)
+    # set initialized margin prediction
+    def set_base_margin(self, margin):
+        """
+        Set the base margin that the booster starts from.
+        This can be used to continue boosting from the prediction
+        of an existing model. Note that the margin value is needed,
+        not the transformed prediction; e.g.
for logistic regression: need to put in value before logistic transformation + see also example/demo.py + """ + self.__set_float_info('base_margin', margin) # set group size of dmatrix, used for rank def set_group(self, group): xglib.XGDMatrixSetGroup(self.handle, (ctypes.c_uint*len(group))(*group), len(group)) - # set weight of each instances - def set_weight(self, weight): - xglib.XGDMatrixSetWeight(self.handle, (ctypes.c_float*len(weight))(*weight), len(weight)) # get label from dmatrix def get_label(self): - length = ctypes.c_ulong() - labels = xglib.XGDMatrixGetLabel(self.handle, ctypes.byref(length)) - return ctypes2numpy(labels, length.value) + return self.__get_float_info('label') # get weight from dmatrix def get_weight(self): - length = ctypes.c_ulong() - weights = xglib.XGDMatrixGetWeight(self.handle, ctypes.byref(length)) - return ctypes2numpy(weights, length.value) + return self.__get_float_info('weight') + # get base_margin from dmatrix + def get_base_margin(self): + return self.__get_float_info('base_margin') def num_row(self): return xglib.XGDMatrixNumRow(self.handle) # slice the DMatrix to return a new DMatrix that only contains rindex @@ -161,9 +178,15 @@ class Booster: return xglib.XGBoosterEvalOneIter(self.handle, it, dmats, evnames, len(evals)) def eval(self, mat, name = 'eval', it = 0): return self.eval_set( [(mat,name)], it) - def predict(self, data): + def predict(self, data, output_margin=False): + """ + predict with data + data: the dmatrix storing the input + output_margin: whether output raw margin value that is untransformed + """ length = ctypes.c_ulong() - preds = xglib.XGBoosterPredict(self.handle, data.handle, ctypes.byref(length)) + preds = xglib.XGBoosterPredict(self.handle, data.handle, + int(output_margin), ctypes.byref(length)) return ctypes2numpy(preds, length.value) def save_model(self, fname): """ save model to file """ diff --git a/python/xgboost_wrapper.cpp b/python/xgboost_wrapper.cpp index edda96c29..7f2365ba3 100644 --- a/python/xgboost_wrapper.cpp +++ b/python/xgboost_wrapper.cpp @@ -23,9 +23,9 @@ class Booster: public learner::BoostLearner { this->init_model = false; this->SetCacheData(mats); } - const float *Pred(const DataMatrix &dmat, size_t *len) { + const float *Pred(const DataMatrix &dmat, int output_margin, size_t *len) { this->CheckInitModel(); - this->Predict(dmat, &this->preds_); + this->Predict(dmat, output_margin, &this->preds_); *len = this->preds_.size(); return &this->preds_[0]; } @@ -163,15 +163,11 @@ extern "C"{ void XGDMatrixSaveBinary(void *handle, const char *fname, int silent) { SaveDataMatrix(*static_cast(handle), fname, silent); } - void XGDMatrixSetLabel(void *handle, const float *label, size_t len) { - DataMatrix *pmat = static_cast(handle); - pmat->info.labels.resize(len); - memcpy(&(pmat->info).labels[0], label, sizeof(float) * len); - } - void XGDMatrixSetWeight(void *handle, const float *weight, size_t len) { - DataMatrix *pmat = static_cast(handle); - pmat->info.weights.resize(len); - memcpy(&(pmat->info).weights[0], weight, sizeof(float) * len); + void XGDMatrixSetFloatInfo(void *handle, const char *field, const float *info, size_t len) { + std::vector &vec = + static_cast(handle)->info.GetInfo(field); + vec.resize(len); + memcpy(&vec[0], info, sizeof(float) * len); } void XGDMatrixSetGroup(void *handle, const unsigned *group, size_t len) { DataMatrix *pmat = static_cast(handle); @@ -181,15 +177,11 @@ extern "C"{ pmat->info.group_ptr[i+1] = pmat->info.group_ptr[i]+group[i]; } } - const float* 
XGDMatrixGetLabel(const void *handle, size_t* len) { - const DataMatrix *pmat = static_cast(handle); - *len = pmat->info.labels.size(); - return &(pmat->info.labels[0]); - } - const float* XGDMatrixGetWeight(const void *handle, size_t* len) { - const DataMatrix *pmat = static_cast(handle); - *len = pmat->info.weights.size(); - return &(pmat->info.weights[0]); + const float* XGDMatrixGetFloatInfo(const void *handle, const char *field, size_t* len) { + const std::vector &vec = + static_cast(handle)->info.GetInfo(field); + *len = vec.size(); + return &vec[0]; } size_t XGDMatrixNumRow(const void *handle) { return static_cast(handle)->info.num_row; @@ -238,8 +230,8 @@ extern "C"{ bst->eval_str = bst->EvalOneIter(iter, mats, names); return bst->eval_str.c_str(); } - const float *XGBoosterPredict(void *handle, void *dmat, size_t *len) { - return static_cast(handle)->Pred(*static_cast(dmat), len); + const float *XGBoosterPredict(void *handle, void *dmat, int output_margin, size_t *len) { + return static_cast(handle)->Pred(*static_cast(dmat), output_margin, len); } void XGBoosterLoadModel(void *handle, const char *fname) { static_cast(handle)->LoadModel(fname); diff --git a/python/xgboost_wrapper.h b/python/xgboost_wrapper.h index 16b8fecd7..1b6805c61 100644 --- a/python/xgboost_wrapper.h +++ b/python/xgboost_wrapper.h @@ -64,19 +64,13 @@ extern "C" { */ void XGDMatrixSaveBinary(void *handle, const char *fname, int silent); /*! - * \brief set label of the training matrix + * \brief set float vector to a content in info * \param handle a instance of data matrix - * \param label pointer to label + * \param field field name, can be label, weight + * \param array pointer to float vector * \param len length of array */ - void XGDMatrixSetLabel(void *handle, const float *label, size_t len); - /*! - * \brief set weight of each instance - * \param handle a instance of data matrix - * \param weight data pointer to weights - * \param len length of array - */ - void XGDMatrixSetWeight(void *handle, const float *weight, size_t len); + void XGDMatrixSetFloatInfo(void *handle, const char *field, const float *array, size_t len); /*! * \brief set label of the training matrix * \param handle a instance of data matrix @@ -85,19 +79,13 @@ extern "C" { */ void XGDMatrixSetGroup(void *handle, const unsigned *group, size_t len); /*! - * \brief get label set from matrix + * \brief get float info vector from matrix * \param handle a instance of data matrix * \param len used to set result length + * \param field field name * \return pointer to the label */ - const float* XGDMatrixGetLabel(const void *handle, size_t* out_len); - /*! - * \brief get weight set from matrix - * \param handle a instance of data matrix - * \param len used to set result length - * \return pointer to the weight - */ - const float* XGDMatrixGetWeight(const void *handle, size_t* out_len); + const float* XGDMatrixGetFloatInfo(const void *handle, const char *field, size_t* out_len); /*! * \brief return number of rows */ @@ -154,9 +142,10 @@ extern "C" { * \brief make prediction based on dmat * \param handle handle * \param dmat data matrix + * \param output_margin whether only output raw margin value * \param len used to store length of returning result */ - const float *XGBoosterPredict(void *handle, void *dmat, size_t *len); + const float *XGBoosterPredict(void *handle, void *dmat, int output_margin, size_t *len); /*! 
* \brief load model from existing file * \param handle handle diff --git a/src/gbm/gbtree-inl.hpp b/src/gbm/gbtree-inl.hpp index b0bd0f99a..216240b74 100644 --- a/src/gbm/gbtree-inl.hpp +++ b/src/gbm/gbtree-inl.hpp @@ -233,7 +233,7 @@ class GBTree : public IGradBooster { pred_counter[bid] = static_cast(trees.size()); pred_buffer[bid] = psum; } - return psum + mparam.base_score; + return psum; } // initialize thread local space for prediction inline void InitThreadTemp(int nthread) { @@ -296,8 +296,6 @@ class GBTree : public IGradBooster { }; /*! \brief model parameters */ struct ModelParam { - /*! \brief base prediction score of everything */ - float base_score; /*! \brief number of trees */ int num_trees; /*! \brief number of root: default 0, means single tree */ @@ -316,7 +314,6 @@ class GBTree : public IGradBooster { int reserved[32]; /*! \brief constructor */ ModelParam(void) { - base_score = 0.0f; num_trees = 0; num_roots = num_feature = 0; num_pbuffer = 0; @@ -329,7 +326,6 @@ class GBTree : public IGradBooster { * \param val value of the parameter */ inline void SetParam(const char *name, const char *val) { - if (!strcmp("base_score", name)) base_score = static_cast(atof(val)); if (!strcmp("num_pbuffer", name)) num_pbuffer = atol(val); if (!strcmp("num_output_group", name)) num_output_group = atol(val); if (!strcmp("bst:num_roots", name)) num_roots = atoi(val); diff --git a/src/io/simple_dmatrix-inl.hpp b/src/io/simple_dmatrix-inl.hpp index bc0e3c2bd..c0b98b789 100644 --- a/src/io/simple_dmatrix-inl.hpp +++ b/src/io/simple_dmatrix-inl.hpp @@ -110,10 +110,13 @@ class DMatrixSimple : public DataMatrix { "DMatrix: group data does not match the number of rows in features"); } std::string wname = name + ".weight"; - if (info.TryLoadWeight(wname.c_str(), silent)) { + if (info.TryLoadFloatInfo("weight", wname.c_str(), silent)) { utils::Check(info.weights.size() == info.num_row, "DMatrix: weight data does not match the number of rows in features"); } + std::string mname = name + ".base_margin"; + if (info.TryLoadFloatInfo("base_margin", mname.c_str(), silent)) { + } } /*! * \brief load from binary file diff --git a/src/learner/dmatrix.h b/src/learner/dmatrix.h index 144b1a44e..f7dbcb639 100644 --- a/src/learner/dmatrix.h +++ b/src/learner/dmatrix.h @@ -33,6 +33,15 @@ struct MetaInfo { * can be used for multi task setting */ std::vector root_index; + /*! + * \brief initialized margins, + * if specified, xgboost will start from this init margin + * can be used to specify initial prediction to boost from + */ + std::vector base_margin; + /*! \brief version flag, used to check version of this info */ + static const int kVersion = 0; + // constructor MetaInfo(void) : num_row(0), num_col(0) {} /*! \brief clear all the information */ inline void Clear(void) { @@ -40,6 +49,7 @@ struct MetaInfo { group_ptr.clear(); weights.clear(); root_index.clear(); + base_margin.clear(); num_row = num_col = 0; } /*! 
\brief get weight of each instances */ @@ -59,20 +69,26 @@ struct MetaInfo { } } inline void SaveBinary(utils::IStream &fo) const { + int version = kVersion; + fo.Write(&version, sizeof(version)); fo.Write(&num_row, sizeof(num_row)); fo.Write(&num_col, sizeof(num_col)); fo.Write(labels); fo.Write(group_ptr); fo.Write(weights); fo.Write(root_index); + fo.Write(base_margin); } inline void LoadBinary(utils::IStream &fi) { + int version; + utils::Check(fi.Read(&version, sizeof(version)), "MetaInfo: invalid format"); utils::Check(fi.Read(&num_row, sizeof(num_row)), "MetaInfo: invalid format"); utils::Check(fi.Read(&num_col, sizeof(num_col)), "MetaInfo: invalid format"); utils::Check(fi.Read(&labels), "MetaInfo: invalid format"); utils::Check(fi.Read(&group_ptr), "MetaInfo: invalid format"); utils::Check(fi.Read(&weights), "MetaInfo: invalid format"); utils::Check(fi.Read(&root_index), "MetaInfo: invalid format"); + utils::Check(fi.Read(&base_margin), "MetaInfo: invalid format"); } // try to load group information from file, if exists inline bool TryLoadGroup(const char* fname, bool silent = false) { @@ -89,8 +105,19 @@ struct MetaInfo { fclose(fi); return true; } + inline std::vector& GetInfo(const char *field) { + if (!strcmp(field, "label")) return labels; + if (!strcmp(field, "weight")) return weights; + if (!strcmp(field, "base_margin")) return base_margin; + utils::Error("unknown field %s", field); + return labels; + } + inline const std::vector& GetInfo(const char *field) const { + return ((MetaInfo*)this)->GetInfo(field); + } // try to load weight information from file, if exists - inline bool TryLoadWeight(const char* fname, bool silent = false) { + inline bool TryLoadFloatInfo(const char *field, const char* fname, bool silent = false) { + std::vector &weights = this->GetInfo(field); FILE *fi = fopen64(fname, "r"); if (fi == NULL) return false; float wt; @@ -98,7 +125,7 @@ struct MetaInfo { weights.push_back(wt); } if (!silent) { - printf("loading weight from %s\n", fname); + printf("loading %s from %s\n", field, fname); } fclose(fi); return true; diff --git a/src/learner/learner-inl.hpp b/src/learner/learner-inl.hpp index 09167d8bf..4d227f488 100644 --- a/src/learner/learner-inl.hpp +++ b/src/learner/learner-inl.hpp @@ -97,9 +97,6 @@ class BoostLearner { this->InitObjGBM(); // reset the base score mparam.base_score = obj_->ProbToMargin(mparam.base_score); - char tmp[32]; - snprintf(tmp, sizeof(tmp), "%g", mparam.base_score); - this->SetParam("base_score", tmp); // initialize GBM model gbm_->InitModel(); } @@ -199,12 +196,16 @@ class BoostLearner { /*! * \brief get prediction * \param data input data + * \param output_margin whether to only predict margin value instead of transformed prediction * \param out_preds output vector that stores the prediction */ inline void Predict(const DMatrix &data, + bool output_margin, std::vector *out_preds) const { this->PredictRaw(data, out_preds); - obj_->PredTransform(out_preds); + if (!output_margin) { + obj_->PredTransform(out_preds); + } } /*! 
\brief dump model out */ inline std::vector DumpModel(const utils::FeatMap& fmap, int option) { @@ -236,6 +237,22 @@ class BoostLearner { std::vector *out_preds) const { gbm_->Predict(data.fmat, this->FindBufferOffset(data), data.info.root_index, out_preds); + // add base margin + std::vector &preds = *out_preds; + const unsigned ndata = static_cast(preds.size()); + if (data.info.base_margin.size() != 0) { + utils::Check(preds.size() == data.info.base_margin.size(), + "base_margin.size does not match with prediction size"); + #pragma omp parallel for schedule(static) + for (unsigned j = 0; j < ndata; ++j) { + preds[j] += data.info.base_margin[j]; + } + } else { + #pragma omp parallel for schedule(static) + for (unsigned j = 0; j < ndata; ++j) { + preds[j] += mparam.base_score; + } + } } /*! \brief training parameter for regression */ diff --git a/src/xgboost_main.cpp b/src/xgboost_main.cpp index f3fc9201d..c807df15a 100644 --- a/src/xgboost_main.cpp +++ b/src/xgboost_main.cpp @@ -49,6 +49,7 @@ class BoostLearnTask{ if (!strcmp("silent", name)) silent = atoi(val); if (!strcmp("use_buffer", name)) use_buffer = atoi(val); if (!strcmp("num_round", name)) num_round = atoi(val); + if (!strcmp("pred_margin", name)) pred_margin = atoi(val); if (!strcmp("save_period", name)) save_period = atoi(val); if (!strcmp("eval_train", name)) eval_train = atoi(val); if (!strcmp("task", name)) task = val; @@ -77,6 +78,7 @@ class BoostLearnTask{ num_round = 10; save_period = 0; eval_train = 0; + pred_margin = 0; dump_model_stats = 0; task = "train"; model_in = "NULL"; @@ -184,7 +186,7 @@ class BoostLearnTask{ inline void TaskPred(void) { std::vector preds; if (!silent) printf("start prediction...\n"); - learner.Predict(*data, &preds); + learner.Predict(*data, pred_margin != 0, &preds); if (!silent) printf("writing prediction to %s\n", name_pred.c_str()); FILE *fo = utils::FopenCheck(name_pred.c_str(), "w"); for (size_t i = 0; i < preds.size(); i++) { @@ -193,37 +195,39 @@ class BoostLearnTask{ fclose(fo); } private: - /* \brief whether silent */ + /*! \brief whether silent */ int silent; - /* \brief whether use auto binary buffer */ + /*! \brief whether use auto binary buffer */ int use_buffer; - /* \brief whether evaluate training statistics */ + /*! \brief whether evaluate training statistics */ int eval_train; - /* \brief number of boosting iterations */ + /*! \brief number of boosting iterations */ int num_round; - /* \brief the period to save the model, 0 means only save the final round model */ + /*! \brief the period to save the model, 0 means only save the final round model */ int save_period; - /* \brief the path of training/test data set */ + /*! \brief the path of training/test data set */ std::string train_path, test_path; - /* \brief the path of test model file, or file to restart training */ + /*! \brief the path of test model file, or file to restart training */ std::string model_in; - /* \brief the path of final model file, to be saved */ + /*! \brief the path of final model file, to be saved */ std::string model_out; - /* \brief the path of directory containing the saved models */ + /*! \brief the path of directory containing the saved models */ std::string model_dir_path; - /* \brief task to perform */ + /*! \brief task to perform */ std::string task; - /* \brief name of predict file */ + /*! \brief name of predict file */ std::string name_pred; - /* \brief whether dump statistics along with model */ + /*!\brief whether to directly output margin value */ + int pred_margin; + /*! 
\brief whether dump statistics along with model */ int dump_model_stats; - /* \brief name of feature map */ + /*! \brief name of feature map */ std::string name_fmap; - /* \brief name of dump file */ + /*! \brief name of dump file */ std::string name_dump; - /* \brief the paths of validation data sets */ + /*! \brief the paths of validation data sets */ std::vector eval_data_paths; - /* \brief the names of the evaluation data used in output log */ + /*! \brief the names of the evaluation data used in output log */ std::vector eval_data_names; private: io::DataMatrix* data; From 0b36c8295dd43173348dc6715720863f156a0078 Mon Sep 17 00:00:00 2001 From: antinucleon Date: Mon, 18 Aug 2014 13:33:36 -0600 Subject: [PATCH 25/52] lack include --- src/data.h | 37 +++++++++++++++++++------------------ 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/src/data.h b/src/data.h index df43551e3..b64c13b19 100644 --- a/src/data.h +++ b/src/data.h @@ -9,6 +9,7 @@ #include #include #include +#include #include #include "utils/io.h" #include "utils/utils.h" @@ -16,9 +17,9 @@ #include "utils/matrix_csr.h" namespace xgboost { -/*! - * \brief unsigned interger type used in boost, - * used for feature index and row index +/*! + * \brief unsigned interger type used in boost, + * used for feature index and row index */ typedef unsigned bst_uint; /*! \brief float type, used for storing statistics */ @@ -82,7 +83,7 @@ struct SparseBatch { /** * \brief This is a interface convention via template, defining the way to access features, - * column access rule is defined by template, for efficiency purpose, + * column access rule is defined by template, for efficiency purpose, * row access is defined by iterator of sparse batches * \tparam Derived type of actual implementation */ @@ -122,9 +123,9 @@ class FMatrixInterface { * \return number of columns */ inline size_t NumCol(void) const; - /*! - * \brief check if column access is supported, if not, initialize column access - * \param max_rows maximum number of rows allowed in constructor + /*! + * \brief check if column access is supported, if not, initialize column access + * \param max_rows maximum number of rows allowed in constructor */ inline void InitColAccess(void); /*! \return whether column access is enabled */ @@ -133,8 +134,8 @@ class FMatrixInterface { inline size_t GetColSize(size_t cidx) const; /*! * \breif return #entries-in-col / #rows - * \param cidx column index - * this function is used to help speedup, + * \param cidx column index + * this function is used to help speedup, * doese not necessarily implement it if not sure, return 0.0; * \return column density */ @@ -204,9 +205,9 @@ class FMatrixS : public FMatrixInterface{ return ColIter(&col_data_[col_ptr_[cidx]] - 1, &col_data_[col_ptr_[cidx + 1]] - 1); } - /*! - * \brief get reversed col iterator, - * this function will be deprecated at some point + /*! + * \brief get reversed col iterator, + * this function will be deprecated at some point */ inline ColBackIter GetReverseSortedCol(size_t cidx) const { utils::Assert(cidx < this->NumCol(), "col id exceed bound"); @@ -226,8 +227,8 @@ class FMatrixS : public FMatrixInterface{ if (this->HaveColAccess()) return; this->InitColData(max_nrow); } - /*! - * \brief get the row iterator associated with FMatrix + /*! 
+ * \brief get the row iterator associated with FMatrix * this function is not threadsafe, returns iterator stored in FMatrixS */ inline utils::IIterator* RowIterator(void) const { @@ -287,19 +288,19 @@ class FMatrixS : public FMatrixInterface{ size_t nrow; utils::Check(fi.Read(&nrow, sizeof(size_t)) != 0, "invalid input file format"); out_ptr->resize(nrow + 1); - utils::Check(fi.Read(&(*out_ptr)[0], out_ptr->size() * sizeof(size_t)) != 0, + utils::Check(fi.Read(&(*out_ptr)[0], out_ptr->size() * sizeof(size_t)) != 0, "invalid input file format"); out_data->resize(out_ptr->back()); if (out_data->size() != 0) { - utils::Assert(fi.Read(&(*out_data)[0], out_data->size() * sizeof(SparseBatch::Entry)) != 0, + utils::Assert(fi.Read(&(*out_data)[0], out_data->size() * sizeof(SparseBatch::Entry)) != 0, "invalid input file format"); } } protected: /*! - * \brief intialize column data - * \param max_nrow maximum number of rows supported + * \brief intialize column data + * \param max_nrow maximum number of rows supported */ inline void InitColData(size_t max_nrow) { // note: this part of code is serial, todo, parallelize this transformer From e9bfc026b70bf69224d01b27cb6e87543f55ac09 Mon Sep 17 00:00:00 2001 From: antinucleon Date: Mon, 18 Aug 2014 13:38:09 -0600 Subject: [PATCH 26/52] fix typo --- python/xgboost.py | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/python/xgboost.py b/python/xgboost.py index badeebed9..509fe654a 100644 --- a/python/xgboost.py +++ b/python/xgboost.py @@ -1,11 +1,11 @@ # Author: Tianqi Chen, Bing Xu # module for xgboost -import ctypes +import ctypes import os # optinally have scipy sparse, though not necessary import numpy import sys -import numpy.ctypeslib +import numpy.ctypeslib import scipy.sparse as scp # set this line correctly @@ -46,7 +46,7 @@ class DMatrix: self.handle = ctypes.c_void_p( xglib.XGDMatrixCreateFromFile(ctypes.c_char_p(data.encode('utf-8')), 1)) elif isinstance(data, scp.csr_matrix): - self.__init_from_csr(data) + self.__init_from_csr(data) elif isinstance(data, numpy.ndarray) and len(data.shape) == 2: self.__init_from_npy2d(data, missing) else: @@ -82,7 +82,7 @@ class DMatrix: ctypes.byref(length)) return ctypes2numpy(ret, length.value) def __set_float_info(self, field, data): - xglib.XGDMatrixSetFloatInfo(self.handle,ctypes.c_char_p(field.encode('utf-8')), + xglib.XGDMatrixSetFloatInfo(self.handle,ctypes.c_char_p(field.encode('utf-8')), (ctypes.c_float*len(data))(*data), len(data)) # load data from file def save_binary(self, fname, silent=True): @@ -92,7 +92,7 @@ class DMatrix: self.__set_float_info('label', label) # set weight of each instances def set_weight(self, weight): - self.__set_float_info('weight', label) + self.__set_float_info('weight', weight) # set initialized margin prediction def set_base_margin(self, margin): """ @@ -128,7 +128,7 @@ class DMatrix: class Booster: """learner class """ def __init__(self, params={}, cache=[]): - """ constructor, param: """ + """ constructor, param: """ for d in cache: assert isinstance(d, DMatrix) dmats = (ctypes.c_void_p * len(cache))(*[ d.handle for d in cache]) @@ -136,13 +136,13 @@ class Booster: self.set_param({'seed':0}) self.set_param(params) def __del__(self): - xglib.XGBoosterFree(self.handle) + xglib.XGBoosterFree(self.handle) def set_param(self, params, pv=None): if isinstance(params, dict): for k, v in params.items(): xglib.XGBoosterSetParam( - self.handle, ctypes.c_char_p(k.encode('utf-8')), - ctypes.c_char_p(str(v).encode('utf-8'))) + 
self.handle, ctypes.c_char_p(k.encode('utf-8')), + ctypes.c_char_p(str(v).encode('utf-8'))) elif isinstance(params,str) and pv != None: xglib.XGBoosterSetParam( self.handle, ctypes.c_char_p(params.encode('utf-8')), @@ -153,11 +153,11 @@ class Booster: self.handle, ctypes.c_char_p(k.encode('utf-8')), ctypes.c_char_p(str(v).encode('utf-8'))) def update(self, dtrain, it): - """ - update + """ + update dtrain: the training DMatrix it: current iteration number - """ + """ assert isinstance(dtrain, DMatrix) xglib.XGBoosterUpdateOneIter(self.handle, it, dtrain.handle) def boost(self, dtrain, grad, hess): @@ -175,7 +175,7 @@ class Booster: dmats = (ctypes.c_void_p * len(evals) )(*[ d[0].handle for d in evals]) evnames = (ctypes.c_char_p * len(evals))( * [ctypes.c_char_p(d[1].encode('utf-8')) for d in evals]) - return xglib.XGBoosterEvalOneIter(self.handle, it, dmats, evnames, len(evals)) + return xglib.XGBoosterEvalOneIter(self.handle, it, dmats, evnames, len(evals)) def eval(self, mat, name = 'eval', it = 0): return self.eval_set( [(mat,name)], it) def predict(self, data, output_margin=False): @@ -196,7 +196,7 @@ class Booster: xglib.XGBoosterLoadModel( self.handle, ctypes.c_char_p(fname.encode('utf-8')) ) def dump_model(self, fo, fmap=''): """dump model into text file""" - if isinstance(fo,str): + if isinstance(fo,str): fo = open(fo,'w') need_close = True else: From c4b21775fac9f1c25b5dccd547eb2285e3e1ec20 Mon Sep 17 00:00:00 2001 From: "tqchen@graphlab.com" Date: Mon, 18 Aug 2014 12:57:31 -0700 Subject: [PATCH 27/52] some lint --- src/data.h | 1 + src/learner/dmatrix.h | 2 +- src/utils/random.h | 4 ++-- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/data.h b/src/data.h index b64c13b19..fa815a4ee 100644 --- a/src/data.h +++ b/src/data.h @@ -345,6 +345,7 @@ class FMatrixS : public FMatrixInterface{ &col_data_[col_ptr_[i + 1]], Entry::CmpValue); } } + private: // --- data structure used to support InitColAccess -- utils::IIterator *iter_; diff --git a/src/learner/dmatrix.h b/src/learner/dmatrix.h index f7dbcb639..5786fe6a1 100644 --- a/src/learner/dmatrix.h +++ b/src/learner/dmatrix.h @@ -117,7 +117,7 @@ struct MetaInfo { } // try to load weight information from file, if exists inline bool TryLoadFloatInfo(const char *field, const char* fname, bool silent = false) { - std::vector &weights = this->GetInfo(field); + std::vector &weights = this->GetInfo(field); FILE *fi = fopen64(fname, "r"); if (fi == NULL) return false; float wt; diff --git a/src/utils/random.h b/src/utils/random.h index 18e40baff..689c6f860 100644 --- a/src/utils/random.h +++ b/src/utils/random.h @@ -74,11 +74,11 @@ inline int SampleBinary(double p) { template inline void Shuffle(T *data, size_t sz) { if (sz == 0) return; - for (uint32_t i = (uint32_t)sz - 1; i > 0; i--){ + for (uint32_t i = (uint32_t)sz - 1; i > 0; i--) { std::swap(data[i], data[NextUInt32(i + 1)]); } } -// random shuffle the data inside, require PRNG +// random shuffle the data inside, require PRNG template inline void Shuffle(std::vector &data) { Shuffle(&data[0], data.size()); From 3b02fb26b088c44969def737b228c79160152cce Mon Sep 17 00:00:00 2001 From: "tqchen@graphlab.com" Date: Mon, 18 Aug 2014 13:33:58 -0700 Subject: [PATCH 28/52] fix num parallel tree --- src/tree/updater_colmaker-inl.hpp | 5 +++++ src/tree/updater_prune-inl.hpp | 4 ++++ 2 files changed, 9 insertions(+) diff --git a/src/tree/updater_colmaker-inl.hpp b/src/tree/updater_colmaker-inl.hpp index f0624bdeb..eb5ff85fc 100644 --- a/src/tree/updater_colmaker-inl.hpp +++ 
b/src/tree/updater_colmaker-inl.hpp @@ -27,10 +27,15 @@ class ColMaker: public IUpdater { const FMatrix &fmat, const std::vector &root_index, const std::vector &trees) { + // rescale learning rate according to size of trees + float lr = param.learning_rate; + param.learning_rate = lr / trees.size(); + // build tree for (size_t i = 0; i < trees.size(); ++i) { Builder builder(param); builder.Update(gpair, fmat, root_index, trees[i]); } + param.learning_rate = lr; } private: diff --git a/src/tree/updater_prune-inl.hpp b/src/tree/updater_prune-inl.hpp index b5205080b..363d6eec1 100644 --- a/src/tree/updater_prune-inl.hpp +++ b/src/tree/updater_prune-inl.hpp @@ -26,9 +26,13 @@ class TreePruner: public IUpdater { const FMatrix &fmat, const std::vector &root_index, const std::vector &trees) { + // rescale learning rate according to size of trees + float lr = param.learning_rate; + param.learning_rate = lr / trees.size(); for (size_t i = 0; i < trees.size(); ++i) { this->DoPrune(*trees[i]); } + param.learning_rate = lr; } private: From 3de07b0abe5b185556a50f9e0afc16992f6ca6b3 Mon Sep 17 00:00:00 2001 From: tqchen Date: Mon, 18 Aug 2014 14:12:35 -0700 Subject: [PATCH 29/52] add more guideline about python path --- python/example/demo.py | 1 + 1 file changed, 1 insertion(+) diff --git a/python/example/demo.py b/python/example/demo.py index 231640d91..6dde2be72 100755 --- a/python/example/demo.py +++ b/python/example/demo.py @@ -3,6 +3,7 @@ import sys import numpy as np import scipy.sparse # append the path to xgboost, you may need to change the following line +# alternatively, you can add the path to PYTHONPATH environment variable sys.path.append('../') import xgboost as xgb From 1d8c2391e8b413a981b625b691b33a5237d4b6f7 Mon Sep 17 00:00:00 2001 From: tqchen Date: Mon, 18 Aug 2014 14:58:30 -0700 Subject: [PATCH 30/52] update tree maker to make it more robust --- python/README.md | 1 + src/tree/updater_colmaker-inl.hpp | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/python/README.md b/python/README.md index cf59ba9ab..4aceb7d5e 100644 --- a/python/README.md +++ b/python/README.md @@ -3,3 +3,4 @@ python wrapper for xgboost using ctypes see example for usage to make the python module, type make in the root directory of project + diff --git a/src/tree/updater_colmaker-inl.hpp b/src/tree/updater_colmaker-inl.hpp index eb5ff85fc..3645e53ce 100644 --- a/src/tree/updater_colmaker-inl.hpp +++ b/src/tree/updater_colmaker-inl.hpp @@ -271,7 +271,8 @@ class ColMaker: public IUpdater { } // start enumeration const unsigned nsize = static_cast(feat_set.size()); - #pragma omp parallel for schedule(dynamic, 1) + const int batch_size = std::max(static_cast(nsize / this->nthread / 32), 1); + #pragma omp parallel for schedule(dynamic, batch_size) for (unsigned i = 0; i < nsize; ++i) { const unsigned fid = feat_set[i]; const int tid = omp_get_thread_num(); From dbf3a21942cc7a9163300170424b305d1a1736fd Mon Sep 17 00:00:00 2001 From: tqchen Date: Mon, 18 Aug 2014 19:03:32 -0700 Subject: [PATCH 31/52] change dense fvec logic to tree --- src/gbm/gbtree-inl.hpp | 40 ++++++++------------------------ src/tree/model.h | 52 ++++++++++++++++++++++++++++++++++++++---- 2 files changed, 57 insertions(+), 35 deletions(-) diff --git a/src/gbm/gbtree-inl.hpp b/src/gbm/gbtree-inl.hpp index 216240b74..876e13c2d 100644 --- a/src/gbm/gbtree-inl.hpp +++ b/src/gbm/gbtree-inl.hpp @@ -110,7 +110,11 @@ class GBTree : public IGradBooster { { nthread = omp_get_num_threads(); } - this->InitThreadTemp(nthread); + 
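The learning-rate juggling that [PATCH 28/52] adds to ColMaker and TreePruner above implements parallel trees: when K trees are grown in one boosting round, each is built with eta/K so the round as a whole still moves by eta. A minimal sketch of that save-rescale-restore pattern, with hypothetical Param/Tree stand-ins for TrainParam and RegTree:

#include <cstddef>
#include <vector>

struct Param { float learning_rate; };  // stand-in for TrainParam
struct Tree {};                         // stand-in for RegTree

// Grow K parallel trees so their summed leaf values match one tree
// trained with the full learning rate.
template <typename UpdateFn>
void UpdateTrees(Param &param, std::vector<Tree*> &trees, UpdateFn update_one) {
  const float lr = param.learning_rate;
  param.learning_rate = lr / trees.size();  // each tree contributes lr / K
  for (size_t i = 0; i < trees.size(); ++i) {
    update_one(param, trees[i]);
  }
  param.learning_rate = lr;  // restore for the next updater in the chain
}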
thread_temp.resize(nthread, tree::RegTree::FVec()); + for (int i = 0; i < nthread; ++i) { + thread_temp[i].Init(mparam.num_feature); + } + std::vector &preds = *out_preds; preds.resize(0); // start collecting the prediction @@ -128,7 +132,7 @@ class GBTree : public IGradBooster { #pragma omp parallel for schedule(static) for (unsigned i = 0; i < nsize; ++i) { const int tid = omp_get_thread_num(); - std::vector &feats = thread_temp[tid]; + tree::RegTree::FVec &feats = thread_temp[tid]; const size_t ridx = batch.base_rowid + i; const unsigned root_idx = root_index.size() == 0 ? 0 : root_index[ridx]; // loop over output groups @@ -210,7 +214,7 @@ class GBTree : public IGradBooster { int64_t buffer_index, int bst_group, unsigned root_index, - std::vector *p_feats) { + tree::RegTree::FVec *p_feats) { size_t itop = 0; float psum = 0.0f; const int bid = mparam.BufferOffset(buffer_index, bst_group); @@ -220,13 +224,13 @@ class GBTree : public IGradBooster { psum = pred_buffer[bid]; } if (itop != trees.size()) { - FillThreadTemp(inst, p_feats); + p_feats->Fill(inst); for (size_t i = itop; i < trees.size(); ++i) { if (tree_info[i] == bst_group) { psum += trees[i]->Predict(*p_feats, root_index); } } - DropThreadTemp(inst, p_feats); + p_feats->Drop(inst); } // updated the buffered results if (bid >= 0) { @@ -235,30 +239,6 @@ class GBTree : public IGradBooster { } return psum; } - // initialize thread local space for prediction - inline void InitThreadTemp(int nthread) { - thread_temp.resize(nthread); - for (size_t i = 0; i < thread_temp.size(); ++i) { - thread_temp[i].resize(mparam.num_feature); - std::fill(thread_temp[i].begin(), thread_temp[i].end(), NAN); - } - } - // fill in a thread local dense vector using a sparse instance - inline static void FillThreadTemp(const SparseBatch::Inst &inst, - std::vector *p_feats) { - std::vector &feats = *p_feats; - for (bst_uint i = 0; i < inst.length; ++i) { - feats[inst[i].findex] = inst[i].fvalue; - } - } - // clear up a thread local dense vector - inline static void DropThreadTemp(const SparseBatch::Inst &inst, - std::vector *p_feats) { - std::vector &feats = *p_feats; - for (bst_uint i = 0; i < inst.length; ++i) { - feats[inst[i].findex] = NAN; - } - } // --- data structure --- /*! \brief training parameters */ struct TrainParam { @@ -361,7 +341,7 @@ class GBTree : public IGradBooster { // configurations for tree std::vector< std::pair > cfg; // temporal storage for per thread - std::vector< std::vector > thread_temp; + std::vector thread_temp; // the updaters that can be applied to each of tree std::vector< tree::IUpdater* > updaters; }; diff --git a/src/tree/model.h b/src/tree/model.h index aa84d265d..7686a4353 100644 --- a/src/tree/model.h +++ b/src/tree/model.h @@ -422,7 +422,7 @@ class TreeModel { }; /*! \brief node statistics used in regression tree */ -struct RTreeNodeStat{ +struct RTreeNodeStat { /*! \brief loss chg caused by current split */ float loss_chg; /*! \brief sum of hessian values, used to measure coverage of data */ @@ -444,20 +444,61 @@ struct RTreeNodeStat{ /*! \brief define regression tree to be the most common tree model */ class RegTree: public TreeModel{ public: + /*! + * \brief dense feature vector that can be taken by RegTree + * to do tranverse efficiently + * and can be construct from sparse feature vector + */ + struct FVec { + /*! + * \brief a union value of value and flag + * when flag == -1, this indicate the value is missing + */ + union Entry{ + float fvalue; + int flag; + }; + std::vector data; + /*! 
\brief intialize the vector with size vector */ + inline void Init(size_t size) { + Entry e; e.flag = -1; + data.resize(size); + std::fill(data.begin(), data.end(), e); + } + /*! \brief fill the vector with sparse vector */ + inline void Fill(const SparseBatch::Inst &inst) { + for (bst_uint i = 0; i < inst.length; ++i) { + data[inst[i].findex].fvalue = inst[i].fvalue; + } + } + /*! \brief drop the trace after fill, must be called after fill */ + inline void Drop(const SparseBatch::Inst &inst) { + for (bst_uint i = 0; i < inst.length; ++i) { + data[inst[i].findex].flag = -1; + } + } + /*! \brief get ith value */ + inline float fvalue(size_t i) const { + return data[i].fvalue; + } + /*! \brief check whether i-th entry is missing */ + inline bool is_missing(size_t i) const { + return data[i].flag == -1; + } + }; /*! * \brief get the leaf index * \param feats dense feature vector, if the feature is missing the field is set to NaN * \param root_gid starting root index of the instance * \return the leaf index of the given feature */ - inline int GetLeafIndex(const std::vector &feat, unsigned root_id = 0) const { + inline int GetLeafIndex(const FVec&feat, unsigned root_id = 0) const { // start from groups that belongs to current data int pid = static_cast(root_id); // tranverse tree while (!(*this)[ pid ].is_leaf()) { unsigned split_index = (*this)[pid].split_index(); - const float fvalue = feat[split_index]; - pid = this->GetNext(pid, fvalue, std::isnan(fvalue)); + pid = this->GetNext(pid, feat.fvalue(split_index), feat.is_missing(split_index)); } return pid; } @@ -467,10 +508,11 @@ class RegTree: public TreeModel{ * \param root_gid starting root index of the instance * \return the leaf index of the given feature */ - inline float Predict(const std::vector &feat, unsigned root_id = 0) const { + inline float Predict(const FVec &feat, unsigned root_id = 0) const { int pid = this->GetLeafIndex(feat, root_id); return (*this)[pid].leaf_value(); } + private: /*! \brief get next position of the tree given current pid */ inline int GetNext(int pid, float fvalue, bool is_unknown) const { From f757520c02d11984bb54961d313a8ec0bb83ac8a Mon Sep 17 00:00:00 2001 From: tqchen Date: Mon, 18 Aug 2014 21:32:31 -0700 Subject: [PATCH 32/52] add tree refresher, need review --- src/tree/model.h | 2 -- src/tree/updater.h | 2 ++ 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/tree/model.h b/src/tree/model.h index 7686a4353..081f71ffb 100644 --- a/src/tree/model.h +++ b/src/tree/model.h @@ -512,8 +512,6 @@ class RegTree: public TreeModel{ int pid = this->GetLeafIndex(feat, root_id); return (*this)[pid].leaf_value(); } - - private: /*! 
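The FVec introduced above lets each prediction thread reuse one dense buffer across sparse rows: Fill writes only the features present in a row, the tree probes fvalue/is_missing, and Drop resets exactly the entries Fill touched. The int flag shares storage with the float value, and -1 is a NaN bit pattern, so no finite feature value can collide with the missing marker. A compressed restatement, with a hypothetical Feature stand-in for SparseBatch::Entry:

#include <cstdio>
#include <vector>

struct Feature { unsigned findex; float fvalue; };  // stand-in for SparseBatch::Entry

struct FVecSketch {
  union Entry { float fvalue; int flag; };  // flag == -1 means missing
  std::vector<Entry> data;
  void Init(size_t size) {
    Entry e; e.flag = -1;
    data.assign(size, e);
  }
  void Fill(const std::vector<Feature> &inst) {
    for (size_t i = 0; i < inst.size(); ++i) data[inst[i].findex].fvalue = inst[i].fvalue;
  }
  // must mirror Fill exactly so the buffer is all-missing again afterwards
  void Drop(const std::vector<Feature> &inst) {
    for (size_t i = 0; i < inst.size(); ++i) data[inst[i].findex].flag = -1;
  }
  bool is_missing(size_t i) const { return data[i].flag == -1; }
  float fvalue(size_t i) const { return data[i].fvalue; }
};

int main(void) {
  FVecSketch v; v.Init(4);
  std::vector<Feature> row;
  Feature f; f.findex = 1; f.fvalue = 0.5f; row.push_back(f);
  v.Fill(row);
  std::printf("feat1 missing=%d value=%g, feat2 missing=%d\n",
              static_cast<int>(v.is_missing(1)), v.fvalue(1),
              static_cast<int>(v.is_missing(2)));
  v.Drop(row);  // buffer is clean for the next row
  return 0;
}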
\brief get next position of the tree given current pid */ inline int GetNext(int pid, float fvalue, bool is_unknown) const { float split_value = (*this)[pid].split_cond(); diff --git a/src/tree/updater.h b/src/tree/updater.h index 5c4075b65..2664e2ebd 100644 --- a/src/tree/updater.h +++ b/src/tree/updater.h @@ -48,6 +48,7 @@ class IUpdater { } // namespace xgboost #include "./updater_prune-inl.hpp" +#include "./updater_refresh-inl.hpp" #include "./updater_colmaker-inl.hpp" namespace xgboost { @@ -60,6 +61,7 @@ namespace tree { template inline IUpdater* CreateUpdater(const char *name) { if (!strcmp(name, "prune")) return new TreePruner(); + if (!strcmp(name, "refresh")) return new TreeRefresher(); if (!strcmp(name, "grow_colmaker")) return new ColMaker(); utils::Error("unknown updater:%s", name); return NULL; From d08d8ed3edbc7c5c80ec0f1d72e3b806976d03ad Mon Sep 17 00:00:00 2001 From: tqchen Date: Mon, 18 Aug 2014 21:32:48 -0700 Subject: [PATCH 33/52] add tree refresher, need review --- src/tree/updater_refresh-inl.hpp | 138 +++++++++++++++++++++++++++++++ 1 file changed, 138 insertions(+) create mode 100644 src/tree/updater_refresh-inl.hpp diff --git a/src/tree/updater_refresh-inl.hpp b/src/tree/updater_refresh-inl.hpp new file mode 100644 index 000000000..69f099e1d --- /dev/null +++ b/src/tree/updater_refresh-inl.hpp @@ -0,0 +1,138 @@ +#ifndef XGBOOST_TREE_UPDATER_REFRESH_INL_HPP_ +#define XGBOOST_TREE_UPDATER_REFRESH_INL_HPP_ +/*! + * \file updater_refresh-inl.hpp + * \brief refresh the statistics and leaf value on the tree on the dataset + * \author Tianqi Chen + */ +#include +#include +#include "./param.h" +#include "./updater.h" + +namespace xgboost { +namespace tree { +/*! \brief pruner that prunes a tree after growing finishs */ +template +class TreeRefresher: public IUpdater { + public: + virtual ~TreeRefresher(void) {} + // set training parameter + virtual void SetParam(const char *name, const char *val) { + param.SetParam(name, val); + if (!strcmp(name, "silent")) silent = atoi(val); + } + // update the tree, do pruning + virtual void Update(const std::vector &gpair, + const FMatrix &fmat, + const std::vector &root_index, + const std::vector &trees) { + if (trees.size() == 0) return; + // number of threads + int nthread; + // thread temporal space + std::vector< std::vector > stemp; + std::vector fvec_temp; + // setup temp space for each thread + #pragma omp parallel + { + nthread = omp_get_num_threads(); + } + fvec_temp.resize(nthread, RegTree::FVec()); + stemp.resize(trees.size() * nthread, std::vector()); + #pragma omp parallel + { + int tid = omp_get_thread_num(); + for (size_t i = 0; i < trees.size(); ++i) { + std::vector &vec = stemp[tid * trees.size() + i]; + vec.resize(trees[i]->param.num_nodes); + std::fill(vec.begin(), vec.end(), GradStats()); + } + fvec_temp[tid].Init(trees[0]->param.num_feature); + } + // start accumulating statistics + utils::IIterator *iter = fmat.RowIterator(); + iter->BeforeFirst(); + while (iter->Next()) { + const SparseBatch &batch = iter->Value(); + utils::Check(batch.size < std::numeric_limits::max(), + "too large batch size "); + const unsigned nbatch = static_cast(batch.size); + #pragma omp parallel for schedule(static) + for (unsigned i = 0; i < nbatch; ++i) { + SparseBatch::Inst inst = batch[i]; + const int tid = omp_get_thread_num(); + const size_t ridx = batch.base_rowid + i; + RegTree::FVec &feats = fvec_temp[tid]; + feats.Fill(inst); + for (size_t j = 0; j < trees.size(); ++j) { + AddStats(*trees[j], feats, gpair[ridx], + 
root_index.size() == 0 ? 0 : root_index[ridx],
+                   &stemp[tid * trees.size() + j]);
+        }
+        feats.Drop(inst);
+      }
+    }
+    // start updating the trees using the accumulated statistics
+    // rescale learning rate according to size of trees
+    float lr = param.learning_rate;
+    param.learning_rate = lr / trees.size();
+    for (size_t i = 0; i < trees.size(); ++i) {
+      // aggregate the per-thread statistics for this tree
+      #pragma omp parallel for schedule(static)
+      for (int nid = 0; nid < trees[i]->param.num_nodes; ++nid) {
+        for (int tid = 1; tid < nthread; ++tid) {
+          stemp[i][nid].Add(stemp[tid * trees.size() + i][nid]);
+        }
+      }
+      for (int rid = 0; rid < trees[i]->param.num_roots; ++rid) {
+        this->Refresh(stemp[i], rid, trees[i]);
+      }
+    }
+    // set learning rate back
+    param.learning_rate = lr;
+  }
+
+ private:
+  inline static void AddStats(const RegTree &tree,
+                              const RegTree::FVec &feat,
+                              const bst_gpair &gpair, unsigned root_id,
+                              std::vector<GradStats> *p_gstats) {
+    std::vector<GradStats> &gstats = *p_gstats;
+    // start from the root assigned to the current instance
+    int pid = static_cast<int>(root_id);
+    gstats[pid].Add(gpair);
+    // traverse the tree, adding the statistics along the path to the leaf
+    while (!tree[pid].is_leaf()) {
+      unsigned split_index = tree[pid].split_index();
+      pid = tree.GetNext(pid, feat.fvalue(split_index), feat.is_missing(split_index));
+      gstats[pid].Add(gpair);
+    }
+  }
+  inline void Refresh(const std::vector<GradStats> &gstats,
+                      int nid, RegTree *p_tree) {
+    RegTree &tree = *p_tree;
+    tree.stat(nid).base_weight = param.CalcWeight(gstats[nid]);
+    tree.stat(nid).sum_hess = static_cast<float>(gstats[nid].sum_hess);
+    if (tree[nid].is_leaf()) {
+      tree[nid].set_leaf(tree.stat(nid).base_weight * param.learning_rate);
+    } else {
+      tree.stat(nid).loss_chg =
+          param.CalcGain(gstats[tree[nid].cleft()]) +
+          param.CalcGain(gstats[tree[nid].cright()]) -
+          param.CalcGain(gstats[nid]);
+      this->Refresh(gstats, tree[nid].cleft(), p_tree);
+      this->Refresh(gstats, tree[nid].cright(), p_tree);
+    }
+  }
+  // number of threads used by the updater
+  int nthread;
+  // shutup
+  int silent;
+  // training parameter
+  TrainParam param;
+};
+
+} // namespace tree
+} // namespace xgboost
+#endif // XGBOOST_TREE_UPDATER_REFRESH_INL_HPP_

From fdba6e9c46dd348ce1a372e35c123df22b57959c Mon Sep 17 00:00:00 2001
From: tqchen
Date: Tue, 19 Aug 2014 08:02:29 -0700
Subject: [PATCH 34/52] add pratio

---
 src/learner/evaluation-inl.hpp | 35 ++++++++++++++++++++++++++++++++++
 src/learner/evaluation.h       |  1 +
 2 files changed, 36 insertions(+)

diff --git a/src/learner/evaluation-inl.hpp b/src/learner/evaluation-inl.hpp
index 43fe48726..17a0d5589 100644
--- a/src/learner/evaluation-inl.hpp
+++ b/src/learner/evaluation-inl.hpp
@@ -155,6 +155,41 @@ struct EvalAMS : public IEvaluator {
   float ratio_;
 };
 
+/*!
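The refresher's hot loop above follows the accumulate-then-reduce pattern used throughout these updaters: every thread owns a private statistics buffer, rows are accumulated race-free, and the buffers are merged into slot zero afterwards, parallelizing the reduction over nodes rather than threads. A stripped-down sketch with plain doubles standing in for GradStats (compile with -fopenmp):

#include <omp.h>
#include <vector>

// Sum a per-node statistic over all rows without atomics.
std::vector<double> AccumulateNodeStats(const std::vector<int> &node_of_row,
                                        const std::vector<double> &grad,
                                        int num_nodes) {
  int nthread = 1;
  #pragma omp parallel
  { nthread = omp_get_num_threads(); }
  std::vector< std::vector<double> > stemp(nthread, std::vector<double>(num_nodes, 0.0));
  const int nrow = static_cast<int>(grad.size());
  #pragma omp parallel for schedule(static)
  for (int i = 0; i < nrow; ++i) {
    stemp[omp_get_thread_num()][node_of_row[i]] += grad[i];  // private buffer, no race
  }
  // merge thread buffers; each nid is owned by one iteration, so still race-free
  #pragma omp parallel for schedule(static)
  for (int nid = 0; nid < num_nodes; ++nid) {
    for (int tid = 1; tid < nthread; ++tid) stemp[0][nid] += stemp[tid][nid];
  }
  return stemp[0];
}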
\brief precision with cut off at top percentile */ +struct EvalPrecisionRatio : public IEvaluator{ + public: + EvalPrecisionRatio( const char *name ) : name_(name) { + utils::Assert(sscanf( name, "apratio@%f", &ratio_) == 1, "BUG"); + } + virtual float Eval(const std::vector &preds, + const MetaInfo &info) const { + utils::Assert(preds.size() == info.labels.size(), "label size predict size not match"); + std::vector< std::pair > rec; + for (size_t j = 0; j < preds.size(); ++j) { + rec.push_back(std::make_pair(preds[j], j)); + } + std::sort(rec.begin(), rec.end(), CmpFirst); + double pratio = CalcPRatio( rec, info ); + return static_cast(pratio); + } + virtual const char *Name(void) const{ + return name_.c_str(); + } + protected: + inline double CalcPRatio(const std::vector< std::pair >& rec, const MetaInfo &info) const{ + size_t cutoff = static_cast(ratio_ * rec.size()); + double wt_hit = 0.0, wsum = 0.0; + for (size_t j = 0; j < cutoff; ++j) { + wt_hit += info.labels[rec[j].second]; + wsum += wt_hit / j; + } + return wsum / cutoff; + } + protected: + float ratio_; + std::string name_; +}; + /*! \brief Area under curve, for both classification and rank */ struct EvalAuc : public IEvaluator { virtual float Eval(const std::vector &preds, diff --git a/src/learner/evaluation.h b/src/learner/evaluation.h index fa25aa7d7..79ad4902e 100644 --- a/src/learner/evaluation.h +++ b/src/learner/evaluation.h @@ -41,6 +41,7 @@ inline IEvaluator* CreateEvaluator(const char *name) { if (!strcmp(name, "auc")) return new EvalAuc(); if (!strncmp(name, "ams@", 4)) return new EvalAMS(name); if (!strncmp(name, "pre@", 4)) return new EvalPrecision(name); + if (!strncmp(name, "pratio@", 4)) return new EvalPrecisionRatio(name); if (!strncmp(name, "map", 3)) return new EvalMAP(name); if (!strncmp(name, "ndcg", 3)) return new EvalNDCG(name); utils::Error("unknown evaluation metric type: %s", name); From 406db647f22bee0c0513d8b437c00c8c07f94757 Mon Sep 17 00:00:00 2001 From: tqchen Date: Tue, 19 Aug 2014 08:05:05 -0700 Subject: [PATCH 35/52] add pratio --- src/learner/evaluation-inl.hpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/learner/evaluation-inl.hpp b/src/learner/evaluation-inl.hpp index 17a0d5589..63d5d7fa3 100644 --- a/src/learner/evaluation-inl.hpp +++ b/src/learner/evaluation-inl.hpp @@ -158,8 +158,8 @@ struct EvalAMS : public IEvaluator { /*! 
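Reading the metric above: pratio@r sorts rows by predicted score, keeps the top r-fraction, and reports how many of those are positive. An unweighted sketch of that ratio (the in-tree version also applies instance weights, and a later patch in this series adds an apratio variant that averages the running precision over every prefix of the cut-off):

#include <algorithm>
#include <cstddef>
#include <utility>
#include <vector>

inline bool CmpPredDesc(const std::pair<float, int> &a,
                        const std::pair<float, int> &b) {
  return a.first > b.first;  // descending by prediction score
}

// Fraction of positives among the top `ratio` share of rows.
float PrecisionAtRatio(std::vector< std::pair<float, int> > rec, float ratio) {
  std::sort(rec.begin(), rec.end(), CmpPredDesc);
  const size_t cutoff = static_cast<size_t>(ratio * rec.size());
  double hit = 0.0;
  for (size_t j = 0; j < cutoff; ++j) hit += rec[j].second;  // label in {0, 1}
  return cutoff == 0 ? 0.0f : static_cast<float>(hit / cutoff);
}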
\brief precision with cut off at top percentile */ struct EvalPrecisionRatio : public IEvaluator{ public: - EvalPrecisionRatio( const char *name ) : name_(name) { - utils::Assert(sscanf( name, "apratio@%f", &ratio_) == 1, "BUG"); + explicit EvalPrecisionRatio(const char *name) : name_(name) { + utils::Assert(sscanf(name, "apratio@%f", &ratio_) == 1, "BUG"); } virtual float Eval(const std::vector &preds, const MetaInfo &info) const { @@ -169,23 +169,23 @@ struct EvalPrecisionRatio : public IEvaluator{ rec.push_back(std::make_pair(preds[j], j)); } std::sort(rec.begin(), rec.end(), CmpFirst); - double pratio = CalcPRatio( rec, info ); + double pratio = CalcPRatio(rec, info); return static_cast(pratio); } - virtual const char *Name(void) const{ + virtual const char *Name(void) const { return name_.c_str(); } + protected: - inline double CalcPRatio(const std::vector< std::pair >& rec, const MetaInfo &info) const{ + inline double CalcPRatio(const std::vector< std::pair >& rec, const MetaInfo &info) const { size_t cutoff = static_cast(ratio_ * rec.size()); double wt_hit = 0.0, wsum = 0.0; for (size_t j = 0; j < cutoff; ++j) { wt_hit += info.labels[rec[j].second]; - wsum += wt_hit / j; + wsum += wt_hit / (j + 1); } return wsum / cutoff; } - protected: float ratio_; std::string name_; }; @@ -285,7 +285,7 @@ struct EvalRankList : public IEvaluator { minus_ = false; if (sscanf(name, "%*[^@]@%u[-]?", &topn_) != 1) { topn_ = UINT_MAX; - } + } if (name[strlen(name) - 1] == '-') { minus_ = true; } From e7de77aa1f6b1004be14f726bcd7ec0444701806 Mon Sep 17 00:00:00 2001 From: tqchen Date: Tue, 19 Aug 2014 08:08:54 -0700 Subject: [PATCH 36/52] chg --- src/learner/evaluation-inl.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/learner/evaluation-inl.hpp b/src/learner/evaluation-inl.hpp index 63d5d7fa3..bff37ff94 100644 --- a/src/learner/evaluation-inl.hpp +++ b/src/learner/evaluation-inl.hpp @@ -159,7 +159,7 @@ struct EvalAMS : public IEvaluator { struct EvalPrecisionRatio : public IEvaluator{ public: explicit EvalPrecisionRatio(const char *name) : name_(name) { - utils::Assert(sscanf(name, "apratio@%f", &ratio_) == 1, "BUG"); + utils::Assert(sscanf(name, "pratio@%f", &ratio_) == 1, "BUG"); } virtual float Eval(const std::vector &preds, const MetaInfo &info) const { From 762b36073997f7de2aa8f98b7bf6a86baeb710ff Mon Sep 17 00:00:00 2001 From: tqchen Date: Tue, 19 Aug 2014 08:42:36 -0700 Subject: [PATCH 37/52] fix typo --- python/xgboost.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/xgboost.py b/python/xgboost.py index 509fe654a..4a27f2377 100644 --- a/python/xgboost.py +++ b/python/xgboost.py @@ -121,7 +121,7 @@ class DMatrix: # slice the DMatrix to return a new DMatrix that only contains rindex def slice(self, rindex): res = DMatrix(None) - res.handle = ctype.c_void_p(xglib.XGDMatrixSliceDMatrix( + res.handle = ctypes.c_void_p(xglib.XGDMatrixSliceDMatrix( self.handle, (ctypes.c_int*len(rindex))(*rindex), len(rindex))) return res From 91e70c76ffbb6a11e637560e4f88c3eb122d8b42 Mon Sep 17 00:00:00 2001 From: "tqchen@graphlab.com" Date: Tue, 19 Aug 2014 11:41:35 -0700 Subject: [PATCH 38/52] refresher test --- src/data.h | 4 ++++ src/learner/learner-inl.hpp | 14 +++++++++++--- src/tree/updater_colmaker-inl.hpp | 10 ++++++---- src/tree/updater_refresh-inl.hpp | 3 --- 4 files changed, 21 insertions(+), 10 deletions(-) diff --git a/src/data.h b/src/data.h index fa815a4ee..603334b5c 100644 --- a/src/data.h +++ b/src/data.h @@ -199,6 +199,10 @@ class FMatrixS : 
public FMatrixInterface{ utils::Check(this->HaveColAccess(), "NumCol:need column access"); return col_ptr_.size() - 1; } + /*! \brief get number of buffered rows */ + inline size_t NumBufferedRow(void) const { + return num_buffered_row_; + } /*! \brief get col sorted iterator */ inline ColIter GetSortedCol(size_t cidx) const { utils::Assert(cidx < this->NumCol(), "col id exceed bound"); diff --git a/src/learner/learner-inl.hpp b/src/learner/learner-inl.hpp index 4d227f488..a8cad7ebd 100644 --- a/src/learner/learner-inl.hpp +++ b/src/learner/learner-inl.hpp @@ -9,6 +9,7 @@ #include #include #include +#include #include "./objective.h" #include "./evaluation.h" #include "../gbm/gbm.h" @@ -28,6 +29,8 @@ class BoostLearner { gbm_ = NULL; name_obj_ = "reg:linear"; name_gbm_ = "gbtree"; + silent= 0; + max_buffer_row = std::numeric_limits::max(); } ~BoostLearner(void) { if (obj_ != NULL) delete obj_; @@ -77,6 +80,7 @@ class BoostLearner { */ inline void SetParam(const char *name, const char *val) { if (!strcmp(name, "silent")) silent = atoi(val); + if (!strcmp(name, "max_buffer_row")) sscanf(val, "%lu", &max_buffer_row); if (!strcmp(name, "eval_metric")) evaluator_.AddEval(val); if (!strcmp("seed", name)) random::Seed(atoi(val)); if (!strcmp(name, "num_class")) this->SetParam("num_output_group", val); @@ -87,7 +91,9 @@ class BoostLearner { } if (gbm_ != NULL) gbm_->SetParam(name, val); if (obj_ != NULL) obj_->SetParam(name, val); - cfg_.push_back(std::make_pair(std::string(name), std::string(val))); + if (gbm_ == NULL || obj_ == NULL) { + cfg_.push_back(std::make_pair(std::string(name), std::string(val))); + } } /*! * \brief initialize the model @@ -144,8 +150,8 @@ class BoostLearner { * if not intialize it * \param p_train pointer to the matrix used by training */ - inline void CheckInit(DMatrix *p_train) const { - p_train->fmat.InitColAccess(); + inline void CheckInit(DMatrix *p_train) { + p_train->fmat.InitColAccess(max_buffer_row); } /*! 
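A detail of BoostLearner::SetParam above: parameters can arrive before gbm_ and obj_ exist, so any setting seen while either is still NULL is queued in cfg_, to be replayed once the components are constructed (the replay itself happens at model initialization, outside this hunk); after both exist, calls forward directly and the cache stops growing. The defer-and-replay shape in isolation, with illustrative names:

#include <cstddef>
#include <string>
#include <utility>
#include <vector>

struct Component {  // stand-in for the gbm/objective objects
  void SetParam(const std::string &k, const std::string &v) { /* configure */ }
};

class LearnerSketch {
 public:
  LearnerSketch(void) : gbm_(NULL) {}
  ~LearnerSketch(void) { delete gbm_; }
  void SetParam(const std::string &k, const std::string &v) {
    if (gbm_ != NULL) gbm_->SetParam(k, v);     // component exists: apply now
    else cfg_.push_back(std::make_pair(k, v));  // otherwise queue for init
  }
  void InitModel(void) {
    gbm_ = new Component();
    // replay everything configured before the booster existed
    for (size_t i = 0; i < cfg_.size(); ++i) {
      gbm_->SetParam(cfg_[i].first, cfg_[i].second);
    }
  }
 private:
  Component *gbm_;
  std::vector< std::pair<std::string, std::string> > cfg_;
};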
* \brief update the model for one iteration @@ -286,6 +292,8 @@ class BoostLearner { // data fields // silent during training int silent; + // maximum buffred row value + size_t max_buffer_row; // evaluation set EvalSet evaluator_; // model parameter diff --git a/src/tree/updater_colmaker-inl.hpp b/src/tree/updater_colmaker-inl.hpp index 3645e53ce..b7d3f4e2f 100644 --- a/src/tree/updater_colmaker-inl.hpp +++ b/src/tree/updater_colmaker-inl.hpp @@ -110,22 +110,22 @@ class ColMaker: public IUpdater { const std::vector &root_index, const RegTree &tree) { utils::Assert(tree.param.num_nodes == tree.param.num_roots, "ColMaker: can only grow new tree"); {// setup position - position.resize(gpair.size()); + position.resize(fmat.NumBufferedRow()); if (root_index.size() == 0) { std::fill(position.begin(), position.end(), 0); } else { - for (size_t i = 0; i < root_index.size(); ++i) { + for (size_t i = 0; i < position.size(); ++i) { position[i] = root_index[i]; utils::Assert(root_index[i] < (unsigned)tree.param.num_roots, "root index exceed setting"); } } // mark delete for the deleted datas - for (size_t i = 0; i < gpair.size(); ++i) { + for (size_t i = 0; i < position.size(); ++i) { if (gpair[i].hess < 0.0f) position[i] = -1; } // mark subsample if (param.subsample < 1.0f) { - for (size_t i = 0; i < gpair.size(); ++i) { + for (size_t i = 0; i < position.size(); ++i) { if (gpair[i].hess < 0.0f) continue; if (random::SampleBinary(param.subsample) == 0) position[i] = -1; } @@ -271,7 +271,9 @@ class ColMaker: public IUpdater { } // start enumeration const unsigned nsize = static_cast(feat_set.size()); + #if defined(_OPENMP) const int batch_size = std::max(static_cast(nsize / this->nthread / 32), 1); + #endif #pragma omp parallel for schedule(dynamic, batch_size) for (unsigned i = 0; i < nsize; ++i) { const unsigned fid = feat_set[i]; diff --git a/src/tree/updater_refresh-inl.hpp b/src/tree/updater_refresh-inl.hpp index 69f099e1d..12bbcf864 100644 --- a/src/tree/updater_refresh-inl.hpp +++ b/src/tree/updater_refresh-inl.hpp @@ -20,7 +20,6 @@ class TreeRefresher: public IUpdater { // set training parameter virtual void SetParam(const char *name, const char *val) { param.SetParam(name, val); - if (!strcmp(name, "silent")) silent = atoi(val); } // update the tree, do pruning virtual void Update(const std::vector &gpair, @@ -127,8 +126,6 @@ class TreeRefresher: public IUpdater { } // number of thread in the data int nthread; - // shutup - int silent; // training parameter TrainParam param; }; From 9caccd3b36c4f811ae908c0d126b7f08522a1ed6 Mon Sep 17 00:00:00 2001 From: "tqchen@graphlab.com" Date: Tue, 19 Aug 2014 12:07:52 -0700 Subject: [PATCH 39/52] change row subsample to prob --- src/data.h | 65 ++++++++++++++++--------------- src/learner/learner-inl.hpp | 8 ++-- src/tree/updater_colmaker-inl.hpp | 51 ++++++++++++++---------- 3 files changed, 69 insertions(+), 55 deletions(-) diff --git a/src/data.h b/src/data.h index 603334b5c..f5e02a562 100644 --- a/src/data.h +++ b/src/data.h @@ -14,6 +14,7 @@ #include "utils/io.h" #include "utils/utils.h" #include "utils/iterator.h" +#include "utils/random.h" #include "utils/matrix_csr.h" namespace xgboost { @@ -184,7 +185,6 @@ class FMatrixS : public FMatrixInterface{ /*! \brief constructor */ FMatrixS(void) { iter_ = NULL; - num_buffered_row_ = 0; } // destructor ~FMatrixS(void) { @@ -200,8 +200,8 @@ class FMatrixS : public FMatrixInterface{ return col_ptr_.size() - 1; } /*! 
\brief get number of buffered rows */ - inline size_t NumBufferedRow(void) const { - return num_buffered_row_; + inline const std::vector buffered_rowset(void) const { + return buffered_rowset_; } /*! \brief get col sorted iterator */ inline ColIter GetSortedCol(size_t cidx) const { @@ -224,12 +224,12 @@ class FMatrixS : public FMatrixInterface{ } /*! \brief get column density */ inline float GetColDensity(size_t cidx) const { - size_t nmiss = num_buffered_row_ - (col_ptr_[cidx+1] - col_ptr_[cidx]); - return 1.0f - (static_cast(nmiss)) / num_buffered_row_; + size_t nmiss = buffered_rowset_.size() - (col_ptr_[cidx+1] - col_ptr_[cidx]); + return 1.0f - (static_cast(nmiss)) / buffered_rowset_.size(); } - inline void InitColAccess(size_t max_nrow = ULONG_MAX) { + inline void InitColAccess(float pkeep = 1.0f) { if (this->HaveColAccess()) return; - this->InitColData(max_nrow); + this->InitColData(pkeep); } /*! * \brief get the row iterator associated with FMatrix @@ -248,8 +248,8 @@ class FMatrixS : public FMatrixInterface{ * \param fo output stream to save to */ inline void SaveColAccess(utils::IStream &fo) const { - fo.Write(&num_buffered_row_, sizeof(num_buffered_row_)); - if (num_buffered_row_ != 0) { + fo.Write(buffered_rowset_); + if (buffered_rowset_.size() != 0) { SaveBinary(fo, col_ptr_, col_data_); } } @@ -258,9 +258,8 @@ class FMatrixS : public FMatrixInterface{ * \param fo output stream to load from */ inline void LoadColAccess(utils::IStream &fi) { - utils::Check(fi.Read(&num_buffered_row_, sizeof(num_buffered_row_)) != 0, - "invalid input file format"); - if (num_buffered_row_ != 0) { + utils::Check(fi.Read(&buffered_rowset_), "invalid input file format"); + if (buffered_rowset_.size() != 0) { LoadBinary(fi, &col_ptr_, &col_data_); } } @@ -304,39 +303,43 @@ class FMatrixS : public FMatrixInterface{ protected: /*! 
* \brief intialize column data - * \param max_nrow maximum number of rows supported + * \param pkeep probability to keep a row */ - inline void InitColData(size_t max_nrow) { + inline void InitColData(float pkeep) { + buffered_rowset_.clear(); // note: this part of code is serial, todo, parallelize this transformer utils::SparseCSRMBuilder builder(col_ptr_, col_data_); builder.InitBudget(0); // start working iter_->BeforeFirst(); - num_buffered_row_ = 0; while (iter_->Next()) { const SparseBatch &batch = iter_->Value(); - if (batch.base_rowid >= max_nrow) break; - const size_t nbatch = std::min(batch.size, max_nrow - batch.base_rowid); - for (size_t i = 0; i < nbatch; ++i, ++num_buffered_row_) { - SparseBatch::Inst inst = batch[i]; - for (bst_uint j = 0; j < inst.length; ++j) { - builder.AddBudget(inst[j].findex); + for (size_t i = 0; i < batch.size; ++i) { + if (pkeep==1.0f || random::SampleBinary(pkeep)) { + buffered_rowset_.push_back(batch.base_rowid+i); + SparseBatch::Inst inst = batch[i]; + for (bst_uint j = 0; j < inst.length; ++j) { + builder.AddBudget(inst[j].findex); + } } } } builder.InitStorage(); iter_->BeforeFirst(); + size_t ktop = 0; while (iter_->Next()) { const SparseBatch &batch = iter_->Value(); - if (batch.base_rowid >= max_nrow) break; - const size_t nbatch = std::min(batch.size, max_nrow - batch.base_rowid); - for (size_t i = 0; i < nbatch; ++i) { - SparseBatch::Inst inst = batch[i]; - for (bst_uint j = 0; j < inst.length; ++j) { - builder.PushElem(inst[j].findex, - Entry((bst_uint)(batch.base_rowid+i), - inst[j].fvalue)); + for (size_t i = 0; i < batch.size; ++i) { + if (ktop < buffered_rowset_.size() && + buffered_rowset_[ktop] == batch.base_rowid+i) { + ++ ktop; + SparseBatch::Inst inst = batch[i]; + for (bst_uint j = 0; j < inst.length; ++j) { + builder.PushElem(inst[j].findex, + Entry((bst_uint)(batch.base_rowid+i), + inst[j].fvalue)); + } } } } @@ -353,8 +356,8 @@ class FMatrixS : public FMatrixInterface{ private: // --- data structure used to support InitColAccess -- utils::IIterator *iter_; - /*! \brief number */ - size_t num_buffered_row_; + /*! \brief list of row index that are buffered */ + std::vector buffered_rowset_; /*! \brief column pointer of CSC format */ std::vector col_ptr_; /*! \brief column datas in CSC format */ diff --git a/src/learner/learner-inl.hpp b/src/learner/learner-inl.hpp index a8cad7ebd..bd5cf6e3b 100644 --- a/src/learner/learner-inl.hpp +++ b/src/learner/learner-inl.hpp @@ -30,7 +30,7 @@ class BoostLearner { name_obj_ = "reg:linear"; name_gbm_ = "gbtree"; silent= 0; - max_buffer_row = std::numeric_limits::max(); + prob_buffer_row = 1.0f; } ~BoostLearner(void) { if (obj_ != NULL) delete obj_; @@ -80,7 +80,7 @@ class BoostLearner { */ inline void SetParam(const char *name, const char *val) { if (!strcmp(name, "silent")) silent = atoi(val); - if (!strcmp(name, "max_buffer_row")) sscanf(val, "%lu", &max_buffer_row); + if (!strcmp(name, "prob_buffer_row")) prob_buffer_row = static_cast(atof(val)); if (!strcmp(name, "eval_metric")) evaluator_.AddEval(val); if (!strcmp("seed", name)) random::Seed(atoi(val)); if (!strcmp(name, "num_class")) this->SetParam("num_output_group", val); @@ -151,7 +151,7 @@ class BoostLearner { * \param p_train pointer to the matrix used by training */ inline void CheckInit(DMatrix *p_train) { - p_train->fmat.InitColAccess(max_buffer_row); + p_train->fmat.InitColAccess(prob_buffer_row); } /*! 
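InitColData above makes two passes over the row iterator, one to size the CSC column budgets and one to fill them, so the pkeep Bernoulli draw must be made exactly once: the first pass records the surviving row ids in buffered_rowset_, and the second pass replays that decision through the ktop cursor instead of sampling again. The decide-once, replay-later shape (std::rand stands in for the utils::random PRNG):

#include <cstdlib>
#include <vector>

// Pass 1: decide once which rows survive.
std::vector<unsigned> SampleRowset(size_t num_row, float pkeep) {
  std::vector<unsigned> kept;
  for (size_t i = 0; i < num_row; ++i) {
    if (pkeep == 1.0f || std::rand() / (RAND_MAX + 1.0) < pkeep) {
      kept.push_back(static_cast<unsigned>(i));
    }
  }
  return kept;
}

// Pass 2: stream the rows again, acting only on the remembered ones.
template <typename Visitor>
void ReplayRowset(const std::vector<unsigned> &kept, size_t num_row, Visitor visit) {
  size_t ktop = 0;
  for (size_t i = 0; i < num_row; ++i) {
    if (ktop < kept.size() && kept[ktop] == static_cast<unsigned>(i)) {
      ++ktop;
      visit(i);  // re-sampling here would pick a different row set
    }
  }
}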
* \brief update the model for one iteration @@ -293,7 +293,7 @@ class BoostLearner { // silent during training int silent; // maximum buffred row value - size_t max_buffer_row; + float prob_buffer_row; // evaluation set EvalSet evaluator_; // model parameter diff --git a/src/tree/updater_colmaker-inl.hpp b/src/tree/updater_colmaker-inl.hpp index b7d3f4e2f..35880b70e 100644 --- a/src/tree/updater_colmaker-inl.hpp +++ b/src/tree/updater_colmaker-inl.hpp @@ -80,13 +80,13 @@ class ColMaker: public IUpdater { const std::vector &root_index, RegTree *p_tree) { this->InitData(gpair, fmat, root_index, *p_tree); - this->InitNewNode(qexpand, gpair, *p_tree); + this->InitNewNode(qexpand, gpair, fmat, *p_tree); for (int depth = 0; depth < param.max_depth; ++depth) { this->FindSplit(depth, this->qexpand, gpair, fmat, p_tree); this->ResetPosition(this->qexpand, fmat, *p_tree); this->UpdateQueueExpand(*p_tree, &this->qexpand); - this->InitNewNode(qexpand, gpair, *p_tree); + this->InitNewNode(qexpand, gpair, fmat, *p_tree); // if nothing left to be expand, break if (qexpand.size() == 0) break; } @@ -109,25 +109,31 @@ class ColMaker: public IUpdater { const FMatrix &fmat, const std::vector &root_index, const RegTree &tree) { utils::Assert(tree.param.num_nodes == tree.param.num_roots, "ColMaker: can only grow new tree"); + const std::vector &rowset = fmat.buffered_rowset(); {// setup position - position.resize(fmat.NumBufferedRow()); + position.resize(gpair.size()); if (root_index.size() == 0) { - std::fill(position.begin(), position.end(), 0); + for (size_t i = 0; i < rowset.size(); ++i) { + position[rowset[i]] = 0; + } } else { - for (size_t i = 0; i < position.size(); ++i) { - position[i] = root_index[i]; - utils::Assert(root_index[i] < (unsigned)tree.param.num_roots, "root index exceed setting"); + for (size_t i = 0; i < rowset.size(); ++i) { + const bst_uint ridx = rowset[i]; + position[ridx] = root_index[ridx]; + utils::Assert(root_index[ridx] < (unsigned)tree.param.num_roots, "root index exceed setting"); } } // mark delete for the deleted datas - for (size_t i = 0; i < position.size(); ++i) { - if (gpair[i].hess < 0.0f) position[i] = -1; + for (size_t i = 0; i < rowset.size(); ++i) { + const bst_uint ridx = rowset[i]; + if (gpair[ridx].hess < 0.0f) position[ridx] = -1; } // mark subsample if (param.subsample < 1.0f) { - for (size_t i = 0; i < position.size(); ++i) { - if (gpair[i].hess < 0.0f) continue; - if (random::SampleBinary(param.subsample) == 0) position[i] = -1; + for (size_t i = 0; i < rowset.size(); ++i) { + const bst_uint ridx = rowset[i]; + if (gpair[ridx].hess < 0.0f) continue; + if (random::SampleBinary(param.subsample) == 0) position[ridx] = -1; } } } @@ -168,6 +174,7 @@ class ColMaker: public IUpdater { /*! 
\brief initialize the base_weight, root_gain, and NodeEntry for all the new nodes in qexpand */ inline void InitNewNode(const std::vector &qexpand, const std::vector &gpair, + const FMatrix &fmat, const RegTree &tree) { {// setup statistics space for each tree node for (size_t i = 0; i < stemp.size(); ++i) { @@ -175,13 +182,15 @@ class ColMaker: public IUpdater { } snode.resize(tree.param.num_nodes, NodeEntry()); } + const std::vector &rowset = fmat.buffered_rowset(); // setup position - const unsigned ndata = static_cast(position.size()); + const unsigned ndata = static_cast(rowset.size()); #pragma omp parallel for schedule(static) for (unsigned i = 0; i < ndata; ++i) { + const bst_uint ridx = rowset[i]; const int tid = omp_get_thread_num(); - if (position[i] < 0) continue; - stemp[tid][position[i]].stats.Add(gpair[i]); + if (position[ridx] < 0) continue; + stemp[tid][position[ridx]].stats.Add(gpair[ridx]); } // sum the per thread statistics together for (size_t j = 0; j < qexpand.size(); ++j) { @@ -303,17 +312,19 @@ class ColMaker: public IUpdater { } // reset position of each data points after split is created in the tree inline void ResetPosition(const std::vector &qexpand, const FMatrix &fmat, const RegTree &tree) { + const std::vector &rowset = fmat.buffered_rowset(); // step 1, set default direct nodes to default, and leaf nodes to -1 - const unsigned ndata = static_cast(position.size()); + const unsigned ndata = static_cast(rowset.size()); #pragma omp parallel for schedule(static) - for (unsigned i = 0; i < ndata; ++i) { - const int nid = position[i]; + for (unsigned i = 0; i < ndata; ++i) { + const bst_uint ridx = rowset[i]; + const int nid = position[ridx]; if (nid >= 0) { if (tree[nid].is_leaf()) { - position[i] = -1; + position[ridx] = -1; } else { // push to default branch, correct latter - position[i] = tree[nid].default_left() ? tree[nid].cleft(): tree[nid].cright(); + position[ridx] = tree[nid].default_left() ? tree[nid].cleft(): tree[nid].cright(); } } } From 1fd6ff817f645daef2958f16330921f261b06b38 Mon Sep 17 00:00:00 2001 From: "tqchen@graphlab.com" Date: Tue, 19 Aug 2014 12:20:31 -0700 Subject: [PATCH 40/52] ok --- src/data.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/data.h b/src/data.h index f5e02a562..fea3f7a48 100644 --- a/src/data.h +++ b/src/data.h @@ -206,8 +206,8 @@ class FMatrixS : public FMatrixInterface{ /*! \brief get col sorted iterator */ inline ColIter GetSortedCol(size_t cidx) const { utils::Assert(cidx < this->NumCol(), "col id exceed bound"); - return ColIter(&col_data_[col_ptr_[cidx]] - 1, - &col_data_[col_ptr_[cidx + 1]] - 1); + return ColIter(&col_data_[0] + col_ptr_[cidx] - 1, + &col_data_[0] + col_ptr_[cidx + 1] - 1); } /*! * \brief get reversed col iterator, @@ -215,8 +215,8 @@ class FMatrixS : public FMatrixInterface{ */ inline ColBackIter GetReverseSortedCol(size_t cidx) const { utils::Assert(cidx < this->NumCol(), "col id exceed bound"); - return ColBackIter(&col_data_[col_ptr_[cidx + 1]], - &col_data_[col_ptr_[cidx]]); + return ColBackIter(&col_data_[0] + col_ptr_[cidx + 1], + &col_data_[0] + col_ptr_[cidx]); } /*! 
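The GetSortedCol/GetReverseSortedCol rewrite in [PATCH 40/52] above looks cosmetic but fixes a real out-of-range subscript; a sketch of the distinction (assuming non-empty storage, as the column arrays are here):

#include <vector>

// When ptr == v.size(), v[ptr] evaluates an out-of-range subscript before any
// offset is applied: undefined behavior, and an assertion in checked/debug STLs.
// Offsetting from the base address only forms a pointer, which is legal up to
// one past the end.
inline const float *ColEnd(const std::vector<float> &v, size_t ptr) {
  return &v[0] + ptr;   // fine even when ptr == v.size()
  // return &v[ptr];    // not fine in that case
}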
\brief get col size */ inline size_t GetColSize(size_t cidx) const { From 58d74861b9b82be20ce5e907253101078f7291ec Mon Sep 17 00:00:00 2001 From: tqchen Date: Fri, 22 Aug 2014 14:29:32 -0700 Subject: [PATCH 41/52] fix multiclass --- src/learner/objective-inl.hpp | 44 ++++++++++++++++++++--------------- src/learner/objective.h | 2 +- 2 files changed, 26 insertions(+), 20 deletions(-) diff --git a/src/learner/objective-inl.hpp b/src/learner/objective-inl.hpp index e45250950..41af8b605 100644 --- a/src/learner/objective-inl.hpp +++ b/src/learner/objective-inl.hpp @@ -105,19 +105,22 @@ class RegLossObj : public IObjFunction{ scale_pos_weight = static_cast(atof(val)); } } - virtual void GetGradient(const std::vector& preds, + virtual void GetGradient(const std::vector &preds, const MetaInfo &info, int iter, std::vector *out_gpair) { - utils::Check(preds.size() == info.labels.size(), + utils::Check(info.labels.size() != 0, "label set cannot be empty"); + utils::Check(preds.size() % info.labels.size() == 0, "labels are not correctly provided"); std::vector &gpair = *out_gpair; gpair.resize(preds.size()); // start calculating gradient + const unsigned nstep = static_cast(info.labels.size()); const unsigned ndata = static_cast(preds.size()); #pragma omp parallel for schedule(static) - for (unsigned j = 0; j < ndata; ++j) { - float p = loss.PredTransform(preds[j]); + for (unsigned i = 0; i < ndata; ++i) { + const unsigned j = i % nstep; + float p = loss.PredTransform(preds[i]); float w = info.GetWeight(j); if (info.labels[j] == 1.0f) w *= scale_pos_weight; gpair[j] = bst_gpair(loss.FirstOrderGradient(p, info.labels[j]) * w, @@ -155,25 +158,28 @@ class SoftmaxMultiClassObj : public IObjFunction { virtual void SetParam(const char *name, const char *val) { if (!strcmp( "num_class", name )) nclass = atoi(val); } - virtual void GetGradient(const std::vector& preds, + virtual void GetGradient(const std::vector &preds, const MetaInfo &info, int iter, std::vector *out_gpair) { utils::Check(nclass != 0, "must set num_class to use softmax"); - utils::Check(preds.size() == static_cast(nclass) * info.labels.size(), + utils::Check(info.labels.size() != 0, "label set cannot be empty"); + utils::Check(preds.size() % (static_cast(nclass) * info.labels.size()) == 0, "SoftmaxMultiClassObj: label size and pred size does not match"); std::vector &gpair = *out_gpair; gpair.resize(preds.size()); - const unsigned ndata = static_cast(info.labels.size()); + const unsigned nstep = static_cast(info.labels.size() * nclass); + const unsigned ndata = static_cast(preds.size() / nclass); #pragma omp parallel { std::vector rec(nclass); #pragma omp for schedule(static) - for (unsigned j = 0; j < ndata; ++j) { + for (unsigned i = 0; i < ndata; ++i) { for (int k = 0; k < nclass; ++k) { - rec[k] = preds[j * nclass + k]; + rec[k] = preds[i * nclass + k]; } Softmax(&rec); + const unsigned j = i % nstep; int label = static_cast(info.labels[j]); utils::Check(label < nclass, "SoftmaxMultiClassObj: label exceed num_class"); const float wt = info.GetWeight(j); @@ -181,9 +187,9 @@ class SoftmaxMultiClassObj : public IObjFunction { float p = rec[k]; const float h = 2.0f * p * (1.0f - p) * wt; if (label == k) { - gpair[j * nclass + k] = bst_gpair((p - 1.0f) * wt, h); + gpair[i * nclass + k] = bst_gpair((p - 1.0f) * wt, h); } else { - gpair[j * nclass + k] = bst_gpair(p* wt, h); + gpair[i * nclass + k] = bst_gpair(p* wt, h); } } } @@ -203,7 +209,9 @@ class SoftmaxMultiClassObj : public IObjFunction { inline void Transform(std::vector *io_preds, 
int prob) { utils::Check(nclass != 0, "must set num_class to use softmax"); std::vector &preds = *io_preds; + std::vector tmp; const unsigned ndata = static_cast(preds.size()/nclass); + if (prob == 0) tmp.resize(ndata); #pragma omp parallel { std::vector rec(nclass); @@ -213,7 +221,7 @@ class SoftmaxMultiClassObj : public IObjFunction { rec[k] = preds[j * nclass + k]; } if (prob == 0) { - preds[j] = FindMaxIndex(rec); + tmp[j] = FindMaxIndex(rec); } else { Softmax(&rec); for (int k = 0; k < nclass; ++k) { @@ -222,9 +230,7 @@ class SoftmaxMultiClassObj : public IObjFunction { } } } - if (prob == 0) { - preds.resize(ndata); - } + if (prob == 0) preds = tmp; } // data field int nclass; @@ -245,17 +251,17 @@ class LambdaRankObj : public IObjFunction { if (!strcmp( "fix_list_weight", name)) fix_list_weight = static_cast(atof(val)); if (!strcmp( "num_pairsample", name)) num_pairsample = atoi(val); } - virtual void GetGradient(const std::vector& preds, + virtual void GetGradient(const std::vector &preds, const MetaInfo &info, int iter, std::vector *out_gpair) { - utils::Assert(preds.size() == info.labels.size(), "label size predict size not match"); + utils::Check(preds.size() == info.labels.size(), "label size predict size not match"); std::vector &gpair = *out_gpair; gpair.resize(preds.size()); // quick consistency when group is not available - std::vector tgptr(2, 0); tgptr[1] = preds.size(); + std::vector tgptr(2, 0); tgptr[1] = info.labels.size(); const std::vector &gptr = info.group_ptr.size() == 0 ? tgptr : info.group_ptr; - utils::Check(gptr.size() != 0 && gptr.back() == preds.size(), + utils::Check(gptr.size() != 0 && gptr.back() == info.labels.size(), "group structure not consistent with #rows"); const unsigned ngroup = static_cast(gptr.size() - 1); #pragma omp parallel diff --git a/src/learner/objective.h b/src/learner/objective.h index 513219093..d741ba61f 100644 --- a/src/learner/objective.h +++ b/src/learner/objective.h @@ -27,7 +27,7 @@ class IObjFunction{ * \param iter current iteration number * \param out_gpair output of get gradient, saves gradient and second order gradient in */ - virtual void GetGradient(const std::vector& preds, + virtual void GetGradient(const std::vector &preds, const MetaInfo &info, int iter, std::vector *out_gpair) = 0; From 3f5b5e1fdca40c0c5420545fe08fb182e6f8b2a5 Mon Sep 17 00:00:00 2001 From: tqchen Date: Fri, 22 Aug 2014 16:10:19 -0700 Subject: [PATCH 42/52] add apratio --- src/learner/evaluation-inl.hpp | 24 ++++++++++++++++++------ src/learner/evaluation.h | 4 +++- src/learner/learner-inl.hpp | 3 +++ 3 files changed, 24 insertions(+), 7 deletions(-) diff --git a/src/learner/evaluation-inl.hpp b/src/learner/evaluation-inl.hpp index bff37ff94..69f0bb4d9 100644 --- a/src/learner/evaluation-inl.hpp +++ b/src/learner/evaluation-inl.hpp @@ -159,7 +159,12 @@ struct EvalAMS : public IEvaluator { struct EvalPrecisionRatio : public IEvaluator{ public: explicit EvalPrecisionRatio(const char *name) : name_(name) { - utils::Assert(sscanf(name, "pratio@%f", &ratio_) == 1, "BUG"); + if (sscanf(name, "apratio@%f", &ratio_) == 1) { + use_ap = 1; + } else { + utils::Assert(sscanf(name, "pratio@%f", &ratio_) == 1, "BUG"); + use_ap = 0; + } } virtual float Eval(const std::vector &preds, const MetaInfo &info) const { @@ -179,13 +184,20 @@ struct EvalPrecisionRatio : public IEvaluator{ protected: inline double CalcPRatio(const std::vector< std::pair >& rec, const MetaInfo &info) const { size_t cutoff = static_cast(ratio_ * rec.size()); - double wt_hit = 0.0, wsum = 0.0; 
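The tmp buffer that [PATCH 41/52] threads through Transform above fixes more than cosmetics: the old code wrote arg-max results into the front of preds while the OpenMP loop was still reading later groups in arbitrary order, so reads and writes could race; collecting into tmp and assigning at the end keeps them disjoint. For reference, a numerically stable softmax of the shape that code expects (the actual Softmax helper is defined elsewhere in the tree and not shown in this series):

#include <algorithm>
#include <cmath>
#include <vector>

// Softmax over one group of class scores, shifted by the max so exp cannot overflow.
void SoftmaxSketch(std::vector<float> *rec) {
  float wmax = (*rec)[0];
  for (size_t k = 1; k < rec->size(); ++k) wmax = std::max(wmax, (*rec)[k]);
  double wsum = 0.0;
  for (size_t k = 0; k < rec->size(); ++k) {
    (*rec)[k] = std::exp((*rec)[k] - wmax);
    wsum += (*rec)[k];
  }
  for (size_t k = 0; k < rec->size(); ++k) {
    (*rec)[k] = static_cast<float>((*rec)[k] / wsum);
  }
}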
+ double wt_hit = 0.0, wsum = 0.0, wt_sum = 0.0; for (size_t j = 0; j < cutoff; ++j) { - wt_hit += info.labels[rec[j].second]; - wsum += wt_hit / (j + 1); - } - return wsum / cutoff; + const float wt = info.GetWeight(j); + wt_hit += info.labels[rec[j].second] * wt; + wt_sum += wt; + wsum += wt_hit / wt_sum; + } + if (use_ap != 0) { + return wsum / cutoff; + } else { + return wt_hit / wt_sum; + } } + int use_ap; float ratio_; std::string name_; }; diff --git a/src/learner/evaluation.h b/src/learner/evaluation.h index 79ad4902e..02fb8fdf8 100644 --- a/src/learner/evaluation.h +++ b/src/learner/evaluation.h @@ -8,6 +8,7 @@ #include #include #include "../utils/utils.h" +#include "./dmatrix.h" namespace xgboost { namespace learner { @@ -41,7 +42,8 @@ inline IEvaluator* CreateEvaluator(const char *name) { if (!strcmp(name, "auc")) return new EvalAuc(); if (!strncmp(name, "ams@", 4)) return new EvalAMS(name); if (!strncmp(name, "pre@", 4)) return new EvalPrecision(name); - if (!strncmp(name, "pratio@", 4)) return new EvalPrecisionRatio(name); + if (!strncmp(name, "pratio@", 7)) return new EvalPrecisionRatio(name); + if (!strncmp(name, "apratio@", 8)) return new EvalPrecisionRatio(name); if (!strncmp(name, "map", 3)) return new EvalMAP(name); if (!strncmp(name, "ndcg", 3)) return new EvalNDCG(name); utils::Error("unknown evaluation metric type: %s", name); diff --git a/src/learner/learner-inl.hpp b/src/learner/learner-inl.hpp index 4d227f488..54e17301f 100644 --- a/src/learner/learner-inl.hpp +++ b/src/learner/learner-inl.hpp @@ -80,6 +80,9 @@ class BoostLearner { if (!strcmp(name, "eval_metric")) evaluator_.AddEval(val); if (!strcmp("seed", name)) random::Seed(atoi(val)); if (!strcmp(name, "num_class")) this->SetParam("num_output_group", val); + if (!strcmp(name, "nthread")) { + omp_set_num_threads(atoi(val)); + } if (gbm_ == NULL) { if (!strcmp(name, "objective")) name_obj_ = val; if (!strcmp(name, "booster")) name_gbm_ = val; From 58354643b0d634ff0665bda41bf733f539c1f220 Mon Sep 17 00:00:00 2001 From: tqchen Date: Fri, 22 Aug 2014 16:26:37 -0700 Subject: [PATCH 43/52] chg root index to booster info, need review --- python/xgboost_wrapper.cpp | 6 +++--- src/data.h | 18 ++++++++++++++++++ src/gbm/gbm.h | 10 ++++------ src/gbm/gbtree-inl.hpp | 14 +++++++------- src/learner/dmatrix.h | 21 +++++---------------- src/learner/learner-inl.hpp | 4 ++-- src/tree/updater.h | 5 ++--- src/tree/updater_colmaker-inl.hpp | 8 ++++---- src/tree/updater_prune-inl.hpp | 2 +- src/tree/updater_refresh-inl.hpp | 4 ++-- 10 files changed, 48 insertions(+), 44 deletions(-) diff --git a/python/xgboost_wrapper.cpp b/python/xgboost_wrapper.cpp index 7f2365ba3..df05d9521 100644 --- a/python/xgboost_wrapper.cpp +++ b/python/xgboost_wrapper.cpp @@ -37,7 +37,7 @@ class Booster: public learner::BoostLearner { for (unsigned j = 0; j < ndata; ++j) { gpair_[j] = bst_gpair(grad[j], hess[j]); } - gbm_->DoBoost(gpair_, train.fmat, train.info.root_index); + gbm_->DoBoost(gpair_, train.fmat, train.info.info); } inline void CheckInitModel(void) { if (!init_model) { @@ -151,8 +151,8 @@ extern "C"{ if (src.info.weights.size() != 0) { ret.info.weights.push_back(src.info.weights[ridx]); } - if (src.info.root_index.size() != 0) { - ret.info.weights.push_back(src.info.root_index[ridx]); + if (src.info.info.root_index.size() != 0) { + ret.info.info.root_index.push_back(src.info.info.root_index[ridx]); } } return p_ret; diff --git a/src/data.h b/src/data.h index fea3f7a48..61d61e6a0 100644 --- a/src/data.h +++ b/src/data.h @@ -39,6 +39,24 @@ 
struct bst_gpair { bst_gpair(bst_float grad, bst_float hess) : grad(grad), hess(hess) {} }; +/*! + * \brief extra information that might needed by gbm and tree module + * these information are not necessarily presented, and can be empty + */ +struct BoosterInfo { + /*! + * \brief specified root index of each instance, + * can be used for multi task setting + */ + std::vector root_index; + /*! \brief set fold indicator */ + std::vector fold_index; + /*! \brief get root of ith instance */ + inline unsigned GetRoot(size_t i) const { + return root_index.size() == 0 ? 0 : root_index[i]; + } +}; + /*! \brief read-only sparse instance batch in CSR format */ struct SparseBatch { /*! \brief an entry of sparse vector */ diff --git a/src/gbm/gbm.h b/src/gbm/gbm.h index dcc204868..f47adfdd2 100644 --- a/src/gbm/gbm.h +++ b/src/gbm/gbm.h @@ -43,12 +43,11 @@ class IGradBooster { * \brief peform update to the model(boosting) * \param gpair the gradient pair statistics of the data * \param fmat feature matrix that provide access to features - * \param root_index pre-partitioned root_index of each instance, - * root_index.size() can be 0 which indicates that no pre-partition involved + * \param info meta information about training */ virtual void DoBoost(const std::vector &gpair, const FMatrix &fmat, - const std::vector &root_index) = 0; + const BoosterInfo &info) = 0; /*! * \brief generate predictions for given feature matrix * \param fmat feature matrix @@ -56,13 +55,12 @@ class IGradBooster { * this means we do not have buffer index allocated to the gbm * a buffer index is assigned to each instance that requires repeative prediction * the size of buffer is set by convention using IGradBooster.SetParam("num_pbuffer","size") - * \param root_index pre-partitioned root_index of each instance, - * root_index.size() can be 0 which indicates that no pre-partition involved + * \param info extra side information that may be needed for prediction * \param out_preds output vector to hold the predictions */ virtual void Predict(const FMatrix &fmat, int64_t buffer_offset, - const std::vector &root_index, + const BoosterInfo &info, std::vector *out_preds) = 0; /*! * \brief dump the model in text format diff --git a/src/gbm/gbtree-inl.hpp b/src/gbm/gbtree-inl.hpp index 876e13c2d..3fa0f4dd7 100644 --- a/src/gbm/gbtree-inl.hpp +++ b/src/gbm/gbtree-inl.hpp @@ -84,9 +84,9 @@ class GBTree : public IGradBooster { } virtual void DoBoost(const std::vector &gpair, const FMatrix &fmat, - const std::vector &root_index) { + const BoosterInfo &info) { if (mparam.num_output_group == 1) { - this->BoostNewTrees(gpair, fmat, root_index, 0); + this->BoostNewTrees(gpair, fmat, info, 0); } else { const int ngroup = mparam.num_output_group; utils::Check(gpair.size() % ngroup == 0, @@ -97,13 +97,13 @@ class GBTree : public IGradBooster { for (size_t i = 0; i < tmp.size(); ++i) { tmp[i] = gpair[i * ngroup + gid]; } - this->BoostNewTrees(tmp, fmat, root_index, gid); + this->BoostNewTrees(tmp, fmat, info, gid); } } } virtual void Predict(const FMatrix &fmat, int64_t buffer_offset, - const std::vector &root_index, + const BoosterInfo &info, std::vector *out_preds) { int nthread; #pragma omp parallel @@ -134,7 +134,7 @@ class GBTree : public IGradBooster { const int tid = omp_get_thread_num(); tree::RegTree::FVec &feats = thread_temp[tid]; const size_t ridx = batch.base_rowid + i; - const unsigned root_idx = root_index.size() == 0 ? 
0 : root_index[ridx]; + const unsigned root_idx = info.GetRoot(i); // loop over output groups for (int gid = 0; gid < mparam.num_output_group; ++gid) { preds[ridx * mparam.num_output_group + gid] = @@ -186,7 +186,7 @@ class GBTree : public IGradBooster { // do group specific group inline void BoostNewTrees(const std::vector &gpair, const FMatrix &fmat, - const std::vector &root_index, + const BoosterInfo &info, int bst_group) { this->InitUpdater(); // create the trees @@ -200,7 +200,7 @@ class GBTree : public IGradBooster { } // update the trees for (size_t i = 0; i < updaters.size(); ++i) { - updaters[i]->Update(gpair, fmat, root_index, new_trees); + updaters[i]->Update(gpair, fmat, info, new_trees); } // push back to model for (size_t i = 0; i < new_trees.size(); ++i) { diff --git a/src/learner/dmatrix.h b/src/learner/dmatrix.h index 5786fe6a1..b66cf86d0 100644 --- a/src/learner/dmatrix.h +++ b/src/learner/dmatrix.h @@ -28,11 +28,8 @@ struct MetaInfo { std::vector group_ptr; /*! \brief weights of each instance, optional */ std::vector weights; - /*! - * \brief specified root index of each instance, - * can be used for multi task setting - */ - std::vector root_index; + /*! \brief information needed by booster */ + BoosterInfo info; /*! * \brief initialized margins, * if specified, xgboost will start from this init margin @@ -48,7 +45,7 @@ struct MetaInfo { labels.clear(); group_ptr.clear(); weights.clear(); - root_index.clear(); + info.root_index.clear(); base_margin.clear(); num_row = num_col = 0; } @@ -60,14 +57,6 @@ struct MetaInfo { return 1.0f; } } - /*! \brief get root index of i-th instance */ - inline float GetRoot(size_t i) const { - if (root_index.size() != 0) { - return static_cast(root_index[i]); - } else { - return 0; - } - } inline void SaveBinary(utils::IStream &fo) const { int version = kVersion; fo.Write(&version, sizeof(version)); @@ -76,7 +65,7 @@ struct MetaInfo { fo.Write(labels); fo.Write(group_ptr); fo.Write(weights); - fo.Write(root_index); + fo.Write(info.root_index); fo.Write(base_margin); } inline void LoadBinary(utils::IStream &fi) { @@ -87,7 +76,7 @@ struct MetaInfo { utils::Check(fi.Read(&labels), "MetaInfo: invalid format"); utils::Check(fi.Read(&group_ptr), "MetaInfo: invalid format"); utils::Check(fi.Read(&weights), "MetaInfo: invalid format"); - utils::Check(fi.Read(&root_index), "MetaInfo: invalid format"); + utils::Check(fi.Read(&info.root_index), "MetaInfo: invalid format"); utils::Check(fi.Read(&base_margin), "MetaInfo: invalid format"); } // try to load group information from file, if exists diff --git a/src/learner/learner-inl.hpp b/src/learner/learner-inl.hpp index bd5cf6e3b..18611bea5 100644 --- a/src/learner/learner-inl.hpp +++ b/src/learner/learner-inl.hpp @@ -161,7 +161,7 @@ class BoostLearner { inline void UpdateOneIter(int iter, const DMatrix &train) { this->PredictRaw(train, &preds_); obj_->GetGradient(preds_, train.info, iter, &gpair_); - gbm_->DoBoost(gpair_, train.fmat, train.info.root_index); + gbm_->DoBoost(gpair_, train.fmat, train.info.info); } /*! 
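The theme of [PATCH 43/52] above: instead of threading a bare root_index vector through DoBoost, Predict, and Update, every interface now takes a single BoosterInfo, so the next optional per-instance field (fold_index is already reserved) costs no signature churn. Boiled down:

#include <cstddef>
#include <vector>

// Optional per-instance side information handed to gbm and tree updaters;
// empty vectors mean "not provided".
struct BoosterInfoSketch {
  std::vector<unsigned> root_index;  // multi-task: root each instance starts from
  std::vector<unsigned> fold_index;  // reserved for future use
  unsigned GetRoot(size_t i) const {
    return root_index.size() == 0 ? 0 : root_index[i];
  }
};

GetRoot also centralizes the "root_index.size() == 0 ? 0 : root_index[ridx]" branch that was previously repeated at each call site.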
* \brief evaluate the model for specific iteration @@ -242,7 +242,7 @@ class BoostLearner { inline void PredictRaw(const DMatrix &data, std::vector *out_preds) const { gbm_->Predict(data.fmat, this->FindBufferOffset(data), - data.info.root_index, out_preds); + data.info.info, out_preds); // add base margin std::vector &preds = *out_preds; const unsigned ndata = static_cast(preds.size()); diff --git a/src/tree/updater.h b/src/tree/updater.h index 2664e2ebd..cdb625266 100644 --- a/src/tree/updater.h +++ b/src/tree/updater.h @@ -29,8 +29,7 @@ class IUpdater { * \brief peform update to the tree models * \param gpair the gradient pair statistics of the data * \param fmat feature matrix that provide access to features - * \param root_index pre-partitioned root_index of each instance, - * root_index.size() can be 0 which indicates that no pre-partition involved + * \param info extra side information that may be need, such as root index * \param trees pointer to the trese to be updated, upater will change the content of the tree * note: all the trees in the vector are updated, with the same statistics, * but maybe different random seeds, usually one tree is passed in at a time, @@ -38,7 +37,7 @@ class IUpdater { */ virtual void Update(const std::vector &gpair, const FMatrix &fmat, - const std::vector &root_index, + const BoosterInfo &info, const std::vector &trees) = 0; // destructor virtual ~IUpdater(void) {} diff --git a/src/tree/updater_colmaker-inl.hpp b/src/tree/updater_colmaker-inl.hpp index 35880b70e..919dfcc28 100644 --- a/src/tree/updater_colmaker-inl.hpp +++ b/src/tree/updater_colmaker-inl.hpp @@ -25,7 +25,7 @@ class ColMaker: public IUpdater { } virtual void Update(const std::vector &gpair, const FMatrix &fmat, - const std::vector &root_index, + const BoosterInfo &info, const std::vector &trees) { // rescale learning rate according to size of trees float lr = param.learning_rate; @@ -33,7 +33,7 @@ class ColMaker: public IUpdater { // build tree for (size_t i = 0; i < trees.size(); ++i) { Builder builder(param); - builder.Update(gpair, fmat, root_index, trees[i]); + builder.Update(gpair, fmat, info, trees[i]); } param.learning_rate = lr; } @@ -77,9 +77,9 @@ class ColMaker: public IUpdater { // update one tree, growing virtual void Update(const std::vector &gpair, const FMatrix &fmat, - const std::vector &root_index, + const BoosterInfo &info, RegTree *p_tree) { - this->InitData(gpair, fmat, root_index, *p_tree); + this->InitData(gpair, fmat, info.root_index, *p_tree); this->InitNewNode(qexpand, gpair, fmat, *p_tree); for (int depth = 0; depth < param.max_depth; ++depth) { diff --git a/src/tree/updater_prune-inl.hpp b/src/tree/updater_prune-inl.hpp index 363d6eec1..d92685ad7 100644 --- a/src/tree/updater_prune-inl.hpp +++ b/src/tree/updater_prune-inl.hpp @@ -24,7 +24,7 @@ class TreePruner: public IUpdater { // update the tree, do pruning virtual void Update(const std::vector &gpair, const FMatrix &fmat, - const std::vector &root_index, + const BoosterInfo &info, const std::vector &trees) { // rescale learning rate according to size of trees float lr = param.learning_rate; diff --git a/src/tree/updater_refresh-inl.hpp b/src/tree/updater_refresh-inl.hpp index 12bbcf864..e23174e51 100644 --- a/src/tree/updater_refresh-inl.hpp +++ b/src/tree/updater_refresh-inl.hpp @@ -24,7 +24,7 @@ class TreeRefresher: public IUpdater { // update the tree, do pruning virtual void Update(const std::vector &gpair, const FMatrix &fmat, - const std::vector &root_index, + const BoosterInfo &info, const 
diff --git a/src/tree/updater_colmaker-inl.hpp b/src/tree/updater_colmaker-inl.hpp index 35880b70e..919dfcc28 100644 --- a/src/tree/updater_colmaker-inl.hpp +++ b/src/tree/updater_colmaker-inl.hpp @@ -25,7 +25,7 @@ class ColMaker: public IUpdater<FMatrix> { } virtual void Update(const std::vector<bst_gpair> &gpair, const FMatrix &fmat, - const std::vector<unsigned> &root_index, + const BoosterInfo &info, const std::vector<RegTree*> &trees) { // rescale learning rate according to size of trees float lr = param.learning_rate; @@ -33,7 +33,7 @@ // build tree for (size_t i = 0; i < trees.size(); ++i) { Builder builder(param); - builder.Update(gpair, fmat, root_index, trees[i]); + builder.Update(gpair, fmat, info, trees[i]); } param.learning_rate = lr; } @@ -77,9 +77,9 @@ // update one tree, growing virtual void Update(const std::vector<bst_gpair> &gpair, const FMatrix &fmat, - const std::vector<unsigned> &root_index, + const BoosterInfo &info, RegTree *p_tree) { - this->InitData(gpair, fmat, root_index, *p_tree); + this->InitData(gpair, fmat, info.root_index, *p_tree); this->InitNewNode(qexpand, gpair, fmat, *p_tree); for (int depth = 0; depth < param.max_depth; ++depth) { diff --git a/src/tree/updater_prune-inl.hpp b/src/tree/updater_prune-inl.hpp index 363d6eec1..d92685ad7 100644 --- a/src/tree/updater_prune-inl.hpp +++ b/src/tree/updater_prune-inl.hpp @@ -24,7 +24,7 @@ class TreePruner: public IUpdater<FMatrix> { // update the tree, do pruning virtual void Update(const std::vector<bst_gpair> &gpair, const FMatrix &fmat, - const std::vector<unsigned> &root_index, + const BoosterInfo &info, const std::vector<RegTree*> &trees) { // rescale learning rate according to size of trees float lr = param.learning_rate; diff --git a/src/tree/updater_refresh-inl.hpp b/src/tree/updater_refresh-inl.hpp index 12bbcf864..e23174e51 100644 --- a/src/tree/updater_refresh-inl.hpp +++ b/src/tree/updater_refresh-inl.hpp @@ -24,7 +24,7 @@ class TreeRefresher: public IUpdater<FMatrix> { // update the tree, refresh the statistics virtual void Update(const std::vector<bst_gpair> &gpair, const FMatrix &fmat, - const std::vector<unsigned> &root_index, + const BoosterInfo &info, const std::vector<RegTree*> &trees) { if (trees.size() == 0) return; // number of threads @@ -66,7 +66,7 @@ class TreeRefresher: public IUpdater<FMatrix> { feats.Fill(inst); for (size_t j = 0; j < trees.size(); ++j) { AddStats(*trees[j], feats, gpair[ridx], - root_index.size() == 0 ? 0 : root_index[ridx], + info.GetRoot(ridx), &stemp[tid * trees.size() + j]); } feats.Drop(inst); From edc539a024c48a03001d94c55b4c0383bc361b07 Mon Sep 17 00:00:00 2001 From: tqchen Date: Fri, 22 Aug 2014 16:47:50 -0700 Subject: [PATCH 44/52] add message about glc --- README.md | 5 +++++ python/README.md | 3 +++ 2 files changed, 8 insertions(+) diff --git a/README.md b/README.md index 61472aa44..227e9f883 100644 --- a/README.md +++ b/README.md @@ -36,3 +36,8 @@ Build - In principle, you can put src/xgboost.cpp and src/io/io.cpp into the project, and build xgboost. - For python module, you need python/xgboost_wrapper.cpp and src/io/io.cpp to build a dll. + +Try Graphlab Create Version +===== +* Graphlab Create(GLC) is a scalable machine learning toolkit that allows you to deal with big data in Python +* XGBoost is adopted as the boosted tree library in GLC. The GLC version allows you to do feature engineering, hyper-parameter searching and visualization in one framework. See http://blog.graphlab.com/using-gradient-boosted-trees-to-predict-bike-sharing-demand diff --git a/python/README.md b/python/README.md index 4aceb7d5e..a771d9317 100644 --- a/python/README.md +++ b/python/README.md @@ -4,3 +4,6 @@ see example for usage to make the python module, type make in the root directory of project +Graphlab-Create Version ===== +Graphlab Create \ No newline at end of file From 24030b26fd6216a3c7e8cb60cbdd518a887d26a8 Mon Sep 17 00:00:00 2001 From: tqchen Date: Fri, 22 Aug 2014 16:49:42 -0700 Subject: [PATCH 45/52] add --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 227e9f883..5d9db006a 100644 --- a/README.md +++ b/README.md @@ -40,4 +40,4 @@ Build Try Graphlab Create Version ===== * Graphlab Create(GLC) is a scalable machine learning toolkit that allows you to deal with big data in Python -* XGBoost is adopted as the boosted tree library in GLC. The GLC version allows you to do feature engineering, hyper-parameter searching and visualization in one framework. See http://blog.graphlab.com/using-gradient-boosted-trees-to-predict-bike-sharing-demand +* XGBoost is adopted as the boosted tree library in GLC. The GLC version allows you to do feature engineering, hyper-parameter searching and visualization in one framework. See the nice blog post about [predicting bike sharing demand](http://blog.graphlab.com/using-gradient-boosted-trees-to-predict-bike-sharing-demand). From bf71cf52befccba03238b93485cd04a6894ae8de Mon Sep 17 00:00:00 2001 From: tqchen Date: Fri, 22 Aug 2014 16:50:28 -0700 Subject: [PATCH 46/52] add --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 5d9db006a..e14f33bc0 100644 --- a/README.md +++ b/README.md @@ -37,7 +37,7 @@ Build - For python module, you need python/xgboost_wrapper.cpp and src/io/io.cpp to build a dll. -Try Graphlab Create Version +Using XGBoost in Graphlab Create ===== -* Graphlab Create(GLC) is a scalable machine learning toolkit that allows you to deal with big data in Python +* Graphlab Create (GLC) is a scalable machine learning toolkit that allows you to deal with big data in Python * XGBoost is adopted as the boosted tree library in GLC.
The GLC version allows you to do feature engineering, hyper-parameter searching and visualization in one framework. See the nice blog post about [predicting bike sharing demand](http://blog.graphlab.com/using-gradient-boosted-trees-to-predict-bike-sharing-demand). From 37b707e11057bd60de719df3ba18dda37a644471 Mon Sep 17 00:00:00 2001 From: tqchen Date: Fri, 22 Aug 2014 16:51:27 -0700 Subject: [PATCH 47/52] clean up --- README.md | 6 ------ 1 file changed, 6 deletions(-) diff --git a/README.md b/README.md index e14f33bc0..e226aec94 100644 --- a/README.md +++ b/README.md @@ -35,9 +35,3 @@ Build * Possible way to build using Visual Studio (not tested): - In principle, you can put src/xgboost.cpp and src/io/io.cpp into the project, and build xgboost. - For python module, you need python/xgboost_wrapper.cpp and src/io/io.cpp to build a dll. - - -Using XGBoost in Graphlab Create -===== -* Graphlab Create (GLC) is a scalable machine learning toolkit that allows you to deal with big data in Python -* XGBoost is adopted as the boosted tree library in GLC. The GLC version allows you to do feature engineering, hyper-parameter searching and visualization in one framework. See the nice blog post about [predicting bike sharing demand](http://blog.graphlab.com/using-gradient-boosted-trees-to-predict-bike-sharing-demand). From 2ac8cdb873f9abed0b7a33cefc6c0f7070ac0fad Mon Sep 17 00:00:00 2001 From: tqchen Date: Fri, 22 Aug 2014 19:27:33 -0700 Subject: [PATCH 48/52] check in linear model --- python/xgboost_wrapper.cpp | 2 +- src/data.h | 8 +- src/gbm/gblinear-inl.hpp | 262 ++++++++++++++++++++++++++++++ src/gbm/gbm.h | 13 +- src/gbm/gbtree-inl.hpp | 7 +- src/learner/evaluation-inl.hpp | 2 +- src/learner/learner-inl.hpp | 2 +- src/tree/updater_colmaker-inl.hpp | 10 +- 8 files changed, 287 insertions(+), 19 deletions(-) create mode 100644 src/gbm/gblinear-inl.hpp diff --git a/python/xgboost_wrapper.cpp b/python/xgboost_wrapper.cpp index df05d9521..7bc25eb40 100644 --- a/python/xgboost_wrapper.cpp +++ b/python/xgboost_wrapper.cpp @@ -37,7 +37,7 @@ class Booster: public learner::BoostLearner<FMatrixS> { for (unsigned j = 0; j < ndata; ++j) { gpair_[j] = bst_gpair(grad[j], hess[j]); } - gbm_->DoBoost(gpair_, train.fmat, train.info.info); + gbm_->DoBoost(train.fmat, train.info.info, &gpair_); } inline void CheckInitModel(void) { if (!init_model) { diff --git a/src/data.h b/src/data.h index 61d61e6a0..6f8297311 100644 --- a/src/data.h +++ b/src/data.h @@ -217,7 +217,7 @@ class FMatrixS : public FMatrixInterface<FMatrixS> { utils::Check(this->HaveColAccess(), "NumCol:need column access"); return col_ptr_.size() - 1; } /*!
\brief get number of buffered rows */ inline const std::vector<bst_uint> buffered_rowset(void) const { return buffered_rowset_; } @@ -333,7 +333,7 @@ class FMatrixS : public FMatrixInterface<FMatrixS> { while (iter_->Next()) { const SparseBatch &batch = iter_->Value(); for (size_t i = 0; i < batch.size; ++i) { - if (pkeep==1.0f || random::SampleBinary(pkeep)) { + if (pkeep == 1.0f || random::SampleBinary(pkeep)) { buffered_rowset_.push_back(batch.base_rowid+i); SparseBatch::Inst inst = batch[i]; for (bst_uint j = 0; j < inst.length; ++j) { @@ -349,9 +349,9 @@ class FMatrixS : public FMatrixInterface<FMatrixS> { while (iter_->Next()) { const SparseBatch &batch = iter_->Value(); for (size_t i = 0; i < batch.size; ++i) { - if (ktop < buffered_rowset_.size() && + if (ktop < buffered_rowset_.size() && buffered_rowset_[ktop] == batch.base_rowid+i) { - ++ ktop; + ++ktop; SparseBatch::Inst inst = batch[i]; for (bst_uint j = 0; j < inst.length; ++j) { builder.PushElem(inst[j].findex, diff --git a/src/gbm/gblinear-inl.hpp b/src/gbm/gblinear-inl.hpp new file mode 100644 index 000000000..0c346d687 --- /dev/null +++ b/src/gbm/gblinear-inl.hpp @@ -0,0 +1,262 @@ +#ifndef XGBOOST_GBM_GBLINEAR_INL_HPP_ +#define XGBOOST_GBM_GBLINEAR_INL_HPP_ +/*! + * \file gblinear-inl.hpp + * \brief Implementation of Linear booster, with L1/L2 regularization: Elastic Net + * the update rule is parallel coordinate descent (shotgun) + * \author Tianqi Chen + */ +#include <vector> +#include <string> +#include <cstring> +#include "./gbm.h" +#include "../tree/updater.h" + +namespace xgboost { +namespace gbm { +/*! + * \brief gradient boosted linear model + * \tparam FMatrix the data type the updater takes + */ +template<typename FMatrix> +class GBLinear : public IGradBooster<FMatrix> { + public: + virtual ~GBLinear(void) { + } + // set model parameters + virtual void SetParam(const char *name, const char *val) { + if (!strncmp(name, "bst:", 4)) { + param.SetParam(name + 4, val); + } + if (model.weight.size() == 0) { + model.param.SetParam(name, val); + } + } + virtual void LoadModel(utils::IStream &fi) { + model.LoadModel(fi); + } + virtual void SaveModel(utils::IStream &fo) const { + model.SaveModel(fo); + } + virtual void InitModel(void) { + model.InitModel(); + } + virtual void DoBoost(const FMatrix &fmat, + const BoosterInfo &info, + std::vector<bst_gpair> *in_gpair) { + this->InitFeatIndex(fmat); + std::vector<bst_gpair> &gpair = *in_gpair; + const int ngroup = model.param.num_output_group; + const std::vector<bst_uint> &rowset = fmat.buffered_rowset(); + // for all the output groups + for (int gid = 0; gid < ngroup; ++gid) { + double sum_grad = 0.0, sum_hess = 0.0; + const unsigned ndata = static_cast<unsigned>(rowset.size()); + #pragma omp parallel for schedule(static) reduction(+: sum_grad, sum_hess) + for (unsigned i = 0; i < ndata; ++i) { + bst_gpair &p = gpair[rowset[i] * ngroup + gid]; + if (p.hess >= 0.0f) { + sum_grad += p.grad; sum_hess += p.hess; + } + } + // remove bias effect + double dw = param.learning_rate * param.CalcDeltaBias(sum_grad, sum_hess, model.bias()[gid]); + model.bias()[gid] += dw; + // update grad value + #pragma omp parallel for schedule(static) + for (unsigned i = 0; i < ndata; ++i) { + bst_gpair &p = gpair[rowset[i] * ngroup + gid]; + if (p.hess >= 0.0f) { + p.grad += p.hess * dw; + } + } + } + // number of features + const unsigned nfeat = static_cast<unsigned>(feat_index.size()); + #pragma omp parallel for schedule(static) + for (unsigned i = 0; i < nfeat; ++i) { + const bst_uint fid = feat_index[i]; + for (int gid = 0; gid < ngroup; ++gid) { + double sum_grad = 0.0, sum_hess = 0.0; + for (typename FMatrix::ColIter it =
fmat.GetSortedCol(fid); it.Next();) { + const float v = it.fvalue(); + bst_gpair &p = gpair[it.rindex() * ngroup + gid]; + if (p.hess < 0.0f) continue; + sum_grad += p.grad * v; + sum_hess += p.hess * v * v; + } + float &w = model[fid][gid]; + double dw = param.learning_rate * param.CalcDelta(sum_grad, sum_hess, w); + w += dw; + // update grad value + for (typename FMatrix::ColIter it = fmat.GetSortedCol(fid); it.Next();) { + bst_gpair &p = gpair[it.rindex() * ngroup + gid]; + if (p.hess < 0.0f) continue; + p.grad += p.hess * it.fvalue() * dw; + } + } + } + } + + virtual void Predict(const FMatrix &fmat, + int64_t buffer_offset, + const BoosterInfo &info, + std::vector<float> *out_preds) { + std::vector<float> &preds = *out_preds; + preds.resize(0); + // start collecting the prediction + utils::IIterator<SparseBatch> *iter = fmat.RowIterator(); + iter->BeforeFirst(); + const int ngroup = model.param.num_output_group; + while (iter->Next()) { + const SparseBatch &batch = iter->Value(); + utils::Assert(batch.base_rowid * ngroup == preds.size(), + "base_rowid is not set correctly"); + // output convention: nrow * k, where nrow is number of rows + // k is number of group + preds.resize(preds.size() + batch.size * ngroup); + // parallel over local batch + const unsigned nsize = static_cast<unsigned>(batch.size); + #pragma omp parallel for schedule(static) + for (unsigned i = 0; i < nsize; ++i) { + const size_t ridx = batch.base_rowid + i; + // loop over output groups + for (int gid = 0; gid < ngroup; ++gid) { + this->Pred(batch[i], &preds[ridx * ngroup]); + } + } + } + } + virtual std::vector<std::string> DumpModel(const utils::FeatMap& fmap, int option) { + utils::Error("gblinear does not support dump model"); + return std::vector<std::string>(); + } + + protected: + inline void InitFeatIndex(const FMatrix &fmat) { + if (feat_index.size() != 0) return; + // initialize feature index + unsigned ncol = static_cast<unsigned>(fmat.NumCol()); + feat_index.reserve(ncol); + for (unsigned i = 0; i < ncol; ++i) { + if (fmat.GetColSize(i) != 0) { + feat_index.push_back(i); + } + } + random::Shuffle(feat_index); + } + inline void Pred(const SparseBatch::Inst &inst, float *preds) { + for (int gid = 0; gid < model.param.num_output_group; ++gid) { + float psum = model.bias()[gid]; + for (bst_uint i = 0; i < inst.length; ++i) { + psum += inst[i].fvalue * model[inst[i].findex][gid]; + } + preds[gid] = psum; + } + } + // training parameter + struct ParamTrain { + /*! \brief learning_rate */ + float learning_rate; + /*! \brief regularization weight for L2 norm */ + float reg_lambda; + /*! \brief regularization weight for L1 norm */ + float reg_alpha; + /*!
\brief regularization weight for L2 norm in bias */ + float reg_lambda_bias; + // parameter + ParamTrain(void) { + reg_alpha = 0.0f; + reg_lambda = 0.0f; + reg_lambda_bias = 0.0f; + learning_rate = 1.0f; + } + inline void SetParam(const char *name, const char *val) { + // sync-names + if (!strcmp("eta", name)) learning_rate = static_cast<float>(atof(val)); + if (!strcmp("lambda", name)) reg_lambda = static_cast<float>(atof(val)); + if (!strcmp("alpha", name)) reg_alpha = static_cast<float>(atof(val)); + if (!strcmp("lambda_bias", name)) reg_lambda_bias = static_cast<float>(atof(val)); + // real names + if (!strcmp("learning_rate", name)) learning_rate = static_cast<float>(atof(val)); + if (!strcmp("reg_lambda", name)) reg_lambda = static_cast<float>(atof(val)); + if (!strcmp("reg_alpha", name)) reg_alpha = static_cast<float>(atof(val)); + if (!strcmp("reg_lambda_bias", name)) reg_lambda_bias = static_cast<float>(atof(val)); + } + // given the original weight, calculate the delta + inline double CalcDelta(double sum_grad, double sum_hess, double w) { + if (sum_hess < 1e-5f) return 0.0f; + double tmp = w - (sum_grad + reg_lambda * w) / (sum_hess + reg_lambda); + if (tmp >= 0) { + return std::max(-(sum_grad + reg_lambda * w + reg_alpha) / (sum_hess + reg_lambda), -w); + } else { + return std::min(-(sum_grad + reg_lambda * w - reg_alpha) / (sum_hess + reg_lambda), -w); + } + } + // given the original weight, calculate the delta for the bias + inline double CalcDeltaBias(double sum_grad, double sum_hess, double w) { + return -(sum_grad + reg_lambda_bias * w) / (sum_hess + reg_lambda_bias); + } + }; + // model for linear booster + class Model { + public: + // model parameter + struct Param { + // number of feature dimension + int num_feature; + // number of output group + int num_output_group; + // reserved field + int reserved[32]; + // constructor + Param(void) { + num_feature = 0; + num_output_group = 1; + memset(reserved, 0, sizeof(reserved)); + } + inline void SetParam(const char *name, const char *val) { + if (!strcmp(name, "bst:num_feature")) num_feature = atoi(val); + if (!strcmp(name, "num_output_group")) num_output_group = atoi(val); + } + }; + // parameter + Param param; + // weight for each feature, the bias is the last one + std::vector<float> weight; + // initialize the model parameter + inline void InitModel(void) { + // bias is the last weight + weight.resize((param.num_feature + 1) * param.num_output_group); + std::fill(weight.begin(), weight.end(), 0.0f); + } + // save the model to file + inline void SaveModel(utils::IStream &fo) const { + fo.Write(&param, sizeof(Param)); + fo.Write(weight); + } + // load model from file + inline void LoadModel(utils::IStream &fi) { + utils::Assert(fi.Read(&param, sizeof(Param)) != 0, "Load LinearBooster"); + fi.Read(&weight); + } + // model bias + inline float* bias(void) { + return &weight[param.num_feature * param.num_output_group]; + } + // get the weights of the i-th feature + inline float* operator[](size_t i) { + return &weight[i * param.num_output_group]; + } + }; + // model field + Model model; + // training parameter + ParamTrain param; + // shuffled indices of the features used in the update + std::vector<bst_uint> feat_index; +}; + +} // namespace gbm +} // namespace xgboost +#endif // XGBOOST_GBM_GBLINEAR_INL_HPP_
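CalcDelta above is the elastic-net coordinate descent step: a ridge-shrunken Newton step on one weight, soft-thresholded by reg_alpha and clamped at -w so the L1 term can zero a weight exactly. The following standalone sketch (toy numbers, not part of the patch) reproduces the rule and shows the exact-zeroing behavior:

```cpp
// Standalone illustration of gblinear's elastic-net coordinate update;
// the gradient/hessian sums below are invented toy values.
#include <algorithm>
#include <cstdio>

double CalcDelta(double sum_grad, double sum_hess, double w,
                 double reg_lambda, double reg_alpha) {
  if (sum_hess < 1e-5) return 0.0;
  // candidate step with L2 regularization only
  double tmp = w - (sum_grad + reg_lambda * w) / (sum_hess + reg_lambda);
  if (tmp >= 0) {
    // shrink toward zero by reg_alpha, never stepping past w = 0
    return std::max(-(sum_grad + reg_lambda * w + reg_alpha) /
                        (sum_hess + reg_lambda), -w);
  } else {
    return std::min(-(sum_grad + reg_lambda * w - reg_alpha) /
                        (sum_hess + reg_lambda), -w);
  }
}

int main(void) {
  double w = 0.05;                      // current weight
  double dw = CalcDelta(0.8, 10.0, w,   // toy sum_grad, sum_hess
                        1.0, 0.5);      // reg_lambda, reg_alpha
  // with these sums the clamp lands exactly on -w: the weight is zeroed
  std::printf("delta = %g, new weight = %g\n", dw, w + dw);
  return 0;
}
```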
diff --git a/src/gbm/gbm.h b/src/gbm/gbm.h index f47adfdd2..7b551553a 100644 --- a/src/gbm/gbm.h +++ b/src/gbm/gbm.h @@ -41,13 +41,14 @@ class IGradBooster { virtual void InitModel(void) = 0; /*! * \brief perform update to the model (boosting) - * \param gpair the gradient pair statistics of the data * \param fmat feature matrix that provides access to features * \param info meta information about training + * \param in_gpair address of the gradient pair statistics of the data; + * the booster may change the content of gpair */ - virtual void DoBoost(const std::vector<bst_gpair> &gpair, - const FMatrix &fmat, - const BoosterInfo &info) = 0; + virtual void DoBoost(const FMatrix &fmat, + const BoosterInfo &info, + std::vector<bst_gpair> *in_gpair) = 0; /*! * \brief generate predictions for given feature matrix * \param fmat feature matrix @@ -74,12 +75,16 @@ class IGradBooster { }; } // namespace gbm } // namespace xgboost + #include "gbtree-inl.hpp" +#include "gblinear-inl.hpp" + namespace xgboost { namespace gbm { template<typename FMatrix> inline IGradBooster<FMatrix>* CreateGradBooster(const char *name) { if (!strcmp("gbtree", name)) return new GBTree<FMatrix>(); + if (!strcmp("gblinear", name)) return new GBLinear<FMatrix>(); utils::Error("unknown booster type: %s", name); return NULL; } diff --git a/src/gbm/gbtree-inl.hpp b/src/gbm/gbtree-inl.hpp index 3fa0f4dd7..0e001a4e8 100644 --- a/src/gbm/gbtree-inl.hpp +++ b/src/gbm/gbtree-inl.hpp @@ -82,9 +82,10 @@ class GBTree : public IGradBooster<FMatrix> { utils::Assert(mparam.num_trees == 0, "GBTree: model already initialized"); utils::Assert(trees.size() == 0, "GBTree: model already initialized"); } - virtual void DoBoost(const std::vector<bst_gpair> &gpair, - const FMatrix &fmat, - const BoosterInfo &info) { + virtual void DoBoost(const FMatrix &fmat, + const BoosterInfo &info, + std::vector<bst_gpair> *in_gpair) { + const std::vector<bst_gpair> &gpair = *in_gpair; if (mparam.num_output_group == 1) { this->BoostNewTrees(gpair, fmat, info, 0); } else { diff --git a/src/learner/evaluation-inl.hpp b/src/learner/evaluation-inl.hpp index 69f0bb4d9..72085be46 100644 --- a/src/learner/evaluation-inl.hpp +++ b/src/learner/evaluation-inl.hpp @@ -28,7 +28,7 @@ struct EvalEWiseBase : public IEvaluator { "label and prediction size not match"); const unsigned ndata = static_cast<unsigned>(preds.size()); float sum = 0.0, wsum = 0.0; - #pragma omp parallel for reduction(+:sum, wsum) schedule(static) + #pragma omp parallel for reduction(+: sum, wsum) schedule(static) for (unsigned i = 0; i < ndata; ++i) { const float wt = info.GetWeight(i); sum += Derived::EvalRow(info.labels[i], preds[i]) * wt; diff --git a/src/learner/learner-inl.hpp b/src/learner/learner-inl.hpp index 6d00c3090..9150b5379 100644 --- a/src/learner/learner-inl.hpp +++ b/src/learner/learner-inl.hpp @@ -164,7 +164,7 @@ class BoostLearner { inline void UpdateOneIter(int iter, const DMatrix &train) { this->PredictRaw(train, &preds_); obj_->GetGradient(preds_, train.info, iter, &gpair_); - gbm_->DoBoost(gpair_, train.fmat, train.info.info); + gbm_->DoBoost(train.fmat, train.info.info, &gpair_); } /*! * \brief evaluate the model for specific iteration
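The reordered DoBoost above takes the gradient statistics by pointer because a booster may now rewrite them in place: after each coordinate step, gblinear folds the step back into every instance's gradient (p.grad += p.hess * x * dw), so later coordinates work against up-to-date residuals. A self-contained toy illustration of that correction, with invented data:

```cpp
// Toy demonstration of the in-place gradient correction behind the new
// DoBoost(fmat, info, &gpair) contract; the feature column and gradient
// pairs are invented for illustration.
#include <cstddef>
#include <cstdio>
#include <vector>

struct GPair { double grad, hess; };

int main(void) {
  std::vector<double> x = {1.0, 2.0, 0.5};  // one feature column
  std::vector<GPair> gpair = {{0.3, 1.0}, {-0.4, 1.0}, {0.1, 1.0}};
  // coordinate step for this feature (no regularization, unit learning rate)
  double sum_grad = 0.0, sum_hess = 0.0;
  for (std::size_t i = 0; i < x.size(); ++i) {
    sum_grad += gpair[i].grad * x[i];
    sum_hess += gpair[i].hess * x[i] * x[i];
  }
  double dw = -sum_grad / sum_hess;
  // fold the step back into the per-instance gradients, as gblinear does
  for (std::size_t i = 0; i < x.size(); ++i) {
    gpair[i].grad += gpair[i].hess * x[i] * dw;
  }
  // the weighted gradient sum for this feature is now (numerically) zero
  double check = 0.0;
  for (std::size_t i = 0; i < x.size(); ++i) check += gpair[i].grad * x[i];
  std::printf("dw = %g, residual gradient = %g\n", dw, check);
  return 0;
}
```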
diff --git a/src/tree/updater_colmaker-inl.hpp b/src/tree/updater_colmaker-inl.hpp index 919dfcc28..afeccb206 100644 --- a/src/tree/updater_colmaker-inl.hpp +++ b/src/tree/updater_colmaker-inl.hpp @@ -81,7 +81,7 @@ class ColMaker: public IUpdater<FMatrix> { RegTree *p_tree) { this->InitData(gpair, fmat, info.root_index, *p_tree); this->InitNewNode(qexpand, gpair, fmat, *p_tree); - + for (int depth = 0; depth < param.max_depth; ++depth) { this->FindSplit(depth, this->qexpand, gpair, fmat, p_tree); this->ResetPosition(this->qexpand, fmat, *p_tree); @@ -89,7 +89,7 @@ this->InitNewNode(qexpand, gpair, fmat, *p_tree); // if nothing left to be expand, break if (qexpand.size() == 0) break; - } + } // set all the rest expanding nodes to leaf for (size_t i = 0; i < qexpand.size(); ++i) { const int nid = qexpand[i]; @@ -182,7 +182,7 @@ } snode.resize(tree.param.num_nodes, NodeEntry()); } - const std::vector<bst_uint> &rowset = fmat.buffered_rowset(); + const std::vector<bst_uint> &rowset = fmat.buffered_rowset(); // setup position const unsigned ndata = static_cast<unsigned>(rowset.size()); #pragma omp parallel for schedule(static) @@ -316,8 +316,8 @@ // step 1, set default direct nodes to default, and leaf nodes to -1 const unsigned ndata = static_cast<unsigned>(rowset.size()); #pragma omp parallel for schedule(static) - for (unsigned i = 0; i < ndata; ++i) { - const bst_uint ridx = rowset[i]; + for (unsigned i = 0; i < ndata; ++i) { + const bst_uint ridx = rowset[i]; const int nid = position[ridx]; if (nid >= 0) { if (tree[nid].is_leaf()) { From 07ddf9871875ee64c647fc43a34628f413d37320 Mon Sep 17 00:00:00 2001 From: tqchen Date: Fri, 22 Aug 2014 19:41:58 -0700 Subject: [PATCH 49/52] add log --- CHANGES.md | 21 +++++++++++++++++++++ README.md | 14 +++++++++----- 2 files changed, 30 insertions(+), 5 deletions(-) create mode 100644 CHANGES.md diff --git a/CHANGES.md b/CHANGES.md new file mode 100644 index 000000000..ee2adbbb7 --- /dev/null +++ b/CHANGES.md @@ -0,0 +1,21 @@ +Change Log of Versions +===== + +xgboost-0.1 +===== +* Initial release + +xgboost-0.2x +===== +* Python module +* Weighted sample instances +* Initial version of pairwise rank + +xgboost-unity +===== +* Faster tree construction module + - Allows subsample columns as well during tree construction +* Support for boosting from initial predictions +* Experimental version of LambdaRank +* Linear booster is now parallelized, using parallel coordinate descent. +* Add [code guide](src/README.md) diff --git a/README.md b/README.md index e226aec94..f9a72e418 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,5 @@ xgboost: eXtreme Gradient Boosting -======= +====== An optimized general purpose gradient boosting (tree) library. Contributors: https://github.com/tqchen/xgboost/graphs/contributors @@ -8,8 +8,10 @@ Turorial and Documentation: https://github.com/tqchen/xgboost/wiki Questions and Issues: [https://github.com/tqchen/xgboost/issues](https://github.com/tqchen/xgboost/issues?q=is%3Aissue+label%3Aquestion) +Notes on the Code: [src/REAMDE.md](src/README.md) + Features -======= +====== * Sparse feature format: - Sparse feature format allows easy handling of missing values, and improve computation efficiency.
* Push the limit on single machine: @@ -19,11 +21,12 @@ Features * Layout of gradient boosting algorithm to support user defined objective * Python interface, works with numpy and scipy.sparse matrix -xgboost-unity -======= -* Experimental branch(not usable yet): refactor xgboost, cleaner code, more flexibility +Version +====== +* This version is named xgboost-unity, the code has been refactored from 0.2x to be cleaner and more flexible +* This version of xgboost is not compatible with 0.2x, due to the huge amount of changes in code structure - This means the model and buffer file of the previous version cannot be loaded in xgboost-unity +* For legacy 0.2x code, refer to Build ====== @@ -35,3 +38,4 @@ Build * Possible way to build using Visual Studio (not tested): - In principle, you can put src/xgboost.cpp and src/io/io.cpp into the project, and build xgboost. - For python module, you need python/xgboost_wrapper.cpp and src/io/io.cpp to build a dll. + From ce5b776bdc9e9212b19fb0fafa573d8bbf0fdd6f Mon Sep 17 00:00:00 2001 From: tqchen Date: Fri, 22 Aug 2014 19:47:05 -0700 Subject: [PATCH 50/52] add change note --- CHANGES.md | 2 +- README.md | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index ee2adbbb7..1e3b5c67f 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -18,4 +18,4 @@ xgboost-unity * Support for boosting from initial predictions * Experimental version of LambdaRank * Linear booster is now parallelized, using parallel coordinate descent. -* Add [code guide](src/README.md) +* Add [Code Guide](src/README.md) for customizing objective function and diff --git a/README.md b/README.md index f9a72e418..7b28e4499 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,7 @@ Turorial and Documentation: https://github.com/tqchen/xgboost/wiki Questions and Issues: [https://github.com/tqchen/xgboost/issues](https://github.com/tqchen/xgboost/issues?q=is%3Aissue+label%3Aquestion) -Notes on the Code: [src/REAMDE.md](src/README.md) +Notes on the Code: [Code Guide](src/README.md) Features ====== @@ -26,7 +26,8 @@ Version ====== * This version is named xgboost-unity, the code has been refactored from 0.2x to be cleaner and more flexible * This version of xgboost is not compatible with 0.2x, due to the huge amount of changes in code structure - This means the model and buffer file of the previous version cannot be loaded in xgboost-unity -* For legacy 0.2x code, refer to +* For legacy 0.2x code, refer to [Here](https://github.com/tqchen/xgboost/releases/tag/v0.22) +* Change log in [CHANGES.md](CHANGES.md) From 104fced9c31eeeb1075b2cea08c4f16f2c993015 Mon Sep 17 00:00:00 2001 From: tqchen Date: Fri, 22 Aug 2014 19:52:43 -0700 Subject: [PATCH 51/52] ok --- CHANGES.md | 6 +++--- README.md | 15 +++++--------- src/README.md | 1 + src/learner/evaluation.h | 1 - 4 files changed, 11 insertions(+), 12 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 1e3b5c67f..2c0c5122f 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,4 +1,4 @@ -Change Log of Versions +Change Log ===== xgboost-0.1 ===== @@ -14,8 +14,8 @@ xgboost-0.2x ===== * Faster tree construction module - - Allows subsample columns as well during tree construction + - Allows subsample columns during tree construction via ```bst:col_samplebytree=ratio``` * Support for boosting from initial predictions * Experimental version of LambdaRank * Linear booster is now parallelized, using parallel coordinate descent.
-* Add [Code Guide](src/README.md) for customizing objective function and +* Add [Code Guide](src/README.md) for customizing objective function and evaluation diff --git a/README.md b/README.md index 7b28e4499..73bcd88d0 100644 --- a/README.md +++ b/README.md @@ -21,14 +21,6 @@ Features * Layout of gradient boosting algorithm to support user defined objective * Python interface, works with numpy and scipy.sparse matrix -Version -====== -* This version is named xgboost-unity, the code has been refactored from 0.2x to be cleaner and more flexible -* This version of xgboost is not compatible with 0.2x, due to the huge amount of changes in code structure - - This means the model and buffer file of the previous version cannot be loaded in xgboost-unity -* For legacy 0.2x code, refer to [Here](https://github.com/tqchen/xgboost/releases/tag/v0.22) -* Change log in [CHANGES.md](CHANGES.md) - Build ====== * Simply type make @@ -40,3 +32,10 @@ Build - In principle, you can put src/xgboost.cpp and src/io/io.cpp into the project, and build xgboost. - For python module, you need python/xgboost_wrapper.cpp and src/io/io.cpp to build a dll. +Version ====== +* This version is named xgboost-unity, the code has been refactored from 0.2x to be cleaner and more flexible +* This version of xgboost is not compatible with 0.2x, due to the huge amount of changes in code structure + - This means the model and buffer file of the previous version cannot be loaded in xgboost-unity +* For legacy 0.2x code, refer to [Here](https://github.com/tqchen/xgboost/releases/tag/v0.22) +* Change log in [CHANGES.md](CHANGES.md) diff --git a/src/README.md b/src/README.md index 35d9b08e8..5bfc3722a 100644 --- a/src/README.md +++ b/src/README.md @@ -1,5 +1,6 @@ Coding Guide ====== +This file is intended to be notes about the code structure of xgboost Project Logical Layout ======= diff --git a/src/learner/evaluation.h b/src/learner/evaluation.h index 02fb8fdf8..966499965 100644 --- a/src/learner/evaluation.h +++ b/src/learner/evaluation.h @@ -43,7 +43,6 @@ inline IEvaluator* CreateEvaluator(const char *name) { if (!strncmp(name, "ams@", 4)) return new EvalAMS(name); if (!strncmp(name, "pre@", 4)) return new EvalPrecision(name); if (!strncmp(name, "pratio@", 7)) return new EvalPrecisionRatio(name); - if (!strncmp(name, "apratio@", 8)) return new EvalPrecisionRatio(name); if (!strncmp(name, "map", 3)) return new EvalMAP(name); if (!strncmp(name, "ndcg", 3)) return new EvalNDCG(name); utils::Error("unknown evaluation metric type: %s", name); From 58cda4d7080ba39e6a502e25e4e8024956942454 Mon Sep 17 00:00:00 2001 From: tqchen Date: Fri, 22 Aug 2014 19:53:52 -0700 Subject: [PATCH 52/52] ok --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 73bcd88d0..340ca4a91 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,7 @@ Turorial and Documentation: https://github.com/tqchen/xgboost/wiki Questions and Issues: [https://github.com/tqchen/xgboost/issues](https://github.com/tqchen/xgboost/issues?q=is%3Aissue+label%3Aquestion) -Notes on the Code: [Code Guide](src/README.md) +Notes on the Code: [Code Guide](src) Features ======