Compare commits
166 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
56b1a3301f | ||
|
|
920f9f3565 | ||
|
|
c1a868e7ff | ||
|
|
63c4025656 | ||
|
|
4a622da67b | ||
|
|
b10efa2e4b | ||
|
|
0d6b977395 | ||
|
|
ca4b3b7541 | ||
|
|
4a98205ef1 | ||
|
|
982d16b2b6 | ||
|
|
fde318716f | ||
|
|
094d0a4497 | ||
|
|
d8b0edf133 | ||
|
|
bf5fcec8e8 | ||
|
|
278b788b34 | ||
|
|
76c44072d1 | ||
|
|
ccde443590 | ||
|
|
cf710bfa59 | ||
|
|
be2c3d299e | ||
|
|
2eba59000a | ||
|
|
a958fe8d52 | ||
|
|
96667b8bad | ||
|
|
95f4052aae | ||
|
|
e9e3e0281d | ||
|
|
c23d8c8b88 | ||
|
|
e59f4d5a18 | ||
|
|
e267f4c5f9 | ||
|
|
505e65ac08 | ||
|
|
13fc48623e | ||
|
|
591a43ac0e | ||
|
|
5375ac5c23 | ||
|
|
6930758294 | ||
|
|
e09d6ab9de | ||
|
|
db4a100f6b | ||
|
|
495e37e0dc | ||
|
|
b56b34944e | ||
|
|
d4530b7a47 | ||
|
|
334cf5de9b | ||
|
|
004e8d811e | ||
|
|
4baefd857e | ||
|
|
b52f01d61d | ||
|
|
35f9ef684a | ||
|
|
6f34096613 | ||
|
|
31c5d7843f | ||
|
|
f60dbe299e | ||
|
|
a77debc0c5 | ||
|
|
dc2b9c86e6 | ||
|
|
73bc8c0de4 | ||
|
|
ad8eb21fcd | ||
|
|
416050d5c0 | ||
|
|
d5f6fba82d | ||
|
|
23f4c41035 | ||
|
|
7ea988a76b | ||
|
|
d3c0ed14f3 | ||
|
|
2fcd875675 | ||
|
|
615074efb6 | ||
|
|
945b336fc6 | ||
|
|
8e8b8a8ee3 | ||
|
|
42267807f5 | ||
|
|
df23464a20 | ||
|
|
2ea8d9c511 | ||
|
|
3206235a5e | ||
|
|
956fc09da0 | ||
|
|
da482500c7 | ||
|
|
b19f2bfda8 | ||
|
|
21b21e69de | ||
|
|
b90d1dc92b | ||
|
|
3429ab3447 | ||
|
|
ebcce4a2bf | ||
|
|
1839e6efe9 | ||
|
|
9bc6e83afe | ||
|
|
fd2774e133 | ||
|
|
72d3a6a3cc | ||
|
|
5febbecd88 | ||
|
|
b3c3ecd9c9 | ||
|
|
c28a1be34c | ||
|
|
ae70b9b152 | ||
|
|
e0a0343ae6 | ||
|
|
0e0d3efd6a | ||
|
|
a3bd5000ba | ||
|
|
dd71c0e070 | ||
|
|
d9ea324057 | ||
|
|
0d29610c40 | ||
|
|
0af2c92d3b | ||
|
|
f9cdce077b | ||
|
|
59183b9ed8 | ||
|
|
6ff272eec6 | ||
|
|
c8073e13e4 | ||
|
|
698fa87bc3 | ||
|
|
8f56671901 | ||
|
|
9ea9a7a01e | ||
|
|
d59940f1d5 | ||
|
|
6aa190e10c | ||
|
|
54c486bcf1 | ||
|
|
88ff293de5 | ||
|
|
50af92e29e | ||
|
|
bbe4957cd2 | ||
|
|
789ad18d36 | ||
|
|
2b34d5a25e | ||
|
|
bd574e4967 | ||
|
|
e8d81c1da5 | ||
|
|
c84bbc91d1 | ||
|
|
61e3d1562c | ||
|
|
97db8c29f2 | ||
|
|
f2552f8ef2 | ||
|
|
2563b6d2d6 | ||
|
|
e90ffece67 | ||
|
|
85f92681f9 | ||
|
|
5e0d52cb8c | ||
|
|
c9d156d99e | ||
|
|
ecf6e8f49f | ||
|
|
93778aa4aa | ||
|
|
f8cacc7308 | ||
|
|
c0e1e9fe7a | ||
|
|
fa5afe2141 | ||
|
|
f7789ecf14 | ||
|
|
a57fbe091a | ||
|
|
9f82b53366 | ||
|
|
248b2cf74d | ||
|
|
5fb9376af4 | ||
|
|
9c2bb12cd1 | ||
|
|
ebde99bde8 | ||
|
|
ef7be5398d | ||
|
|
2ef61bf982 | ||
|
|
d4d141347a | ||
|
|
e18ba04751 | ||
|
|
3388d1a8b5 | ||
|
|
65917bb831 | ||
|
|
140499ac9e | ||
|
|
ccd037292d | ||
|
|
59939d0b14 | ||
|
|
9a2c00554d | ||
|
|
ee30c1728b | ||
|
|
8f75b0ef75 | ||
|
|
3128e718e2 | ||
|
|
657c617215 | ||
|
|
439d4725a0 | ||
|
|
8491bb3651 | ||
|
|
cce96e8f41 | ||
|
|
f02dd68713 | ||
|
|
ec14d32756 | ||
|
|
38577d45b0 | ||
|
|
ab0e7a3ddc | ||
|
|
bbd952a021 | ||
|
|
77e3051b1d | ||
|
|
924e164c14 | ||
|
|
25ff5ef169 | ||
|
|
3ea29eccae | ||
|
|
0f8a3d21a5 | ||
|
|
7487c2f668 | ||
|
|
88787b8573 | ||
|
|
17559a90f9 | ||
|
|
24696071a8 | ||
|
|
cca67af8d7 | ||
|
|
2beb92745f | ||
|
|
d6b582dc70 | ||
|
|
218320daf2 | ||
|
|
f83942d3e9 | ||
|
|
60d79eb2e7 | ||
|
|
1136c71e64 | ||
|
|
1bbbb0cf7f | ||
|
|
1756fde0c6 | ||
|
|
7f30fc1468 | ||
|
|
d5607fbb55 | ||
|
|
05d984d83d | ||
|
|
1110ae7421 |
7
.gitignore
vendored
7
.gitignore
vendored
@@ -17,3 +17,10 @@
|
|||||||
*buffer
|
*buffer
|
||||||
*model
|
*model
|
||||||
xgboost
|
xgboost
|
||||||
|
*pyc
|
||||||
|
*train
|
||||||
|
*test
|
||||||
|
*group
|
||||||
|
*rar
|
||||||
|
*vali
|
||||||
|
*data
|
||||||
|
|||||||
2
LICENSE
2
LICENSE
@@ -1,4 +1,4 @@
|
|||||||
Copyright (c) 2014 Tianqi Chen
|
Copyright (c) 2014 by Tianqi Chen and Contributors
|
||||||
|
|
||||||
Licensed under the Apache License, Version 2.0 (the "License");
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
you may not use this file except in compliance with the License.
|
you may not use this file except in compliance with the License.
|
||||||
|
|||||||
3
Makefile
3
Makefile
@@ -10,7 +10,8 @@ OBJ =
|
|||||||
all: $(BIN) $(OBJ)
|
all: $(BIN) $(OBJ)
|
||||||
export LDFLAGS= -pthread -lm
|
export LDFLAGS= -pthread -lm
|
||||||
|
|
||||||
xgboost: regression/xgboost_reg_main.cpp regression/*.h booster/*.h booster/*/*.hpp booster/*.hpp
|
xgboost: regrank/xgboost_regrank_main.cpp regrank/*.h regrank/*.hpp booster/*.h booster/*/*.hpp booster/*.hpp
|
||||||
|
|
||||||
|
|
||||||
$(BIN) :
|
$(BIN) :
|
||||||
$(CXX) $(CFLAGS) $(LDFLAGS) -o $@ $(filter %.cpp %.o %.c, $^)
|
$(CXX) $(CFLAGS) $(LDFLAGS) -o $@ $(filter %.cpp %.o %.c, $^)
|
||||||
|
|||||||
19
README.md
19
README.md
@@ -1,20 +1,23 @@
|
|||||||
xgboost: eXtreme Gradient Boosting
|
xgboost: eXtreme Gradient Boosting
|
||||||
=======
|
=======
|
||||||
A General purpose gradient boosting (tree) library.
|
An optimized general purpose gradient boosting (tree) library.
|
||||||
|
|
||||||
Authors:
|
Contributors: https://github.com/tqchen/xgboost/graphs/contributors
|
||||||
* Tianqi Chen, project creater
|
|
||||||
* Kailong Chen, contributes regression module
|
|
||||||
|
|
||||||
Turorial and Documentation: https://github.com/tqchen/xgboost/wiki
|
Turorial and Documentation: https://github.com/tqchen/xgboost/wiki
|
||||||
|
|
||||||
|
Questions and Issues: [https://github.com/tqchen/xgboost/issues](https://github.com/tqchen/xgboost/issues?q=is%3Aissue+label%3Aquestion)
|
||||||
|
|
||||||
Features
|
Features
|
||||||
=======
|
=======
|
||||||
* Sparse feature format:
|
* Sparse feature format:
|
||||||
- Sparse feature format allows easy handling of missing values, and improve computation efficiency.
|
- Sparse feature format allows easy handling of missing values, and improve computation efficiency.
|
||||||
* Push the limit on single machine:
|
* Push the limit on single machine:
|
||||||
- Efficient implementation that optimizes memory and computation.
|
- Efficient implementation that optimizes memory and computation.
|
||||||
* Layout of gradient boosting algorithm to support generic tasks, see project wiki.
|
* Speed: XGBoost is very fast
|
||||||
|
- IN [demo/higgs/speedtest.py](demo/kaggle-higgs/speedtest.py), kaggle higgs data it is faster(on our machine 20 times faster using 4 threads) than sklearn.ensemble.GradientBoostingClassifier
|
||||||
|
* Layout of gradient boosting algorithm to support user defined objective
|
||||||
|
* Python interface, works with numpy and scipy.sparse matrix
|
||||||
|
|
||||||
Supported key components
|
Supported key components
|
||||||
=======
|
=======
|
||||||
@@ -33,6 +36,12 @@ Planned components
|
|||||||
- matrix factorization
|
- matrix factorization
|
||||||
- structured prediction
|
- structured prediction
|
||||||
|
|
||||||
|
Build
|
||||||
|
======
|
||||||
|
* Simply type make
|
||||||
|
* If your compiler does not come with OpenMP support, it will fire an warning telling you that the code will compile into single thread mode, and you will get single thread xgboost
|
||||||
|
- You may get a error: -lgomp is not found, you can remove -fopenmp flag in Makefile to get single thread xgboost, or upgrade your compiler to compile multi-thread version
|
||||||
|
|
||||||
File extension convention
|
File extension convention
|
||||||
=======
|
=======
|
||||||
* .h are interface, utils and data structures, with detailed comment;
|
* .h are interface, utils and data structures, with detailed comment;
|
||||||
|
|||||||
@@ -49,9 +49,8 @@ namespace xgboost{
|
|||||||
};
|
};
|
||||||
private:
|
private:
|
||||||
Entry best_entry;
|
Entry best_entry;
|
||||||
const TreeParamTrain ¶m;
|
|
||||||
public:
|
public:
|
||||||
RTSelecter( const TreeParamTrain &p ):param( p ){
|
RTSelecter( void ){
|
||||||
memset( &best_entry, 0, sizeof(best_entry) );
|
memset( &best_entry, 0, sizeof(best_entry) );
|
||||||
best_entry.loss_chg = 0.0f;
|
best_entry.loss_chg = 0.0f;
|
||||||
}
|
}
|
||||||
@@ -211,7 +210,7 @@ namespace xgboost{
|
|||||||
const SCEntry *entry, size_t start, size_t end,
|
const SCEntry *entry, size_t start, size_t end,
|
||||||
int findex, float parent_base_weight ){
|
int findex, float parent_base_weight ){
|
||||||
// local selecter
|
// local selecter
|
||||||
RTSelecter slocal( param );
|
RTSelecter slocal;
|
||||||
|
|
||||||
if( param.need_forward_search() ){
|
if( param.need_forward_search() ){
|
||||||
// forward process, default right
|
// forward process, default right
|
||||||
@@ -320,7 +319,7 @@ namespace xgboost{
|
|||||||
// after this point, tmp_rptr and entry is ready to use
|
// after this point, tmp_rptr and entry is ready to use
|
||||||
|
|
||||||
// global selecter
|
// global selecter
|
||||||
RTSelecter sglobal( param );
|
RTSelecter sglobal;
|
||||||
// gain root
|
// gain root
|
||||||
const double root_gain = param.CalcRootGain( rsum_grad, rsum_hess );
|
const double root_gain = param.CalcRootGain( rsum_grad, rsum_hess );
|
||||||
// KEY: layerwise, weight of current node if it is leaf
|
// KEY: layerwise, weight of current node if it is leaf
|
||||||
|
|||||||
@@ -290,6 +290,7 @@ namespace xgboost{
|
|||||||
}
|
}
|
||||||
// sort columns
|
// sort columns
|
||||||
unsigned ncol = static_cast<unsigned>(this->NumCol());
|
unsigned ncol = static_cast<unsigned>(this->NumCol());
|
||||||
|
#pragma omp parallel for schedule(static)
|
||||||
for (unsigned i = 0; i < ncol; i++){
|
for (unsigned i = 0; i < ncol; i++){
|
||||||
std::sort(&col_data_[col_ptr_[i]], &col_data_[col_ptr_[i + 1]], REntry::cmp_fvalue);
|
std::sort(&col_data_[col_ptr_[i]], &col_data_[col_ptr_[i + 1]], REntry::cmp_fvalue);
|
||||||
}
|
}
|
||||||
@@ -320,6 +321,8 @@ namespace xgboost{
|
|||||||
fi.Read(&col_access, sizeof(int));
|
fi.Read(&col_access, sizeof(int));
|
||||||
if (col_access != 0){
|
if (col_access != 0){
|
||||||
FMatrixS::LoadBinary(fi, col_ptr_, col_data_);
|
FMatrixS::LoadBinary(fi, col_ptr_, col_data_);
|
||||||
|
}else{
|
||||||
|
this->InitData();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
/*!
|
/*!
|
||||||
@@ -371,14 +374,14 @@ namespace xgboost{
|
|||||||
size_t nrow;
|
size_t nrow;
|
||||||
utils::Assert(fi.Read(&nrow, sizeof(size_t)) != 0, "Load FMatrixS");
|
utils::Assert(fi.Read(&nrow, sizeof(size_t)) != 0, "Load FMatrixS");
|
||||||
ptr.resize(nrow + 1);
|
ptr.resize(nrow + 1);
|
||||||
utils::Assert( fi.Read( &ptr[0], ptr.size() * sizeof(size_t) ), "Load FMatrixS" );
|
utils::Assert(fi.Read(&ptr[0], ptr.size() * sizeof(size_t)) != 0, "Load FMatrixS");
|
||||||
|
|
||||||
data.resize(ptr.back());
|
data.resize(ptr.back());
|
||||||
if (data.size() != 0){
|
if (data.size() != 0){
|
||||||
utils::Assert( fi.Read( &data[0] , data.size() * sizeof(REntry) ) , "Load FMatrixS" );
|
utils::Assert(fi.Read(&data[0], data.size() * sizeof(REntry)) != 0, "Load FMatrixS");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
protected:
|
public:
|
||||||
/*! \brief row pointer of CSR sparse storage */
|
/*! \brief row pointer of CSR sparse storage */
|
||||||
std::vector<size_t> row_ptr_;
|
std::vector<size_t> row_ptr_;
|
||||||
/*! \brief data in the row */
|
/*! \brief data in the row */
|
||||||
|
|||||||
@@ -88,8 +88,8 @@ namespace xgboost{
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (mparam.num_pbuffer != 0){
|
if (mparam.num_pbuffer != 0){
|
||||||
pred_buffer.resize ( mparam.num_pbuffer );
|
pred_buffer.resize(mparam.PredBufferSize());
|
||||||
pred_counter.resize( mparam.num_pbuffer );
|
pred_counter.resize(mparam.PredBufferSize());
|
||||||
utils::Assert(fi.Read(&pred_buffer[0], pred_buffer.size()*sizeof(float)) != 0);
|
utils::Assert(fi.Read(&pred_buffer[0], pred_buffer.size()*sizeof(float)) != 0);
|
||||||
utils::Assert(fi.Read(&pred_counter[0], pred_counter.size()*sizeof(unsigned)) != 0);
|
utils::Assert(fi.Read(&pred_counter[0], pred_counter.size()*sizeof(unsigned)) != 0);
|
||||||
}
|
}
|
||||||
@@ -117,8 +117,8 @@ namespace xgboost{
|
|||||||
*/
|
*/
|
||||||
inline void InitModel(void){
|
inline void InitModel(void){
|
||||||
pred_buffer.clear(); pred_counter.clear();
|
pred_buffer.clear(); pred_counter.clear();
|
||||||
pred_buffer.resize ( mparam.num_pbuffer, 0.0 );
|
pred_buffer.resize(mparam.PredBufferSize(), 0.0);
|
||||||
pred_counter.resize( mparam.num_pbuffer, 0 );
|
pred_counter.resize(mparam.PredBufferSize(), 0);
|
||||||
utils::Assert(mparam.num_boosters == 0);
|
utils::Assert(mparam.num_boosters == 0);
|
||||||
utils::Assert(boosters.size() == 0);
|
utils::Assert(boosters.size() == 0);
|
||||||
}
|
}
|
||||||
@@ -130,6 +130,7 @@ namespace xgboost{
|
|||||||
if (tparam.nthread != 0){
|
if (tparam.nthread != 0){
|
||||||
omp_set_num_threads(tparam.nthread);
|
omp_set_num_threads(tparam.nthread);
|
||||||
}
|
}
|
||||||
|
if (mparam.num_booster_group == 0) mparam.num_booster_group = 1;
|
||||||
// make sure all the boosters get the latest parameters
|
// make sure all the boosters get the latest parameters
|
||||||
for (size_t i = 0; i < this->boosters.size(); i++){
|
for (size_t i = 0; i < this->boosters.size(); i++){
|
||||||
this->ConfigBooster(this->boosters[i]);
|
this->ConfigBooster(this->boosters[i]);
|
||||||
@@ -175,12 +176,14 @@ namespace xgboost{
|
|||||||
* \param feats features of each instance
|
* \param feats features of each instance
|
||||||
* \param root_index pre-partitioned root index of each instance,
|
* \param root_index pre-partitioned root index of each instance,
|
||||||
* root_index.size() can be 0 which indicates that no pre-partition involved
|
* root_index.size() can be 0 which indicates that no pre-partition involved
|
||||||
|
* \param bst_group which booster group it belongs to, by default, we only have 1 booster group, and leave this parameter as default
|
||||||
*/
|
*/
|
||||||
inline void DoBoost(std::vector<float> &grad,
|
inline void DoBoost(std::vector<float> &grad,
|
||||||
std::vector<float> &hess,
|
std::vector<float> &hess,
|
||||||
const booster::FMatrixS &feats,
|
const booster::FMatrixS &feats,
|
||||||
const std::vector<unsigned> &root_index ) {
|
const std::vector<unsigned> &root_index,
|
||||||
booster::IBooster *bst = this->GetUpdateBooster();
|
int bst_group = 0 ) {
|
||||||
|
booster::IBooster *bst = this->GetUpdateBooster( bst_group );
|
||||||
bst->DoBoost(grad, hess, feats, root_index);
|
bst->DoBoost(grad, hess, feats, root_index);
|
||||||
}
|
}
|
||||||
/*!
|
/*!
|
||||||
@@ -190,29 +193,42 @@ namespace xgboost{
|
|||||||
* \param row_index row index in the feature matrix
|
* \param row_index row index in the feature matrix
|
||||||
* \param buffer_index the buffer index of the current feature line, default -1 means no buffer assigned
|
* \param buffer_index the buffer index of the current feature line, default -1 means no buffer assigned
|
||||||
* \param root_index root id of current instance, default = 0
|
* \param root_index root id of current instance, default = 0
|
||||||
|
* \param bst_group booster group index
|
||||||
* \return prediction
|
* \return prediction
|
||||||
*/
|
*/
|
||||||
inline float Predict( const FMatrixS &feats, bst_uint row_index, int buffer_index = -1, unsigned root_index = 0 ){
|
inline float Predict(const FMatrixS &feats, bst_uint row_index,
|
||||||
size_t istart = 0;
|
int buffer_index = -1, unsigned root_index = 0, int bst_group = 0 ){
|
||||||
|
size_t itop = 0;
|
||||||
float psum = 0.0f;
|
float psum = 0.0f;
|
||||||
|
const int bid = mparam.BufferOffset(buffer_index, bst_group);
|
||||||
|
|
||||||
// load buffered results if any
|
// load buffered results if any
|
||||||
if( mparam.do_reboost == 0 && buffer_index >= 0 ){
|
if (mparam.do_reboost == 0 && bid >= 0){
|
||||||
utils::Assert( buffer_index < mparam.num_pbuffer, "buffer index exceed num_pbuffer" );
|
itop = this->pred_counter[bid];
|
||||||
istart = this->pred_counter[ buffer_index ];
|
psum = this->pred_buffer[bid];
|
||||||
psum = this->pred_buffer [ buffer_index ];
|
|
||||||
}
|
}
|
||||||
|
|
||||||
for( size_t i = istart; i < this->boosters.size(); i ++ ){
|
for (size_t i = itop; i < this->boosters.size(); ++i ){
|
||||||
|
if( booster_info[i] == bst_group ){
|
||||||
psum += this->boosters[i]->Predict(feats, row_index, root_index);
|
psum += this->boosters[i]->Predict(feats, row_index, root_index);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
// updated the buffered results
|
// updated the buffered results
|
||||||
if( mparam.do_reboost == 0 && buffer_index >= 0 ){
|
if (mparam.do_reboost == 0 && bid >= 0){
|
||||||
this->pred_counter[ buffer_index ] = static_cast<unsigned>( boosters.size() );
|
this->pred_counter[bid] = static_cast<unsigned>(boosters.size());
|
||||||
this->pred_buffer [ buffer_index ] = psum;
|
this->pred_buffer[bid] = psum;
|
||||||
}
|
}
|
||||||
return psum;
|
return psum;
|
||||||
}
|
}
|
||||||
|
/*! \return number of boosters so far */
|
||||||
|
inline int NumBoosters(void) const{
|
||||||
|
return mparam.num_boosters;
|
||||||
|
}
|
||||||
|
/*! \return number of booster groups */
|
||||||
|
inline int NumBoosterGroup(void) const{
|
||||||
|
if( mparam.num_booster_group == 0 ) return 1;
|
||||||
|
return mparam.num_booster_group;
|
||||||
|
}
|
||||||
public:
|
public:
|
||||||
//--------trial code for interactive update an existing booster------
|
//--------trial code for interactive update an existing booster------
|
||||||
//-------- usually not needed, ignore this region ---------
|
//-------- usually not needed, ignore this region ---------
|
||||||
@@ -220,14 +236,17 @@ namespace xgboost{
|
|||||||
* \brief same as Predict, but removes the prediction of booster to be updated
|
* \brief same as Predict, but removes the prediction of booster to be updated
|
||||||
* this function must be called once and only once for every data with pbuffer
|
* this function must be called once and only once for every data with pbuffer
|
||||||
*/
|
*/
|
||||||
inline float InteractPredict( const FMatrixS &feats, bst_uint row_index, int buffer_index = -1, unsigned root_index = 0 ){
|
inline float InteractPredict(const FMatrixS &feats, bst_uint row_index,
|
||||||
|
int buffer_index = -1, unsigned root_index = 0, int bst_group = 0){
|
||||||
float psum = this->Predict(feats, row_index, buffer_index, root_index);
|
float psum = this->Predict(feats, row_index, buffer_index, root_index);
|
||||||
if (tparam.reupdate_booster != -1){
|
if (tparam.reupdate_booster != -1){
|
||||||
const int bid = tparam.reupdate_booster;
|
const int bid = tparam.reupdate_booster;
|
||||||
utils::Assert(bid >= 0 && bid < (int)boosters.size(), "interact:booster_index exceed existing bound");
|
utils::Assert(bid >= 0 && bid < (int)boosters.size(), "interact:booster_index exceed existing bound");
|
||||||
|
if( bst_group == booster_info[bid] ){
|
||||||
psum -= boosters[bid]->Predict(feats, row_index, root_index);
|
psum -= boosters[bid]->Predict(feats, row_index, root_index);
|
||||||
|
}
|
||||||
if (mparam.do_reboost == 0 && buffer_index >= 0){
|
if (mparam.do_reboost == 0 && buffer_index >= 0){
|
||||||
this->pred_buffer[ buffer_index ] = psum;
|
this->pred_buffer[mparam.BufferOffset(buffer_index,bst_group)] = psum;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return psum;
|
return psum;
|
||||||
@@ -243,14 +262,20 @@ namespace xgboost{
|
|||||||
}
|
}
|
||||||
boosters.resize(mparam.num_boosters -= 1);
|
boosters.resize(mparam.num_boosters -= 1);
|
||||||
booster_info.resize(boosters.size());
|
booster_info.resize(boosters.size());
|
||||||
|
// update pred counter
|
||||||
|
for( size_t i = 0; i < pred_counter.size(); ++ i ){
|
||||||
|
if( pred_counter[i] > (unsigned)bid ) pred_counter[i] -= 1;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
/*! \brief update the prediction buffer, after booster have been updated */
|
/*! \brief update the prediction buffer, after booster have been updated */
|
||||||
inline void InteractRePredict( const FMatrixS &feats, bst_uint row_index, int buffer_index = -1, unsigned root_index = 0 ){
|
inline void InteractRePredict(const FMatrixS &feats, bst_uint row_index,
|
||||||
|
int buffer_index = -1, unsigned root_index = 0, int bst_group = 0 ){
|
||||||
if (tparam.reupdate_booster != -1){
|
if (tparam.reupdate_booster != -1){
|
||||||
const int bid = tparam.reupdate_booster;
|
const int bid = tparam.reupdate_booster;
|
||||||
|
if( booster_info[bid] != bst_group ) return;
|
||||||
utils::Assert(bid >= 0 && bid < (int)boosters.size(), "interact:booster_index exceed existing bound");
|
utils::Assert(bid >= 0 && bid < (int)boosters.size(), "interact:booster_index exceed existing bound");
|
||||||
if (mparam.do_reboost == 0 && buffer_index >= 0){
|
if (mparam.do_reboost == 0 && buffer_index >= 0){
|
||||||
this->pred_buffer[ buffer_index ] += boosters[ bid ]->Predict( feats, row_index, root_index );
|
this->pred_buffer[mparam.BufferOffset(buffer_index,bst_group)] += boosters[bid]->Predict(feats, row_index, root_index);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -274,21 +299,23 @@ namespace xgboost{
|
|||||||
* \brief get a booster to update
|
* \brief get a booster to update
|
||||||
* \return the booster created
|
* \return the booster created
|
||||||
*/
|
*/
|
||||||
inline booster::IBooster *GetUpdateBooster( void ){
|
inline booster::IBooster *GetUpdateBooster(int bst_group){
|
||||||
if (tparam.reupdate_booster != -1){
|
if (tparam.reupdate_booster != -1){
|
||||||
const int bid = tparam.reupdate_booster;
|
const int bid = tparam.reupdate_booster;
|
||||||
utils::Assert(bid >= 0 && bid < (int)boosters.size(), "interact:booster_index exceed existing bound");
|
utils::Assert(bid >= 0 && bid < (int)boosters.size(), "interact:booster_index exceed existing bound");
|
||||||
this->ConfigBooster(boosters[bid]);
|
this->ConfigBooster(boosters[bid]);
|
||||||
|
utils::Assert( bst_group == booster_info[bid], "booster group must match existing reupdate booster");
|
||||||
return boosters[bid];
|
return boosters[bid];
|
||||||
}
|
}
|
||||||
|
|
||||||
if (mparam.do_reboost == 0 || boosters.size() == 0){
|
if (mparam.do_reboost == 0 || boosters.size() == 0){
|
||||||
mparam.num_boosters += 1;
|
mparam.num_boosters += 1;
|
||||||
boosters.push_back(booster::CreateBooster<FMatrixS>(mparam.booster_type));
|
boosters.push_back(booster::CreateBooster<FMatrixS>(mparam.booster_type));
|
||||||
booster_info.push_back( 0 );
|
booster_info.push_back(bst_group);
|
||||||
this->ConfigBooster(boosters.back());
|
this->ConfigBooster(boosters.back());
|
||||||
boosters.back()->InitModel();
|
boosters.back()->InitModel();
|
||||||
}else{
|
}
|
||||||
|
else{
|
||||||
this->ConfigBooster(boosters.back());
|
this->ConfigBooster(boosters.back());
|
||||||
}
|
}
|
||||||
return boosters.back();
|
return boosters.back();
|
||||||
@@ -311,8 +338,13 @@ namespace xgboost{
|
|||||||
* set to 1 for linear booster, so that regularization term can be considered
|
* set to 1 for linear booster, so that regularization term can be considered
|
||||||
*/
|
*/
|
||||||
int do_reboost;
|
int do_reboost;
|
||||||
|
/*!
|
||||||
|
* \brief number of booster group, how many predictions a single
|
||||||
|
* input instance could corresponds to
|
||||||
|
*/
|
||||||
|
int num_booster_group;
|
||||||
/*! \brief reserved parameters */
|
/*! \brief reserved parameters */
|
||||||
int reserved[ 32 ];
|
int reserved[31];
|
||||||
/*! \brief constructor */
|
/*! \brief constructor */
|
||||||
ModelParam(void){
|
ModelParam(void){
|
||||||
num_boosters = 0;
|
num_boosters = 0;
|
||||||
@@ -320,6 +352,7 @@ namespace xgboost{
|
|||||||
num_roots = num_feature = 0;
|
num_roots = num_feature = 0;
|
||||||
do_reboost = 0;
|
do_reboost = 0;
|
||||||
num_pbuffer = 0;
|
num_pbuffer = 0;
|
||||||
|
num_booster_group = 1;
|
||||||
memset(reserved, 0, sizeof(reserved));
|
memset(reserved, 0, sizeof(reserved));
|
||||||
}
|
}
|
||||||
/*!
|
/*!
|
||||||
@@ -335,9 +368,20 @@ namespace xgboost{
|
|||||||
}
|
}
|
||||||
if (!strcmp("num_pbuffer", name)) num_pbuffer = atoi(val);
|
if (!strcmp("num_pbuffer", name)) num_pbuffer = atoi(val);
|
||||||
if (!strcmp("do_reboost", name)) do_reboost = atoi(val);
|
if (!strcmp("do_reboost", name)) do_reboost = atoi(val);
|
||||||
|
if (!strcmp("num_booster_group", name)) num_booster_group = atoi(val);
|
||||||
if (!strcmp("bst:num_roots", name)) num_roots = atoi(val);
|
if (!strcmp("bst:num_roots", name)) num_roots = atoi(val);
|
||||||
if (!strcmp("bst:num_feature", name)) num_feature = atoi(val);
|
if (!strcmp("bst:num_feature", name)) num_feature = atoi(val);
|
||||||
}
|
}
|
||||||
|
inline int PredBufferSize(void) const{
|
||||||
|
if (num_booster_group == 0) return num_pbuffer;
|
||||||
|
else return num_booster_group * num_pbuffer;
|
||||||
|
}
|
||||||
|
inline int BufferOffset( int buffer_index, int bst_group ) const{
|
||||||
|
if( buffer_index < 0 ) return -1;
|
||||||
|
utils::Assert( buffer_index < num_pbuffer, "buffer_indexexceed num_pbuffer" );
|
||||||
|
return buffer_index + num_pbuffer * bst_group;
|
||||||
|
|
||||||
|
}
|
||||||
};
|
};
|
||||||
/*! \brief training parameters */
|
/*! \brief training parameters */
|
||||||
struct TrainParam{
|
struct TrainParam{
|
||||||
|
|||||||
@@ -24,7 +24,7 @@ def loadfmap( fname ):
|
|||||||
return fmap, nmap
|
return fmap, nmap
|
||||||
|
|
||||||
def write_nmap( fo, nmap ):
|
def write_nmap( fo, nmap ):
|
||||||
for i in xrange( len(nmap) ):
|
for i in range( len(nmap) ):
|
||||||
fo.write('%d\t%s\ti\n' % (i, nmap[i]) )
|
fo.write('%d\t%s\ti\n' % (i, nmap[i]) )
|
||||||
|
|
||||||
# start here
|
# start here
|
||||||
@@ -41,7 +41,7 @@ for l in open( 'agaricus-lepiota.data' ):
|
|||||||
else:
|
else:
|
||||||
assert arr[0] == 'e'
|
assert arr[0] == 'e'
|
||||||
fo.write('0')
|
fo.write('0')
|
||||||
for i in xrange( 1,len(arr) ):
|
for i in range( 1,len(arr) ):
|
||||||
fo.write( ' %d:1' % fmap[i][arr[i].strip()] )
|
fo.write( ' %d:1' % fmap[i][arr[i].strip()] )
|
||||||
fo.write('\n')
|
fo.write('\n')
|
||||||
|
|
||||||
|
|||||||
@@ -3,7 +3,7 @@ import sys
|
|||||||
import random
|
import random
|
||||||
|
|
||||||
if len(sys.argv) < 2:
|
if len(sys.argv) < 2:
|
||||||
print 'Usage:<filename> <k> [nfold = 5]'
|
print ('Usage:<filename> <k> [nfold = 5]')
|
||||||
exit(0)
|
exit(0)
|
||||||
|
|
||||||
random.seed( 10 )
|
random.seed( 10 )
|
||||||
|
|||||||
@@ -2,7 +2,7 @@
|
|||||||
# choose the tree booster, 0: tree, 1: linear
|
# choose the tree booster, 0: tree, 1: linear
|
||||||
booster_type = 0
|
booster_type = 0
|
||||||
# choose logistic regression loss function for binary classification
|
# choose logistic regression loss function for binary classification
|
||||||
loss_type = 2
|
objective = binary:logistic
|
||||||
|
|
||||||
# Tree Booster Parameters
|
# Tree Booster Parameters
|
||||||
# step size shrinkage
|
# step size shrinkage
|
||||||
@@ -23,5 +23,7 @@ save_period = 0
|
|||||||
data = "agaricus.txt.train"
|
data = "agaricus.txt.train"
|
||||||
# The path of validation data, used to monitor training process, here [test] sets name of the validation set
|
# The path of validation data, used to monitor training process, here [test] sets name of the validation set
|
||||||
eval[test] = "agaricus.txt.test"
|
eval[test] = "agaricus.txt.test"
|
||||||
|
# evaluate on training data as well each round
|
||||||
|
eval_train = 1
|
||||||
# The path of test data
|
# The path of test data
|
||||||
test:data = "agaricus.txt.test"
|
test:data = "agaricus.txt.test"
|
||||||
|
|||||||
19
demo/kaggle-higgs/README.md
Normal file
19
demo/kaggle-higgs/README.md
Normal file
@@ -0,0 +1,19 @@
|
|||||||
|
Guide for Kaggle Higgs Challenge
|
||||||
|
=====
|
||||||
|
|
||||||
|
This is the folder giving example of how to use XGBoost Python Module to run Kaggle Higgs competition
|
||||||
|
|
||||||
|
This script will achieve about 3.600 AMS score in public leadboard. To get start, you need do following step:
|
||||||
|
|
||||||
|
1. Compile the XGBoost python lib
|
||||||
|
```bash
|
||||||
|
cd ../../python
|
||||||
|
make
|
||||||
|
```
|
||||||
|
2. Put training.csv test.csv on folder './data' (you can create a symbolic link)
|
||||||
|
|
||||||
|
3. Run ./run.sh
|
||||||
|
|
||||||
|
Speed
|
||||||
|
=====
|
||||||
|
speedtest.py compares xgboost's speed on this dataset with sklearn.GBM
|
||||||
62
demo/kaggle-higgs/higgs-numpy.py
Executable file
62
demo/kaggle-higgs/higgs-numpy.py
Executable file
@@ -0,0 +1,62 @@
|
|||||||
|
#!/usr/bin/python
|
||||||
|
# this is the example script to use xgboost to train
|
||||||
|
import inspect
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import numpy as np
|
||||||
|
# add path of xgboost python module
|
||||||
|
code_path = os.path.join(
|
||||||
|
os.path.split(inspect.getfile(inspect.currentframe()))[0], "../../python")
|
||||||
|
|
||||||
|
sys.path.append(code_path)
|
||||||
|
|
||||||
|
import xgboost as xgb
|
||||||
|
|
||||||
|
test_size = 550000
|
||||||
|
|
||||||
|
# path to where the data lies
|
||||||
|
dpath = 'data'
|
||||||
|
|
||||||
|
# load in training data, directly use numpy
|
||||||
|
dtrain = np.loadtxt( dpath+'/training.csv', delimiter=',', skiprows=1, converters={32: lambda x:int(x=='s'.encode('utf-8')) } )
|
||||||
|
print ('finish loading from csv ')
|
||||||
|
|
||||||
|
label = dtrain[:,32]
|
||||||
|
data = dtrain[:,1:31]
|
||||||
|
# rescale weight to make it same as test set
|
||||||
|
weight = dtrain[:,31] * float(test_size) / len(label)
|
||||||
|
|
||||||
|
sum_wpos = sum( weight[i] for i in range(len(label)) if label[i] == 1.0 )
|
||||||
|
sum_wneg = sum( weight[i] for i in range(len(label)) if label[i] == 0.0 )
|
||||||
|
|
||||||
|
# print weight statistics
|
||||||
|
print ('weight statistics: wpos=%g, wneg=%g, ratio=%g' % ( sum_wpos, sum_wneg, sum_wneg/sum_wpos ))
|
||||||
|
|
||||||
|
# construct xgboost.DMatrix from numpy array, treat -999.0 as missing value
|
||||||
|
xgmat = xgb.DMatrix( data, label=label, missing = -999.0, weight=weight )
|
||||||
|
|
||||||
|
# setup parameters for xgboost
|
||||||
|
param = {}
|
||||||
|
# use logistic regression loss, use raw prediction before logistic transformation
|
||||||
|
# since we only need the rank
|
||||||
|
param['objective'] = 'binary:logitraw'
|
||||||
|
# scale weight of positive examples
|
||||||
|
param['scale_pos_weight'] = sum_wneg/sum_wpos
|
||||||
|
param['bst:eta'] = 0.1
|
||||||
|
param['bst:max_depth'] = 6
|
||||||
|
param['eval_metric'] = 'auc'
|
||||||
|
param['silent'] = 1
|
||||||
|
param['nthread'] = 16
|
||||||
|
|
||||||
|
# you can directly throw param in, though we want to watch multiple metrics here
|
||||||
|
plst = list(param.items())+[('eval_metric', 'ams@0.15')]
|
||||||
|
|
||||||
|
watchlist = [ (xgmat,'train') ]
|
||||||
|
# boost 120 tres
|
||||||
|
num_round = 120
|
||||||
|
print ('loading data end, start to boost trees')
|
||||||
|
bst = xgb.train( plst, xgmat, num_round, watchlist );
|
||||||
|
# save out model
|
||||||
|
bst.save_model('higgs.model')
|
||||||
|
|
||||||
|
print ('finish training')
|
||||||
54
demo/kaggle-higgs/higgs-pred.py
Executable file
54
demo/kaggle-higgs/higgs-pred.py
Executable file
@@ -0,0 +1,54 @@
|
|||||||
|
#!/usr/bin/python
# Make predictions for the Kaggle Higgs test set with a trained xgboost
# model and write a submission file in the Kaggle CSV format.
import sys
import numpy as np
# add path of xgboost python module
sys.path.append('../../python/')
import xgboost as xgb

# path to where the data lies
dpath = 'data'

modelfile = 'higgs.model'
outfile = 'higgs.pred.csv'
# make top 15% as positive
threshold_ratio = 0.15

# load in test data, directly use numpy
dtest = np.loadtxt( dpath+'/test.csv', delimiter=',', skiprows=1 )
data = dtest[:,1:31]
idx = dtest[:,0]

print ('finish loading from csv ')
xgmat = xgb.DMatrix( data, missing = -999.0 )
bst = xgb.Booster({'nthread':16})
bst.load_model( modelfile )
ypred = bst.predict( xgmat )

# pair each event id with its raw prediction score
res = [ ( int(idx[i]), ypred[i] ) for i in range(len(ypred)) ]

# rank events by descending score: rank 1 = highest score
rorder = {}
for k, v in sorted( res, key = lambda x:-x[1] ):
    rorder[ k ] = len(rorder) + 1

# write out predictions: label the top threshold_ratio fraction as signal 's'
ntop = int( threshold_ratio * len(rorder) )
nhit = 0
ntot = 0
# use a context manager so the output file is closed even if writing fails
with open(outfile, 'w') as fo:
    fo.write('EventId,RankOrder,Class\n')
    for k, v in res:
        if rorder[k] <= ntop:
            lb = 's'
            nhit += 1
        else:
            lb = 'b'
        # change output rank order to follow Kaggle convention
        fo.write('%s,%d,%s\n' % ( k, len(rorder)+1-rorder[k], lb ) )
        ntot += 1

print ('finished writing into prediction file')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
14
demo/kaggle-higgs/run.sh
Executable file
14
demo/kaggle-higgs/run.sh
Executable file
@@ -0,0 +1,14 @@
|
|||||||
|
#!/bin/bash
# Train the Higgs model, then produce the submission file.
# Abort and propagate the exit status if either step fails.

python -u higgs-numpy.py
status=$?
if [ "$status" -ne 0 ]; then
    echo "ERROR in higgs-numpy.py"
    exit "$status"
fi

python -u higgs-pred.py
status=$?
if [ "$status" -ne 0 ]; then
    echo "ERROR in higgs-pred.py"
    exit "$status"
fi
|
||||||
66
demo/kaggle-higgs/speedtest.py
Executable file
66
demo/kaggle-higgs/speedtest.py
Executable file
@@ -0,0 +1,66 @@
|
|||||||
|
#!/usr/bin/python
# Speed comparison on the Kaggle Higgs training data:
# sklearn GradientBoostingClassifier vs xgboost, sweeping thread counts.
import sys
import numpy as np
# add path of xgboost python module
sys.path.append('../../python/')
import xgboost as xgb
from sklearn.ensemble import GradientBoostingClassifier
import time

test_size = 550000

# path to where the data lies
dpath = 'data'

# load in training data, directly use numpy; the 's'/'b' label column is
# converted to 1/0 on the fly
dtrain = np.loadtxt( dpath+'/training.csv', delimiter=',', skiprows=1, converters={32: lambda x:int(x=='s') } )
print ('finish loading from csv ')

label = dtrain[:,32]
data = dtrain[:,1:31]
# rescale weight to make it same as test set
weight = dtrain[:,31] * float(test_size) / len(label)

# total weight of positive / negative examples (vectorized)
sum_wpos = weight[label == 1.0].sum()
sum_wneg = weight[label == 0.0].sum()

# print weight statistics
print ('weight statistics: wpos=%g, wneg=%g, ratio=%g' % ( sum_wpos, sum_wneg, sum_wneg/sum_wpos ))

# construct xgboost.DMatrix from numpy array, treat -999.0 as missing value
xgmat = xgb.DMatrix( data, label=label, missing = -999.0, weight=weight )

# setup parameters for xgboost
param = {}
# use logistic regression loss
param['objective'] = 'binary:logitraw'
# scale weight of positive examples
param['scale_pos_weight'] = sum_wneg/sum_wpos
param['bst:eta'] = 0.1
param['bst:max_depth'] = 6
param['eval_metric'] = 'auc'
param['silent'] = 1
param['nthread'] = 4

# NOTE: wrap dict.items() in list() -- under Python 3 it is a view object
# and concatenating it with a list raises TypeError
plst = list(param.items())+[('eval_metric', 'ams@0.15')]

watchlist = [ (xgmat,'train') ]
# boost 10 trees
num_round = 10
print ('loading data end, start to boost trees')
print ("training GBM from sklearn")
tmp = time.time()
gbm = GradientBoostingClassifier(n_estimators=num_round, max_depth=6, verbose=2)
gbm.fit(data, label)
print ("sklearn.GBM costs: %s seconds" % str(time.time() - tmp))
#raw_input()
print ("training xgboost")
threads = [1, 2, 4, 16]
for i in threads:
    param['nthread'] = i
    tmp = time.time()
    # rebuild the parameter list so the new nthread value is picked up
    plst = list(param.items())+[('eval_metric', 'ams@0.15')]
    bst = xgb.train( plst, xgmat, num_round, watchlist )
    print ("XGBoost with %d thread costs: %s seconds" % (i, str(time.time() - tmp)))

print ('finish training')
|
||||||
10
demo/multiclass_classification/README.md
Normal file
10
demo/multiclass_classification/README.md
Normal file
@@ -0,0 +1,10 @@
|
|||||||
|
Demonstrating how to use XGBoost to accomplish a multi-class classification task on the [UCI Dermatology dataset](https://archive.ics.uci.edu/ml/datasets/Dermatology)
|
||||||
|
|
||||||
|
Make sure you have built the xgboost python module in ../../python
|
||||||
|
|
||||||
|
1. Run runexp.sh
|
||||||
|
```bash
|
||||||
|
./runexp.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
Explanations can be found in the [wiki](https://github.com/tqchen/xgboost/wiki)
|
||||||
9
demo/multiclass_classification/runexp.sh
Executable file
9
demo/multiclass_classification/runexp.sh
Executable file
@@ -0,0 +1,9 @@
|
|||||||
|
#!/bin/bash
# Fetch the UCI Dermatology dataset if it is not present, then run the
# multi-class classification demo.
if [ ! -f dermatology.data ]; then
    echo "getting data from uci, make sure you are connected to internet"
    wget https://archive.ics.uci.edu/ml/machine-learning-databases/dermatology/dermatology.data
else
    echo "use existing data to run multi class classification"
fi
python train.py
|
||||||
49
demo/multiclass_classification/train.py
Executable file
49
demo/multiclass_classification/train.py
Executable file
@@ -0,0 +1,49 @@
|
|||||||
|
#! /usr/bin/python
# Multi-class classification demo on the UCI Dermatology dataset.
import sys
import numpy as np
sys.path.append('../../python/')
import xgboost as xgb

# label need to be 0 to num_class -1: '?' in column 33 maps to 1,
# the 1-based class id in column 34 shifts to 0-based
data = np.loadtxt('./dermatology.data', delimiter=',',
                  converters={33: lambda x: int(x == '?'), 34: lambda x: int(x) - 1})
nrow = data.shape[0]
cut = int(nrow * 0.7)

# 70/30 train/test split
train = data[:cut, :]
test = data[cut:, :]

train_X, train_Y = train[:, 0:33], train[:, 34]
test_X, test_Y = test[:, 0:33], test[:, 34]

xg_train = xgb.DMatrix(train_X, label=train_Y)
xg_test = xgb.DMatrix(test_X, label=test_Y)

# setup parameters for xgboost: softmax multi-class classification
param = {
    'objective': 'multi:softmax',
    'bst:eta': 0.1,
    'bst:max_depth': 6,
    'silent': 1,
    'nthread': 4,
    'num_class': 6,
}

watchlist = [(xg_train, 'train'), (xg_test, 'test')]
num_round = 5
bst = xgb.train(param, xg_train, num_round, watchlist)
# get prediction
pred = bst.predict(xg_test)

err = sum(int(pred[i]) != test_Y[i] for i in range(len(test_Y))) / float(len(test_Y))
print ('predicting, classification error=%f' % err)

# do the same thing again, but output probabilities
param['objective'] = 'multi:softprob'
bst = xgb.train(param, xg_train, num_round, watchlist)
# prediction comes back as a 1D array, reshape to (nclass, ndata)
yprob = bst.predict(xg_test).reshape(6, test_Y.shape[0])
ylabel = np.argmax(yprob, axis=0)

err = sum(int(ylabel[i]) != test_Y[i] for i in range(len(test_Y))) / float(len(test_Y))
print ('predicting, classification error=%f' % err)
|
||||||
13
demo/rank/README
Normal file
13
demo/rank/README
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
Instructions:
|
||||||
|
The dataset for ranking demo is from LETOR04 MQ2008 fold1,
|
||||||
|
You can use the following command to run the example
|
||||||
|
|
||||||
|
|
||||||
|
Get the data: ./wgetdata.sh
|
||||||
|
Run the example: ./runexp.sh
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
30
demo/rank/mq2008.conf
Normal file
30
demo/rank/mq2008.conf
Normal file
@@ -0,0 +1,30 @@
|
|||||||
|
# General Parameters, see comment for each definition
|
||||||
|
# choose the tree booster, 0: tree, 1: linear
|
||||||
|
booster_type = 0
|
||||||
|
|
||||||
|
# specify objective
|
||||||
|
objective="rank:pairwise"
|
||||||
|
|
||||||
|
# Tree Booster Parameters
|
||||||
|
# step size shrinkage
|
||||||
|
bst:eta = 0.1
|
||||||
|
# minimum loss reduction required to make a further partition
|
||||||
|
bst:gamma = 1.0
|
||||||
|
# minimum sum of instance weight(hessian) needed in a child
|
||||||
|
bst:min_child_weight = 0.1
|
||||||
|
# maximum depth of a tree
|
||||||
|
bst:max_depth = 6
|
||||||
|
|
||||||
|
# Task parameters
|
||||||
|
# the number of round to do boosting
|
||||||
|
num_round = 4
|
||||||
|
# 0 means do not save any model except the final round model
|
||||||
|
save_period = 0
|
||||||
|
# The path of training data
|
||||||
|
data = "mq2008.train"
|
||||||
|
# The path of validation data, used to monitor training process, here [test] sets name of the validation set
|
||||||
|
eval[test] = "mq2008.vali"
|
||||||
|
# The path of test data
|
||||||
|
test:data = "mq2008.test"
|
||||||
|
|
||||||
|
|
||||||
11
demo/rank/runexp.sh
Executable file
11
demo/rank/runexp.sh
Executable file
@@ -0,0 +1,11 @@
|
|||||||
|
# Convert each LETOR split from RankSVM format into xgboost feature +
# group files, then train and predict with the ranking config.
for split in train test vali; do
    python trans_data.py ${split}.txt mq2008.${split} mq2008.${split}.group
done

../../xgboost mq2008.conf

../../xgboost mq2008.conf task=pred model_in=0004.model
|
||||||
|
|
||||||
|
|
||||||
41
demo/rank/trans_data.py
Normal file
41
demo/rank/trans_data.py
Normal file
@@ -0,0 +1,41 @@
|
|||||||
|
import sys
|
||||||
|
|
||||||
|
def save_data(group_data,output_feature,output_group):
    """Write one query group: its size goes to the group file, and one
    feature line per instance goes to the feature file (features whose
    value is zero are dropped)."""
    if not group_data:
        return

    output_group.write("%d\n" % len(group_data))
    for row in group_data:
        # only include nonzero features
        kept = [feat for feat in row[2:] if float(feat.split(':')[1]) != 0.0]
        output_feature.write(row[0] + " " + " ".join(kept) + "\n")
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    if len(sys.argv) != 4:
        print ("Usage: python trans_data.py [Ranksvm Format Input] [Output Feature File] [Output Group File]")
        sys.exit(0)

    fi = open(sys.argv[1])
    output_feature = open(sys.argv[2],"w")
    output_group = open(sys.argv[3],"w")

    group_data = []
    group = ""
    for line in fi:
        if not line:
            break
        # strip trailing comment
        if "#" in line:
            line = line[:line.index("#")]
        splits = line.strip().split(" ")
        # skip blank or comment-only lines: they have no qid field and
        # would raise IndexError on splits[1]
        if len(splits) < 2:
            continue
        # a new qid starts a new group: flush the previous one
        if splits[1] != group:
            save_data(group_data,output_feature,output_group)
            group_data = []
        group = splits[1]
        group_data.append(splits)

    # flush the final group
    save_data(group_data,output_feature,output_group)

    fi.close()
    output_feature.close()
    output_group.close()
|
||||||
|
|
||||||
4
demo/rank/wgetdata.sh
Executable file
4
demo/rank/wgetdata.sh
Executable file
@@ -0,0 +1,4 @@
|
|||||||
|
#!/bin/bash
# Download the LETOR 4.0 MQ2008 archive, unpack it, and move the
# fold-1 split files into the current directory.
wget http://research.microsoft.com/en-us/um/beijing/projects/letor/LETOR4.0/Data/MQ2008.rar
unrar x MQ2008.rar
mv -f MQ2008/Fold1/*.txt .
|
||||||
@@ -1,9 +1,9 @@
|
|||||||
# General Parameters, see comment for each definition
|
# General Parameters, see comment for each definition
|
||||||
# choose the tree booster, 0: tree, 1: linear
|
# choose the tree booster, 0: tree, 1: linear
|
||||||
booster_type = 0
|
booster_type = 0
|
||||||
# this is the only difference with classification, use 0: linear regression
|
# this is the only difference with classification, use reg:linear to do linear classification
|
||||||
# when labels are in [0,1] we can also use 1: logistic regression
|
# when labels are in [0,1] we can also use reg:logistic
|
||||||
loss_type = 0
|
objective = reg:linear
|
||||||
|
|
||||||
# Tree Booster Parameters
|
# Tree Booster Parameters
|
||||||
# step size shrinkage
|
# step size shrinkage
|
||||||
|
|||||||
@@ -7,7 +7,7 @@ fmap = {}
|
|||||||
for l in open( 'machine.data' ):
|
for l in open( 'machine.data' ):
|
||||||
arr = l.split(',')
|
arr = l.split(',')
|
||||||
fo.write(arr[8])
|
fo.write(arr[8])
|
||||||
for i in xrange( 0,6 ):
|
for i in range( 0,6 ):
|
||||||
fo.write( ' %d:%s' %(i,arr[i+2]) )
|
fo.write( ' %d:%s' %(i,arr[i+2]) )
|
||||||
|
|
||||||
if arr[0] not in fmap:
|
if arr[0] not in fmap:
|
||||||
@@ -24,9 +24,9 @@ fo = open('featmap.txt', 'w')
|
|||||||
# list from machine.names
|
# list from machine.names
|
||||||
names = ['vendor','MYCT', 'MMIN', 'MMAX', 'CACH', 'CHMIN', 'CHMAX', 'PRP', 'ERP' ];
|
names = ['vendor','MYCT', 'MMIN', 'MMAX', 'CACH', 'CHMIN', 'CHMAX', 'PRP', 'ERP' ];
|
||||||
|
|
||||||
for i in xrange(0,6):
|
for i in range(0,6):
|
||||||
fo.write( '%d\t%s\tint\n' % (i, names[i+1]))
|
fo.write( '%d\t%s\tint\n' % (i, names[i+1]))
|
||||||
|
|
||||||
for v, k in sorted( fmap.iteritems(), key = lambda x:x[1] ):
|
for v, k in sorted( fmap.items(), key = lambda x:x[1] ):
|
||||||
fo.write( '%d\tvendor=%s\ti\n' % (k, v))
|
fo.write( '%d\tvendor=%s\ti\n' % (k, v))
|
||||||
fo.close()
|
fo.close()
|
||||||
|
|||||||
@@ -3,7 +3,7 @@ import sys
|
|||||||
import random
|
import random
|
||||||
|
|
||||||
if len(sys.argv) < 2:
|
if len(sys.argv) < 2:
|
||||||
print 'Usage:<filename> <k> [nfold = 5]'
|
print ('Usage:<filename> <k> [nfold = 5]')
|
||||||
exit(0)
|
exit(0)
|
||||||
|
|
||||||
random.seed( 10 )
|
random.seed( 10 )
|
||||||
|
|||||||
26
python/Makefile
Normal file
26
python/Makefile
Normal file
@@ -0,0 +1,26 @@
|
|||||||
|
export CC = gcc
export CXX = g++
export CFLAGS = -Wall -O3 -msse2 -Wno-unknown-pragmas -fopenmp

# name of the python wrapper shared library built by this Makefile
SLIB = libxgboostpy.so
.PHONY: clean all

all: $(SLIB)
export LDFLAGS= -pthread -lm

# dependency-only rule: relink the shared library whenever the wrapper
# source or any xgboost header changes (the recipe is under $(SLIB) below)
libxgboostpy.so: xgboost_python.cpp ../regrank/*.h ../booster/*.h ../booster/*/*.hpp ../booster/*.hpp

# generic recipe for shared libraries: position-independent code, -shared
$(SLIB) :
	$(CXX) $(CFLAGS) -fPIC $(LDFLAGS) -shared -o $@ $(filter %.cpp %.o %.c, $^)
# generic recipe for executables
$(BIN) :
	$(CXX) $(CFLAGS) $(LDFLAGS) -o $@ $(filter %.cpp %.o %.c, $^)

# generic recipe for object files (compiles the first source prerequisite)
$(OBJ) :
	$(CXX) -c $(CFLAGS) -o $@ $(firstword $(filter %.cpp %.c, $^) )

install:
	cp -f -r $(BIN) $(INSTALL_PATH)

clean:
	$(RM) $(OBJ) $(BIN) $(SLIB) *~
||||||
3
python/README.md
Normal file
3
python/README.md
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
python wrapper for xgboost using ctypes
|
||||||
|
|
||||||
|
see example for usage
|
||||||
3
python/example/README.md
Normal file
3
python/example/README.md
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
example to use python xgboost, the data is generated from demo/binary_classification, in libsvm format
|
||||||
|
|
||||||
|
for usage: see demo.py and comments in demo.py
|
||||||
1611
python/example/agaricus.txt.test
Normal file
1611
python/example/agaricus.txt.test
Normal file
File diff suppressed because it is too large
Load Diff
6513
python/example/agaricus.txt.train
Normal file
6513
python/example/agaricus.txt.train
Normal file
File diff suppressed because it is too large
Load Diff
96
python/example/demo.py
Executable file
96
python/example/demo.py
Executable file
@@ -0,0 +1,96 @@
|
|||||||
|
#!/usr/bin/python
|
||||||
|
import sys
|
||||||
|
import numpy as np
|
||||||
|
import scipy.sparse
|
||||||
|
# append the path to xgboost, you may need to change the following line
|
||||||
|
sys.path.append('../')
|
||||||
|
import xgboost as xgb
|
||||||
|
|
||||||
|
### simple example
|
||||||
|
# load file from text file, also binary buffer generated by xgboost
|
||||||
|
dtrain = xgb.DMatrix('agaricus.txt.train')
|
||||||
|
dtest = xgb.DMatrix('agaricus.txt.test')
|
||||||
|
|
||||||
|
# specify parameters via map, definition are same as c++ version
|
||||||
|
param = {'bst:max_depth':2, 'bst:eta':1, 'silent':1, 'objective':'binary:logistic' }
|
||||||
|
|
||||||
|
# specify validations set to watch performance
|
||||||
|
evallist = [(dtest,'eval'), (dtrain,'train')]
|
||||||
|
num_round = 2
|
||||||
|
bst = xgb.train( param, dtrain, num_round, evallist )
|
||||||
|
|
||||||
|
# this is prediction
|
||||||
|
preds = bst.predict( dtest )
|
||||||
|
labels = dtest.get_label()
|
||||||
|
print ('error=%f' % ( sum(1 for i in range(len(preds)) if int(preds[i]>0.5)!=labels[i]) /float(len(preds))))
|
||||||
|
bst.save_model('0001.model')
|
||||||
|
# dump model
|
||||||
|
bst.dump_model('dump.raw.txt')
|
||||||
|
# dump model with feature map
|
||||||
|
bst.dump_model('dump.raw.txt','featmap.txt')
|
||||||
|
|
||||||
|
###
|
||||||
|
# build dmatrix in python iteratively
|
||||||
|
#
|
||||||
|
print ('start running example of build DMatrix in python')
|
||||||
|
dtrain = xgb.DMatrix()
|
||||||
|
labels = []
|
||||||
|
for l in open('agaricus.txt.train'):
|
||||||
|
arr = l.split()
|
||||||
|
labels.append( int(arr[0]))
|
||||||
|
feats = []
|
||||||
|
for it in arr[1:]:
|
||||||
|
k,v = it.split(':')
|
||||||
|
feats.append( (int(k), float(v)) )
|
||||||
|
dtrain.add_row( feats )
|
||||||
|
dtrain.set_label( labels )
|
||||||
|
evallist = [(dtest,'eval'), (dtrain,'train')]
|
||||||
|
|
||||||
|
bst = xgb.train( param, dtrain, num_round, evallist )
|
||||||
|
|
||||||
|
###
|
||||||
|
# build dmatrix from scipy.sparse
|
||||||
|
print ('start running example of build DMatrix from scipy.sparse')
|
||||||
|
labels = []
|
||||||
|
row = []; col = []; dat = []
|
||||||
|
i = 0
|
||||||
|
for l in open('agaricus.txt.train'):
|
||||||
|
arr = l.split()
|
||||||
|
labels.append( int(arr[0]))
|
||||||
|
for it in arr[1:]:
|
||||||
|
k,v = it.split(':')
|
||||||
|
row.append(i); col.append(int(k)); dat.append(float(v))
|
||||||
|
i += 1
|
||||||
|
|
||||||
|
csr = scipy.sparse.csr_matrix( (dat, (row,col)) )
|
||||||
|
dtrain = xgb.DMatrix( csr )
|
||||||
|
dtrain.set_label(labels)
|
||||||
|
evallist = [(dtest,'eval'), (dtrain,'train')]
|
||||||
|
bst = xgb.train( param, dtrain, num_round, evallist )
|
||||||
|
|
||||||
|
print ('start running example of build DMatrix from numpy array')
|
||||||
|
# NOTE: npymat is numpy array, we will convert it into scipy.sparse.csr_matrix in internal implementation,then convert to DMatrix
|
||||||
|
npymat = csr.todense()
|
||||||
|
dtrain = xgb.DMatrix( npymat )
|
||||||
|
dtrain.set_label(labels)
|
||||||
|
evallist = [(dtest,'eval'), (dtrain,'train')]
|
||||||
|
bst = xgb.train( param, dtrain, num_round, evallist )
|
||||||
|
|
||||||
|
###
|
||||||
|
# advanced: customized loss function, set loss_type to 0, so that predict gets the untransformed score
|
||||||
|
#
|
||||||
|
print ('start running example to used cutomized objective function')
|
||||||
|
|
||||||
|
# note: set objective= binary:logistic means the prediction will get logistic transformed
|
||||||
|
# in most case, we may want to leave it as default
|
||||||
|
param = {'bst:max_depth':2, 'bst:eta':1, 'silent':1, 'objective':'binary:logistic' }
|
||||||
|
|
||||||
|
# user define objective function, given prediction, return gradient and second order gradient
|
||||||
|
def logregobj( preds, dtrain ):
|
||||||
|
labels = dtrain.get_label()
|
||||||
|
grad = preds - labels
|
||||||
|
hess = preds * (1.0-preds)
|
||||||
|
return grad, hess
|
||||||
|
|
||||||
|
# training with customized objective, we can also do step by step training, simply look at xgboost.py's implementation of train
|
||||||
|
bst = xgb.train( param, dtrain, num_round, evallist, logregobj )
|
||||||
126
python/example/featmap.txt
Normal file
126
python/example/featmap.txt
Normal file
@@ -0,0 +1,126 @@
|
|||||||
|
0 cap-shape=bell i
|
||||||
|
1 cap-shape=conical i
|
||||||
|
2 cap-shape=convex i
|
||||||
|
3 cap-shape=flat i
|
||||||
|
4 cap-shape=knobbed i
|
||||||
|
5 cap-shape=sunken i
|
||||||
|
6 cap-surface=fibrous i
|
||||||
|
7 cap-surface=grooves i
|
||||||
|
8 cap-surface=scaly i
|
||||||
|
9 cap-surface=smooth i
|
||||||
|
10 cap-color=brown i
|
||||||
|
11 cap-color=buff i
|
||||||
|
12 cap-color=cinnamon i
|
||||||
|
13 cap-color=gray i
|
||||||
|
14 cap-color=green i
|
||||||
|
15 cap-color=pink i
|
||||||
|
16 cap-color=purple i
|
||||||
|
17 cap-color=red i
|
||||||
|
18 cap-color=white i
|
||||||
|
19 cap-color=yellow i
|
||||||
|
20 bruises?=bruises i
|
||||||
|
21 bruises?=no i
|
||||||
|
22 odor=almond i
|
||||||
|
23 odor=anise i
|
||||||
|
24 odor=creosote i
|
||||||
|
25 odor=fishy i
|
||||||
|
26 odor=foul i
|
||||||
|
27 odor=musty i
|
||||||
|
28 odor=none i
|
||||||
|
29 odor=pungent i
|
||||||
|
30 odor=spicy i
|
||||||
|
31 gill-attachment=attached i
|
||||||
|
32 gill-attachment=descending i
|
||||||
|
33 gill-attachment=free i
|
||||||
|
34 gill-attachment=notched i
|
||||||
|
35 gill-spacing=close i
|
||||||
|
36 gill-spacing=crowded i
|
||||||
|
37 gill-spacing=distant i
|
||||||
|
38 gill-size=broad i
|
||||||
|
39 gill-size=narrow i
|
||||||
|
40 gill-color=black i
|
||||||
|
41 gill-color=brown i
|
||||||
|
42 gill-color=buff i
|
||||||
|
43 gill-color=chocolate i
|
||||||
|
44 gill-color=gray i
|
||||||
|
45 gill-color=green i
|
||||||
|
46 gill-color=orange i
|
||||||
|
47 gill-color=pink i
|
||||||
|
48 gill-color=purple i
|
||||||
|
49 gill-color=red i
|
||||||
|
50 gill-color=white i
|
||||||
|
51 gill-color=yellow i
|
||||||
|
52 stalk-shape=enlarging i
|
||||||
|
53 stalk-shape=tapering i
|
||||||
|
54 stalk-root=bulbous i
|
||||||
|
55 stalk-root=club i
|
||||||
|
56 stalk-root=cup i
|
||||||
|
57 stalk-root=equal i
|
||||||
|
58 stalk-root=rhizomorphs i
|
||||||
|
59 stalk-root=rooted i
|
||||||
|
60 stalk-root=missing i
|
||||||
|
61 stalk-surface-above-ring=fibrous i
|
||||||
|
62 stalk-surface-above-ring=scaly i
|
||||||
|
63 stalk-surface-above-ring=silky i
|
||||||
|
64 stalk-surface-above-ring=smooth i
|
||||||
|
65 stalk-surface-below-ring=fibrous i
|
||||||
|
66 stalk-surface-below-ring=scaly i
|
||||||
|
67 stalk-surface-below-ring=silky i
|
||||||
|
68 stalk-surface-below-ring=smooth i
|
||||||
|
69 stalk-color-above-ring=brown i
|
||||||
|
70 stalk-color-above-ring=buff i
|
||||||
|
71 stalk-color-above-ring=cinnamon i
|
||||||
|
72 stalk-color-above-ring=gray i
|
||||||
|
73 stalk-color-above-ring=orange i
|
||||||
|
74 stalk-color-above-ring=pink i
|
||||||
|
75 stalk-color-above-ring=red i
|
||||||
|
76 stalk-color-above-ring=white i
|
||||||
|
77 stalk-color-above-ring=yellow i
|
||||||
|
78 stalk-color-below-ring=brown i
|
||||||
|
79 stalk-color-below-ring=buff i
|
||||||
|
80 stalk-color-below-ring=cinnamon i
|
||||||
|
81 stalk-color-below-ring=gray i
|
||||||
|
82 stalk-color-below-ring=orange i
|
||||||
|
83 stalk-color-below-ring=pink i
|
||||||
|
84 stalk-color-below-ring=red i
|
||||||
|
85 stalk-color-below-ring=white i
|
||||||
|
86 stalk-color-below-ring=yellow i
|
||||||
|
87 veil-type=partial i
|
||||||
|
88 veil-type=universal i
|
||||||
|
89 veil-color=brown i
|
||||||
|
90 veil-color=orange i
|
||||||
|
91 veil-color=white i
|
||||||
|
92 veil-color=yellow i
|
||||||
|
93 ring-number=none i
|
||||||
|
94 ring-number=one i
|
||||||
|
95 ring-number=two i
|
||||||
|
96 ring-type=cobwebby i
|
||||||
|
97 ring-type=evanescent i
|
||||||
|
98 ring-type=flaring i
|
||||||
|
99 ring-type=large i
|
||||||
|
100 ring-type=none i
|
||||||
|
101 ring-type=pendant i
|
||||||
|
102 ring-type=sheathing i
|
||||||
|
103 ring-type=zone i
|
||||||
|
104 spore-print-color=black i
|
||||||
|
105 spore-print-color=brown i
|
||||||
|
106 spore-print-color=buff i
|
||||||
|
107 spore-print-color=chocolate i
|
||||||
|
108 spore-print-color=green i
|
||||||
|
109 spore-print-color=orange i
|
||||||
|
110 spore-print-color=purple i
|
||||||
|
111 spore-print-color=white i
|
||||||
|
112 spore-print-color=yellow i
|
||||||
|
113 population=abundant i
|
||||||
|
114 population=clustered i
|
||||||
|
115 population=numerous i
|
||||||
|
116 population=scattered i
|
||||||
|
117 population=several i
|
||||||
|
118 population=solitary i
|
||||||
|
119 habitat=grasses i
|
||||||
|
120 habitat=leaves i
|
||||||
|
121 habitat=meadows i
|
||||||
|
122 habitat=paths i
|
||||||
|
123 habitat=urban i
|
||||||
|
124 habitat=waste i
|
||||||
|
125 habitat=woods i
|
||||||
205
python/xgboost.py
Normal file
205
python/xgboost.py
Normal file
@@ -0,0 +1,205 @@
|
|||||||
|
# Author: Tianqi Chen, Bing Xu
|
||||||
|
# module for xgboost
|
||||||
|
import ctypes
|
||||||
|
import os
|
||||||
|
# optionally have scipy sparse, though not necessary
|
||||||
|
import numpy
|
||||||
|
import numpy.ctypeslib
|
||||||
|
import scipy.sparse as scp
|
||||||
|
|
||||||
|
# set this line correctly
|
||||||
|
XGBOOST_PATH = os.path.dirname(__file__)+'/libxgboostpy.so'
|
||||||
|
|
||||||
|
# entry type of sparse matrix
|
||||||
|
class REntry(ctypes.Structure):
|
||||||
|
_fields_ = [("findex", ctypes.c_uint), ("fvalue", ctypes.c_float) ]
|
||||||
|
|
||||||
|
# load in xgboost library
|
||||||
|
xglib = ctypes.cdll.LoadLibrary(XGBOOST_PATH)
|
||||||
|
|
||||||
|
xglib.XGDMatrixCreate.restype = ctypes.c_void_p
|
||||||
|
xglib.XGDMatrixNumRow.restype = ctypes.c_ulong
|
||||||
|
xglib.XGDMatrixGetLabel.restype = ctypes.POINTER( ctypes.c_float )
|
||||||
|
xglib.XGDMatrixGetWeight.restype = ctypes.POINTER( ctypes.c_float )
|
||||||
|
xglib.XGDMatrixGetRow.restype = ctypes.POINTER( REntry )
|
||||||
|
xglib.XGBoosterCreate.restype = ctypes.c_void_p
|
||||||
|
xglib.XGBoosterPredict.restype = ctypes.POINTER( ctypes.c_float )
|
||||||
|
|
||||||
|
def ctypes2numpy( cptr, length ):
    """Copy `length` elements from a ctypes float pointer into a new
    1-D numpy float32 array and return it."""
    assert isinstance( cptr, ctypes.POINTER( ctypes.c_float ) )
    res = numpy.zeros( length, dtype='float32' )
    # keep the copy OUTSIDE of assert: an assert with a side effect is
    # silently skipped when Python runs with -O, losing the data copy
    if not ctypes.memmove( res.ctypes.data, cptr, length * res.strides[0] ):
        raise RuntimeError('memmove failed to copy the result buffer')
    return res
|
||||||
|
|
||||||
|
# data matrix used in xgboost
|
||||||
|
# data matrix used in xgboost
class DMatrix:
    """Data matrix used in xgboost: a thin ctypes wrapper around a
    handle into the C++ library."""

    def __init__(self, data=None, label=None, missing=0.0, weight=None):
        """Construct a DMatrix.

        data    -- file path, scipy.sparse.csr_matrix, 2-D numpy array,
                   or anything scipy can convert to csr; None creates an
                   empty matrix to be filled via add_row
        label   -- optional label vector
        missing -- value treated as missing in dense numpy input
        weight  -- optional per-instance weight vector
        """
        # force into void_p, mac need to pass things in as void_p
        self.handle = ctypes.c_void_p( xglib.XGDMatrixCreate() )
        # NOTE: use 'is None' rather than '== None': when data is a numpy
        # array, '== None' compares elementwise and does not yield a bool
        if data is None:
            return
        if isinstance(data, str):
            xglib.XGDMatrixLoad(self.handle, ctypes.c_char_p(data.encode('utf-8')), 1)
        elif isinstance(data, scp.csr_matrix):
            self.__init_from_csr(data)
        elif isinstance(data, numpy.ndarray) and len(data.shape) == 2:
            self.__init_from_npy2d(data, missing)
        else:
            try:
                csr = scp.csr_matrix(data)
                self.__init_from_csr(csr)
            except Exception:
                raise Exception("can not intialize DMatrix from"+str(type(data)))
        if label is not None:
            self.set_label(label)
        if weight is not None:
            self.set_weight(weight)

    # convert data from csr matrix
    def __init_from_csr(self, csr):
        assert len(csr.indices) == len(csr.data)
        xglib.XGDMatrixParseCSR( self.handle,
                                 ( ctypes.c_ulong * len(csr.indptr) )(*csr.indptr),
                                 ( ctypes.c_uint * len(csr.indices) )(*csr.indices),
                                 ( ctypes.c_float * len(csr.data) )(*csr.data),
                                 len(csr.indptr), len(csr.data) )

    # convert data from a dense numpy matrix; `missing` marks absent entries
    def __init_from_npy2d(self, mat, missing):
        data = numpy.array( mat.reshape(mat.size), dtype='float32' )
        xglib.XGDMatrixParseMat( self.handle,
                                 data.ctypes.data_as(ctypes.POINTER(ctypes.c_float)),
                                 mat.shape[0], mat.shape[1], ctypes.c_float(missing) )

    # destructor: release the underlying C++ handle
    def __del__(self):
        xglib.XGDMatrixFree(self.handle)

    # load data from file (text or binary buffer)
    def load(self, fname, silent=True):
        xglib.XGDMatrixLoad(self.handle, ctypes.c_char_p(fname.encode('utf-8')), int(silent))

    # save data into a binary buffer file for fast reloading
    def save_binary(self, fname, silent=True):
        xglib.XGDMatrixSaveBinary(self.handle, ctypes.c_char_p(fname.encode('utf-8')), int(silent))

    # set label of dmatrix
    def set_label(self, label):
        xglib.XGDMatrixSetLabel(self.handle, (ctypes.c_float*len(label))(*label), len(label) )

    # set group size of dmatrix, used for rank
    def set_group(self, group):
        xglib.XGDMatrixSetGroup(self.handle, (ctypes.c_uint*len(group))(*group), len(group) )

    # set weight of each instance
    def set_weight(self, weight):
        xglib.XGDMatrixSetWeight(self.handle, (ctypes.c_float*len(weight))(*weight), len(weight) )

    # get label from dmatrix as a numpy float32 array
    def get_label(self):
        length = ctypes.c_ulong()
        labels = xglib.XGDMatrixGetLabel(self.handle, ctypes.byref(length))
        return ctypes2numpy( labels, length.value )

    # get weight from dmatrix as a numpy float32 array
    def get_weight(self):
        length = ctypes.c_ulong()
        weights = xglib.XGDMatrixGetWeight(self.handle, ctypes.byref(length))
        return ctypes2numpy( weights, length.value )

    # clear everything
    def clear(self):
        xglib.XGDMatrixClear(self.handle)

    # number of rows currently stored
    def num_row(self):
        return xglib.XGDMatrixNumRow(self.handle)

    # append a row, given as a list of (findex, fvalue) pairs
    def add_row(self, row):
        xglib.XGDMatrixAddRow(self.handle, (REntry*len(row))(*row), len(row) )

    # get the ridx-th row as a list of (findex, fvalue) pairs
    def __getitem__(self, ridx):
        length = ctypes.c_ulong()
        row = xglib.XGDMatrixGetRow(self.handle, ridx, ctypes.byref(length) )
        return [ (int(row[i].findex), row[i].fvalue) for i in range(length.value) ]
||||||
|
|
||||||
|
class Booster:
    """Learner class wrapping an xgboost booster handle from the C API."""

    def __init__(self, params=None, cache=None):
        """Construct a booster.

        params: booster parameters, as a dict, or anything set_param accepts
        cache: list of DMatrix whose prediction results should be cached
        """
        # use None defaults: mutable default arguments ({} / []) are shared
        # across calls and would leak state between Booster instances
        if params is None:
            params = {}
        if cache is None:
            cache = []
        for d in cache:
            assert isinstance(d, DMatrix)
        dmats = (ctypes.c_void_p * len(cache))(*[d.handle for d in cache])
        self.handle = ctypes.c_void_p(xglib.XGBoosterCreate(dmats, len(cache)))
        # set seed first so a user-supplied 'seed' in params can override it
        self.set_param({'seed': 0})
        self.set_param(params)

    def __del__(self):
        xglib.XGBoosterFree(self.handle)

    def set_param(self, params, pv=None):
        """Set parameter(s).

        params: a dict of name -> value, a single name (with pv as its
                value), or an iterable of (name, value) pairs.
        pv: value used when params is a single parameter name
        """
        if isinstance(params, dict):
            for k, v in params.items():
                xglib.XGBoosterSetParam(
                    self.handle, ctypes.c_char_p(k.encode('utf-8')),
                    ctypes.c_char_p(str(v).encode('utf-8')))
        elif isinstance(params, str) and pv is not None:
            # identity comparison with None ('is not') instead of '!='
            xglib.XGBoosterSetParam(
                self.handle, ctypes.c_char_p(params.encode('utf-8')),
                ctypes.c_char_p(str(pv).encode('utf-8')))
        else:
            for k, v in params:
                xglib.XGBoosterSetParam(
                    self.handle, ctypes.c_char_p(k.encode('utf-8')),
                    ctypes.c_char_p(str(v).encode('utf-8')))

    def update(self, dtrain):
        """Update the model for one iteration using the configured objective."""
        assert isinstance(dtrain, DMatrix)
        xglib.XGBoosterUpdateOneIter(self.handle, dtrain.handle)

    def boost(self, dtrain, grad, hess, bst_group=-1):
        """Boost one iteration with user supplied first/second order gradients."""
        assert len(grad) == len(hess)
        assert isinstance(dtrain, DMatrix)
        xglib.XGBoosterBoostOneIter(self.handle, dtrain.handle,
                                    (ctypes.c_float * len(grad))(*grad),
                                    (ctypes.c_float * len(hess))(*hess),
                                    len(grad), bst_group)

    def update_interact(self, dtrain, action, booster_index=None):
        """beta: update with specified action"""
        assert isinstance(dtrain, DMatrix)
        if booster_index is not None:
            self.set_param('interact:booster_index', str(booster_index))
        # encode to bytes before wrapping in c_char_p: every other c_char_p
        # call in this class passes utf-8 bytes, and a bare str fails on Py3
        xglib.XGBoosterUpdateInteract(
            self.handle, dtrain.handle,
            ctypes.c_char_p(str(action).encode('utf-8')))

    def eval_set(self, evals, it=0):
        """Evaluate a sequence of (DMatrix, name) pairs at iteration it."""
        for d in evals:
            assert isinstance(d[0], DMatrix)
            assert isinstance(d[1], str)
        dmats = (ctypes.c_void_p * len(evals))(*[d[0].handle for d in evals])
        evnames = (ctypes.c_char_p * len(evals))(
            *[ctypes.c_char_p(d[1].encode('utf-8')) for d in evals])
        xglib.XGBoosterEvalOneIter(self.handle, it, dmats, evnames, len(evals))

    def eval(self, mat, name='eval', it=0):
        """Evaluate a single DMatrix."""
        self.eval_set([(mat, name)], it)

    def predict(self, data, bst_group=-1):
        """Predict for data; returns the result as a numpy array."""
        length = ctypes.c_ulong()
        preds = xglib.XGBoosterPredict(self.handle, data.handle,
                                       ctypes.byref(length), bst_group)
        return ctypes2numpy(preds, length.value)

    def save_model(self, fname):
        """ save model to file """
        xglib.XGBoosterSaveModel(self.handle, ctypes.c_char_p(fname.encode('utf-8')))

    def load_model(self, fname):
        """load model from file"""
        xglib.XGBoosterLoadModel(self.handle, ctypes.c_char_p(fname.encode('utf-8')))

    def dump_model(self, fname, fmap=''):
        """dump model into text file"""
        xglib.XGBoosterDumpModel(
            self.handle, ctypes.c_char_p(fname.encode('utf-8')),
            ctypes.c_char_p(fmap.encode('utf-8')))
|
||||||
|
|
||||||
|
def train(params, dtrain, num_boost_round=10, evals=(), obj=None):
    """Train a booster with given parameters.

    params: booster parameters (dict or (name, value) pair list)
    dtrain: training DMatrix
    num_boost_round: number of boosting iterations
    evals: sequence of (DMatrix, name) pairs evaluated each iteration
    obj: optional customized objective; called as obj(pred, dtrain) and
         expected to return (grad, hess)
    returns: the trained Booster
    """
    # immutable default for evals avoids the shared mutable-default pitfall
    bst = Booster(params, [dtrain] + [d[0] for d in evals])
    if obj is None:
        # built-in objective selected through params
        for i in range(num_boost_round):
            bst.update(dtrain)
            if len(evals) != 0:
                bst.eval_set(evals, i)
    else:
        # customized objective: user code computes gradients each round
        for i in range(num_boost_round):
            pred = bst.predict(dtrain)
            grad, hess = obj(pred, dtrain)
            bst.boost(dtrain, grad, hess)
            if len(evals) != 0:
                bst.eval_set(evals, i)
    return bst
|
||||||
|
|
||||||
297
python/xgboost_python.cpp
Normal file
297
python/xgboost_python.cpp
Normal file
@@ -0,0 +1,297 @@
|
|||||||
|
// implementations in ctypes
|
||||||
|
#include "xgboost_python.h"
|
||||||
|
#include "../regrank/xgboost_regrank.h"
|
||||||
|
#include "../regrank/xgboost_regrank_data.h"
|
||||||
|
|
||||||
|
namespace xgboost{
|
||||||
|
namespace python{
|
||||||
|
// python-facing data matrix: extends regrank::DMatrix with incremental
// construction (AddRow / ParseCSR / ParseMat) and raw accessors for ctypes
class DMatrix: public regrank::DMatrix{
public:
    // whether column is initialized
    bool init_col_;
public:
    DMatrix(void){
        init_col_ = false;
    }
    ~DMatrix(void){}
public:
    // load from text file, or from a cached binary if one exists
    inline void Load(const char *fname, bool silent){
        this->CacheLoad(fname, silent);
        // a cached binary may already carry column access structure
        init_col_ = this->data.HaveColAccess();
    }
    // drop all feature data and meta info (labels, weights, groups)
    inline void Clear( void ){
        this->data.Clear();
        this->info.labels.clear();
        this->info.weights.clear();
        this->info.group_ptr.clear();
    }
    // number of rows currently stored
    inline size_t NumRow( void ) const{
        return this->data.NumRow();
    }
    // append one sparse row of len entries; invalidates column structure
    // NOTE(review): assumes mat.row_ptr_ is non-empty (leading 0 sentinel)
    // -- confirm this FMatrixS invariant holds after Clear()
    inline void AddRow( const XGEntry *data, size_t len ){
        xgboost::booster::FMatrixS &mat = this->data;
        mat.row_data_.resize( mat.row_ptr_.back() + len );
        memcpy( &mat.row_data_[mat.row_ptr_.back()], data, sizeof(XGEntry)*len );
        mat.row_ptr_.push_back( mat.row_ptr_.back() + len );
        init_col_ = false;
    }
    // pointer to row ridx; *len receives the number of entries in the row
    // NOTE(review): ridx is not bounds-checked
    inline const XGEntry* GetRow(unsigned ridx, size_t* len) const{
        const xgboost::booster::FMatrixS &mat = this->data;

        *len = mat.row_ptr_[ridx+1] - mat.row_ptr_[ridx];
        return &mat.row_data_[ mat.row_ptr_[ridx] ];
    }
    // replace contents with a CSR matrix; indptr has nindptr entries,
    // indices/data have nelem entries
    inline void ParseCSR( const size_t *indptr,
                          const unsigned *indices,
                          const float *data,
                          size_t nindptr,
                          size_t nelem ){
        xgboost::booster::FMatrixS &mat = this->data;
        mat.row_ptr_.resize( nindptr );
        memcpy( &mat.row_ptr_[0], indptr, sizeof(size_t)*nindptr );
        mat.row_data_.resize( nelem );
        for( size_t i = 0; i < nelem; ++ i ){
            mat.row_data_[i] = XGEntry(indices[i], data[i]);
        }
        // build derived structure right away so no lazy init is needed
        this->data.InitData();
        this->init_col_ = true;
    }

    // replace contents with a dense nrow x ncol matrix, skipping entries
    // equal to `missing`
    // NOTE(review): exact float comparison, so NaN is NOT treated as
    // missing here -- confirm callers never pass NaN as the sentinel
    inline void ParseMat( const float *data,
                          size_t nrow,
                          size_t ncol,
                          float missing ){
        xgboost::booster::FMatrixS &mat = this->data;
        mat.Clear();
        for( size_t i = 0; i < nrow; ++i, data += ncol ){
            size_t nelem = 0;
            for( size_t j = 0; j < ncol; ++j ){
                if( data[j] != missing ){
                    mat.row_data_.push_back( XGEntry(j, data[j]) );
                    ++ nelem;
                }
            }
            mat.row_ptr_.push_back( mat.row_ptr_.back() + nelem );
        }
        this->data.InitData();
        this->init_col_ = true;
    }
    // copy len labels into the matrix meta info
    inline void SetLabel( const float *label, size_t len ){
        this->info.labels.resize( len );
        memcpy( &(this->info).labels[0], label, sizeof(float)*len );
    }
    // set group sizes (ranking); stored as a cumulative group pointer
    // where group_ptr[i]..group_ptr[i+1] spans the i-th group's rows
    inline void SetGroup( const unsigned *group, size_t len ){
        this->info.group_ptr.resize( len + 1 );
        this->info.group_ptr[0] = 0;
        for( size_t i = 0; i < len; ++ i ){
            this->info.group_ptr[i+1] = this->info.group_ptr[i]+group[i];
        }
    }
    // copy len instance weights into the matrix meta info
    inline void SetWeight( const float *weight, size_t len ){
        this->info.weights.resize( len );
        memcpy( &(this->info).weights[0], weight, sizeof(float)*len );
    }
    // expose the label array; *len receives its length
    inline const float* GetLabel( size_t* len ) const{
        *len = this->info.labels.size();
        return &(this->info.labels[0]);
    }
    // expose the weight array; *len receives its length
    inline const float* GetWeight( size_t* len ) const{
        *len = this->info.weights.size();
        return &(this->info.weights[0]);
    }
    // lazily build column structure, then validate label/row consistency
    inline void CheckInit(void){
        if(!init_col_){
            this->data.InitData();
            init_col_ = true;
        }
        utils::Assert( this->data.NumRow() == this->info.labels.size(), "DMatrix: number of labels must match number of rows in matrix");
    }
};
|
||||||
|
|
||||||
|
class Booster: public xgboost::regrank::RegRankBoostLearner{
|
||||||
|
private:
|
||||||
|
bool init_trainer, init_model;
|
||||||
|
public:
|
||||||
|
Booster(const std::vector<regrank::DMatrix *> mats){
|
||||||
|
silent = 1;
|
||||||
|
init_trainer = false;
|
||||||
|
init_model = false;
|
||||||
|
this->SetCacheData(mats);
|
||||||
|
}
|
||||||
|
inline void CheckInit(void){
|
||||||
|
if( !init_trainer ){
|
||||||
|
this->InitTrainer(); init_trainer = true;
|
||||||
|
}
|
||||||
|
if( !init_model ){
|
||||||
|
this->InitModel(); init_model = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
inline void LoadModel( const char *fname ){
|
||||||
|
xgboost::regrank::RegRankBoostLearner::LoadModel(fname);
|
||||||
|
this->init_model = true;
|
||||||
|
}
|
||||||
|
inline void SetParam( const char *name, const char *val ){
|
||||||
|
if( !strcmp( name, "seed" ) ) random::Seed(atoi(val));
|
||||||
|
xgboost::regrank::RegRankBoostLearner::SetParam( name, val );
|
||||||
|
}
|
||||||
|
const float *Pred( const DMatrix &dmat, size_t *len, int bst_group ){
|
||||||
|
this->CheckInit();
|
||||||
|
|
||||||
|
this->Predict( this->preds_, dmat, bst_group );
|
||||||
|
*len = this->preds_.size();
|
||||||
|
return &this->preds_[0];
|
||||||
|
}
|
||||||
|
inline void BoostOneIter( const DMatrix &train,
|
||||||
|
float *grad, float *hess, size_t len, int bst_group ){
|
||||||
|
this->grad_.resize( len ); this->hess_.resize( len );
|
||||||
|
memcpy( &this->grad_[0], grad, sizeof(float)*len );
|
||||||
|
memcpy( &this->hess_[0], hess, sizeof(float)*len );
|
||||||
|
|
||||||
|
if( grad_.size() == train.Size() ){
|
||||||
|
if( bst_group < 0 ) bst_group = 0;
|
||||||
|
base_gbm.DoBoost(grad_, hess_, train.data, train.info.root_index, bst_group);
|
||||||
|
}else{
|
||||||
|
utils::Assert( bst_group == -1, "must set bst_group to -1 to support all group boosting" );
|
||||||
|
int ngroup = base_gbm.NumBoosterGroup();
|
||||||
|
utils::Assert( grad_.size() == train.Size() * (size_t)ngroup, "BUG: UpdateOneIter: mclass" );
|
||||||
|
std::vector<float> tgrad( train.Size() ), thess( train.Size() );
|
||||||
|
for( int g = 0; g < ngroup; ++ g ){
|
||||||
|
memcpy( &tgrad[0], &grad_[g*tgrad.size()], sizeof(float)*tgrad.size() );
|
||||||
|
memcpy( &thess[0], &hess_[g*tgrad.size()], sizeof(float)*tgrad.size() );
|
||||||
|
base_gbm.DoBoost(tgrad, thess, train.data, train.info.root_index, g );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
using namespace xgboost::python;
|
||||||
|
|
||||||
|
|
||||||
|
extern "C"{
    // ---------------- DMatrix C API ----------------
    // every handle below is an opaque pointer to python::DMatrix
    void* XGDMatrixCreate( void ){
        return new DMatrix();
    }
    void XGDMatrixFree( void *handle ){
        delete static_cast<DMatrix*>(handle);
    }
    void XGDMatrixLoad( void *handle, const char *fname, int silent ){
        static_cast<DMatrix*>(handle)->Load(fname, silent!=0);
    }
    void XGDMatrixSaveBinary( void *handle, const char *fname, int silent ){
        static_cast<DMatrix*>(handle)->SaveBinary(fname, silent!=0);
    }
    void XGDMatrixParseCSR( void *handle,
                            const size_t *indptr,
                            const unsigned *indices,
                            const float *data,
                            size_t nindptr,
                            size_t nelem ){
        static_cast<DMatrix*>(handle)->ParseCSR(indptr, indices, data, nindptr, nelem);
    }
    void XGDMatrixParseMat( void *handle,
                            const float *data,
                            size_t nrow,
                            size_t ncol,
                            float missing ){
        static_cast<DMatrix*>(handle)->ParseMat(data, nrow, ncol, missing);
    }
    void XGDMatrixSetLabel( void *handle, const float *label, size_t len ){
        static_cast<DMatrix*>(handle)->SetLabel(label,len);
    }
    void XGDMatrixSetWeight( void *handle, const float *weight, size_t len ){
        static_cast<DMatrix*>(handle)->SetWeight(weight,len);
    }
    void XGDMatrixSetGroup( void *handle, const unsigned *group, size_t len ){
        static_cast<DMatrix*>(handle)->SetGroup(group,len);
    }
    const float* XGDMatrixGetLabel( const void *handle, size_t* len ){
        return static_cast<const DMatrix*>(handle)->GetLabel(len);
    }
    const float* XGDMatrixGetWeight( const void *handle, size_t* len ){
        return static_cast<const DMatrix*>(handle)->GetWeight(len);
    }
    void XGDMatrixClear(void *handle){
        static_cast<DMatrix*>(handle)->Clear();
    }
    void XGDMatrixAddRow( void *handle, const XGEntry *data, size_t len ){
        static_cast<DMatrix*>(handle)->AddRow(data, len);
    }
    size_t XGDMatrixNumRow(const void *handle){
        return static_cast<const DMatrix*>(handle)->NumRow();
    }
    const XGEntry* XGDMatrixGetRow(void *handle, unsigned ridx, size_t* len){
        return static_cast<DMatrix*>(handle)->GetRow(ridx, len);
    }

    // ---------------- Booster C API ----------------
    // xgboost implementation
    void *XGBoosterCreate( void *dmats[], size_t len ){
        std::vector<xgboost::regrank::DMatrix*> mats;
        for( size_t i = 0; i < len; ++i ){
            DMatrix *dtr = static_cast<DMatrix*>(dmats[i]);
            // validate each matrix (column init + label count) before caching
            dtr->CheckInit();
            mats.push_back( dtr );
        }
        return new Booster( mats );
    }
    void XGBoosterFree( void *handle ){
        delete static_cast<Booster*>(handle);
    }
    void XGBoosterSetParam( void *handle, const char *name, const char *value ){
        static_cast<Booster*>(handle)->SetParam( name, value );
    }
    void XGBoosterUpdateOneIter( void *handle, void *dtrain ){
        Booster *bst = static_cast<Booster*>(handle);
        DMatrix *dtr = static_cast<DMatrix*>(dtrain);
        bst->CheckInit(); dtr->CheckInit();
        bst->UpdateOneIter( *dtr );
    }
    void XGBoosterBoostOneIter( void *handle, void *dtrain,
                                float *grad, float *hess, size_t len, int bst_group ){
        Booster *bst = static_cast<Booster*>(handle);
        DMatrix *dtr = static_cast<DMatrix*>(dtrain);
        bst->CheckInit(); dtr->CheckInit();
        bst->BoostOneIter( *dtr, grad, hess, len, bst_group );
    }
    void XGBoosterEvalOneIter( void *handle, int iter, void *dmats[], const char *evnames[], size_t len ){
        Booster *bst = static_cast<Booster*>(handle);
        bst->CheckInit();

        std::vector<std::string> names;
        std::vector<const xgboost::regrank::DMatrix*> mats;
        for( size_t i = 0; i < len; ++i ){
            mats.push_back( static_cast<DMatrix*>(dmats[i]) );
            names.push_back( std::string( evnames[i]) );
        }
        // evaluation log is written to stderr
        bst->EvalOneIter( iter, mats, names, stderr );
    }
    const float *XGBoosterPredict( void *handle, void *dmat, size_t *len, int bst_group ){
        return static_cast<Booster*>(handle)->Pred( *static_cast<DMatrix*>(dmat), len, bst_group );
    }
    void XGBoosterLoadModel( void *handle, const char *fname ){
        static_cast<Booster*>(handle)->LoadModel( fname );
    }
    void XGBoosterSaveModel( const void *handle, const char *fname ){
        static_cast<const Booster*>(handle)->SaveModel( fname );
    }
    void XGBoosterDumpModel( void *handle, const char *fname, const char *fmap ){
        using namespace xgboost::utils;
        FILE *fo = FopenCheck( fname, "w" );
        FeatMap featmap;
        // optional feature map file; empty string means no name mapping
        if( strlen(fmap) != 0 ){
            featmap.LoadText( fmap );
        }
        static_cast<Booster*>(handle)->DumpModel( fo, featmap, false );
        fclose( fo );
    }

    void XGBoosterUpdateInteract( void *handle, void *dtrain, const char *action ){
        Booster *bst = static_cast<Booster*>(handle);
        DMatrix *dtr = static_cast<DMatrix*>(dtrain);
        bst->CheckInit(); dtr->CheckInit();
        std::string act( action );
        bst->UpdateInteract( act, *dtr );
    }
};
|
||||||
|
|
||||||
209
python/xgboost_python.h
Normal file
209
python/xgboost_python.h
Normal file
@@ -0,0 +1,209 @@
|
|||||||
|
#ifndef XGBOOST_PYTHON_H
#define XGBOOST_PYTHON_H
/*!
 * \file xgboost_python.h
 * \author Tianqi Chen
 * \brief python wrapper for xgboost, using ctypes;
 *        hides everything behind functions and
 *        uses a C style interface
 */
#include "../booster/xgboost_data.h"
extern "C"{
    /*! \brief type of row entry */
    typedef xgboost::booster::FMatrixS::REntry XGEntry;

    /*!
     * \brief create a data matrix
     * \return a new data matrix
     */
    void* XGDMatrixCreate(void);
    /*!
     * \brief free space in data matrix
     */
    void XGDMatrixFree(void *handle);
    /*!
     * \brief load a data matrix from text file or buffer (if it exists)
     * \param handle an instance of data matrix
     * \param fname file name
     * \param silent whether to print statistics when loading
     */
    void XGDMatrixLoad(void *handle, const char *fname, int silent);
    /*!
     * \brief save a data matrix into a binary file
     * \param handle an instance of data matrix
     * \param fname file name
     * \param silent whether to print statistics when saving
     */
    void XGDMatrixSaveBinary(void *handle, const char *fname, int silent);
    /*!
     * \brief set matrix content from CSR format
     * \param handle an instance of data matrix
     * \param indptr pointer to row headers
     * \param indices feature indices (findex)
     * \param data feature values (fvalue)
     * \param nindptr number of rows in the matrix + 1
     * \param nelem number of nonzero elements in the matrix
     */
    void XGDMatrixParseCSR( void *handle,
                            const size_t *indptr,
                            const unsigned *indices,
                            const float *data,
                            size_t nindptr,
                            size_t nelem );
    /*!
     * \brief set matrix content from dense matrix data
     * \param handle an instance of data matrix
     * \param data pointer to the data space
     * \param nrow number of rows
     * \param ncol number of columns
     * \param missing which value to represent missing value
     */
    void XGDMatrixParseMat( void *handle,
                            const float *data,
                            size_t nrow,
                            size_t ncol,
                            float missing );
    /*!
     * \brief set label of the training matrix
     * \param handle an instance of data matrix
     * \param label pointer to label
     * \param len length of array
     */
    void XGDMatrixSetLabel( void *handle, const float *label, size_t len );
    /*!
     * \brief set group sizes of the training matrix (for ranking)
     * \param handle an instance of data matrix
     * \param group pointer to group size
     * \param len length of array
     */
    void XGDMatrixSetGroup( void *handle, const unsigned *group, size_t len );
    /*!
     * \brief set weight of each instance
     * \param handle an instance of data matrix
     * \param weight data pointer to weights
     * \param len length of array
     */
    void XGDMatrixSetWeight( void *handle, const float *weight, size_t len );
    /*!
     * \brief get label set from matrix
     * \param handle an instance of data matrix
     * \param len used to set result length
     * \return pointer to the label
     */
    const float* XGDMatrixGetLabel( const void *handle, size_t* len );
    /*!
     * \brief get weight set from matrix
     * \param handle an instance of data matrix
     * \param len used to set result length
     * \return pointer to the weight
     */
    const float* XGDMatrixGetWeight( const void *handle, size_t* len );
    /*!
     * \brief clear all the records, including feature matrix and label
     * \param handle an instance of data matrix
     */
    void XGDMatrixClear(void *handle);
    /*!
     * \brief return number of rows
     */
    size_t XGDMatrixNumRow(const void *handle);
    /*!
     * \brief add a row
     * \param handle an instance of data matrix
     * \param data array of row content
     * \param len length of array
     */
    void XGDMatrixAddRow(void *handle, const XGEntry *data, size_t len);
    /*!
     * \brief get the ridx-th row of the sparse matrix
     * \param handle handle
     * \param ridx row index
     * \param len used to set result length
     * \return pointer to the row
     */
    const XGEntry* XGDMatrixGetRow(void *handle, unsigned ridx, size_t* len);

    // --- start XGBoost class
    /*!
     * \brief create xgboost learner
     * \param dmats matrices that are set to be cached
     * \param len number of matrices
     * \return handle to the created booster
     */
    void *XGBoosterCreate( void* dmats[], size_t len );
    /*!
     * \brief free obj in handle
     * \param handle handle to be freed
     */
    void XGBoosterFree( void* handle );
    /*!
     * \brief set parameters
     * \param handle handle
     * \param name parameter name
     * \param value value of parameter
     */
    void XGBoosterSetParam( void *handle, const char *name, const char *value );
    /*!
     * \brief update the model in one round using dtrain
     * \param handle handle
     * \param dtrain training data
     */
    void XGBoosterUpdateOneIter( void *handle, void *dtrain );

    /*!
     * \brief update the model by directly specifying gradient and second order gradient;
     *        this can be used to replace UpdateOneIter, to support customized loss functions
     * \param handle handle
     * \param dtrain training data
     * \param grad gradient statistics
     * \param hess second order gradient statistics
     * \param len length of grad/hess array
     * \param bst_group boost group we are working at, default = -1
     */
    void XGBoosterBoostOneIter( void *handle, void *dtrain,
                                float *grad, float *hess, size_t len, int bst_group );
    /*!
     * \brief print evaluation statistics to stderr for xgboost
     * \param handle handle
     * \param iter current iteration round
     * \param dmats pointers to data to be evaluated
     * \param evnames pointers to names of each data
     * \param len length of dmats
     */
    void XGBoosterEvalOneIter( void *handle, int iter, void *dmats[], const char *evnames[], size_t len );
    /*!
     * \brief make prediction based on dmat
     * \param handle handle
     * \param dmat data matrix
     * \param len used to store length of returning result
     * \param bst_group booster group; if model contains multiple booster groups, default = -1 means predict for all groups
     */
    const float *XGBoosterPredict( void *handle, void *dmat, size_t *len, int bst_group );
    /*!
     * \brief load model from existing file
     * \param handle handle
     * \param fname file name
     */
    void XGBoosterLoadModel( void *handle, const char *fname );
    /*!
     * \brief save model into existing file
     * \param handle handle
     * \param fname file name
     */
    void XGBoosterSaveModel( const void *handle, const char *fname );
    /*!
     * \brief dump model into text file
     * \param handle handle
     * \param fname file name
     * \param fmap path to the feature map; can be an empty string
     */
    void XGBoosterDumpModel( void *handle, const char *fname, const char *fmap );
    /*!
     * \brief interactively update model: beta
     * \param handle handle
     * \param dtrain training data
     * \param action action name
     */
    void XGBoosterUpdateInteract( void *handle, void *dtrain, const char* action );
};
#endif
|
||||||
|
|
||||||
401
regrank/xgboost_regrank.h
Normal file
401
regrank/xgboost_regrank.h
Normal file
@@ -0,0 +1,401 @@
|
|||||||
|
#ifndef XGBOOST_REGRANK_H
|
||||||
|
#define XGBOOST_REGRANK_H
|
||||||
|
/*!
|
||||||
|
* \file xgboost_regrank.h
|
||||||
|
* \brief class for gradient boosted regression and ranking
|
||||||
|
* \author Kailong Chen: chenkl198812@gmail.com, Tianqi Chen: tianqi.tchen@gmail.com
|
||||||
|
*/
|
||||||
|
#include <cmath>
|
||||||
|
#include <cstdlib>
|
||||||
|
#include <cstring>
|
||||||
|
#include "xgboost_regrank_data.h"
|
||||||
|
#include "xgboost_regrank_eval.h"
|
||||||
|
#include "xgboost_regrank_obj.h"
|
||||||
|
#include "../utils/xgboost_omp.h"
|
||||||
|
#include "../booster/xgboost_gbmbase.h"
|
||||||
|
#include "../utils/xgboost_utils.h"
|
||||||
|
#include "../utils/xgboost_stream.h"
|
||||||
|
|
||||||
|
namespace xgboost{
|
||||||
|
namespace regrank{
|
||||||
|
/*! \brief class for gradient boosted regression and ranking */
|
||||||
|
class RegRankBoostLearner{
|
||||||
|
public:
|
||||||
|
/*! \brief constructor: verbose by default; objective is created lazily in InitTrainer from name_obj_ */
RegRankBoostLearner(void){
    silent = 0;
    obj_ = NULL;
    name_obj_ = "reg:linear";
}
|
||||||
|
/*! \brief destructor: releases the objective if one was created */
~RegRankBoostLearner(void){
    if( obj_ != NULL ) delete obj_;
}
|
||||||
|
/*!
 * \brief a regression booster associated with training and evaluating data
 * \param mats array of pointers to matrices whose prediction results need to be cached
 */
RegRankBoostLearner(const std::vector<DMatrix *>& mats){
    silent = 0;
    obj_ = NULL;
    name_obj_ = "reg:linear";
    this->SetCacheData(mats);
}
|
||||||
|
/*!
 * \brief add internal cache space for the given matrices; this can speed up
 *        prediction for them, so cache the training and eval data here
 * warning: if the model is loaded from a file from some previous training history,
 *          SetCacheData must be called with exactly the SAME data matrices to
 *          continue training, otherwise it will cause an error
 * \param mats array of pointers to matrices whose prediction results need to be cached
 */
inline void SetCacheData(const std::vector<DMatrix *>& mats){
    // estimate feature bound
    int num_feature = 0;
    // assign buffer index
    unsigned buffer_size = 0;

    utils::Assert( cache_.size() == 0, "can only call cache data once" );
    for( size_t i = 0; i < mats.size(); ++i ){
        // skip matrices already seen (O(n^2) scan; n is small here)
        bool dupilicate = false;
        for( size_t j = 0; j < i; ++ j ){
            if( mats[i] == mats[j] ) dupilicate = true;
        }
        if( dupilicate ) continue;
        // set mats[i]'s cache learner pointer to this
        mats[i]->cache_learner_ptr_ = this;
        // each matrix owns the slice [buffer_size, buffer_size + Size()) of the prediction buffer
        cache_.push_back( CacheEntry( mats[i], buffer_size, mats[i]->Size() ) );
        buffer_size += static_cast<unsigned>(mats[i]->Size());
        num_feature = std::max(num_feature, (int)(mats[i]->data.NumCol()));
    }

    char str_temp[25];
    if (num_feature > mparam.num_feature){
        // widen the model's feature count to cover the cached data
        mparam.num_feature = num_feature;
        sprintf(str_temp, "%d", num_feature);
        base_gbm.SetParam("bst:num_feature", str_temp);
    }

    sprintf(str_temp, "%u", buffer_size);
    base_gbm.SetParam("num_pbuffer", str_temp);
    if (!silent){
        printf("buffer_size=%u\n", buffer_size);
    }
}
|
||||||
|
|
||||||
|
/*!
 * \brief set parameters from outside
 * \param name name of the parameter
 * \param val value of the parameter
 */
inline void SetParam(const char *name, const char *val){
    if (!strcmp(name, "silent")) silent = atoi(val);
    if (!strcmp(name, "eval_metric")) evaluator_.AddEval(val);
    if (!strcmp(name, "objective") ) name_obj_ = val;
    if (!strcmp(name, "num_class") ) base_gbm.SetParam("num_booster_group", val );
    // deliberately no else: every parameter is also broadcast to the
    // sub-components and recorded so the lazily-created objective can
    // replay it in InitTrainer
    mparam.SetParam(name, val);
    base_gbm.SetParam(name, val);
    cfg_.push_back( std::make_pair( std::string(name), std::string(val) ) );
}
|
||||||
|
/*!
 * \brief initialize solver before training, called before training;
 * this function is reserved for the solver to allocate necessary space
 * and do other preparation
 */
inline void InitTrainer(void){
    if( mparam.num_class != 0 ){
        // multi-class: force a softmax-family objective if none was chosen
        if( name_obj_ != "multi:softmax" && name_obj_ != "multi:softprob"){
            name_obj_ = "multi:softmax";
            printf("auto select objective=softmax to support multi-class classification\n" );
        }
    }
    base_gbm.InitTrainer();
    obj_ = CreateObjFunction( name_obj_.c_str() );
    // replay all recorded parameters into the freshly created objective
    for( size_t i = 0; i < cfg_.size(); ++ i ){
        obj_->SetParam( cfg_[i].first.c_str(), cfg_[i].second.c_str() );
    }
    evaluator_.AddEval( obj_->DefaultEvalMetric() );
}
|
||||||
|
/*!
 * \brief initialize the current data storage for the model; if the model
 * is used for the first time, call this function
 */
inline void InitModel(void){
    base_gbm.InitModel();
    // objective-dependent adjustment of the base prediction --
    // see ModelParam::AdjustBase for the exact semantics
    mparam.AdjustBase(name_obj_.c_str());
}
|
||||||
|
/*!
 * \brief load model from file
 * \param fname file name
 */
inline void LoadModel(const char *fname){
    utils::FileStream fi(utils::FopenCheck(fname, "rb"));
    this->LoadModel(fi);
    fi.Close();
}
|
||||||
|
/*!
 * \brief load model from stream
 * \param fi input stream
 */
inline void LoadModel(utils::IStream &fi){
    base_gbm.LoadModel(fi);
    utils::Assert(fi.Read(&mparam, sizeof(ModelParam)) != 0);
    // load the objective name (length-prefixed); if the record is absent
    // (presumably an older model format) the default name_obj_ is kept
    size_t len;
    if( fi.Read(&len, sizeof(len)) != 0 ){
        name_obj_.resize( len );
        if( len != 0 ){
            utils::Assert( fi.Read(&name_obj_[0], len*sizeof(char)) != 0 );
        }
    }
}
|
||||||
|
/*!
 * \brief DumpModel
 * \param fo text file
 * \param fmap feature map that may help give interpretations of features
 * \param with_stats whether to print statistics as well
 */
inline void DumpModel(FILE *fo, const utils::FeatMap& fmap, bool with_stats){
    base_gbm.DumpModel(fo, fmap, with_stats);
}
|
||||||
|
/*!
 * \brief dump the path of all trees
 * \param fo text file
 * \param data input data
 */
inline void DumpPath(FILE *fo, const DMatrix &data){
    base_gbm.DumpPath(fo, data.data);
}
|
||||||
|
/*!
 * \brief save model to stream
 * \param fo output stream
 */
inline void SaveModel(utils::IStream &fo) const{
    base_gbm.SaveModel(fo);
    fo.Write(&mparam, sizeof(ModelParam));
    // save the objective name, length-prefixed
    size_t len = name_obj_.length();
    fo.Write(&len, sizeof(len));
    // NOTE(review): &name_obj_[0] on an empty string is dubious pre-C++11;
    // guard this write if len can legitimately be 0
    fo.Write(&name_obj_[0], len*sizeof(char));
}
|
||||||
|
/*!
 * \brief save model into file
 * \param fname file name
 */
inline void SaveModel(const char *fname) const{
    utils::FileStream fo(utils::FopenCheck(fname, "wb"));
    this->SaveModel(fo);
    fo.Close();
}
|
||||||
|
/*!
 * \brief update the model for one iteration: predict, get gradients from
 *        the objective, then boost
 */
inline void UpdateOneIter(const DMatrix &train){
    this->PredictRaw(preds_, train);
    obj_->GetGradient(preds_, train.info, base_gbm.NumBoosters(), grad_, hess_);
    if( grad_.size() == train.Size() ){
        // single booster group
        base_gbm.DoBoost(grad_, hess_, train.data, train.info.root_index);
    }else{
        // multi-group (e.g. multi-class): gradients are laid out group by group
        int ngroup = base_gbm.NumBoosterGroup();
        utils::Assert( grad_.size() == train.Size() * (size_t)ngroup, "BUG: UpdateOneIter: mclass" );
        std::vector<float> tgrad( train.Size() ), thess( train.Size() );
        for( int g = 0; g < ngroup; ++ g ){
            // slice out this group's statistics and boost its boosters
            memcpy( &tgrad[0], &grad_[g*tgrad.size()], sizeof(float)*tgrad.size() );
            memcpy( &thess[0], &hess_[g*tgrad.size()], sizeof(float)*tgrad.size() );
            base_gbm.DoBoost(tgrad, thess, train.data, train.info.root_index, g );
        }
    }
}
|
||||||
|
/*!
|
||||||
|
* \brief evaluate the model for specific iteration
|
||||||
|
* \param iter iteration number
|
||||||
|
* \param evals datas i want to evaluate
|
||||||
|
* \param evname name of each dataset
|
||||||
|
* \param fo file to output log
|
||||||
|
*/
|
||||||
|
inline void EvalOneIter(int iter,
|
||||||
|
const std::vector<const DMatrix*> &evals,
|
||||||
|
const std::vector<std::string> &evname,
|
||||||
|
FILE *fo=stderr ){
|
||||||
|
fprintf(fo, "[%d]", iter);
|
||||||
|
for (size_t i = 0; i < evals.size(); ++i){
|
||||||
|
this->PredictRaw(preds_, *evals[i]);
|
||||||
|
obj_->EvalTransform(preds_);
|
||||||
|
evaluator_.Eval(fo, evname[i].c_str(), preds_, evals[i]->info);
|
||||||
|
}
|
||||||
|
fprintf(fo, "\n");
|
||||||
|
fflush(fo);
|
||||||
|
}
|
||||||
|
/*!
|
||||||
|
* \brief get prediction
|
||||||
|
* \param storage to store prediction
|
||||||
|
* \param data input data
|
||||||
|
* \param bst_group booster group we are in
|
||||||
|
*/
|
||||||
|
inline void Predict(std::vector<float> &preds, const DMatrix &data, int bst_group = -1){
|
||||||
|
this->PredictRaw( preds, data, bst_group );
|
||||||
|
obj_->PredTransform( preds );
|
||||||
|
}
|
||||||
|
public:
    /*!
     * \brief interactive update: refresh the prediction cache of every cached
     *        matrix, then either remove the last booster or boost a new one
     * \param action action type; "remove" deletes the most recent booster
     * \param train training data used when a booster is added
     */
    inline void UpdateInteract(std::string action, const DMatrix& train){
        // bring every cached prediction buffer up to date first
        for(size_t i = 0; i < cache_.size(); ++i){
            this->InteractPredict(preds_, *cache_[i].mat_);
        }

        if (action == "remove"){
            // NOTE: spelling follows the GBMBase API
            base_gbm.DelteBooster();
            return;
        }

        obj_->GetGradient(preds_, train.info, base_gbm.NumBoosters(), grad_, hess_);
        // interactive boosting does not use per-instance roots
        std::vector<unsigned> empty_root;
        base_gbm.DoBoost(grad_, hess_, train.data, empty_root);

        // re-predict so the caches stay consistent with the new booster
        for(size_t i = 0; i < cache_.size(); ++i){
            this->InteractRePredict(*cache_[i].mat_);
        }
    }
|
||||||
|
private:
    /*! \brief get the transformed predictions for a matrix that must be cached */
    inline void InteractPredict(std::vector<float> &preds, const DMatrix &data){
        const int buffer_offset = this->FindBufferOffset(data);
        utils::Assert( buffer_offset >=0, "interact mode must cache training data" );
        preds.resize(data.Size());
        const unsigned ndata = static_cast<unsigned>(data.Size());
        #pragma omp parallel for schedule( static )
        for (unsigned j = 0; j < ndata; ++j){
            preds[j] = mparam.base_score + base_gbm.InteractPredict(data.data, j, buffer_offset + j);
        }
        obj_->PredTransform( preds );
    }
|
||||||
|
/*! \brief repredict trial */
|
||||||
|
inline void InteractRePredict(const DMatrix &data){
|
||||||
|
int buffer_offset = this->FindBufferOffset(data);
|
||||||
|
utils::Assert( buffer_offset >=0, "interact mode must cache training data" );
|
||||||
|
const unsigned ndata = static_cast<unsigned>(data.Size());
|
||||||
|
#pragma omp parallel for schedule( static )
|
||||||
|
for (unsigned j = 0; j < ndata; ++j){
|
||||||
|
base_gbm.InteractRePredict(data.data, j, buffer_offset + j);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
/*! \brief get un-transformed prediction*/
|
||||||
|
inline void PredictRaw(std::vector<float> &preds, const DMatrix &data, int bst_group = -1 ){
|
||||||
|
int buffer_offset = this->FindBufferOffset(data);
|
||||||
|
if( bst_group < 0 ){
|
||||||
|
int ngroup = base_gbm.NumBoosterGroup();
|
||||||
|
preds.resize( data.Size() * ngroup );
|
||||||
|
for( int g = 0; g < ngroup; ++ g ){
|
||||||
|
this->PredictBuffer(&preds[ data.Size() * g ], data, buffer_offset, g );
|
||||||
|
}
|
||||||
|
}else{
|
||||||
|
preds.resize( data.Size() );
|
||||||
|
this->PredictBuffer(&preds[0], data, buffer_offset, bst_group );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
/*! \brief get the un-transformed predictions, given data */
|
||||||
|
inline void PredictBuffer(float *preds, const DMatrix &data, int buffer_offset, int bst_group ){
|
||||||
|
const unsigned ndata = static_cast<unsigned>(data.Size());
|
||||||
|
if( buffer_offset >= 0 ){
|
||||||
|
#pragma omp parallel for schedule( static )
|
||||||
|
for (unsigned j = 0; j < ndata; ++j){
|
||||||
|
preds[j] = mparam.base_score + base_gbm.Predict(data.data, j, buffer_offset + j, data.info.GetRoot(j), bst_group );
|
||||||
|
|
||||||
|
}
|
||||||
|
}else
|
||||||
|
#pragma omp parallel for schedule( static )
|
||||||
|
for (unsigned j = 0; j < ndata; ++j){
|
||||||
|
preds[j] = mparam.base_score + base_gbm.Predict(data.data, j, -1, data.info.GetRoot(j), bst_group );
|
||||||
|
}{
|
||||||
|
}
|
||||||
|
}
|
||||||
|
private:
    /*! \brief training parameter for regression, stored verbatim in the model file */
    struct ModelParam{
        /*! \brief global bias */
        float base_score;
        /*! \brief type of loss function */
        int loss_type;
        /*! \brief number of features */
        int num_feature;
        /*! \brief number of classes, when doing multi-class classification */
        int num_class;
        /*! \brief reserved fields, kept zeroed for binary compatibility */
        int reserved[15];
        /*! \brief constructor: set defaults and clear the reserved area */
        ModelParam(void){
            base_score = 0.5f;
            loss_type = -1;
            num_feature = 0;
            num_class = 0;
            memset(reserved, 0, sizeof(reserved));
        }
        /*!
         * \brief set parameters from outside
         * \param name name of the parameter
         * \param val value of the parameter
         */
        inline void SetParam(const char *name, const char *val){
            // the keys are mutually exclusive, so an else-if chain is equivalent
            if (!strcmp("base_score", name)) base_score = (float)atof(val);
            else if (!strcmp("num_class", name)) num_class = atoi(val);
            else if (!strcmp("loss_type", name)) loss_type = atoi(val);
            else if (!strcmp("bst:num_feature", name)) num_feature = atoi(val);
        }
        /*!
         * \brief adjust base_score based on loss type and objective function:
         *  logistic-type losses keep the bias in margin (logit) space
         */
        inline void AdjustBase(const char *obj){
            if( loss_type == -1 ){
                // default to logistic unless the objective is linear regression
                loss_type = 1;
                if( !strcmp("reg:linear", obj ) ) loss_type = 0;
            }
            if (loss_type == 1 || loss_type == 2 || loss_type == 3){
                utils::Assert(base_score > 0.0f && base_score < 1.0f, "sigmoid range constrain");
                base_score = -logf(1.0f / base_score - 1.0f);  // inverse sigmoid
            }
        }
    };
|
||||||
|
private:
|
||||||
|
struct CacheEntry{
|
||||||
|
const DMatrix *mat_;
|
||||||
|
int buffer_offset_;
|
||||||
|
size_t num_row_;
|
||||||
|
CacheEntry(const DMatrix *mat, int buffer_offset, size_t num_row)
|
||||||
|
:mat_(mat), buffer_offset_(buffer_offset), num_row_(num_row){}
|
||||||
|
};
|
||||||
|
/*! \brief the entries indicates that we have internal prediction cache */
|
||||||
|
std::vector<CacheEntry> cache_;
|
||||||
|
private:
    // find the internal buffer offset for a given matrix; returns -1 when the
    // matrix is not cached (or its cached row count no longer matches)
    inline int FindBufferOffset(const DMatrix &mat){
        for(size_t i = 0; i < cache_.size(); ++i){
            // double-checked: both the cache entry and the matrix itself must agree
            if( cache_[i].mat_ != &mat || mat.cache_learner_ptr_ != this ) continue;
            if( cache_[i].num_row_ == mat.Size() ){
                return cache_[i].buffer_offset_;
            }
            fprintf( stderr, "warning: number of rows in input matrix changed as remembered in cachelist, ignore cached results\n" );
            fflush( stderr );
        }
        return -1;
    }
|
||||||
|
protected:
|
||||||
|
int silent;
|
||||||
|
EvalSet evaluator_;
|
||||||
|
booster::GBMBase base_gbm;
|
||||||
|
ModelParam mparam;
|
||||||
|
// objective fnction
|
||||||
|
IObjFunction *obj_;
|
||||||
|
// name of objective function
|
||||||
|
std::string name_obj_;
|
||||||
|
std::vector< std::pair<std::string, std::string> > cfg_;
|
||||||
|
protected:
|
||||||
|
std::vector<float> grad_, hess_, preds_;
|
||||||
|
};
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif
|
||||||
260
regrank/xgboost_regrank_data.h
Normal file
260
regrank/xgboost_regrank_data.h
Normal file
@@ -0,0 +1,260 @@
|
|||||||
|
#ifndef XGBOOST_REGRANK_DATA_H
|
||||||
|
#define XGBOOST_REGRANK_DATA_H
|
||||||
|
|
||||||
|
/*!
|
||||||
|
* \file xgboost_regrank_data.h
|
||||||
|
* \brief input data structure for regression, binary classification, and rankning.
|
||||||
|
* Format:
|
||||||
|
* The data should contain each data instance in each line.
|
||||||
|
* The format of line data is as below:
|
||||||
|
* label <nonzero feature dimension> [feature index:feature value]+
|
||||||
|
* When using rank, an addtional group file with suffix group must be provided, giving the number of instances in each group
|
||||||
|
* When using weighted aware classification(regression), an addtional weight file must be provided, giving the weight of each instance
|
||||||
|
*
|
||||||
|
* \author Kailong Chen: chenkl198812@gmail.com, Tianqi Chen: tianqi.tchen@gmail.com
|
||||||
|
*/
|
||||||
|
#include <cstdio>
|
||||||
|
#include <vector>
|
||||||
|
#include <string>
|
||||||
|
#include <cstring>
|
||||||
|
#include "../booster/xgboost_data.h"
|
||||||
|
#include "../utils/xgboost_utils.h"
|
||||||
|
#include "../utils/xgboost_stream.h"
|
||||||
|
|
||||||
|
namespace xgboost{
|
||||||
|
/*! \brief namespace to handle regression and rank */
|
||||||
|
namespace regrank{
|
||||||
|
/*! \brief data matrix for regression content */
|
||||||
|
struct DMatrix{
|
||||||
|
public:
|
||||||
|
/*! \brief data information besides the features */
|
||||||
|
struct Info{
|
||||||
|
/*! \brief label of each instance */
|
||||||
|
std::vector<float> labels;
|
||||||
|
/*! \brief the index of begin and end of a groupneeded when the learning task is ranking */
|
||||||
|
std::vector<unsigned> group_ptr;
|
||||||
|
/*! \brief weights of each instance, optional */
|
||||||
|
std::vector<float> weights;
|
||||||
|
/*! \brief specified root index of each instance, can be used for multi task setting*/
|
||||||
|
std::vector<unsigned> root_index;
|
||||||
|
/*! \brief get weight of each instances */
|
||||||
|
inline float GetWeight( size_t i ) const{
|
||||||
|
if( weights.size() != 0 ) return weights[i];
|
||||||
|
else return 1.0f;
|
||||||
|
}
|
||||||
|
inline float GetRoot( size_t i ) const{
|
||||||
|
if( root_index.size() != 0 ) return static_cast<float>(root_index[i]);
|
||||||
|
else return 0;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
public:
|
||||||
|
/*! \brief feature data content */
|
||||||
|
booster::FMatrixS data;
|
||||||
|
/*! \brief information fields */
|
||||||
|
Info info;
|
||||||
|
/*!
|
||||||
|
* \brief cache pointer to verify if the data structure is cached in some learner
|
||||||
|
* this is a bit ugly, we need to have double check verification, so if one side get deleted,
|
||||||
|
* and some strange re-allocation gets the same pointer we will still be fine
|
||||||
|
*/
|
||||||
|
void *cache_learner_ptr_;
|
||||||
|
public:
|
||||||
|
/*! \brief default constructor */
|
||||||
|
DMatrix(void):cache_learner_ptr_(NULL){}
|
||||||
|
/*! \brief get the number of instances */
|
||||||
|
inline size_t Size() const{
|
||||||
|
return data.NumRow();
|
||||||
|
}
|
||||||
|
/*!
|
||||||
|
* \brief load from text file
|
||||||
|
* \param fname name of text data
|
||||||
|
* \param silent whether print information or not
|
||||||
|
*/
|
||||||
|
inline void LoadText(const char* fname, bool silent = false){
|
||||||
|
data.Clear();
|
||||||
|
FILE* file = utils::FopenCheck(fname, "r");
|
||||||
|
float label; bool init = true;
|
||||||
|
char tmp[1024];
|
||||||
|
std::vector<booster::bst_uint> findex;
|
||||||
|
std::vector<booster::bst_float> fvalue;
|
||||||
|
|
||||||
|
while (fscanf(file, "%s", tmp) == 1){
|
||||||
|
unsigned index; float value;
|
||||||
|
if (sscanf(tmp, "%u:%f", &index, &value) == 2){
|
||||||
|
findex.push_back(index); fvalue.push_back(value);
|
||||||
|
}
|
||||||
|
else{
|
||||||
|
if (!init){
|
||||||
|
info.labels.push_back(label);
|
||||||
|
data.AddRow(findex, fvalue);
|
||||||
|
}
|
||||||
|
findex.clear(); fvalue.clear();
|
||||||
|
utils::Assert(sscanf(tmp, "%f", &label) == 1, "invalid format");
|
||||||
|
init = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
info.labels.push_back(label);
|
||||||
|
data.AddRow(findex, fvalue);
|
||||||
|
// initialize column support as well
|
||||||
|
data.InitData();
|
||||||
|
|
||||||
|
if (!silent){
|
||||||
|
printf("%ux%u matrix with %lu entries is loaded from %s\n",
|
||||||
|
(unsigned)data.NumRow(), (unsigned)data.NumCol(), (unsigned long)data.NumEntry(), fname);
|
||||||
|
}
|
||||||
|
fclose(file);
|
||||||
|
this->TryLoadGroup(fname, silent);
|
||||||
|
this->TryLoadWeight(fname, silent);
|
||||||
|
}
|
||||||
|
/*!
|
||||||
|
* \brief load from binary file
|
||||||
|
* \param fname name of binary data
|
||||||
|
* \param silent whether print information or not
|
||||||
|
* \return whether loading is success
|
||||||
|
*/
|
||||||
|
inline bool LoadBinary(const char* fname, bool silent = false){
|
||||||
|
FILE *fp = fopen64(fname, "rb");
|
||||||
|
if (fp == NULL) return false;
|
||||||
|
utils::FileStream fs(fp);
|
||||||
|
data.LoadBinary(fs);
|
||||||
|
info.labels.resize(data.NumRow());
|
||||||
|
utils::Assert(fs.Read(&info.labels[0], sizeof(float)* data.NumRow()) != 0, "DMatrix LoadBinary");
|
||||||
|
{// load in group ptr
|
||||||
|
unsigned ngptr;
|
||||||
|
if( fs.Read(&ngptr, sizeof(unsigned) ) != 0 ){
|
||||||
|
info.group_ptr.resize( ngptr );
|
||||||
|
if( ngptr != 0 ){
|
||||||
|
utils::Assert( fs.Read(&info.group_ptr[0], sizeof(unsigned) * ngptr) != 0, "Load group file");
|
||||||
|
utils::Assert( info.group_ptr.back() == data.NumRow(), "number of group must match number of record" );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
{// load in weight
|
||||||
|
unsigned nwt;
|
||||||
|
if( fs.Read(&nwt, sizeof(unsigned) ) != 0 ){
|
||||||
|
utils::Assert( nwt == 0 || nwt == data.NumRow(), "invalid weight" );
|
||||||
|
info.weights.resize( nwt );
|
||||||
|
if( nwt != 0 ){
|
||||||
|
utils::Assert( fs.Read(&info.weights[0], sizeof(unsigned) * nwt) != 0, "Load weight file");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
fs.Close();
|
||||||
|
|
||||||
|
if (!silent){
|
||||||
|
printf("%ux%u matrix with %lu entries is loaded from %s\n",
|
||||||
|
(unsigned)data.NumRow(), (unsigned)data.NumCol(), (unsigned long)data.NumEntry(), fname);
|
||||||
|
if( info.group_ptr.size() != 0 ){
|
||||||
|
printf("data contains %u groups\n", (unsigned)info.group_ptr.size()-1 );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
/*!
|
||||||
|
* \brief save to binary file
|
||||||
|
* \param fname name of binary data
|
||||||
|
* \param silent whether print information or not
|
||||||
|
*/
|
||||||
|
inline void SaveBinary(const char* fname, bool silent = false){
|
||||||
|
// initialize column support as well
|
||||||
|
data.InitData();
|
||||||
|
|
||||||
|
utils::FileStream fs(utils::FopenCheck(fname, "wb"));
|
||||||
|
data.SaveBinary(fs);
|
||||||
|
utils::Assert( info.labels.size() == data.NumRow(), "label size is not consistent with feature matrix size" );
|
||||||
|
fs.Write(&info.labels[0], sizeof(float) * data.NumRow());
|
||||||
|
{// write out group ptr
|
||||||
|
unsigned ngptr = static_cast<unsigned>( info.group_ptr.size() );
|
||||||
|
fs.Write(&ngptr, sizeof(unsigned) );
|
||||||
|
if( ngptr != 0 ){
|
||||||
|
fs.Write(&info.group_ptr[0], sizeof(unsigned) * ngptr);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
{// write out weight
|
||||||
|
unsigned nwt = static_cast<unsigned>( info.weights.size() );
|
||||||
|
fs.Write( &nwt, sizeof(unsigned) );
|
||||||
|
if( nwt != 0 ){
|
||||||
|
fs.Write(&info.weights[0], sizeof(float) * nwt);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
fs.Close();
|
||||||
|
if (!silent){
|
||||||
|
printf("%ux%u matrix with %lu entries is saved to %s\n",
|
||||||
|
(unsigned)data.NumRow(), (unsigned)data.NumCol(), (unsigned long)data.NumEntry(), fname);
|
||||||
|
if( info.group_ptr.size() != 0 ){
|
||||||
|
printf("data contains %u groups\n", (unsigned)info.group_ptr.size()-1 );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
/*!
|
||||||
|
* \brief cache load data given a file name, if filename ends with .buffer, direct load binary
|
||||||
|
* otherwise the function will first check if fname + '.buffer' exists,
|
||||||
|
* if binary buffer exists, it will reads from binary buffer, otherwise, it will load from text file,
|
||||||
|
* and try to create a buffer file
|
||||||
|
* \param fname name of binary data
|
||||||
|
* \param silent whether print information or not
|
||||||
|
* \param savebuffer whether do save binary buffer if it is text
|
||||||
|
*/
|
||||||
|
inline void CacheLoad(const char *fname, bool silent = false, bool savebuffer = true){
|
||||||
|
int len = strlen(fname);
|
||||||
|
if (len > 8 && !strcmp(fname + len - 7, ".buffer")){
|
||||||
|
if( !this->LoadBinary(fname, silent) ){
|
||||||
|
fprintf(stderr,"can not open file \"%s\"", fname);
|
||||||
|
utils::Error("DMatrix::CacheLoad failed");
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
char bname[1024];
|
||||||
|
sprintf(bname, "%s.buffer", fname);
|
||||||
|
if (!this->LoadBinary(bname, silent)){
|
||||||
|
this->LoadText(fname, silent);
|
||||||
|
if (savebuffer) this->SaveBinary(bname, silent);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
private:
|
||||||
|
inline bool TryLoadGroup(const char* fname, bool silent = false){
|
||||||
|
std::string name = fname;
|
||||||
|
if (name.length() > 8 && !strcmp(fname + name.length() - 7, ".buffer")){
|
||||||
|
name.resize( name.length() - 7 );
|
||||||
|
}
|
||||||
|
name += ".group";
|
||||||
|
//if exists group data load it in
|
||||||
|
FILE *fi = fopen64(name.c_str(), "r");
|
||||||
|
if (fi == NULL) return false;
|
||||||
|
info.group_ptr.push_back(0);
|
||||||
|
unsigned nline;
|
||||||
|
while (fscanf(fi, "%u", &nline) == 1){
|
||||||
|
info.group_ptr.push_back(info.group_ptr.back()+nline);
|
||||||
|
}
|
||||||
|
if(!silent){
|
||||||
|
printf("%lu groups are loaded from %s\n", info.group_ptr.size()-1, name.c_str());
|
||||||
|
}
|
||||||
|
fclose(fi);
|
||||||
|
utils::Assert( info.group_ptr.back() == data.NumRow(), "DMatrix: group data does not match the number of rows in feature matrix" );
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
inline bool TryLoadWeight(const char* fname, bool silent = false){
|
||||||
|
std::string name = fname;
|
||||||
|
if (name.length() > 8 && !strcmp(fname + name.length() - 7, ".buffer")){
|
||||||
|
name.resize( name.length() - 7 );
|
||||||
|
}
|
||||||
|
name += ".weight";
|
||||||
|
//if exists group data load it in
|
||||||
|
FILE *fi = fopen64(name.c_str(), "r");
|
||||||
|
if (fi == NULL) return false;
|
||||||
|
float wt;
|
||||||
|
while (fscanf(fi, "%f", &wt) == 1){
|
||||||
|
info.weights.push_back( wt );
|
||||||
|
}
|
||||||
|
if(!silent){
|
||||||
|
printf("loading weight from %s\n", name.c_str());
|
||||||
|
}
|
||||||
|
fclose(fi);
|
||||||
|
utils::Assert( info.weights.size() == data.NumRow(), "DMatrix: weight data does not match the number of rows in feature matrix" );
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
};
|
||||||
|
};
|
||||||
|
#endif
|
||||||
375
regrank/xgboost_regrank_eval.h
Normal file
375
regrank/xgboost_regrank_eval.h
Normal file
@@ -0,0 +1,375 @@
|
|||||||
|
#ifndef XGBOOST_REGRANK_EVAL_H
|
||||||
|
#define XGBOOST_REGRANK_EVAL_H
|
||||||
|
/*!
|
||||||
|
* \file xgboost_regrank_eval.h
|
||||||
|
* \brief evaluation metrics for regression and classification and rank
|
||||||
|
* \author Kailong Chen: chenkl198812@gmail.com, Tianqi Chen: tianqi.tchen@gmail.com
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <cmath>
|
||||||
|
#include <vector>
|
||||||
|
#include <algorithm>
|
||||||
|
#include "../utils/xgboost_utils.h"
|
||||||
|
#include "../utils/xgboost_omp.h"
|
||||||
|
#include "../utils/xgboost_random.h"
|
||||||
|
#include "xgboost_regrank_data.h"
|
||||||
|
#include "xgboost_regrank_utils.h"
|
||||||
|
|
||||||
|
namespace xgboost{
|
||||||
|
namespace regrank{
|
||||||
|
/*! \brief evaluator that evaluates the loss metrics */
|
||||||
|
struct IEvaluator{
|
||||||
|
/*!
|
||||||
|
* \brief evaluate a specific metric
|
||||||
|
* \param preds prediction
|
||||||
|
* \param info information, including label etc.
|
||||||
|
*/
|
||||||
|
virtual float Eval(const std::vector<float> &preds,
|
||||||
|
const DMatrix::Info &info) const = 0;
|
||||||
|
/*! \return name of metric */
|
||||||
|
virtual const char *Name(void) const = 0;
|
||||||
|
/*! \brief virtual destructor */
|
||||||
|
virtual ~IEvaluator(void){}
|
||||||
|
};
|
||||||
|
|
||||||
|
/*! \brief RMSE */
|
||||||
|
struct EvalRMSE : public IEvaluator{
|
||||||
|
virtual float Eval(const std::vector<float> &preds,
|
||||||
|
const DMatrix::Info &info) const {
|
||||||
|
utils::Assert( preds.size() == info.labels.size(), "label size predict size not match" );
|
||||||
|
const unsigned ndata = static_cast<unsigned>(preds.size());
|
||||||
|
float sum = 0.0, wsum = 0.0;
|
||||||
|
#pragma omp parallel for reduction(+:sum,wsum) schedule( static )
|
||||||
|
for (unsigned i = 0; i < ndata; ++i){
|
||||||
|
const float wt = info.GetWeight(i);
|
||||||
|
const float diff = info.labels[i] - preds[i];
|
||||||
|
sum += diff*diff * wt;
|
||||||
|
wsum += wt;
|
||||||
|
}
|
||||||
|
return sqrtf(sum / wsum);
|
||||||
|
}
|
||||||
|
virtual const char *Name(void) const{
|
||||||
|
return "rmse";
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
/*! \brief Error */
|
||||||
|
struct EvalLogLoss : public IEvaluator{
|
||||||
|
virtual float Eval(const std::vector<float> &preds,
|
||||||
|
const DMatrix::Info &info) const {
|
||||||
|
utils::Assert( preds.size() == info.labels.size(), "label size predict size not match" );
|
||||||
|
const unsigned ndata = static_cast<unsigned>(preds.size());
|
||||||
|
float sum = 0.0f, wsum = 0.0f;
|
||||||
|
#pragma omp parallel for reduction(+:sum,wsum) schedule( static )
|
||||||
|
for (unsigned i = 0; i < ndata; ++i){
|
||||||
|
const float y = info.labels[i];
|
||||||
|
const float py = preds[i];
|
||||||
|
const float wt = info.GetWeight(i);
|
||||||
|
sum -= wt * (y * std::log(py) + (1.0f - y)*std::log(1 - py));
|
||||||
|
wsum += wt;
|
||||||
|
}
|
||||||
|
return sum / wsum;
|
||||||
|
}
|
||||||
|
virtual const char *Name(void) const{
|
||||||
|
return "negllik";
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
/*! \brief Error */
|
||||||
|
struct EvalError : public IEvaluator{
|
||||||
|
virtual float Eval(const std::vector<float> &preds,
|
||||||
|
const DMatrix::Info &info) const {
|
||||||
|
const unsigned ndata = static_cast<unsigned>(preds.size());
|
||||||
|
float sum = 0.0f, wsum = 0.0f;
|
||||||
|
#pragma omp parallel for reduction(+:sum,wsum) schedule( static )
|
||||||
|
for (unsigned i = 0; i < ndata; ++i){
|
||||||
|
const float wt = info.GetWeight(i);
|
||||||
|
if (preds[i] > 0.5f){
|
||||||
|
if (info.labels[i] < 0.5f) sum += wt;
|
||||||
|
}
|
||||||
|
else{
|
||||||
|
if (info.labels[i] >= 0.5f) sum += wt;
|
||||||
|
}
|
||||||
|
wsum += wt;
|
||||||
|
}
|
||||||
|
return sum / wsum;
|
||||||
|
}
|
||||||
|
virtual const char *Name(void) const{
|
||||||
|
return "error";
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
/*! \brief AMS: also records best threshold */
|
||||||
|
struct EvalAMS : public IEvaluator{
|
||||||
|
public:
|
||||||
|
EvalAMS(const char *name){
|
||||||
|
name_ = name;
|
||||||
|
// note: ams@0 will automatically select which ratio to go
|
||||||
|
utils::Assert( sscanf(name, "ams@%f", &ratio_ ) == 1, "invalid ams format" );
|
||||||
|
}
|
||||||
|
virtual float Eval(const std::vector<float> &preds,
|
||||||
|
const DMatrix::Info &info) const {
|
||||||
|
const unsigned ndata = static_cast<unsigned>(preds.size());
|
||||||
|
utils::Assert( info.weights.size() == ndata, "we need weight to evaluate ams");
|
||||||
|
std::vector< std::pair<float, unsigned> > rec(ndata);
|
||||||
|
|
||||||
|
#pragma omp parallel for schedule( static )
|
||||||
|
for (unsigned i = 0; i < ndata; ++i){
|
||||||
|
rec[i] = std::make_pair( preds[i], i );
|
||||||
|
}
|
||||||
|
std::sort( rec.begin(), rec.end(), CmpFirst );
|
||||||
|
unsigned ntop = static_cast<unsigned>( ratio_ * ndata );
|
||||||
|
if( ntop == 0 ) ntop = ndata;
|
||||||
|
const double br = 10.0;
|
||||||
|
unsigned thresindex = 0;
|
||||||
|
double s_tp = 0.0, b_fp = 0.0, tams = 0.0;
|
||||||
|
for (unsigned i = 0; i < ndata-1 && i < ntop; ++i){
|
||||||
|
const unsigned ridx = rec[i].second;
|
||||||
|
const float wt = info.weights[ridx];
|
||||||
|
if( info.labels[ridx] > 0.5f ){
|
||||||
|
s_tp += wt;
|
||||||
|
}else{
|
||||||
|
b_fp += wt;
|
||||||
|
}
|
||||||
|
if( rec[i].first != rec[i+1].first ){
|
||||||
|
double ams = sqrtf( 2*((s_tp+b_fp+br) * log( 1.0 + s_tp/(b_fp+br) ) - s_tp) );
|
||||||
|
if( tams < ams ){
|
||||||
|
thresindex = i;
|
||||||
|
tams = ams;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if( ntop == ndata ){
|
||||||
|
fprintf( stderr, "\tams-ratio=%g", float(thresindex)/ndata );
|
||||||
|
return tams;
|
||||||
|
}else{
|
||||||
|
return sqrtf( 2*((s_tp+b_fp+br) * log( 1.0 + s_tp/(b_fp+br) ) - s_tp) );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
virtual const char *Name(void) const{
|
||||||
|
return name_.c_str();
|
||||||
|
}
|
||||||
|
private:
|
||||||
|
std::string name_;
|
||||||
|
float ratio_;
|
||||||
|
};
|
||||||
|
|
||||||
|
/*! \brief Error for multi-class classification, need exact match */
|
||||||
|
struct EvalMatchError : public IEvaluator{
|
||||||
|
public:
|
||||||
|
virtual float Eval(const std::vector<float> &preds,
|
||||||
|
const DMatrix::Info &info) const {
|
||||||
|
const unsigned ndata = static_cast<unsigned>(preds.size());
|
||||||
|
float sum = 0.0f, wsum = 0.0f;
|
||||||
|
#pragma omp parallel for reduction(+:sum,wsum) schedule( static )
|
||||||
|
for (unsigned i = 0; i < ndata; ++i){
|
||||||
|
const float wt = info.GetWeight(i);
|
||||||
|
int label = static_cast<int>(info.labels[i]);
|
||||||
|
if (static_cast<int>(preds[i]) != label ) sum += wt;
|
||||||
|
wsum += wt;
|
||||||
|
}
|
||||||
|
return sum / wsum;
|
||||||
|
}
|
||||||
|
virtual const char *Name(void) const{
|
||||||
|
return "merror";
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
/*! \brief Area under curve, for both classification and rank */
|
||||||
|
struct EvalAuc : public IEvaluator{
|
||||||
|
virtual float Eval(const std::vector<float> &preds,
|
||||||
|
const DMatrix::Info &info) const {
|
||||||
|
utils::Assert( preds.size() == info.labels.size(), "label size predict size not match" );
|
||||||
|
std::vector<unsigned> tgptr(2, 0); tgptr[1] = preds.size();
|
||||||
|
const std::vector<unsigned> &gptr = info.group_ptr.size() == 0 ? tgptr : info.group_ptr;
|
||||||
|
utils::Assert(gptr.back() == preds.size(), "EvalAuc: group structure must match number of prediction");
|
||||||
|
const unsigned ngroup = static_cast<unsigned>(gptr.size() - 1);
|
||||||
|
|
||||||
|
double sum_auc = 0.0f;
|
||||||
|
#pragma omp parallel reduction(+:sum_auc)
|
||||||
|
{
|
||||||
|
// each thread takes a local rec
|
||||||
|
std::vector< std::pair<float, unsigned> > rec;
|
||||||
|
#pragma omp for schedule(static)
|
||||||
|
for (unsigned k = 0; k < ngroup; ++k){
|
||||||
|
rec.clear();
|
||||||
|
for (unsigned j = gptr[k]; j < gptr[k + 1]; ++j){
|
||||||
|
rec.push_back(std::make_pair(preds[j], j));
|
||||||
|
}
|
||||||
|
std::sort(rec.begin(), rec.end(), CmpFirst);
|
||||||
|
// calculate AUC
|
||||||
|
double sum_pospair = 0.0;
|
||||||
|
double sum_npos = 0.0, sum_nneg = 0.0, buf_pos = 0.0, buf_neg = 0.0;
|
||||||
|
for (size_t j = 0; j < rec.size(); ++j){
|
||||||
|
const float wt = info.GetWeight(rec[j].second);
|
||||||
|
const float ctr = info.labels[rec[j].second];
|
||||||
|
// keep bucketing predictions in same bucket
|
||||||
|
if (j != 0 && rec[j].first != rec[j - 1].first){
|
||||||
|
sum_pospair += buf_neg * (sum_npos + buf_pos *0.5);
|
||||||
|
sum_npos += buf_pos; sum_nneg += buf_neg;
|
||||||
|
buf_neg = buf_pos = 0.0f;
|
||||||
|
}
|
||||||
|
buf_pos += ctr * wt; buf_neg += (1.0f - ctr) * wt;
|
||||||
|
}
|
||||||
|
sum_pospair += buf_neg * (sum_npos + buf_pos *0.5);
|
||||||
|
sum_npos += buf_pos; sum_nneg += buf_neg;
|
||||||
|
//
|
||||||
|
utils::Assert(sum_npos > 0.0 && sum_nneg > 0.0, "the dataset only contains pos or neg samples");
|
||||||
|
// this is the AUC
|
||||||
|
sum_auc += sum_pospair / (sum_npos*sum_nneg);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// return average AUC over list
|
||||||
|
return static_cast<float>(sum_auc) / ngroup;
|
||||||
|
}
|
||||||
|
virtual const char *Name(void) const{
|
||||||
|
return "auc";
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
/*! \brief Evaluate rank list */
|
||||||
|
struct EvalRankList : public IEvaluator{
|
||||||
|
public:
|
||||||
|
virtual float Eval(const std::vector<float> &preds,
|
||||||
|
const DMatrix::Info &info) const {
|
||||||
|
utils::Assert( preds.size() == info.labels.size(), "label size predict size not match" );
|
||||||
|
const std::vector<unsigned> &gptr = info.group_ptr;
|
||||||
|
utils::Assert(gptr.size() != 0, "must specify group when constructing rank file");
|
||||||
|
utils::Assert( gptr.back() == preds.size(), "EvalRanklist: group structure must match number of prediction");
|
||||||
|
const unsigned ngroup = static_cast<unsigned>(gptr.size() - 1);
|
||||||
|
|
||||||
|
double sum_metric = 0.0f;
|
||||||
|
#pragma omp parallel reduction(+:sum_metric)
|
||||||
|
{
|
||||||
|
// each thread takes a local rec
|
||||||
|
std::vector< std::pair<float, unsigned> > rec;
|
||||||
|
#pragma omp for schedule(static)
|
||||||
|
for (unsigned k = 0; k < ngroup; ++k){
|
||||||
|
rec.clear();
|
||||||
|
for (unsigned j = gptr[k]; j < gptr[k + 1]; ++j){
|
||||||
|
rec.push_back(std::make_pair(preds[j], (int)info.labels[j]));
|
||||||
|
}
|
||||||
|
sum_metric += this->EvalMetric( rec );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return static_cast<float>(sum_metric) / ngroup;
|
||||||
|
}
|
||||||
|
virtual const char *Name(void) const{
|
||||||
|
return name_.c_str();
|
||||||
|
}
|
||||||
|
protected:
|
||||||
|
EvalRankList(const char *name){
|
||||||
|
name_ = name;
|
||||||
|
if( sscanf(name, "%*[^@]@%u", &topn_) != 1 ){
|
||||||
|
topn_ = UINT_MAX;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
/*! \return evaluation metric, given the pair_sort record, (pred,label) */
|
||||||
|
virtual float EvalMetric( std::vector< std::pair<float, unsigned> > &pair_sort ) const = 0;
|
||||||
|
protected:
|
||||||
|
unsigned topn_;
|
||||||
|
std::string name_;
|
||||||
|
};
|
||||||
|
|
||||||
|
/*! \brief Precison at N, for both classification and rank */
|
||||||
|
struct EvalPrecision : public EvalRankList{
|
||||||
|
public:
|
||||||
|
EvalPrecision(const char *name):EvalRankList(name){}
|
||||||
|
protected:
|
||||||
|
virtual float EvalMetric( std::vector< std::pair<float, unsigned> > &rec ) const {
|
||||||
|
// calculate Preicsion
|
||||||
|
std::sort(rec.begin(), rec.end(), CmpFirst);
|
||||||
|
unsigned nhit = 0;
|
||||||
|
for (size_t j = 0; j < rec.size() && j < this->topn_; ++j){
|
||||||
|
nhit += (rec[j].second != 0 );
|
||||||
|
}
|
||||||
|
return static_cast<float>( nhit ) / topn_;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
/*! \brief NDCG */
|
||||||
|
struct EvalNDCG : public EvalRankList{
|
||||||
|
public:
|
||||||
|
EvalNDCG(const char *name):EvalRankList(name){}
|
||||||
|
protected:
|
||||||
|
inline float CalcDCG( const std::vector< std::pair<float,unsigned> > &rec ) const {
|
||||||
|
double sumdcg = 0.0;
|
||||||
|
for( size_t i = 0; i < rec.size() && i < this->topn_; i ++ ){
|
||||||
|
const unsigned rel = rec[i].second;
|
||||||
|
if( rel != 0 ){
|
||||||
|
sumdcg += logf(2.0f) * ((1<<rel)-1) / logf( i + 2 );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return static_cast<float>(sumdcg);
|
||||||
|
}
|
||||||
|
virtual float EvalMetric( std::vector< std::pair<float, unsigned> > &rec ) const {
|
||||||
|
std::sort(rec.begin(), rec.end(), CmpSecond);
|
||||||
|
float idcg = this->CalcDCG(rec);
|
||||||
|
std::sort(rec.begin(), rec.end(), CmpFirst);
|
||||||
|
float dcg = this->CalcDCG(rec);
|
||||||
|
if( idcg == 0.0f ) return 0.0f;
|
||||||
|
else return dcg/idcg;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
/*! \brief Precison at N, for both classification and rank */
|
||||||
|
struct EvalMAP : public EvalRankList{
|
||||||
|
public:
|
||||||
|
EvalMAP(const char *name):EvalRankList(name){}
|
||||||
|
protected:
|
||||||
|
virtual float EvalMetric( std::vector< std::pair<float, unsigned> > &rec ) const {
|
||||||
|
std::sort(rec.begin(), rec.end(), CmpFirst);
|
||||||
|
unsigned nhits = 0;
|
||||||
|
double sumap = 0.0;
|
||||||
|
for( size_t i = 0; i < rec.size(); ++i){
|
||||||
|
if( rec[i].second != 0 ){
|
||||||
|
nhits += 1;
|
||||||
|
if( i < this->topn_ ){
|
||||||
|
sumap += static_cast<float>(nhits) / (i+1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (nhits != 0) sumap /= nhits;
|
||||||
|
return static_cast<float>(sumap);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
namespace regrank{
|
||||||
|
/*! \brief a set of evaluators */
|
||||||
|
struct EvalSet{
|
||||||
|
public:
|
||||||
|
inline void AddEval(const char *name){
|
||||||
|
for (size_t i = 0; i < evals_.size(); ++i){
|
||||||
|
if (!strcmp(name, evals_[i]->Name())) return;
|
||||||
|
}
|
||||||
|
if (!strcmp(name, "rmse")) evals_.push_back(new EvalRMSE());
|
||||||
|
if (!strcmp(name, "error")) evals_.push_back(new EvalError());
|
||||||
|
if (!strcmp(name, "merror")) evals_.push_back(new EvalMatchError());
|
||||||
|
if (!strcmp(name, "logloss")) evals_.push_back(new EvalLogLoss());
|
||||||
|
if (!strcmp(name, "auc")) evals_.push_back(new EvalAuc());
|
||||||
|
if (!strncmp(name, "ams@",4)) evals_.push_back(new EvalAMS(name));
|
||||||
|
if (!strncmp(name, "pre@", 4)) evals_.push_back(new EvalPrecision(name));
|
||||||
|
if (!strncmp(name, "map", 3)) evals_.push_back(new EvalMAP(name));
|
||||||
|
if (!strncmp(name, "ndcg", 3)) evals_.push_back(new EvalNDCG(name));
|
||||||
|
}
|
||||||
|
~EvalSet(){
|
||||||
|
for (size_t i = 0; i < evals_.size(); ++i){
|
||||||
|
delete evals_[i];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
inline void Eval(FILE *fo, const char *evname,
|
||||||
|
const std::vector<float> &preds,
|
||||||
|
const DMatrix::Info &info) const{
|
||||||
|
for (size_t i = 0; i < evals_.size(); ++i){
|
||||||
|
float res = evals_[i]->Eval(preds, info);
|
||||||
|
fprintf(fo, "\t%s-%s:%f", evname, evals_[i]->Name(), res);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
private:
|
||||||
|
std::vector<const IEvaluator*> evals_;
|
||||||
|
};
|
||||||
|
};
|
||||||
|
};
|
||||||
|
#endif
|
||||||
303
regrank/xgboost_regrank_main.cpp
Normal file
303
regrank/xgboost_regrank_main.cpp
Normal file
@@ -0,0 +1,303 @@
|
|||||||
|
#define _CRT_SECURE_NO_WARNINGS
|
||||||
|
#define _CRT_SECURE_NO_DEPRECATE
|
||||||
|
|
||||||
|
#include <ctime>
|
||||||
|
#include <string>
|
||||||
|
#include <cstring>
|
||||||
|
#include "xgboost_regrank.h"
|
||||||
|
#include "../utils/xgboost_fmap.h"
|
||||||
|
#include "../utils/xgboost_random.h"
|
||||||
|
#include "../utils/xgboost_config.h"
|
||||||
|
|
||||||
|
namespace xgboost{
|
||||||
|
namespace regrank{
|
||||||
|
/*!
 * \brief wrapping the training process of the gradient boosting regression model,
 *   given the configuration
 * \author Kailong Chen: chenkl198812@gmail.com, Tianqi Chen: tianqi.chen@gmail.com
 */
class RegBoostTask{
public:
    /*!
     * \brief entry point: load configuration, then dispatch on the "task" parameter
     * \param argc argument count; argv[1] is the configuration file,
     *        argv[2..] are extra "name=value" overrides
     * \return process exit code (always 0 once arguments are accepted)
     */
    inline int Run(int argc, char *argv[]){
        if (argc < 2){
            printf("Usage: <config>\n");
            return 0;
        }
        // parameters from the config file are applied first ...
        utils::ConfigIterator itr(argv[1]);
        while (itr.Next()){
            this->SetParam(itr.name(), itr.val());
        }
        // ... then command-line "name=value" pairs override them
        for (int i = 2; i < argc; i++){
            char name[256], val[256];
            if (sscanf(argv[i], "%[^=]=%s", name, val) == 2){
                this->SetParam(name, val);
            }
        }
        this->InitData();
        this->InitLearner();
        if (task == "dump"){
            this->TaskDump();
            return 0;
        }
        if (task == "interact"){
            this->TaskInteractive(); return 0;
        }
        if (task == "dumppath"){
            this->TaskDumpPath(); return 0;
        }
        if (task == "eval"){
            this->TaskEval(); return 0;
        }
        if (task == "pred"){
            this->TaskPred();
        }
        else{
            // any other task value (default "train") trains the model
            this->TaskTrain();
        }
        return 0;
    }
    /*!
     * \brief set a single configuration parameter; every pair is also recorded
     *        in cfg and replayed to the learner in InitLearner()
     * \param name parameter name
     * \param val parameter value
     */
    inline void SetParam(const char *name, const char *val){
        if (!strcmp("silent", name)) silent = atoi(val);
        if (!strcmp("use_buffer", name)) use_buffer = atoi(val);
        if (!strcmp("seed", name)) random::Seed(atoi(val));
        if (!strcmp("num_round", name)) num_round = atoi(val);
        if (!strcmp("save_period", name)) save_period = atoi(val);
        if (!strcmp("eval_train", name)) eval_train = atoi(val);
        if (!strcmp("task", name)) task = val;
        if (!strcmp("data", name)) train_path = val;
        if (!strcmp("test:data", name)) test_path = val;
        if (!strcmp("model_in", name)) model_in = val;
        if (!strcmp("model_out", name)) model_out = val;
        if (!strcmp("model_dir", name)) model_dir_path = val;
        if (!strcmp("fmap", name)) name_fmap = val;
        if (!strcmp("name_dump", name)) name_dump = val;
        if (!strcmp("name_dumppath", name)) name_dumppath = val;
        if (!strcmp("name_pred", name)) name_pred = val;
        if (!strcmp("dump_stats", name)) dump_model_stats = atoi(val);
        if (!strcmp("interact:action", name)) interact_action = val;
        // "batch:xxx" parameters are stored separately and replayed in TaskInteractive
        if (!strncmp("batch:", name, 6)){
            cfg_batch.PushBack(name + 6, val);
        }
        // "eval[tag]=path" registers an evaluation dataset named "tag"
        if (!strncmp("eval[", name, 5)) {
            char evname[256];
            utils::Assert(sscanf(name, "eval[%[^]]", evname) == 1, "must specify evaluation name for display");
            eval_data_names.push_back(std::string(evname));
            eval_data_paths.push_back(std::string(val));
        }
        cfg.PushBack(name, val);
    }
public:
    /*! \brief set the default parameter values; "NULL" marks string options as unset */
    RegBoostTask(void){
        // default parameters
        silent = 0;
        use_buffer = 1;
        num_round = 10;
        save_period = 0;
        eval_train = 0;
        dump_model_stats = 0;
        task = "train";
        model_in = "NULL";
        model_out = "NULL";
        name_fmap = "NULL";
        name_pred = "pred.txt";
        name_dump = "dump.txt";
        name_dumppath = "dump.path.txt";
        model_dir_path = "./";
        interact_action = "update";
    }
    /*! \brief release the evaluation matrices allocated in InitData() */
    ~RegBoostTask(void){
        for (size_t i = 0; i < deval.size(); i++){
            delete deval[i];
        }
    }
private:
    /*!
     * \brief load the data needed by the current task: the test set for
     *        pred/dumppath, the training set plus all eval[...] sets otherwise
     */
    inline void InitData(void){
        if (name_fmap != "NULL") fmap.LoadText(name_fmap.c_str());
        if (task == "dump") return;
        if (task == "pred" || task == "dumppath"){
            data.CacheLoad(test_path.c_str(), silent != 0, use_buffer != 0);
        }
        else{
            // training
            data.CacheLoad(train_path.c_str(), silent != 0, use_buffer != 0);
            utils::Assert(eval_data_names.size() == eval_data_paths.size());
            for (size_t i = 0; i < eval_data_names.size(); ++i){
                deval.push_back(new DMatrix());
                deval.back()->CacheLoad(eval_data_paths[i].c_str(), silent != 0, use_buffer != 0);
                devalall.push_back(deval.back());
            }
            std::vector<DMatrix *> dcache(1, &data);
            for( size_t i = 0; i < deval.size(); ++ i){
                dcache.push_back( deval[i] );
            }
            // set cache data to be all training and evaluation data
            learner.SetCacheData(dcache);

            // add training set to evaluation set if needed
            if( eval_train != 0 ){
                devalall.push_back( &data );
                eval_data_names.push_back( std::string("train") );
            }

        }
    }
    /*!
     * \brief forward all recorded parameters to the learner, then either load
     *        an existing model (model_in) or initialize a fresh one (train only)
     */
    inline void InitLearner(void){
        cfg.BeforeFirst();
        while (cfg.Next()){
            learner.SetParam(cfg.name(), cfg.val());
        }
        if (model_in != "NULL"){
            utils::FileStream fi(utils::FopenCheck(model_in.c_str(), "rb"));
            learner.LoadModel(fi);
            fi.Close();
        }
        else{
            utils::Assert(task == "train", "model_in not specified");
            learner.InitModel();
        }
        learner.InitTrainer();
    }
    /*!
     * \brief run num_round boosting iterations, evaluating after each one and
     *        saving snapshots every save_period rounds; model_out=="NONE"
     *        suppresses the final save, model_out=="NULL" uses a numbered name
     */
    inline void TaskTrain(void){
        const time_t start = time(NULL);
        unsigned long elapsed = 0;
        for (int i = 0; i < num_round; ++i){
            elapsed = (unsigned long)(time(NULL) - start);
            if (!silent) printf("boosting round %d, %lu sec elapsed\n", i, elapsed);
            learner.UpdateOneIter(data);
            learner.EvalOneIter(i, devalall, eval_data_names);
            if (save_period != 0 && (i + 1) % save_period == 0){
                this->SaveModel(i);
            }
            elapsed = (unsigned long)(time(NULL) - start);
        }
        // always save final round
        if ((save_period == 0 || num_round % save_period != 0) && model_out != "NONE"){
            if (model_out == "NULL"){
                this->SaveModel(num_round - 1);
            }
            else{
                this->SaveModel(model_out.c_str());
            }
        }
        if (!silent){
            printf("\nupdating end, %lu sec in all\n", elapsed);
        }
    }
    /*! \brief evaluate the loaded model once over all registered eval sets */
    inline void TaskEval(void){
        learner.EvalOneIter(0, devalall, eval_data_names);
    }
    /*!
     * \brief interactive update: replay batch:* parameters, running one
     *        UpdateInteract per "run" entry (or once if none), then save
     */
    inline void TaskInteractive(void){
        const time_t start = time(NULL);
        unsigned long elapsed = 0;
        int batch_action = 0;

        cfg_batch.BeforeFirst();
        while (cfg_batch.Next()){
            if (!strcmp(cfg_batch.name(), "run")){
                learner.UpdateInteract(interact_action, data);
                batch_action += 1;
            }
            else{
                learner.SetParam(cfg_batch.name(), cfg_batch.val());
            }
        }

        if (batch_action == 0){
            learner.UpdateInteract(interact_action, data);
        }
        utils::Assert(model_out != "NULL", "interactive mode must specify model_out");
        this->SaveModel(model_out.c_str());
        elapsed = (unsigned long)(time(NULL) - start);

        if (!silent){
            printf("\ninteractive update, %d batch actions, %lu sec in all\n", batch_action, elapsed);
        }
    }

    /*! \brief dump the model in readable text form to name_dump */
    inline void TaskDump(void){
        FILE *fo = utils::FopenCheck(name_dump.c_str(), "w");
        learner.DumpModel(fo, fmap, dump_model_stats != 0);
        fclose(fo);
    }
    /*! \brief dump the prediction paths of the test data to name_dumppath */
    inline void TaskDumpPath(void){
        FILE *fo = utils::FopenCheck(name_dumppath.c_str(), "w");
        learner.DumpPath(fo, data);
        fclose(fo);
    }
    /*! \brief save the current model to an explicitly named binary file */
    inline void SaveModel(const char *fname) const{
        utils::FileStream fo(utils::FopenCheck(fname, "wb"));
        learner.SaveModel(fo);
        fo.Close();
    }
    /*! \brief save the model for round i as "<model_dir>/<i+1 padded>.model" */
    inline void SaveModel(int i) const{
        char fname[256];
        sprintf(fname, "%s/%04d.model", model_dir_path.c_str(), i + 1);
        this->SaveModel(fname);
    }
    /*! \brief predict on the test data and write one value per line to name_pred */
    inline void TaskPred(void){
        std::vector<float> preds;
        if (!silent) printf("start prediction...\n");
        learner.Predict(preds, data);
        if (!silent) printf("writing prediction to %s\n", name_pred.c_str());
        FILE *fo = utils::FopenCheck(name_pred.c_str(), "w");
        for (size_t i = 0; i < preds.size(); i++){
            fprintf(fo, "%f\n", preds[i]);
        }
        fclose(fo);
    }
private:
    /* \brief whether silent */
    int silent;
    /* \brief whether use auto binary buffer */
    int use_buffer;
    /* \brief whether evaluate training statistics */
    int eval_train;
    /* \brief number of boosting iterations */
    int num_round;
    /* \brief the period to save the model, 0 means only save the final round model */
    int save_period;
    /*! \brief interact action */
    std::string interact_action;
    /* \brief the path of training/test data set */
    std::string train_path, test_path;
    /* \brief the path of test model file, or file to restart training */
    std::string model_in;
    /* \brief the path of final model file, to be saved */
    std::string model_out;
    /* \brief the path of directory containing the saved models */
    std::string model_dir_path;
    /* \brief task to perform */
    std::string task;
    /* \brief name of predict file */
    std::string name_pred;
    /* \brief whether dump statistics along with model */
    int dump_model_stats;
    /* \brief name of feature map */
    std::string name_fmap;
    /* \brief name of dump file */
    std::string name_dump;
    /* \brief name of dump path file */
    std::string name_dumppath;
    /* \brief the paths of validation data sets */
    std::vector<std::string> eval_data_paths;
    /* \brief the names of the evaluation data used in output log */
    std::vector<std::string> eval_data_names;
    /*! \brief saves configurations */
    utils::ConfigSaver cfg;
    /*! \brief batch configurations */
    utils::ConfigSaver cfg_batch;
private:
    // training/prediction data and owned evaluation matrices
    DMatrix data;
    std::vector<DMatrix*> deval;
    // evaluation view: deval plus (optionally) the training data itself
    std::vector<const DMatrix*> devalall;
    utils::FeatMap fmap;
    RegRankBoostLearner learner;
};
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
int main( int argc, char *argv[] ){
|
||||||
|
xgboost::random::Seed( 0 );
|
||||||
|
xgboost::regrank::RegBoostTask tsk;
|
||||||
|
return tsk.Run( argc, argv );
|
||||||
|
}
|
||||||
131
regrank/xgboost_regrank_obj.h
Normal file
131
regrank/xgboost_regrank_obj.h
Normal file
@@ -0,0 +1,131 @@
|
|||||||
|
#ifndef XGBOOST_REGRANK_OBJ_H
|
||||||
|
#define XGBOOST_REGRANK_OBJ_H
|
||||||
|
/*!
|
||||||
|
* \file xgboost_regrank_obj.h
|
||||||
|
* \brief defines objective function interface used in xgboost for regression and rank
|
||||||
|
* \author Tianqi Chen, Kailong Chen
|
||||||
|
*/
|
||||||
|
#include "xgboost_regrank_data.h"
|
||||||
|
|
||||||
|
namespace xgboost{
|
||||||
|
namespace regrank{
|
||||||
|
/*! \brief interface of objective function */
class IObjFunction{
public:
    /*! \brief virtual destructor */
    virtual ~IObjFunction(void){}
    /*!
     * \brief set parameters from outside
     * \param name name of the parameter
     * \param val value of the parameter
     */
    virtual void SetParam(const char *name, const char *val) = 0;

    /*!
     * \brief get gradient over each of predictions, given existing information
     * \param preds prediction of current round
     * \param info information about labels, weights, groups in rank
     * \param iter current iteration number
     * \param grad gradient over each preds (output; implementations in this
     *        file resize it to preds.size())
     * \param hess second order gradient over each preds (output; resized the
     *        same way as grad)
     */
    virtual void GetGradient(const std::vector<float>& preds,
                             const DMatrix::Info &info,
                             int iter,
                             std::vector<float> &grad,
                             std::vector<float> &hess ) = 0;
    /*! \return the default evaluation metric for the problem */
    virtual const char* DefaultEvalMetric(void) = 0;
    /*!
     * \brief transform prediction values, this is only called when Prediction is called
     * \param preds prediction values, saves to this vector as well
     */
    virtual void PredTransform(std::vector<float> &preds){}
    /*!
     * \brief transform prediction values, this is only called when Eval is called, usually it redirect to PredTransform
     * \param preds prediction values, saves to this vector as well
     */
    virtual void EvalTransform(std::vector<float> &preds){ this->PredTransform(preds); }
};
|
||||||
|
};
|
||||||
|
|
||||||
|
namespace regrank{
|
||||||
|
/*! \brief defines functions to calculate some commonly used functions */
struct LossType{
public:
    // supported loss types; loss_type below holds one of these constants
    const static int kLinearSquare = 0;
    const static int kLogisticNeglik = 1;
    const static int kLogisticClassify = 2;
    const static int kLogisticRaw = 3;
public:
    /*! \brief indicate which type we are using */
    int loss_type;
public:
    /*!
     * \brief transform the linear sum to prediction
     * \param x linear sum of boosting ensemble
     * \return transformed prediction
     */
    inline float PredTransform(float x){
        switch (loss_type){
        // raw logistic outputs the margin untransformed, like linear regression
        case kLogisticRaw:
        case kLinearSquare: return x;
        // both logistic variants output the sigmoid probability
        case kLogisticClassify:
        case kLogisticNeglik: return 1.0f / (1.0f + expf(-x));
        default: utils::Error("unknown loss_type"); return 0.0f;
        }
    }

    /*!
     * \brief calculate first order gradient of loss, given transformed prediction
     * \param predt transformed prediction
     * \param label true label
     * \return first order gradient
     */
    inline float FirstOrderGradient(float predt, float label) const{
        switch (loss_type){
        case kLinearSquare: return predt - label;
        // kLogisticRaw receives the raw margin, so apply the sigmoid first and
        // then deliberately fall through to the logistic gradient below
        case kLogisticRaw: predt = 1.0f / (1.0f + expf(-predt));
        case kLogisticClassify:
        case kLogisticNeglik: return predt - label;
        default: utils::Error("unknown loss_type"); return 0.0f;
        }
    }
    /*!
     * \brief calculate second order gradient of loss, given transformed prediction
     * \param predt transformed prediction
     * \param label true label
     * \return second order gradient
     */
    inline float SecondOrderGradient(float predt, float label) const{
        switch (loss_type){
        case kLinearSquare: return 1.0f;
        // intentional fall-through, same as FirstOrderGradient: sigmoid first,
        // then the shared logistic hessian p * (1 - p)
        case kLogisticRaw: predt = 1.0f / (1.0f + expf(-predt));
        case kLogisticClassify:
        case kLogisticNeglik: return predt * (1 - predt);
        default: utils::Error("unknown loss_type"); return 0.0f;
        }
    }
};
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
#include "xgboost_regrank_obj.hpp"
|
||||||
|
|
||||||
|
namespace xgboost{
|
||||||
|
namespace regrank{
|
||||||
|
/*!
 * \brief factory: create an objective function from its configuration name;
 *        calls utils::Error (and returns NULL) on an unknown name
 */
inline IObjFunction* CreateObjFunction( const char *name ){
    if( strcmp("reg:linear", name) == 0 ) return new RegressionObj( LossType::kLinearSquare );
    if( strcmp("reg:logistic", name) == 0 ) return new RegressionObj( LossType::kLogisticNeglik );
    if( strcmp("binary:logistic", name) == 0 ) return new RegressionObj( LossType::kLogisticClassify );
    if( strcmp("binary:logitraw", name) == 0 ) return new RegressionObj( LossType::kLogisticRaw );
    if( strcmp("multi:softmax", name) == 0 ) return new SoftmaxMultiClassObj(0);
    if( strcmp("multi:softprob", name) == 0 ) return new SoftmaxMultiClassObj(1);
    if( strcmp("rank:pairwise", name) == 0 ) return new PairwiseRankObj();
    if( strcmp("rank:softmax", name) == 0 ) return new SoftmaxRankObj();
    utils::Error("unknown objective function type");
    return NULL;
}
|
||||||
|
};
|
||||||
|
};
|
||||||
|
#endif
|
||||||
353
regrank/xgboost_regrank_obj.hpp
Normal file
353
regrank/xgboost_regrank_obj.hpp
Normal file
@@ -0,0 +1,353 @@
|
|||||||
|
#ifndef XGBOOST_REGRANK_OBJ_HPP
|
||||||
|
#define XGBOOST_REGRANK_OBJ_HPP
|
||||||
|
/*!
|
||||||
|
* \file xgboost_regrank_obj.hpp
|
||||||
|
* \brief implementation of objective functions
|
||||||
|
* \author Tianqi Chen, Kailong Chen
|
||||||
|
*/
|
||||||
|
//#include "xgboost_regrank_sample.h"
|
||||||
|
#include <vector>
|
||||||
|
#include <functional>
|
||||||
|
#include "xgboost_regrank_utils.h"
|
||||||
|
|
||||||
|
namespace xgboost{
|
||||||
|
namespace regrank{
|
||||||
|
/*! \brief regression (and binary classification) objective, parameterized by LossType */
class RegressionObj : public IObjFunction{
public:
    // \param loss_type one of the LossType::k* constants
    RegressionObj( int loss_type ){
        loss.loss_type = loss_type;
        scale_pos_weight = 1.0f;
    }
    virtual ~RegressionObj(){}
    virtual void SetParam(const char *name, const char *val){
        if( !strcmp( "loss_type", name ) ) loss.loss_type = atoi( val );
        // multiplies the weight of instances whose label equals exactly 1.0
        if( !strcmp( "scale_pos_weight", name ) ) scale_pos_weight = (float)atof( val );
    }
    // per-instance first/second order gradients, computed in parallel;
    // each instance is independent so the omp for needs no synchronization
    virtual void GetGradient(const std::vector<float>& preds,
                             const DMatrix::Info &info,
                             int iter,
                             std::vector<float> &grad,
                             std::vector<float> &hess ) {
        utils::Assert( preds.size() == info.labels.size(), "label size predict size not match" );
        grad.resize(preds.size()); hess.resize(preds.size());

        const unsigned ndata = static_cast<unsigned>(preds.size());
        #pragma omp parallel for schedule( static )
        for (unsigned j = 0; j < ndata; ++j){
            float p = loss.PredTransform(preds[j]);
            float w = info.GetWeight(j);
            // up-weight positive instances when scale_pos_weight is set
            if( info.labels[j] == 1.0f ) w *= scale_pos_weight;
            grad[j] = loss.FirstOrderGradient(p, info.labels[j]) * w;
            hess[j] = loss.SecondOrderGradient(p, info.labels[j]) * w;
        }
    }
    virtual const char* DefaultEvalMetric(void) {
        if( loss.loss_type == LossType::kLogisticClassify ) return "error";
        if( loss.loss_type == LossType::kLogisticRaw ) return "auc";
        return "rmse";
    }
    // map raw boosting scores through the loss's output transform, in place
    virtual void PredTransform(std::vector<float> &preds){
        const unsigned ndata = static_cast<unsigned>(preds.size());
        #pragma omp parallel for schedule( static )
        for (unsigned j = 0; j < ndata; ++j){
            preds[j] = loss.PredTransform( preds[j] );
        }
    }
private:
    // extra weight multiplier applied to instances with label == 1.0
    float scale_pos_weight;
    // loss definition (transform + gradients)
    LossType loss;
};
|
||||||
|
};
|
||||||
|
|
||||||
|
namespace regrank{
|
||||||
|
// simple softmax rank: treats each query group as one multinomial choice
class SoftmaxRankObj : public IObjFunction{
public:
    SoftmaxRankObj(void){
    }
    virtual ~SoftmaxRankObj(){}
    // no tunable parameters for this objective
    virtual void SetParam(const char *name, const char *val){
    }
    // softmax gradient per group; requires group_ptr and that each group has
    // exactly one positive label (a group with no positives gets zero gradients)
    virtual void GetGradient(const std::vector<float>& preds,
                             const DMatrix::Info &info,
                             int iter,
                             std::vector<float> &grad,
                             std::vector<float> &hess ) {
        utils::Assert( preds.size() == info.labels.size(), "label size predict size not match" );
        grad.resize(preds.size()); hess.resize(preds.size());
        const std::vector<unsigned> &gptr = info.group_ptr;
        utils::Assert( gptr.size() != 0 && gptr.back() == preds.size(), "rank loss must have group file" );
        const unsigned ngroup = static_cast<unsigned>( gptr.size() - 1 );

        #pragma omp parallel
        {
            // thread-local score buffer, reused across groups
            std::vector< float > rec;
            #pragma omp for schedule(static)
            for (unsigned k = 0; k < ngroup; ++k){
                rec.clear();
                // nhit accumulates the (0/1) labels of the group; float labels
                // are truncated to int here
                int nhit = 0;
                for(unsigned j = gptr[k]; j < gptr[k+1]; ++j ){
                    rec.push_back( preds[j] );
                    grad[j] = hess[j] = 0.0f;
                    nhit += info.labels[j];
                }
                Softmax( rec );
                if( nhit == 1 ){
                    for(unsigned j = gptr[k]; j < gptr[k+1]; ++j ){
                        float p = rec[ j - gptr[k] ];
                        grad[j] = p - info.labels[j];
                        // hessian scaled by 2 (conservative update, as in the
                        // multiclass softmax objective below)
                        hess[j] = 2.0f * p * ( 1.0f - p );
                    }
                }else{
                    utils::Assert( nhit == 0, "softmax does not allow multiple labels" );
                }
            }
        }
    }
    virtual const char* DefaultEvalMetric(void) {
        return "pre@1";
    }
};
|
||||||
|
|
||||||
|
// simple softmax multi-class classification
// predictions are laid out class-major: score of class k for instance j is
// preds[j + k * ndata]
class SoftmaxMultiClassObj : public IObjFunction{
public:
    // \param output_prob nonzero: PredTransform outputs per-class probabilities;
    //        zero: it outputs the argmax class index
    SoftmaxMultiClassObj(int output_prob):output_prob(output_prob){
        nclass = 0;
    }
    virtual ~SoftmaxMultiClassObj(){}
    virtual void SetParam(const char *name, const char *val){
        // num_class must be set before GetGradient/Transform are called
        if( !strcmp( "num_class", name ) ) nclass = atoi(val);
    }
    virtual void GetGradient(const std::vector<float>& preds,
                             const DMatrix::Info &info,
                             int iter,
                             std::vector<float> &grad,
                             std::vector<float> &hess ) {
        utils::Assert( nclass != 0, "must set num_class to use softmax" );
        utils::Assert( preds.size() == (size_t)nclass * info.labels.size(), "SoftmaxMultiClassObj: label size and pred size does not match" );
        grad.resize(preds.size()); hess.resize(preds.size());

        const unsigned ndata = static_cast<unsigned>(info.labels.size());
        #pragma omp parallel
        {
            // thread-local score buffer holding one instance's class scores
            std::vector<float> rec(nclass);
            #pragma omp for schedule(static)
            for (unsigned j = 0; j < ndata; ++j){
                for( int k = 0; k < nclass; ++ k ){
                    rec[k] = preds[j + k * ndata];
                }
                Softmax( rec );
                int label = static_cast<int>(info.labels[j]);
                // negative labels are interpreted as encoded (-label - 1)
                if( label < 0 ){
                    label = -label - 1;
                }
                utils::Assert( label < nclass, "SoftmaxMultiClassObj: label exceed num_class" );
                for( int k = 0; k < nclass; ++ k ){
                    float p = rec[ k ];
                    if( label == k ){
                        grad[j+k*ndata] = p - 1.0f;
                    }else{
                        grad[j+k*ndata] = p;
                    }
                    // hessian scaled by 2 for a conservative update
                    hess[j+k*ndata] = 2.0f * p * ( 1.0f - p );
                }
            }
        }
    }
    virtual void PredTransform(std::vector<float> &preds){
        this->Transform(preds, output_prob);
    }
    // evaluation always uses class indices, regardless of output_prob
    virtual void EvalTransform(std::vector<float> &preds){
        this->Transform(preds, 0);
    }
private:
    // \param prob nonzero: replace scores with softmax probabilities in place;
    //        zero: write the argmax class index into preds[j] and shrink preds
    //        to ndata entries
    inline void Transform(std::vector<float> &preds, int prob){
        utils::Assert( nclass != 0, "must set num_class to use softmax" );
        utils::Assert( preds.size() % nclass == 0, "SoftmaxMultiClassObj: label size and pred size does not match" );
        const unsigned ndata = static_cast<unsigned>(preds.size()/nclass);

        #pragma omp parallel
        {
            std::vector<float> rec(nclass);
            #pragma omp for schedule(static)
            for (unsigned j = 0; j < ndata; ++j){
                for( int k = 0; k < nclass; ++ k ){
                    rec[k] = preds[j + k * ndata];
                }
                if( prob == 0 ){
                    // each thread writes only preds[j] for its own j, after
                    // reading that slot into rec, so there is no data race
                    preds[j] = FindMaxIndex( rec );
                }else{
                    Softmax( rec );
                    for( int k = 0; k < nclass; ++ k ){
                        preds[j + k * ndata] = rec[k];
                    }
                }
            }
        }
        if( prob == 0 ){
            preds.resize( ndata );
        }
    }
    // NOTE(review): declared in a private section, but virtual dispatch through
    // the IObjFunction base still reaches it (access is checked statically)
    virtual const char* DefaultEvalMetric(void) {
        return "merror";
    }
private:
    // number of classes; must be configured via "num_class"
    int nclass;
    // see constructor: controls PredTransform output format
    int output_prob;
};
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
namespace regrank{
|
||||||
|
/*! \brief objective for lambda rank */
|
||||||
|
class LambdaRankObj : public IObjFunction{
|
||||||
|
public:
|
||||||
|
LambdaRankObj(void){
|
||||||
|
loss.loss_type = LossType::kLogisticRaw;
|
||||||
|
fix_list_weight = 0.0f;
|
||||||
|
num_pairsample = 1;
|
||||||
|
}
|
||||||
|
virtual ~LambdaRankObj(){}
|
||||||
|
virtual void SetParam(const char *name, const char *val){
|
||||||
|
if( !strcmp( "loss_type", name ) ) loss.loss_type = atoi( val );
|
||||||
|
if( !strcmp( "fix_list_weight", name ) ) fix_list_weight = (float)atof( val );
|
||||||
|
if( !strcmp( "num_pairsample", name ) ) num_pairsample = atoi( val );
|
||||||
|
}
|
||||||
|
public:
|
||||||
|
virtual void GetGradient(const std::vector<float>& preds,
|
||||||
|
const DMatrix::Info &info,
|
||||||
|
int iter,
|
||||||
|
std::vector<float> &grad,
|
||||||
|
std::vector<float> &hess ) {
|
||||||
|
utils::Assert( preds.size() == info.labels.size(), "label size predict size not match" );
|
||||||
|
grad.resize(preds.size()); hess.resize(preds.size());
|
||||||
|
const std::vector<unsigned> &gptr = info.group_ptr;
|
||||||
|
utils::Assert( gptr.size() != 0 && gptr.back() == preds.size(), "rank loss must have group file" );
|
||||||
|
const unsigned ngroup = static_cast<unsigned>( gptr.size() - 1 );
|
||||||
|
|
||||||
|
#pragma omp parallel
|
||||||
|
{
|
||||||
|
// parall construct, declare random number generator here, so that each
|
||||||
|
// thread use its own random number generator, seed by thread id and current iteration
|
||||||
|
random::Random rnd; rnd.Seed( iter * 1111 + omp_get_thread_num() );
|
||||||
|
std::vector<LambdaPair> pairs;
|
||||||
|
std::vector<ListEntry> lst;
|
||||||
|
std::vector< std::pair<float,unsigned> > rec;
|
||||||
|
|
||||||
|
#pragma omp for schedule(static)
|
||||||
|
for (unsigned k = 0; k < ngroup; ++k){
|
||||||
|
lst.clear(); pairs.clear();
|
||||||
|
for(unsigned j = gptr[k]; j < gptr[k+1]; ++j ){
|
||||||
|
lst.push_back( ListEntry(preds[j], info.labels[j], j ) );
|
||||||
|
grad[j] = hess[j] = 0.0f;
|
||||||
|
}
|
||||||
|
std::sort( lst.begin(), lst.end(), ListEntry::CmpPred );
|
||||||
|
rec.resize( lst.size() );
|
||||||
|
for( unsigned i = 0; i < lst.size(); ++i ){
|
||||||
|
rec[i] = std::make_pair( lst[i].label, i );
|
||||||
|
}
|
||||||
|
std::sort( rec.begin(), rec.end(), CmpFirst );
|
||||||
|
// enumerate buckets with same label, for each item in the lst, grab another sample randomly
|
||||||
|
for( unsigned i = 0; i < rec.size(); ){
|
||||||
|
unsigned j = i + 1;
|
||||||
|
while( j < rec.size() && rec[j].first == rec[i].first ) ++ j;
|
||||||
|
// bucket in [i,j), get a sample outside bucket
|
||||||
|
unsigned nleft = i, nright = rec.size() - j;
|
||||||
|
if( nleft + nright != 0 ){
|
||||||
|
int nsample = num_pairsample;
|
||||||
|
while( nsample -- ){
|
||||||
|
for( unsigned pid = i; pid < j; ++ pid ){
|
||||||
|
unsigned ridx = static_cast<unsigned>( rnd.RandDouble() * (nleft+nright) );
|
||||||
|
if( ridx < nleft ){
|
||||||
|
pairs.push_back( LambdaPair( rec[ridx].second, rec[pid].second ) );
|
||||||
|
}else{
|
||||||
|
pairs.push_back( LambdaPair( rec[pid].second, rec[ridx+j-i].second ) );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
i = j;
|
||||||
|
}
|
||||||
|
// get lambda weight for the pairs
|
||||||
|
this->GetLambdaWeight( lst, pairs );
|
||||||
|
// rescale each gradient and hessian so that the lst have constant weighted
|
||||||
|
float scale = 1.0f / num_pairsample;
|
||||||
|
if( fix_list_weight != 0.0f ){
|
||||||
|
scale *= fix_list_weight / (gptr[k+1] - gptr[k]);
|
||||||
|
}
|
||||||
|
for( size_t i = 0; i < pairs.size(); ++ i ){
|
||||||
|
const ListEntry &pos = lst[ pairs[i].pos_index ];
|
||||||
|
const ListEntry &neg = lst[ pairs[i].neg_index ];
|
||||||
|
const float w = pairs[i].weight * scale;
|
||||||
|
float p = loss.PredTransform( pos.pred - neg.pred );
|
||||||
|
float g = loss.FirstOrderGradient( p, 1.0f );
|
||||||
|
float h = loss.SecondOrderGradient( p, 1.0f );
|
||||||
|
// accumulate gradient and hessian in both pid, and nid,
|
||||||
|
grad[ pos.rindex ] += g * w;
|
||||||
|
grad[ neg.rindex ] -= g * w;
|
||||||
|
// take conservative update, scale hessian by 2
|
||||||
|
hess[ pos.rindex ] += 2.0f * h * w;
|
||||||
|
hess[ neg.rindex ] += 2.0f * h * w;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// default evaluation metric for ranking objectives: mean average precision
virtual const char* DefaultEvalMetric(void) {
    return "map";
}
|
||||||
|
private:
|
||||||
|
// loss function
|
||||||
|
LossType loss;
|
||||||
|
// number of samples peformed for each instance
|
||||||
|
int num_pairsample;
|
||||||
|
// fix weight of each elements in list
|
||||||
|
float fix_list_weight;
|
||||||
|
protected:
|
||||||
|
/*! \brief helper entry describing one instance inside a ranking list */
struct ListEntry{
    /*! \brief predicted score of this instance */
    float pred;
    /*! \brief ground-truth relevance label of this instance */
    float label;
    /*! \brief row index of this instance in the data matrix, used to write gradients back */
    unsigned rindex;
    // constructor
    ListEntry(float pred, float label, unsigned rindex): pred(pred),label(label),rindex(rindex){}
    // comparator: order entries by prediction score, descending
    inline static bool CmpPred(const ListEntry &a, const ListEntry &b){
        return a.pred > b.pred;
    }
    // comparator: order entries by label, descending
    inline static bool CmpLabel(const ListEntry &a, const ListEntry &b){
        return a.label > b.label;
    }
};
|
||||||
|
/*! \brief a (positive, negative) pair in the lambda rank objective */
struct LambdaPair{
    /*! \brief positive index: position in the sorted list of the higher-ranked item */
    unsigned pos_index;
    /*! \brief negative index: position in the sorted list of the lower-ranked item */
    unsigned neg_index;
    /*! \brief pair weight, defaults to 1 and is filled in later by GetLambdaWeight */
    float weight;
    // constructor: weight starts at 1.0f so unweighted objectives need no extra pass
    LambdaPair( unsigned pos_index, unsigned neg_index ):pos_index(pos_index),neg_index(neg_index),weight(1.0f){}
};
|
||||||
|
/*!
|
||||||
|
* \brief get lambda weight for existing pairs
|
||||||
|
* \param list a list that is sorted by pred score
|
||||||
|
* \param pairs record of pairs, containing the pairs to fill in weights
|
||||||
|
*/
|
||||||
|
virtual void GetLambdaWeight( const std::vector<ListEntry> &sorted_list, std::vector<LambdaPair> &pairs ) = 0;
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
namespace regrank{
|
||||||
|
/*! \brief plain pairwise ranking objective */
class PairwiseRankObj: public LambdaRankObj{
public:
    virtual ~PairwiseRankObj(void){}
    // intentionally empty: every LambdaPair keeps its constructor weight (1.0f),
    // which reduces LambdaRank to an unweighted pairwise logistic ranking loss
    virtual void GetLambdaWeight( const std::vector<ListEntry> &sorted_list, std::vector<LambdaPair> &pairs ){}
};
|
||||||
|
};
|
||||||
|
};
|
||||||
|
#endif
|
||||||
45
regrank/xgboost_regrank_utils.h
Normal file
45
regrank/xgboost_regrank_utils.h
Normal file
@@ -0,0 +1,45 @@
|
|||||||
|
#ifndef XGBOOST_REGRANK_UTILS_H
|
||||||
|
#define XGBOOST_REGRANK_UTILS_H
|
||||||
|
/*!
|
||||||
|
* \file xgboost_regrank_utils.h
|
||||||
|
* \brief useful helper functions
|
||||||
|
* \author Tianqi Chen, Kailong Chen
|
||||||
|
*/
|
||||||
|
namespace xgboost{
|
||||||
|
namespace regrank{
|
||||||
|
/*!
 * \brief in-place softmax transform: rec[i] <- exp(rec[i]) / sum_j exp(rec[j])
 *        subtracts the maximum element before exponentiation for numerical stability
 * \param rec vector of raw scores, overwritten with the resulting probabilities;
 *            a no-op when the vector is empty
 */
inline static void Softmax( std::vector<float>& rec ){
    // BUGFIX: guard the empty case — rec[0] below would be undefined behaviour
    if( rec.empty() ) return;
    float wmax = rec[0];
    for( size_t i = 1; i < rec.size(); ++ i ){
        wmax = std::max( rec[i], wmax );
    }
    // accumulate the normalizer in double to reduce rounding error on long vectors
    double wsum = 0.0;
    for( size_t i = 0; i < rec.size(); ++ i ){
        rec[i] = expf( rec[i] - wmax );
        wsum += rec[i];
    }
    for( size_t i = 0; i < rec.size(); ++ i ){
        rec[i] /= static_cast<float>( wsum );
    }
}
|
||||||
|
/*!
 * \brief locate the index of the largest element of rec
 *        a candidate must exceed the current best by more than 1e-6 to replace it,
 *        so near-ties resolve to the earliest index (0 for an empty vector)
 */
inline static int FindMaxIndex( std::vector<float>& rec ){
    size_t best = 0;
    for( size_t pos = 1; pos < rec.size(); ++ pos ){
        if( rec[pos] > rec[best] + 1e-6f ) best = pos;
    }
    return static_cast<int>( best );
}
|
||||||
|
// order (score, index) records by score, largest first
inline static bool CmpFirst(const std::pair<float, unsigned> &a, const std::pair<float, unsigned> &b){
    return b.first < a.first;
}
|
||||||
|
// order (score, index) records by index, largest first
inline static bool CmpSecond(const std::pair<float, unsigned> &a, const std::pair<float, unsigned> &b){
    return b.second < a.second;
}
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
@@ -1,403 +0,0 @@
|
|||||||
#ifndef XGBOOST_REG_H
|
|
||||||
#define XGBOOST_REG_H
|
|
||||||
/*!
|
|
||||||
* \file xgboost_reg.h
|
|
||||||
* \brief class for gradient boosted regression
|
|
||||||
* \author Kailong Chen: chenkl198812@gmail.com, Tianqi Chen: tianqi.tchen@gmail.com
|
|
||||||
*/
|
|
||||||
#include <cmath>
|
|
||||||
#include <cstdlib>
|
|
||||||
#include <cstring>
|
|
||||||
#include "xgboost_reg_data.h"
|
|
||||||
#include "xgboost_reg_eval.h"
|
|
||||||
#include "../utils/xgboost_omp.h"
|
|
||||||
#include "../booster/xgboost_gbmbase.h"
|
|
||||||
#include "../utils/xgboost_utils.h"
|
|
||||||
#include "../utils/xgboost_stream.h"
|
|
||||||
|
|
||||||
namespace xgboost{
|
|
||||||
namespace regression{
|
|
||||||
/*! \brief class for gradient boosted regression */
class RegBoostLearner{
public:
    /*! \brief default constructor; data must be attached later via SetData */
    RegBoostLearner( void ){
        silent = 0;
    }
    /*!
     * \brief a regression booster associated with training and evaluating data
     * \param train pointer to the training data
     * \param evals array of evaluating data
     * \param evname name of evaluation data, used to print statistics
     */
    RegBoostLearner( const DMatrix *train,
                     const std::vector<DMatrix *> &evals,
                     const std::vector<std::string> &evname ){
        silent = 0;
        this->SetData(train,evals,evname);
    }

    /*!
     * \brief associate regression booster with training and evaluating data
     * \param train pointer to the training data
     * \param evals array of evaluating data
     * \param evname name of evaluation data, used to print statistics
     */
    inline void SetData( const DMatrix *train,
                         const std::vector<DMatrix *> &evals,
                         const std::vector<std::string> &evname ){
        this->train_ = train;
        this->evals_ = evals;
        this->evname_ = evname;
        // estimate feature bound across train and all evaluation sets
        int num_feature = (int)(train->data.NumCol());
        // prediction buffer has one slot per instance across all data sets
        unsigned buffer_size = static_cast<unsigned>( train->Size() );

        for( size_t i = 0; i < evals.size(); ++ i ){
            buffer_size += static_cast<unsigned>( evals[i]->Size() );
            num_feature = std::max( num_feature, (int)(evals[i]->data.NumCol()) );
        }

        char str_temp[25];
        // only grow the recorded feature count, never shrink a loaded model's
        if( num_feature > mparam.num_feature ){
            mparam.num_feature = num_feature;
            sprintf( str_temp, "%d", num_feature );
            base_gbm.SetParam( "bst:num_feature", str_temp );
        }

        sprintf( str_temp, "%u", buffer_size );
        base_gbm.SetParam( "num_pbuffer", str_temp );
        if( !silent ){
            printf( "buffer_size=%u\n", buffer_size );
        }

        // set eval_preds tmp space: one prediction vector per evaluation set
        this->eval_preds_.resize( evals.size(), std::vector<float>() );
    }
    /*!
     * \brief set parameters from outside
     * \param name name of the parameter
     * \param val value of the parameter
     */
    inline void SetParam( const char *name, const char *val ){
        if( !strcmp( name, "silent") ) silent = atoi( val );
        if( !strcmp( name, "eval_metric") ) evaluator_.AddEval( val );
        // forward every parameter to the model param and the base booster too
        mparam.SetParam( name, val );
        base_gbm.SetParam( name, val );
    }
    /*!
     * \brief initialize solver before training, called before training
     * this function is reserved for solver to allocate necessary space and do other preparation
     */
    inline void InitTrainer( void ){
        base_gbm.InitTrainer();
        // register a default metric matching the configured loss
        if( mparam.loss_type == kLogisticClassify ){
            evaluator_.AddEval( "error" );
        }else{
            evaluator_.AddEval( "rmse" );
        }
        evaluator_.Init();
    }
    /*!
     * \brief initialize the current data storage for model, if the model is used first time, call this function
     */
    inline void InitModel( void ){
        base_gbm.InitModel();
        mparam.AdjustBase();
    }
    /*!
     * \brief load model from stream
     * \param fi input stream
     */
    inline void LoadModel( utils::IStream &fi ){
        base_gbm.LoadModel( fi );
        utils::Assert( fi.Read( &mparam, sizeof(ModelParam) ) != 0 );
    }
    /*!
     * \brief DumpModel
     * \param fo text file
     * \param fmap feature map that may help give interpretations of feature
     * \param with_stats whether print statistics as well
     */
    inline void DumpModel( FILE *fo, const utils::FeatMap& fmap, bool with_stats ){
        base_gbm.DumpModel( fo, fmap, with_stats );
    }
    /*!
     * \brief Dump path of all trees
     * \param fo text file
     * \param data input data
     */
    inline void DumpPath( FILE *fo, const DMatrix &data ){
        base_gbm.DumpPath( fo, data.data );
    }
    /*!
     * \brief save model to stream
     * \param fo output stream
     */
    inline void SaveModel( utils::IStream &fo ) const{
        base_gbm.SaveModel( fo );
        fo.Write( &mparam, sizeof(ModelParam) );
    }
    /*!
     * \brief update the model for one iteration
     * \param iter iteration number (currently unused by the update itself)
     */
    inline void UpdateOneIter( int iter ){
        // predict -> gradient/hessian -> boost one round
        this->PredictBuffer( preds_, *train_, 0 );
        this->GetGradient( preds_, train_->labels, grad_, hess_ );
        std::vector<unsigned> root_index;
        base_gbm.DoBoost( grad_, hess_, train_->data, root_index );
    }
    /*!
     * \brief evaluate the model for specific iteration
     * \param iter iteration number
     * \param fo file to output log
     */
    inline void EvalOneIter( int iter, FILE *fo = stderr ){
        fprintf( fo, "[%d]", iter );
        // eval-set buffer slots start right after the training slots
        int buffer_offset = static_cast<int>( train_->Size() );

        for( size_t i = 0; i < evals_.size(); ++i ){
            std::vector<float> &preds = this->eval_preds_[ i ];
            this->PredictBuffer( preds, *evals_[i], buffer_offset);
            evaluator_.Eval( fo, evname_[i].c_str(), preds, (*evals_[i]).labels );
            buffer_offset += static_cast<int>( evals_[i]->Size() );
        }
        fprintf( fo,"\n" );
    }
    /*! \brief get prediction, without buffering */
    inline void Predict( std::vector<float> &preds, const DMatrix &data ){
        preds.resize( data.Size() );

        const unsigned ndata = static_cast<unsigned>( data.Size() );
        #pragma omp parallel for schedule( static )
        for( unsigned j = 0; j < ndata; ++ j ){
            // buffer index -1 disables the prediction cache
            preds[j] = mparam.PredTransform
                ( mparam.base_score + base_gbm.Predict( data.data, j, -1 ) );
        }
    }
public:
    /*!
     * \brief interactively update the model: try one boosting round and
     *        re-predict, or remove the last booster when action == "remove"
     * \param action either grow (default) or "remove"
     */
    inline void UpdateInteract( std::string action ){
        this->InteractPredict( preds_, *train_, 0 );

        int buffer_offset = static_cast<int>( train_->Size() );
        for( size_t i = 0; i < evals_.size(); ++i ){
            std::vector<float> &preds = this->eval_preds_[ i ];
            this->InteractPredict( preds, *evals_[i], buffer_offset );
            buffer_offset += static_cast<int>( evals_[i]->Size() );
        }

        if( action == "remove" ){
            base_gbm.DelteBooster(); return;
        }

        this->GetGradient( preds_, train_->labels, grad_, hess_ );
        std::vector<unsigned> root_index;
        base_gbm.DoBoost( grad_, hess_, train_->data, root_index );

        // refresh cached predictions for train and all evaluation sets
        this->InteractRePredict( *train_, 0 );
        buffer_offset = static_cast<int>( train_->Size() );
        for( size_t i = 0; i < evals_.size(); ++i ){
            this->InteractRePredict( *evals_[i], buffer_offset );
            buffer_offset += static_cast<int>( evals_[i]->Size() );
        }
    }
private:
    /*! \brief get the transformed predictions, given data */
    inline void InteractPredict( std::vector<float> &preds, const DMatrix &data, unsigned buffer_offset ){
        preds.resize( data.Size() );
        const unsigned ndata = static_cast<unsigned>( data.Size() );
        #pragma omp parallel for schedule( static )
        for( unsigned j = 0; j < ndata; ++ j ){
            preds[j] = mparam.PredTransform
                ( mparam.base_score + base_gbm.InteractPredict( data.data, j, buffer_offset + j ) );
        }
    }
    /*! \brief repredict trial */
    inline void InteractRePredict( const DMatrix &data, unsigned buffer_offset ){
        const unsigned ndata = static_cast<unsigned>( data.Size() );
        #pragma omp parallel for schedule( static )
        for( unsigned j = 0; j < ndata; ++ j ){
            base_gbm.InteractRePredict( data.data, j, buffer_offset + j );
        }
    }
private:
    /*! \brief get the transformed predictions, given data, using the prediction buffer */
    inline void PredictBuffer( std::vector<float> &preds, const DMatrix &data, unsigned buffer_offset ){
        preds.resize( data.Size() );

        const unsigned ndata = static_cast<unsigned>( data.Size() );
        #pragma omp parallel for schedule( static )
        for( unsigned j = 0; j < ndata; ++ j ){
            preds[j] = mparam.PredTransform
                ( mparam.base_score + base_gbm.Predict( data.data, j, buffer_offset + j ) );
        }
    }

    /*! \brief get the first order and second order gradient, given the transformed predictions and labels */
    inline void GetGradient( const std::vector<float> &preds,
                             const std::vector<float> &labels,
                             std::vector<float> &grad,
                             std::vector<float> &hess ){
        grad.resize( preds.size() ); hess.resize( preds.size() );

        const unsigned ndata = static_cast<unsigned>( preds.size() );
        #pragma omp parallel for schedule( static )
        for( unsigned j = 0; j < ndata; ++ j ){
            grad[j] = mparam.FirstOrderGradient( preds[j], labels[j] );
            hess[j] = mparam.SecondOrderGradient( preds[j], labels[j] );
        }
    }

private:
    // supported loss functions; values are persisted in ModelParam::loss_type
    enum LossType{
        kLinearSquare = 0,
        kLogisticNeglik = 1,
        kLogisticClassify = 2
    };

    /*! \brief training parameter for regression; POD so it is (de)serialized with raw Read/Write */
    struct ModelParam{
        /* \brief global bias */
        float base_score;
        /* \brief type of loss function */
        int loss_type;
        /* \brief number of features */
        int num_feature;
        /*! \brief reserved field, kept zeroed for forward-compatible serialization */
        int reserved[ 16 ];
        /*! \brief constructor */
        ModelParam( void ){
            base_score = 0.5f;
            loss_type = 0;
            num_feature = 0;
            memset( reserved, 0, sizeof( reserved ) );
        }
        /*!
         * \brief set parameters from outside
         * \param name name of the parameter
         * \param val value of the parameter
         */
        inline void SetParam( const char *name, const char *val ){
            if( !strcmp("base_score", name ) ) base_score = (float)atof( val );
            if( !strcmp("loss_type", name ) ) loss_type = atoi( val );
            if( !strcmp("bst:num_feature", name ) ) num_feature = atoi( val );
        }
        /*!
         * \brief adjust base_score: for logistic losses, convert the configured
         *        probability into the equivalent raw margin (inverse sigmoid)
         */
        inline void AdjustBase( void ){
            if( loss_type == 1 || loss_type == 2 ){
                utils::Assert( base_score > 0.0f && base_score < 1.0f, "sigmoid range constrain" );
                base_score = - logf( 1.0f / base_score - 1.0f );
            }
        }

        /*!
         * \brief transform the linear sum to prediction
         * \param x linear sum of boosting ensemble
         * \return transformed prediction
         */
        inline float PredTransform( float x ){
            switch( loss_type ){
            case kLinearSquare: return x;
            case kLogisticClassify:
            case kLogisticNeglik: return 1.0f/(1.0f + expf(-x));
            default: utils::Error("unknown loss_type"); return 0.0f;
            }
        }

        /*!
         * \brief calculate first order gradient of loss, given transformed prediction
         * \param predt transformed prediction
         * \param label true label
         * \return first order gradient
         */
        inline float FirstOrderGradient( float predt, float label ) const{
            switch( loss_type ){
            case kLinearSquare: return predt - label;
            case kLogisticClassify:
            case kLogisticNeglik: return predt - label;
            default: utils::Error("unknown loss_type"); return 0.0f;
            }
        }
        /*!
         * \brief calculate second order gradient of loss, given transformed prediction
         * \param predt transformed prediction
         * \param label true label
         * \return second order gradient
         */
        inline float SecondOrderGradient( float predt, float label ) const{
            switch( loss_type ){
            case kLinearSquare: return 1.0f;
            case kLogisticClassify:
            case kLogisticNeglik: return predt * ( 1 - predt );
            default: utils::Error("unknown loss_type"); return 0.0f;
            }
        }

        /*!
         * \brief calculating the loss, given the predictions, labels and the loss type
         * \param preds the given predictions
         * \param labels the given labels
         * \return the specified loss
         */
        inline float Loss(const std::vector<float> &preds, const std::vector<float> &labels) const{
            switch( loss_type ){
            case kLinearSquare: return SquareLoss(preds,labels);
            case kLogisticNeglik:
            case kLogisticClassify: return NegLoglikelihoodLoss(preds,labels);
            default: utils::Error("unknown loss_type"); return 0.0f;
            }
        }

        /*!
         * \brief calculating the square loss, given the predictions and labels
         * \param preds the given predictions
         * \param labels the given labels
         * \return the summation of square loss
         */
        inline float SquareLoss(const std::vector<float> &preds, const std::vector<float> &labels) const{
            float ans = 0.0;
            for(size_t i = 0; i < preds.size(); i++){
                float dif = preds[i] - labels[i];
                ans += dif * dif;
            }
            return ans;
        }

        /*!
         * \brief calculating the negative log-likelihood loss, given the predictions and labels
         * \param preds the given predictions
         * \param labels the given labels
         * \return the summation of negative log-likelihood loss
         */
        inline float NegLoglikelihoodLoss(const std::vector<float> &preds, const std::vector<float> &labels) const{
            float ans = 0.0;
            for(size_t i = 0; i < preds.size(); i++)
                ans -= labels[i] * logf(preds[i]) + ( 1 - labels[i] ) * logf(1 - preds[i]);
            return ans;
        }
    };
private:
    // whether to suppress informational printing
    int silent;
    // evaluation metric set printed by EvalOneIter
    EvalSet evaluator_;
    // underlying gradient boosting machine
    booster::GBMBase base_gbm;
    // persisted model parameters (loss type, base score, feature count)
    ModelParam mparam;
    // training data (not owned)
    const DMatrix *train_;
    // evaluation data sets (not owned) and their display names
    std::vector<DMatrix *> evals_;
    std::vector<std::string> evname_;
    std::vector<unsigned> buffer_index_;
private:
    // per-iteration work buffers: gradients, hessians, cached predictions
    std::vector<float> grad_, hess_, preds_;
    std::vector< std::vector<float> > eval_preds_;
};
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
#endif
|
|
||||||
@@ -1,155 +0,0 @@
|
|||||||
#ifndef XGBOOST_REG_DATA_H
|
|
||||||
#define XGBOOST_REG_DATA_H
|
|
||||||
|
|
||||||
/*!
|
|
||||||
* \file xgboost_reg_data.h
|
|
||||||
* \brief input data structure for regression and binary classification task.
|
|
||||||
* Format:
|
|
||||||
* The data should contain each data instance in each line.
|
|
||||||
* The format of line data is as below:
|
|
||||||
* label <nonzero feature dimension> [feature index:feature value]+
|
|
||||||
* \author Kailong Chen: chenkl198812@gmail.com, Tianqi Chen: tianqi.tchen@gmail.com
|
|
||||||
*/
|
|
||||||
#include <cstdio>
|
|
||||||
#include <vector>
|
|
||||||
#include "../booster/xgboost_data.h"
|
|
||||||
#include "../utils/xgboost_utils.h"
|
|
||||||
#include "../utils/xgboost_stream.h"
|
|
||||||
|
|
||||||
namespace xgboost{
|
|
||||||
namespace regression{
|
|
||||||
/*! \brief data matrix for regression content */
struct DMatrix{
public:
    /*! \brief maximum feature dimension */
    unsigned num_feature;
    /*! \brief feature data content */
    booster::FMatrixS data;
    /*! \brief label of each instance */
    std::vector<float> labels;
public:
    /*! \brief default constructor */
    DMatrix( void ){}

    /*! \brief get the number of instances */
    inline size_t Size() const{
        return labels.size();
    }
    /*!
     * \brief load from text file
     *        format: one instance per line, "label [findex:fvalue]+" tokens
     * \param fname name of text data
     * \param silent whether print information or not
     */
    inline void LoadText( const char* fname, bool silent = false ){
        data.Clear();
        FILE* file = utils::FopenCheck( fname, "r" );
        float label; bool init = true;
        char tmp[ 1024 ];
        std::vector<booster::bst_uint> findex;
        std::vector<booster::bst_float> fvalue;

        while( fscanf( file, "%s", tmp ) == 1 ){
            unsigned index; float value;
            // a token of the form "index:value" is a feature of the current row
            if( sscanf( tmp, "%u:%f", &index, &value ) == 2 ){
                findex.push_back( index ); fvalue.push_back( value );
            }else{
                // otherwise the token starts a new row: flush the previous row first
                if( !init ){
                    labels.push_back( label );
                    data.AddRow( findex, fvalue );
                }
                findex.clear(); fvalue.clear();
                utils::Assert( sscanf( tmp, "%f", &label ) == 1, "invalid format" );
                init = false;
            }
        }

        // flush the final row
        // NOTE(review): on a completely empty file this pushes an uninitialized
        // `label` — confirm callers never load empty files
        labels.push_back( label );
        data.AddRow( findex, fvalue );
        // initialize column support as well
        data.InitData();

        if( !silent ){
            printf("%ux%u matrix with %lu entries is loaded from %s\n",
                   (unsigned)data.NumRow(), (unsigned)data.NumCol(), (unsigned long)data.NumEntry(), fname );
        }
        fclose(file);
    }
    /*!
     * \brief load from binary file
     * \param fname name of binary data
     * \param silent whether print information or not
     * \return whether loading is success
     */
    inline bool LoadBinary( const char* fname, bool silent = false ){
        // NOTE(review): fopen64 is a glibc extension — presumably for >2GB files;
        // confirm portability requirements before building on other platforms
        FILE *fp = fopen64( fname, "rb" );
        if( fp == NULL ) return false;
        utils::FileStream fs( fp );
        data.LoadBinary( fs );
        labels.resize( data.NumRow() );
        utils::Assert( fs.Read( &labels[0], sizeof(float) * data.NumRow() ) != 0, "DMatrix LoadBinary" );
        fs.Close();
        // initialize column support as well
        data.InitData();

        if( !silent ){
            printf("%ux%u matrix with %lu entries is loaded from %s\n",
                   (unsigned)data.NumRow(), (unsigned)data.NumCol(), (unsigned long)data.NumEntry(), fname );
        }
        return true;
    }
    /*!
     * \brief save to binary file
     * \param fname name of binary data
     * \param silent whether print information or not
     */
    inline void SaveBinary( const char* fname, bool silent = false ){
        // initialize column support as well
        data.InitData();

        utils::FileStream fs( utils::FopenCheck( fname, "wb" ) );
        data.SaveBinary( fs );
        fs.Write( &labels[0], sizeof(float) * data.NumRow() );
        fs.Close();
        if( !silent ){
            printf("%ux%u matrix with %lu entries is saved to %s\n",
                   (unsigned)data.NumRow(), (unsigned)data.NumCol(), (unsigned long)data.NumEntry(), fname );
        }
    }
    /*!
     * \brief cache load data given a file name, if filename ends with .buffer, direct load binary
     *        otherwise the function will first check if fname + '.buffer' exists,
     *        if binary buffer exists, it will reads from binary buffer, otherwise, it will load from text file,
     *        and try to create a buffer file
     * \param fname name of binary data
     * \param silent whether print information or not
     * \param savebuffer whether do save binary buffer if it is text
     */
    inline void CacheLoad( const char *fname, bool silent = false, bool savebuffer = true ){
        int len = strlen( fname );
        if( len > 8 && !strcmp( fname + len - 7, ".buffer") ){
            this->LoadBinary( fname, silent ); return;
        }
        char bname[ 1024 ];
        sprintf( bname, "%s.buffer", fname );
        if( !this->LoadBinary( bname, silent ) ){
            this->LoadText( fname, silent );
            if( savebuffer ) this->SaveBinary( bname, silent );
        }
    }
private:
    /*! \brief update num_feature info by scanning the maximum feature index seen */
    inline void UpdateInfo( void ){
        this->num_feature = 0;
        for( size_t i = 0; i < data.NumRow(); i ++ ){
            booster::FMatrixS::Line sp = data[i];
            for( unsigned j = 0; j < sp.len; j ++ ){
                if( num_feature <= sp[j].findex ){
                    num_feature = sp[j].findex + 1;
                }
            }
        }
    }
};
|
|
||||||
};
|
|
||||||
};
|
|
||||||
#endif
|
|
||||||
@@ -1,119 +0,0 @@
|
|||||||
#ifndef XGBOOST_REG_EVAL_H
|
|
||||||
#define XGBOOST_REG_EVAL_H
|
|
||||||
/*!
|
|
||||||
* \file xgboost_reg_eval.h
|
|
||||||
* \brief evaluation metrics for regression and classification
|
|
||||||
* \author Kailong Chen: chenkl198812@gmail.com, Tianqi Chen: tianqi.tchen@gmail.com
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include <cmath>
|
|
||||||
#include <vector>
|
|
||||||
#include <algorithm>
|
|
||||||
#include "../utils/xgboost_utils.h"
|
|
||||||
#include "../utils/xgboost_omp.h"
|
|
||||||
|
|
||||||
namespace xgboost{
|
|
||||||
namespace regression{
|
|
||||||
/*! \brief abstract evaluator interface for loss/accuracy metrics */
struct IEvaluator{
    /*!
     * \brief evaluate a specific metric over a data set
     * \param preds prediction vector, one value per instance
     * \param labels label vector, same length as preds
     * \return the metric value
     */
    virtual float Eval( const std::vector<float> &preds,
                        const std::vector<float> &labels ) const= 0;
    /*! \return name of metric, used when printing evaluation logs */
    virtual const char *Name( void ) const= 0;
};
|
|
||||||
|
|
||||||
/*! \brief root mean square error between predictions and labels */
struct EvalRMSE : public IEvaluator{
    virtual float Eval( const std::vector<float> &preds,
                        const std::vector<float> &labels ) const{
        const unsigned ndata = static_cast<unsigned>( preds.size() );
        float sum = 0.0;
        // squared residuals are accumulated in parallel via OpenMP reduction
        #pragma omp parallel for reduction(+:sum) schedule( static )
        for( unsigned i = 0; i < ndata; ++ i ){
            float diff = preds[i] - labels[i];
            sum += diff * diff;
        }
        return sqrtf( sum / ndata );
    }
    virtual const char *Name( void ) const{
        return "rmse";
    }
};
|
|
||||||
|
|
||||||
/*! \brief Error */
|
|
||||||
struct EvalError : public IEvaluator{
|
|
||||||
virtual float Eval( const std::vector<float> &preds,
|
|
||||||
const std::vector<float> &labels ) const{
|
|
||||||
const unsigned ndata = static_cast<unsigned>( preds.size() );
|
|
||||||
unsigned nerr = 0;
|
|
||||||
#pragma omp parallel for reduction(+:nerr) schedule( static )
|
|
||||||
for( unsigned i = 0; i < ndata; ++ i ){
|
|
||||||
if( preds[i] > 0.5f ){
|
|
||||||
if( labels[i] < 0.5f ) nerr += 1;
|
|
||||||
}else{
|
|
||||||
if( labels[i] > 0.5f ) nerr += 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return static_cast<float>(nerr) / ndata;
|
|
||||||
}
|
|
||||||
virtual const char *Name( void ) const{
|
|
||||||
return "error";
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
|
|
||||||
/*! \brief Error */
|
|
||||||
struct EvalLogLoss : public IEvaluator{
|
|
||||||
virtual float Eval( const std::vector<float> &preds,
|
|
||||||
const std::vector<float> &labels ) const{
|
|
||||||
const unsigned ndata = static_cast<unsigned>( preds.size() );
|
|
||||||
unsigned nerr = 0;
|
|
||||||
#pragma omp parallel for reduction(+:nerr) schedule( static )
|
|
||||||
for( unsigned i = 0; i < ndata; ++ i ){
|
|
||||||
const float y = labels[i];
|
|
||||||
const float py = preds[i];
|
|
||||||
nerr -= y * std::log(py) + (1.0f-y)*std::log(1-py);
|
|
||||||
}
|
|
||||||
return static_cast<float>(nerr) / ndata;
|
|
||||||
}
|
|
||||||
virtual const char *Name( void ) const{
|
|
||||||
return "negllik";
|
|
||||||
}
|
|
||||||
};
|
|
||||||
};
|
|
||||||
|
|
||||||
namespace regression{
|
|
||||||
/*! \brief a set of evaluators, owning one instance of each known metric */
struct EvalSet{
public:
    // register the metric with the given name; unknown names are silently ignored
    inline void AddEval( const char *name ){
        if( !strcmp( name, "rmse") ) evals_.push_back( &rmse_ );
        if( !strcmp( name, "error") ) evals_.push_back( &error_ );
        if( !strcmp( name, "logloss") ) evals_.push_back( &logloss_ );
    }
    // deduplicate registered metrics (AddEval may be called repeatedly)
    inline void Init( void ){
        std::sort( evals_.begin(), evals_.end() );
        evals_.resize( std::unique( evals_.begin(), evals_.end() ) - evals_.begin() );
    }
    /*!
     * \brief run every registered metric and append "\t<evname>-<metric>:<value>"
     *        entries to the log line in fo
     */
    inline void Eval( FILE *fo, const char *evname,
                      const std::vector<float> &preds,
                      const std::vector<float> &labels ) const{
        for( size_t i = 0; i < evals_.size(); ++ i ){
            float res = evals_[i]->Eval( preds, labels );
            fprintf( fo, "\t%s-%s:%f", evname, evals_[i]->Name(), res );
        }
    }
private:
    // concrete metric instances; evals_ holds non-owning pointers into these
    EvalRMSE rmse_;
    EvalError error_;
    EvalLogLoss logloss_;
    std::vector<const IEvaluator*> evals_;
};
|
|
||||||
};
|
|
||||||
};
|
|
||||||
#endif
|
|
||||||
@@ -1,280 +0,0 @@
|
|||||||
#define _CRT_SECURE_NO_WARNINGS
|
|
||||||
#define _CRT_SECURE_NO_DEPRECATE
|
|
||||||
|
|
||||||
#include <ctime>
|
|
||||||
#include <string>
|
|
||||||
#include <cstring>
|
|
||||||
#include "xgboost_reg.h"
|
|
||||||
#include "../utils/xgboost_fmap.h"
|
|
||||||
#include "../utils/xgboost_random.h"
|
|
||||||
#include "../utils/xgboost_config.h"
|
|
||||||
|
|
||||||
namespace xgboost{
|
|
||||||
namespace regression{
|
|
||||||
/*!
 * \brief wrapping the training process of the gradient boosting regression model,
 *        given the configuation
 * \author Kailong Chen: chenkl198812@gmail.com, Tianqi Chen: tianqi.chen@gmail.com
 */
class RegBoostTask{
public:
    /*!
     * \brief program entry of the task
     * argv[1] is the configuration file; later arguments of the form
     * name=value override settings loaded from the file
     * \return process exit code (always 0)
     */
    inline int Run( int argc, char *argv[] ){
        if( argc < 2 ){
            printf("Usage: <config>\n");
            return 0;
        }
        // load settings from the configuration file first ...
        utils::ConfigIterator itr( argv[1] );
        while( itr.Next() ){
            this->SetParam( itr.name(), itr.val() );
        }
        // ... then let command line name=value pairs override them
        for( int i = 2; i < argc; i ++ ){
            char name[256], val[256];
            if( sscanf( argv[i], "%[^=]=%s", name, val ) == 2 ){
                this->SetParam( name, val );
            }
        }
        this->InitData();
        this->InitLearner();
        // dispatch on the configured task; "train" is the fallback
        if( task == "dump" ){
            this->TaskDump();
            return 0;
        }
        if( task == "interact" ){
            this->TaskInteractive(); return 0;
        }
        if( task == "dumppath" ){
            this->TaskDumpPath(); return 0;
        }
        if( task == "eval" ){
            this->TaskEval(); return 0;
        }
        if( task == "pred" ){
            this->TaskPred();
        }else{
            this->TaskTrain();
        }
        return 0;
    }
    /*!
     * \brief set one parameter by name; recognized names update task state,
     *        and every pair is also recorded in cfg so the learner
     *        receives it during InitLearner
     */
    inline void SetParam( const char *name, const char *val ){
        if( !strcmp("silent", name ) ) silent = atoi( val );
        if( !strcmp("use_buffer", name ) ) use_buffer = atoi( val );
        if( !strcmp("seed", name ) ) random::Seed( atoi(val) );
        if( !strcmp("num_round", name ) ) num_round = atoi( val );
        if( !strcmp("save_period", name ) ) save_period = atoi( val );
        if( !strcmp("task", name ) ) task = val;
        if( !strcmp("data", name ) ) train_path = val;
        if( !strcmp("test:data", name ) ) test_path = val;
        if( !strcmp("model_in", name ) ) model_in = val;
        if( !strcmp("model_out", name ) ) model_out = val;
        if( !strcmp("model_dir", name ) ) model_dir_path = val;
        if( !strcmp("fmap", name ) ) name_fmap = val;
        if( !strcmp("name_dump", name ) ) name_dump = val;
        if( !strcmp("name_dumppath", name ) ) name_dumppath = val;
        if( !strcmp("name_pred", name ) ) name_pred = val;
        if( !strcmp("dump_stats", name ) ) dump_model_stats = atoi( val );
        if( !strcmp("interact:action", name ) ) interact_action = val;
        // settings prefixed "batch:" are replayed later by TaskInteractive
        if( !strncmp("batch:", name, 6 ) ){
            cfg_batch.PushBack( name + 6, val );
        }
        // "eval[<name>]=<path>" registers an extra evaluation data set
        if( !strncmp("eval[", name, 5 ) ) {
            char evname[ 256 ];
            utils::Assert( sscanf( name, "eval[%[^]]", evname ) == 1, "must specify evaluation name for display");
            eval_data_names.push_back( std::string( evname ) );
            eval_data_paths.push_back( std::string( val ) );
        }
        cfg.PushBack( name, val );
    }
public:
    RegBoostTask( void ){
        // default parameters
        silent = 0;
        use_buffer = 1;
        num_round = 10;
        save_period = 0;
        dump_model_stats = 0;
        task = "train";
        model_in = "NULL";
        model_out = "NULL";
        name_fmap = "NULL";
        name_pred = "pred.txt";
        name_dump = "dump.txt";
        name_dumppath = "dump.path.txt";
        model_dir_path = "./";
        interact_action = "update";
    }
    ~RegBoostTask( void ){
        // release the evaluation matrices allocated in InitData
        for( size_t i = 0; i < deval.size(); i ++ ){
            delete deval[i];
        }
    }
private:
    // load the data sets required by the current task
    inline void InitData( void ){
        if( name_fmap != "NULL" ) fmap.LoadText( name_fmap.c_str() );
        if( task == "dump" ) return;
        if( task == "pred" || task == "dumppath" ){
            data.CacheLoad( test_path.c_str(), silent!=0, use_buffer!=0 );
        }else{
            // training
            data.CacheLoad( train_path.c_str(), silent!=0, use_buffer!=0 );
            utils::Assert( eval_data_names.size() == eval_data_paths.size() );
            for( size_t i = 0; i < eval_data_names.size(); ++ i ){
                // deleted in the destructor
                deval.push_back( new DMatrix() );
                deval.back()->CacheLoad( eval_data_paths[i].c_str(), silent!=0, use_buffer!=0 );
            }
        }
        learner.SetData( &data, deval, eval_data_names );
    }
    // push configuration into the learner, then load or initialize the model
    inline void InitLearner( void ){
        cfg.BeforeFirst();
        while( cfg.Next() ){
            learner.SetParam( cfg.name(), cfg.val() );
        }
        if( model_in != "NULL" ){
            utils::FileStream fi( utils::FopenCheck( model_in.c_str(), "rb") );
            learner.LoadModel( fi );
            fi.Close();
        }else{
            // only training may start from scratch
            utils::Assert( task == "train", "model_in not specified" );
            learner.InitModel();
        }
        learner.InitTrainer();
    }
    // run num_round boosting iterations, snapshotting every save_period rounds
    inline void TaskTrain( void ){
        const time_t start = time( NULL );
        unsigned long elapsed = 0;
        for( int i = 0; i < num_round; ++ i ){
            elapsed = (unsigned long)(time(NULL) - start);
            if( !silent ) printf("boosting round %d, %lu sec elapsed\n", i , elapsed );
            learner.UpdateOneIter( i );
            learner.EvalOneIter( i );
            if( save_period != 0 && (i+1) % save_period == 0 ){
                this->SaveModel( i );
            }
            elapsed = (unsigned long)(time(NULL) - start);
        }
        // always save final round
        if( save_period == 0 || num_round % save_period != 0 ){
            if( model_out == "NULL" ){
                this->SaveModel( num_round - 1 );
            }else{
                this->SaveModel( model_out.c_str() );
            }
        }
        if( !silent ){
            printf("\nupdating end, %lu sec in all\n", elapsed );
        }
    }
    // evaluate the loaded model on the configured evaluation sets
    inline void TaskEval( void ){
        learner.EvalOneIter( 0 );
    }
    // apply interactive updates driven by the recorded "batch:" settings
    inline void TaskInteractive( void ){
        const time_t start = time( NULL );
        unsigned long elapsed = 0;
        int batch_action = 0;

        cfg_batch.BeforeFirst();
        while( cfg_batch.Next() ){
            if( !strcmp( cfg_batch.name(), "run" ) ){
                // each "run" entry triggers one interactive update
                learner.UpdateInteract( interact_action );
                batch_action += 1;
            } else{
                learner.SetParam( cfg_batch.name(), cfg_batch.val() );
            }
        }

        // no explicit batch "run": perform a single interactive update
        if( batch_action == 0 ){
            learner.UpdateInteract( interact_action );
        }
        utils::Assert( model_out != "NULL", "interactive mode must specify model_out" );
        this->SaveModel( model_out.c_str() );
        elapsed = (unsigned long)(time(NULL) - start);

        if( !silent ){
            printf("\ninteractive update, %d batch actions, %lu sec in all\n", batch_action, elapsed );
        }
    }

    // write a readable dump of the model to name_dump
    inline void TaskDump( void ){
        FILE *fo = utils::FopenCheck( name_dump.c_str(), "w" );
        learner.DumpModel( fo, fmap, dump_model_stats != 0 );
        fclose( fo );
    }
    // write per-instance prediction paths to name_dumppath
    inline void TaskDumpPath( void ){
        FILE *fo = utils::FopenCheck( name_dumppath.c_str(), "w" );
        learner.DumpPath( fo, data );
        fclose( fo );
    }
    // save the current model to the given file name
    inline void SaveModel( const char *fname ) const{
        utils::FileStream fo( utils::FopenCheck( fname, "wb" ) );
        learner.SaveModel( fo );
        fo.Close();
    }
    // save a numbered snapshot ("<model_dir>/%04d.model", 1-based) for round i
    inline void SaveModel( int i ) const{
        char fname[256];
        sprintf( fname ,"%s/%04d.model", model_dir_path.c_str(), i+1 );
        this->SaveModel( fname );
    }
    // predict on the loaded data and write one value per line to name_pred
    inline void TaskPred( void ){
        std::vector<float> preds;
        if( !silent ) printf("start prediction...\n");
        learner.Predict( preds, data );
        if( !silent ) printf("writing prediction to %s\n", name_pred.c_str() );
        FILE *fo = utils::FopenCheck( name_pred.c_str(), "w" );
        for( size_t i = 0; i < preds.size(); i ++ ){
            fprintf( fo, "%f\n", preds[i] );
        }
        fclose( fo );
    }
private:
    /* \brief whether silent */
    int silent;
    /* \brief whether use auto binary buffer */
    int use_buffer;
    /* \brief number of boosting iterations */
    int num_round;
    /* \brief the period to save the model, 0 means only save the final round model */
    int save_period;
    /*! \brief interact action */
    std::string interact_action;
    /* \brief the path of training/test data set */
    std::string train_path, test_path;
    /* \brief the path of test model file, or file to restart training */
    std::string model_in;
    /* \brief the path of final model file, to be saved */
    std::string model_out;
    /* \brief the path of directory containing the saved models */
    std::string model_dir_path;
    /* \brief task to perform */
    std::string task;
    /* \brief name of predict file */
    std::string name_pred;
    /* \brief whether dump statistics along with model */
    int dump_model_stats;
    /* \brief name of feature map */
    std::string name_fmap;
    /* \brief name of dump file */
    std::string name_dump;
    /* \brief name of dump path file */
    std::string name_dumppath;
    /* \brief the paths of validation data sets */
    std::vector<std::string> eval_data_paths;
    /* \brief the names of the evaluation data used in output log */
    std::vector<std::string> eval_data_names;
    /*! \brief saves configurations */
    utils::ConfigSaver cfg;
    /*! \brief batch configurations */
    utils::ConfigSaver cfg_batch;
private:
    /* \brief training/prediction data matrix */
    DMatrix data;
    /* \brief evaluation matrices, owned by this object (freed in destructor) */
    std::vector<DMatrix*> deval;
    /* \brief feature map used when dumping models */
    utils::FeatMap fmap;
    /* \brief the gradient boosting learner */
    RegBoostLearner learner;
};
|
|
||||||
};
|
|
||||||
};
|
|
||||||
|
|
||||||
// program entry: fix the global random seed for reproducibility,
// then delegate everything to the task driver
int main( int argc, char *argv[] ){
    xgboost::random::Seed( 0 );
    xgboost::regression::RegBoostTask tsk;
    return tsk.Run( argc, argv );
}
|
|
||||||
26
tools/Makefile
Normal file
26
tools/Makefile
Normal file
@@ -0,0 +1,26 @@
|
|||||||
|
# toolchain configuration
export CC = gcc
export CXX = g++
# warnings, optimization, SSE2 and OpenMP; unknown-pragma warnings silenced
export CFLAGS = -Wall -O3 -msse2 -Wno-unknown-pragmas -fopenmp

# specify tensor path
BIN = xgcombine_buffer
OBJ =
.PHONY: clean all

all: $(BIN) $(OBJ)
export LDFLAGS= -pthread -lm

# per-target source prerequisites
xgcombine_buffer : xgcombine_buffer.cpp

# generic link rule: compile and link every .cpp/.o/.c prerequisite
$(BIN) :
	$(CXX) $(CFLAGS) $(LDFLAGS) -o $@ $(filter %.cpp %.o %.c, $^)

# generic compile rule for intermediate objects
$(OBJ) :
	$(CXX) -c $(CFLAGS) -o $@ $(firstword $(filter %.cpp %.c, $^) )

install:
	cp -f -r $(BIN) $(INSTALL_PATH)

clean:
	$(RM) $(OBJ) $(BIN) *~
|
||||||
248
tools/xgcombine_buffer.cpp
Normal file
248
tools/xgcombine_buffer.cpp
Normal file
@@ -0,0 +1,248 @@
|
|||||||
|
/*!
|
||||||
|
* a tool to combine different set of features into binary buffer
|
||||||
|
 * not well organized code, but does its job
|
||||||
|
* \author Tianqi Chen: tianqi.tchen@gmail.com
|
||||||
|
*/
|
||||||
|
#define _CRT_SECURE_NO_WARNINGS
|
||||||
|
#define _CRT_SECURE_NO_DEPRECATE
|
||||||
|
|
||||||
|
#include <cstdio>
|
||||||
|
#include <cstring>
|
||||||
|
#include <ctime>
|
||||||
|
#include <cmath>
|
||||||
|
#include "../regrank/xgboost_regrank_data.h"
|
||||||
|
#include "../utils/xgboost_utils.h"
|
||||||
|
|
||||||
|
using namespace xgboost;
|
||||||
|
using namespace xgboost::booster;
|
||||||
|
using namespace xgboost::regrank;
|
||||||
|
|
||||||
|
// header in dataset
|
||||||
|
// per-input-file header describing one feature source of the combined dataset
struct Header{
    FILE *fi;       // handle of the feature file
    int tmp_num;    // feature count announced for the current line
    int base;       // offset added to this file's local feature indices
    int num_feat;   // declared number of features in this file
    // whether it's dense format
    bool is_dense;
    bool warned;    // set once an out-of-bound index has been reported

    Header( void ){
        warned   = false;
        is_dense = false;
    }

    // warn (at most once per file) when a feature index is >= num_feat
    inline void CheckBase( unsigned findex ){
        if( warned ) return;
        if( findex >= (unsigned)num_feat ){
            fprintf( stderr, "warning:some feature exceed bound, num_feat=%d\n", num_feat );
            warned = true;
        }
    }
};
|
||||||
|
|
||||||
|
|
||||||
|
inline int norm( std::vector<Header> &vec, int base = 0 ){
|
||||||
|
int n = base;
|
||||||
|
for( size_t i = 0; i < vec.size(); i ++ ){
|
||||||
|
if( vec[i].is_dense ) vec[i].num_feat = 1;
|
||||||
|
vec[i].base = n; n += vec[i].num_feat;
|
||||||
|
}
|
||||||
|
return n;
|
||||||
|
}
|
||||||
|
|
||||||
|
inline void vclose( std::vector<Header> &vec ){
|
||||||
|
for( size_t i = 0; i < vec.size(); i ++ ){
|
||||||
|
fclose( vec[i].fi );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
inline int readnum( std::vector<Header> &vec ){
|
||||||
|
int n = 0;
|
||||||
|
for( size_t i = 0; i < vec.size(); i ++ ){
|
||||||
|
if( !vec[i].is_dense ){
|
||||||
|
utils::Assert( fscanf( vec[i].fi, "%d", &vec[i].tmp_num ) == 1, "load num" );
|
||||||
|
n += vec[i].tmp_num;
|
||||||
|
}else{
|
||||||
|
n ++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return n;
|
||||||
|
}
|
||||||
|
|
||||||
|
inline void vskip( std::vector<Header> &vec ){
|
||||||
|
for( size_t i = 0; i < vec.size(); i ++ ){
|
||||||
|
if( !vec[i].is_dense ){
|
||||||
|
utils::Assert( fscanf( vec[i].fi, "%*d%*[^\n]\n" ) >= 0 );
|
||||||
|
}else{
|
||||||
|
utils::Assert( fscanf( vec[i].fi, "%*f\n" ) >= 0 );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// loads label/feature/group/weight streams and assembles the in-memory matrix
class DataLoader: public DMatrix{
public:
    // whether to do node and edge feature renormalization
    // (L2-normalizes each instance's feature values when set)
    int rescale;
    // stop after this many input lines; negative means unlimited
    int linelimit;
public:
    // label stream, plus optional whitelist / group-size / weight streams
    // (fwlist, fgroup, fweight may stay NULL when the option is unused)
    FILE *fp, *fwlist, *fgroup, *fweight;
    // one header per feature source file
    std::vector<Header> fheader;
    // NOTE(review): appears unused within this class — confirm before removal
    std::vector<FMatrixS::REntry> entry;
    DataLoader( void ){
        rescale = 0;
        linelimit = -1;
        fp = NULL; fwlist = NULL; fgroup = NULL; fweight = NULL;
    }
private:
    // read one instance's features from every source file, shifting each
    // local index by that file's base offset; appends to findex/fvalue
    inline void Load( std::vector<unsigned> &findex, std::vector<float> &fvalue, std::vector<Header> &vec ){
        unsigned fidx; float fv;
        for( size_t i = 0; i < vec.size(); i ++ ){
            if( !vec[i].is_dense ) {
                // sparse: tmp_num "index:value" pairs (set by readnum earlier)
                for( int j = 0; j < vec[i].tmp_num; j ++ ){
                    utils::Assert( fscanf ( vec[i].fi, "%u:%f", &fidx, &fv ) == 2, "Error when load feat" );
                    vec[i].CheckBase( fidx );
                    fidx += vec[i].base;
                    findex.push_back( fidx ); fvalue.push_back( fv );
                }
            }else{
                // dense: exactly one value; its index is the file's base
                utils::Assert( fscanf ( vec[i].fi, "%f", &fv ) == 1, "load feat" );
                fidx = vec[i].base;
                findex.push_back( fidx ); fvalue.push_back( fv );
            }
        }
    }
    // L2-normalize the value vector in place
    // NOTE(review): divides by zero when all values are 0 — confirm inputs
    inline void DoRescale( std::vector<float> &vec ){
        double sum = 0.0;
        for( size_t i = 0; i < vec.size(); i ++ ){
            sum += vec[i] * vec[i];
        }
        sum = sqrt( sum );
        for( size_t i = 0; i < vec.size(); i ++ ){
            vec[i] /= sum;
        }
    }
public:
    // basically we are loading all the data inside
    inline void Load( void ){
        this->data.Clear();
        float label, weight = 0.0f;

        // ngleft: instances remaining in the current group
        // ngacc:  instances actually accepted into the current group
        unsigned ngleft = 0, ngacc = 0;
        if( fgroup != NULL ){
            info.group_ptr.clear();
            info.group_ptr.push_back(0);
        }

        // one loop iteration per instance, driven by the label stream
        while( fscanf( fp, "%f", &label ) == 1 ){
            if( ngleft == 0 && fgroup != NULL ){
                utils::Assert( fscanf( fgroup, "%u", &ngleft ) == 1 );
            }
            if( fweight != NULL ){
                utils::Assert( fscanf( fweight, "%f", &weight ) == 1 );
            }

            // NOTE(review): when fgroup==NULL this decrements an unsigned 0
            // and wraps; ngleft is only consulted when fgroup!=NULL so it
            // looks benign — confirm
            ngleft -= 1; ngacc += 1;

            // pass==0 means the whitelist excludes this instance
            int pass = 1;
            if( fwlist != NULL ){
                utils::Assert( fscanf( fwlist, "%u", &pass ) ==1 );
            }
            if( pass == 0 ){
                // skip the feature line(s) and undo the group membership
                vskip( fheader ); ngacc -= 1;
            }else{
                const int nfeat = readnum( fheader );
                std::vector<unsigned> findex;
                std::vector<float> fvalue;
                // pairs
                this->Load( findex, fvalue, fheader );
                utils::Assert( findex.size() == (unsigned)nfeat );
                if( rescale != 0 ) this->DoRescale( fvalue );
                // push back data :)
                this->info.labels.push_back( label );
                // push back weight if any
                if( fweight != NULL ){
                    this->info.weights.push_back( weight );
                }
                this->data.AddRow( findex, fvalue );
            }
            // close the group once its declared size has been consumed
            if( ngleft == 0 && fgroup != NULL && ngacc != 0 ){
                info.group_ptr.push_back( info.group_ptr.back() + ngacc );
                utils::Assert( info.group_ptr.back() == data.NumRow(), "group size must match num rows" );
                ngacc = 0;
            }
            // linelimit
            if( linelimit >= 0 ) {
                if( -- linelimit <= 0 ) break;
            }
        }
        // flush the final group if the loop ended mid-group via linelimit
        if( ngleft == 0 && fgroup != NULL && ngacc != 0 ){
            info.group_ptr.push_back( info.group_ptr.back() + ngacc );
            utils::Assert( info.group_ptr.back() == data.NumRow(), "group size must match num rows" );
        }
        this->data.InitData();
    }
};
|
||||||
|
|
||||||
|
// directory holding the per-feature input files named on the command line
const char *folder = "features";
|
||||||
|
|
||||||
|
int main( int argc, char *argv[] ){
|
||||||
|
if( argc < 3 ){
|
||||||
|
printf("Usage:xgcombine_buffer <inname> <outname> [options] -f [features] -fd [densefeatures]\n"\
|
||||||
|
"options: -rescale -linelimit -fgroup <groupfilename> -wlist <whitelistinstance>\n");
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
DataLoader loader;
|
||||||
|
time_t start = time( NULL );
|
||||||
|
|
||||||
|
int mode = 0;
|
||||||
|
for( int i = 3; i < argc; i ++ ){
|
||||||
|
if( !strcmp( argv[i], "-f") ){
|
||||||
|
mode = 0; continue;
|
||||||
|
}
|
||||||
|
if( !strcmp( argv[i], "-fd") ){
|
||||||
|
mode = 2; continue;
|
||||||
|
}
|
||||||
|
if( !strcmp( argv[i], "-rescale") ){
|
||||||
|
loader.rescale = 1; continue;
|
||||||
|
}
|
||||||
|
if( !strcmp( argv[i], "-wlist") ){
|
||||||
|
loader.fwlist = utils::FopenCheck( argv[ ++i ], "r" ); continue;
|
||||||
|
}
|
||||||
|
if( !strcmp( argv[i], "-fgroup") ){
|
||||||
|
loader.fgroup = utils::FopenCheck( argv[ ++i ], "r" ); continue;
|
||||||
|
}
|
||||||
|
if( !strcmp( argv[i], "-fweight") ){
|
||||||
|
loader.fweight = utils::FopenCheck( argv[ ++i ], "r" ); continue;
|
||||||
|
}
|
||||||
|
if( !strcmp( argv[i], "-linelimit") ){
|
||||||
|
loader.linelimit = atoi( argv[ ++i ] ); continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
char name[ 256 ];
|
||||||
|
sprintf( name, "%s/%s.%s", folder, argv[1], argv[i] );
|
||||||
|
Header h;
|
||||||
|
h.fi = utils::FopenCheck( name, "r" );
|
||||||
|
|
||||||
|
if( mode == 2 ){
|
||||||
|
h.is_dense = true; h.num_feat = 1;
|
||||||
|
loader.fheader.push_back( h );
|
||||||
|
}else{
|
||||||
|
utils::Assert( fscanf( h.fi, "%d", &h.num_feat ) == 1, "num feat" );
|
||||||
|
switch( mode ){
|
||||||
|
case 0: loader.fheader.push_back( h ); break;
|
||||||
|
default: ;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
loader.fp = utils::FopenCheck( argv[1], "r" );
|
||||||
|
|
||||||
|
printf("num_features=%d\n", norm( loader.fheader ) );
|
||||||
|
printf("start creating buffer...\n");
|
||||||
|
loader.Load();
|
||||||
|
loader.SaveBinary( argv[2] );
|
||||||
|
// close files
|
||||||
|
fclose( loader.fp );
|
||||||
|
if( loader.fwlist != NULL ) fclose( loader.fwlist );
|
||||||
|
if( loader.fgroup != NULL ) fclose( loader.fgroup );
|
||||||
|
vclose( loader.fheader );
|
||||||
|
printf("all generation end, %lu sec used\n", (unsigned long)(time(NULL) - start) );
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
@@ -94,7 +94,8 @@ namespace xgboost{
|
|||||||
case '\"':
|
case '\"':
|
||||||
if (i == 0){
|
if (i == 0){
|
||||||
ParseStr(tok); ch_buf = fgetc(fi); return new_line;
|
ParseStr(tok); ch_buf = fgetc(fi); return new_line;
|
||||||
}else{
|
}
|
||||||
|
else{
|
||||||
Error("token followed directly by string");
|
Error("token followed directly by string");
|
||||||
}
|
}
|
||||||
case '=':
|
case '=':
|
||||||
@@ -102,7 +103,8 @@ namespace xgboost{
|
|||||||
ch_buf = fgetc(fi);
|
ch_buf = fgetc(fi);
|
||||||
tok[0] = '=';
|
tok[0] = '=';
|
||||||
tok[1] = '\0';
|
tok[1] = '\0';
|
||||||
}else{
|
}
|
||||||
|
else{
|
||||||
tok[i] = '\0';
|
tok[i] = '\0';
|
||||||
}
|
}
|
||||||
return new_line;
|
return new_line;
|
||||||
@@ -155,7 +157,8 @@ namespace xgboost{
|
|||||||
if (priority == 0){
|
if (priority == 0){
|
||||||
names.push_back(std::string(name));
|
names.push_back(std::string(name));
|
||||||
values.push_back(std::string(val));
|
values.push_back(std::string(val));
|
||||||
}else{
|
}
|
||||||
|
else{
|
||||||
names_high.push_back(std::string(name));
|
names_high.push_back(std::string(name));
|
||||||
values_high.push_back(std::string(val));
|
values_high.push_back(std::string(val));
|
||||||
}
|
}
|
||||||
@@ -184,7 +187,8 @@ namespace xgboost{
|
|||||||
size_t i = idx - 1;
|
size_t i = idx - 1;
|
||||||
if (i >= names.size()){
|
if (i >= names.size()){
|
||||||
return names_high[i - names.size()].c_str();
|
return names_high[i - names.size()].c_str();
|
||||||
}else{
|
}
|
||||||
|
else{
|
||||||
return names[i].c_str();
|
return names[i].c_str();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -197,7 +201,8 @@ namespace xgboost{
|
|||||||
size_t i = idx - 1;
|
size_t i = idx - 1;
|
||||||
if (i >= values.size()){
|
if (i >= values.size()){
|
||||||
return values_high[i - values.size()].c_str();
|
return values_high[i - values.size()].c_str();
|
||||||
}else{
|
}
|
||||||
|
else{
|
||||||
return values[i].c_str();
|
return values[i].c_str();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -31,8 +31,8 @@ namespace xgboost{
|
|||||||
/*! \brief load feature map from text format */
|
/*! \brief load feature map from text format */
|
||||||
inline void LoadText(FILE *fi){
|
inline void LoadText(FILE *fi){
|
||||||
int fid;
|
int fid;
|
||||||
char fname[256], ftype[256];
|
char fname[1256], ftype[1256];
|
||||||
while( fscanf( fi, "%d%s%s", &fid, fname, ftype ) == 3 ){
|
while (fscanf(fi, "%d\t%[^\t]\t%s\n", &fid, fname, ftype) == 3){
|
||||||
utils::Assert(fid == (int)names_.size(), "invalid fmap format");
|
utils::Assert(fid == (int)names_.size(), "invalid fmap format");
|
||||||
names_.push_back(std::string(fname));
|
names_.push_back(std::string(fname));
|
||||||
types_.push_back(GetType(ftype));
|
types_.push_back(GetType(ftype));
|
||||||
|
|||||||
@@ -50,7 +50,8 @@ namespace xgboost{
|
|||||||
if (!UseAcList){
|
if (!UseAcList){
|
||||||
rptr.clear();
|
rptr.clear();
|
||||||
rptr.resize(nrows + 1, 0);
|
rptr.resize(nrows + 1, 0);
|
||||||
}else{
|
}
|
||||||
|
else{
|
||||||
Assert(nrows + 1 == rptr.size(), "rptr must be initialized already");
|
Assert(nrows + 1 == rptr.size(), "rptr must be initialized already");
|
||||||
this->Cleanup();
|
this->Cleanup();
|
||||||
}
|
}
|
||||||
@@ -79,7 +80,8 @@ namespace xgboost{
|
|||||||
rptr[i] = start;
|
rptr[i] = start;
|
||||||
start += rlen;
|
start += rlen;
|
||||||
}
|
}
|
||||||
}else{
|
}
|
||||||
|
else{
|
||||||
// case with active list
|
// case with active list
|
||||||
std::sort(aclist.begin(), aclist.end());
|
std::sort(aclist.begin(), aclist.end());
|
||||||
|
|
||||||
|
|||||||
@@ -88,7 +88,8 @@ namespace xgboost{
|
|||||||
u = NextDouble();
|
u = NextDouble();
|
||||||
} while (u == 0.0);
|
} while (u == 0.0);
|
||||||
return SampleGamma(alpha + 1.0, beta) * pow(u, 1.0 / alpha);
|
return SampleGamma(alpha + 1.0, beta) * pow(u, 1.0 / alpha);
|
||||||
} else {
|
}
|
||||||
|
else {
|
||||||
double d, c, x, v, u;
|
double d, c, x, v, u;
|
||||||
d = alpha - 1.0 / 3.0;
|
d = alpha - 1.0 / 3.0;
|
||||||
c = 1.0 / sqrt(9.0 * d);
|
c = 1.0 / sqrt(9.0 * d);
|
||||||
@@ -126,6 +127,22 @@ namespace xgboost{
|
|||||||
Shuffle(&data[0], data.size());
|
Shuffle(&data[0], data.size());
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
namespace random{
|
||||||
|
/*! \brief random number generator with independent random number seed*/
|
||||||
|
struct Random{
|
||||||
|
/*! \brief set random number seed */
|
||||||
|
inline void Seed( unsigned sd ){
|
||||||
|
this->rseed = sd;
|
||||||
|
}
|
||||||
|
/*! \brief return a real number uniform in [0,1) */
|
||||||
|
inline double RandDouble( void ){
|
||||||
|
return static_cast<double>( rand_r( &rseed ) ) / (static_cast<double>( RAND_MAX )+1.0);
|
||||||
|
}
|
||||||
|
// random number seed
|
||||||
|
unsigned rseed;
|
||||||
|
};
|
||||||
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
@@ -38,6 +38,7 @@ namespace xgboost{
|
|||||||
namespace utils{
|
namespace utils{
|
||||||
inline void Error(const char *msg){
|
inline void Error(const char *msg){
|
||||||
fprintf(stderr, "Error:%s\n", msg);
|
fprintf(stderr, "Error:%s\n", msg);
|
||||||
|
fflush(stderr);
|
||||||
exit(-1);
|
exit(-1);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -58,6 +59,7 @@ namespace xgboost{
|
|||||||
FILE *fp = fopen64(fname, flag);
|
FILE *fp = fopen64(fname, flag);
|
||||||
if (fp == NULL){
|
if (fp == NULL){
|
||||||
fprintf(stderr, "can not open file \"%s\" \n", fname);
|
fprintf(stderr, "can not open file \"%s\" \n", fname);
|
||||||
|
fflush(stderr);
|
||||||
exit(-1);
|
exit(-1);
|
||||||
}
|
}
|
||||||
return fp;
|
return fp;
|
||||||
|
|||||||
Reference in New Issue
Block a user