Merge branch 'dev' of https://github.com/tqchen/xgboost into dev

Conflicts: regrank/xgboost_regrank_obj.hpp
2014-05-07 22:15:59 +08:00
parent 8eae8d956d 833cf29867
commit 8b3fc78999
17 changed files with 8756 additions and 109 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -17,3 +17,4 @@
 *buffer
 *model
 xgboost
 *pyc
--- a/booster/xgboost_gbmbase.h
+++ b/booster/xgboost_gbmbase.h
@@ -88,8 +88,8 @@ namespace xgboost{
                    }
                }
                if (mparam.num_pbuffer != 0){
-                    pred_buffer.resize(mparam.num_pbuffer);
+                    pred_buffer.resize(mparam.PredBufferSize());
-                    pred_counter.resize(mparam.num_pbuffer);
+                    pred_counter.resize(mparam.PredBufferSize());
                    utils::Assert(fi.Read(&pred_buffer[0], pred_buffer.size()*sizeof(float)) != 0);
                    utils::Assert(fi.Read(&pred_counter[0], pred_counter.size()*sizeof(unsigned)) != 0);
                }
@@ -117,8 +117,8 @@ namespace xgboost{
             */
            inline void InitModel(void){
                pred_buffer.clear(); pred_counter.clear();
-                pred_buffer.resize(mparam.num_pbuffer, 0.0);
+                pred_buffer.resize(mparam.PredBufferSize(), 0.0);
-                pred_counter.resize(mparam.num_pbuffer, 0);
+                pred_counter.resize(mparam.PredBufferSize(), 0);
                utils::Assert(mparam.num_boosters == 0);
                utils::Assert(boosters.size() == 0);
            }
@@ -130,6 +130,7 @@ namespace xgboost{
                if (tparam.nthread != 0){
                    omp_set_num_threads(tparam.nthread);
                }
                if (mparam.num_booster_group == 0) mparam.num_booster_group = 1;
                // make sure all the boosters get the latest parameters
                for (size_t i = 0; i < this->boosters.size(); i++){
                    this->ConfigBooster(this->boosters[i]);
@@ -175,12 +176,14 @@ namespace xgboost{
             * \param feats features of each instance
             * \param root_index pre-partitioned root index of each instance,
             *          root_index.size() can be 0 which indicates that no pre-partition involved
             * \param bst_group which booster group it belongs to, by default, we only have 1 booster group, and leave this parameter as default
             */
            inline void DoBoost(std::vector<float> &grad,
                                std::vector<float> &hess,
                                const booster::FMatrixS &feats,
-                                const std::vector<unsigned> &root_index) {
+                                const std::vector<unsigned> &root_index,
-                booster::IBooster *bst = this->GetUpdateBooster();
+                                int bst_group = 0 ) {
                booster::IBooster *bst = this->GetUpdateBooster( bst_group );
                bst->DoBoost(grad, hess, feats, root_index);
            }
            /*!
@@ -190,26 +193,30 @@ namespace xgboost{
             * \param row_index  row index in the feature matrix
             * \param buffer_index the buffer index of the current feature line, default -1 means no buffer assigned
             * \param root_index root id of current instance, default = 0
             * \param bst_group booster group index 
             * \return prediction
             */
-            inline float Predict(const FMatrixS &feats, bst_uint row_index, int buffer_index = -1, unsigned root_index = 0){
+            inline float Predict(const FMatrixS &feats, bst_uint row_index, 
-                size_t istart = 0;
+                                 int buffer_index = -1, unsigned root_index = 0, int bst_group = 0 ){
                size_t itop = 0;
                float  psum = 0.0f;
                const int bid = mparam.BufferOffset(buffer_index, bst_group);
                // load buffered results if any
-                if (mparam.do_reboost == 0 && buffer_index >= 0){
+                if (mparam.do_reboost == 0 && bid >= 0){
-                    utils::Assert(buffer_index < mparam.num_pbuffer, "buffer index exceed num_pbuffer");
+                    itop = this->pred_counter[bid];
-                    istart = this->pred_counter[buffer_index];
+                    psum = this->pred_buffer[bid];
                    psum = this->pred_buffer[buffer_index];
                }
-                for (size_t i = istart; i < this->boosters.size(); i++){
+                for (size_t i = itop; i < this->boosters.size(); ++i ){
                    if( booster_info[i] == bst_group ){
                        psum += this->boosters[i]->Predict(feats, row_index, root_index);
                    }
                }
                // updated the buffered results
-                if (mparam.do_reboost == 0 && buffer_index >= 0){
+                if (mparam.do_reboost == 0 && bid >= 0){
-                    this->pred_counter[buffer_index] = static_cast<unsigned>(boosters.size());
+                    this->pred_counter[bid] = static_cast<unsigned>(boosters.size());
-                    this->pred_buffer[buffer_index] = psum;
+                    this->pred_buffer[bid] = psum;
                }
                return psum;
            }
@@ -217,6 +224,11 @@ namespace xgboost{
            inline int NumBoosters(void) const{
                return mparam.num_boosters;
            }
            /*! \return number of booster groups */
            inline int NumBoosterGroup(void) const{
                if( mparam.num_booster_group == 0 ) return 1;
                return mparam.num_booster_group;
            }
        public:
            //--------trial code for interactive update an existing booster------
            //-------- usually not needed, ignore this region ---------
@@ -224,14 +236,17 @@ namespace xgboost{
             * \brief same as Predict, but removes the prediction of booster to be updated
             *        this function must be called once and only once for every data with pbuffer
             */
-            inline float InteractPredict(const FMatrixS &feats, bst_uint row_index, int buffer_index = -1, unsigned root_index = 0){
+            inline float InteractPredict(const FMatrixS &feats, bst_uint row_index, 
                                         int buffer_index = -1, unsigned root_index = 0, int bst_group = 0){
                float psum = this->Predict(feats, row_index, buffer_index, root_index);
                if (tparam.reupdate_booster != -1){
                    const int bid = tparam.reupdate_booster;
                    utils::Assert(bid >= 0 && bid < (int)boosters.size(), "interact:booster_index exceed existing bound");
                    if( bst_group == booster_info[bid] ){
                        psum -= boosters[bid]->Predict(feats, row_index, root_index);
                    }
                    if (mparam.do_reboost == 0 && buffer_index >= 0){
-                        this->pred_buffer[buffer_index] = psum;
+                        this->pred_buffer[mparam.BufferOffset(buffer_index,bst_group)] = psum;
                    }
                }
                return psum;
@@ -247,14 +262,20 @@ namespace xgboost{
                }
                boosters.resize(mparam.num_boosters -= 1);
                booster_info.resize(boosters.size());                
                // update pred counter
                for( size_t i = 0; i < pred_counter.size(); ++ i ){
                    if( pred_counter[i] > (unsigned)bid ) pred_counter[i] -= 1;                    
                }
            }
            /*! \brief update the prediction buffer, after booster have been updated */
-            inline void InteractRePredict(const FMatrixS &feats, bst_uint row_index, int buffer_index = -1, unsigned root_index = 0){
+            inline void InteractRePredict(const FMatrixS &feats, bst_uint row_index, 
                                          int buffer_index = -1, unsigned root_index = 0, int bst_group = 0 ){
                if (tparam.reupdate_booster != -1){
                    const int bid = tparam.reupdate_booster;
                    if( booster_info[bid]  != bst_group ) return;
                    utils::Assert(bid >= 0 && bid < (int)boosters.size(), "interact:booster_index exceed existing bound");
                    if (mparam.do_reboost == 0 && buffer_index >= 0){
-                        this->pred_buffer[buffer_index] += boosters[bid]->Predict(feats, row_index, root_index);
+                        this->pred_buffer[mparam.BufferOffset(buffer_index,bst_group)] += boosters[bid]->Predict(feats, row_index, root_index);
                    }
                }
            }
@@ -278,18 +299,19 @@ namespace xgboost{
             * \brief get a booster to update
             * \return the booster created
             */
-            inline booster::IBooster *GetUpdateBooster(void){
+            inline booster::IBooster *GetUpdateBooster(int bst_group){
                if (tparam.reupdate_booster != -1){
                    const int bid = tparam.reupdate_booster;
                    utils::Assert(bid >= 0 && bid < (int)boosters.size(), "interact:booster_index exceed existing bound");
                    this->ConfigBooster(boosters[bid]);
                    utils::Assert( bst_group == booster_info[bid], "booster group must match existing reupdate booster");
                    return boosters[bid];
                }
                if (mparam.do_reboost == 0 || boosters.size() == 0){
                    mparam.num_boosters += 1;
                    boosters.push_back(booster::CreateBooster<FMatrixS>(mparam.booster_type));
-                    booster_info.push_back(0);
+                    booster_info.push_back(bst_group);
                    this->ConfigBooster(boosters.back());
                    boosters.back()->InitModel();
                }
@@ -316,8 +338,13 @@ namespace xgboost{
                 *        set to 1 for linear booster, so that regularization term can be considered
                 */
                int do_reboost;
                /*! 
                 * \brief number of booster group, how many predictions a single 
                 *        input instance could corresponds to
                 */
                int num_booster_group;
                /*! \brief reserved parameters */
-                int reserved[32];
+                int reserved[31];
                /*! \brief constructor */
                ModelParam(void){
                    num_boosters = 0;
@@ -325,6 +352,7 @@ namespace xgboost{
                    num_roots = num_feature = 0;
                    do_reboost = 0;
                    num_pbuffer = 0;
                    num_booster_group = 1;
                    memset(reserved, 0, sizeof(reserved));
                }
                /*!
@@ -340,9 +368,20 @@ namespace xgboost{
                    }
                    if (!strcmp("num_pbuffer", name))       num_pbuffer = atoi(val);
                    if (!strcmp("do_reboost", name))        do_reboost = atoi(val);
                    if (!strcmp("num_booster_group", name)) num_booster_group = atoi(val);
                    if (!strcmp("bst:num_roots", name))     num_roots = atoi(val);
                    if (!strcmp("bst:num_feature", name))   num_feature = atoi(val);
                }
                inline int PredBufferSize(void) const{
                    if (num_booster_group == 0) return num_pbuffer;
                    else return num_booster_group * num_pbuffer;
                }
                inline int BufferOffset( int buffer_index, int bst_group ) const{
                    if( buffer_index < 0 ) return -1;
                    utils::Assert( buffer_index < num_pbuffer, "buffer_indexexceed num_pbuffer" ); 
                    return buffer_index + num_pbuffer * bst_group;
                }
            };
            /*! \brief training parameters */
            struct TrainParam{
--- a/python/README.md
+++ b/python/README.md
@@ -1,2 +1,4 @@
 beta version:
 python wrapper for xgboost using ctypes
 see example for usage
--- a/python/example/README.md
+++ b/python/example/README.md
@@ -0,0 +1,3 @@
 Example of using the Python xgboost wrapper; the data is generated from demo/binary_classification and is in libsvm format.
 for usage: see demo.py and comments in demo.py
--- a/python/example/agaricus.txt.test
+++ b/python/example/agaricus.txt.test
--- a/python/example/agaricus.txt.train
+++ b/python/example/agaricus.txt.train
--- a/python/example/demo.py
+++ b/python/example/demo.py
@@ -0,0 +1,101 @@
 #!/usr/bin/python
 import sys
 import numpy as np
 import scipy.sparse
 # append the path to xgboost
 sys.path.append('../')
 import xgboost as xgb
 ### simple example
 # load file from text file, also binary buffer generated by xgboost
 dtrain = xgb.DMatrix('agaricus.txt.train')
 dtest = xgb.DMatrix('agaricus.txt.test')
 # specify parameters via map; the definitions are the same as in the c++ version
 param = {'bst:max_depth':2, 'bst:eta':1, 'silent':1, 'loss_type':2 }
 # specify validations set to watch performance
 evallist  = [(dtest,'eval'), (dtrain,'train')]
 num_round = 2
 bst = xgb.train( param, dtrain, num_round, evallist )
 # this is prediction
 preds = bst.predict( dtest )
 labels = dtest.get_label()
 print 'error=%f' % (  sum(1 for i in xrange(len(preds)) if int(preds[i]>0.5)!=labels[i]) /float(len(preds)))
 bst.save_model('0001.model')
 # dump model
 bst.dump_model('dump.raw.txt')
 # dump model with feature map
 bst.dump_model('dump.raw.txt','featmap.txt')
 # beta: interact mode
 bst.set_param('bst:interact:expand',4)
 bst.update_interact( dtrain, 'update', 0)
 bst.dump_model('dump.raw2.txt')
 ###
 # build dmatrix in python iteratively
 #
 print 'start running example of build DMatrix in python'
 dtrain = xgb.DMatrix()
 labels = []
 for l in open('agaricus.txt.train'):
    arr = l.split()
    labels.append( int(arr[0]))
    feats = []
    for it in arr[1:]:
        k,v = it.split(':')
        feats.append( (int(k), float(v)) )
    dtrain.add_row( feats )
 dtrain.set_label( labels )
 evallist  = [(dtest,'eval'), (dtrain,'train')]
 bst = xgb.train( param, dtrain, num_round, evallist )
 ###
 # build dmatrix from scipy.sparse
 print 'start running example of build DMatrix from scipy.sparse'
 labels = []
 row = []; col = []; dat = []
 i = 0
 for l in open('agaricus.txt.train'):
    arr = l.split()
    labels.append( int(arr[0]))
    for it in arr[1:]:
        k,v = it.split(':')
        row.append(i); col.append(int(k)); dat.append(float(v))
    i += 1
 csr = scipy.sparse.csr_matrix( (dat, (row,col)) )
 dtrain = xgb.DMatrix( csr )
 dtrain.set_label(labels)
 evallist  = [(dtest,'eval'), (dtrain,'train')]
 bst = xgb.train( param, dtrain, num_round, evallist )
 print 'start running example of build DMatrix from numpy array'
 # NOTE: npymat is numpy array, we will convert it into scipy.sparse.csr_matrix in internal implementation,then convert to DMatrix
 npymat = csr.todense()
 dtrain = xgb.DMatrix( npymat )
 dtrain.set_label(labels)
 evallist  = [(dtest,'eval'), (dtrain,'train')]
 bst = xgb.train( param, dtrain, num_round, evallist )
 ###
 # cutomsized loss function, set loss_type to 0, so that predict get untransformed score
 # 
 print 'start running example to used cutomized objective function'
 # note: set loss_type properly, loss_type=2 means the prediction will get logistic transformed
 #       in most case, we may want to set loss_type = 0, to get untransformed score to compute gradient
 bst = param = {'bst:max_depth':2, 'bst:eta':1, 'silent':1, 'loss_type':2 }
 # user defined objective function: given prediction, return gradient and second order gradient
 def logregobj( preds, dtrain ):
    """ return (grad, hess) computed from preds and the labels stored in dtrain """
    y = dtrain.get_label()
    first_order = preds - y
    second_order = preds * (1.0-preds)
    return first_order, second_order
 # training with customized objective, we can also do step by step training, simply look at xgboost.py's implementation of train
 bst = xgb.train( param, dtrain, num_round, evallist, logregobj )
--- a/python/example/featmap.txt
+++ b/python/example/featmap.txt
@@ -0,0 +1,126 @@
 0	cap-shape=bell	i
 1	cap-shape=conical	i
 2	cap-shape=convex	i
 3	cap-shape=flat	i
 4	cap-shape=knobbed	i
 5	cap-shape=sunken	i
 6	cap-surface=fibrous	i
 7	cap-surface=grooves	i
 8	cap-surface=scaly	i
 9	cap-surface=smooth	i
 10	cap-color=brown	i
 11	cap-color=buff	i
 12	cap-color=cinnamon	i
 13	cap-color=gray	i
 14	cap-color=green	i
 15	cap-color=pink	i
 16	cap-color=purple	i
 17	cap-color=red	i
 18	cap-color=white	i
 19	cap-color=yellow	i
 20	bruises?=bruises	i
 21	bruises?=no	i
 22	odor=almond	i
 23	odor=anise	i
 24	odor=creosote	i
 25	odor=fishy	i
 26	odor=foul	i
 27	odor=musty	i
 28	odor=none	i
 29	odor=pungent	i
 30	odor=spicy	i
 31	gill-attachment=attached	i
 32	gill-attachment=descending	i
 33	gill-attachment=free	i
 34	gill-attachment=notched	i
 35	gill-spacing=close	i
 36	gill-spacing=crowded	i
 37	gill-spacing=distant	i
 38	gill-size=broad	i
 39	gill-size=narrow	i
 40	gill-color=black	i
 41	gill-color=brown	i
 42	gill-color=buff	i
 43	gill-color=chocolate	i
 44	gill-color=gray	i
 45	gill-color=green	i
 46	gill-color=orange	i
 47	gill-color=pink	i
 48	gill-color=purple	i
 49	gill-color=red	i
 50	gill-color=white	i
 51	gill-color=yellow	i
 52	stalk-shape=enlarging	i
 53	stalk-shape=tapering	i
 54	stalk-root=bulbous	i
 55	stalk-root=club	i
 56	stalk-root=cup	i
 57	stalk-root=equal	i
 58	stalk-root=rhizomorphs	i
 59	stalk-root=rooted	i
 60	stalk-root=missing	i
 61	stalk-surface-above-ring=fibrous	i
 62	stalk-surface-above-ring=scaly	i
 63	stalk-surface-above-ring=silky	i
 64	stalk-surface-above-ring=smooth	i
 65	stalk-surface-below-ring=fibrous	i
 66	stalk-surface-below-ring=scaly	i
 67	stalk-surface-below-ring=silky	i
 68	stalk-surface-below-ring=smooth	i
 69	stalk-color-above-ring=brown	i
 70	stalk-color-above-ring=buff	i
 71	stalk-color-above-ring=cinnamon	i
 72	stalk-color-above-ring=gray	i
 73	stalk-color-above-ring=orange	i
 74	stalk-color-above-ring=pink	i
 75	stalk-color-above-ring=red	i
 76	stalk-color-above-ring=white	i
 77	stalk-color-above-ring=yellow	i
 78	stalk-color-below-ring=brown	i
 79	stalk-color-below-ring=buff	i
 80	stalk-color-below-ring=cinnamon	i
 81	stalk-color-below-ring=gray	i
 82	stalk-color-below-ring=orange	i
 83	stalk-color-below-ring=pink	i
 84	stalk-color-below-ring=red	i
 85	stalk-color-below-ring=white	i
 86	stalk-color-below-ring=yellow	i
 87	veil-type=partial	i
 88	veil-type=universal	i
 89	veil-color=brown	i
 90	veil-color=orange	i
 91	veil-color=white	i
 92	veil-color=yellow	i
 93	ring-number=none	i
 94	ring-number=one	i
 95	ring-number=two	i
 96	ring-type=cobwebby	i
 97	ring-type=evanescent	i
 98	ring-type=flaring	i
 99	ring-type=large	i
 100	ring-type=none	i
 101	ring-type=pendant	i
 102	ring-type=sheathing	i
 103	ring-type=zone	i
 104	spore-print-color=black	i
 105	spore-print-color=brown	i
 106	spore-print-color=buff	i
 107	spore-print-color=chocolate	i
 108	spore-print-color=green	i
 109	spore-print-color=orange	i
 110	spore-print-color=purple	i
 111	spore-print-color=white	i
 112	spore-print-color=yellow	i
 113	population=abundant	i
 114	population=clustered	i
 115	population=numerous	i
 116	population=scattered	i
 117	population=several	i
 118	population=solitary	i
 119	habitat=grasses	i
 120	habitat=leaves	i
 121	habitat=meadows	i
 122	habitat=paths	i
 123	habitat=urban	i
 124	habitat=waste	i
 125	habitat=woods	i
--- a/python/xgboost.py
+++ b/python/xgboost.py
@@ -1,10 +1,13 @@
 # module for xgboost
 import ctypes 
 import os
 # optionally have scipy sparse, though not necessary
-import numpy as np
+import numpy
 import numpy.ctypeslib 
 import scipy.sparse as scp
 # set this line correctly
-XGBOOST_PATH = './libxgboostpy.so'
+XGBOOST_PATH = os.path.dirname(__file__)+'/libxgboostpy.so'
 # entry type of sparse matrix
 class REntry(ctypes.Structure):
@@ -34,9 +37,9 @@ class DMatrix:
        else:
            try:
                csr = scp.csr_matrix(data)
-                self.__init_from_csr(data)
+                self.__init_from_csr(csr)
            except:
-                raise "DMatrix", "can not intialize DMatrix from"+type(data)                
+                raise Exception, "can not intialize DMatrix from"+str(type(data))
        if label != None:
            self.set_label(label)
@@ -69,8 +72,8 @@ class DMatrix:
    # get label from dmatrix
    def get_label(self):
        length = ctypes.c_ulong()
-        labels = xglib.XGDMatrixGetLabel(self.handle, ctypes.byref(length));
+        labels = xglib.XGDMatrixGetLabel(self.handle, ctypes.byref(length))
-        return [ labels[i] for i in xrange(length.value) ]
+        return numpy.array( [labels[i] for i in xrange(length.value)] )
    # clear everything
    def clear(self):
        xglib.XGDMatrixClear(self.handle)
@@ -93,12 +96,36 @@ class Booster:
            assert isinstance(d,DMatrix)
        dmats = ( ctypes.c_void_p  * len(cache) )(*[ ctypes.c_void_p(d.handle) for d in cache])
        self.handle = xglib.XGBoosterCreate( dmats, len(cache) )
        self.set_param( params )
    def __del__(self):
        xglib.XGBoosterFree(self.handle) 
    def set_param(self, params,pv=None):
        if isinstance(params,dict):
            for k, v in params.iteritems():
                xglib.XGBoosterSetParam( self.handle, ctypes.c_char_p(k), ctypes.c_char_p(str(v)) )        
        elif isinstance(params,str) and pv != None:
            xglib.XGBoosterSetParam( self.handle, ctypes.c_char_p(params), ctypes.c_char_p(str(pv)) )
        else:
            for k, v in params:
                xglib.XGBoosterSetParam( self.handle, ctypes.c_char_p(k), ctypes.c_char_p(str(v)) )             
    def update(self, dtrain):
        """ update """
        assert isinstance(dtrain, DMatrix)
        xglib.XGBoosterUpdateOneIter( self.handle, dtrain.handle )
    def boost(self, dtrain, grad, hess, bst_group = -1):
        """ update """
        assert len(grad) == len(hess)
        assert isinstance(dtrain, DMatrix)
        xglib.XGBoosterBoostOneIter( self.handle, dtrain.handle,
                                     (ctypes.c_float*len(grad))(*grad),
                                     (ctypes.c_float*len(hess))(*hess),
                                     len(grad), bst_group )
    def update_interact(self, dtrain, action, booster_index=None):
        """ beta: update with specified action"""
        assert isinstance(dtrain, DMatrix)
        if booster_index != None:
            self.set_param('interact:booster_index', str(booster_index))
        xglib.XGBoosterUpdateInteract( self.handle, dtrain.handle, ctypes.c_char_p(str(action)) )
    def eval_set(self, evals, it = 0):
        for d in evals:
            assert isinstance(d[0], DMatrix)
@@ -108,10 +135,10 @@ class Booster:
        xglib.XGBoosterEvalOneIter( self.handle, it, dmats, evnames, len(evals) )
    def eval(self, mat, name = 'eval', it = 0 ):
        self.eval_set( [(mat,name)], it)
-    def predict(self, data):
+    def predict(self, data, bst_group = -1):
        length = ctypes.c_ulong()
-        preds = xglib.XGBoosterPredict( self.handle, data.handle, ctypes.byref(length))
+        preds = xglib.XGBoosterPredict( self.handle, data.handle, ctypes.byref(length), bst_group)
-        return [ preds[i] for i in xrange(length.value) ]        
+        return numpy.array( [ preds[i] for i in xrange(length.value)])
    def save_model(self, fname):
        """ save model to file """
        xglib.XGBoosterSaveModel( self.handle, ctypes.c_char_p(fname) )
@@ -122,12 +149,21 @@ class Booster:
        """dump model into text file"""
        xglib.XGBoosterDumpModel( self.handle, ctypes.c_char_p(fname), ctypes.c_char_p(fmap) )
-def train(params, dtrain, num_boost_round = 10, evals = []):
+def train(params, dtrain, num_boost_round = 10, evals = [], obj=None):
    """ train a booster with given paramaters """
    bst = Booster(params, [dtrain] )
    if obj == None:
        for i in xrange(num_boost_round):
            bst.update( dtrain )
            if len(evals) != 0:
                bst.eval_set( evals, i )
    else:
        # try customized objective function
        for i in xrange(num_boost_round):
            pred = bst.predict( dtrain )
            grad, hess = obj( pred, dtrain )
            bst.boost( dtrain, grad, hess )
            if len(evals) != 0:
                bst.eval_set( evals, i )        
    return bst
--- a/python/xgboost_python.cpp
+++ b/python/xgboost_python.cpp
@@ -32,6 +32,7 @@ namespace xgboost{
                mat.row_data_.resize( mat.row_ptr_.back() + len );
                memcpy( &mat.row_data_[mat.row_ptr_.back()], data, sizeof(XGEntry)*len );
                mat.row_ptr_.push_back( mat.row_ptr_.back() + len );
                init_col_ = false;
            }
            inline const XGEntry* GetRow(unsigned ridx, size_t* len) const{
                const xgboost::booster::FMatrixS &mat = this->data;
@@ -72,7 +73,7 @@ namespace xgboost{
                return &(this->info.labels[0]);
            }
            inline void CheckInit(void){
-                if(!this->data.HaveColAccess()){
+                if(!init_col_){
                    this->data.InitData();
                }
                utils::Assert( this->data.NumRow() == this->info.labels.size(), "DMatrix: number of labels must match number of rows in matrix");
@@ -101,11 +102,34 @@ namespace xgboost{
                xgboost::regrank::RegRankBoostLearner::LoadModel(fname);
                this->init_model = true;
            }
-            const float *Pred( const DMatrix &dmat, size_t *len ){
+            const float *Pred( const DMatrix &dmat, size_t *len, int bst_group ){
-                this->Predict( this->preds_, dmat );
+                this->CheckInit();
                this->Predict( this->preds_, dmat, bst_group );
                *len = this->preds_.size();
                return &this->preds_[0];
            }
            inline void BoostOneIter( const DMatrix &train, 
                                      float *grad, float *hess, size_t len, int bst_group ){
                this->grad_.resize( len ); this->hess_.resize( len );
                memcpy( &this->grad_[0], grad, sizeof(float)*len );
                memcpy( &this->hess_[0], hess, sizeof(float)*len );
                if( grad_.size() == train.Size() ){
                    if( bst_group < 0 ) bst_group = 0;
                    base_gbm.DoBoost(grad_, hess_, train.data, train.info.root_index, bst_group);
                }else{
                    utils::Assert( bst_group == -1, "must set bst_group to -1 to support all group boosting" );
                    int ngroup = base_gbm.NumBoosterGroup();
                    utils::Assert( grad_.size() == train.Size() * (size_t)ngroup, "BUG: UpdateOneIter: mclass" );
                    std::vector<float> tgrad( train.Size() ), thess( train.Size() );
                    for( int g = 0; g < ngroup; ++ g ){
                        memcpy( &tgrad[0], &grad_[g*tgrad.size()], sizeof(float)*tgrad.size() );
                        memcpy( &thess[0], &hess_[g*tgrad.size()], sizeof(float)*tgrad.size() );
                        base_gbm.DoBoost(tgrad, thess, train.data, train.info.root_index, g );
                    }
                }                
            }
        };
    };
 };
@@ -163,10 +187,15 @@ extern "C"{
    void *XGBoosterCreate( void *dmats[], size_t len ){
        std::vector<const xgboost::regrank::DMatrix*> mats;
        for( size_t i = 0; i < len; ++i ){
-            mats.push_back( static_cast<DMatrix*>(dmats[i]) );
+            DMatrix *dtr = static_cast<DMatrix*>(dmats[i]);
            dtr->CheckInit();
            mats.push_back( dtr );
        }
        return new Booster( mats );
    }
    void XGBoosterFree( void *handle ){
        delete  static_cast<Booster*>(handle);
    }
    void XGBoosterSetParam( void *handle, const char *name, const char *value ){
        static_cast<Booster*>(handle)->SetParam( name, value );
    }
@@ -176,6 +205,13 @@ extern "C"{
        bst->CheckInit(); dtr->CheckInit(); 
        bst->UpdateOneIter( *dtr );
    }    
    void XGBoosterBoostOneIter( void *handle, void *dtrain, 
                                float *grad, float *hess, size_t len, int bst_group ){
        Booster *bst = static_cast<Booster*>(handle);
        DMatrix *dtr = static_cast<DMatrix*>(dtrain);
        bst->CheckInit(); dtr->CheckInit(); 
        bst->BoostOneIter( *dtr, grad, hess, len, bst_group );
    }      
    void XGBoosterEvalOneIter( void *handle, int iter, void *dmats[], const char *evnames[], size_t len ){
        Booster *bst = static_cast<Booster*>(handle);
        bst->CheckInit();
@@ -188,8 +224,8 @@ extern "C"{
        }
        bst->EvalOneIter( iter, mats, names, stdout );
    }
-    const float *XGBoosterPredict( void *handle, void *dmat, size_t *len ){
+    const float *XGBoosterPredict( void *handle, void *dmat, size_t *len, int bst_group ){
-        return static_cast<Booster*>(handle)->Pred( *static_cast<DMatrix*>(dmat), len );
+        return static_cast<Booster*>(handle)->Pred( *static_cast<DMatrix*>(dmat), len, bst_group );
    }
    void XGBoosterLoadModel( void *handle, const char *fname ){        
        static_cast<Booster*>(handle)->LoadModel( fname );        
@@ -207,5 +243,13 @@ extern "C"{
        static_cast<Booster*>(handle)->DumpModel( fo, featmap, false );
        fclose( fo );
    }
    void XGBoosterUpdateInteract( void *handle, void *dtrain, const char *action ){
        Booster *bst = static_cast<Booster*>(handle);
        DMatrix *dtr = static_cast<DMatrix*>(dtrain);        
        bst->CheckInit(); dtr->CheckInit(); 
        std::string act( action );
        bst->UpdateInteract( act, *dtr );
    }
 };
--- a/python/xgboost_python.h
+++ b/python/xgboost_python.h
@@ -109,6 +109,11 @@ extern "C"{
     * \param create a booster
     */
    void *XGBoosterCreate( void* dmats[], size_t len ); 
    /*! 
     * \brief free obj in handle 
     * \param handle handle to be freed
     */
    void XGBoosterFree( void* handle ); 
    /*! 
     * \brief set parameters 
     * \param handle handle
@@ -122,6 +127,19 @@ extern "C"{
     * \param dtrain training data
     */        
    void XGBoosterUpdateOneIter( void *handle, void *dtrain );   
    /*!
     * \brief update the model, by directly specify gradient and second order gradient, 
     *        this can be used to replace UpdateOneIter, to support customized loss function
     * \param handle handle
     * \param dtrain training data
     * \param grad gradient statistics
     * \param hess second order gradient statistics
     * \param len length of grad/hess array
     * \param bst_group boost group we are working at, default = -1
     */
    void XGBoosterBoostOneIter( void *handle, void *dtrain, 
                                float *grad, float *hess, size_t len, int bst_group );   
    /*! 
     * \brief print evaluation statistics to stdout for xgboost
     * \param handle handle
@@ -136,8 +154,9 @@ extern "C"{
     * \param handle handle
     * \param dmat data matrix
     * \param len used to store length of returning result
     * \param bst_group booster group, if model contains multiple booster group, default = -1 means predict for all groups 
     */    
-    const float *XGBoosterPredict( void *handle, void *dmat, size_t *len );
+    const float *XGBoosterPredict( void *handle, void *dmat, size_t *len, int bst_group );
    /*! 
     * \brief load model from existing file
     * \param handle handle
@@ -157,6 +176,13 @@ extern "C"{
     * \param fmap  name to fmap can be empty string
     */    
    void XGBoosterDumpModel( void *handle, const char *fname, const char *fmap );
    /*! 
     * \brief interactively update model: beta
     * \param handle handle
     * \param dtrain training data
     * \param action action name
     */        
    void XGBoosterUpdateInteract( void *handle, void *dtrain, const char* action );   
 };
 #endif
--- a/regrank/xgboost_regrank.h
+++ b/regrank/xgboost_regrank.h
@@ -86,6 +86,7 @@ namespace xgboost{
                if (!strcmp(name, "silent"))  silent = atoi(val);
                if (!strcmp(name, "eval_metric"))  evaluator_.AddEval(val);
                if (!strcmp(name, "objective") )   name_obj_ = val;
                if (!strcmp(name, "num_class") )   base_gbm.SetParam("num_booster_group", val );
                mparam.SetParam(name, val);
                base_gbm.SetParam(name, val);
                cfg_.push_back( std::make_pair( std::string(name), std::string(val) ) );
@@ -95,6 +96,12 @@ namespace xgboost{
            * this function is reserved for solver to allocate necessary space and do other preparation
            */
            inline void InitTrainer(void){
                if( mparam.num_class != 0 ){
                    if( name_obj_ != "softmax" ){
                        name_obj_ = "softmax";
                        printf("auto select objective=softmax to support multi-class classification\n" );
                    }
                }
                base_gbm.InitTrainer();                
                obj_ = CreateObjFunction( name_obj_.c_str() );
                for( size_t i = 0; i < cfg_.size(); ++ i ){
@@ -166,9 +173,18 @@ namespace xgboost{
            inline void UpdateOneIter(const DMatrix &train){
                this->PredictRaw(preds_, train);
                obj_->GetGradient(preds_, train.info, base_gbm.NumBoosters(), grad_, hess_);
-                // do boost
+                if( grad_.size() == train.Size() ){
-                std::vector<unsigned> root_index;
+                    base_gbm.DoBoost(grad_, hess_, train.data, train.info.root_index);
-                base_gbm.DoBoost(grad_, hess_, train.data, root_index);
+                }else{
                    int ngroup = base_gbm.NumBoosterGroup();
                    utils::Assert( grad_.size() == train.Size() * (size_t)ngroup, "BUG: UpdateOneIter: mclass" );
                    std::vector<float> tgrad( train.Size() ), thess( train.Size() );
                    for( int g = 0; g < ngroup; ++ g ){
                        memcpy( &tgrad[0], &grad_[g*tgrad.size()], sizeof(float)*tgrad.size() );
                        memcpy( &thess[0], &hess_[g*tgrad.size()], sizeof(float)*tgrad.size() );
                        base_gbm.DoBoost(tgrad, thess, train.data, train.info.root_index, g );
                    }
                }
            }
            /*!
             * \brief evaluate the model for specific iteration
@@ -190,9 +206,14 @@ namespace xgboost{
                fprintf(fo, "\n");
                fflush(fo);
            }
-            /*! \brief get prediction, without buffering */
+            /*! 
-            inline void Predict(std::vector<float> &preds, const DMatrix &data){
+             * \brief get prediction
-                this->PredictRaw(preds,data);
+             * \param storage to store prediction
             * \param data input data
             * \param bst_group booster group we are in
             */
            inline void Predict(std::vector<float> &preds, const DMatrix &data, int bst_group = -1){
                this->PredictRaw( preds, data, bst_group );
                obj_->PredTransform( preds );
            }            
        public:
@@ -241,24 +262,32 @@ namespace xgboost{
                    base_gbm.InteractRePredict(data.data, j, buffer_offset + j);
                }
            }
        private:
            /*! \brief get un-transformed prediction*/
-            inline void PredictRaw(std::vector<float> &preds, const DMatrix &data){
+            inline void PredictRaw(std::vector<float> &preds, const DMatrix &data, int bst_group = -1 ){
-                this->PredictBuffer(preds, data, this->FindBufferOffset(data) );
+                int buffer_offset =  this->FindBufferOffset(data);
                if( bst_group < 0 ){
                    int ngroup = base_gbm.NumBoosterGroup();
                    preds.resize( data.Size() * ngroup );
                    for( int g = 0; g < ngroup; ++ g ){ 
                        this->PredictBuffer(&preds[ data.Size() * g ], data, buffer_offset, g );
                    }
                }else{
                    preds.resize( data.Size() );
                    this->PredictBuffer(&preds[0], data, buffer_offset, bst_group );
                }
            }
            /*! \brief get the un-transformed predictions, given data */
-            inline void PredictBuffer(std::vector<float> &preds, const DMatrix &data, int buffer_offset){
+            inline void PredictBuffer(float *preds, const DMatrix &data, int buffer_offset, int bst_group ){
                preds.resize(data.Size());
                const unsigned ndata = static_cast<unsigned>(data.Size());
                if( buffer_offset >= 0 ){  
                    #pragma omp parallel for schedule( static )
                    for (unsigned j = 0; j < ndata; ++j){
-                        preds[j] = mparam.base_score + base_gbm.Predict(data.data, j, buffer_offset + j);
+                        preds[j] = mparam.base_score + base_gbm.Predict(data.data, j, buffer_offset + j, data.info.GetRoot(j), bst_group );
                    }
                }else
                    #pragma omp parallel for schedule( static )
                    for (unsigned j = 0; j < ndata; ++j){
-                        preds[j] = mparam.base_score + base_gbm.Predict(data.data, j, -1);
+                        preds[j] = mparam.base_score + base_gbm.Predict(data.data, j, -1, data.info.GetRoot(j), bst_group );
                    }{
                }
            }
@@ -271,13 +300,16 @@ namespace xgboost{
                int loss_type;
                /* \brief number of features  */
                int num_feature;  
                /* \brief number of class, if it is multi-class classification  */
                int num_class; 
                /*! \brief reserved field */
-                int reserved[16];
+                int reserved[15];
                /*! \brief constructor */
                ModelParam(void){
                    base_score = 0.5f;
                    loss_type = 0;
                    num_feature = 0;
                    num_class = 0;
                    memset(reserved, 0, sizeof(reserved));
                }
                /*!
@@ -288,6 +320,7 @@ namespace xgboost{
                inline void SetParam(const char *name, const char *val){
                    // each key is tested on its own; a name that matches nothing is ignored
                    if (strcmp(name, "base_score") == 0){
                        base_score = static_cast<float>(atof(val));
                    }
                    if (strcmp(name, "loss_type") == 0){
                        loss_type = atoi(val);
                    }
                    if (strcmp(name, "num_class") == 0){
                        num_class = atoi(val);
                    }
                    if (strcmp(name, "bst:num_feature") == 0){
                        num_feature = atoi(val);
                    }
                }
                /*!
--- a/regrank/xgboost_regrank_data.h
+++ b/regrank/xgboost_regrank_data.h
@@ -35,11 +35,17 @@ namespace xgboost{
                std::vector<unsigned> group_ptr;
                /*! \brief weights of each instance, optional */            
                std::vector<float> weights;
                /*! \brief specified root index of each instance, can be used for multi task setting*/
                std::vector<unsigned> root_index;
                /*! \brief weight of instance i; 1.0 for every instance when no weights are stored */
                inline float GetWeight( size_t i ) const{
                    return weights.empty() ? 1.0f : weights[i];
                }
                inline float GetRoot( size_t i ) const{
                    if( root_index.size() != 0 ) return root_index[i];
                    else return 0;
                }
            };
        public:
            /*! \brief feature data content */
@@ -112,7 +118,10 @@ namespace xgboost{
                    unsigned ngptr;
                    if( fs.Read(&ngptr, sizeof(unsigned) ) != 0 ){
                        info.group_ptr.resize( ngptr );
                        if( ngptr != 0 ){
                            utils::Assert( fs.Read(&info.group_ptr[0], sizeof(unsigned) * ngptr) != 0, "Load group file");
                            utils::Assert( info.group_ptr.back() == data.NumRow(), "number of group must match number of record" );
                        }
                    }
                }
                fs.Close();
@@ -121,7 +130,7 @@ namespace xgboost{
                    printf("%ux%u matrix with %lu entries is loaded from %s\n",
                           (unsigned)data.NumRow(), (unsigned)data.NumCol(), (unsigned long)data.NumEntry(), fname);
                    if( info.group_ptr.size() != 0 ){
-                        printf("data contains %u groups\n", (unsigned)info.group_ptr.size() );
+                        printf("data contains %u groups\n", (unsigned)info.group_ptr.size()-1 );
                    }
                }
                this->TryLoadWeight(fname, silent);
@@ -143,14 +152,16 @@ namespace xgboost{
                {// write out group ptr
                    unsigned ngptr = static_cast<unsigned>( info.group_ptr.size() );
                    fs.Write(&ngptr, sizeof(unsigned) );
                    if( ngptr != 0 ){
                        fs.Write(&info.group_ptr[0], sizeof(unsigned) * ngptr);                    
                    }
                }
                fs.Close();
                if (!silent){
                    printf("%ux%u matrix with %lu entries is saved to %s\n",
                       (unsigned)data.NumRow(), (unsigned)data.NumCol(), (unsigned long)data.NumEntry(), fname);
                    if( info.group_ptr.size() != 0 ){
-                        printf("data contains %u groups\n", (unsigned)info.group_ptr.size() );
+                        printf("data contains %u groups\n", (unsigned)info.group_ptr.size()-1 );
                    }
                }
            }
--- a/regrank/xgboost_regrank_eval.h
+++ b/regrank/xgboost_regrank_eval.h
@@ -13,6 +13,7 @@
 #include "../utils/xgboost_omp.h"
 #include "../utils/xgboost_random.h"
 #include "xgboost_regrank_data.h"
 #include "xgboost_regrank_utils.h"
 namespace xgboost{
    namespace regrank{
@@ -31,17 +32,11 @@ namespace xgboost{
            virtual ~IEvaluator(void){}
        };
        /*! \brief comparator: true when a.first is strictly greater than b.first */
        inline static bool CmpFirst(const std::pair<float, unsigned> &a, const std::pair<float, unsigned> &b){
            const float lhs = a.first;
            const float rhs = b.first;
            return rhs < lhs;
        }
        /*! \brief comparator: true when a.second is strictly greater than b.second */
        inline static bool CmpSecond(const std::pair<float, unsigned> &a, const std::pair<float, unsigned> &b){
            const unsigned lhs = a.second;
            const unsigned rhs = b.second;
            return rhs < lhs;
        }
        /*! \brief RMSE */
        struct EvalRMSE : public IEvaluator{
            virtual float Eval(const std::vector<float> &preds,
                               const DMatrix::Info &info) const {
                utils::Assert( preds.size() == info.labels.size(), "label size predict size not match" );
                const unsigned ndata = static_cast<unsigned>(preds.size());
                float sum = 0.0, wsum = 0.0;
                #pragma omp parallel for reduction(+:sum,wsum) schedule( static )
@@ -62,6 +57,7 @@ namespace xgboost{
        struct EvalLogLoss : public IEvaluator{
            virtual float Eval(const std::vector<float> &preds,
                               const DMatrix::Info &info) const {
                utils::Assert( preds.size() == info.labels.size(), "label size predict size not match" );
                const unsigned ndata = static_cast<unsigned>(preds.size());
                float sum = 0.0f, wsum = 0.0f;
                #pragma omp parallel for reduction(+:sum,wsum) schedule( static )
@@ -107,6 +103,7 @@ namespace xgboost{
        struct EvalAuc : public IEvaluator{
            virtual float Eval(const std::vector<float> &preds,
                               const DMatrix::Info &info) const {
                utils::Assert( preds.size() == info.labels.size(), "label size predict size not match" );
                std::vector<unsigned> tgptr(2, 0); tgptr[1] = preds.size();
                const std::vector<unsigned> &gptr = info.group_ptr.size() == 0 ? tgptr : info.group_ptr;
                utils::Assert(gptr.back() == preds.size(), "EvalAuc: group structure must match number of prediction");
@@ -159,8 +156,10 @@ namespace xgboost{
        public:
            virtual float Eval(const std::vector<float> &preds,
                               const DMatrix::Info &info) const {
                utils::Assert( preds.size() == info.labels.size(), "label size predict size not match" );
                const std::vector<unsigned> &gptr = info.group_ptr;
-                utils::Assert(gptr.size() != 0 && gptr.back() == preds.size(), "EvalAuc: group structure must match number of prediction");
+                utils::Assert(gptr.size() != 0, "must specify group when constructing rank file");
                utils::Assert( gptr.back() == preds.size(), "EvalRanklist: group structure must match number of prediction");
                const unsigned ngroup = static_cast<unsigned>(gptr.size() - 1);
                double sum_metric = 0.0f;
--- a/regrank/xgboost_regrank_obj.h
+++ b/regrank/xgboost_regrank_obj.h
@@ -106,8 +106,9 @@ namespace xgboost{
    namespace regrank{        
        IObjFunction* CreateObjFunction( const char *name ){
            if( !strcmp("reg", name ) ) return new RegressionObj();
-            if( !strcmp("rank", name ) ) return new PairwiseRankObj();
+            if( !strcmp("rank:pairwise", name ) ) return new PairwiseRankObj();
-            if( !strcmp("softmax", name ) ) return new SoftmaxObj();
+            if( !strcmp("rank:softmax", name ) ) return new SoftmaxRankObj();
            if( !strcmp("softmax", name ) ) return new SoftmaxMultiClassObj();
            utils::Error("unknown objective function type");
            return NULL;
        }
--- a/regrank/xgboost_regrank_obj.hpp
+++ b/regrank/xgboost_regrank_obj.hpp
@@ -1,7 +1,7 @@
 #ifndef XGBOOST_REGRANK_OBJ_HPP
 #define XGBOOST_REGRANK_OBJ_HPP
 /*!
- * \file xgboost_regrank_obj.h
+ * \file xgboost_regrank_obj.hpp
 * \brief implementation of objective functions
 * \author Tianqi Chen, Kailong Chen
 */
@@ -9,6 +9,8 @@
 #include <vector>
 #include <functional>
 #include "xgboost_regrank_sample.h"
 #include "xgboost_regrank_utils.h"
 namespace xgboost{
    namespace regrank{        
        class RegressionObj : public IObjFunction{
@@ -25,6 +27,7 @@ namespace xgboost{
                                     int iter,
                                     std::vector<float> &grad, 
                                     std::vector<float> &hess ) {
                utils::Assert( preds.size() == info.labels.size(), "label size predict size not match" );
                grad.resize(preds.size()); hess.resize(preds.size());
                const unsigned ndata = static_cast<unsigned>(preds.size());
@@ -53,11 +56,11 @@ namespace xgboost{
    namespace regrank{
        // simple softmax rank
-        class SoftmaxObj : public IObjFunction{
+        class SoftmaxRankObj : public IObjFunction{
        public:
-            SoftmaxObj(void){
+            SoftmaxRankObj(void){
            }
-            virtual ~SoftmaxObj(){}
+            virtual ~SoftmaxRankObj(){}
            virtual void SetParam(const char *name, const char *val){
            }
            virtual void GetGradient(const std::vector<float>& preds,  
@@ -65,6 +68,7 @@ namespace xgboost{
                                     int iter,
                                     std::vector<float> &grad, 
                                     std::vector<float> &hess ) {
                utils::Assert( preds.size() == info.labels.size(), "label size predict size not match" );
                grad.resize(preds.size()); hess.resize(preds.size());
                const std::vector<unsigned> &gptr = info.group_ptr;
                utils::Assert( gptr.size() != 0 && gptr.back() == preds.size(), "rank loss must have group file" );
@@ -98,21 +102,74 @@ namespace xgboost{
            virtual const char* DefaultEvalMetric(void) {
                return "pre@1";
            }
        };
        // simple softmax multi-class classification
        class SoftmaxMultiClassObj : public IObjFunction{
        public:
            SoftmaxMultiClassObj(void){
                nclass = 0;
            }
            virtual ~SoftmaxMultiClassObj(){}
            /*! \brief set a parameter; only "num_class" is recognized, other names are ignored */
            virtual void SetParam(const char *name, const char *val){
                const bool is_num_class = ( strcmp( name, "num_class" ) == 0 );
                if( is_num_class ){
                    nclass = atoi(val);
                }
            }
            /*!
             * \brief compute first and second order gradient statistics of the softmax multi-class loss
             * \param preds raw scores, nclass blocks of ndata values: the score of class k for
             *              instance j is stored at preds[j + k * ndata]
             * \param info  data information; info.labels holds one class label per instance
             * \param iter  not used by this objective
             * \param grad  output gradient, resized to preds.size() and laid out like preds
             * \param hess  output second order gradient, resized to preds.size() and laid out like preds
             */
            virtual void GetGradient(const std::vector<float>& preds,  
                                     const DMatrix::Info &info,
                                     int iter,
                                     std::vector<float> &grad, 
                                     std::vector<float> &hess ) {
                utils::Assert( nclass != 0, "must set num_class to use softmax" );
                utils::Assert( preds.size() == (size_t)nclass * info.labels.size(), "SoftmaxMultiClassObj: label size and pred size does not match" );
                grad.resize(preds.size()); hess.resize(preds.size());
                const unsigned ndata = static_cast<unsigned>(info.labels.size());
                #pragma omp parallel
                {
                    // scratch buffer, declared inside the parallel region so each thread has its own
                    std::vector<float> rec(nclass);
                    // "omp for" is required here: a bare "#pragma for" is an unknown pragma,
                    // which would make every thread in the team execute the complete loop
                    #pragma omp for schedule(static)
                    for (unsigned j = 0; j < ndata; ++j){
                        // gather the nclass scores of instance j
                        for( int k = 0; k < nclass; ++ k ){
                            rec[k] = preds[j + k * ndata];
                        }
                        Softmax( rec );
                        int label = static_cast<int>(info.labels[j]);
                        utils::Assert( label < nclass, "SoftmaxMultiClassObj: label exceed num_class" );
                        for( int k = 0; k < nclass; ++ k ){
                            float p = rec[ k ];
                            // gradient is p - 1 for the labelled class and p for every other class
                            if( label == k ){
                                grad[j+k*ndata] = p - 1.0f;
                            }else{
                                grad[j+k*ndata] = p;
                            }
                            hess[j+k*ndata] = 2.0f * p * ( 1.0f - p );
                        }  
                    }
                }
            }
            /*!
             * \brief turn raw per-class scores into predicted class indices, in place
             * \param preds on input holds nclass blocks of ndata scores (the score of class k for
             *              instance j is at preds[j + k * ndata]); on output it is shrunk to ndata
             *              entries, entry j being the index of the highest scoring class
             */
            virtual void PredTransform(std::vector<float> &preds){
                utils::Assert( nclass != 0, "must set num_class to use softmax" );
                utils::Assert( preds.size() % nclass == 0, "SoftmaxMultiClassObj: label size and pred size does not match" );                
                const unsigned ndata = static_cast<unsigned>(preds.size()/nclass);
                #pragma omp parallel
                {
                    // scratch buffer, declared inside the parallel region so each thread has its own
                    std::vector<float> rec(nclass);
                    // "omp for" is required here: a bare "#pragma for" is an unknown pragma, so every
                    // thread would run the complete loop and one thread could overwrite preds[j]
                    // with a class index before another thread reads it as the class-0 score.
                    // With the iterations split, iteration j reads preds[j] before writing it and
                    // all other reads of other iterations touch either their own j or indices >= ndata.
                    #pragma omp for schedule(static)
                    for (unsigned j = 0; j < ndata; ++j){
                        for( int k = 0; k < nclass; ++ k ){
                            rec[k] = preds[j + k * ndata];
                        }
                        Softmax( rec );
                        preds[j] = FindMaxIndex( rec );
                    }
                }
                // only the first ndata entries carry results now
                preds.resize( ndata );
            }
            /*! \brief name returned as the default evaluation metric of this objective */
            virtual const char* DefaultEvalMetric(void) {
                const char *metric_name = "error";
                return metric_name;
            }
        private:
-            inline static void Softmax( std::vector<float>& rec ){
+            int nclass;
                float wmax = rec[0];
                for( size_t i = 1; i < rec.size(); ++ i ){
                    wmax = std::max( rec[i], wmax );
                }
                double wsum = 0.0f;
                for( size_t i = 0; i < rec.size(); ++ i ){
                    rec[i] = expf(rec[i]-wmax);
                    wsum += rec[i];
                }
                for( size_t i = 0; i < rec.size(); ++ i ){
                    rec[i] /= wsum;
                }                
            }
        };
    };
@@ -134,6 +191,7 @@ namespace xgboost{
                                     int iter,
                                     std::vector<float> &grad, 
                                     std::vector<float> &hess ) {
                utils::Assert( preds.size() == info.labels.size(), "label size predict size not match" );              
                grad.resize(preds.size()); hess.resize(preds.size());
                const std::vector<unsigned> &gptr = info.group_ptr;
                utils::Assert( gptr.size() != 0 && gptr.back() == preds.size(), "rank loss must have group file" );
--- a/regrank/xgboost_regrank_utils.h
+++ b/regrank/xgboost_regrank_utils.h
@@ -0,0 +1,43 @@
 #ifndef XGBOOST_REGRANK_UTILS_H
 #define XGBOOST_REGRANK_UTILS_H
 /*!
 * \file xgboost_regrank_utils.h
 * \brief useful helper functions
 * \author Tianqi Chen, Kailong Chen
 */
 namespace xgboost{
    namespace regrank{
        /*!
         * \brief simple helper function to do softmax in place
         * \param rec scores on input, replaced by the normalized exponentials on output;
         *            an empty vector is left untouched
         */
        inline static void Softmax( std::vector<float>& rec ){
            // guard: reading rec[0] on an empty vector would be out of bounds
            if( rec.empty() ) return;
            float wmax = rec[0];
            for( size_t i = 1; i < rec.size(); ++ i ){
                wmax = std::max( rec[i], wmax );
            }
            // the maximum is subtracted before exponentiation, so every exponent is <= 0
            double wsum = 0.0;
            for( size_t i = 0; i < rec.size(); ++ i ){
                rec[i] = expf(rec[i]-wmax);
                wsum += rec[i];
            }
            for( size_t i = 0; i < rec.size(); ++ i ){
                rec[i] /= wsum;
            }
        }
        /*!
         * \brief simple helper function to find the position of the largest element
         * \param rec values to scan; not modified
         * \return index of the first largest element, 0 when rec is empty
         */
        inline static int FindMaxIndex( const std::vector<float>& rec ){
            size_t mxid = 0;
            for( size_t i = 1; i < rec.size(); ++ i ){
                // strict comparison keeps the earliest index on ties
                if( rec[i] > rec[mxid] ) mxid = i;
            }
            return static_cast<int>(mxid);
        }
        /*! \brief comparator: true when a.first is strictly greater than b.first */
        inline static bool CmpFirst(const std::pair<float, unsigned> &a, const std::pair<float, unsigned> &b){
            const float lhs = a.first;
            const float rhs = b.first;
            return rhs < lhs;
        }
        /*! \brief comparator: true when a.second is strictly greater than b.second */
        inline static bool CmpSecond(const std::pair<float, unsigned> &a, const std::pair<float, unsigned> &b){
            const unsigned lhs = a.second;
            const unsigned rhs = b.second;
            return rhs < lhs;
        }
    };
 };
 #endif
		`@@ -0,0 +1,3 @@`
							`An example of using the Python xgboost wrapper; the data is generated from demo/binary_classification and is in libsvm format.`

							`For usage, see demo.py and the comments in it.`