From 5177fa02e4ee08ac71a7fd84ace812819d94554f Mon Sep 17 00:00:00 2001 From: antinucleon Date: Tue, 2 Sep 2014 15:22:08 -0600 Subject: [PATCH 01/22] adjust weight --- wrapper/xgboost.py | 315 ++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 284 insertions(+), 31 deletions(-) diff --git a/wrapper/xgboost.py b/wrapper/xgboost.py index a6999a39f..34c4bfde7 100644 --- a/wrapper/xgboost.py +++ b/wrapper/xgboost.py @@ -3,10 +3,11 @@ import ctypes import os # optinally have scipy sparse, though not necessary -import numpy +import numpy as np import sys import numpy.ctypeslib import scipy.sparse as scp +import random # set this line correctly if os.name == 'nt': @@ -32,18 +33,30 @@ xglib.XGBoosterDumpModel.restype = ctypes.POINTER(ctypes.c_char_p) def ctypes2numpy(cptr, length, dtype): - # convert a ctypes pointer array to numpy + """convert a ctypes pointer array to numpy array """ assert isinstance(cptr, ctypes.POINTER(ctypes.c_float)) res = numpy.zeros(length, dtype=dtype) assert ctypes.memmove(res.ctypes.data, cptr, length * res.strides[0]) return res -# data matrix used in xgboost class DMatrix: + """data matrix used in xgboost""" # constructor def __init__(self, data, label=None, missing=0.0, weight = None): + """ constructor of DMatrix + + Args: + data: string/numpy array/scipy.sparse + data source, string type is the path of svmlight format txt file or xgb buffer + label: list or numpy 1d array, optional + label of training data + missing: float + value in data which need to be present as missing value + weight: list or numpy 1d array, optional + weight for each instances + """ # force into void_p, mac need to pass things in as void_p - if data == None: + if data is None: self.handle = None return if isinstance(data, str): @@ -63,22 +76,25 @@ class DMatrix: self.set_label(label) if weight !=None: self.set_weight(weight) - # convert data from csr matrix + def __init_from_csr(self, csr): + """convert data from csr matrix""" assert len(csr.indices) == len(csr.data) self.handle = ctypes.c_void_p(xglib.XGDMatrixCreateFromCSR( (ctypes.c_ulong * len(csr.indptr))(*csr.indptr), (ctypes.c_uint * len(csr.indices))(*csr.indices), (ctypes.c_float * len(csr.data))(*csr.data), len(csr.indptr), len(csr.data))) - # convert data from numpy matrix + def __init_from_npy2d(self,mat,missing): + """convert data from numpy matrix""" data = numpy.array(mat.reshape(mat.size), dtype='float32') self.handle = ctypes.c_void_p(xglib.XGDMatrixCreateFromMat( data.ctypes.data_as(ctypes.POINTER(ctypes.c_float)), mat.shape[0], mat.shape[1], ctypes.c_float(missing))) - # destructor + def __del__(self): + """destructor""" xglib.XGDMatrixFree(self.handle) def get_float_info(self, field): length = ctypes.c_ulong() @@ -96,16 +112,39 @@ class DMatrix: def set_uint_info(self, field, data): xglib.XGDMatrixSetUIntInfo(self.handle, ctypes.c_char_p(field.encode('utf-8')), (ctypes.c_uint*len(data))(*data), len(data)) - # load data from file + def save_binary(self, fname, silent=True): + """save DMatrix to XGBoost buffer + Args: + fname: string + name of buffer file + slient: bool, option + whether print info + Returns: + None + """ xglib.XGDMatrixSaveBinary(self.handle, ctypes.c_char_p(fname.encode('utf-8')), int(silent)) - # set label of dmatrix + def set_label(self, label): + """set label of dmatrix + Args: + label: list + label for DMatrix + Returns: + None + """ self.set_float_info('label', label) - # set weight of each instances + def set_weight(self, weight): + """set weight of each instances + Args: + weight: float + 
weight for positive instance + Returns: + None + """ self.set_float_info('weight', weight) - # set initialized margin prediction + def set_base_margin(self, margin): """ set base margin of booster to start from @@ -116,31 +155,149 @@ class DMatrix: see also example/demo.py """ self.set_float_info('base_margin', margin) - # set group size of dmatrix, used for rank + def set_group(self, group): + """set group size of dmatrix, used for rank + Args: + group: + + Returns: + None + """ xglib.XGDMatrixSetGroup(self.handle, (ctypes.c_uint*len(group))(*group), len(group)) - # get label from dmatrix + def get_label(self): + """get label from dmatrix + Args: + None + Returns: + list, label of data + """ return self.get_float_info('label') - # get weight from dmatrix + def get_weight(self): + """get weight from dmatrix + Args: + None + Returns: + float, weight + """ return self.get_float_info('weight') - # get base_margin from dmatrix def get_base_margin(self): + """get base_margin from dmatrix + Args: + None + Returns: + float, base margin + """ return self.get_float_info('base_margin') def num_row(self): + """get number of rows + Args: + None + Returns: + int, num rows + """ return xglib.XGDMatrixNumRow(self.handle) - # slice the DMatrix to return a new DMatrix that only contains rindex def slice(self, rindex): + """slice the DMatrix to return a new DMatrix that only contains rindex + Args: + rindex: list + list of index to be chosen + Returns: + res: DMatrix + new DMatrix with chosen index + """ res = DMatrix(None) res.handle = ctypes.c_void_p(xglib.XGDMatrixSliceDMatrix( self.handle, (ctypes.c_int*len(rindex))(*rindex), len(rindex))) return res +class CVPack: + def __init__(self, dtrain, dtest, param): + self.dtrain = dtrain + self.dtest = dtest + self.watchlist = watchlist = [ (dtrain,'train'), (dtest, 'test') ] + self.bst = Booster(param, [dtrain,dtest]) + def update(self,r): + self.bst.update(self.dtrain, r) + def eval(self,r): + return self.bst.eval_set(self.watchlist, r) + +def mknfold(dall, nfold, param, seed, weightscale=None, evals=[], set_pos_weight=None): + """ + mk nfold list of cvpack from randidx + """ + randidx = range(dall.num_row()) + random.seed(seed) + random.shuffle(randidx) + + idxset = [] + kstep = len(randidx) / nfold + for i in range(nfold): + idxset.append(randidx[ (i*kstep) : min(len(randidx),(i+1)*kstep) ]) + + ret = [] + for k in range(nfold): + trainlst = [] + for j in range(nfold): + if j == k: + testlst = idxset[j] + else: + trainlst += idxset[j] + dtrain = dall.slice(trainlst) + dtest = dall.slice(testlst) + # rescale weight of dtrain and dtest + if weightscale != None: + dtrain.set_weight( dtrain.get_weight() * weightscale * dall.num_row() / dtrain.num_row() ) + dtest.set_weight( dtest.get_weight() * weightscale * dall.num_row() / dtest.num_row() ) + if set_pos_weight != None: + label = dtrain.get_label() + weight = dtrain.get_weight() + sum_wpos = sum( weight[i] for i in range(len(label)) if label[i] == 1.0 ) + sum_wneg = sum( weight[i] for i in range(len(label)) if label[i] == 0.0 ) + param['scale_pos_weight'] = sum_wneg/sum_wpos + plst = param.items() + [('eval_metric', itm) for itm in evals] + ret.append(CVPack(dtrain, dtest, plst)) + return ret + +def aggcv(rlist): + """ + aggregate cross validation results + """ + cvmap = {} + arr = rlist[0].split() + ret = arr[0] + for it in arr[1:]: + k, v = it.split(':') + cvmap[k] = [float(v)] + for line in rlist[1:]: + arr = line.split() + assert ret == arr[0] + for it in arr[1:]: + k, v = it.split(':') + 
cvmap[k].append(float(v)) + + for k, v in sorted(cvmap.items(), key = lambda x:x[0]): + v = np.array(v) + ret += '\t%s:%f+%f' % (k, np.mean(v), np.std(v)) + return ret + + class Booster: """learner class """ def __init__(self, params={}, cache=[], model_file = None): - """ constructor, param: """ + """ constructor + Args: + params: dict + params for boosters + cache: list + list of cache item + model_file: string + path of model file + Returns: + None + """ for d in cache: assert isinstance(d, DMatrix) dmats = (ctypes.c_void_p * len(cache))(*[ d.handle for d in cache]) @@ -166,16 +323,30 @@ class Booster: xglib.XGBoosterSetParam( self.handle, ctypes.c_char_p(k.encode('utf-8')), ctypes.c_char_p(str(v).encode('utf-8'))) + def update(self, dtrain, it): """ update - dtrain: the training DMatrix - it: current iteration number + Args: + dtrain: DMatrix + the training DMatrix + it: int + current iteration number + Returns: + None """ assert isinstance(dtrain, DMatrix) xglib.XGBoosterUpdateOneIter(self.handle, it, dtrain.handle) def boost(self, dtrain, grad, hess): - """ update """ + """ update + Args: + dtrain: DMatrix + the training DMatrix + grad: list + the first order of gradient + hess: list + the second order of gradient + """ assert len(grad) == len(hess) assert isinstance(dtrain, DMatrix) xglib.XGBoosterBoostOneIter(self.handle, dtrain.handle, @@ -183,6 +354,14 @@ class Booster: (ctypes.c_float*len(hess))(*hess), len(grad)) def eval_set(self, evals, it = 0): + """evaluates by metric + Args: + evals: list of tuple (DMatrix, string) + lists of items to be evaluated + it: int + Returns: + evals result + """ for d in evals: assert isinstance(d[0], DMatrix) assert isinstance(d[1], str) @@ -192,25 +371,49 @@ class Booster: return xglib.XGBoosterEvalOneIter(self.handle, it, dmats, evnames, len(evals)) def eval(self, mat, name = 'eval', it = 0): return self.eval_set( [(mat,name)], it) - def predict(self, data, output_margin=False, ntree_limit=0): + def predict(self, data, output_margin=False): """ predict with data - data: the dmatrix storing the input - output_margin: whether output raw margin value that is untransformed - ntree_limit: limit number of trees in prediction, default to 0, 0 means using all the trees + Args: + data: DMatrix + the dmatrix storing the input + output_margin: bool + whether output raw margin value that is untransformed + Returns: + numpy array of prediction """ length = ctypes.c_ulong() preds = xglib.XGBoosterPredict(self.handle, data.handle, - int(output_margin), ntree_limit, ctypes.byref(length)) + int(output_margin), ctypes.byref(length)) return ctypes2numpy(preds, length.value, 'float32') def save_model(self, fname): - """ save model to file """ + """ save model to file + Args: + fname: string + file name of saving model + Returns: + None + """ xglib.XGBoosterSaveModel(self.handle, ctypes.c_char_p(fname.encode('utf-8'))) def load_model(self, fname): - """load model from file""" + """load model from file + Args: + fname: string + file name of saving model + Returns: + None + """ xglib.XGBoosterLoadModel( self.handle, ctypes.c_char_p(fname.encode('utf-8')) ) def dump_model(self, fo, fmap=''): - """dump model into text file""" + """dump model into text file + Args: + fo: string + file name to be dumped + fmap: string, optional + file name of feature map names + Returns: + None + """ if isinstance(fo,str): fo = open(fo,'w') need_close = True @@ -249,7 +452,17 @@ class Booster: return fmap def evaluate(bst, evals, it, feval = None): - """evaluation on eval set""" + 
"""evaluation on eval set + Args: + bst: XGBoost object + object of XGBoost model + evals: list of tuple (DMatrix, string) + obj need to be evaluated + it: int + feval: optional + Returns: + eval result + """ if feval != None: res = '[%d]' % it for dm, evname in evals: @@ -260,10 +473,24 @@ def evaluate(bst, evals, it, feval = None): return res + + def train(params, dtrain, num_boost_round = 10, evals = [], obj=None, feval=None): - """ train a booster with given paramaters """ + """ train a booster with given paramaters + Args: + params: dict + params of booster + dtrain: DMatrix + data to be trained + num_boost_round: int + num of round to be boosted + evals: list + list of items to be evaluated + obj: + feval: + """ bst = Booster(params, [dtrain]+[ d[0] for d in evals ] ) - if obj == None: + if obj is None: for i in range(num_boost_round): bst.update( dtrain, i ) if len(evals) != 0: @@ -277,3 +504,29 @@ def train(params, dtrain, num_boost_round = 10, evals = [], obj=None, feval=None if len(evals) != 0: sys.stderr.write(evaluate(bst, evals, i, feval)+'\n') return bst + +def cv(params, dtrain, num_boost_round = 10, nfold=3, evals = [], \ + weightscale=None, obj=None, feval=None, set_pos_weight=None): + """ cross validation with given paramaters + Args: + params: dict + params of booster + dtrain: DMatrix + data to be trained + num_boost_round: int + num of round to be boosted + nfold: int + folds to do cv + evals: list + list of items to be evaluated + obj: + feval: + set_pos_weight: bool, optional + Adjust pos weight by number + """ + cvfolds = mknfold(dtrain, nfold, params, 0, weightscale, evals) + for i in range(num_boost_round): + for f in cvfolds: + f.update(i) + res = aggcv([f.eval(i) for f in cvfolds]) + sys.stderr.write(res+'\n') From e4817bb4c3f0b8d395e5343382e1cba5fe2ec577 Mon Sep 17 00:00:00 2001 From: tqchen Date: Tue, 2 Sep 2014 15:05:49 -0700 Subject: [PATCH 02/22] fix ntreelimit --- wrapper/xgboost.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/wrapper/xgboost.py b/wrapper/xgboost.py index 34c4bfde7..a0a88af47 100644 --- a/wrapper/xgboost.py +++ b/wrapper/xgboost.py @@ -371,7 +371,7 @@ class Booster: return xglib.XGBoosterEvalOneIter(self.handle, it, dmats, evnames, len(evals)) def eval(self, mat, name = 'eval', it = 0): return self.eval_set( [(mat,name)], it) - def predict(self, data, output_margin=False): + def predict(self, data, output_margin=False, ntree_limit=0): """ predict with data Args: @@ -379,12 +379,14 @@ class Booster: the dmatrix storing the input output_margin: bool whether output raw margin value that is untransformed + + ntree_limit: limit number of trees in prediction, default to 0, 0 means using all the trees Returns: numpy array of prediction """ length = ctypes.c_ulong() preds = xglib.XGBoosterPredict(self.handle, data.handle, - int(output_margin), ctypes.byref(length)) + int(output_margin), ntree_limit, ctypes.byref(length)) return ctypes2numpy(preds, length.value, 'float32') def save_model(self, fname): """ save model to file From 65340ffda6c3712e532776697d2187f64d7fd3fa Mon Sep 17 00:00:00 2001 From: tqchen Date: Tue, 2 Sep 2014 17:51:05 -0700 Subject: [PATCH 03/22] quick lint --- src/utils/utils.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/utils/utils.h b/src/utils/utils.h index 5c3342d8e..c319c5ab7 100644 --- a/src/utils/utils.h +++ b/src/utils/utils.h @@ -86,7 +86,7 @@ void HandlePrint(const char *msg); #endif #endif #ifdef XGBOOST_STRICT_CXX98_ -// these function pointers are to be 
assigned +// these function pointers are to be assigned extern "C" void (*Printf)(const char *fmt, ...); extern "C" int (*SPrintf)(char *buf, size_t size, const char *fmt, ...); extern "C" void (*Assert)(int exp, const char *fmt, ...); @@ -94,7 +94,7 @@ extern "C" void (*Check)(int exp, const char *fmt, ...); extern "C" void (*Error)(const char *fmt, ...); #else /*! \brief printf, print message to the console */ -inline void Printf(const char *fmt, ...) { +inline void Printf(const char *fmt, ...) { std::string msg(kPrintBuffer, '\0'); va_list args; va_start(args, fmt); @@ -103,7 +103,7 @@ inline void Printf(const char *fmt, ...) { HandlePrint(msg.c_str()); } /*! \brief portable version of snprintf */ -inline int SPrintf(char *buf, size_t size, const char *fmt, ...) { +inline int SPrintf(char *buf, size_t size, const char *fmt, ...) { va_list args; va_start(args, fmt); int ret = vsnprintf(buf, size, fmt, args); @@ -154,7 +154,7 @@ inline FILE *FopenCheck(const char *fname, const char *flag) { Check(fp != NULL, "can not open file \"%s\"\n", fname); return fp; } -} // namespace utils +} // namespace utils // easy utils that can be directly acessed in xgboost /*! \brief get the beginning address of a vector */ template From 1dbcebb6fedc0410ff2bc0338c8614bde0538862 Mon Sep 17 00:00:00 2001 From: tqchen Date: Tue, 2 Sep 2014 22:12:28 -0700 Subject: [PATCH 04/22] fix cxx98 --- src/io/simple_dmatrix-inl.hpp | 2 +- src/tree/updater_colmaker-inl.hpp | 25 +++++++++++++------------ 2 files changed, 14 insertions(+), 13 deletions(-) diff --git a/src/io/simple_dmatrix-inl.hpp b/src/io/simple_dmatrix-inl.hpp index 0883955fe..9a88a6bfa 100644 --- a/src/io/simple_dmatrix-inl.hpp +++ b/src/io/simple_dmatrix-inl.hpp @@ -216,7 +216,7 @@ class DMatrixSimple : public DataMatrix { return; } char bname[1024]; - snprintf(bname, sizeof(bname), "%s.buffer", fname); + utils::SPrintf(bname, sizeof(bname), "%s.buffer", fname); if (!this->LoadBinary(bname, silent)) { this->LoadText(fname, silent); if (savebuffer) this->SaveBinary(bname, silent); diff --git a/src/tree/updater_colmaker-inl.hpp b/src/tree/updater_colmaker-inl.hpp index a8cf6ea7f..2d7c5311e 100644 --- a/src/tree/updater_colmaker-inl.hpp +++ b/src/tree/updater_colmaker-inl.hpp @@ -81,18 +81,18 @@ class ColMaker: public IUpdater { const BoosterInfo &info, RegTree *p_tree) { this->InitData(gpair, *p_fmat, info.root_index, *p_tree); - this->InitNewNode(qexpand, gpair, *p_fmat, info, *p_tree); + this->InitNewNode(qexpand_, gpair, *p_fmat, info, *p_tree); for (int depth = 0; depth < param.max_depth; ++depth) { - this->FindSplit(depth, this->qexpand, gpair, p_fmat, info, p_tree); - this->ResetPosition(this->qexpand, p_fmat, *p_tree); - this->UpdateQueueExpand(*p_tree, &this->qexpand); - this->InitNewNode(qexpand, gpair, *p_fmat, info, *p_tree); + this->FindSplit(depth, qexpand_, gpair, p_fmat, info, p_tree); + this->ResetPosition(qexpand_, p_fmat, *p_tree); + this->UpdateQueueExpand(*p_tree, &qexpand_); + this->InitNewNode(qexpand_, gpair, *p_fmat, info, *p_tree); // if nothing left to be expand, break - if (qexpand.size() == 0) break; + if (qexpand_.size() == 0) break; } // set all the rest expanding nodes to leaf - for (size_t i = 0; i < qexpand.size(); ++i) { - const int nid = qexpand[i]; + for (size_t i = 0; i < qexpand_.size(); ++i) { + const int nid = qexpand_[i]; (*p_tree)[nid].set_leaf(snode[nid].weight * param.learning_rate); } // remember auxiliary statistics in the tree node @@ -165,9 +165,9 @@ class ColMaker: public IUpdater { snode.reserve(256); } {// 
expand query - qexpand.reserve(256); qexpand.clear(); + qexpand_.reserve(256); qexpand_.clear(); for (int i = 0; i < tree.param.num_roots; ++i) { - qexpand.push_back(i); + qexpand_.push_back(i); } } } @@ -228,6 +228,7 @@ class ColMaker: public IUpdater { const std::vector &gpair, const BoosterInfo &info, std::vector &temp) { + const std::vector &qexpand = qexpand_; // clear all the temp statistics for (size_t j = 0; j < qexpand.size(); ++j) { temp[qexpand[j]].stats.Clear(); @@ -248,7 +249,7 @@ class ColMaker: public IUpdater { e.last_fvalue = fvalue; } else { // try to find a split - if (fabsf(fvalue - e.last_fvalue) > rt_2eps && e.stats.sum_hess >= param.min_child_weight) { + if (std::abs(fvalue - e.last_fvalue) > rt_2eps && e.stats.sum_hess >= param.min_child_weight) { c.SetSubstract(snode[nid].stats, e.stats); if (c.sum_hess >= param.min_child_weight) { bst_float loss_chg = static_cast(e.stats.CalcGain(param) + c.CalcGain(param) - snode[nid].root_gain); @@ -391,7 +392,7 @@ class ColMaker: public IUpdater { /*! \brief TreeNode Data: statistics for each constructed node */ std::vector snode; /*! \brief queue of nodes to be expanded */ - std::vector qexpand; + std::vector qexpand_; }; }; From 10648a1ca7eee583459f3baf3d5f105959626735 Mon Sep 17 00:00:00 2001 From: tqchen Date: Tue, 2 Sep 2014 22:43:19 -0700 Subject: [PATCH 05/22] remove using std from cpp --- src/gbm/gblinear-inl.hpp | 5 ++++- src/gbm/gbm.cpp | 2 +- src/gbm/gbtree-inl.hpp | 9 ++++++--- src/io/io.cpp | 1 - src/io/simple_dmatrix-inl.hpp | 8 +++++--- src/learner/dmatrix.h | 4 ++++ src/learner/evaluation-inl.hpp | 7 +++++-- src/learner/evaluation.h | 2 ++ src/learner/learner-inl.hpp | 4 +++- src/learner/objective-inl.hpp | 3 +++ src/learner/objective.h | 1 + src/tree/model.h | 3 ++- src/tree/param.h | 1 + src/tree/updater.cpp | 2 +- src/tree/updater_prune-inl.hpp | 1 + src/utils/fmap.h | 9 +++++---- src/utils/io.h | 12 ++++++------ src/utils/random.h | 2 +- src/utils/utils.h | 4 ++-- 19 files changed, 53 insertions(+), 27 deletions(-) diff --git a/src/gbm/gblinear-inl.hpp b/src/gbm/gblinear-inl.hpp index a9d4c8d62..624f15c28 100644 --- a/src/gbm/gblinear-inl.hpp +++ b/src/gbm/gblinear-inl.hpp @@ -24,6 +24,7 @@ class GBLinear : public IGradBooster { } // set model parameters virtual void SetParam(const char *name, const char *val) { + using namespace std; if (!strncmp(name, "bst:", 4)) { param.SetParam(name + 4, val); } @@ -166,6 +167,7 @@ class GBLinear : public IGradBooster { learning_rate = 1.0f; } inline void SetParam(const char *name, const char *val) { + using namespace std; // sync-names if (!strcmp("eta", name)) learning_rate = static_cast(atof(val)); if (!strcmp("lambda", name)) reg_lambda = static_cast(atof(val)); @@ -207,9 +209,10 @@ class GBLinear : public IGradBooster { Param(void) { num_feature = 0; num_output_group = 1; - memset(reserved, 0, sizeof(reserved)); + std::memset(reserved, 0, sizeof(reserved)); } inline void SetParam(const char *name, const char *val) { + using namespace std; if (!strcmp(name, "bst:num_feature")) num_feature = atoi(val); if (!strcmp(name, "num_output_group")) num_output_group = atoi(val); } diff --git a/src/gbm/gbm.cpp b/src/gbm/gbm.cpp index 4713838e9..e280fdd4a 100644 --- a/src/gbm/gbm.cpp +++ b/src/gbm/gbm.cpp @@ -1,7 +1,6 @@ #define _CRT_SECURE_NO_WARNINGS #define _CRT_SECURE_NO_DEPRECATE #include -using namespace std; #include "./gbm.h" #include "./gbtree-inl.hpp" #include "./gblinear-inl.hpp" @@ -9,6 +8,7 @@ using namespace std; namespace xgboost { namespace gbm { IGradBooster* 
CreateGradBooster(const char *name) { + using namespace std; if (!strcmp("gbtree", name)) return new GBTree(); if (!strcmp("gblinear", name)) return new GBLinear(); utils::Error("unknown booster type: %s", name); diff --git a/src/gbm/gbtree-inl.hpp b/src/gbm/gbtree-inl.hpp index 8fea28727..ed52afa7d 100644 --- a/src/gbm/gbtree-inl.hpp +++ b/src/gbm/gbtree-inl.hpp @@ -23,6 +23,7 @@ class GBTree : public IGradBooster { this->Clear(); } virtual void SetParam(const char *name, const char *val) { + using namespace std; if (!strncmp(name, "bst:", 4)) { cfg.push_back(std::make_pair(std::string(name+4), std::string(val))); // set into updaters, if already intialized @@ -171,14 +172,14 @@ class GBTree : public IGradBooster { updaters.clear(); std::string tval = tparam.updater_seq; char *pstr; - pstr = strtok(&tval[0], ","); + pstr = std::strtok(&tval[0], ","); while (pstr != NULL) { updaters.push_back(tree::CreateUpdater(pstr)); for (size_t j = 0; j < cfg.size(); ++j) { // set parameters updaters.back()->SetParam(cfg[j].first.c_str(), cfg[j].second.c_str()); } - pstr = strtok(NULL, ","); + pstr = std::strtok(NULL, ","); } tparam.updater_initialized = 1; } @@ -279,6 +280,7 @@ class GBTree : public IGradBooster { updater_initialized = 0; } inline void SetParam(const char *name, const char *val){ + using namespace std; if (!strcmp(name, "updater") && strcmp(updater_seq.c_str(), val) != 0) { updater_seq = val; @@ -319,7 +321,7 @@ class GBTree : public IGradBooster { num_pbuffer = 0; num_output_group = 1; size_leaf_vector = 0; - memset(reserved, 0, sizeof(reserved)); + std::memset(reserved, 0, sizeof(reserved)); } /*! * \brief set parameters from outside @@ -327,6 +329,7 @@ class GBTree : public IGradBooster { * \param val value of the parameter */ inline void SetParam(const char *name, const char *val) { + using namespace std; if (!strcmp("num_pbuffer", name)) num_pbuffer = atol(val); if (!strcmp("num_output_group", name)) num_output_group = atol(val); if (!strcmp("bst:num_roots", name)) num_roots = atoi(val); diff --git a/src/io/io.cpp b/src/io/io.cpp index dead398f7..d251d7a96 100644 --- a/src/io/io.cpp +++ b/src/io/io.cpp @@ -1,7 +1,6 @@ #define _CRT_SECURE_NO_WARNINGS #define _CRT_SECURE_NO_DEPRECATE #include -using namespace std; #include "./io.h" #include "../utils/io.h" #include "../utils/utils.h" diff --git a/src/io/simple_dmatrix-inl.hpp b/src/io/simple_dmatrix-inl.hpp index 9a88a6bfa..374d621e9 100644 --- a/src/io/simple_dmatrix-inl.hpp +++ b/src/io/simple_dmatrix-inl.hpp @@ -55,8 +55,8 @@ class DMatrixSimple : public DataMatrix { RowBatch::Inst inst = batch[i]; row_data_.resize(row_data_.size() + inst.length); if (inst.length != 0) { - memcpy(&row_data_[row_ptr_.back()], inst.data, - sizeof(RowBatch::Entry) * inst.length); + std::memcpy(&row_data_[row_ptr_.back()], inst.data, + sizeof(RowBatch::Entry) * inst.length); } row_ptr_.push_back(row_ptr_.back() + inst.length); } @@ -82,6 +82,7 @@ class DMatrixSimple : public DataMatrix { * \param silent whether print information or not */ inline void LoadText(const char* fname, bool silent = false) { + using namespace std; this->Clear(); FILE* file = utils::FopenCheck(fname, "r"); float label; bool init = true; @@ -135,7 +136,7 @@ class DMatrixSimple : public DataMatrix { * \return whether loading is success */ inline bool LoadBinary(const char* fname, bool silent = false) { - FILE *fp = fopen64(fname, "rb"); + std::FILE *fp = fopen64(fname, "rb"); if (fp == NULL) return false; utils::FileStream fs(fp); this->LoadBinary(fs, silent, fname); @@ -208,6 
+209,7 @@ class DMatrixSimple : public DataMatrix { * \param savebuffer whether do save binary buffer if it is text */ inline void CacheLoad(const char *fname, bool silent = false, bool savebuffer = true) { + using namespace std; size_t len = strlen(fname); if (len > 8 && !strcmp(fname + len - 7, ".buffer")) { if (!this->LoadBinary(fname, silent)) { diff --git a/src/learner/dmatrix.h b/src/learner/dmatrix.h index bef84900a..b58f7b2bb 100644 --- a/src/learner/dmatrix.h +++ b/src/learner/dmatrix.h @@ -90,6 +90,7 @@ struct MetaInfo { } // try to load group information from file, if exists inline bool TryLoadGroup(const char* fname, bool silent = false) { + using namespace std; FILE *fi = fopen64(fname, "r"); if (fi == NULL) return false; group_ptr.push_back(0); @@ -105,6 +106,7 @@ struct MetaInfo { return true; } inline std::vector& GetFloatInfo(const char *field) { + using namespace std; if (!strcmp(field, "label")) return labels; if (!strcmp(field, "weight")) return weights; if (!strcmp(field, "base_margin")) return base_margin; @@ -115,6 +117,7 @@ struct MetaInfo { return ((MetaInfo*)this)->GetFloatInfo(field); } inline std::vector &GetUIntInfo(const char *field) { + using namespace std; if (!strcmp(field, "root_index")) return info.root_index; if (!strcmp(field, "fold_index")) return info.fold_index; utils::Error("unknown field %s", field); @@ -125,6 +128,7 @@ struct MetaInfo { } // try to load weight information from file, if exists inline bool TryLoadFloatInfo(const char *field, const char* fname, bool silent = false) { + using namespace std; std::vector &data = this->GetFloatInfo(field); FILE *fi = fopen64(fname, "r"); if (fi == NULL) return false; diff --git a/src/learner/evaluation-inl.hpp b/src/learner/evaluation-inl.hpp index 52877e17b..fb0b8953d 100644 --- a/src/learner/evaluation-inl.hpp +++ b/src/learner/evaluation-inl.hpp @@ -147,10 +147,11 @@ struct EvalAMS : public IEvaluator { explicit EvalAMS(const char *name) { name_ = name; // note: ams@0 will automatically select which ratio to go - utils::Check(sscanf(name, "ams@%f", &ratio_) == 1, "invalid ams format"); + utils::Check(std::sscanf(name, "ams@%f", &ratio_) == 1, "invalid ams format"); } virtual float Eval(const std::vector &preds, const MetaInfo &info) const { + using namespace std; const bst_omp_uint ndata = static_cast(info.labels.size()); utils::Check(info.weights.size() == ndata, "we need weight to evaluate ams"); @@ -202,6 +203,7 @@ struct EvalAMS : public IEvaluator { struct EvalPrecisionRatio : public IEvaluator{ public: explicit EvalPrecisionRatio(const char *name) : name_(name) { + using namespace std; if (sscanf(name, "apratio@%f", &ratio_) == 1) { use_ap = 1; } else { @@ -342,6 +344,7 @@ struct EvalRankList : public IEvaluator { protected: explicit EvalRankList(const char *name) { + using namespace std; name_ = name; minus_ = false; if (sscanf(name, "%*[^@]@%u[-]?", &topn_) != 1) { @@ -388,7 +391,7 @@ struct EvalNDCG : public EvalRankList{ for (size_t i = 0; i < rec.size() && i < this->topn_; ++i) { const unsigned rel = rec[i].second; if (rel != 0) { - sumdcg += ((1 << rel) - 1) / log(i + 2.0); + sumdcg += ((1 << rel) - 1) / std::log(i + 2.0); } } return static_cast(sumdcg); diff --git a/src/learner/evaluation.h b/src/learner/evaluation.h index ec37e1f4a..f34d832c8 100644 --- a/src/learner/evaluation.h +++ b/src/learner/evaluation.h @@ -36,6 +36,7 @@ struct IEvaluator{ namespace xgboost { namespace learner { inline IEvaluator* CreateEvaluator(const char *name) { + using namespace std; if (!strcmp(name, 
"rmse")) return new EvalRMSE(); if (!strcmp(name, "error")) return new EvalError(); if (!strcmp(name, "merror")) return new EvalMatchError(); @@ -56,6 +57,7 @@ inline IEvaluator* CreateEvaluator(const char *name) { class EvalSet{ public: inline void AddEval(const char *name) { + using namespace std; for (size_t i = 0; i < evals_.size(); ++i) { if (!strcmp(name, evals_[i]->Name())) return; } diff --git a/src/learner/learner-inl.hpp b/src/learner/learner-inl.hpp index 5d7c9d06a..05519de8b 100644 --- a/src/learner/learner-inl.hpp +++ b/src/learner/learner-inl.hpp @@ -79,6 +79,7 @@ class BoostLearner { * \param val value of the parameter */ inline void SetParam(const char *name, const char *val) { + using namespace std; // in this version, bst: prefix is no longer required if (strncmp(name, "bst:", 4) != 0) { std::string n = "bst:"; n += name; @@ -290,7 +291,7 @@ class BoostLearner { base_score = 0.5f; num_feature = 0; num_class = 0; - memset(reserved, 0, sizeof(reserved)); + std::memset(reserved, 0, sizeof(reserved)); } /*! * \brief set parameters from outside @@ -298,6 +299,7 @@ class BoostLearner { * \param val value of the parameter */ inline void SetParam(const char *name, const char *val) { + using namespace std; if (!strcmp("base_score", name)) base_score = static_cast(atof(val)); if (!strcmp("num_class", name)) num_class = atoi(val); if (!strcmp("bst:num_feature", name)) num_feature = atoi(val); diff --git a/src/learner/objective-inl.hpp b/src/learner/objective-inl.hpp index 576549eac..96aacf12d 100644 --- a/src/learner/objective-inl.hpp +++ b/src/learner/objective-inl.hpp @@ -101,6 +101,7 @@ class RegLossObj : public IObjFunction{ } virtual ~RegLossObj(void) {} virtual void SetParam(const char *name, const char *val) { + using namespace std; if (!strcmp("scale_pos_weight", name)) { scale_pos_weight = static_cast(atof(val)); } @@ -156,6 +157,7 @@ class SoftmaxMultiClassObj : public IObjFunction { } virtual ~SoftmaxMultiClassObj(void) {} virtual void SetParam(const char *name, const char *val) { + using namespace std; if (!strcmp( "num_class", name )) nclass = atoi(val); } virtual void GetGradient(const std::vector &preds, @@ -247,6 +249,7 @@ class LambdaRankObj : public IObjFunction { } virtual ~LambdaRankObj(void) {} virtual void SetParam(const char *name, const char *val) { + using namespace std; if (!strcmp( "loss_type", name )) loss.loss_type = atoi(val); if (!strcmp( "fix_list_weight", name)) fix_list_weight = static_cast(atof(val)); if (!strcmp( "num_pairsample", name)) num_pairsample = atoi(val); diff --git a/src/learner/objective.h b/src/learner/objective.h index d741ba61f..6b11b7d18 100644 --- a/src/learner/objective.h +++ b/src/learner/objective.h @@ -67,6 +67,7 @@ namespace xgboost { namespace learner { /*! \brief factory funciton to create objective function by name */ inline IObjFunction* CreateObjFunction(const char *name) { + using namespace std; if (!strcmp("reg:linear", name)) return new RegLossObj(LossType::kLinearSquare); if (!strcmp("reg:logistic", name)) return new RegLossObj(LossType::kLogisticNeglik); if (!strcmp("binary:logistic", name)) return new RegLossObj(LossType::kLogisticClassify); diff --git a/src/tree/model.h b/src/tree/model.h index 6d885faa7..8049a1608 100644 --- a/src/tree/model.h +++ b/src/tree/model.h @@ -53,7 +53,7 @@ class TreeModel { Param(void) { max_depth = 0; size_leaf_vector = 0; - memset(reserved, 0, sizeof(reserved)); + std::memset(reserved, 0, sizeof(reserved)); } /*! 
* \brief set parameters from outside @@ -61,6 +61,7 @@ class TreeModel { * \param val value of the parameter */ inline void SetParam(const char *name, const char *val) { + using namespace std; if (!strcmp("num_roots", name)) num_roots = atoi(val); if (!strcmp("num_feature", name)) num_feature = atoi(val); if (!strcmp("size_leaf_vector", name)) size_leaf_vector = atoi(val); diff --git a/src/tree/param.h b/src/tree/param.h index 79bc162c3..04ea5277f 100644 --- a/src/tree/param.h +++ b/src/tree/param.h @@ -62,6 +62,7 @@ struct TrainParam{ * \param val value of the parameter */ inline void SetParam(const char *name, const char *val) { + using namespace std; // sync-names if (!strcmp(name, "gamma")) min_split_loss = static_cast(atof(val)); if (!strcmp(name, "eta")) learning_rate = static_cast(atof(val)); diff --git a/src/tree/updater.cpp b/src/tree/updater.cpp index 09b63eb49..2cb6552fe 100644 --- a/src/tree/updater.cpp +++ b/src/tree/updater.cpp @@ -1,7 +1,6 @@ #define _CRT_SECURE_NO_WARNINGS #define _CRT_SECURE_NO_DEPRECATE #include -using namespace std; #include "./updater.h" #include "./updater_prune-inl.hpp" #include "./updater_refresh-inl.hpp" @@ -10,6 +9,7 @@ using namespace std; namespace xgboost { namespace tree { IUpdater* CreateUpdater(const char *name) { + using namespace std; if (!strcmp(name, "prune")) return new TreePruner(); if (!strcmp(name, "refresh")) return new TreeRefresher(); if (!strcmp(name, "grow_colmaker")) return new ColMaker(); diff --git a/src/tree/updater_prune-inl.hpp b/src/tree/updater_prune-inl.hpp index 98fdf5ee4..726999f55 100644 --- a/src/tree/updater_prune-inl.hpp +++ b/src/tree/updater_prune-inl.hpp @@ -17,6 +17,7 @@ class TreePruner: public IUpdater { virtual ~TreePruner(void) {} // set training parameter virtual void SetParam(const char *name, const char *val) { + using namespace std; param.SetParam(name, val); if (!strcmp(name, "silent")) silent = atoi(val); } diff --git a/src/utils/fmap.h b/src/utils/fmap.h index f9437cc6c..607f37013 100644 --- a/src/utils/fmap.h +++ b/src/utils/fmap.h @@ -24,15 +24,15 @@ class FeatMap { // function definitions /*! \brief load feature map from text format */ inline void LoadText(const char *fname) { - FILE *fi = utils::FopenCheck(fname, "r"); + std::FILE *fi = utils::FopenCheck(fname, "r"); this->LoadText(fi); - fclose(fi); + std::fclose(fi); } /*! \brief load feature map from text format */ - inline void LoadText(FILE *fi) { + inline void LoadText(std::FILE *fi) { int fid; char fname[1256], ftype[1256]; - while (fscanf(fi, "%d\t%[^\t]\t%s\n", &fid, fname, ftype) == 3) { + while (std::fscanf(fi, "%d\t%[^\t]\t%s\n", &fid, fname, ftype) == 3) { this->PushBack(fid, fname, ftype); } } @@ -62,6 +62,7 @@ class FeatMap { private: inline static Type GetType(const char *tname) { + using namespace std; if (!strcmp("i", tname)) return kIndicator; if (!strcmp("q", tname)) return kQuantitive; if (!strcmp("int", tname)) return kInteger; diff --git a/src/utils/io.h b/src/utils/io.h index a15e2f0ce..026e3fec7 100644 --- a/src/utils/io.h +++ b/src/utils/io.h @@ -91,21 +91,21 @@ class IStream { /*! 
\brief implementation of file i/o stream */ class FileStream : public IStream { private: - FILE *fp; + std::FILE *fp; public: - explicit FileStream(FILE *fp) : fp(fp) { + explicit FileStream(std::FILE *fp) : fp(fp) { } virtual size_t Read(void *ptr, size_t size) { - return fread(ptr, size, 1, fp); + return std::fread(ptr, size, 1, fp); } virtual void Write(const void *ptr, size_t size) { - fwrite(ptr, size, 1, fp); + std::fwrite(ptr, size, 1, fp); } inline void Seek(size_t pos) { - fseek(fp, 0, SEEK_SET); + std::fseek(fp, 0, SEEK_SET); } inline void Close(void) { - fclose(fp); + std::fclose(fp); } }; diff --git a/src/utils/random.h b/src/utils/random.h index 57e1f243d..1e3e617f9 100644 --- a/src/utils/random.h +++ b/src/utils/random.h @@ -53,7 +53,7 @@ inline double NextDouble(void) { } /*! \brief return a random number in n */ inline uint32_t NextUInt32(uint32_t n) { - return (uint32_t)floor(NextDouble() * n); + return (uint32_t)std::floor(NextDouble() * n); } /*! \brief return x~N(mu,sigma^2) */ inline double SampleNormal(double mu, double sigma) { diff --git a/src/utils/utils.h b/src/utils/utils.h index c319c5ab7..afe17f64c 100644 --- a/src/utils/utils.h +++ b/src/utils/utils.h @@ -149,8 +149,8 @@ inline void Error(const char *fmt, ...) { #endif /*! \brief replace fopen, report error when the file open fails */ -inline FILE *FopenCheck(const char *fname, const char *flag) { - FILE *fp = fopen64(fname, flag); +inline std::FILE *FopenCheck(const char *fname, const char *flag) { + std::FILE *fp = fopen64(fname, flag); Check(fp != NULL, "can not open file \"%s\"\n", fname); return fp; } From ac8958b2844c3fbbf1570ae1b18633360ca84082 Mon Sep 17 00:00:00 2001 From: tqchen Date: Tue, 2 Sep 2014 23:07:50 -0700 Subject: [PATCH 06/22] move custom obj build in into booster --- wrapper/xgboost.py | 235 ++++++++++++++++++++------------------------- 1 file changed, 106 insertions(+), 129 deletions(-) diff --git a/wrapper/xgboost.py b/wrapper/xgboost.py index a0a88af47..2ae12c341 100644 --- a/wrapper/xgboost.py +++ b/wrapper/xgboost.py @@ -213,77 +213,6 @@ class DMatrix: self.handle, (ctypes.c_int*len(rindex))(*rindex), len(rindex))) return res -class CVPack: - def __init__(self, dtrain, dtest, param): - self.dtrain = dtrain - self.dtest = dtest - self.watchlist = watchlist = [ (dtrain,'train'), (dtest, 'test') ] - self.bst = Booster(param, [dtrain,dtest]) - def update(self,r): - self.bst.update(self.dtrain, r) - def eval(self,r): - return self.bst.eval_set(self.watchlist, r) - -def mknfold(dall, nfold, param, seed, weightscale=None, evals=[], set_pos_weight=None): - """ - mk nfold list of cvpack from randidx - """ - randidx = range(dall.num_row()) - random.seed(seed) - random.shuffle(randidx) - - idxset = [] - kstep = len(randidx) / nfold - for i in range(nfold): - idxset.append(randidx[ (i*kstep) : min(len(randidx),(i+1)*kstep) ]) - - ret = [] - for k in range(nfold): - trainlst = [] - for j in range(nfold): - if j == k: - testlst = idxset[j] - else: - trainlst += idxset[j] - dtrain = dall.slice(trainlst) - dtest = dall.slice(testlst) - # rescale weight of dtrain and dtest - if weightscale != None: - dtrain.set_weight( dtrain.get_weight() * weightscale * dall.num_row() / dtrain.num_row() ) - dtest.set_weight( dtest.get_weight() * weightscale * dall.num_row() / dtest.num_row() ) - if set_pos_weight != None: - label = dtrain.get_label() - weight = dtrain.get_weight() - sum_wpos = sum( weight[i] for i in range(len(label)) if label[i] == 1.0 ) - sum_wneg = sum( weight[i] for i in range(len(label)) if 
label[i] == 0.0 ) - param['scale_pos_weight'] = sum_wneg/sum_wpos - plst = param.items() + [('eval_metric', itm) for itm in evals] - ret.append(CVPack(dtrain, dtest, plst)) - return ret - -def aggcv(rlist): - """ - aggregate cross validation results - """ - cvmap = {} - arr = rlist[0].split() - ret = arr[0] - for it in arr[1:]: - k, v = it.split(':') - cvmap[k] = [float(v)] - for line in rlist[1:]: - arr = line.split() - assert ret == arr[0] - for it in arr[1:]: - k, v = it.split(':') - cvmap[k].append(float(v)) - - for k, v in sorted(cvmap.items(), key = lambda x:x[0]): - v = np.array(v) - ret += '\t%s:%f+%f' % (k, np.mean(v), np.std(v)) - return ret - - class Booster: """learner class """ def __init__(self, params={}, cache=[], model_file = None): @@ -324,7 +253,7 @@ class Booster: self.handle, ctypes.c_char_p(k.encode('utf-8')), ctypes.c_char_p(str(v).encode('utf-8'))) - def update(self, dtrain, it): + def update(self, dtrain, it, fobj=None): """ update Args: @@ -332,11 +261,19 @@ class Booster: the training DMatrix it: int current iteration number + fobj: function + cutomzied objective function Returns: None """ assert isinstance(dtrain, DMatrix) - xglib.XGBoosterUpdateOneIter(self.handle, it, dtrain.handle) + if fobj is None: + xglib.XGBoosterUpdateOneIter(self.handle, it, dtrain.handle) + else: + pred = self.predict( dtrain ) + grad, hess = fobj( pred, dtrain ) + self.boost( dtrain, grad, hess ) + def boost(self, dtrain, grad, hess): """ update Args: @@ -353,22 +290,31 @@ class Booster: (ctypes.c_float*len(grad))(*grad), (ctypes.c_float*len(hess))(*hess), len(grad)) - def eval_set(self, evals, it = 0): + def eval_set(self, evals, it = 0, feval = None): """evaluates by metric Args: evals: list of tuple (DMatrix, string) lists of items to be evaluated it: int + feval: function + custom evaluation function Returns: evals result """ - for d in evals: - assert isinstance(d[0], DMatrix) - assert isinstance(d[1], str) - dmats = (ctypes.c_void_p * len(evals) )(*[ d[0].handle for d in evals]) - evnames = (ctypes.c_char_p * len(evals))( - * [ctypes.c_char_p(d[1].encode('utf-8')) for d in evals]) - return xglib.XGBoosterEvalOneIter(self.handle, it, dmats, evnames, len(evals)) + if feval is None: + for d in evals: + assert isinstance(d[0], DMatrix) + assert isinstance(d[1], str) + dmats = (ctypes.c_void_p * len(evals) )(*[ d[0].handle for d in evals]) + evnames = (ctypes.c_char_p * len(evals))( + * [ctypes.c_char_p(d[1].encode('utf-8')) for d in evals]) + return xglib.XGBoosterEvalOneIter(self.handle, it, dmats, evnames, len(evals)) + else: + res = '[%d]' % it + for dm, evname in evals: + name, val = feval(self.predict(dm), dm) + res += '\t%s-%s:%f' % (evname, name, val) + return res def eval(self, mat, name = 'eval', it = 0): return self.eval_set( [(mat,name)], it) def predict(self, data, output_margin=False, ntree_limit=0): @@ -453,31 +399,7 @@ class Booster: fmap[fid]+= 1 return fmap -def evaluate(bst, evals, it, feval = None): - """evaluation on eval set - Args: - bst: XGBoost object - object of XGBoost model - evals: list of tuple (DMatrix, string) - obj need to be evaluated - it: int - feval: optional - Returns: - eval result - """ - if feval != None: - res = '[%d]' % it - for dm, evname in evals: - name, val = feval(bst.predict(dm), dm) - res += '\t%s-%s:%f' % (evname, name, val) - else: - res = bst.eval_set(evals, it) - - return res - - - -def train(params, dtrain, num_boost_round = 10, evals = [], obj=None, feval=None): +def train(params, dtrain, num_boost_round = 10, evals = [], 
fobj=None, feval=None): """ train a booster with given paramaters Args: params: dict @@ -488,27 +410,84 @@ def train(params, dtrain, num_boost_round = 10, evals = [], obj=None, feval=None num of round to be boosted evals: list list of items to be evaluated - obj: - feval: + fobj: function + cutomized objective function + feval: function + cutomized evaluation function """ bst = Booster(params, [dtrain]+[ d[0] for d in evals ] ) - if obj is None: - for i in range(num_boost_round): - bst.update( dtrain, i ) - if len(evals) != 0: - sys.stderr.write(evaluate(bst, evals, i, feval).decode()+'\n') - else: - # try customized objective function - for i in range(num_boost_round): - pred = bst.predict( dtrain ) - grad, hess = obj( pred, dtrain ) - bst.boost( dtrain, grad, hess ) - if len(evals) != 0: - sys.stderr.write(evaluate(bst, evals, i, feval)+'\n') + for i in range(num_boost_round): + bst.update( dtrain, i, fobj ) + if len(evals) != 0: + sys.stderr.write(bst.eval_set(evals, i, feval).decode()+'\n') return bst -def cv(params, dtrain, num_boost_round = 10, nfold=3, evals = [], \ - weightscale=None, obj=None, feval=None, set_pos_weight=None): +class CVPack: + def __init__(self, dtrain, dtest, param): + self.dtrain = dtrain + self.dtest = dtest + self.watchlist = watchlist = [ (dtrain,'train'), (dtest, 'test') ] + self.bst = Booster(param, [dtrain,dtest]) + def update(self, r, fobj): + self.bst.update(self.dtrain, r, fobj) + def eval(self, r, fval): + return self.bst.eval_set(self.watchlist, r, feval) + +def mknfold(dall, nfold, param, seed, weightscale=None, evals=[]): + """ + mk nfold list of cvpack from randidx + """ + randidx = range(dall.num_row()) + random.seed(seed) + random.shuffle(randidx) + + idxset = [] + kstep = len(randidx) / nfold + for i in range(nfold): + idxset.append(randidx[ (i*kstep) : min(len(randidx),(i+1)*kstep) ]) + + ret = [] + for k in range(nfold): + trainlst = [] + for j in range(nfold): + if j == k: + testlst = idxset[j] + else: + trainlst += idxset[j] + dtrain = dall.slice(trainlst) + dtest = dall.slice(testlst) + # rescale weight of dtrain and dtest + if weightscale != None: + dtrain.set_weight( dtrain.get_weight() * weightscale * dall.num_row() / dtrain.num_row() ) + dtest.set_weight( dtest.get_weight() * weightscale * dall.num_row() / dtest.num_row() ) + plst = param.items() + [('eval_metric', itm) for itm in evals] + ret.append(CVPack(dtrain, dtest, plst)) + return ret + +def aggcv(rlist): + """ + aggregate cross validation results + """ + cvmap = {} + arr = rlist[0].split() + ret = arr[0] + for it in arr[1:]: + k, v = it.split(':') + cvmap[k] = [float(v)] + for line in rlist[1:]: + arr = line.split() + assert ret == arr[0] + for it in arr[1:]: + k, v = it.split(':') + cvmap[k].append(float(v)) + + for k, v in sorted(cvmap.items(), key = lambda x:x[0]): + v = np.array(v) + ret += '\t%s:%f+%f' % (k, np.mean(v), np.std(v)) + return ret + +def cv(params, dtrain, num_boost_round = 10, nfold=3, eval_metrics = [], \ + weightscale=None, fobj=None, feval=None): """ cross validation with given paramaters Args: params: dict @@ -521,14 +500,12 @@ def cv(params, dtrain, num_boost_round = 10, nfold=3, evals = [], \ folds to do cv evals: list list of items to be evaluated - obj: + fobj: feval: - set_pos_weight: bool, optional - Adjust pos weight by number """ - cvfolds = mknfold(dtrain, nfold, params, 0, weightscale, evals) + cvfolds = mknfold(dtrain, nfold, params, 0, weightscale, evals_metrics) for i in range(num_boost_round): for f in cvfolds: - f.update(i) - res = 
aggcv([f.eval(i) for f in cvfolds]) + f.update(i, fobj) + res = aggcv([f.eval(i, fval) for f in cvfolds]) sys.stderr.write(res+'\n') From 06b5533209fc14c2c6b3a1d4491be6939272a9f4 Mon Sep 17 00:00:00 2001 From: tqchen Date: Tue, 2 Sep 2014 23:15:41 -0700 Subject: [PATCH 07/22] chg fobj back to obj, to keep parameter name unchanged --- wrapper/xgboost.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/wrapper/xgboost.py b/wrapper/xgboost.py index 2ae12c341..6b9bc83c6 100644 --- a/wrapper/xgboost.py +++ b/wrapper/xgboost.py @@ -399,7 +399,7 @@ class Booster: fmap[fid]+= 1 return fmap -def train(params, dtrain, num_boost_round = 10, evals = [], fobj=None, feval=None): +def train(params, dtrain, num_boost_round = 10, evals = [], obj=None, feval=None): """ train a booster with given paramaters Args: params: dict @@ -410,14 +410,14 @@ def train(params, dtrain, num_boost_round = 10, evals = [], fobj=None, feval=Non num of round to be boosted evals: list list of items to be evaluated - fobj: function + obj: function cutomized objective function feval: function cutomized evaluation function """ bst = Booster(params, [dtrain]+[ d[0] for d in evals ] ) for i in range(num_boost_round): - bst.update( dtrain, i, fobj ) + bst.update( dtrain, i, obj ) if len(evals) != 0: sys.stderr.write(bst.eval_set(evals, i, feval).decode()+'\n') return bst @@ -487,7 +487,7 @@ def aggcv(rlist): return ret def cv(params, dtrain, num_boost_round = 10, nfold=3, eval_metrics = [], \ - weightscale=None, fobj=None, feval=None): + weightscale=None, obj=None, feval=None): """ cross validation with given paramaters Args: params: dict @@ -500,12 +500,12 @@ def cv(params, dtrain, num_boost_round = 10, nfold=3, eval_metrics = [], \ folds to do cv evals: list list of items to be evaluated - fobj: + obj: feval: """ cvfolds = mknfold(dtrain, nfold, params, 0, weightscale, evals_metrics) for i in range(num_boost_round): for f in cvfolds: - f.update(i, fobj) + f.update(i, obj) res = aggcv([f.eval(i, fval) for f in cvfolds]) sys.stderr.write(res+'\n') From 582ef2f9d58f865b91e0df1a412beb27157e84ba Mon Sep 17 00:00:00 2001 From: Tianqi Chen Date: Tue, 2 Sep 2014 23:29:48 -0700 Subject: [PATCH 08/22] Update DESCRIPTION --- R-package/DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R-package/DESCRIPTION b/R-package/DESCRIPTION index 40705e317..baf5912f4 100644 --- a/R-package/DESCRIPTION +++ b/R-package/DESCRIPTION @@ -12,7 +12,7 @@ Description: This package is a R wrapper of xgboost, which is short for eXtreme parallel computation with OpenMP, and it can be more than 10 times faster than existing gradient boosting packages such as gbm. It supports various objective functions, including regression, classification and ranking. The - package is made to be extensible, so that user are also allowed to define + package is made to be extensible, so that users are also allowed to define their own objectives easily. 
 License: Apache License (== 2.0) | file LICENSE
 URL: https://github.com/tqchen/xgboost

From 642b5bda0a6928588619678f0fa323bfe6011d92 Mon Sep 17 00:00:00 2001
From: Tianqi Chen
Date: Tue, 2 Sep 2014 23:30:53 -0700
Subject: [PATCH 09/22] Update DESCRIPTION

---
 R-package/DESCRIPTION | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/R-package/DESCRIPTION b/R-package/DESCRIPTION
index baf5912f4..33258bf5c 100644
--- a/R-package/DESCRIPTION
+++ b/R-package/DESCRIPTION
@@ -1,14 +1,14 @@
 Package: xgboost
 Type: Package
 Title: eXtreme Gradient Boosting
-Version: 0.3-0
+Version: 0.3-1
 Date: 2014-08-23
 Author: Tianqi Chen, Tong He
 Maintainer: Tong He
 Description: This package is a R wrapper of xgboost, which is short for eXtreme
     Gradient Boosting. It is an efficient and scalable implementation of
     gradient boosting framework. The package includes efficient linear model
-    solver and tree learning algorithm. The package can automatically do
+    solver and tree learning algorithms. The package can automatically do
     parallel computation with OpenMP, and it can be more than 10 times faster
     than existing gradient boosting packages such as gbm. It supports various
     objective functions, including regression, classification and ranking. The

From 85dbaf638bfbb75c023203893cd851920f948cd9 Mon Sep 17 00:00:00 2001
From: Tianqi Chen
Date: Tue, 2 Sep 2014 23:33:04 -0700
Subject: [PATCH 10/22] Update xgboost.Rnw

---
 R-package/vignettes/xgboost.Rnw | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/R-package/vignettes/xgboost.Rnw b/R-package/vignettes/xgboost.Rnw
index 19254abaf..9ecceca17 100644
--- a/R-package/vignettes/xgboost.Rnw
+++ b/R-package/vignettes/xgboost.Rnw
@@ -52,8 +52,7 @@ This is an introductory document of using the \verb@xgboost@ package in R.
 and scalable implementation of gradient boosting framework by \citep{friedman2001greedy}.
 The package includes efficient linear model solver and tree learning algorithm.
 It supports various objective functions, including regression, classification
-and ranking. The package is made to be extendible, so that user are also allowed
-to define there own objectives easily. It has several features:
+and ranking. The package is made to be extensible, so that users are also allowed to define their own objectives easily. It has several features:
 \begin{enumerate}
 \item{Speed: }{\verb@xgboost@ can automatically do parallel computation on
 Windows and Linux, with openmp. It is generally over 10 times faster than
@@ -137,13 +136,10 @@ diris = xgb.DMatrix('iris.xgb.DMatrix')
 
 \section{Advanced Examples}
 
-The function \verb@xgboost@ is a simple function with less parameters, in order
-to be R-friendly. The core training function is wrapped in \verb@xgb.train@. It
-is more flexible than \verb@xgboost@, but it requires users to read the document
-a bit more carefully.
+The function \verb@xgboost@ is a simple function with fewer parameters, in order
+to be R-friendly. The core training function is wrapped in \verb@xgb.train@. It is more flexible than \verb@xgboost@, but it requires users to read the document a bit more carefully.
 
-\verb@xgb.train@ only accept a \verb@xgb.DMatrix@ object as its input, while it
-supports advanced features as custom objective and evaluation functions.
+\verb@xgb.train@ only accepts a \verb@xgb.DMatrix@ object as its input, while it supports advanced features such as custom objective and evaluation functions.
 
 <>=
 logregobj <- function(preds, dtrain) {
@@ -213,3 +209,4 @@ competition.
\bibliography{xgboost} \end{document} + From 02dd8d121295c18564b62d191c5f9262011f1cc7 Mon Sep 17 00:00:00 2001 From: antinucleon Date: Wed, 3 Sep 2014 00:37:55 -0600 Subject: [PATCH 11/22] chg --- wrapper/xgboost.py | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/wrapper/xgboost.py b/wrapper/xgboost.py index 6b9bc83c6..2eea30483 100644 --- a/wrapper/xgboost.py +++ b/wrapper/xgboost.py @@ -296,7 +296,7 @@ class Booster: evals: list of tuple (DMatrix, string) lists of items to be evaluated it: int - feval: function + feval: function custom evaluation function Returns: evals result @@ -325,7 +325,7 @@ class Booster: the dmatrix storing the input output_margin: bool whether output raw margin value that is untransformed - + ntree_limit: limit number of trees in prediction, default to 0, 0 means using all the trees Returns: numpy array of prediction @@ -430,10 +430,10 @@ class CVPack: self.bst = Booster(param, [dtrain,dtest]) def update(self, r, fobj): self.bst.update(self.dtrain, r, fobj) - def eval(self, r, fval): + def eval(self, r, feval): return self.bst.eval_set(self.watchlist, r, feval) -def mknfold(dall, nfold, param, seed, weightscale=None, evals=[]): +def mknfold(dall, nfold, param, seed, evals=[]): """ mk nfold list of cvpack from randidx """ @@ -457,9 +457,6 @@ def mknfold(dall, nfold, param, seed, weightscale=None, evals=[]): dtrain = dall.slice(trainlst) dtest = dall.slice(testlst) # rescale weight of dtrain and dtest - if weightscale != None: - dtrain.set_weight( dtrain.get_weight() * weightscale * dall.num_row() / dtrain.num_row() ) - dtest.set_weight( dtest.get_weight() * weightscale * dall.num_row() / dtest.num_row() ) plst = param.items() + [('eval_metric', itm) for itm in evals] ret.append(CVPack(dtrain, dtest, plst)) return ret @@ -487,7 +484,7 @@ def aggcv(rlist): return ret def cv(params, dtrain, num_boost_round = 10, nfold=3, eval_metrics = [], \ - weightscale=None, obj=None, feval=None): + obj=None, feval=None): """ cross validation with given paramaters Args: params: dict @@ -503,9 +500,9 @@ def cv(params, dtrain, num_boost_round = 10, nfold=3, eval_metrics = [], \ obj: feval: """ - cvfolds = mknfold(dtrain, nfold, params, 0, weightscale, evals_metrics) + cvfolds = mknfold(dtrain, nfold, params, 0, eval_metrics) for i in range(num_boost_round): for f in cvfolds: f.update(i, obj) - res = aggcv([f.eval(i, fval) for f in cvfolds]) + res = aggcv([f.eval(i, feval) for f in cvfolds]) sys.stderr.write(res+'\n') From 998ca3bdc951c00453fd955a055421a95a170cf9 Mon Sep 17 00:00:00 2001 From: tqchen Date: Wed, 3 Sep 2014 11:46:33 -0700 Subject: [PATCH 12/22] make some changes to cv --- wrapper/xgboost.py | 50 +++++++++++++++++++--------------------------- 1 file changed, 20 insertions(+), 30 deletions(-) diff --git a/wrapper/xgboost.py b/wrapper/xgboost.py index 2eea30483..6dadaf613 100644 --- a/wrapper/xgboost.py +++ b/wrapper/xgboost.py @@ -433,31 +433,22 @@ class CVPack: def eval(self, r, feval): return self.bst.eval_set(self.watchlist, r, feval) -def mknfold(dall, nfold, param, seed, evals=[]): +def mknfold(dall, nfold, param, seed, evals=[], fpreproc = None): """ mk nfold list of cvpack from randidx """ - randidx = range(dall.num_row()) - random.seed(seed) - random.shuffle(randidx) - - idxset = [] + np.random.seed(seed) + randidx = np.random.permutation(dall.num_rows()) kstep = len(randidx) / nfold - for i in range(nfold): - idxset.append(randidx[ (i*kstep) : min(len(randidx),(i+1)*kstep) ]) - + idset = [randidx[ (i*kstep) : 
min(len(randidx),(i+1)*kstep) ] for i in range(nfold)] ret = [] for k in range(nfold): - trainlst = [] - for j in range(nfold): - if j == k: - testlst = idxset[j] - else: - trainlst += idxset[j] - dtrain = dall.slice(trainlst) - dtest = dall.slice(testlst) - # rescale weight of dtrain and dtest - plst = param.items() + [('eval_metric', itm) for itm in evals] + dtrain = dall.slice(np.concatenate([idset[i] for i in range(nfold) if k != i])) + dtest = all.slice(idxset[k]) + # run preprocessing on the data set if needed + if fpreproc is not None: + dtrain, dtest, tparam = fpreproc(dtrain, dtest, param.copy()) + plst = tparam.items() + [('eval_metric', itm) for itm in evals] ret.append(CVPack(dtrain, dtest, plst)) return ret @@ -466,25 +457,22 @@ def aggcv(rlist): aggregate cross validation results """ cvmap = {} - arr = rlist[0].split() - ret = arr[0] - for it in arr[1:]: - k, v = it.split(':') - cvmap[k] = [float(v)] - for line in rlist[1:]: + ret = rlist[0].split()[0] + for line in rlist: arr = line.split() assert ret == arr[0] for it in arr[1:]: k, v = it.split(':') + if k not in cvmap: + cvmap[k] = [] cvmap[k].append(float(v)) - for k, v in sorted(cvmap.items(), key = lambda x:x[0]): v = np.array(v) ret += '\t%s:%f+%f' % (k, np.mean(v), np.std(v)) return ret -def cv(params, dtrain, num_boost_round = 10, nfold=3, eval_metrics = [], \ - obj=None, feval=None): +def cv(params, dtrain, num_boost_round = 10, nfold=3, eval_metric = [], \ + obj = None, feval = None, fpreproc = None): """ cross validation with given paramaters Args: params: dict @@ -495,12 +483,14 @@ def cv(params, dtrain, num_boost_round = 10, nfold=3, eval_metrics = [], \ num of round to be boosted nfold: int folds to do cv - evals: list + evals: list or list of items to be evaluated obj: feval: + fpreproc: preprocessing function that takes dtrain, dtest, + param and return transformed version of dtrain, dtest, param """ - cvfolds = mknfold(dtrain, nfold, params, 0, eval_metrics) + cvfolds = mknfold(dtrain, nfold, params, 0, eval_metrics, fpreproc) for i in range(num_boost_round): for f in cvfolds: f.update(i, obj) From 0c36231ea3dd2b292d5bdec483427bf9210b5fcb Mon Sep 17 00:00:00 2001 From: antinucleon Date: Wed, 3 Sep 2014 12:57:05 -0600 Subject: [PATCH 13/22] chg --- wrapper/xgboost.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/wrapper/xgboost.py b/wrapper/xgboost.py index 6dadaf613..e09c6da8c 100644 --- a/wrapper/xgboost.py +++ b/wrapper/xgboost.py @@ -437,18 +437,18 @@ def mknfold(dall, nfold, param, seed, evals=[], fpreproc = None): """ mk nfold list of cvpack from randidx """ - np.random.seed(seed) - randidx = np.random.permutation(dall.num_rows()) + np.random.seed(seed) + randidx = np.random.permutation(dall.num_row()) kstep = len(randidx) / nfold idset = [randidx[ (i*kstep) : min(len(randidx),(i+1)*kstep) ] for i in range(nfold)] ret = [] for k in range(nfold): dtrain = dall.slice(np.concatenate([idset[i] for i in range(nfold) if k != i])) - dtest = all.slice(idxset[k]) + dtest = dall.slice(idset[k]) # run preprocessing on the data set if needed if fpreproc is not None: dtrain, dtest, tparam = fpreproc(dtrain, dtest, param.copy()) - plst = tparam.items() + [('eval_metric', itm) for itm in evals] + plst = param.items() + [('eval_metric', itm) for itm in evals] ret.append(CVPack(dtrain, dtest, plst)) return ret @@ -483,14 +483,14 @@ def cv(params, dtrain, num_boost_round = 10, nfold=3, eval_metric = [], \ num of round to be boosted nfold: int folds to do cv - evals: list or + evals: 
From 0c36231ea3dd2b292d5bdec483427bf9210b5fcb Mon Sep 17 00:00:00 2001 From: antinucleon Date: Wed, 3 Sep 2014 12:57:05 -0600 Subject: [PATCH 13/22] chg --- wrapper/xgboost.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/wrapper/xgboost.py b/wrapper/xgboost.py index 6dadaf613..e09c6da8c 100644 --- a/wrapper/xgboost.py +++ b/wrapper/xgboost.py @@ -437,18 +437,18 @@ def mknfold(dall, nfold, param, seed, evals=[], fpreproc = None): """ make an nfold list of CVPack from random indices """ - np.random.seed(seed) - randidx = np.random.permutation(dall.num_rows()) + np.random.seed(seed) + randidx = np.random.permutation(dall.num_row()) kstep = len(randidx) / nfold idset = [randidx[ (i*kstep) : min(len(randidx),(i+1)*kstep) ] for i in range(nfold)] ret = [] for k in range(nfold): dtrain = dall.slice(np.concatenate([idset[i] for i in range(nfold) if k != i])) - dtest = all.slice(idxset[k]) + dtest = dall.slice(idset[k]) # run preprocessing on the data set if needed if fpreproc is not None: dtrain, dtest, tparam = fpreproc(dtrain, dtest, param.copy()) - plst = tparam.items() + [('eval_metric', itm) for itm in evals] + plst = param.items() + [('eval_metric', itm) for itm in evals] ret.append(CVPack(dtrain, dtest, plst)) return ret @@ -483,14 +483,14 @@ def cv(params, dtrain, num_boost_round = 10, nfold=3, eval_metric = [], \ number of rounds to boost nfold: int number of folds in cv - evals: list or + evals: list or list of items to be evaluated obj: custom objective function feval: custom evaluation function - fpreproc: preprocessing function that takes dtrain, dtest, + fpreproc: preprocessing function that takes dtrain, dtest, param and returns transformed versions of dtrain, dtest, param """ - cvfolds = mknfold(dtrain, nfold, params, 0, eval_metrics, fpreproc) + cvfolds = mknfold(dtrain, nfold, params, 0, eval_metric, fpreproc) for i in range(num_boost_round): for f in cvfolds: f.update(i, obj) 
From 3192bf82d861dd55cef8a2c0e8cd71e104cf90e3 Mon Sep 17 00:00:00 2001 From: Tianqi Chen Date: Wed, 3 Sep 2014 12:15:57 -0700 Subject: [PATCH 14/22] Update xgboost.py --- wrapper/xgboost.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wrapper/xgboost.py b/wrapper/xgboost.py index e09c6da8c..5b4eee6b8 100644 --- a/wrapper/xgboost.py +++ b/wrapper/xgboost.py @@ -448,7 +448,7 @@ def mknfold(dall, nfold, param, seed, evals=[], fpreproc = None): # run preprocessing on the data set if needed if fpreproc is not None: dtrain, dtest, tparam = fpreproc(dtrain, dtest, param.copy()) - plst = param.items() + [('eval_metric', itm) for itm in evals] + plst = tparam.items() + [('eval_metric', itm) for itm in evals] ret.append(CVPack(dtrain, dtest, plst)) return ret
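Patches 12-14 settle the fold bookkeeping, and `aggcv` now folds the per-fold evaluation strings into a single mean+std summary per metric. A worked sketch of its input and output, using made-up numbers and assuming the wrapper directory is on `sys.path` as in the demos that follow:

```python
import sys
sys.path.append('../../wrapper')  # assumed location of wrapper/xgboost.py
import xgboost as xgb

# one '[round]\tname:value' string per fold, as returned by CVPack.eval;
# the error values here are invented for illustration
rlist = ['[0]\ttest-error:0.04', '[0]\ttest-error:0.06', '[0]\ttest-error:0.05']

# aggcv keeps the shared '[0]' prefix, groups the values by metric name, and
# appends 'name:mean+std' for each metric, printing:
#   [0]	test-error:0.050000+0.008165
print(xgb.aggcv(rlist))
```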
From fa11840f4b3691f56b9fe72b947393b6a06a64ad Mon Sep 17 00:00:00 2001 From: tqchen Date: Wed, 3 Sep 2014 13:13:54 -0700 Subject: [PATCH 15/22] move python example --- demo/data/README.md | 2 + .../data}/agaricus.txt.test | 0 .../data}/agaricus.txt.train | 0 .../python-example => demo/data}/featmap.txt | 0 demo/guide-python/REAMDE.md | 6 + demo/guide-python/basic_walkthrough.py | 70 ++++++++++ demo/guide-python/boost_from_prediction.py | 26 ++++ demo/guide-python/custom_objective.py | 44 +++++++ demo/guide-python/predict_first_ntree.py | 22 ++++ demo/guide-python/runall.sh | 6 + wrapper/README.md | 3 +- wrapper/python-example/README.md | 3 - wrapper/python-example/demo.py | 121 ------------------ 13 files changed, 177 insertions(+), 126 deletions(-) create mode 100644 demo/data/README.md rename {wrapper/python-example => demo/data}/agaricus.txt.test (100%) rename {wrapper/python-example => demo/data}/agaricus.txt.train (100%) rename {wrapper/python-example => demo/data}/featmap.txt (100%) create mode 100644 demo/guide-python/REAMDE.md create mode 100755 demo/guide-python/basic_walkthrough.py create mode 100755 demo/guide-python/boost_from_prediction.py create mode 100755 demo/guide-python/custom_objective.py create mode 100755 demo/guide-python/predict_first_ntree.py create mode 100755 demo/guide-python/runall.sh delete mode 100644 wrapper/python-example/README.md delete mode 100755 wrapper/python-example/demo.py diff --git a/demo/data/README.md b/demo/data/README.md new file mode 100644 index 000000000..d2d63ec11 --- /dev/null +++ b/demo/data/README.md @@ -0,0 +1,2 @@ +This folder contains the processed example datasets used by the demos. +Copyright of the datasets belongs to the original copyright holders. diff --git a/wrapper/python-example/agaricus.txt.test b/demo/data/agaricus.txt.test similarity index 100% rename from wrapper/python-example/agaricus.txt.test rename to demo/data/agaricus.txt.test diff --git a/wrapper/python-example/agaricus.txt.train b/demo/data/agaricus.txt.train similarity index 100% rename from wrapper/python-example/agaricus.txt.train rename to demo/data/agaricus.txt.train diff --git a/wrapper/python-example/featmap.txt b/demo/data/featmap.txt similarity index 100% rename from wrapper/python-example/featmap.txt rename to demo/data/featmap.txt diff --git a/demo/guide-python/REAMDE.md b/demo/guide-python/REAMDE.md new file mode 100644 index 000000000..7eaec6155 --- /dev/null +++ b/demo/guide-python/REAMDE.md @@ -0,0 +1,6 @@ +XGBoost Python Feature Walkthrough +==== +* [Basic walkthrough of wrappers](guide-python/basic.py) +* [Customize loss function and evaluation metric](guide-python/custom_objective.py) +* [Boosting from existing prediction](guide-python/boost_from_prediction.py) +* [Predicting using first n trees](guide-python/predict_first_ntree.py) diff --git a/demo/guide-python/basic_walkthrough.py b/demo/guide-python/basic_walkthrough.py new file mode 100755 index 000000000..f542954ce --- /dev/null +++ b/demo/guide-python/basic_walkthrough.py @@ -0,0 +1,70 @@ +#!/usr/bin/python +import sys +import numpy as np +import scipy.sparse +# append the path to xgboost; you may need to change the following line +# alternatively, you can add the path to the PYTHONPATH environment variable +sys.path.append('../../wrapper') +import xgboost as xgb + +### simple example +# load from a text file or a binary buffer generated by xgboost +dtrain = xgb.DMatrix('../data/agaricus.txt.train') +dtest = xgb.DMatrix('../data/agaricus.txt.test') + +# specify parameters via map; definitions are the same as in the C++ version +param = {'max_depth':2, 'eta':1, 'silent':1, 'objective':'binary:logistic' } + +# specify a validation set to watch performance +watchlist = [(dtest,'eval'), (dtrain,'train')] +num_round = 2 +bst = xgb.train(param, dtrain, num_round, watchlist) + +# this is prediction +preds = bst.predict(dtest) +labels = dtest.get_label() +print ('error=%f' % ( sum(1 for i in range(len(preds)) if int(preds[i]>0.5)!=labels[i]) /float(len(preds)))) +bst.save_model('0001.model') +# dump model +bst.dump_model('dump.raw.txt') +# dump model with feature map +bst.dump_model('dump.nice.txt','../data/featmap.txt') + +# save dmatrix into binary buffer +dtest.save_binary('dtest.buffer') +bst.save_model('xgb.model') +# load model and data in +bst2 = xgb.Booster(model_file='xgb.model') +dtest2 = xgb.DMatrix('dtest.buffer') +preds2 = bst2.predict(dtest2) +# assert they are the same +assert np.sum(np.abs(preds2-preds)) == 0 + +### +# build dmatrix from scipy.sparse +print ('start running example of building DMatrix from scipy.sparse') +labels = [] +row = []; col = []; dat = [] +i = 0 +for l in open('../data/agaricus.txt.train'): + arr = l.split() + labels.append( int(arr[0])) + for it in arr[1:]: + k,v = it.split(':') + row.append(i); col.append(int(k)); dat.append(float(v)) + i += 1 +csr = scipy.sparse.csr_matrix( (dat, (row,col)) ) +dtrain = xgb.DMatrix( csr ) +dtrain.set_label(labels) +watchlist = [(dtest,'eval'), (dtrain,'train')] +bst = xgb.train( param, dtrain, num_round, watchlist ) + +print ('start running example of building DMatrix from numpy array') +# NOTE: npymat is a numpy array; we will convert it into scipy.sparse.csr_matrix in the internal
implementation, then convert to DMatrix +npymat = csr.todense() +dtrain = xgb.DMatrix( npymat) +dtrain.set_label(labels) +watchlist = [(dtest,'eval'), (dtrain,'train')] +bst = xgb.train( param, dtrain, num_round, watchlist ) + + diff --git a/demo/guide-python/boost_from_prediction.py b/demo/guide-python/boost_from_prediction.py new file mode 100755 index 000000000..0aa2e56ab --- /dev/null +++ b/demo/guide-python/boost_from_prediction.py @@ -0,0 +1,26 @@ +#!/usr/bin/python +import sys +import numpy as np +sys.path.append('../../wrapper') +import xgboost as xgb + +dtrain = xgb.DMatrix('../data/agaricus.txt.train') +dtest = xgb.DMatrix('../data/agaricus.txt.test') +watchlist = [(dtest,'eval'), (dtrain,'train')] +### +# advanced: start from an initial base prediction +# +print ('start running example of starting from an initial prediction') +# specify parameters via map; definitions are the same as in the C++ version +param = {'max_depth':2, 'eta':1, 'silent':1, 'objective':'binary:logistic' } +# train xgboost for 1 round +bst = xgb.train( param, dtrain, 1, watchlist ) +# Note: we need the margin value instead of the transformed prediction in set_base_margin +# predicting with output_margin=True will always give you margin values before the logistic transformation +ptrain = bst.predict(dtrain, output_margin=True) +ptest = bst.predict(dtest, output_margin=True) +dtrain.set_base_margin(ptrain) +dtest.set_base_margin(ptest) + +print ('this is the result of running from the initial prediction') +bst = xgb.train( param, dtrain, 1, watchlist ) diff --git a/demo/guide-python/custom_objective.py b/demo/guide-python/custom_objective.py new file mode 100755 index 000000000..5a7f110f4 --- /dev/null +++ b/demo/guide-python/custom_objective.py @@ -0,0 +1,44 @@ +#!/usr/bin/python +import sys +import numpy as np +sys.path.append('../../wrapper') +import xgboost as xgb +### +# advanced: customized loss function +# +print ('start running example of using a customized objective function') + +dtrain = xgb.DMatrix('../data/agaricus.txt.train') +dtest = xgb.DMatrix('../data/agaricus.txt.test') + +# note: for a customized objective function, we leave objective as default +# note: what we are getting is the margin value in prediction +# you must know what you are doing +param = {'max_depth':2, 'eta':1, 'silent':1 } +watchlist = [(dtest,'eval'), (dtrain,'train')] +num_round = 2 + +# user-defined objective function: given predictions, return the gradient and second-order gradient +# this is log-likelihood loss +def logregobj(preds, dtrain): + labels = dtrain.get_label() + preds = 1.0 / (1.0 + np.exp(-preds)) + grad = preds - labels + hess = preds * (1.0-preds) + return grad, hess + +# user-defined evaluation function; returns a pair (metric_name, result) +# NOTE: when you use a customized loss function, the default prediction value is the margin +# this may make the built-in evaluation metrics not function properly +# for example, when we are doing logistic loss, the prediction is the score before the logistic transformation +# but the built-in evaluation error assumes the input is after the logistic transformation +# Keep this in mind when you use the customization; you may need to write a customized evaluation function +def evalerror(preds, dtrain): + labels = dtrain.get_label() + # return a pair (metric_name, result) + # since preds are margins (before logistic transformation, cutoff at 0) + return 'error', float(sum(labels != (preds > 0.0))) / len(labels) + +# training with a customized objective; we can also do step-by-step training +# simply look at xgboost.py's implementation of train +bst = 
xgb.train(param, dtrain, num_round, watchlist, logregobj, evalerror) diff --git a/demo/guide-python/predict_first_ntree.py b/demo/guide-python/predict_first_ntree.py new file mode 100755 index 000000000..03f327e7f --- /dev/null +++ b/demo/guide-python/predict_first_ntree.py @@ -0,0 +1,22 @@ +#!/usr/bin/python +import sys +import numpy as np +sys.path.append('../../wrapper') +import xgboost as xgb + +### load data and do training +dtrain = xgb.DMatrix('../data/agaricus.txt.train') +dtest = xgb.DMatrix('../data/agaricus.txt.test') +param = {'max_depth':2, 'eta':1, 'silent':1, 'objective':'binary:logistic' } +watchlist = [(dtest,'eval'), (dtrain,'train')] +num_round = 3 +bst = xgb.train(param, dtrain, num_round, watchlist) + +print ('start testing prediction from first n trees') +### predict using the first tree only +label = dtest.get_label() +ypred1 = bst.predict(dtest, ntree_limit=1) +# by default, we predict using all the trees +ypred2 = bst.predict(dtest) +print ('error of ypred1=%f' % (np.sum((ypred1>0.5)!=label) /float(len(label)))) +print ('error of ypred2=%f' % (np.sum((ypred2>0.5)!=label) /float(len(label)))) diff --git a/demo/guide-python/runall.sh b/demo/guide-python/runall.sh new file mode 100755 index 000000000..6b37c68ca --- /dev/null +++ b/demo/guide-python/runall.sh @@ -0,0 +1,6 @@ +#!/bin/bash +python basic_walkthrough.py +python custom_objective.py +python boost_from_prediction.py +python boost_from_prediction.py +rm *~ *.model *.buffer \ No newline at end of file diff --git a/wrapper/README.md b/wrapper/README.md index 3f43fa629..e736b9b6a 100644 --- a/wrapper/README.md +++ b/wrapper/README.md @@ -2,11 +2,10 @@ Wrapper of XGBoost ===== This folder provides wrappers of xgboost for other languages - Python ===== * To make the python module, type ```make``` in the root directory of the project -* Refer to the walk through example in [python-example/demo.py](python-example/demo.py) +* Refer also to the walk-through examples in the [demo folder](../demo/guide-python) R ===== diff --git a/wrapper/python-example/README.md b/wrapper/python-example/README.md deleted file mode 100644 index be5350dd2..000000000 --- a/wrapper/python-example/README.md +++ /dev/null @@ -1,3 +0,0 @@ -example to use python xgboost, the data is generated from demo/binary_classification, in libsvm format - -for usage: see demo.py and comments in demo.py diff --git a/wrapper/python-example/demo.py b/wrapper/python-example/demo.py deleted file mode 100755 index 687b491a4..000000000 --- a/wrapper/python-example/demo.py +++ /dev/null @@ -1,121 +0,0 @@ -#!/usr/bin/python -import sys -import numpy as np -import scipy.sparse -# append the path to xgboost, you may need to change the following line -# alternatively, you can add the path to PYTHONPATH environment variable -sys.path.append('../') -import xgboost as xgb - -### simple example -# load file from text file, also binary buffer generated by xgboost -dtrain = xgb.DMatrix('agaricus.txt.train') -dtest = xgb.DMatrix('agaricus.txt.test') - -# specify parameters via map, definition are same as c++ version -param = {'max_depth':2, 'eta':1, 'silent':1, 'objective':'binary:logistic' } - -# specify validations set to watch performance -evallist = [(dtest,'eval'), (dtrain,'train')] -num_round = 2 -bst = xgb.train(param, dtrain, num_round, evallist) - -# this is prediction -preds = bst.predict(dtest) -labels = dtest.get_label() -print ('error=%f' % ( sum(1 for i in range(len(preds)) if int(preds[i]>0.5)!=labels[i]) /float(len(preds)))) -bst.save_model('0001.model') -# dump model 
-bst.dump_model('dump.raw.txt') -# dump model with feature map -bst.dump_model('dump.nice.txt','featmap.txt') - -# save dmatrix into binary buffer -dtest.save_binary('dtest.buffer') -bst.save_model('xgb.model') -# load model and data in -bst2 = xgb.Booster(model_file='xgb.model') -dtest2 = xgb.DMatrix('dtest.buffer') -preds2 = bst2.predict(dtest2) -# assert they are the same -assert np.sum(np.abs(preds2-preds)) == 0 - -### -# build dmatrix from scipy.sparse -print ('start running example of build DMatrix from scipy.sparse') -labels = [] -row = []; col = []; dat = [] -i = 0 -for l in open('agaricus.txt.train'): - arr = l.split() - labels.append( int(arr[0])) - for it in arr[1:]: - k,v = it.split(':') - row.append(i); col.append(int(k)); dat.append(float(v)) - i += 1 -csr = scipy.sparse.csr_matrix( (dat, (row,col)) ) -dtrain = xgb.DMatrix( csr ) -dtrain.set_label(labels) -evallist = [(dtest,'eval'), (dtrain,'train')] -bst = xgb.train( param, dtrain, num_round, evallist ) - -print ('start running example of build DMatrix from numpy array') -# NOTE: npymat is numpy array, we will convert it into scipy.sparse.csr_matrix in internal implementation,then convert to DMatrix -npymat = csr.todense() -dtrain = xgb.DMatrix( npymat) -dtrain.set_label(labels) -evallist = [(dtest,'eval'), (dtrain,'train')] -bst = xgb.train( param, dtrain, num_round, evallist ) - -### -# advanced: cutomsized loss function -# -print ('start running example to used cutomized objective function') - -# note: for customized objective function, we leave objective as default -# note: what we are getting is margin value in prediction -# you must know what you are doing -param = {'max_depth':2, 'eta':1, 'silent':1 } - -# user define objective function, given prediction, return gradient and second order gradient -# this is loglikelihood loss -def logregobj(preds, dtrain): - labels = dtrain.get_label() - preds = 1.0 / (1.0 + np.exp(-preds)) - grad = preds - labels - hess = preds * (1.0-preds) - return grad, hess - -# user defined evaluation function, return a pair metric_name, result -# NOTE: when you do customized loss function, the default prediction value is margin -# this may make buildin evalution metric not function properly -# for example, we are doing logistic loss, the prediction is score before logistic transformation -# the buildin evaluation error assumes input is after logistic transformation -# Take this in mind when you use the customization, and maybe you need write customized evaluation function -def evalerror(preds, dtrain): - labels = dtrain.get_label() - # return a pair metric_name, result - # since preds are margin(before logistic transformation, cutoff at 0) - return 'error', float(sum(labels != (preds > 0.0))) / len(labels) - -# training with customized objective, we can also do step by step training -# simply look at xgboost.py's implementation of train -bst = xgb.train(param, dtrain, num_round, evallist, logregobj, evalerror) - -### -# advanced: start from a initial base prediction -# -print ('start running example to start from a initial prediction') -# specify parameters via map, definition are same as c++ version -param = {'max_depth':2, 'eta':1, 'silent':1, 'objective':'binary:logistic' } -# train xgboost for 1 round -bst = xgb.train( param, dtrain, 1, evallist ) -# Note: we need the margin value instead of transformed prediction in set_base_margin -# do predict with output_margin=True, will always give you margin values before logistic transformation -ptrain = bst.predict(dtrain, output_margin=True) 
-ptest = bst.predict(dtest, output_margin=True) -dtrain.set_base_margin(ptrain) -dtest.set_base_margin(ptest) - -print ('this is result of running from initial prediction') -bst = xgb.train( param, dtrain, 1, evallist ) 
From c1e0ff0326c46a13b52a2d9e40f5439d9f8b7328 Mon Sep 17 00:00:00 2001 From: tqchen Date: Wed, 3 Sep 2014 13:15:17 -0700 Subject: [PATCH 16/22] push python examples in --- demo/READMDE.md | 25 +++++++++++++++++++ demo/guide-python/{REAMDE.md => READMDE.md} | 0 2 files changed, 25 insertions(+) create mode 100644 demo/READMDE.md rename demo/guide-python/{REAMDE.md => READMDE.md} (100%) diff --git a/demo/READMDE.md b/demo/READMDE.md new file mode 100644 index 000000000..916a5cea1 --- /dev/null +++ b/demo/READMDE.md @@ -0,0 +1,25 @@ +XGBoost Examples +==== +This folder contains all the example code for xgboost. +Contributions of examples and benchmarks are more than welcome! +If you would like to share how you use xgboost to solve your problem, send a pull request :) + +Start Examples by Tasks +==== +* [Binary classification](binary_classification) +* [Multiclass classification](multiclass_classification) +* [Regression](regression) +* [Learning to Rank](rank) + +Features Walkthrough +==== +This is a list of short codes introducing different functionalities of xgboost and its wrapper. +* Basic walkthrough of wrappers. [python](guide-python/basic.py) [R](guide-R/basic.R) +* Customize loss function and evaluation metric. [python](guide-python/custom_objective.py) [R](guide-R/custom_objective.R) +* Boosting from existing prediction. [python](guide-python/boost_from_prediction.py) [R](guide-R/boost_from_prediction.R) +* Predicting using first n trees. [python](guide-python/predict_first_ntree.py) [R](guide-R/predict_first_ntree.R) +* Cross validation (to come) + +Benchmarks +==== +* [Starter script for Kaggle Higgs Boson](kaggle-higgs) diff --git a/demo/guide-python/REAMDE.md b/demo/guide-python/READMDE.md similarity index 100% rename from demo/guide-python/REAMDE.md rename to demo/guide-python/READMDE.md 
From 7a61f0dca2949dceb0a1de7fd1e81ec319d926d7 Mon Sep 17 00:00:00 2001 From: tqchen Date: Wed, 3 Sep 2014 13:18:36 -0700 Subject: [PATCH 17/22] ok --- demo/READMDE.md | 2 +- demo/guide-python/READMDE.md | 8 ++++---- demo/guide-python/runall.sh | 1 - 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/demo/READMDE.md b/demo/READMDE.md index 916a5cea1..433324947 100644 --- a/demo/READMDE.md +++ b/demo/READMDE.md @@ -14,7 +14,7 @@ Start Examples by Tasks Features Walkthrough ==== This is a list of short codes introducing different functionalities of xgboost and its wrapper. -* Basic walkthrough of wrappers. [python](guide-python/basic.py) [R](guide-R/basic.R) +* Basic walkthrough of wrappers. [python](guide-python/basic_walkthrough.py) [R](guide-R/basic_walkthrough.R) * Customize loss function and evaluation metric. [python](guide-python/custom_objective.py) [R](guide-R/custom_objective.R) * Boosting from existing prediction. [python](guide-python/boost_from_prediction.py) [R](guide-R/boost_from_prediction.R) * Predicting using first n trees. 
[python](guide-python/predict_first_ntree.py) [R](guide-R/predict_first_ntree.R) diff --git a/demo/guide-python/READMDE.md b/demo/guide-python/READMDE.md index 7eaec6155..b2cad6b54 100644 --- a/demo/guide-python/READMDE.md +++ b/demo/guide-python/READMDE.md @@ -1,6 +1,6 @@ XGBoost Python Feature Walkthrough ==== -* [Basic walkthrough of wrappers](guide-python/basic.py) -* [Customize loss function and evaluation metric](guide-python/custom_objective.py) -* [Boosting from existing prediction](guide-python/boost_from_prediction.py) -* [Predicting using first n trees](guide-python/predict_first_ntree.py) +* [Basic walkthrough of wrappers](basic_walkthrough.py) +* [Customize loss function and evaluation metric](custom_objective.py) +* [Boosting from existing prediction](boost_from_prediction.py) +* [Predicting using first n trees](predict_first_ntree.py) diff --git a/demo/guide-python/runall.sh b/demo/guide-python/runall.sh index 6b37c68ca..4386cf262 100755 --- a/demo/guide-python/runall.sh +++ b/demo/guide-python/runall.sh @@ -2,5 +2,4 @@ python basic_walkthrough.py python custom_objective.py python boost_from_prediction.py -python boost_from_prediction.py rm *~ *.model *.buffer \ No newline at end of file 
From 60e1167b56d3dfe47a79fc7a9d4fe16d813942fd Mon Sep 17 00:00:00 2001 From: tqchen Date: Wed, 3 Sep 2014 13:20:23 -0700 Subject: [PATCH 18/22] fix doc --- demo/{READMDE.md => README.md} | 0 demo/guide-R/README.md | 3 +++ demo/guide-python/{READMDE.md => README.md} | 0 3 files changed, 3 insertions(+) rename demo/{READMDE.md => README.md} (100%) create mode 100644 demo/guide-R/README.md rename demo/guide-python/{READMDE.md => README.md} (100%) diff --git a/demo/READMDE.md b/demo/README.md similarity index 100% rename from demo/READMDE.md rename to demo/README.md diff --git a/demo/guide-R/README.md b/demo/guide-R/README.md new file mode 100644 index 000000000..0c87198bc --- /dev/null +++ b/demo/guide-R/README.md @@ -0,0 +1,3 @@ +XGBoost R Feature Walkthrough +==== +To be finished diff --git a/demo/guide-python/READMDE.md b/demo/guide-python/README.md similarity index 100% rename from demo/guide-python/READMDE.md rename to demo/guide-python/README.md 
From e6359b54845f11f00f4c6c4e9d41f01ef985a775 Mon Sep 17 00:00:00 2001 From: tqchen Date: Wed, 3 Sep 2014 13:23:36 -0700 Subject: [PATCH 19/22] ok --- demo/guide-R/runall.sh | 5 +++++ 1 file changed, 5 insertions(+) create mode 100755 demo/guide-R/runall.sh diff --git a/demo/guide-R/runall.sh b/demo/guide-R/runall.sh new file mode 100755 index 000000000..2d6cabcb2 --- /dev/null +++ b/demo/guide-R/runall.sh @@ -0,0 +1,5 @@ +#!/bin/bash +# todo +Rscript basic_walkthrough.R +Rscript custom_objective.R +Rscript boost_from_prediction.R 
From 5cd92e33f6730206fa8918537081ef02b4bc2228 Mon Sep 17 00:00:00 2001 From: tqchen Date: Wed, 3 Sep 2014 13:24:34 -0700 Subject: [PATCH 20/22] remove R for now --- demo/README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/demo/README.md b/demo/README.md index 433324947..f5606aff1 100644 --- a/demo/README.md +++ b/demo/README.md @@ -14,10 +14,10 @@ Features Walkthrough ==== This is a list of short codes introducing different functionalities of xgboost and its wrapper. -* Basic walkthrough of wrappers. [python](guide-python/basic_walkthrough.py) [R](guide-R/basic_walkthrough.R) -* Customize loss function and evaluation metric. [python](guide-python/custom_objective.py) [R](guide-R/custom_objective.R) -* Boosting from existing prediction. 
[python](guide-python/boost_from_prediction.py) [R](guide-R/boost_from_prediction.R) -* Predicting using first n trees. [python](guide-python/predict_first_ntree.py) [R](guide-R/predict_first_ntree.R) +* Basic walkthrough of wrappers. [python](guide-python/basic_walkthrough.py) +* Customize loss function and evaluation metric. [python](guide-python/custom_objective.py) +* Boosting from existing prediction. [python](guide-python/boost_from_prediction.py) +* Predicting using first n trees. [python](guide-python/predict_first_ntree.py) * Cross validation (to come) Benchmarks ==== * [Starter script for Kaggle Higgs Boson](kaggle-higgs) 
From b2586b6130b3e6b90601b84aa32ba9fe19f53356 Mon Sep 17 00:00:00 2001 From: tqchen Date: Wed, 3 Sep 2014 13:27:06 -0700 Subject: [PATCH 21/22] ok --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index 38291b09d..ea741072d 100644 --- a/README.md +++ b/README.md @@ -8,6 +8,8 @@ Tutorial and Documentation: https://github.com/tqchen/xgboost/wiki Questions and Issues: [https://github.com/tqchen/xgboost/issues](https://github.com/tqchen/xgboost/issues?q=is%3Aissue+label%3Aquestion) +Example Code: [demo folder](demo) + Notes on the Code: [Code Guide](src) Features 
From 8952d9c3576307580a79d0017e53c5cefb57f267 Mon Sep 17 00:00:00 2001 From: tqchen Date: Wed, 3 Sep 2014 13:28:03 -0700 Subject: [PATCH 22/22] fix --- demo/README.md | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/demo/README.md b/demo/README.md index f5606aff1..c9145d29c 100644 --- a/demo/README.md +++ b/demo/README.md @@ -4,13 +4,6 @@ This folder contains all the example code for xgboost. Contributions of examples and benchmarks are more than welcome! If you would like to share how you use xgboost to solve your problem, send a pull request :) -Start Examples by Tasks -==== -* [Binary classification](binary_classification) -* [Multiclass classification](multiclass_classification) -* [Regression](regression) -* [Learning to Rank](rank) - Features Walkthrough ==== This is a list of short codes introducing different functionalities of xgboost and its wrapper. @@ -20,6 +13,13 @@ This is a list of short codes introducing different functionalities of xgboost and its wrapper. * Predicting using first n trees. [python](guide-python/predict_first_ntree.py) * Cross validation (to come) +Basic Examples by Tasks +==== +* [Binary classification](binary_classification) +* [Multiclass classification](multiclass_classification) +* [Regression](regression) +* [Learning to Rank](rank) + Benchmarks ==== * [Starter script for Kaggle Higgs Boson](kaggle-higgs)
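The demo README still lists cross validation as "(to come)". A minimal sketch of what such a demo could look like against the cv interface this series converges on; the file name and the data paths are assumptions modeled on the other demos, not files added by these patches:

```python
#!/usr/bin/python
# hypothetical demo/guide-python/cross_validation.py (not part of this series)
import sys
sys.path.append('../../wrapper')
import xgboost as xgb

dtrain = xgb.DMatrix('../data/agaricus.txt.train')
param = {'max_depth':2, 'eta':1, 'silent':1, 'objective':'binary:logistic'}
# run 3-fold cross validation for 10 rounds; each round writes one
# '[i] metric:mean+std' line to stderr via aggcv
xgb.cv(param, dtrain, num_boost_round=10, nfold=3, eval_metric=['error'])
```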