chg
This commit is contained in:
parent
3ba7995754
commit
08a6b92216
1611
wrapper/R-example/agaricus.txt.test
Normal file
1611
wrapper/R-example/agaricus.txt.test
Normal file
File diff suppressed because it is too large
Load Diff
6513
wrapper/R-example/agaricus.txt.train
Normal file
6513
wrapper/R-example/agaricus.txt.train
Normal file
File diff suppressed because it is too large
Load Diff
14
wrapper/R-example/demo.R
Normal file
14
wrapper/R-example/demo.R
Normal file
@ -0,0 +1,14 @@
|
|||||||
|
# include xgboost library, must set chdir=TRURE
|
||||||
|
source('../xgboost.R', chdir=TRUE)
|
||||||
|
|
||||||
|
# test code here
|
||||||
|
dtrain <- xgb.DMatrix("agaricus.txt.train")
|
||||||
|
dtest <- xgb.DMatrix("agaricus.txt.test")
|
||||||
|
param = list('bst:max_depth'=2, 'bst:eta'=1, 'silent'=1, 'objective'='binary:logistic')
|
||||||
|
watchlist <- list('train'=dtrain,'test'=dtest)
|
||||||
|
bst <- xgb.train(param, dtrain, watchlist=watchlist, nround=3)
|
||||||
|
|
||||||
|
succ <- xgb.save(bst, "iter.model")
|
||||||
|
print('finsih save model')
|
||||||
|
bst2 <- xgb.Booster(modelfile="iter.model")
|
||||||
|
pred = xgb.predict(bst2, dtest)
|
||||||
126
wrapper/R-example/featmap.txt
Normal file
126
wrapper/R-example/featmap.txt
Normal file
@ -0,0 +1,126 @@
|
|||||||
|
0 cap-shape=bell i
|
||||||
|
1 cap-shape=conical i
|
||||||
|
2 cap-shape=convex i
|
||||||
|
3 cap-shape=flat i
|
||||||
|
4 cap-shape=knobbed i
|
||||||
|
5 cap-shape=sunken i
|
||||||
|
6 cap-surface=fibrous i
|
||||||
|
7 cap-surface=grooves i
|
||||||
|
8 cap-surface=scaly i
|
||||||
|
9 cap-surface=smooth i
|
||||||
|
10 cap-color=brown i
|
||||||
|
11 cap-color=buff i
|
||||||
|
12 cap-color=cinnamon i
|
||||||
|
13 cap-color=gray i
|
||||||
|
14 cap-color=green i
|
||||||
|
15 cap-color=pink i
|
||||||
|
16 cap-color=purple i
|
||||||
|
17 cap-color=red i
|
||||||
|
18 cap-color=white i
|
||||||
|
19 cap-color=yellow i
|
||||||
|
20 bruises?=bruises i
|
||||||
|
21 bruises?=no i
|
||||||
|
22 odor=almond i
|
||||||
|
23 odor=anise i
|
||||||
|
24 odor=creosote i
|
||||||
|
25 odor=fishy i
|
||||||
|
26 odor=foul i
|
||||||
|
27 odor=musty i
|
||||||
|
28 odor=none i
|
||||||
|
29 odor=pungent i
|
||||||
|
30 odor=spicy i
|
||||||
|
31 gill-attachment=attached i
|
||||||
|
32 gill-attachment=descending i
|
||||||
|
33 gill-attachment=free i
|
||||||
|
34 gill-attachment=notched i
|
||||||
|
35 gill-spacing=close i
|
||||||
|
36 gill-spacing=crowded i
|
||||||
|
37 gill-spacing=distant i
|
||||||
|
38 gill-size=broad i
|
||||||
|
39 gill-size=narrow i
|
||||||
|
40 gill-color=black i
|
||||||
|
41 gill-color=brown i
|
||||||
|
42 gill-color=buff i
|
||||||
|
43 gill-color=chocolate i
|
||||||
|
44 gill-color=gray i
|
||||||
|
45 gill-color=green i
|
||||||
|
46 gill-color=orange i
|
||||||
|
47 gill-color=pink i
|
||||||
|
48 gill-color=purple i
|
||||||
|
49 gill-color=red i
|
||||||
|
50 gill-color=white i
|
||||||
|
51 gill-color=yellow i
|
||||||
|
52 stalk-shape=enlarging i
|
||||||
|
53 stalk-shape=tapering i
|
||||||
|
54 stalk-root=bulbous i
|
||||||
|
55 stalk-root=club i
|
||||||
|
56 stalk-root=cup i
|
||||||
|
57 stalk-root=equal i
|
||||||
|
58 stalk-root=rhizomorphs i
|
||||||
|
59 stalk-root=rooted i
|
||||||
|
60 stalk-root=missing i
|
||||||
|
61 stalk-surface-above-ring=fibrous i
|
||||||
|
62 stalk-surface-above-ring=scaly i
|
||||||
|
63 stalk-surface-above-ring=silky i
|
||||||
|
64 stalk-surface-above-ring=smooth i
|
||||||
|
65 stalk-surface-below-ring=fibrous i
|
||||||
|
66 stalk-surface-below-ring=scaly i
|
||||||
|
67 stalk-surface-below-ring=silky i
|
||||||
|
68 stalk-surface-below-ring=smooth i
|
||||||
|
69 stalk-color-above-ring=brown i
|
||||||
|
70 stalk-color-above-ring=buff i
|
||||||
|
71 stalk-color-above-ring=cinnamon i
|
||||||
|
72 stalk-color-above-ring=gray i
|
||||||
|
73 stalk-color-above-ring=orange i
|
||||||
|
74 stalk-color-above-ring=pink i
|
||||||
|
75 stalk-color-above-ring=red i
|
||||||
|
76 stalk-color-above-ring=white i
|
||||||
|
77 stalk-color-above-ring=yellow i
|
||||||
|
78 stalk-color-below-ring=brown i
|
||||||
|
79 stalk-color-below-ring=buff i
|
||||||
|
80 stalk-color-below-ring=cinnamon i
|
||||||
|
81 stalk-color-below-ring=gray i
|
||||||
|
82 stalk-color-below-ring=orange i
|
||||||
|
83 stalk-color-below-ring=pink i
|
||||||
|
84 stalk-color-below-ring=red i
|
||||||
|
85 stalk-color-below-ring=white i
|
||||||
|
86 stalk-color-below-ring=yellow i
|
||||||
|
87 veil-type=partial i
|
||||||
|
88 veil-type=universal i
|
||||||
|
89 veil-color=brown i
|
||||||
|
90 veil-color=orange i
|
||||||
|
91 veil-color=white i
|
||||||
|
92 veil-color=yellow i
|
||||||
|
93 ring-number=none i
|
||||||
|
94 ring-number=one i
|
||||||
|
95 ring-number=two i
|
||||||
|
96 ring-type=cobwebby i
|
||||||
|
97 ring-type=evanescent i
|
||||||
|
98 ring-type=flaring i
|
||||||
|
99 ring-type=large i
|
||||||
|
100 ring-type=none i
|
||||||
|
101 ring-type=pendant i
|
||||||
|
102 ring-type=sheathing i
|
||||||
|
103 ring-type=zone i
|
||||||
|
104 spore-print-color=black i
|
||||||
|
105 spore-print-color=brown i
|
||||||
|
106 spore-print-color=buff i
|
||||||
|
107 spore-print-color=chocolate i
|
||||||
|
108 spore-print-color=green i
|
||||||
|
109 spore-print-color=orange i
|
||||||
|
110 spore-print-color=purple i
|
||||||
|
111 spore-print-color=white i
|
||||||
|
112 spore-print-color=yellow i
|
||||||
|
113 population=abundant i
|
||||||
|
114 population=clustered i
|
||||||
|
115 population=numerous i
|
||||||
|
116 population=scattered i
|
||||||
|
117 population=several i
|
||||||
|
118 population=solitary i
|
||||||
|
119 habitat=grasses i
|
||||||
|
120 habitat=leaves i
|
||||||
|
121 habitat=meadows i
|
||||||
|
122 habitat=paths i
|
||||||
|
123 habitat=urban i
|
||||||
|
124 habitat=waste i
|
||||||
|
125 habitat=woods i
|
||||||
12
wrapper/README.md
Normal file
12
wrapper/README.md
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
Wrapper of XGBoost
|
||||||
|
=====
|
||||||
|
This folder provides wrapper of xgboost to other languages
|
||||||
|
|
||||||
|
|
||||||
|
Python
|
||||||
|
=====
|
||||||
|
To make the python module, type ```make``` in the root directory of project
|
||||||
|
|
||||||
|
R
|
||||||
|
=====
|
||||||
|
To make the R wrapper, type ```make R``` in the root directory of project
|
||||||
3
wrapper/python-example/README.md
Normal file
3
wrapper/python-example/README.md
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
example to use python xgboost, the data is generated from demo/binary_classification, in libsvm format
|
||||||
|
|
||||||
|
for usage: see demo.py and comments in demo.py
|
||||||
1611
wrapper/python-example/agaricus.txt.test
Normal file
1611
wrapper/python-example/agaricus.txt.test
Normal file
File diff suppressed because it is too large
Load Diff
6513
wrapper/python-example/agaricus.txt.train
Normal file
6513
wrapper/python-example/agaricus.txt.train
Normal file
File diff suppressed because it is too large
Load Diff
112
wrapper/python-example/demo.py
Executable file
112
wrapper/python-example/demo.py
Executable file
@ -0,0 +1,112 @@
|
|||||||
|
#!/usr/bin/python
|
||||||
|
import sys
|
||||||
|
import numpy as np
|
||||||
|
import scipy.sparse
|
||||||
|
# append the path to xgboost, you may need to change the following line
|
||||||
|
# alternatively, you can add the path to PYTHONPATH environment variable
|
||||||
|
sys.path.append('../')
|
||||||
|
import xgboost as xgb
|
||||||
|
|
||||||
|
### simple example
|
||||||
|
# load file from text file, also binary buffer generated by xgboost
|
||||||
|
dtrain = xgb.DMatrix('agaricus.txt.train')
|
||||||
|
dtest = xgb.DMatrix('agaricus.txt.test')
|
||||||
|
|
||||||
|
# specify parameters via map, definition are same as c++ version
|
||||||
|
param = {'bst:max_depth':2, 'bst:eta':1, 'silent':1, 'objective':'binary:logistic' }
|
||||||
|
|
||||||
|
# specify validations set to watch performance
|
||||||
|
evallist = [(dtest,'eval'), (dtrain,'train')]
|
||||||
|
num_round = 2
|
||||||
|
bst = xgb.train(param, dtrain, num_round, evallist)
|
||||||
|
|
||||||
|
# this is prediction
|
||||||
|
preds = bst.predict(dtest)
|
||||||
|
labels = dtest.get_label()
|
||||||
|
print ('error=%f' % ( sum(1 for i in range(len(preds)) if int(preds[i]>0.5)!=labels[i]) /float(len(preds))))
|
||||||
|
bst.save_model('0001.model')
|
||||||
|
# dump model
|
||||||
|
bst.dump_model('dump.raw.txt')
|
||||||
|
# dump model with feature map
|
||||||
|
bst.dump_model('dump.nice.txt','featmap.txt')
|
||||||
|
|
||||||
|
###
|
||||||
|
# build dmatrix from scipy.sparse
|
||||||
|
print ('start running example of build DMatrix from scipy.sparse')
|
||||||
|
labels = []
|
||||||
|
row = []; col = []; dat = []
|
||||||
|
i = 0
|
||||||
|
for l in open('agaricus.txt.train'):
|
||||||
|
arr = l.split()
|
||||||
|
labels.append( int(arr[0]))
|
||||||
|
for it in arr[1:]:
|
||||||
|
k,v = it.split(':')
|
||||||
|
row.append(i); col.append(int(k)); dat.append(float(v))
|
||||||
|
i += 1
|
||||||
|
csr = scipy.sparse.csr_matrix( (dat, (row,col)) )
|
||||||
|
dtrain = xgb.DMatrix( csr )
|
||||||
|
dtrain.set_label(labels)
|
||||||
|
evallist = [(dtest,'eval'), (dtrain,'train')]
|
||||||
|
bst = xgb.train( param, dtrain, num_round, evallist )
|
||||||
|
|
||||||
|
print ('start running example of build DMatrix from numpy array')
|
||||||
|
# NOTE: npymat is numpy array, we will convert it into scipy.sparse.csr_matrix in internal implementation,then convert to DMatrix
|
||||||
|
npymat = csr.todense()
|
||||||
|
dtrain = xgb.DMatrix( npymat)
|
||||||
|
dtrain.set_label(labels)
|
||||||
|
evallist = [(dtest,'eval'), (dtrain,'train')]
|
||||||
|
bst = xgb.train( param, dtrain, num_round, evallist )
|
||||||
|
|
||||||
|
###
|
||||||
|
# advanced: cutomsized loss function, set loss_type to 0, so that predict get untransformed score
|
||||||
|
#
|
||||||
|
print ('start running example to used cutomized objective function')
|
||||||
|
|
||||||
|
# note: for customized objective function, we leave objective as default
|
||||||
|
# note: what we are getting is margin value in prediction
|
||||||
|
# you must know what you are doing
|
||||||
|
param = {'bst:max_depth':2, 'bst:eta':1, 'silent':1 }
|
||||||
|
|
||||||
|
# user define objective function, given prediction, return gradient and second order gradient
|
||||||
|
# this is loglikelihood loss
|
||||||
|
def logregobj(preds, dtrain):
|
||||||
|
labels = dtrain.get_label()
|
||||||
|
preds = 1.0 / (1.0 + np.exp(-preds))
|
||||||
|
grad = preds - labels
|
||||||
|
hess = preds * (1.0-preds)
|
||||||
|
return grad, hess
|
||||||
|
|
||||||
|
# user defined evaluation function, return a pair metric_name, result
|
||||||
|
# NOTE: when you do customized loss function, the default prediction value is margin
|
||||||
|
# this may make buildin evalution metric not function properly
|
||||||
|
# for example, we are doing logistic loss, the prediction is score before logistic transformation
|
||||||
|
# the buildin evaluation error assumes input is after logistic transformation
|
||||||
|
# Take this in mind when you use the customization, and maybe you need write customized evaluation function
|
||||||
|
def evalerror(preds, dtrain):
|
||||||
|
labels = dtrain.get_label()
|
||||||
|
# return a pair metric_name, result
|
||||||
|
# since preds are margin(before logistic transformation, cutoff at 0)
|
||||||
|
return 'error', float(sum(labels != (preds > 0.0))) / len(labels)
|
||||||
|
|
||||||
|
# training with customized objective, we can also do step by step training
|
||||||
|
# simply look at xgboost.py's implementation of train
|
||||||
|
bst = xgb.train(param, dtrain, num_round, evallist, logregobj, evalerror)
|
||||||
|
|
||||||
|
|
||||||
|
###
|
||||||
|
# advanced: start from a initial base prediction
|
||||||
|
#
|
||||||
|
print ('start running example to start from a initial prediction')
|
||||||
|
# specify parameters via map, definition are same as c++ version
|
||||||
|
param = {'bst:max_depth':2, 'bst:eta':1, 'silent':1, 'objective':'binary:logistic' }
|
||||||
|
# train xgboost for 1 round
|
||||||
|
bst = xgb.train( param, dtrain, 1, evallist )
|
||||||
|
# Note: we need the margin value instead of transformed prediction in set_base_margin
|
||||||
|
# do predict with output_margin=True, will always give you margin values before logistic transformation
|
||||||
|
ptrain = bst.predict(dtrain, output_margin=True)
|
||||||
|
ptest = bst.predict(dtest, output_margin=True)
|
||||||
|
dtrain.set_base_margin(ptrain)
|
||||||
|
dtest.set_base_margin(ptest)
|
||||||
|
|
||||||
|
print ('this is result of running from initial prediction')
|
||||||
|
bst = xgb.train( param, dtrain, 1, evallist )
|
||||||
126
wrapper/python-example/featmap.txt
Normal file
126
wrapper/python-example/featmap.txt
Normal file
@ -0,0 +1,126 @@
|
|||||||
|
0 cap-shape=bell i
|
||||||
|
1 cap-shape=conical i
|
||||||
|
2 cap-shape=convex i
|
||||||
|
3 cap-shape=flat i
|
||||||
|
4 cap-shape=knobbed i
|
||||||
|
5 cap-shape=sunken i
|
||||||
|
6 cap-surface=fibrous i
|
||||||
|
7 cap-surface=grooves i
|
||||||
|
8 cap-surface=scaly i
|
||||||
|
9 cap-surface=smooth i
|
||||||
|
10 cap-color=brown i
|
||||||
|
11 cap-color=buff i
|
||||||
|
12 cap-color=cinnamon i
|
||||||
|
13 cap-color=gray i
|
||||||
|
14 cap-color=green i
|
||||||
|
15 cap-color=pink i
|
||||||
|
16 cap-color=purple i
|
||||||
|
17 cap-color=red i
|
||||||
|
18 cap-color=white i
|
||||||
|
19 cap-color=yellow i
|
||||||
|
20 bruises?=bruises i
|
||||||
|
21 bruises?=no i
|
||||||
|
22 odor=almond i
|
||||||
|
23 odor=anise i
|
||||||
|
24 odor=creosote i
|
||||||
|
25 odor=fishy i
|
||||||
|
26 odor=foul i
|
||||||
|
27 odor=musty i
|
||||||
|
28 odor=none i
|
||||||
|
29 odor=pungent i
|
||||||
|
30 odor=spicy i
|
||||||
|
31 gill-attachment=attached i
|
||||||
|
32 gill-attachment=descending i
|
||||||
|
33 gill-attachment=free i
|
||||||
|
34 gill-attachment=notched i
|
||||||
|
35 gill-spacing=close i
|
||||||
|
36 gill-spacing=crowded i
|
||||||
|
37 gill-spacing=distant i
|
||||||
|
38 gill-size=broad i
|
||||||
|
39 gill-size=narrow i
|
||||||
|
40 gill-color=black i
|
||||||
|
41 gill-color=brown i
|
||||||
|
42 gill-color=buff i
|
||||||
|
43 gill-color=chocolate i
|
||||||
|
44 gill-color=gray i
|
||||||
|
45 gill-color=green i
|
||||||
|
46 gill-color=orange i
|
||||||
|
47 gill-color=pink i
|
||||||
|
48 gill-color=purple i
|
||||||
|
49 gill-color=red i
|
||||||
|
50 gill-color=white i
|
||||||
|
51 gill-color=yellow i
|
||||||
|
52 stalk-shape=enlarging i
|
||||||
|
53 stalk-shape=tapering i
|
||||||
|
54 stalk-root=bulbous i
|
||||||
|
55 stalk-root=club i
|
||||||
|
56 stalk-root=cup i
|
||||||
|
57 stalk-root=equal i
|
||||||
|
58 stalk-root=rhizomorphs i
|
||||||
|
59 stalk-root=rooted i
|
||||||
|
60 stalk-root=missing i
|
||||||
|
61 stalk-surface-above-ring=fibrous i
|
||||||
|
62 stalk-surface-above-ring=scaly i
|
||||||
|
63 stalk-surface-above-ring=silky i
|
||||||
|
64 stalk-surface-above-ring=smooth i
|
||||||
|
65 stalk-surface-below-ring=fibrous i
|
||||||
|
66 stalk-surface-below-ring=scaly i
|
||||||
|
67 stalk-surface-below-ring=silky i
|
||||||
|
68 stalk-surface-below-ring=smooth i
|
||||||
|
69 stalk-color-above-ring=brown i
|
||||||
|
70 stalk-color-above-ring=buff i
|
||||||
|
71 stalk-color-above-ring=cinnamon i
|
||||||
|
72 stalk-color-above-ring=gray i
|
||||||
|
73 stalk-color-above-ring=orange i
|
||||||
|
74 stalk-color-above-ring=pink i
|
||||||
|
75 stalk-color-above-ring=red i
|
||||||
|
76 stalk-color-above-ring=white i
|
||||||
|
77 stalk-color-above-ring=yellow i
|
||||||
|
78 stalk-color-below-ring=brown i
|
||||||
|
79 stalk-color-below-ring=buff i
|
||||||
|
80 stalk-color-below-ring=cinnamon i
|
||||||
|
81 stalk-color-below-ring=gray i
|
||||||
|
82 stalk-color-below-ring=orange i
|
||||||
|
83 stalk-color-below-ring=pink i
|
||||||
|
84 stalk-color-below-ring=red i
|
||||||
|
85 stalk-color-below-ring=white i
|
||||||
|
86 stalk-color-below-ring=yellow i
|
||||||
|
87 veil-type=partial i
|
||||||
|
88 veil-type=universal i
|
||||||
|
89 veil-color=brown i
|
||||||
|
90 veil-color=orange i
|
||||||
|
91 veil-color=white i
|
||||||
|
92 veil-color=yellow i
|
||||||
|
93 ring-number=none i
|
||||||
|
94 ring-number=one i
|
||||||
|
95 ring-number=two i
|
||||||
|
96 ring-type=cobwebby i
|
||||||
|
97 ring-type=evanescent i
|
||||||
|
98 ring-type=flaring i
|
||||||
|
99 ring-type=large i
|
||||||
|
100 ring-type=none i
|
||||||
|
101 ring-type=pendant i
|
||||||
|
102 ring-type=sheathing i
|
||||||
|
103 ring-type=zone i
|
||||||
|
104 spore-print-color=black i
|
||||||
|
105 spore-print-color=brown i
|
||||||
|
106 spore-print-color=buff i
|
||||||
|
107 spore-print-color=chocolate i
|
||||||
|
108 spore-print-color=green i
|
||||||
|
109 spore-print-color=orange i
|
||||||
|
110 spore-print-color=purple i
|
||||||
|
111 spore-print-color=white i
|
||||||
|
112 spore-print-color=yellow i
|
||||||
|
113 population=abundant i
|
||||||
|
114 population=clustered i
|
||||||
|
115 population=numerous i
|
||||||
|
116 population=scattered i
|
||||||
|
117 population=several i
|
||||||
|
118 population=solitary i
|
||||||
|
119 habitat=grasses i
|
||||||
|
120 habitat=leaves i
|
||||||
|
121 habitat=meadows i
|
||||||
|
122 habitat=paths i
|
||||||
|
123 habitat=urban i
|
||||||
|
124 habitat=waste i
|
||||||
|
125 habitat=woods i
|
||||||
136
wrapper/xgboost.R
Normal file
136
wrapper/xgboost.R
Normal file
@ -0,0 +1,136 @@
|
|||||||
|
# load in library
|
||||||
|
dyn.load("./libxgboostR.so")
|
||||||
|
|
||||||
|
# constructing DMatrix
|
||||||
|
xgb.DMatrix <- function(data) {
|
||||||
|
if (typeof(data) == "character") {
|
||||||
|
handle <- .Call("XGDMatrixCreateFromFile_R", data, as.integer(FALSE))
|
||||||
|
}else {
|
||||||
|
stop("xgb.DMatrix cannot recognize data type")
|
||||||
|
}
|
||||||
|
return(structure(handle, class="xgb.DMatrix"))
|
||||||
|
}
|
||||||
|
# construct a Booster from cachelist
|
||||||
|
xgb.Booster <- function(params = list(), cachelist = list(), modelfile = NULL) {
|
||||||
|
if (typeof(cachelist) != "list") {
|
||||||
|
stop("xgb.Booster: only accepts list of DMatrix as cachelist")
|
||||||
|
}
|
||||||
|
for (dm in cachelist) {
|
||||||
|
if (class(dm) != "xgb.DMatrix") {
|
||||||
|
stop("xgb.Booster: only accepts list of DMatrix as cachelist")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
handle <- .Call("XGBoosterCreate_R", cachelist)
|
||||||
|
for (i in 1:length(params)) {
|
||||||
|
p = params[i]
|
||||||
|
.Call("XGBoosterSetParam_R", handle, names(p), as.character(p))
|
||||||
|
}
|
||||||
|
if (!is.null(modelfile)) {
|
||||||
|
if (typeof(modelfile) != "character"){
|
||||||
|
stop("xgb.Booster: modelfile must be character");
|
||||||
|
}
|
||||||
|
.Call("XGBoosterLoadModel_R", handle, modelfile)
|
||||||
|
}
|
||||||
|
return(structure(handle, class="xgb.Booster"))
|
||||||
|
}
|
||||||
|
# train a model using given parameters
|
||||||
|
xgb.train <- function(params, dtrain, nrounds=10, watchlist=list(), obj=NULL) {
|
||||||
|
if (typeof(params) != "list") {
|
||||||
|
stop("xgb.train: first argument params must be list");
|
||||||
|
}
|
||||||
|
if (class(dtrain) != "xgb.DMatrix") {
|
||||||
|
stop("xgb.train: second argument dtrain must be xgb.DMatrix");
|
||||||
|
}
|
||||||
|
bst <- xgb.Booster(params, append(watchlist,dtrain))
|
||||||
|
for (i in 1:nrounds) {
|
||||||
|
if (is.null(obj)) {
|
||||||
|
succ <- xgb.iter.update(bst, dtrain, i-1)
|
||||||
|
} else {
|
||||||
|
pred = xgb.predict(bst, dtrain)
|
||||||
|
gpair = obj(pred, dtrain)
|
||||||
|
succ <- xgb.iter.boost(bst, dtrain, gpair)
|
||||||
|
}
|
||||||
|
if (length(watchlist) != 0) {
|
||||||
|
msg <- xgb.iter.eval(bst, watchlist, i-1)
|
||||||
|
cat(msg); cat("\n")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return(bst)
|
||||||
|
}
|
||||||
|
# save model or DMatrix to file
|
||||||
|
xgb.save <- function(handle, fname) {
|
||||||
|
if (typeof(fname) != "character") {
|
||||||
|
stop("xgb.save: fname must be character");
|
||||||
|
}
|
||||||
|
if (class(handle) == "xgb.Booster") {
|
||||||
|
.Call("XGBoosterSaveModel_R", handle, fname);
|
||||||
|
return(TRUE)
|
||||||
|
}
|
||||||
|
if (class(handle) == "xgb.DMatrix") {
|
||||||
|
.Call("XGDMatrixSaveBinary_R", handle, fname, as.integer(FALSE))
|
||||||
|
return(TRUE)
|
||||||
|
}
|
||||||
|
stop("xgb.save: the input must be either xgb.DMatrix or xgb.Booster")
|
||||||
|
return(FALSE)
|
||||||
|
}
|
||||||
|
# predict
|
||||||
|
xgb.predict <- function(booster, dmat, outputmargin = FALSE) {
|
||||||
|
if (class(booster) != "xgb.Booster") {
|
||||||
|
stop("xgb.iter.update: first argument must be type xgb.Booster")
|
||||||
|
}
|
||||||
|
if (class(dmat) != "xgb.DMatrix") {
|
||||||
|
stop("xgb.iter.update: second argument must be type xgb.DMatrix")
|
||||||
|
}
|
||||||
|
ret = .Call("XGBoosterPredict_R", booster, dmat, as.integer(outputmargin))
|
||||||
|
return(ret)
|
||||||
|
}
|
||||||
|
##--------------------------------------
|
||||||
|
# the following are low level iteratively function, not needed
|
||||||
|
# if you do not want to use them
|
||||||
|
#---------------------------------------
|
||||||
|
# iteratively update booster with dtrain
|
||||||
|
xgb.iter.update <- function(booster, dtrain, iter) {
|
||||||
|
if (class(booster) != "xgb.Booster") {
|
||||||
|
stop("xgb.iter.update: first argument must be type xgb.Booster")
|
||||||
|
}
|
||||||
|
if (class(dtrain) != "xgb.DMatrix") {
|
||||||
|
stop("xgb.iter.update: second argument must be type xgb.DMatrix")
|
||||||
|
}
|
||||||
|
.Call("XGBoosterUpdateOneIter_R", booster, as.integer(iter), dtrain)
|
||||||
|
return(TRUE)
|
||||||
|
}
|
||||||
|
# iteratively update booster with customized statistics
|
||||||
|
xgb.iter.boost <- function(booster, dtrain, gpair) {
|
||||||
|
if (class(booster) != "xgb.Booster") {
|
||||||
|
stop("xgb.iter.update: first argument must be type xgb.Booster")
|
||||||
|
}
|
||||||
|
if (class(dtrain) != "xgb.DMatrix") {
|
||||||
|
stop("xgb.iter.update: second argument must be type xgb.DMatrix")
|
||||||
|
}
|
||||||
|
.Call("XGBoosterBoostOneIter_R", booster, dtrain, gpair$grad, gpair$hess)
|
||||||
|
return(TRUE)
|
||||||
|
}
|
||||||
|
# iteratively evaluate one iteration
|
||||||
|
xgb.iter.eval <- function(booster, watchlist, iter) {
|
||||||
|
if (class(booster) != "xgb.Booster") {
|
||||||
|
stop("xgb.eval: first argument must be type xgb.Booster")
|
||||||
|
}
|
||||||
|
if (typeof(watchlist) != "list") {
|
||||||
|
stop("xgb.eval: only accepts list of DMatrix as watchlist")
|
||||||
|
}
|
||||||
|
for (w in watchlist) {
|
||||||
|
if (class(w) != "xgb.DMatrix") {
|
||||||
|
stop("xgb.eval: watch list can only contain xgb.DMatrix")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
evnames <- list()
|
||||||
|
for (i in 1:length(watchlist)) {
|
||||||
|
w <- watchlist[i]
|
||||||
|
if (length(names(w)) == 0) {
|
||||||
|
stop("xgb.eval: name tag must be presented for every elements in watchlist")
|
||||||
|
}
|
||||||
|
evnames <- append(evnames, names(w))
|
||||||
|
}
|
||||||
|
msg <- .Call("XGBoosterEvalOneIter_R", booster, as.integer(iter), watchlist, evnames)
|
||||||
|
return(msg)
|
||||||
|
}
|
||||||
266
wrapper/xgboost.py
Normal file
266
wrapper/xgboost.py
Normal file
@ -0,0 +1,266 @@
|
|||||||
|
# Author: Tianqi Chen, Bing Xu
|
||||||
|
# module for xgboost
|
||||||
|
import ctypes
|
||||||
|
import os
|
||||||
|
# optinally have scipy sparse, though not necessary
|
||||||
|
import numpy
|
||||||
|
import sys
|
||||||
|
import numpy.ctypeslib
|
||||||
|
import scipy.sparse as scp
|
||||||
|
|
||||||
|
# set this line correctly
|
||||||
|
XGBOOST_PATH = os.path.dirname(__file__)+'/libxgboostwrapper.so'
|
||||||
|
|
||||||
|
# load in xgboost library
|
||||||
|
xglib = ctypes.cdll.LoadLibrary(XGBOOST_PATH)
|
||||||
|
|
||||||
|
xglib.XGDMatrixCreateFromFile.restype = ctypes.c_void_p
|
||||||
|
xglib.XGDMatrixCreateFromCSR.restype = ctypes.c_void_p
|
||||||
|
xglib.XGDMatrixCreateFromMat.restype = ctypes.c_void_p
|
||||||
|
xglib.XGDMatrixSliceDMatrix.restype = ctypes.c_void_p
|
||||||
|
xglib.XGDMatrixGetFloatInfo.restype = ctypes.POINTER(ctypes.c_float)
|
||||||
|
xglib.XGDMatrixNumRow.restype = ctypes.c_ulong
|
||||||
|
|
||||||
|
xglib.XGBoosterCreate.restype = ctypes.c_void_p
|
||||||
|
xglib.XGBoosterPredict.restype = ctypes.POINTER(ctypes.c_float)
|
||||||
|
xglib.XGBoosterEvalOneIter.restype = ctypes.c_char_p
|
||||||
|
xglib.XGBoosterDumpModel.restype = ctypes.POINTER(ctypes.c_char_p)
|
||||||
|
|
||||||
|
|
||||||
|
def ctypes2numpy(cptr, length):
|
||||||
|
# convert a ctypes pointer array to numpy
|
||||||
|
assert isinstance(cptr, ctypes.POINTER(ctypes.c_float))
|
||||||
|
res = numpy.zeros(length, dtype='float32')
|
||||||
|
assert ctypes.memmove(res.ctypes.data, cptr, length * res.strides[0])
|
||||||
|
return res
|
||||||
|
|
||||||
|
# data matrix used in xgboost
|
||||||
|
class DMatrix:
|
||||||
|
# constructor
|
||||||
|
def __init__(self, data, label=None, missing=0.0, weight = None):
|
||||||
|
# force into void_p, mac need to pass things in as void_p
|
||||||
|
if data == None:
|
||||||
|
self.handle = None
|
||||||
|
return
|
||||||
|
if isinstance(data, str):
|
||||||
|
self.handle = ctypes.c_void_p(
|
||||||
|
xglib.XGDMatrixCreateFromFile(ctypes.c_char_p(data.encode('utf-8')), 1))
|
||||||
|
elif isinstance(data, scp.csr_matrix):
|
||||||
|
self.__init_from_csr(data)
|
||||||
|
elif isinstance(data, numpy.ndarray) and len(data.shape) == 2:
|
||||||
|
self.__init_from_npy2d(data, missing)
|
||||||
|
else:
|
||||||
|
try:
|
||||||
|
csr = scp.csr_matrix(data)
|
||||||
|
self.__init_from_csr(csr)
|
||||||
|
except:
|
||||||
|
raise Exception("can not intialize DMatrix from"+str(type(data)))
|
||||||
|
if label != None:
|
||||||
|
self.set_label(label)
|
||||||
|
if weight !=None:
|
||||||
|
self.set_weight(weight)
|
||||||
|
# convert data from csr matrix
|
||||||
|
def __init_from_csr(self, csr):
|
||||||
|
assert len(csr.indices) == len(csr.data)
|
||||||
|
self.handle = ctypes.c_void_p(xglib.XGDMatrixCreateFromCSR(
|
||||||
|
(ctypes.c_ulong * len(csr.indptr))(*csr.indptr),
|
||||||
|
(ctypes.c_uint * len(csr.indices))(*csr.indices),
|
||||||
|
(ctypes.c_float * len(csr.data))(*csr.data),
|
||||||
|
len(csr.indptr), len(csr.data)))
|
||||||
|
# convert data from numpy matrix
|
||||||
|
def __init_from_npy2d(self,mat,missing):
|
||||||
|
data = numpy.array(mat.reshape(mat.size), dtype='float32')
|
||||||
|
self.handle = ctypes.c_void_p(xglib.XGDMatrixCreateFromMat(
|
||||||
|
data.ctypes.data_as(ctypes.POINTER(ctypes.c_float)),
|
||||||
|
mat.shape[0], mat.shape[1], ctypes.c_float(missing)))
|
||||||
|
# destructor
|
||||||
|
def __del__(self):
|
||||||
|
xglib.XGDMatrixFree(self.handle)
|
||||||
|
def __get_float_info(self, field):
|
||||||
|
length = ctypes.c_ulong()
|
||||||
|
ret = xglib.XGDMatrixGetFloatInfo(self.handle, ctypes.c_char_p(field.encode('utf-8')),
|
||||||
|
ctypes.byref(length))
|
||||||
|
return ctypes2numpy(ret, length.value)
|
||||||
|
def __set_float_info(self, field, data):
|
||||||
|
xglib.XGDMatrixSetFloatInfo(self.handle,ctypes.c_char_p(field.encode('utf-8')),
|
||||||
|
(ctypes.c_float*len(data))(*data), len(data))
|
||||||
|
# load data from file
|
||||||
|
def save_binary(self, fname, silent=True):
|
||||||
|
xglib.XGDMatrixSaveBinary(self.handle, ctypes.c_char_p(fname.encode('utf-8')), int(silent))
|
||||||
|
# set label of dmatrix
|
||||||
|
def set_label(self, label):
|
||||||
|
self.__set_float_info('label', label)
|
||||||
|
# set weight of each instances
|
||||||
|
def set_weight(self, weight):
|
||||||
|
self.__set_float_info('weight', weight)
|
||||||
|
# set initialized margin prediction
|
||||||
|
def set_base_margin(self, margin):
|
||||||
|
"""
|
||||||
|
set base margin of booster to start from
|
||||||
|
this can be used to specify a prediction value of
|
||||||
|
existing model to be base_margin
|
||||||
|
However, remember margin is needed, instead of transformed prediction
|
||||||
|
e.g. for logistic regression: need to put in value before logistic transformation
|
||||||
|
see also example/demo.py
|
||||||
|
"""
|
||||||
|
self.__set_float_info('base_margin', margin)
|
||||||
|
# set group size of dmatrix, used for rank
|
||||||
|
def set_group(self, group):
|
||||||
|
xglib.XGDMatrixSetGroup(self.handle, (ctypes.c_uint*len(group))(*group), len(group))
|
||||||
|
# get label from dmatrix
|
||||||
|
def get_label(self):
|
||||||
|
return self.__get_float_info('label')
|
||||||
|
# get weight from dmatrix
|
||||||
|
def get_weight(self):
|
||||||
|
return self.__get_float_info('weight')
|
||||||
|
# get base_margin from dmatrix
|
||||||
|
def get_base_margin(self):
|
||||||
|
return self.__get_float_info('base_margin')
|
||||||
|
def num_row(self):
|
||||||
|
return xglib.XGDMatrixNumRow(self.handle)
|
||||||
|
# slice the DMatrix to return a new DMatrix that only contains rindex
|
||||||
|
def slice(self, rindex):
|
||||||
|
res = DMatrix(None)
|
||||||
|
res.handle = ctypes.c_void_p(xglib.XGDMatrixSliceDMatrix(
|
||||||
|
self.handle, (ctypes.c_int*len(rindex))(*rindex), len(rindex)))
|
||||||
|
return res
|
||||||
|
|
||||||
|
class Booster:
|
||||||
|
"""learner class """
|
||||||
|
def __init__(self, params={}, cache=[], model_name = None):
|
||||||
|
""" constructor, param: """
|
||||||
|
for d in cache:
|
||||||
|
assert isinstance(d, DMatrix)
|
||||||
|
dmats = (ctypes.c_void_p * len(cache))(*[ d.handle for d in cache])
|
||||||
|
self.handle = ctypes.c_void_p(xglib.XGBoosterCreate(dmats, len(cache)))
|
||||||
|
self.set_param({'seed':0})
|
||||||
|
self.set_param(params)
|
||||||
|
if model_name != None:
|
||||||
|
self.load_model(model_name)
|
||||||
|
def __del__(self):
|
||||||
|
xglib.XGBoosterFree(self.handle)
|
||||||
|
def set_param(self, params, pv=None):
|
||||||
|
if isinstance(params, dict):
|
||||||
|
for k, v in params.items():
|
||||||
|
xglib.XGBoosterSetParam(
|
||||||
|
self.handle, ctypes.c_char_p(k.encode('utf-8')),
|
||||||
|
ctypes.c_char_p(str(v).encode('utf-8')))
|
||||||
|
elif isinstance(params,str) and pv != None:
|
||||||
|
xglib.XGBoosterSetParam(
|
||||||
|
self.handle, ctypes.c_char_p(params.encode('utf-8')),
|
||||||
|
ctypes.c_char_p(str(pv).encode('utf-8')))
|
||||||
|
else:
|
||||||
|
for k, v in params:
|
||||||
|
xglib.XGBoosterSetParam(
|
||||||
|
self.handle, ctypes.c_char_p(k.encode('utf-8')),
|
||||||
|
ctypes.c_char_p(str(v).encode('utf-8')))
|
||||||
|
def update(self, dtrain, it):
|
||||||
|
"""
|
||||||
|
update
|
||||||
|
dtrain: the training DMatrix
|
||||||
|
it: current iteration number
|
||||||
|
"""
|
||||||
|
assert isinstance(dtrain, DMatrix)
|
||||||
|
xglib.XGBoosterUpdateOneIter(self.handle, it, dtrain.handle)
|
||||||
|
def boost(self, dtrain, grad, hess):
|
||||||
|
""" update """
|
||||||
|
assert len(grad) == len(hess)
|
||||||
|
assert isinstance(dtrain, DMatrix)
|
||||||
|
xglib.XGBoosterBoostOneIter(self.handle, dtrain.handle,
|
||||||
|
(ctypes.c_float*len(grad))(*grad),
|
||||||
|
(ctypes.c_float*len(hess))(*hess),
|
||||||
|
len(grad))
|
||||||
|
def eval_set(self, evals, it = 0):
|
||||||
|
for d in evals:
|
||||||
|
assert isinstance(d[0], DMatrix)
|
||||||
|
assert isinstance(d[1], str)
|
||||||
|
dmats = (ctypes.c_void_p * len(evals) )(*[ d[0].handle for d in evals])
|
||||||
|
evnames = (ctypes.c_char_p * len(evals))(
|
||||||
|
* [ctypes.c_char_p(d[1].encode('utf-8')) for d in evals])
|
||||||
|
return xglib.XGBoosterEvalOneIter(self.handle, it, dmats, evnames, len(evals))
|
||||||
|
def eval(self, mat, name = 'eval', it = 0):
|
||||||
|
return self.eval_set( [(mat,name)], it)
|
||||||
|
def predict(self, data, output_margin=False):
    """Predict on data.

    data: the DMatrix storing the input
    output_margin: whether to output the raw, untransformed margin value
    """
    length = ctypes.c_ulong()
    raw = xglib.XGBoosterPredict(
        self.handle, data.handle, int(output_margin), ctypes.byref(length))
    return ctypes2numpy(raw, length.value)
|
||||||
|
def save_model(self, fname):
    """Serialize the booster to the file named fname."""
    fname_c = ctypes.c_char_p(fname.encode('utf-8'))
    xglib.XGBoosterSaveModel(self.handle, fname_c)
|
||||||
|
def load_model(self, fname):
    """Load booster state from the file named fname."""
    fname_c = ctypes.c_char_p(fname.encode('utf-8'))
    xglib.XGBoosterLoadModel(self.handle, fname_c)
|
||||||
|
def dump_model(self, fo, fmap=''):
    """Dump the model as text into fo.

    fo: a writable file object, or a file name (str) to be opened/closed here.
    fmap: optional feature-map file name passed to get_dump.
    """
    need_close = isinstance(fo, str)
    if need_close:
        fo = open(fo, 'w')
    # try/finally so a file we opened is closed even if get_dump or a
    # write raises (the original leaked the handle on error).
    try:
        for i, booster in enumerate(self.get_dump(fmap)):
            fo.write('booster[%d]:\n' % i)
            fo.write(booster)
    finally:
        if need_close:
            fo.close()
|
||||||
|
def get_dump(self, fmap=''):
    """Return the model dump as a list of strings, one entry per booster."""
    length = ctypes.c_ulong()
    sarr = xglib.XGBoosterDumpModel(
        self.handle, ctypes.c_char_p(fmap.encode('utf-8')), ctypes.byref(length))
    return [str(sarr[i]) for i in range(length.value)]
|
||||||
|
def get_fscore(self, fmap=''):
    """Return feature importance: a dict mapping feature id -> number of
    times the feature is used as a split in the dumped trees.

    fmap: optional feature-map file name passed to get_dump.
    """
    # NOTE: the original contained a leftover Python-2 debug statement
    # (`print tree`) — a syntax error under Python 3 — removed here.
    counts = {}
    for tree in self.get_dump(fmap):
        for line in tree.split('\n'):
            parts = line.split('[')
            if len(parts) == 1:
                continue  # leaf line: no split condition to count
            # split condition looks like "[fid<value]"; extract fid
            fid = parts[1].split(']')[0].split('<')[0]
            counts[fid] = counts.get(fid, 0) + 1
    return counts
|
||||||
|
|
||||||
|
def evaluate(bst, evals, it, feval=None):
    """Evaluate a booster on an evaluation set.

    bst: the Booster to evaluate.
    evals: list of (DMatrix, name) pairs.
    it: current iteration number (prefixed to the result string).
    feval: optional custom metric taking (preds, dmatrix) and returning
           (metric_name, value); when None the builtin eval_set is used.
    Returns the evaluation summary string.
    """
    # 'is None' instead of '== None' (identity comparison, PEP 8)
    if feval is None:
        return bst.eval_set(evals, it)
    res = '[%d]' % it
    for dm, evname in evals:
        name, val = feval(bst.predict(dm), dm)
        res += '\t%s-%s:%f' % (evname, name, val)
    return res
|
||||||
|
|
||||||
|
def train(params, dtrain, num_boost_round=10, evals=(), obj=None, feval=None):
    """Train a booster with the given parameters.

    params: booster parameters (dict or list of pairs).
    dtrain: the training DMatrix.
    num_boost_round: number of boosting iterations.
    evals: sequence of (DMatrix, name) pairs evaluated after each round
           (default changed from a mutable [] to an immutable tuple).
    obj: optional customized objective; called as obj(pred, dtrain) and
         must return (grad, hess).
    feval: optional customized evaluation metric, forwarded to evaluate().
    Returns the trained Booster.
    """
    bst = Booster(params, [dtrain] + [d[0] for d in evals])
    for i in range(num_boost_round):
        if obj is None:
            bst.update(dtrain, i)
        else:
            # customized objective: derive gradients from current prediction
            pred = bst.predict(dtrain)
            grad, hess = obj(pred, dtrain)
            bst.boost(dtrain, grad, hess)
        if len(evals) != 0:
            sys.stderr.write(evaluate(bst, evals, i, feval) + '\n')
    return bst
|
||||||
115
wrapper/xgboost_R.cpp
Normal file
115
wrapper/xgboost_R.cpp
Normal file
@ -0,0 +1,115 @@
|
|||||||
|
#include <vector>
|
||||||
|
#include <string>
|
||||||
|
#include "xgboost_wrapper.h"
|
||||||
|
#include "xgboost_R.h"
|
||||||
|
#include "../src/utils/utils.h"
|
||||||
|
#include "../src/utils/omp.h"
|
||||||
|
|
||||||
|
using namespace xgboost;
|
||||||
|
|
||||||
|
extern "C" {
|
||||||
|
void _DMatrixFinalizer(SEXP ext) {
|
||||||
|
if (R_ExternalPtrAddr(ext) == NULL) return;
|
||||||
|
XGDMatrixFree(R_ExternalPtrAddr(ext));
|
||||||
|
R_ClearExternalPtr(ext);
|
||||||
|
}
|
||||||
|
SEXP XGDMatrixCreateFromFile_R(SEXP fname, SEXP silent) {
|
||||||
|
void *handle = XGDMatrixCreateFromFile(CHAR(asChar(fname)), asInteger(silent));
|
||||||
|
SEXP ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue));
|
||||||
|
R_RegisterCFinalizerEx(ret, _DMatrixFinalizer, TRUE);
|
||||||
|
UNPROTECT(1);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
void XGDMatrixSaveBinary_R(SEXP handle, SEXP fname, SEXP silent) {
|
||||||
|
XGDMatrixSaveBinary(R_ExternalPtrAddr(handle),
|
||||||
|
CHAR(asChar(fname)), asInteger(silent));
|
||||||
|
}
|
||||||
|
|
||||||
|
// functions related to booster
|
||||||
|
void _BoosterFinalizer(SEXP ext) {
|
||||||
|
if (R_ExternalPtrAddr(ext) == NULL) return;
|
||||||
|
XGBoosterFree(R_ExternalPtrAddr(ext));
|
||||||
|
R_ClearExternalPtr(ext);
|
||||||
|
}
|
||||||
|
SEXP XGBoosterCreate_R(SEXP dmats) {
|
||||||
|
int len = length(dmats);
|
||||||
|
std::vector<void*> dvec;
|
||||||
|
for (int i = 0; i < len; ++i){
|
||||||
|
dvec.push_back(R_ExternalPtrAddr(VECTOR_ELT(dmats, i)));
|
||||||
|
}
|
||||||
|
void *handle = XGBoosterCreate(&dvec[0], dvec.size());
|
||||||
|
SEXP ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue));
|
||||||
|
R_RegisterCFinalizerEx(ret, _BoosterFinalizer, TRUE);
|
||||||
|
UNPROTECT(1);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
void XGBoosterSetParam_R(SEXP handle, SEXP name, SEXP val) {
|
||||||
|
XGBoosterSetParam(R_ExternalPtrAddr(handle),
|
||||||
|
CHAR(asChar(name)),
|
||||||
|
CHAR(asChar(val)));
|
||||||
|
}
|
||||||
|
void XGBoosterUpdateOneIter_R(SEXP handle, SEXP iter, SEXP dtrain) {
|
||||||
|
XGBoosterUpdateOneIter(R_ExternalPtrAddr(handle),
|
||||||
|
asInteger(iter),
|
||||||
|
R_ExternalPtrAddr(dtrain));
|
||||||
|
}
|
||||||
|
void XGBoosterBoostOneIter_R(SEXP handle, SEXP dtrain, SEXP grad, SEXP hess) {
|
||||||
|
utils::Check(length(grad) == length(hess), "gradient and hess must have same length");
|
||||||
|
int len = length(grad);
|
||||||
|
std::vector<float> tgrad(len), thess(len);
|
||||||
|
#pragma omp parallel for schedule(static)
|
||||||
|
for (int j = 0; j < len; ++j) {
|
||||||
|
tgrad[j] = REAL(grad)[j];
|
||||||
|
thess[j] = REAL(hess)[j];
|
||||||
|
}
|
||||||
|
XGBoosterBoostOneIter(R_ExternalPtrAddr(handle),
|
||||||
|
R_ExternalPtrAddr(dtrain),
|
||||||
|
&tgrad[0], &thess[0], len);
|
||||||
|
}
|
||||||
|
SEXP XGBoosterEvalOneIter_R(SEXP handle, SEXP iter, SEXP dmats, SEXP evnames) {
|
||||||
|
utils::Check(length(dmats) == length(evnames), "dmats and evnams must have same length");
|
||||||
|
int len = length(dmats);
|
||||||
|
std::vector<void*> vec_dmats;
|
||||||
|
std::vector<std::string> vec_names;
|
||||||
|
std::vector<const char*> vec_sptr;
|
||||||
|
for (int i = 0; i < len; ++i){
|
||||||
|
vec_dmats.push_back(R_ExternalPtrAddr(VECTOR_ELT(dmats, i)));
|
||||||
|
vec_names.push_back(std::string(CHAR(asChar(VECTOR_ELT(evnames, i)))));
|
||||||
|
vec_sptr.push_back(vec_names.back().c_str());
|
||||||
|
}
|
||||||
|
return mkString(XGBoosterEvalOneIter(R_ExternalPtrAddr(handle),
|
||||||
|
asInteger(iter),
|
||||||
|
&vec_dmats[0], &vec_sptr[0], len));
|
||||||
|
}
|
||||||
|
SEXP XGBoosterPredict_R(SEXP handle, SEXP dmat, SEXP output_margin) {
|
||||||
|
size_t olen;
|
||||||
|
const float *res = XGBoosterPredict(R_ExternalPtrAddr(handle),
|
||||||
|
R_ExternalPtrAddr(dmat),
|
||||||
|
asInteger(output_margin),
|
||||||
|
&olen);
|
||||||
|
SEXP ret = PROTECT(allocVector(REALSXP, olen));
|
||||||
|
for (size_t i = 0; i < olen; ++i) {
|
||||||
|
REAL(ret)[i] = res[i];
|
||||||
|
}
|
||||||
|
UNPROTECT(1);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
void XGBoosterLoadModel_R(SEXP handle, SEXP fname) {
|
||||||
|
XGBoosterLoadModel(R_ExternalPtrAddr(handle), CHAR(asChar(fname)));
|
||||||
|
}
|
||||||
|
void XGBoosterSaveModel_R(SEXP handle, SEXP fname) {
|
||||||
|
XGBoosterSaveModel(R_ExternalPtrAddr(handle), CHAR(asChar(fname)));
|
||||||
|
}
|
||||||
|
void XGBoosterDumpModel_R(SEXP handle, SEXP fname, SEXP fmap) {
  // Dump the model as text into the file `fname`; `fmap` names the feature
  // map file (may be an empty string).
  size_t olen;
  const char **res = XGBoosterDumpModel(R_ExternalPtrAddr(handle),
                                        CHAR(asChar(fmap)),
                                        &olen);
  FILE *fo = utils::FopenCheck(CHAR(asChar(fname)), "w");
  for (size_t i = 0; i < olen; ++i) {
    // cast: %lu expects unsigned long; size_t is not guaranteed to be
    // unsigned long on all platforms (passing it uncast is UB there)
    fprintf(fo, "booster[%lu]:\n", static_cast<unsigned long>(i));
    fprintf(fo, "%s\n", res[i]);
  }
  fclose(fo);
}
|
||||||
|
}
|
||||||
91
wrapper/xgboost_R.h
Normal file
91
wrapper/xgboost_R.h
Normal file
@ -0,0 +1,91 @@
|
|||||||
|
#ifndef XGBOOST_WRAPPER_R_H_
|
||||||
|
#define XGBOOST_WRAPPER_R_H_
|
||||||
|
/*!
|
||||||
|
 * \file xgboost_R.h
|
||||||
|
* \author Tianqi Chen
|
||||||
|
* \brief R wrapper of xgboost
|
||||||
|
*/
|
||||||
|
extern "C" {
|
||||||
|
#include <Rinternals.h>
|
||||||
|
}
|
||||||
|
|
||||||
|
extern "C" {
|
||||||
|
/*!
|
||||||
|
* \brief load a data matrix
|
||||||
|
* \param fname name of the content
|
||||||
|
* \param silent whether print messages
|
||||||
|
* \return a loaded data matrix
|
||||||
|
*/
|
||||||
|
SEXP XGDMatrixCreateFromFile_R(SEXP fname, SEXP silent);
|
||||||
|
/*!
|
||||||
|
* \brief load a data matrix into binary file
|
||||||
|
* \param handle a instance of data matrix
|
||||||
|
* \param fname file name
|
||||||
|
* \param silent print statistics when saving
|
||||||
|
*/
|
||||||
|
void XGDMatrixSaveBinary_R(SEXP handle, SEXP fname, SEXP silent);
|
||||||
|
/*!
|
||||||
|
* \brief create xgboost learner
|
||||||
|
* \param dmats a list of dmatrix handles that will be cached
|
||||||
|
*/
|
||||||
|
SEXP XGBoosterCreate_R(SEXP dmats);
|
||||||
|
/*!
|
||||||
|
* \brief set parameters
|
||||||
|
* \param handle handle
|
||||||
|
* \param name parameter name
|
||||||
|
* \param val value of parameter
|
||||||
|
*/
|
||||||
|
void XGBoosterSetParam_R(SEXP handle, SEXP name, SEXP val);
|
||||||
|
/*!
|
||||||
|
* \brief update the model in one round using dtrain
|
||||||
|
* \param handle handle
|
||||||
|
* \param iter current iteration rounds
|
||||||
|
* \param dtrain training data
|
||||||
|
*/
|
||||||
|
void XGBoosterUpdateOneIter_R(SEXP ext, SEXP iter, SEXP dtrain);
|
||||||
|
/*!
|
||||||
|
* \brief update the model, by directly specify gradient and second order gradient,
|
||||||
|
* this can be used to replace UpdateOneIter, to support customized loss function
|
||||||
|
* \param handle handle
|
||||||
|
* \param dtrain training data
|
||||||
|
* \param grad gradient statistics
|
||||||
|
* \param hess second order gradient statistics
|
||||||
|
*/
|
||||||
|
void XGBoosterBoostOneIter_R(SEXP handle, SEXP dtrain, SEXP grad, SEXP hess);
|
||||||
|
/*!
|
||||||
|
* \brief get evaluation statistics for xgboost
|
||||||
|
* \param handle handle
|
||||||
|
* \param iter current iteration rounds
|
||||||
|
* \param dmats list of handles to dmatrices
|
||||||
|
* \param evname name of evaluation
|
||||||
|
 * \return the string containing evaluation statistics
|
||||||
|
*/
|
||||||
|
SEXP XGBoosterEvalOneIter_R(SEXP handle, SEXP iter, SEXP dmats, SEXP evnames);
|
||||||
|
/*!
|
||||||
|
* \brief make prediction based on dmat
|
||||||
|
* \param handle handle
|
||||||
|
* \param dmat data matrix
|
||||||
|
* \param output_margin whether only output raw margin value
|
||||||
|
*/
|
||||||
|
SEXP XGBoosterPredict_R(SEXP handle, SEXP dmat, SEXP output_margin);
|
||||||
|
/*!
|
||||||
|
* \brief load model from existing file
|
||||||
|
* \param handle handle
|
||||||
|
* \param fname file name
|
||||||
|
*/
|
||||||
|
void XGBoosterLoadModel_R(SEXP handle, SEXP fname);
|
||||||
|
/*!
|
||||||
|
* \brief save model into existing file
|
||||||
|
* \param handle handle
|
||||||
|
* \param fname file name
|
||||||
|
*/
|
||||||
|
void XGBoosterSaveModel_R(SEXP handle, SEXP fname);
|
||||||
|
/*!
|
||||||
|
* \brief dump model into text file
|
||||||
|
* \param handle handle
|
||||||
|
* \param fname file name of model that can be dumped into
|
||||||
|
* \param fmap name to fmap can be empty string
|
||||||
|
*/
|
||||||
|
void XGBoosterDumpModel_R(SEXP handle, SEXP fname, SEXP fmap);
|
||||||
|
};
|
||||||
|
#endif // XGBOOST_WRAPPER_R_H_
|
||||||
249
wrapper/xgboost_wrapper.cpp
Normal file
249
wrapper/xgboost_wrapper.cpp
Normal file
@ -0,0 +1,249 @@
|
|||||||
|
// implementations in ctypes
|
||||||
|
#include <cstdio>
|
||||||
|
#include <vector>
|
||||||
|
#include <string>
|
||||||
|
#include <cstring>
|
||||||
|
#include <algorithm>
|
||||||
|
#include "./xgboost_wrapper.h"
|
||||||
|
#include "../src/data.h"
|
||||||
|
#include "../src/learner/learner-inl.hpp"
|
||||||
|
#include "../src/io/io.h"
|
||||||
|
#include "../src/io/simple_dmatrix-inl.hpp"
|
||||||
|
|
||||||
|
using namespace xgboost;
|
||||||
|
using namespace xgboost::io;
|
||||||
|
|
||||||
|
namespace xgboost {
|
||||||
|
namespace wrapper {
|
||||||
|
// booster wrapper class
|
||||||
|
class Booster: public learner::BoostLearner<FMatrixS> {
|
||||||
|
public:
|
||||||
|
explicit Booster(const std::vector<DataMatrix*>& mats) {
|
||||||
|
this->silent = 1;
|
||||||
|
this->init_model = false;
|
||||||
|
this->SetCacheData(mats);
|
||||||
|
}
|
||||||
|
const float *Pred(const DataMatrix &dmat, int output_margin, size_t *len) {
|
||||||
|
this->CheckInitModel();
|
||||||
|
this->Predict(dmat, output_margin, &this->preds_);
|
||||||
|
*len = this->preds_.size();
|
||||||
|
return &this->preds_[0];
|
||||||
|
}
|
||||||
|
inline void BoostOneIter(const DataMatrix &train,
|
||||||
|
float *grad, float *hess, size_t len) {
|
||||||
|
this->gpair_.resize(len);
|
||||||
|
const unsigned ndata = static_cast<unsigned>(len);
|
||||||
|
#pragma omp parallel for schedule(static)
|
||||||
|
for (unsigned j = 0; j < ndata; ++j) {
|
||||||
|
gpair_[j] = bst_gpair(grad[j], hess[j]);
|
||||||
|
}
|
||||||
|
gbm_->DoBoost(train.fmat, train.info.info, &gpair_);
|
||||||
|
}
|
||||||
|
inline void CheckInitModel(void) {
|
||||||
|
if (!init_model) {
|
||||||
|
this->InitModel(); init_model = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
inline void LoadModel(const char *fname) {
|
||||||
|
learner::BoostLearner<FMatrixS>::LoadModel(fname);
|
||||||
|
this->init_model = true;
|
||||||
|
}
|
||||||
|
inline const char** GetModelDump(const utils::FeatMap& fmap, bool with_stats, size_t *len) {
|
||||||
|
model_dump = this->DumpModel(fmap, with_stats);
|
||||||
|
model_dump_cptr.resize(model_dump.size());
|
||||||
|
for (size_t i = 0; i < model_dump.size(); ++i) {
|
||||||
|
model_dump_cptr[i] = model_dump[i].c_str();
|
||||||
|
}
|
||||||
|
*len = model_dump.size();
|
||||||
|
return &model_dump_cptr[0];
|
||||||
|
}
|
||||||
|
// temporal fields
|
||||||
|
// temporal data to save evaluation dump
|
||||||
|
std::string eval_str;
|
||||||
|
// temporal space to save model dump
|
||||||
|
std::vector<std::string> model_dump;
|
||||||
|
std::vector<const char*> model_dump_cptr;
|
||||||
|
|
||||||
|
private:
|
||||||
|
bool init_model;
|
||||||
|
};
|
||||||
|
} // namespace wrapper
|
||||||
|
} // namespace xgboost
|
||||||
|
|
||||||
|
using namespace xgboost::wrapper;
|
||||||
|
|
||||||
|
extern "C"{
|
||||||
|
void* XGDMatrixCreateFromFile(const char *fname, int silent) {
|
||||||
|
return LoadDataMatrix(fname, silent, false);
|
||||||
|
}
|
||||||
|
void* XGDMatrixCreateFromCSR(const size_t *indptr,
|
||||||
|
const unsigned *indices,
|
||||||
|
const float *data,
|
||||||
|
size_t nindptr,
|
||||||
|
size_t nelem) {
|
||||||
|
DMatrixSimple *p_mat = new DMatrixSimple();
|
||||||
|
DMatrixSimple &mat = *p_mat;
|
||||||
|
mat.row_ptr_.resize(nindptr);
|
||||||
|
memcpy(&mat.row_ptr_[0], indptr, sizeof(size_t)*nindptr);
|
||||||
|
mat.row_data_.resize(nelem);
|
||||||
|
for (size_t i = 0; i < nelem; ++i) {
|
||||||
|
mat.row_data_[i] = SparseBatch::Entry(indices[i], data[i]);
|
||||||
|
mat.info.num_col = std::max(mat.info.num_col,
|
||||||
|
static_cast<size_t>(indices[i]+1));
|
||||||
|
}
|
||||||
|
mat.info.num_row = nindptr - 1;
|
||||||
|
return p_mat;
|
||||||
|
}
|
||||||
|
void* XGDMatrixCreateFromMat(const float *data,
|
||||||
|
size_t nrow,
|
||||||
|
size_t ncol,
|
||||||
|
float missing) {
|
||||||
|
DMatrixSimple *p_mat = new DMatrixSimple();
|
||||||
|
DMatrixSimple &mat = *p_mat;
|
||||||
|
mat.info.num_row = nrow;
|
||||||
|
mat.info.num_col = ncol;
|
||||||
|
for (size_t i = 0; i < nrow; ++i, data += ncol) {
|
||||||
|
size_t nelem = 0;
|
||||||
|
for (size_t j = 0; j < ncol; ++j) {
|
||||||
|
if (data[j] != missing) {
|
||||||
|
mat.row_data_.push_back(SparseBatch::Entry(j, data[j]));
|
||||||
|
++nelem;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
mat.row_ptr_.push_back(mat.row_ptr_.back() + nelem);
|
||||||
|
}
|
||||||
|
return p_mat;
|
||||||
|
}
|
||||||
|
void* XGDMatrixSliceDMatrix(void *handle,
|
||||||
|
const int *idxset,
|
||||||
|
size_t len) {
|
||||||
|
DMatrixSimple tmp;
|
||||||
|
DataMatrix &dsrc = *static_cast<DataMatrix*>(handle);
|
||||||
|
if (dsrc.magic != DMatrixSimple::kMagic) {
|
||||||
|
tmp.CopyFrom(dsrc);
|
||||||
|
}
|
||||||
|
DataMatrix &src = (dsrc.magic == DMatrixSimple::kMagic ?
|
||||||
|
*static_cast<DMatrixSimple*>(handle): tmp);
|
||||||
|
DMatrixSimple *p_ret = new DMatrixSimple();
|
||||||
|
DMatrixSimple &ret = *p_ret;
|
||||||
|
|
||||||
|
utils::Check(src.info.group_ptr.size() == 0,
|
||||||
|
"slice does not support group structure");
|
||||||
|
ret.Clear();
|
||||||
|
ret.info.num_row = len;
|
||||||
|
ret.info.num_col = src.info.num_col;
|
||||||
|
|
||||||
|
utils::IIterator<SparseBatch> *iter = src.fmat.RowIterator();
|
||||||
|
iter->BeforeFirst();
|
||||||
|
utils::Assert(iter->Next(), "slice");
|
||||||
|
const SparseBatch &batch = iter->Value();
|
||||||
|
for (size_t i = 0; i < len; ++i) {
|
||||||
|
const int ridx = idxset[i];
|
||||||
|
SparseBatch::Inst inst = batch[ridx];
|
||||||
|
utils::Check(static_cast<size_t>(ridx) < batch.size, "slice index exceed number of rows");
|
||||||
|
ret.row_data_.resize(ret.row_data_.size() + inst.length);
|
||||||
|
memcpy(&ret.row_data_[ret.row_ptr_.back()], inst.data,
|
||||||
|
sizeof(SparseBatch::Entry) * inst.length);
|
||||||
|
ret.row_ptr_.push_back(ret.row_ptr_.back() + inst.length);
|
||||||
|
if (src.info.labels.size() != 0) {
|
||||||
|
ret.info.labels.push_back(src.info.labels[ridx]);
|
||||||
|
}
|
||||||
|
if (src.info.weights.size() != 0) {
|
||||||
|
ret.info.weights.push_back(src.info.weights[ridx]);
|
||||||
|
}
|
||||||
|
if (src.info.info.root_index.size() != 0) {
|
||||||
|
ret.info.info.root_index.push_back(src.info.info.root_index[ridx]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return p_ret;
|
||||||
|
}
|
||||||
|
void XGDMatrixFree(void *handle) {
|
||||||
|
delete static_cast<DataMatrix*>(handle);
|
||||||
|
}
|
||||||
|
void XGDMatrixSaveBinary(void *handle, const char *fname, int silent) {
|
||||||
|
SaveDataMatrix(*static_cast<DataMatrix*>(handle), fname, silent);
|
||||||
|
}
|
||||||
|
void XGDMatrixSetFloatInfo(void *handle, const char *field, const float *info, size_t len) {
|
||||||
|
std::vector<float> &vec =
|
||||||
|
static_cast<DataMatrix*>(handle)->info.GetInfo(field);
|
||||||
|
vec.resize(len);
|
||||||
|
memcpy(&vec[0], info, sizeof(float) * len);
|
||||||
|
}
|
||||||
|
void XGDMatrixSetGroup(void *handle, const unsigned *group, size_t len) {
|
||||||
|
DataMatrix *pmat = static_cast<DataMatrix*>(handle);
|
||||||
|
pmat->info.group_ptr.resize(len + 1);
|
||||||
|
pmat->info.group_ptr[0] = 0;
|
||||||
|
for (size_t i = 0; i < len; ++i) {
|
||||||
|
pmat->info.group_ptr[i+1] = pmat->info.group_ptr[i]+group[i];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
const float* XGDMatrixGetFloatInfo(const void *handle, const char *field, size_t* len) {
|
||||||
|
const std::vector<float> &vec =
|
||||||
|
static_cast<const DataMatrix*>(handle)->info.GetInfo(field);
|
||||||
|
*len = vec.size();
|
||||||
|
return &vec[0];
|
||||||
|
}
|
||||||
|
size_t XGDMatrixNumRow(const void *handle) {
|
||||||
|
return static_cast<const DataMatrix*>(handle)->info.num_row;
|
||||||
|
}
|
||||||
|
|
||||||
|
// xgboost implementation
|
||||||
|
void *XGBoosterCreate(void *dmats[], size_t len) {
|
||||||
|
std::vector<DataMatrix*> mats;
|
||||||
|
for (size_t i = 0; i < len; ++i) {
|
||||||
|
DataMatrix *dtr = static_cast<DataMatrix*>(dmats[i]);
|
||||||
|
mats.push_back(dtr);
|
||||||
|
}
|
||||||
|
return new Booster(mats);
|
||||||
|
}
|
||||||
|
void XGBoosterFree(void *handle) {
|
||||||
|
delete static_cast<Booster*>(handle);
|
||||||
|
}
|
||||||
|
void XGBoosterSetParam(void *handle, const char *name, const char *value) {
|
||||||
|
static_cast<Booster*>(handle)->SetParam(name, value);
|
||||||
|
}
|
||||||
|
void XGBoosterUpdateOneIter(void *handle, int iter, void *dtrain) {
|
||||||
|
Booster *bst = static_cast<Booster*>(handle);
|
||||||
|
DataMatrix *dtr = static_cast<DataMatrix*>(dtrain);
|
||||||
|
bst->CheckInitModel();
|
||||||
|
bst->CheckInit(dtr);
|
||||||
|
bst->UpdateOneIter(iter, *dtr);
|
||||||
|
}
|
||||||
|
void XGBoosterBoostOneIter(void *handle, void *dtrain,
|
||||||
|
float *grad, float *hess, size_t len) {
|
||||||
|
Booster *bst = static_cast<Booster*>(handle);
|
||||||
|
DataMatrix *dtr = static_cast<DataMatrix*>(dtrain);
|
||||||
|
bst->CheckInitModel();
|
||||||
|
bst->CheckInit(dtr);
|
||||||
|
bst->BoostOneIter(*dtr, grad, hess, len);
|
||||||
|
}
|
||||||
|
const char* XGBoosterEvalOneIter(void *handle, int iter, void *dmats[],
|
||||||
|
const char *evnames[], size_t len) {
|
||||||
|
Booster *bst = static_cast<Booster*>(handle);
|
||||||
|
std::vector<std::string> names;
|
||||||
|
std::vector<const DataMatrix*> mats;
|
||||||
|
for (size_t i = 0; i < len; ++i) {
|
||||||
|
mats.push_back(static_cast<DataMatrix*>(dmats[i]));
|
||||||
|
names.push_back(std::string(evnames[i]));
|
||||||
|
}
|
||||||
|
bst->CheckInitModel();
|
||||||
|
bst->eval_str = bst->EvalOneIter(iter, mats, names);
|
||||||
|
return bst->eval_str.c_str();
|
||||||
|
}
|
||||||
|
const float *XGBoosterPredict(void *handle, void *dmat, int output_margin, size_t *len) {
|
||||||
|
return static_cast<Booster*>(handle)->Pred(*static_cast<DataMatrix*>(dmat), output_margin, len);
|
||||||
|
}
|
||||||
|
void XGBoosterLoadModel(void *handle, const char *fname) {
|
||||||
|
static_cast<Booster*>(handle)->LoadModel(fname);
|
||||||
|
}
|
||||||
|
void XGBoosterSaveModel(const void *handle, const char *fname) {
|
||||||
|
static_cast<const Booster*>(handle)->SaveModel(fname);
|
||||||
|
}
|
||||||
|
const char** XGBoosterDumpModel(void *handle, const char *fmap, size_t *len){
|
||||||
|
utils::FeatMap featmap;
|
||||||
|
if (strlen(fmap) != 0) {
|
||||||
|
featmap.LoadText(fmap);
|
||||||
|
}
|
||||||
|
return static_cast<Booster*>(handle)->GetModelDump(featmap, false, len);
|
||||||
|
}
|
||||||
|
};
|
||||||
171
wrapper/xgboost_wrapper.h
Normal file
171
wrapper/xgboost_wrapper.h
Normal file
@ -0,0 +1,171 @@
|
|||||||
|
#ifndef XGBOOST_WRAPPER_H_
|
||||||
|
#define XGBOOST_WRAPPER_H_
|
||||||
|
/*!
|
||||||
|
 * \file xgboost_wrapper.h
|
||||||
|
* \author Tianqi Chen
|
||||||
|
* \brief a C style wrapper of xgboost
|
||||||
|
* can be used to create wrapper of other languages
|
||||||
|
*/
|
||||||
|
#include <cstdio>
|
||||||
|
|
||||||
|
extern "C" {
|
||||||
|
/*!
|
||||||
|
* \brief load a data matrix
|
||||||
|
* \return a loaded data matrix
|
||||||
|
*/
|
||||||
|
void* XGDMatrixCreateFromFile(const char *fname, int silent);
|
||||||
|
/*!
|
||||||
|
* \brief create a matrix content from csr format
|
||||||
|
* \param handle a instance of data matrix
|
||||||
|
* \param indptr pointer to row headers
|
||||||
|
* \param indices findex
|
||||||
|
* \param data fvalue
|
||||||
|
* \param nindptr number of rows in the matix + 1
|
||||||
|
* \param nelem number of nonzero elements in the matrix
|
||||||
|
* \return created dmatrix
|
||||||
|
*/
|
||||||
|
void* XGDMatrixCreateFromCSR(const size_t *indptr,
|
||||||
|
const unsigned *indices,
|
||||||
|
const float *data,
|
||||||
|
size_t nindptr,
|
||||||
|
size_t nelem);
|
||||||
|
/*!
|
||||||
|
* \brief create matrix content from dense matrix
|
||||||
|
* \param handle a instance of data matrix
|
||||||
|
* \param data pointer to the data space
|
||||||
|
* \param nrow number of rows
|
||||||
|
* \param ncol number columns
|
||||||
|
* \param missing which value to represent missing value
|
||||||
|
* \return created dmatrix
|
||||||
|
*/
|
||||||
|
void* XGDMatrixCreateFromMat(const float *data,
|
||||||
|
size_t nrow,
|
||||||
|
size_t ncol,
|
||||||
|
float missing);
|
||||||
|
/*!
|
||||||
|
* \brief create a new dmatrix from sliced content of existing matrix
|
||||||
|
* \param handle instance of data matrix to be sliced
|
||||||
|
* \param idxset index set
|
||||||
|
* \param len length of index set
|
||||||
|
* \return a sliced new matrix
|
||||||
|
*/
|
||||||
|
void* XGDMatrixSliceDMatrix(void *handle,
|
||||||
|
const int *idxset,
|
||||||
|
size_t len);
|
||||||
|
/*!
|
||||||
|
* \brief free space in data matrix
|
||||||
|
*/
|
||||||
|
void XGDMatrixFree(void *handle);
|
||||||
|
/*!
|
||||||
|
* \brief load a data matrix into binary file
|
||||||
|
* \param handle a instance of data matrix
|
||||||
|
* \param fname file name
|
||||||
|
* \param silent print statistics when saving
|
||||||
|
*/
|
||||||
|
void XGDMatrixSaveBinary(void *handle, const char *fname, int silent);
|
||||||
|
/*!
|
||||||
|
* \brief set float vector to a content in info
|
||||||
|
* \param handle a instance of data matrix
|
||||||
|
* \param field field name, can be label, weight
|
||||||
|
* \param array pointer to float vector
|
||||||
|
* \param len length of array
|
||||||
|
*/
|
||||||
|
void XGDMatrixSetFloatInfo(void *handle, const char *field, const float *array, size_t len);
|
||||||
|
/*!
|
||||||
|
* \brief set label of the training matrix
|
||||||
|
* \param handle a instance of data matrix
|
||||||
|
* \param group pointer to group size
|
||||||
|
* \param len length of array
|
||||||
|
*/
|
||||||
|
void XGDMatrixSetGroup(void *handle, const unsigned *group, size_t len);
|
||||||
|
/*!
|
||||||
|
* \brief get float info vector from matrix
|
||||||
|
* \param handle a instance of data matrix
|
||||||
|
* \param len used to set result length
|
||||||
|
* \param field field name
|
||||||
|
* \return pointer to the label
|
||||||
|
*/
|
||||||
|
const float* XGDMatrixGetFloatInfo(const void *handle, const char *field, size_t* out_len);
|
||||||
|
/*!
|
||||||
|
* \brief return number of rows
|
||||||
|
*/
|
||||||
|
size_t XGDMatrixNumRow(const void *handle);
|
||||||
|
// --- start XGBoost class
|
||||||
|
/*!
|
||||||
|
* \brief create xgboost learner
|
||||||
|
* \param dmats matrices that are set to be cached
|
||||||
|
* \param len length of dmats
|
||||||
|
*/
|
||||||
|
void *XGBoosterCreate(void* dmats[], size_t len);
|
||||||
|
/*!
|
||||||
|
* \brief free obj in handle
|
||||||
|
* \param handle handle to be freed
|
||||||
|
*/
|
||||||
|
void XGBoosterFree(void* handle);
|
||||||
|
/*!
|
||||||
|
* \brief set parameters
|
||||||
|
* \param handle handle
|
||||||
|
* \param name parameter name
|
||||||
|
* \param val value of parameter
|
||||||
|
*/
|
||||||
|
void XGBoosterSetParam(void *handle, const char *name, const char *value);
|
||||||
|
/*!
|
||||||
|
* \brief update the model in one round using dtrain
|
||||||
|
* \param handle handle
|
||||||
|
* \param iter current iteration rounds
|
||||||
|
* \param dtrain training data
|
||||||
|
*/
|
||||||
|
void XGBoosterUpdateOneIter(void *handle, int iter, void *dtrain);
|
||||||
|
/*!
|
||||||
|
* \brief update the model, by directly specify gradient and second order gradient,
|
||||||
|
* this can be used to replace UpdateOneIter, to support customized loss function
|
||||||
|
* \param handle handle
|
||||||
|
* \param dtrain training data
|
||||||
|
* \param grad gradient statistics
|
||||||
|
* \param hess second order gradient statistics
|
||||||
|
* \param len length of grad/hess array
|
||||||
|
*/
|
||||||
|
void XGBoosterBoostOneIter(void *handle, void *dtrain,
|
||||||
|
float *grad, float *hess, size_t len);
|
||||||
|
/*!
|
||||||
|
* \brief get evaluation statistics for xgboost
|
||||||
|
* \param handle handle
|
||||||
|
* \param iter current iteration rounds
|
||||||
|
* \param dmats pointers to data to be evaluated
|
||||||
|
* \param evnames pointers to names of each data
|
||||||
|
* \param len length of dmats
|
||||||
|
 * \return the string containing evaluation statistics
|
||||||
|
*/
|
||||||
|
const char *XGBoosterEvalOneIter(void *handle, int iter, void *dmats[],
|
||||||
|
const char *evnames[], size_t len);
|
||||||
|
/*!
|
||||||
|
* \brief make prediction based on dmat
|
||||||
|
* \param handle handle
|
||||||
|
* \param dmat data matrix
|
||||||
|
* \param output_margin whether only output raw margin value
|
||||||
|
* \param len used to store length of returning result
|
||||||
|
*/
|
||||||
|
const float *XGBoosterPredict(void *handle, void *dmat, int output_margin, size_t *len);
|
||||||
|
/*!
|
||||||
|
* \brief load model from existing file
|
||||||
|
* \param handle handle
|
||||||
|
* \param fname file name
|
||||||
|
*/
|
||||||
|
void XGBoosterLoadModel(void *handle, const char *fname);
|
||||||
|
/*!
|
||||||
|
* \brief save model into existing file
|
||||||
|
* \param handle handle
|
||||||
|
* \param fname file name
|
||||||
|
*/
|
||||||
|
void XGBoosterSaveModel(const void *handle, const char *fname);
|
||||||
|
/*!
|
||||||
|
* \brief dump model, return array of strings representing model dump
|
||||||
|
* \param handle handle
|
||||||
|
* \param fmap name to fmap can be empty string
|
||||||
|
* \param out_len length of output array
|
||||||
|
* \return char *data[], representing dump of each model
|
||||||
|
*/
|
||||||
|
const char **XGBoosterDumpModel(void *handle, const char *fmap,
|
||||||
|
size_t *out_len);
|
||||||
|
};
|
||||||
|
#endif // XGBOOST_WRAPPER_H_
|
||||||
Loading…
x
Reference in New Issue
Block a user