chg
This commit is contained in:
parent
3ba7995754
commit
08a6b92216
1611
wrapper/R-example/agaricus.txt.test
Normal file
1611
wrapper/R-example/agaricus.txt.test
Normal file
File diff suppressed because it is too large
Load Diff
6513
wrapper/R-example/agaricus.txt.train
Normal file
6513
wrapper/R-example/agaricus.txt.train
Normal file
File diff suppressed because it is too large
Load Diff
14
wrapper/R-example/demo.R
Normal file
14
wrapper/R-example/demo.R
Normal file
@ -0,0 +1,14 @@
|
||||
# include xgboost library, must set chdir=TRURE
|
||||
source('../xgboost.R', chdir=TRUE)
|
||||
|
||||
# test code here
|
||||
dtrain <- xgb.DMatrix("agaricus.txt.train")
|
||||
dtest <- xgb.DMatrix("agaricus.txt.test")
|
||||
param = list('bst:max_depth'=2, 'bst:eta'=1, 'silent'=1, 'objective'='binary:logistic')
|
||||
watchlist <- list('train'=dtrain,'test'=dtest)
|
||||
bst <- xgb.train(param, dtrain, watchlist=watchlist, nround=3)
|
||||
|
||||
succ <- xgb.save(bst, "iter.model")
|
||||
print('finsih save model')
|
||||
bst2 <- xgb.Booster(modelfile="iter.model")
|
||||
pred = xgb.predict(bst2, dtest)
|
||||
126
wrapper/R-example/featmap.txt
Normal file
126
wrapper/R-example/featmap.txt
Normal file
@ -0,0 +1,126 @@
|
||||
0 cap-shape=bell i
|
||||
1 cap-shape=conical i
|
||||
2 cap-shape=convex i
|
||||
3 cap-shape=flat i
|
||||
4 cap-shape=knobbed i
|
||||
5 cap-shape=sunken i
|
||||
6 cap-surface=fibrous i
|
||||
7 cap-surface=grooves i
|
||||
8 cap-surface=scaly i
|
||||
9 cap-surface=smooth i
|
||||
10 cap-color=brown i
|
||||
11 cap-color=buff i
|
||||
12 cap-color=cinnamon i
|
||||
13 cap-color=gray i
|
||||
14 cap-color=green i
|
||||
15 cap-color=pink i
|
||||
16 cap-color=purple i
|
||||
17 cap-color=red i
|
||||
18 cap-color=white i
|
||||
19 cap-color=yellow i
|
||||
20 bruises?=bruises i
|
||||
21 bruises?=no i
|
||||
22 odor=almond i
|
||||
23 odor=anise i
|
||||
24 odor=creosote i
|
||||
25 odor=fishy i
|
||||
26 odor=foul i
|
||||
27 odor=musty i
|
||||
28 odor=none i
|
||||
29 odor=pungent i
|
||||
30 odor=spicy i
|
||||
31 gill-attachment=attached i
|
||||
32 gill-attachment=descending i
|
||||
33 gill-attachment=free i
|
||||
34 gill-attachment=notched i
|
||||
35 gill-spacing=close i
|
||||
36 gill-spacing=crowded i
|
||||
37 gill-spacing=distant i
|
||||
38 gill-size=broad i
|
||||
39 gill-size=narrow i
|
||||
40 gill-color=black i
|
||||
41 gill-color=brown i
|
||||
42 gill-color=buff i
|
||||
43 gill-color=chocolate i
|
||||
44 gill-color=gray i
|
||||
45 gill-color=green i
|
||||
46 gill-color=orange i
|
||||
47 gill-color=pink i
|
||||
48 gill-color=purple i
|
||||
49 gill-color=red i
|
||||
50 gill-color=white i
|
||||
51 gill-color=yellow i
|
||||
52 stalk-shape=enlarging i
|
||||
53 stalk-shape=tapering i
|
||||
54 stalk-root=bulbous i
|
||||
55 stalk-root=club i
|
||||
56 stalk-root=cup i
|
||||
57 stalk-root=equal i
|
||||
58 stalk-root=rhizomorphs i
|
||||
59 stalk-root=rooted i
|
||||
60 stalk-root=missing i
|
||||
61 stalk-surface-above-ring=fibrous i
|
||||
62 stalk-surface-above-ring=scaly i
|
||||
63 stalk-surface-above-ring=silky i
|
||||
64 stalk-surface-above-ring=smooth i
|
||||
65 stalk-surface-below-ring=fibrous i
|
||||
66 stalk-surface-below-ring=scaly i
|
||||
67 stalk-surface-below-ring=silky i
|
||||
68 stalk-surface-below-ring=smooth i
|
||||
69 stalk-color-above-ring=brown i
|
||||
70 stalk-color-above-ring=buff i
|
||||
71 stalk-color-above-ring=cinnamon i
|
||||
72 stalk-color-above-ring=gray i
|
||||
73 stalk-color-above-ring=orange i
|
||||
74 stalk-color-above-ring=pink i
|
||||
75 stalk-color-above-ring=red i
|
||||
76 stalk-color-above-ring=white i
|
||||
77 stalk-color-above-ring=yellow i
|
||||
78 stalk-color-below-ring=brown i
|
||||
79 stalk-color-below-ring=buff i
|
||||
80 stalk-color-below-ring=cinnamon i
|
||||
81 stalk-color-below-ring=gray i
|
||||
82 stalk-color-below-ring=orange i
|
||||
83 stalk-color-below-ring=pink i
|
||||
84 stalk-color-below-ring=red i
|
||||
85 stalk-color-below-ring=white i
|
||||
86 stalk-color-below-ring=yellow i
|
||||
87 veil-type=partial i
|
||||
88 veil-type=universal i
|
||||
89 veil-color=brown i
|
||||
90 veil-color=orange i
|
||||
91 veil-color=white i
|
||||
92 veil-color=yellow i
|
||||
93 ring-number=none i
|
||||
94 ring-number=one i
|
||||
95 ring-number=two i
|
||||
96 ring-type=cobwebby i
|
||||
97 ring-type=evanescent i
|
||||
98 ring-type=flaring i
|
||||
99 ring-type=large i
|
||||
100 ring-type=none i
|
||||
101 ring-type=pendant i
|
||||
102 ring-type=sheathing i
|
||||
103 ring-type=zone i
|
||||
104 spore-print-color=black i
|
||||
105 spore-print-color=brown i
|
||||
106 spore-print-color=buff i
|
||||
107 spore-print-color=chocolate i
|
||||
108 spore-print-color=green i
|
||||
109 spore-print-color=orange i
|
||||
110 spore-print-color=purple i
|
||||
111 spore-print-color=white i
|
||||
112 spore-print-color=yellow i
|
||||
113 population=abundant i
|
||||
114 population=clustered i
|
||||
115 population=numerous i
|
||||
116 population=scattered i
|
||||
117 population=several i
|
||||
118 population=solitary i
|
||||
119 habitat=grasses i
|
||||
120 habitat=leaves i
|
||||
121 habitat=meadows i
|
||||
122 habitat=paths i
|
||||
123 habitat=urban i
|
||||
124 habitat=waste i
|
||||
125 habitat=woods i
|
||||
12
wrapper/README.md
Normal file
12
wrapper/README.md
Normal file
@ -0,0 +1,12 @@
|
||||
Wrapper of XGBoost
|
||||
=====
|
||||
This folder provides wrapper of xgboost to other languages
|
||||
|
||||
|
||||
Python
|
||||
=====
|
||||
To make the python module, type ```make``` in the root directory of project
|
||||
|
||||
R
|
||||
=====
|
||||
To make the R wrapper, type ```make R``` in the root directory of project
|
||||
3
wrapper/python-example/README.md
Normal file
3
wrapper/python-example/README.md
Normal file
@ -0,0 +1,3 @@
|
||||
example to use python xgboost, the data is generated from demo/binary_classification, in libsvm format
|
||||
|
||||
for usage: see demo.py and comments in demo.py
|
||||
1611
wrapper/python-example/agaricus.txt.test
Normal file
1611
wrapper/python-example/agaricus.txt.test
Normal file
File diff suppressed because it is too large
Load Diff
6513
wrapper/python-example/agaricus.txt.train
Normal file
6513
wrapper/python-example/agaricus.txt.train
Normal file
File diff suppressed because it is too large
Load Diff
112
wrapper/python-example/demo.py
Executable file
112
wrapper/python-example/demo.py
Executable file
@ -0,0 +1,112 @@
|
||||
#!/usr/bin/python
|
||||
import sys
|
||||
import numpy as np
|
||||
import scipy.sparse
|
||||
# append the path to xgboost, you may need to change the following line
|
||||
# alternatively, you can add the path to PYTHONPATH environment variable
|
||||
sys.path.append('../')
|
||||
import xgboost as xgb
|
||||
|
||||
### simple example
|
||||
# load file from text file, also binary buffer generated by xgboost
|
||||
dtrain = xgb.DMatrix('agaricus.txt.train')
|
||||
dtest = xgb.DMatrix('agaricus.txt.test')
|
||||
|
||||
# specify parameters via map, definition are same as c++ version
|
||||
param = {'bst:max_depth':2, 'bst:eta':1, 'silent':1, 'objective':'binary:logistic' }
|
||||
|
||||
# specify validations set to watch performance
|
||||
evallist = [(dtest,'eval'), (dtrain,'train')]
|
||||
num_round = 2
|
||||
bst = xgb.train(param, dtrain, num_round, evallist)
|
||||
|
||||
# this is prediction
|
||||
preds = bst.predict(dtest)
|
||||
labels = dtest.get_label()
|
||||
print ('error=%f' % ( sum(1 for i in range(len(preds)) if int(preds[i]>0.5)!=labels[i]) /float(len(preds))))
|
||||
bst.save_model('0001.model')
|
||||
# dump model
|
||||
bst.dump_model('dump.raw.txt')
|
||||
# dump model with feature map
|
||||
bst.dump_model('dump.nice.txt','featmap.txt')
|
||||
|
||||
###
|
||||
# build dmatrix from scipy.sparse
|
||||
print ('start running example of build DMatrix from scipy.sparse')
|
||||
labels = []
|
||||
row = []; col = []; dat = []
|
||||
i = 0
|
||||
for l in open('agaricus.txt.train'):
|
||||
arr = l.split()
|
||||
labels.append( int(arr[0]))
|
||||
for it in arr[1:]:
|
||||
k,v = it.split(':')
|
||||
row.append(i); col.append(int(k)); dat.append(float(v))
|
||||
i += 1
|
||||
csr = scipy.sparse.csr_matrix( (dat, (row,col)) )
|
||||
dtrain = xgb.DMatrix( csr )
|
||||
dtrain.set_label(labels)
|
||||
evallist = [(dtest,'eval'), (dtrain,'train')]
|
||||
bst = xgb.train( param, dtrain, num_round, evallist )
|
||||
|
||||
print ('start running example of build DMatrix from numpy array')
|
||||
# NOTE: npymat is numpy array, we will convert it into scipy.sparse.csr_matrix in internal implementation,then convert to DMatrix
|
||||
npymat = csr.todense()
|
||||
dtrain = xgb.DMatrix( npymat)
|
||||
dtrain.set_label(labels)
|
||||
evallist = [(dtest,'eval'), (dtrain,'train')]
|
||||
bst = xgb.train( param, dtrain, num_round, evallist )
|
||||
|
||||
###
|
||||
# advanced: cutomsized loss function, set loss_type to 0, so that predict get untransformed score
|
||||
#
|
||||
print ('start running example to used cutomized objective function')
|
||||
|
||||
# note: for customized objective function, we leave objective as default
|
||||
# note: what we are getting is margin value in prediction
|
||||
# you must know what you are doing
|
||||
param = {'bst:max_depth':2, 'bst:eta':1, 'silent':1 }
|
||||
|
||||
# user define objective function, given prediction, return gradient and second order gradient
|
||||
# this is loglikelihood loss
|
||||
def logregobj(preds, dtrain):
|
||||
labels = dtrain.get_label()
|
||||
preds = 1.0 / (1.0 + np.exp(-preds))
|
||||
grad = preds - labels
|
||||
hess = preds * (1.0-preds)
|
||||
return grad, hess
|
||||
|
||||
# user defined evaluation function, return a pair metric_name, result
|
||||
# NOTE: when you do customized loss function, the default prediction value is margin
|
||||
# this may make buildin evalution metric not function properly
|
||||
# for example, we are doing logistic loss, the prediction is score before logistic transformation
|
||||
# the buildin evaluation error assumes input is after logistic transformation
|
||||
# Take this in mind when you use the customization, and maybe you need write customized evaluation function
|
||||
def evalerror(preds, dtrain):
|
||||
labels = dtrain.get_label()
|
||||
# return a pair metric_name, result
|
||||
# since preds are margin(before logistic transformation, cutoff at 0)
|
||||
return 'error', float(sum(labels != (preds > 0.0))) / len(labels)
|
||||
|
||||
# training with customized objective, we can also do step by step training
|
||||
# simply look at xgboost.py's implementation of train
|
||||
bst = xgb.train(param, dtrain, num_round, evallist, logregobj, evalerror)
|
||||
|
||||
|
||||
###
|
||||
# advanced: start from a initial base prediction
|
||||
#
|
||||
print ('start running example to start from a initial prediction')
|
||||
# specify parameters via map, definition are same as c++ version
|
||||
param = {'bst:max_depth':2, 'bst:eta':1, 'silent':1, 'objective':'binary:logistic' }
|
||||
# train xgboost for 1 round
|
||||
bst = xgb.train( param, dtrain, 1, evallist )
|
||||
# Note: we need the margin value instead of transformed prediction in set_base_margin
|
||||
# do predict with output_margin=True, will always give you margin values before logistic transformation
|
||||
ptrain = bst.predict(dtrain, output_margin=True)
|
||||
ptest = bst.predict(dtest, output_margin=True)
|
||||
dtrain.set_base_margin(ptrain)
|
||||
dtest.set_base_margin(ptest)
|
||||
|
||||
print ('this is result of running from initial prediction')
|
||||
bst = xgb.train( param, dtrain, 1, evallist )
|
||||
126
wrapper/python-example/featmap.txt
Normal file
126
wrapper/python-example/featmap.txt
Normal file
@ -0,0 +1,126 @@
|
||||
0 cap-shape=bell i
|
||||
1 cap-shape=conical i
|
||||
2 cap-shape=convex i
|
||||
3 cap-shape=flat i
|
||||
4 cap-shape=knobbed i
|
||||
5 cap-shape=sunken i
|
||||
6 cap-surface=fibrous i
|
||||
7 cap-surface=grooves i
|
||||
8 cap-surface=scaly i
|
||||
9 cap-surface=smooth i
|
||||
10 cap-color=brown i
|
||||
11 cap-color=buff i
|
||||
12 cap-color=cinnamon i
|
||||
13 cap-color=gray i
|
||||
14 cap-color=green i
|
||||
15 cap-color=pink i
|
||||
16 cap-color=purple i
|
||||
17 cap-color=red i
|
||||
18 cap-color=white i
|
||||
19 cap-color=yellow i
|
||||
20 bruises?=bruises i
|
||||
21 bruises?=no i
|
||||
22 odor=almond i
|
||||
23 odor=anise i
|
||||
24 odor=creosote i
|
||||
25 odor=fishy i
|
||||
26 odor=foul i
|
||||
27 odor=musty i
|
||||
28 odor=none i
|
||||
29 odor=pungent i
|
||||
30 odor=spicy i
|
||||
31 gill-attachment=attached i
|
||||
32 gill-attachment=descending i
|
||||
33 gill-attachment=free i
|
||||
34 gill-attachment=notched i
|
||||
35 gill-spacing=close i
|
||||
36 gill-spacing=crowded i
|
||||
37 gill-spacing=distant i
|
||||
38 gill-size=broad i
|
||||
39 gill-size=narrow i
|
||||
40 gill-color=black i
|
||||
41 gill-color=brown i
|
||||
42 gill-color=buff i
|
||||
43 gill-color=chocolate i
|
||||
44 gill-color=gray i
|
||||
45 gill-color=green i
|
||||
46 gill-color=orange i
|
||||
47 gill-color=pink i
|
||||
48 gill-color=purple i
|
||||
49 gill-color=red i
|
||||
50 gill-color=white i
|
||||
51 gill-color=yellow i
|
||||
52 stalk-shape=enlarging i
|
||||
53 stalk-shape=tapering i
|
||||
54 stalk-root=bulbous i
|
||||
55 stalk-root=club i
|
||||
56 stalk-root=cup i
|
||||
57 stalk-root=equal i
|
||||
58 stalk-root=rhizomorphs i
|
||||
59 stalk-root=rooted i
|
||||
60 stalk-root=missing i
|
||||
61 stalk-surface-above-ring=fibrous i
|
||||
62 stalk-surface-above-ring=scaly i
|
||||
63 stalk-surface-above-ring=silky i
|
||||
64 stalk-surface-above-ring=smooth i
|
||||
65 stalk-surface-below-ring=fibrous i
|
||||
66 stalk-surface-below-ring=scaly i
|
||||
67 stalk-surface-below-ring=silky i
|
||||
68 stalk-surface-below-ring=smooth i
|
||||
69 stalk-color-above-ring=brown i
|
||||
70 stalk-color-above-ring=buff i
|
||||
71 stalk-color-above-ring=cinnamon i
|
||||
72 stalk-color-above-ring=gray i
|
||||
73 stalk-color-above-ring=orange i
|
||||
74 stalk-color-above-ring=pink i
|
||||
75 stalk-color-above-ring=red i
|
||||
76 stalk-color-above-ring=white i
|
||||
77 stalk-color-above-ring=yellow i
|
||||
78 stalk-color-below-ring=brown i
|
||||
79 stalk-color-below-ring=buff i
|
||||
80 stalk-color-below-ring=cinnamon i
|
||||
81 stalk-color-below-ring=gray i
|
||||
82 stalk-color-below-ring=orange i
|
||||
83 stalk-color-below-ring=pink i
|
||||
84 stalk-color-below-ring=red i
|
||||
85 stalk-color-below-ring=white i
|
||||
86 stalk-color-below-ring=yellow i
|
||||
87 veil-type=partial i
|
||||
88 veil-type=universal i
|
||||
89 veil-color=brown i
|
||||
90 veil-color=orange i
|
||||
91 veil-color=white i
|
||||
92 veil-color=yellow i
|
||||
93 ring-number=none i
|
||||
94 ring-number=one i
|
||||
95 ring-number=two i
|
||||
96 ring-type=cobwebby i
|
||||
97 ring-type=evanescent i
|
||||
98 ring-type=flaring i
|
||||
99 ring-type=large i
|
||||
100 ring-type=none i
|
||||
101 ring-type=pendant i
|
||||
102 ring-type=sheathing i
|
||||
103 ring-type=zone i
|
||||
104 spore-print-color=black i
|
||||
105 spore-print-color=brown i
|
||||
106 spore-print-color=buff i
|
||||
107 spore-print-color=chocolate i
|
||||
108 spore-print-color=green i
|
||||
109 spore-print-color=orange i
|
||||
110 spore-print-color=purple i
|
||||
111 spore-print-color=white i
|
||||
112 spore-print-color=yellow i
|
||||
113 population=abundant i
|
||||
114 population=clustered i
|
||||
115 population=numerous i
|
||||
116 population=scattered i
|
||||
117 population=several i
|
||||
118 population=solitary i
|
||||
119 habitat=grasses i
|
||||
120 habitat=leaves i
|
||||
121 habitat=meadows i
|
||||
122 habitat=paths i
|
||||
123 habitat=urban i
|
||||
124 habitat=waste i
|
||||
125 habitat=woods i
|
||||
136
wrapper/xgboost.R
Normal file
136
wrapper/xgboost.R
Normal file
@ -0,0 +1,136 @@
|
||||
# load in library
|
||||
dyn.load("./libxgboostR.so")
|
||||
|
||||
# constructing DMatrix
|
||||
xgb.DMatrix <- function(data) {
|
||||
if (typeof(data) == "character") {
|
||||
handle <- .Call("XGDMatrixCreateFromFile_R", data, as.integer(FALSE))
|
||||
}else {
|
||||
stop("xgb.DMatrix cannot recognize data type")
|
||||
}
|
||||
return(structure(handle, class="xgb.DMatrix"))
|
||||
}
|
||||
# construct a Booster from cachelist
|
||||
xgb.Booster <- function(params = list(), cachelist = list(), modelfile = NULL) {
|
||||
if (typeof(cachelist) != "list") {
|
||||
stop("xgb.Booster: only accepts list of DMatrix as cachelist")
|
||||
}
|
||||
for (dm in cachelist) {
|
||||
if (class(dm) != "xgb.DMatrix") {
|
||||
stop("xgb.Booster: only accepts list of DMatrix as cachelist")
|
||||
}
|
||||
}
|
||||
handle <- .Call("XGBoosterCreate_R", cachelist)
|
||||
for (i in 1:length(params)) {
|
||||
p = params[i]
|
||||
.Call("XGBoosterSetParam_R", handle, names(p), as.character(p))
|
||||
}
|
||||
if (!is.null(modelfile)) {
|
||||
if (typeof(modelfile) != "character"){
|
||||
stop("xgb.Booster: modelfile must be character");
|
||||
}
|
||||
.Call("XGBoosterLoadModel_R", handle, modelfile)
|
||||
}
|
||||
return(structure(handle, class="xgb.Booster"))
|
||||
}
|
||||
# train a model using given parameters
|
||||
xgb.train <- function(params, dtrain, nrounds=10, watchlist=list(), obj=NULL) {
|
||||
if (typeof(params) != "list") {
|
||||
stop("xgb.train: first argument params must be list");
|
||||
}
|
||||
if (class(dtrain) != "xgb.DMatrix") {
|
||||
stop("xgb.train: second argument dtrain must be xgb.DMatrix");
|
||||
}
|
||||
bst <- xgb.Booster(params, append(watchlist,dtrain))
|
||||
for (i in 1:nrounds) {
|
||||
if (is.null(obj)) {
|
||||
succ <- xgb.iter.update(bst, dtrain, i-1)
|
||||
} else {
|
||||
pred = xgb.predict(bst, dtrain)
|
||||
gpair = obj(pred, dtrain)
|
||||
succ <- xgb.iter.boost(bst, dtrain, gpair)
|
||||
}
|
||||
if (length(watchlist) != 0) {
|
||||
msg <- xgb.iter.eval(bst, watchlist, i-1)
|
||||
cat(msg); cat("\n")
|
||||
}
|
||||
}
|
||||
return(bst)
|
||||
}
|
||||
# save model or DMatrix to file
|
||||
xgb.save <- function(handle, fname) {
|
||||
if (typeof(fname) != "character") {
|
||||
stop("xgb.save: fname must be character");
|
||||
}
|
||||
if (class(handle) == "xgb.Booster") {
|
||||
.Call("XGBoosterSaveModel_R", handle, fname);
|
||||
return(TRUE)
|
||||
}
|
||||
if (class(handle) == "xgb.DMatrix") {
|
||||
.Call("XGDMatrixSaveBinary_R", handle, fname, as.integer(FALSE))
|
||||
return(TRUE)
|
||||
}
|
||||
stop("xgb.save: the input must be either xgb.DMatrix or xgb.Booster")
|
||||
return(FALSE)
|
||||
}
|
||||
# predict
|
||||
xgb.predict <- function(booster, dmat, outputmargin = FALSE) {
|
||||
if (class(booster) != "xgb.Booster") {
|
||||
stop("xgb.iter.update: first argument must be type xgb.Booster")
|
||||
}
|
||||
if (class(dmat) != "xgb.DMatrix") {
|
||||
stop("xgb.iter.update: second argument must be type xgb.DMatrix")
|
||||
}
|
||||
ret = .Call("XGBoosterPredict_R", booster, dmat, as.integer(outputmargin))
|
||||
return(ret)
|
||||
}
|
||||
##--------------------------------------
|
||||
# the following are low level iteratively function, not needed
|
||||
# if you do not want to use them
|
||||
#---------------------------------------
|
||||
# iteratively update booster with dtrain
|
||||
xgb.iter.update <- function(booster, dtrain, iter) {
|
||||
if (class(booster) != "xgb.Booster") {
|
||||
stop("xgb.iter.update: first argument must be type xgb.Booster")
|
||||
}
|
||||
if (class(dtrain) != "xgb.DMatrix") {
|
||||
stop("xgb.iter.update: second argument must be type xgb.DMatrix")
|
||||
}
|
||||
.Call("XGBoosterUpdateOneIter_R", booster, as.integer(iter), dtrain)
|
||||
return(TRUE)
|
||||
}
|
||||
# iteratively update booster with customized statistics
|
||||
xgb.iter.boost <- function(booster, dtrain, gpair) {
|
||||
if (class(booster) != "xgb.Booster") {
|
||||
stop("xgb.iter.update: first argument must be type xgb.Booster")
|
||||
}
|
||||
if (class(dtrain) != "xgb.DMatrix") {
|
||||
stop("xgb.iter.update: second argument must be type xgb.DMatrix")
|
||||
}
|
||||
.Call("XGBoosterBoostOneIter_R", booster, dtrain, gpair$grad, gpair$hess)
|
||||
return(TRUE)
|
||||
}
|
||||
# iteratively evaluate one iteration
|
||||
xgb.iter.eval <- function(booster, watchlist, iter) {
|
||||
if (class(booster) != "xgb.Booster") {
|
||||
stop("xgb.eval: first argument must be type xgb.Booster")
|
||||
}
|
||||
if (typeof(watchlist) != "list") {
|
||||
stop("xgb.eval: only accepts list of DMatrix as watchlist")
|
||||
}
|
||||
for (w in watchlist) {
|
||||
if (class(w) != "xgb.DMatrix") {
|
||||
stop("xgb.eval: watch list can only contain xgb.DMatrix")
|
||||
}
|
||||
}
|
||||
evnames <- list()
|
||||
for (i in 1:length(watchlist)) {
|
||||
w <- watchlist[i]
|
||||
if (length(names(w)) == 0) {
|
||||
stop("xgb.eval: name tag must be presented for every elements in watchlist")
|
||||
}
|
||||
evnames <- append(evnames, names(w))
|
||||
}
|
||||
msg <- .Call("XGBoosterEvalOneIter_R", booster, as.integer(iter), watchlist, evnames)
|
||||
return(msg)
|
||||
}
|
||||
266
wrapper/xgboost.py
Normal file
266
wrapper/xgboost.py
Normal file
@ -0,0 +1,266 @@
|
||||
# Author: Tianqi Chen, Bing Xu
|
||||
# module for xgboost
|
||||
import ctypes
|
||||
import os
|
||||
# optinally have scipy sparse, though not necessary
|
||||
import numpy
|
||||
import sys
|
||||
import numpy.ctypeslib
|
||||
import scipy.sparse as scp
|
||||
|
||||
# set this line correctly
|
||||
XGBOOST_PATH = os.path.dirname(__file__)+'/libxgboostwrapper.so'
|
||||
|
||||
# load in xgboost library
|
||||
xglib = ctypes.cdll.LoadLibrary(XGBOOST_PATH)
|
||||
|
||||
xglib.XGDMatrixCreateFromFile.restype = ctypes.c_void_p
|
||||
xglib.XGDMatrixCreateFromCSR.restype = ctypes.c_void_p
|
||||
xglib.XGDMatrixCreateFromMat.restype = ctypes.c_void_p
|
||||
xglib.XGDMatrixSliceDMatrix.restype = ctypes.c_void_p
|
||||
xglib.XGDMatrixGetFloatInfo.restype = ctypes.POINTER(ctypes.c_float)
|
||||
xglib.XGDMatrixNumRow.restype = ctypes.c_ulong
|
||||
|
||||
xglib.XGBoosterCreate.restype = ctypes.c_void_p
|
||||
xglib.XGBoosterPredict.restype = ctypes.POINTER(ctypes.c_float)
|
||||
xglib.XGBoosterEvalOneIter.restype = ctypes.c_char_p
|
||||
xglib.XGBoosterDumpModel.restype = ctypes.POINTER(ctypes.c_char_p)
|
||||
|
||||
|
||||
def ctypes2numpy(cptr, length):
|
||||
# convert a ctypes pointer array to numpy
|
||||
assert isinstance(cptr, ctypes.POINTER(ctypes.c_float))
|
||||
res = numpy.zeros(length, dtype='float32')
|
||||
assert ctypes.memmove(res.ctypes.data, cptr, length * res.strides[0])
|
||||
return res
|
||||
|
||||
# data matrix used in xgboost
|
||||
class DMatrix:
|
||||
# constructor
|
||||
def __init__(self, data, label=None, missing=0.0, weight = None):
|
||||
# force into void_p, mac need to pass things in as void_p
|
||||
if data == None:
|
||||
self.handle = None
|
||||
return
|
||||
if isinstance(data, str):
|
||||
self.handle = ctypes.c_void_p(
|
||||
xglib.XGDMatrixCreateFromFile(ctypes.c_char_p(data.encode('utf-8')), 1))
|
||||
elif isinstance(data, scp.csr_matrix):
|
||||
self.__init_from_csr(data)
|
||||
elif isinstance(data, numpy.ndarray) and len(data.shape) == 2:
|
||||
self.__init_from_npy2d(data, missing)
|
||||
else:
|
||||
try:
|
||||
csr = scp.csr_matrix(data)
|
||||
self.__init_from_csr(csr)
|
||||
except:
|
||||
raise Exception("can not intialize DMatrix from"+str(type(data)))
|
||||
if label != None:
|
||||
self.set_label(label)
|
||||
if weight !=None:
|
||||
self.set_weight(weight)
|
||||
# convert data from csr matrix
|
||||
def __init_from_csr(self, csr):
|
||||
assert len(csr.indices) == len(csr.data)
|
||||
self.handle = ctypes.c_void_p(xglib.XGDMatrixCreateFromCSR(
|
||||
(ctypes.c_ulong * len(csr.indptr))(*csr.indptr),
|
||||
(ctypes.c_uint * len(csr.indices))(*csr.indices),
|
||||
(ctypes.c_float * len(csr.data))(*csr.data),
|
||||
len(csr.indptr), len(csr.data)))
|
||||
# convert data from numpy matrix
|
||||
def __init_from_npy2d(self,mat,missing):
|
||||
data = numpy.array(mat.reshape(mat.size), dtype='float32')
|
||||
self.handle = ctypes.c_void_p(xglib.XGDMatrixCreateFromMat(
|
||||
data.ctypes.data_as(ctypes.POINTER(ctypes.c_float)),
|
||||
mat.shape[0], mat.shape[1], ctypes.c_float(missing)))
|
||||
# destructor
|
||||
def __del__(self):
|
||||
xglib.XGDMatrixFree(self.handle)
|
||||
def __get_float_info(self, field):
|
||||
length = ctypes.c_ulong()
|
||||
ret = xglib.XGDMatrixGetFloatInfo(self.handle, ctypes.c_char_p(field.encode('utf-8')),
|
||||
ctypes.byref(length))
|
||||
return ctypes2numpy(ret, length.value)
|
||||
def __set_float_info(self, field, data):
|
||||
xglib.XGDMatrixSetFloatInfo(self.handle,ctypes.c_char_p(field.encode('utf-8')),
|
||||
(ctypes.c_float*len(data))(*data), len(data))
|
||||
# load data from file
|
||||
def save_binary(self, fname, silent=True):
|
||||
xglib.XGDMatrixSaveBinary(self.handle, ctypes.c_char_p(fname.encode('utf-8')), int(silent))
|
||||
# set label of dmatrix
|
||||
def set_label(self, label):
|
||||
self.__set_float_info('label', label)
|
||||
# set weight of each instances
|
||||
def set_weight(self, weight):
|
||||
self.__set_float_info('weight', weight)
|
||||
# set initialized margin prediction
|
||||
def set_base_margin(self, margin):
|
||||
"""
|
||||
set base margin of booster to start from
|
||||
this can be used to specify a prediction value of
|
||||
existing model to be base_margin
|
||||
However, remember margin is needed, instead of transformed prediction
|
||||
e.g. for logistic regression: need to put in value before logistic transformation
|
||||
see also example/demo.py
|
||||
"""
|
||||
self.__set_float_info('base_margin', margin)
|
||||
# set group size of dmatrix, used for rank
|
||||
def set_group(self, group):
|
||||
xglib.XGDMatrixSetGroup(self.handle, (ctypes.c_uint*len(group))(*group), len(group))
|
||||
# get label from dmatrix
|
||||
def get_label(self):
|
||||
return self.__get_float_info('label')
|
||||
# get weight from dmatrix
|
||||
def get_weight(self):
|
||||
return self.__get_float_info('weight')
|
||||
# get base_margin from dmatrix
|
||||
def get_base_margin(self):
|
||||
return self.__get_float_info('base_margin')
|
||||
def num_row(self):
|
||||
return xglib.XGDMatrixNumRow(self.handle)
|
||||
# slice the DMatrix to return a new DMatrix that only contains rindex
|
||||
def slice(self, rindex):
|
||||
res = DMatrix(None)
|
||||
res.handle = ctypes.c_void_p(xglib.XGDMatrixSliceDMatrix(
|
||||
self.handle, (ctypes.c_int*len(rindex))(*rindex), len(rindex)))
|
||||
return res
|
||||
|
||||
class Booster:
|
||||
"""learner class """
|
||||
def __init__(self, params={}, cache=[], model_name = None):
|
||||
""" constructor, param: """
|
||||
for d in cache:
|
||||
assert isinstance(d, DMatrix)
|
||||
dmats = (ctypes.c_void_p * len(cache))(*[ d.handle for d in cache])
|
||||
self.handle = ctypes.c_void_p(xglib.XGBoosterCreate(dmats, len(cache)))
|
||||
self.set_param({'seed':0})
|
||||
self.set_param(params)
|
||||
if model_name != None:
|
||||
self.load_model(model_name)
|
||||
def __del__(self):
|
||||
xglib.XGBoosterFree(self.handle)
|
||||
def set_param(self, params, pv=None):
|
||||
if isinstance(params, dict):
|
||||
for k, v in params.items():
|
||||
xglib.XGBoosterSetParam(
|
||||
self.handle, ctypes.c_char_p(k.encode('utf-8')),
|
||||
ctypes.c_char_p(str(v).encode('utf-8')))
|
||||
elif isinstance(params,str) and pv != None:
|
||||
xglib.XGBoosterSetParam(
|
||||
self.handle, ctypes.c_char_p(params.encode('utf-8')),
|
||||
ctypes.c_char_p(str(pv).encode('utf-8')))
|
||||
else:
|
||||
for k, v in params:
|
||||
xglib.XGBoosterSetParam(
|
||||
self.handle, ctypes.c_char_p(k.encode('utf-8')),
|
||||
ctypes.c_char_p(str(v).encode('utf-8')))
|
||||
def update(self, dtrain, it):
|
||||
"""
|
||||
update
|
||||
dtrain: the training DMatrix
|
||||
it: current iteration number
|
||||
"""
|
||||
assert isinstance(dtrain, DMatrix)
|
||||
xglib.XGBoosterUpdateOneIter(self.handle, it, dtrain.handle)
|
||||
def boost(self, dtrain, grad, hess):
|
||||
""" update """
|
||||
assert len(grad) == len(hess)
|
||||
assert isinstance(dtrain, DMatrix)
|
||||
xglib.XGBoosterBoostOneIter(self.handle, dtrain.handle,
|
||||
(ctypes.c_float*len(grad))(*grad),
|
||||
(ctypes.c_float*len(hess))(*hess),
|
||||
len(grad))
|
||||
def eval_set(self, evals, it = 0):
|
||||
for d in evals:
|
||||
assert isinstance(d[0], DMatrix)
|
||||
assert isinstance(d[1], str)
|
||||
dmats = (ctypes.c_void_p * len(evals) )(*[ d[0].handle for d in evals])
|
||||
evnames = (ctypes.c_char_p * len(evals))(
|
||||
* [ctypes.c_char_p(d[1].encode('utf-8')) for d in evals])
|
||||
return xglib.XGBoosterEvalOneIter(self.handle, it, dmats, evnames, len(evals))
|
||||
def eval(self, mat, name = 'eval', it = 0):
|
||||
return self.eval_set( [(mat,name)], it)
|
||||
def predict(self, data, output_margin=False):
|
||||
"""
|
||||
predict with data
|
||||
data: the dmatrix storing the input
|
||||
output_margin: whether output raw margin value that is untransformed
|
||||
"""
|
||||
length = ctypes.c_ulong()
|
||||
preds = xglib.XGBoosterPredict(self.handle, data.handle,
|
||||
int(output_margin), ctypes.byref(length))
|
||||
return ctypes2numpy(preds, length.value)
|
||||
def save_model(self, fname):
|
||||
""" save model to file """
|
||||
xglib.XGBoosterSaveModel(self.handle, ctypes.c_char_p(fname.encode('utf-8')))
|
||||
def load_model(self, fname):
|
||||
"""load model from file"""
|
||||
xglib.XGBoosterLoadModel( self.handle, ctypes.c_char_p(fname.encode('utf-8')) )
|
||||
def dump_model(self, fo, fmap=''):
|
||||
"""dump model into text file"""
|
||||
if isinstance(fo,str):
|
||||
fo = open(fo,'w')
|
||||
need_close = True
|
||||
else:
|
||||
need_close = False
|
||||
ret = self.get_dump(fmap)
|
||||
for i in range(len(ret)):
|
||||
fo.write('booster[%d]:\n' %i)
|
||||
fo.write( ret[i] )
|
||||
if need_close:
|
||||
fo.close()
|
||||
def get_dump(self, fmap=''):
|
||||
"""get dump of model as list of strings """
|
||||
length = ctypes.c_ulong()
|
||||
sarr = xglib.XGBoosterDumpModel(self.handle, ctypes.c_char_p(fmap.encode('utf-8')), ctypes.byref(length))
|
||||
res = []
|
||||
for i in range(length.value):
|
||||
res.append( str(sarr[i]) )
|
||||
return res
|
||||
def get_fscore(self, fmap=''):
|
||||
""" get feature importance of each feature """
|
||||
trees = self.get_dump(fmap)
|
||||
fmap = {}
|
||||
for tree in trees:
|
||||
print tree
|
||||
for l in tree.split('\n'):
|
||||
arr = l.split('[')
|
||||
if len(arr) == 1:
|
||||
continue
|
||||
fid = arr[1].split(']')[0]
|
||||
fid = fid.split('<')[0]
|
||||
if fid not in fmap:
|
||||
fmap[fid] = 1
|
||||
else:
|
||||
fmap[fid]+= 1
|
||||
return fmap
|
||||
|
||||
def evaluate(bst, evals, it, feval = None):
|
||||
"""evaluation on eval set"""
|
||||
if feval != None:
|
||||
res = '[%d]' % it
|
||||
for dm, evname in evals:
|
||||
name, val = feval(bst.predict(dm), dm)
|
||||
res += '\t%s-%s:%f' % (evname, name, val)
|
||||
else:
|
||||
res = bst.eval_set(evals, it)
|
||||
|
||||
return res
|
||||
|
||||
def train(params, dtrain, num_boost_round = 10, evals = [], obj=None, feval=None):
|
||||
""" train a booster with given paramaters """
|
||||
bst = Booster(params, [dtrain]+[ d[0] for d in evals ] )
|
||||
if obj == None:
|
||||
for i in range(num_boost_round):
|
||||
bst.update( dtrain, i )
|
||||
if len(evals) != 0:
|
||||
sys.stderr.write(evaluate(bst, evals, i, feval)+'\n')
|
||||
else:
|
||||
# try customized objective function
|
||||
for i in range(num_boost_round):
|
||||
pred = bst.predict( dtrain )
|
||||
grad, hess = obj( pred, dtrain )
|
||||
bst.boost( dtrain, grad, hess )
|
||||
if len(evals) != 0:
|
||||
sys.stderr.write(evaluate(bst, evals, i, feval)+'\n')
|
||||
return bst
|
||||
115
wrapper/xgboost_R.cpp
Normal file
115
wrapper/xgboost_R.cpp
Normal file
@ -0,0 +1,115 @@
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include "xgboost_wrapper.h"
|
||||
#include "xgboost_R.h"
|
||||
#include "../src/utils/utils.h"
|
||||
#include "../src/utils/omp.h"
|
||||
|
||||
using namespace xgboost;
|
||||
|
||||
extern "C" {
|
||||
void _DMatrixFinalizer(SEXP ext) {
|
||||
if (R_ExternalPtrAddr(ext) == NULL) return;
|
||||
XGDMatrixFree(R_ExternalPtrAddr(ext));
|
||||
R_ClearExternalPtr(ext);
|
||||
}
|
||||
SEXP XGDMatrixCreateFromFile_R(SEXP fname, SEXP silent) {
|
||||
void *handle = XGDMatrixCreateFromFile(CHAR(asChar(fname)), asInteger(silent));
|
||||
SEXP ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue));
|
||||
R_RegisterCFinalizerEx(ret, _DMatrixFinalizer, TRUE);
|
||||
UNPROTECT(1);
|
||||
return ret;
|
||||
}
|
||||
void XGDMatrixSaveBinary_R(SEXP handle, SEXP fname, SEXP silent) {
|
||||
XGDMatrixSaveBinary(R_ExternalPtrAddr(handle),
|
||||
CHAR(asChar(fname)), asInteger(silent));
|
||||
}
|
||||
|
||||
// functions related to booster
|
||||
void _BoosterFinalizer(SEXP ext) {
|
||||
if (R_ExternalPtrAddr(ext) == NULL) return;
|
||||
XGBoosterFree(R_ExternalPtrAddr(ext));
|
||||
R_ClearExternalPtr(ext);
|
||||
}
|
||||
SEXP XGBoosterCreate_R(SEXP dmats) {
|
||||
int len = length(dmats);
|
||||
std::vector<void*> dvec;
|
||||
for (int i = 0; i < len; ++i){
|
||||
dvec.push_back(R_ExternalPtrAddr(VECTOR_ELT(dmats, i)));
|
||||
}
|
||||
void *handle = XGBoosterCreate(&dvec[0], dvec.size());
|
||||
SEXP ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue));
|
||||
R_RegisterCFinalizerEx(ret, _BoosterFinalizer, TRUE);
|
||||
UNPROTECT(1);
|
||||
return ret;
|
||||
}
|
||||
void XGBoosterSetParam_R(SEXP handle, SEXP name, SEXP val) {
|
||||
XGBoosterSetParam(R_ExternalPtrAddr(handle),
|
||||
CHAR(asChar(name)),
|
||||
CHAR(asChar(val)));
|
||||
}
|
||||
void XGBoosterUpdateOneIter_R(SEXP handle, SEXP iter, SEXP dtrain) {
|
||||
XGBoosterUpdateOneIter(R_ExternalPtrAddr(handle),
|
||||
asInteger(iter),
|
||||
R_ExternalPtrAddr(dtrain));
|
||||
}
|
||||
void XGBoosterBoostOneIter_R(SEXP handle, SEXP dtrain, SEXP grad, SEXP hess) {
|
||||
utils::Check(length(grad) == length(hess), "gradient and hess must have same length");
|
||||
int len = length(grad);
|
||||
std::vector<float> tgrad(len), thess(len);
|
||||
#pragma omp parallel for schedule(static)
|
||||
for (int j = 0; j < len; ++j) {
|
||||
tgrad[j] = REAL(grad)[j];
|
||||
thess[j] = REAL(hess)[j];
|
||||
}
|
||||
XGBoosterBoostOneIter(R_ExternalPtrAddr(handle),
|
||||
R_ExternalPtrAddr(dtrain),
|
||||
&tgrad[0], &thess[0], len);
|
||||
}
|
||||
SEXP XGBoosterEvalOneIter_R(SEXP handle, SEXP iter, SEXP dmats, SEXP evnames) {
|
||||
utils::Check(length(dmats) == length(evnames), "dmats and evnams must have same length");
|
||||
int len = length(dmats);
|
||||
std::vector<void*> vec_dmats;
|
||||
std::vector<std::string> vec_names;
|
||||
std::vector<const char*> vec_sptr;
|
||||
for (int i = 0; i < len; ++i){
|
||||
vec_dmats.push_back(R_ExternalPtrAddr(VECTOR_ELT(dmats, i)));
|
||||
vec_names.push_back(std::string(CHAR(asChar(VECTOR_ELT(evnames, i)))));
|
||||
vec_sptr.push_back(vec_names.back().c_str());
|
||||
}
|
||||
return mkString(XGBoosterEvalOneIter(R_ExternalPtrAddr(handle),
|
||||
asInteger(iter),
|
||||
&vec_dmats[0], &vec_sptr[0], len));
|
||||
}
|
||||
SEXP XGBoosterPredict_R(SEXP handle, SEXP dmat, SEXP output_margin) {
|
||||
size_t olen;
|
||||
const float *res = XGBoosterPredict(R_ExternalPtrAddr(handle),
|
||||
R_ExternalPtrAddr(dmat),
|
||||
asInteger(output_margin),
|
||||
&olen);
|
||||
SEXP ret = PROTECT(allocVector(REALSXP, olen));
|
||||
for (size_t i = 0; i < olen; ++i) {
|
||||
REAL(ret)[i] = res[i];
|
||||
}
|
||||
UNPROTECT(1);
|
||||
return ret;
|
||||
}
|
||||
void XGBoosterLoadModel_R(SEXP handle, SEXP fname) {
|
||||
XGBoosterLoadModel(R_ExternalPtrAddr(handle), CHAR(asChar(fname)));
|
||||
}
|
||||
void XGBoosterSaveModel_R(SEXP handle, SEXP fname) {
|
||||
XGBoosterSaveModel(R_ExternalPtrAddr(handle), CHAR(asChar(fname)));
|
||||
}
|
||||
void XGBoosterDumpModel_R(SEXP handle, SEXP fname, SEXP fmap) {
|
||||
size_t olen;
|
||||
const char **res = XGBoosterDumpModel(R_ExternalPtrAddr(handle),
|
||||
CHAR(asChar(fmap)),
|
||||
&olen);
|
||||
FILE *fo = utils::FopenCheck(CHAR(asChar(fname)), "w");
|
||||
for (size_t i = 0; i < olen; ++i) {
|
||||
fprintf(fo, "booster[%lu]:\n", i);
|
||||
fprintf(fo, "%s\n", res[i]);
|
||||
}
|
||||
fclose(fo);
|
||||
}
|
||||
}
|
||||
91
wrapper/xgboost_R.h
Normal file
91
wrapper/xgboost_R.h
Normal file
@ -0,0 +1,91 @@
|
||||
#ifndef XGBOOST_WRAPPER_R_H_
#define XGBOOST_WRAPPER_R_H_
/*!
 * \file xgboost_R.h
 * \author Tianqi Chen
 * \brief R wrapper of xgboost
 */
extern "C" {
#include <Rinternals.h>
}

extern "C" {
  /*!
   * \brief load a data matrix
   * \param fname name of the content
   * \param silent whether to print messages
   * \return a loaded data matrix
   */
  SEXP XGDMatrixCreateFromFile_R(SEXP fname, SEXP silent);
  /*!
   * \brief save a data matrix into a binary file
   * \param handle an instance of data matrix
   * \param fname file name
   * \param silent print statistics when saving
   */
  void XGDMatrixSaveBinary_R(SEXP handle, SEXP fname, SEXP silent);
  /*!
   * \brief create xgboost learner
   * \param dmats a list of dmatrix handles that will be cached
   */
  SEXP XGBoosterCreate_R(SEXP dmats);
  /*!
   * \brief set parameters
   * \param handle handle
   * \param name parameter name
   * \param val value of parameter
   */
  void XGBoosterSetParam_R(SEXP handle, SEXP name, SEXP val);
  /*!
   * \brief update the model in one round using dtrain
   * \param handle handle
   * \param iter current iteration rounds
   * \param dtrain training data
   */
  void XGBoosterUpdateOneIter_R(SEXP handle, SEXP iter, SEXP dtrain);
  /*!
   * \brief update the model, by directly specifying gradient and second order gradient;
   *        this can be used to replace UpdateOneIter, to support customized loss functions
   * \param handle handle
   * \param dtrain training data
   * \param grad gradient statistics
   * \param hess second order gradient statistics
   */
  void XGBoosterBoostOneIter_R(SEXP handle, SEXP dtrain, SEXP grad, SEXP hess);
  /*!
   * \brief get evaluation statistics for xgboost
   * \param handle handle
   * \param iter current iteration rounds
   * \param dmats list of handles to dmatrices
   * \param evnames names of each evaluation set
   * \return the string containing evaluation statistics
   */
  SEXP XGBoosterEvalOneIter_R(SEXP handle, SEXP iter, SEXP dmats, SEXP evnames);
  /*!
   * \brief make prediction based on dmat
   * \param handle handle
   * \param dmat data matrix
   * \param output_margin whether to only output raw margin values
   */
  SEXP XGBoosterPredict_R(SEXP handle, SEXP dmat, SEXP output_margin);
  /*!
   * \brief load model from existing file
   * \param handle handle
   * \param fname file name
   */
  void XGBoosterLoadModel_R(SEXP handle, SEXP fname);
  /*!
   * \brief save model into existing file
   * \param handle handle
   * \param fname file name
   */
  void XGBoosterSaveModel_R(SEXP handle, SEXP fname);
  /*!
   * \brief dump model into a text file
   * \param handle handle
   * \param fname file name the model is dumped into
   * \param fmap path to the feature map; may be an empty string
   */
  void XGBoosterDumpModel_R(SEXP handle, SEXP fname, SEXP fmap);
};
#endif  // XGBOOST_WRAPPER_R_H_
|
||||
249
wrapper/xgboost_wrapper.cpp
Normal file
249
wrapper/xgboost_wrapper.cpp
Normal file
@ -0,0 +1,249 @@
|
||||
// implementations in ctypes
|
||||
#include <cstdio>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <cstring>
|
||||
#include <algorithm>
|
||||
#include "./xgboost_wrapper.h"
|
||||
#include "../src/data.h"
|
||||
#include "../src/learner/learner-inl.hpp"
|
||||
#include "../src/io/io.h"
|
||||
#include "../src/io/simple_dmatrix-inl.hpp"
|
||||
|
||||
using namespace xgboost;
|
||||
using namespace xgboost::io;
|
||||
|
||||
namespace xgboost {
namespace wrapper {
// booster wrapper class
// Thin adapter over BoostLearner that adds lazy model initialization and
// owns scratch buffers whose lifetime must outlive a single C API call.
class Booster: public learner::BoostLearner<FMatrixS> {
 public:
  // mats: data matrices registered as prediction/training cache
  explicit Booster(const std::vector<DataMatrix*>& mats) {
    this->silent = 1;
    this->init_model = false;
    this->SetCacheData(mats);
  }
  // Predict on dmat; result points into this->preds_, valid until the next
  // Pred call on this booster. *len receives the number of predictions.
  const float *Pred(const DataMatrix &dmat, int output_margin, size_t *len) {
    this->CheckInitModel();
    this->Predict(dmat, output_margin, &this->preds_);
    *len = this->preds_.size();
    return &this->preds_[0];
  }
  // Boost one iteration from caller-supplied gradient/hessian arrays of
  // length len (one pair per training instance).
  inline void BoostOneIter(const DataMatrix &train,
                           float *grad, float *hess, size_t len) {
    this->gpair_.resize(len);
    const unsigned ndata = static_cast<unsigned>(len);
    // independent element-wise packing; safe to parallelize
    #pragma omp parallel for schedule(static)
    for (unsigned j = 0; j < ndata; ++j) {
      gpair_[j] = bst_gpair(grad[j], hess[j]);
    }
    gbm_->DoBoost(train.fmat, train.info.info, &gpair_);
  }
  // Lazily initialize the model on first use (idempotent).
  inline void CheckInitModel(void) {
    if (!init_model) {
      this->InitModel(); init_model = true;
    }
  }
  // Loading a model counts as initialization.
  inline void LoadModel(const char *fname) {
    learner::BoostLearner<FMatrixS>::LoadModel(fname);
    this->init_model = true;
  }
  // Dump the model; returned pointers alias model_dump and stay valid until
  // the next GetModelDump call on this booster.
  inline const char** GetModelDump(const utils::FeatMap& fmap, bool with_stats, size_t *len) {
    model_dump = this->DumpModel(fmap, with_stats);
    model_dump_cptr.resize(model_dump.size());
    for (size_t i = 0; i < model_dump.size(); ++i) {
      model_dump_cptr[i] = model_dump[i].c_str();
    }
    *len = model_dump.size();
    return &model_dump_cptr[0];
  }
  // temporal fields
  // temporal data to save evaluation dump (kept alive for the C caller)
  std::string eval_str;
  // temporal space to save model dump (backing storage for model_dump_cptr)
  std::vector<std::string> model_dump;
  std::vector<const char*> model_dump_cptr;

 private:
  // whether InitModel/LoadModel has run yet
  bool init_model;
};
}  // namespace wrapper
}  // namespace xgboost
|
||||
|
||||
using namespace xgboost::wrapper;
|
||||
|
||||
extern "C"{
|
||||
  // Load a data matrix from file; caller owns the returned handle
  // (free with XGDMatrixFree).
  void* XGDMatrixCreateFromFile(const char *fname, int silent) {
    return LoadDataMatrix(fname, silent, false);
  }
  // Build a DMatrixSimple from CSR arrays; num_col is inferred as the
  // maximum column index + 1 seen in `indices`.
  void* XGDMatrixCreateFromCSR(const size_t *indptr,
                               const unsigned *indices,
                               const float *data,
                               size_t nindptr,
                               size_t nelem) {
    DMatrixSimple *p_mat = new DMatrixSimple();
    DMatrixSimple &mat = *p_mat;
    mat.row_ptr_.resize(nindptr);
    memcpy(&mat.row_ptr_[0], indptr, sizeof(size_t)*nindptr);
    mat.row_data_.resize(nelem);
    for (size_t i = 0; i < nelem; ++i) {
      mat.row_data_[i] = SparseBatch::Entry(indices[i], data[i]);
      mat.info.num_col = std::max(mat.info.num_col,
                                  static_cast<size_t>(indices[i]+1));
    }
    // CSR header has one more entry than there are rows
    mat.info.num_row = nindptr - 1;
    return p_mat;
  }
  // Build a DMatrixSimple from a dense row-major nrow x ncol matrix,
  // dropping cells whose value equals `missing`.
  void* XGDMatrixCreateFromMat(const float *data,
                               size_t nrow,
                               size_t ncol,
                               float missing) {
    DMatrixSimple *p_mat = new DMatrixSimple();
    DMatrixSimple &mat = *p_mat;
    mat.info.num_row = nrow;
    mat.info.num_col = ncol;
    for (size_t i = 0; i < nrow; ++i, data += ncol) {
      size_t nelem = 0;
      for (size_t j = 0; j < ncol; ++j) {
        if (data[j] != missing) {
          mat.row_data_.push_back(SparseBatch::Entry(j, data[j]));
          ++nelem;
        }
      }
      // NOTE(review): relies on DMatrixSimple's constructor seeding row_ptr_
      // with a leading 0 -- confirm, otherwise back() on an empty vector is UB
      mat.row_ptr_.push_back(mat.row_ptr_.back() + nelem);
    }
    return p_mat;
  }
|
||||
void* XGDMatrixSliceDMatrix(void *handle,
|
||||
const int *idxset,
|
||||
size_t len) {
|
||||
DMatrixSimple tmp;
|
||||
DataMatrix &dsrc = *static_cast<DataMatrix*>(handle);
|
||||
if (dsrc.magic != DMatrixSimple::kMagic) {
|
||||
tmp.CopyFrom(dsrc);
|
||||
}
|
||||
DataMatrix &src = (dsrc.magic == DMatrixSimple::kMagic ?
|
||||
*static_cast<DMatrixSimple*>(handle): tmp);
|
||||
DMatrixSimple *p_ret = new DMatrixSimple();
|
||||
DMatrixSimple &ret = *p_ret;
|
||||
|
||||
utils::Check(src.info.group_ptr.size() == 0,
|
||||
"slice does not support group structure");
|
||||
ret.Clear();
|
||||
ret.info.num_row = len;
|
||||
ret.info.num_col = src.info.num_col;
|
||||
|
||||
utils::IIterator<SparseBatch> *iter = src.fmat.RowIterator();
|
||||
iter->BeforeFirst();
|
||||
utils::Assert(iter->Next(), "slice");
|
||||
const SparseBatch &batch = iter->Value();
|
||||
for (size_t i = 0; i < len; ++i) {
|
||||
const int ridx = idxset[i];
|
||||
SparseBatch::Inst inst = batch[ridx];
|
||||
utils::Check(static_cast<size_t>(ridx) < batch.size, "slice index exceed number of rows");
|
||||
ret.row_data_.resize(ret.row_data_.size() + inst.length);
|
||||
memcpy(&ret.row_data_[ret.row_ptr_.back()], inst.data,
|
||||
sizeof(SparseBatch::Entry) * inst.length);
|
||||
ret.row_ptr_.push_back(ret.row_ptr_.back() + inst.length);
|
||||
if (src.info.labels.size() != 0) {
|
||||
ret.info.labels.push_back(src.info.labels[ridx]);
|
||||
}
|
||||
if (src.info.weights.size() != 0) {
|
||||
ret.info.weights.push_back(src.info.weights[ridx]);
|
||||
}
|
||||
if (src.info.info.root_index.size() != 0) {
|
||||
ret.info.info.root_index.push_back(src.info.info.root_index[ridx]);
|
||||
}
|
||||
}
|
||||
return p_ret;
|
||||
}
|
||||
  void XGDMatrixFree(void *handle) {
    delete static_cast<DataMatrix*>(handle);
  }
  void XGDMatrixSaveBinary(void *handle, const char *fname, int silent) {
    SaveDataMatrix(*static_cast<DataMatrix*>(handle), fname, silent);
  }
  // Overwrite the named float meta-info field (e.g. label, weight) with
  // `len` values copied from `info`.
  void XGDMatrixSetFloatInfo(void *handle, const char *field, const float *info, size_t len) {
    std::vector<float> &vec =
        static_cast<DataMatrix*>(handle)->info.GetInfo(field);
    vec.resize(len);
    memcpy(&vec[0], info, sizeof(float) * len);
  }
  // Set ranking group sizes; stored as a cumulative-offset array of len+1
  // entries starting at 0.
  void XGDMatrixSetGroup(void *handle, const unsigned *group, size_t len) {
    DataMatrix *pmat = static_cast<DataMatrix*>(handle);
    pmat->info.group_ptr.resize(len + 1);
    pmat->info.group_ptr[0] = 0;
    for (size_t i = 0; i < len; ++i) {
      pmat->info.group_ptr[i+1] = pmat->info.group_ptr[i]+group[i];
    }
  }
  // Return a pointer into the matrix's own info storage; valid while the
  // matrix lives and the field is not resized.
  const float* XGDMatrixGetFloatInfo(const void *handle, const char *field, size_t* len) {
    const std::vector<float> &vec =
        static_cast<const DataMatrix*>(handle)->info.GetInfo(field);
    *len = vec.size();
    // NOTE(review): &vec[0] on an empty field is UB -- callers appear to
    // check *len first; confirm
    return &vec[0];
  }
  size_t XGDMatrixNumRow(const void *handle) {
    return static_cast<const DataMatrix*>(handle)->info.num_row;
  }

  // xgboost implementation
  // Create a booster caching the given matrices; caller owns the handle
  // (free with XGBoosterFree).
  void *XGBoosterCreate(void *dmats[], size_t len) {
    std::vector<DataMatrix*> mats;
    for (size_t i = 0; i < len; ++i) {
      DataMatrix *dtr = static_cast<DataMatrix*>(dmats[i]);
      mats.push_back(dtr);
    }
    return new Booster(mats);
  }
  void XGBoosterFree(void *handle) {
    delete static_cast<Booster*>(handle);
  }
  void XGBoosterSetParam(void *handle, const char *name, const char *value) {
    static_cast<Booster*>(handle)->SetParam(name, value);
  }
  // One boosting round using the built-in objective.
  void XGBoosterUpdateOneIter(void *handle, int iter, void *dtrain) {
    Booster *bst = static_cast<Booster*>(handle);
    DataMatrix *dtr = static_cast<DataMatrix*>(dtrain);
    bst->CheckInitModel();
    bst->CheckInit(dtr);
    bst->UpdateOneIter(iter, *dtr);
  }
  // One boosting round from caller-supplied gradients (custom objectives).
  void XGBoosterBoostOneIter(void *handle, void *dtrain,
                             float *grad, float *hess, size_t len) {
    Booster *bst = static_cast<Booster*>(handle);
    DataMatrix *dtr = static_cast<DataMatrix*>(dtrain);
    bst->CheckInitModel();
    bst->CheckInit(dtr);
    bst->BoostOneIter(*dtr, grad, hess, len);
  }
  // Evaluate the matrices; returned string is owned by the booster and valid
  // until the next XGBoosterEvalOneIter call on the same handle.
  const char* XGBoosterEvalOneIter(void *handle, int iter, void *dmats[],
                                   const char *evnames[], size_t len) {
    Booster *bst = static_cast<Booster*>(handle);
    std::vector<std::string> names;
    std::vector<const DataMatrix*> mats;
    for (size_t i = 0; i < len; ++i) {
      mats.push_back(static_cast<DataMatrix*>(dmats[i]));
      names.push_back(std::string(evnames[i]));
    }
    bst->CheckInitModel();
    bst->eval_str = bst->EvalOneIter(iter, mats, names);
    return bst->eval_str.c_str();
  }
  // Result aliases the booster's prediction buffer; valid until the next
  // predict call on the same handle.
  const float *XGBoosterPredict(void *handle, void *dmat, int output_margin, size_t *len) {
    return static_cast<Booster*>(handle)->Pred(*static_cast<DataMatrix*>(dmat), output_margin, len);
  }
  void XGBoosterLoadModel(void *handle, const char *fname) {
    static_cast<Booster*>(handle)->LoadModel(fname);
  }
  void XGBoosterSaveModel(const void *handle, const char *fname) {
    static_cast<const Booster*>(handle)->SaveModel(fname);
  }
  // Dump model as text; an empty fmap string means no feature map. Returned
  // pointers are owned by the booster (see Booster::GetModelDump).
  const char** XGBoosterDumpModel(void *handle, const char *fmap, size_t *len){
    utils::FeatMap featmap;
    if (strlen(fmap) != 0) {
      featmap.LoadText(fmap);
    }
    return static_cast<Booster*>(handle)->GetModelDump(featmap, false, len);
  }
|
||||
};
|
||||
171
wrapper/xgboost_wrapper.h
Normal file
171
wrapper/xgboost_wrapper.h
Normal file
@ -0,0 +1,171 @@
|
||||
#ifndef XGBOOST_WRAPPER_H_
#define XGBOOST_WRAPPER_H_
/*!
 * \file xgboost_wrapper.h
 * \author Tianqi Chen
 * \brief a C style wrapper of xgboost
 *  can be used to create wrapper of other languages
 */
#include <stdio.h>

/* BUGFIX: guard extern "C" so this header is usable from plain C, as the
 * file-level comment promises; C++ translation units still get C linkage. */
#ifdef __cplusplus
extern "C" {
#endif
  /*!
   * \brief load a data matrix
   * \param fname name of the content
   * \param silent whether to print messages
   * \return a loaded data matrix
   */
  void* XGDMatrixCreateFromFile(const char *fname, int silent);
  /*!
   * \brief create a matrix content from csr format
   * \param indptr pointer to row headers
   * \param indices findex
   * \param data fvalue
   * \param nindptr number of rows in the matrix + 1
   * \param nelem number of nonzero elements in the matrix
   * \return created dmatrix
   */
  void* XGDMatrixCreateFromCSR(const size_t *indptr,
                               const unsigned *indices,
                               const float *data,
                               size_t nindptr,
                               size_t nelem);
  /*!
   * \brief create matrix content from dense matrix
   * \param data pointer to the data space
   * \param nrow number of rows
   * \param ncol number of columns
   * \param missing which value to represent missing value
   * \return created dmatrix
   */
  void* XGDMatrixCreateFromMat(const float *data,
                               size_t nrow,
                               size_t ncol,
                               float missing);
  /*!
   * \brief create a new dmatrix from sliced content of existing matrix
   * \param handle instance of data matrix to be sliced
   * \param idxset index set
   * \param len length of index set
   * \return a sliced new matrix
   */
  void* XGDMatrixSliceDMatrix(void *handle,
                              const int *idxset,
                              size_t len);
  /*!
   * \brief free space in data matrix
   */
  void XGDMatrixFree(void *handle);
  /*!
   * \brief save a data matrix into a binary file
   * \param handle an instance of data matrix
   * \param fname file name
   * \param silent print statistics when saving
   */
  void XGDMatrixSaveBinary(void *handle, const char *fname, int silent);
  /*!
   * \brief set float vector to a content in info
   * \param handle an instance of data matrix
   * \param field field name, can be label, weight
   * \param array pointer to float vector
   * \param len length of array
   */
  void XGDMatrixSetFloatInfo(void *handle, const char *field, const float *array, size_t len);
  /*!
   * \brief set group sizes of the training matrix (for ranking)
   * \param handle an instance of data matrix
   * \param group pointer to group sizes
   * \param len length of array
   */
  void XGDMatrixSetGroup(void *handle, const unsigned *group, size_t len);
  /*!
   * \brief get float info vector from matrix
   * \param handle an instance of data matrix
   * \param field field name
   * \param out_len used to set result length
   * \return pointer to the requested info vector
   */
  const float* XGDMatrixGetFloatInfo(const void *handle, const char *field, size_t* out_len);
  /*!
   * \brief return number of rows
   */
  size_t XGDMatrixNumRow(const void *handle);
  // --- start XGBoost class
  /*!
   * \brief create xgboost learner
   * \param dmats matrices that are set to be cached
   * \param len length of dmats
   */
  void *XGBoosterCreate(void* dmats[], size_t len);
  /*!
   * \brief free obj in handle
   * \param handle handle to be freed
   */
  void XGBoosterFree(void* handle);
  /*!
   * \brief set parameters
   * \param handle handle
   * \param name parameter name
   * \param value value of parameter
   */
  void XGBoosterSetParam(void *handle, const char *name, const char *value);
  /*!
   * \brief update the model in one round using dtrain
   * \param handle handle
   * \param iter current iteration rounds
   * \param dtrain training data
   */
  void XGBoosterUpdateOneIter(void *handle, int iter, void *dtrain);
  /*!
   * \brief update the model, by directly specifying gradient and second order gradient;
   *        this can be used to replace UpdateOneIter, to support customized loss functions
   * \param handle handle
   * \param dtrain training data
   * \param grad gradient statistics
   * \param hess second order gradient statistics
   * \param len length of grad/hess array
   */
  void XGBoosterBoostOneIter(void *handle, void *dtrain,
                             float *grad, float *hess, size_t len);
  /*!
   * \brief get evaluation statistics for xgboost
   * \param handle handle
   * \param iter current iteration rounds
   * \param dmats pointers to data to be evaluated
   * \param evnames pointers to names of each data
   * \param len length of dmats
   * \return the string containing evaluation statistics
   */
  const char *XGBoosterEvalOneIter(void *handle, int iter, void *dmats[],
                                   const char *evnames[], size_t len);
  /*!
   * \brief make prediction based on dmat
   * \param handle handle
   * \param dmat data matrix
   * \param output_margin whether to only output raw margin values
   * \param len used to store length of returning result
   */
  const float *XGBoosterPredict(void *handle, void *dmat, int output_margin, size_t *len);
  /*!
   * \brief load model from existing file
   * \param handle handle
   * \param fname file name
   */
  void XGBoosterLoadModel(void *handle, const char *fname);
  /*!
   * \brief save model into existing file
   * \param handle handle
   * \param fname file name
   */
  void XGBoosterSaveModel(const void *handle, const char *fname);
  /*!
   * \brief dump model, return array of strings representing model dump
   * \param handle handle
   * \param fmap name to fmap, can be an empty string
   * \param out_len length of output array
   * \return char *data[], representing dump of each model
   */
  const char **XGBoosterDumpModel(void *handle, const char *fmap,
                                  size_t *out_len);
#ifdef __cplusplus
}
#endif
#endif  // XGBOOST_WRAPPER_H_
|
||||
Loading…
x
Reference in New Issue
Block a user