Merge pull request #1 from tqchen/master

updating fork to current master
giuliohome · 2014-08-27 20:17:44 +02:00 · commit ce1803a40c
60 changed files with 10097 additions and 352 deletions

.gitignore

@ -16,7 +16,6 @@
*conf
*buffer
*model
xgboost
*pyc
*train
*test
@ -24,3 +23,20 @@ xgboost
*rar
*vali
*data
*sdf
Release
*exe*
*exp
ipch
*.filters
*.user
*log
Debug
*suo
*test*
.Rhistory
*.dll
*i386
*x64
*dump
*save


@ -3,18 +3,21 @@ export CXX = g++
export LDFLAGS= -pthread -lm
# note for R module
# add include path to Rinternals.h here
export CPLUS_INCLUDE_PATH=/usr/share/R/include
ifeq ($(no_omp),1)
export CFLAGS = -Wall -O3 -msse2 -Wno-unknown-pragmas -DDISABLE_OPENMP
else
export CFLAGS = -Wall -O3 -msse2 -Wno-unknown-pragmas -fopenmp
export CFLAGS = -Wall -O3 -msse2 -Wno-unknown-pragmas -fopenmp
endif
# expose these flags to R CMD SHLIB
export PKG_CPPFLAGS = $(CFLAGS) -DXGBOOST_CUSTOMIZE_ERROR_
# specify tensor path
BIN = xgboost
OBJ =
SLIB = wrapper/libxgboostwrapper.so wrapper/libxgboostR.so
SLIB = wrapper/libxgboostwrapper.so
RLIB = wrapper/libxgboostR.so
.PHONY: clean all R
all: $(BIN) wrapper/libxgboostwrapper.so
@ -31,6 +34,9 @@ $(BIN) :
$(SLIB) :
$(CXX) $(CFLAGS) -fPIC $(LDFLAGS) -shared -o $@ $(filter %.cpp %.o %.c, $^)
$(RLIB) :
R CMD SHLIB -c -o $@ $(filter %.cpp %.o %.c, $^)
$(OBJ) :
$(CXX) -c $(CFLAGS) -o $@ $(firstword $(filter %.cpp %.c, $^) )
@ -38,4 +44,4 @@ install:
cp -f -r $(BIN) $(INSTALL_PATH)
clean:
$(RM) $(OBJ) $(BIN) $(SLIB) *~ */*~ */*/*~
$(RM) $(OBJ) $(BIN) $(SLIB) $(RLIB) *~ */*~ */*/*~

R-package/DESCRIPTION

@ -0,0 +1,16 @@
Package: xgboost
Type: Package
Title: R wrapper of xgboost
Version: 0.3-0
Date: 2014-08-23
Author: Tianqi Chen
Maintainer: Tianqi Chen <tianqi.tchen@gmail.com>
Description: xgboost
License: See LICENSE file
URL: https://github.com/tqchen/xgboost
BugReports: https://github.com/tqchen/xgboost/issues
Depends:
R (>= 2.0.2)
Imports:
Matrix (>= 1.1-0),
methods

R-package/LICENSE

@ -0,0 +1,13 @@
Copyright (c) 2014 by Tianqi Chen and Contributors
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

R-package/NAMESPACE

@ -0,0 +1,10 @@
importClassesFrom("Matrix", dgCMatrix, dgeMatrix)
export(xgboost)
export(xgb.DMatrix)
export(xgb.getinfo)
exportMethods(predict)
export(xgb.train)
export(xgb.save)
export(xgb.load)
export(xgb.dump)


@ -0,0 +1,16 @@
#' @export
setClass("xgb.Booster")
#' @export
setMethod("predict",
signature = "xgb.Booster",
definition = function(object, newdata, outputmargin = FALSE)
{
if (class(newdata) != "xgb.DMatrix") {
newdata = xgb.DMatrix(newdata)
}
ret <- .Call("XGBoosterPredict_R", object, newdata,
as.integer(outputmargin), PACKAGE="xgboost")
return(ret)
})
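For reference, a minimal usage sketch of the predict method above (not part of this commit); it assumes the agaricus demo files from this repository are in the working directory:

```r
# predict accepts an xgb.DMatrix directly, or anything xgb.DMatrix() can wrap
require(xgboost)
bst   <- xgboost(file = 'agaricus.txt.train', max_depth = 2, eta = 1,
                 silent = 1, objective = 'binary:logistic')
dtest <- xgb.DMatrix('agaricus.txt.test')
prob   <- predict(bst, dtest)                      # transformed prediction
margin <- predict(bst, dtest, outputmargin = TRUE) # raw margin before the logistic transform
```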

R-package/R/utils.R

@ -0,0 +1,128 @@
# depends on matrix
.onLoad <- function(libname, pkgname) {
library.dynam("xgboost", pkgname, libname);
}
.onUnload <- function(libpath) {
library.dynam.unload("xgboost", libpath);
}
# set information into dmatrix; this mutates the dmatrix
xgb.setinfo <- function(dmat, name, info) {
if (class(dmat) != "xgb.DMatrix") {
stop("xgb.setinfo: first argument dtrain must be xgb.DMatrix");
}
if (name == "label") {
.Call("XGDMatrixSetInfo_R", dmat, name, as.numeric(info), PACKAGE="xgboost")
return(TRUE)
}
if (name == "weight") {
.Call("XGDMatrixSetInfo_R", dmat, name, as.numeric(info), PACKAGE="xgboost")
return(TRUE)
}
if (name == "base_margin") {
.Call("XGDMatrixSetInfo_R", dmat, name, as.numeric(info), PACKAGE="xgboost")
return(TRUE)
}
if (name == "group") {
.Call("XGDMatrixSetInfo_R", dmat, name, as.integer(info), PACKAGE="xgboost")
return(TRUE)
}
stop(paste("xgb.setinfo: unknown info name", name))
return(FALSE)
}
# construct a Booster from cachelist
xgb.Booster <- function(params = list(), cachelist = list(), modelfile = NULL) {
if (typeof(cachelist) != "list") {
stop("xgb.Booster: only accepts list of DMatrix as cachelist")
}
for (dm in cachelist) {
if (class(dm) != "xgb.DMatrix") {
stop("xgb.Booster: only accepts list of DMatrix as cachelist")
}
}
handle <- .Call("XGBoosterCreate_R", cachelist, PACKAGE="xgboost")
.Call("XGBoosterSetParam_R", handle, "seed", "0", PACKAGE="xgboost")
if (length(params) != 0) {
for (i in 1:length(params)) {
p <- params[i]
.Call("XGBoosterSetParam_R", handle, names(p), as.character(p), PACKAGE="xgboost")
}
}
if (!is.null(modelfile)) {
if (typeof(modelfile) != "character"){
stop("xgb.Booster: modelfile must be character");
}
.Call("XGBoosterLoadModel_R", handle, modelfile, PACKAGE="xgboost")
}
return(structure(handle, class="xgb.Booster"))
}
# predict, deprecated
xgb.predict <- function(booster, dmat, outputmargin = FALSE) {
if (class(booster) != "xgb.Booster") {
stop("xgb.predict: first argument must be type xgb.Booster")
}
if (class(dmat) != "xgb.DMatrix") {
stop("xgb.predict: second argument must be type xgb.DMatrix")
}
ret <- .Call("XGBoosterPredict_R", booster, dmat, as.integer(outputmargin), PACKAGE="xgboost")
return(ret)
}
##--------------------------------------
# the following are low-level functions for iterative training;
# they are not needed unless you want step-by-step control
#---------------------------------------
# update booster with dtrain for one iteration
xgb.iter.update <- function(booster, dtrain, iter) {
if (class(booster) != "xgb.Booster") {
stop("xgb.iter.update: first argument must be type xgb.Booster")
}
if (class(dtrain) != "xgb.DMatrix") {
stop("xgb.iter.update: second argument must be type xgb.DMatrix")
}
.Call("XGBoosterUpdateOneIter_R", booster, as.integer(iter), dtrain, PACKAGE="xgboost")
return(TRUE)
}
# update booster for one iteration with customized gradient statistics
xgb.iter.boost <- function(booster, dtrain, gpair) {
if (class(booster) != "xgb.Booster") {
stop("xgb.iter.update: first argument must be type xgb.Booster")
}
if (class(dtrain) != "xgb.DMatrix") {
stop("xgb.iter.update: second argument must be type xgb.DMatrix")
}
.Call("XGBoosterBoostOneIter_R", booster, dtrain, gpair$grad, gpair$hess, PACKAGE="xgboost")
return(TRUE)
}
# evaluate the booster for one iteration
xgb.iter.eval <- function(booster, watchlist, iter) {
if (class(booster) != "xgb.Booster") {
stop("xgb.eval: first argument must be type xgb.Booster")
}
if (typeof(watchlist) != "list") {
stop("xgb.eval: only accepts list of DMatrix as watchlist")
}
for (w in watchlist) {
if (class(w) != "xgb.DMatrix") {
stop("xgb.eval: watch list can only contain xgb.DMatrix")
}
}
evnames <- list()
if (length(watchlist) != 0) {
for (i in 1:length(watchlist)) {
w <- watchlist[i]
if (length(names(w)) == 0) {
stop("xgb.eval: name tag must be presented for every elements in watchlist")
}
evnames <- append(evnames, names(w))
}
}
msg <- .Call("XGBoosterEvalOneIter_R", booster, as.integer(iter), watchlist, evnames, PACKAGE="xgboost")
return(msg)
}
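For reference, a minimal sketch (not part of this commit) of driving the low-level iteration helpers above directly. They are not exported in the NAMESPACE, so this assumes the package's R sources have been loaded; file paths refer to the agaricus demo data:

```r
# step-by-step training: create a booster, then update and evaluate per round
dtrain <- xgb.DMatrix('agaricus.txt.train')
dtest  <- xgb.DMatrix('agaricus.txt.test')
bst <- xgb.Booster(params = list(max_depth = 2, eta = 1, objective = 'binary:logistic'),
                   cachelist = list(dtrain, dtest))
for (i in 1:2) {
  xgb.iter.update(bst, dtrain, i - 1)  # one boosting round
  cat(xgb.iter.eval(bst, list(train = dtrain, eval = dtest), i - 1), '\n')
}
```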

R-package/R/xgb.DMatrix.R

@ -0,0 +1,22 @@
# constructing DMatrix
xgb.DMatrix <- function(data, missing=0.0, ...) {
if (typeof(data) == "character") {
handle <- .Call("XGDMatrixCreateFromFile_R", data, as.integer(FALSE), PACKAGE="xgboost")
} else if(is.matrix(data)) {
handle <- .Call("XGDMatrixCreateFromMat_R", data, missing, PACKAGE="xgboost")
} else if(class(data) == "dgCMatrix") {
handle <- .Call("XGDMatrixCreateFromCSC_R", data@p, data@i, data@x, PACKAGE="xgboost")
} else {
stop(paste("xgb.DMatrix: does not support to construct from ", typeof(data)))
}
dmat <- structure(handle, class="xgb.DMatrix")
info = list(...)
if (length(info)==0)
return(dmat)
for (i in 1:length(info)) {
p = info[i]
xgb.setinfo(dmat, names(p), p[[1]])
}
return(dmat)
}
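A minimal sketch (not part of this commit) exercising the three construction paths above; extra named arguments such as label are forwarded to xgb.setinfo:

```r
require(xgboost)
require(Matrix)
x  <- matrix(rnorm(20), 10, 2)
d1 <- xgb.DMatrix(x, label = rbinom(10, 1, 0.5))  # dense matrix, label set via ...
d2 <- xgb.DMatrix(as(x, "dgCMatrix"))             # sparse CSC matrix
d3 <- xgb.DMatrix('agaricus.txt.train')           # local file (assumes the demo data exists)
```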

R-package/R/xgb.dump.R

@ -0,0 +1,11 @@
# dump model
xgb.dump <- function(booster, fname, fmap = "") {
if (class(booster) != "xgb.Booster") {
stop("xgb.dump: first argument must be type xgb.Booster")
}
if (typeof(fname) != "character"){
stop("xgb.dump: second argument must be type character")
}
.Call("XGBoosterDumpModel_R", booster, fname, fmap, PACKAGE="xgboost")
return(TRUE)
}

R-package/R/xgb.getinfo.R

@ -0,0 +1,16 @@
# get information from dmatrix
xgb.getinfo <- function(dmat, name) {
if (typeof(name) != "character") {
stop("xgb.getinfo: name must be character")
}
if (class(dmat) != "xgb.DMatrix") {
stop("xgb.setinfo: first argument dtrain must be xgb.DMatrix");
}
if (name != "label" &&
name != "weight" &&
name != "base_margin" ) {
stop(paste("xgb.getinfo: unknown info name", name))
}
ret <- .Call("XGDMatrixGetInfo_R", dmat, name, PACKAGE="xgboost")
return(ret)
}

R-package/R/xgb.load.R

@ -0,0 +1,5 @@
xgb.load <- function(modelfile) {
if (is.null(modelfile))
stop('xgb.load: modelfile cannot be NULL')
xgb.Booster(modelfile=modelfile)
}

R-package/R/xgb.save.R

@ -0,0 +1,16 @@
# save model or DMatrix to file
xgb.save <- function(handle, fname) {
if (typeof(fname) != "character") {
stop("xgb.save: fname must be character")
}
if (class(handle) == "xgb.Booster") {
.Call("XGBoosterSaveModel_R", handle, fname, PACKAGE="xgboost")
return(TRUE)
}
if (class(handle) == "xgb.DMatrix") {
.Call("XGDMatrixSaveBinary_R", handle, fname, as.integer(FALSE), PACKAGE="xgboost")
return(TRUE)
}
stop("xgb.save: the input must be either xgb.DMatrix or xgb.Booster")
return(FALSE)
}
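A minimal sketch (not part of this commit) of the persistence helpers, assuming bst is a trained xgb.Booster and dtrain an xgb.DMatrix as in the demos below:

```r
xgb.save(bst, 'xgb.model')         # serialize the booster
xgb.save(dtrain, 'dtrain.buffer')  # the same helper saves a DMatrix as a binary buffer
bst2 <- xgb.load('xgb.model')      # reload the model
xgb.dump(bst2, 'dump.raw.txt')     # write a human-readable text dump of the trees
```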

R-package/R/xgb.train.R

@ -0,0 +1,38 @@
# train a model using given parameters
xgb.train <- function(params, dtrain, nrounds=10, watchlist=list(), obj=NULL, feval=NULL) {
if (typeof(params) != "list") {
stop("xgb.train: first argument params must be list");
}
if (class(dtrain) != "xgb.DMatrix") {
stop("xgb.train: second argument dtrain must be xgb.DMatrix");
}
bst <- xgb.Booster(params, append(watchlist,dtrain))
for (i in 1:nrounds) {
if (is.null(obj)) {
succ <- xgb.iter.update(bst, dtrain, i-1)
} else {
pred <- xgb.predict(bst, dtrain)
gpair <- obj(pred, dtrain)
succ <- xgb.iter.boost(bst, dtrain, gpair)
}
if (length(watchlist) != 0) {
if (is.null(feval)) {
msg <- xgb.iter.eval(bst, watchlist, i-1)
cat(msg); cat("\n")
} else {
cat("["); cat(i); cat("]");
for (j in 1:length(watchlist)) {
w <- watchlist[j]
if (length(names(w)) == 0) {
stop("xgb.eval: name tag must be presented for every elements in watchlist")
}
ret <- feval(xgb.predict(bst, w[[1]]), w[[1]])
cat("\t"); cat(names(w)); cat("-"); cat(ret$metric);
cat(":"); cat(ret$value)
}
cat("\n")
}
}
}
return(bst)
}
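A minimal sketch (not part of this commit) of calling xgb.train with a named watchlist, which prints one evaluation line per round; file paths refer to the agaricus demo data:

```r
require(xgboost)
dtrain <- xgb.DMatrix('agaricus.txt.train')
dtest  <- xgb.DMatrix('agaricus.txt.test')
param  <- list(max_depth = 2, eta = 1, silent = 1, objective = 'binary:logistic')
bst <- xgb.train(param, dtrain, nrounds = 2,
                 watchlist = list(train = dtrain, eval = dtest))
```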

R-package/R/xgboost.R

@ -0,0 +1,49 @@
# Main function for xgboost-package
xgboost = function(x=NULL,y=NULL,DMatrix=NULL, file=NULL, validation=NULL,
nrounds=10, obj=NULL, feval=NULL, margin=NULL, verbose = T, ...)
{
if (!is.null(DMatrix))
dtrain = DMatrix
else
{
if (is.null(x) && is.null(y))
{
if (is.null(file))
stop('xgboost needs input data: an R object, a local file, or a DMatrix object.')
dtrain = xgb.DMatrix(file)
}
else
dtrain = xgb.DMatrix(x, label=y)
if (!is.null(margin))
{
succ <- xgb.setinfo(dtrain, "base_margin", margin)
if (!succ)
warning('Attempt to use margin failed.')
}
}
params = list(...)
watchlist=list()
if (verbose)
{
if (!is.null(validation))
{
if (class(validation)!='xgb.DMatrix')
dtest = xgb.DMatrix(validation)
else
dtest = validation
watchlist = list(eval=dtest,train=dtrain)
}
else
watchlist = list(train=dtrain)
}
bst <- xgb.train(params, dtrain, nrounds, watchlist, obj, feval)
return(bst)
}

R-package/README.md

@ -0,0 +1,10 @@
This subfolder contains an experimental version of the R package.
It is not yet ready.
Installation:
```r
require(devtools)
install_github('xgboost','tqchen',subdir='R-package')
```
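After installation, a minimal quick-start sketch (assuming the agaricus demo files that ship with this repository are in the working directory):

```r
require(xgboost)
bst  <- xgboost(file = 'agaricus.txt.train', max_depth = 2, eta = 1,
                silent = 1, objective = 'binary:logistic')
pred <- predict(bst, 'agaricus.txt.test')
```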

File diff suppressed because it is too large.

File diff suppressed because it is too large.


@ -0,0 +1,133 @@
require(xgboost)
require(methods)
# helper function to read libsvm format
# this is very badly written: it loads in dense form, then converts to sparse
# use this only for demo purposes
# adapted from https://github.com/zygmuntz/r-libsvm-format-read-write/blob/master/f_read.libsvm.r
read.libsvm = function(fname, maxcol) {
content = readLines(fname)
nline = length(content)
label = numeric(nline)
mat = matrix(0, nline, maxcol+1)
for (i in 1:nline) {
arr = as.vector(strsplit(content[i], " ")[[1]])
label[i] = as.numeric(arr[[1]])
for (j in 2:length(arr)) {
kv = strsplit(arr[j], ":")[[1]]
# to avoid 0 index
findex = as.integer(kv[1]) + 1
fvalue = as.numeric(kv[2])
mat[i,findex] = fvalue
}
}
mat = as(mat, "sparseMatrix")
return(list(label=label, data=mat))
}
############################
# Test xgb.DMatrix with local file, sparse matrix and dense matrix in R.
############################
# Directly read in local file
dtrain = xgb.DMatrix('agaricus.txt.train')
class(dtrain)
# read file in R
csc = read.libsvm("agaricus.txt.train", 126)
y = csc$label
x = csc$data
# x as Sparse Matrix
class(x)
dtrain = xgb.DMatrix(x, label=y)
# x as dense matrix
dense.x = as.matrix(x)
dtrain = xgb.DMatrix(dense.x, label=y)
############################
# Test xgboost with local file, sparse matrix and dense matrix in R.
############################
# Test with DMatrix object
bst = xgboost(DMatrix=dtrain, max_depth=2, eta=1, silent=1, objective='binary:logistic')
# Test with local file
bst = xgboost(file='agaricus.txt.train', max_depth=2, eta=1, silent=1, objective='binary:logistic')
# Test with Sparse Matrix
bst = xgboost(x = x, y = y, max_depth=2, eta=1, silent=1, objective='binary:logistic')
# Test with dense Matrix
bst = xgboost(x = dense.x, y = y, max_depth=2, eta=1, silent=1, objective='binary:logistic')
# Test with validation set
bst = xgboost(file='agaricus.txt.train', validation='agaricus.txt.test',
max_depth=2, eta=1, silent=1, objective='binary:logistic')
############################
# Test predict
############################
# Prediction with DMatrix object
dtest = xgb.DMatrix('agaricus.txt.test')
pred = predict(bst, dtest)
# Prediction with local test file
pred = predict(bst, 'agaricus.txt.test')
# Prediction with Sparse Matrix
csc = read.libsvm("agaricus.txt.test", 126)
test.y = csc$label
test.x = csc$data
pred = predict(bst, test.x)
# Extract labels with xgb.getinfo
labels = xgb.getinfo(dtest, "label")
err = as.numeric(sum(as.integer(pred > 0.5) != labels)) / length(labels)
print(paste("error=",err))
############################
# Save and load model to hard disk
############################
# save model to binary local file
xgb.save(bst, 'model.save')
# load binary model to R
bst = xgb.load('model.save')
pred = predict(bst, test.x)
# save model to text file
xgb.dump(bst, 'model.dump')
############################
# Customized objective and evaluation function
############################
# user-defined objective function: given predictions, return gradient and second-order gradient
# this is log-likelihood loss
logregobj = function(preds, dtrain) {
labels = xgb.getinfo(dtrain, "label")
preds = 1.0 / (1.0 + exp(-preds))
grad = preds - labels
hess = preds * (1.0-preds)
return(list(grad=grad, hess=hess))
}
# user-defined evaluation function: returns list(metric="metric-name", value="metric-value")
# NOTE: with a customized loss function, the default prediction value is the margin,
# which may make the built-in evaluation metrics misbehave:
# for logistic loss, for example, the prediction is the score before the logistic transformation,
# while the built-in error metric assumes the input has already been transformed
# keep this in mind when customizing; you may also need to write a customized evaluation function
evalerror = function(preds, dtrain) {
labels = xgb.getinfo(dtrain, "label")
err = as.numeric(sum(labels != (preds > 0.0))) / length(labels)
return(list(metric="error", value=err))
}
bst = xgboost(x = x, y = y, max_depth=2, eta=1, silent=1, objective='binary:logistic',
obj=logregobj, feval=evalerror)


@ -0,0 +1,127 @@
# load xgboost library
require(xgboost)
require(methods)
# helper function to read libsvm format
# this is very badly written: it loads in dense form, then converts to sparse
# use this only for demo purposes
# adapted from https://github.com/zygmuntz/r-libsvm-format-read-write/blob/master/f_read.libsvm.r
read.libsvm <- function(fname, maxcol) {
content <- readLines(fname)
nline <- length(content)
label <- numeric(nline)
mat <- matrix(0, nline, maxcol+1)
for (i in 1:nline) {
arr <- as.vector(strsplit(content[i], " ")[[1]])
label[i] <- as.numeric(arr[[1]])
for (j in 2:length(arr)) {
kv <- strsplit(arr[j], ":")[[1]]
# to avoid 0 index
findex <- as.integer(kv[1]) + 1
fvalue <- as.numeric(kv[2])
mat[i,findex] <- fvalue
}
}
mat <- as(mat, "sparseMatrix")
return(list(label=label, data=mat))
}
# test code here
dtrain <- xgb.DMatrix("agaricus.txt.train")
dtest <- xgb.DMatrix("agaricus.txt.test")
param = list("bst:max_depth"=2, "bst:eta"=1, "silent"=1, "objective"="binary:logistic")
watchlist <- list("eval"=dtest,"train"=dtrain)
# training xgboost model
bst <- xgb.train(param, dtrain, nround=2, watchlist=watchlist)
# make prediction
preds <- xgb.predict(bst, dtest)
labels <- xgb.getinfo(dtest, "label")
err <- as.numeric(sum(as.integer(preds > 0.5) != labels)) / length(labels)
# print error rate
print(paste("error=",err))
# dump model
xgb.dump(bst, "dump.raw.txt")
# dump model with feature map
xgb.dump(bst, "dump.nice.txt", "featmap.txt")
# save dmatrix into binary buffer
succ <- xgb.save(dtest, "dtest.buffer")
# save model into file
succ <- xgb.save(bst, "xgb.model")
# load model and data in
bst2 <- xgb.Booster(modelfile="xgb.model")
dtest2 <- xgb.DMatrix("dtest.buffer")
preds2 <- xgb.predict(bst2, dtest2)
# assert they are the same
stopifnot(sum(abs(preds2-preds)) == 0)
###
# build dmatrix from sparseMatrix
###
print('start running example of building DMatrix from R sparseMatrix')
csc <- read.libsvm("agaricus.txt.train", 126)
label <- csc$label
data <- csc$data
dtrain <- xgb.DMatrix(data, label=label)
watchlist <- list("eval"=dtest,"train"=dtrain)
bst <- xgb.train(param, dtrain, nround=2, watchlist=watchlist)
###
# build dmatrix from dense matrix
###
print('start running example of building DMatrix from a dense R matrix')
mat = as.matrix(data)
dtrain <- xgb.DMatrix(mat, label=label)
watchlist <- list("eval"=dtest,"train"=dtrain)
bst <- xgb.train(param, dtrain, nround=2, watchlist=watchlist)
###
# advanced: customized loss function
#
print("start running example of using a customized objective function")
# note: for a customized objective function, we leave objective at its default
# note: what we get back as the prediction is the margin value
# you must know what you are doing
param <- list("bst:max_depth" = 2, "bst:eta" = 1, "silent" =1)
# user-defined objective function: given predictions, return gradient and second-order gradient
# this is log-likelihood loss
logregobj <- function(preds, dtrain) {
labels <- xgb.getinfo(dtrain, "label")
preds <- 1.0 / (1.0 + exp(-preds))
grad <- preds - labels
hess <- preds * (1.0-preds)
return(list(grad=grad, hess=hess))
}
# user-defined evaluation function: returns list(metric="metric-name", value="metric-value")
# NOTE: with a customized loss function, the default prediction value is the margin,
# which may make the built-in evaluation metrics misbehave:
# for logistic loss, for example, the prediction is the score before the logistic transformation,
# while the built-in error metric assumes the input has already been transformed
# keep this in mind when customizing; you may also need to write a customized evaluation function
evalerror <- function(preds, dtrain) {
labels <- xgb.getinfo(dtrain, "label")
err <- as.numeric(sum(labels != (preds > 0.0))) / length(labels)
return(list(metric="error", value=err))
}
# training with a customized objective; we can also do step-by-step training
# simply look at xgboost.py's implementation of train
bst <- xgb.train(param, dtrain, nround=2, watchlist, logregobj, evalerror)
###
# advanced: start from an initial base prediction
#
print("start running example of starting from an initial prediction")
# specify parameters via list; definitions are the same as in the C++ version
param = list("bst:max_depth"=2, "bst:eta"=1, "silent"=1, "objective"="binary:logistic")
# train xgboost for 1 round
bst <- xgb.train( param, dtrain, 1, watchlist )
# Note: base_margin needs the margin value, not the transformed prediction
# predicting with outputmargin=TRUE always gives margin values before the logistic transformation
ptrain <- xgb.predict(bst, dtrain, outputmargin=TRUE)
ptest <- xgb.predict(bst, dtest, outputmargin=TRUE)
succ <- xgb.setinfo(dtrain, "base_margin", ptrain)
succ <- xgb.setinfo(dtest, "base_margin", ptest)
print ("this is result of running from initial prediction")
bst <- xgb.train( param, dtrain, 1, watchlist )


@ -0,0 +1,103 @@
require(xgboost)
require(methods)
# helper function to read libsvm format
# this is very badly written: it loads in dense form, then converts to sparse
# use this only for demo purposes
# adapted from https://github.com/zygmuntz/r-libsvm-format-read-write/blob/master/f_read.libsvm.r
read.libsvm = function(fname, maxcol) {
content = readLines(fname)
nline = length(content)
label = numeric(nline)
mat = matrix(0, nline, maxcol+1)
for (i in 1:nline) {
arr = as.vector(strsplit(content[i], " ")[[1]])
label[i] = as.numeric(arr[[1]])
for (j in 2:length(arr)) {
kv = strsplit(arr[j], ":")[[1]]
# to avoid 0 index
findex = as.integer(kv[1]) + 1
fvalue = as.numeric(kv[2])
mat[i,findex] = fvalue
}
}
mat = as(mat, "sparseMatrix")
return(list(label=label, data=mat))
}
# Parameter setting
dtrain <- xgb.DMatrix("agaricus.txt.train")
dtest <- xgb.DMatrix("agaricus.txt.test")
param = list("bst:max_depth"=2, "bst:eta"=1, "silent"=1, "objective"="binary:logistic")
watchlist = list("eval"=dtest,"train"=dtrain)
###########################
# Train from local file
###########################
# Training
bst = xgboost(file='agaricus.txt.train',params=param,watchlist=watchlist)
# Prediction
pred = predict(bst, 'agaricus.txt.test')
# Performance
labels = xgb.getinfo(dtest, "label")
err = as.numeric(sum(as.integer(pred > 0.5) != labels)) / length(labels)
print(paste("error=",err))
###########################
# Train from R object
###########################
csc = read.libsvm("agaricus.txt.train", 126)
y = csc$label
x = csc$data
# x as Sparse Matrix
class(x)
# Training
bst = xgboost(x,y,params=param,watchlist=watchlist)
# Prediction
pred = predict(bst, 'agaricus.txt.test')
# Performance
labels = xgb.getinfo(dtest, "label")
err = as.numeric(sum(as.integer(pred > 0.5) != labels)) / length(labels)
print(paste("error=",err))
# Training with dense matrix
x = as.matrix(x)
bst = xgboost(x,y,params=param,watchlist=watchlist)
###########################
# Train with customization
###########################
# user-defined objective function: given predictions, return gradient and second-order gradient
# this is log-likelihood loss
logregobj = function(preds, dtrain) {
labels = xgb.getinfo(dtrain, "label")
preds = 1.0 / (1.0 + exp(-preds))
grad = preds - labels
hess = preds * (1.0-preds)
return(list(grad=grad, hess=hess))
}
# user-defined evaluation function: returns list(metric="metric-name", value="metric-value")
# NOTE: with a customized loss function, the default prediction value is the margin,
# which may make the built-in evaluation metrics misbehave:
# for logistic loss, for example, the prediction is the score before the logistic transformation,
# while the built-in error metric assumes the input has already been transformed
# keep this in mind when customizing; you may also need to write a customized evaluation function
evalerror = function(preds, dtrain) {
labels = xgb.getinfo(dtrain, "label")
err = as.numeric(sum(labels != (preds > 0.0))) / length(labels)
return(list(metric="error", value=err))
}
bst = xgboost(x,y,params=param,watchlist=watchlist,obj=logregobj, feval=evalerror)
############################
# Train with previous result
############################
bst = xgboost(x,y,params=param,watchlist=watchlist)
pred = predict(bst, 'agaricus.txt.train', outputmargin=TRUE)
bst2 = xgboost(x,y,params=param,watchlist=watchlist,margin=pred)


@ -0,0 +1,126 @@
0 cap-shape=bell i
1 cap-shape=conical i
2 cap-shape=convex i
3 cap-shape=flat i
4 cap-shape=knobbed i
5 cap-shape=sunken i
6 cap-surface=fibrous i
7 cap-surface=grooves i
8 cap-surface=scaly i
9 cap-surface=smooth i
10 cap-color=brown i
11 cap-color=buff i
12 cap-color=cinnamon i
13 cap-color=gray i
14 cap-color=green i
15 cap-color=pink i
16 cap-color=purple i
17 cap-color=red i
18 cap-color=white i
19 cap-color=yellow i
20 bruises?=bruises i
21 bruises?=no i
22 odor=almond i
23 odor=anise i
24 odor=creosote i
25 odor=fishy i
26 odor=foul i
27 odor=musty i
28 odor=none i
29 odor=pungent i
30 odor=spicy i
31 gill-attachment=attached i
32 gill-attachment=descending i
33 gill-attachment=free i
34 gill-attachment=notched i
35 gill-spacing=close i
36 gill-spacing=crowded i
37 gill-spacing=distant i
38 gill-size=broad i
39 gill-size=narrow i
40 gill-color=black i
41 gill-color=brown i
42 gill-color=buff i
43 gill-color=chocolate i
44 gill-color=gray i
45 gill-color=green i
46 gill-color=orange i
47 gill-color=pink i
48 gill-color=purple i
49 gill-color=red i
50 gill-color=white i
51 gill-color=yellow i
52 stalk-shape=enlarging i
53 stalk-shape=tapering i
54 stalk-root=bulbous i
55 stalk-root=club i
56 stalk-root=cup i
57 stalk-root=equal i
58 stalk-root=rhizomorphs i
59 stalk-root=rooted i
60 stalk-root=missing i
61 stalk-surface-above-ring=fibrous i
62 stalk-surface-above-ring=scaly i
63 stalk-surface-above-ring=silky i
64 stalk-surface-above-ring=smooth i
65 stalk-surface-below-ring=fibrous i
66 stalk-surface-below-ring=scaly i
67 stalk-surface-below-ring=silky i
68 stalk-surface-below-ring=smooth i
69 stalk-color-above-ring=brown i
70 stalk-color-above-ring=buff i
71 stalk-color-above-ring=cinnamon i
72 stalk-color-above-ring=gray i
73 stalk-color-above-ring=orange i
74 stalk-color-above-ring=pink i
75 stalk-color-above-ring=red i
76 stalk-color-above-ring=white i
77 stalk-color-above-ring=yellow i
78 stalk-color-below-ring=brown i
79 stalk-color-below-ring=buff i
80 stalk-color-below-ring=cinnamon i
81 stalk-color-below-ring=gray i
82 stalk-color-below-ring=orange i
83 stalk-color-below-ring=pink i
84 stalk-color-below-ring=red i
85 stalk-color-below-ring=white i
86 stalk-color-below-ring=yellow i
87 veil-type=partial i
88 veil-type=universal i
89 veil-color=brown i
90 veil-color=orange i
91 veil-color=white i
92 veil-color=yellow i
93 ring-number=none i
94 ring-number=one i
95 ring-number=two i
96 ring-type=cobwebby i
97 ring-type=evanescent i
98 ring-type=flaring i
99 ring-type=large i
100 ring-type=none i
101 ring-type=pendant i
102 ring-type=sheathing i
103 ring-type=zone i
104 spore-print-color=black i
105 spore-print-color=brown i
106 spore-print-color=buff i
107 spore-print-color=chocolate i
108 spore-print-color=green i
109 spore-print-color=orange i
110 spore-print-color=purple i
111 spore-print-color=white i
112 spore-print-color=yellow i
113 population=abundant i
114 population=clustered i
115 population=numerous i
116 population=scattered i
117 population=several i
118 population=solitary i
119 habitat=grasses i
120 habitat=leaves i
121 habitat=meadows i
122 habitat=paths i
123 habitat=urban i
124 habitat=waste i
125 habitat=woods i

R-package/src/Makevars

@ -0,0 +1,28 @@
# _*_ mode: Makefile; _*_
export CC = gcc
export CXX = g++
# expose these flags to R CMD SHLIB
PKG_CPPFLAGS = -O3 -Wno-unknown-pragmas -DXGBOOST_CUSTOMIZE_ERROR_ -fPIC $(SHLIB_OPENMP_CFLAGS)
PKG_LIBS = $(SHLIB_OPENMP_CFLAGS)
ifeq ($(no_omp),1)
PKG_CPPFLAGS += -DDISABLE_OPENMP
endif
CXXOBJ= xgboost_wrapper.o xgboost_io.o
OBJECTS= xgboost_R.o $(CXXOBJ)
.PHONY: all clean
all: $(SHLIB)
$(SHLIB): $(OBJECTS)
xgboost_wrapper.o: ../../wrapper/xgboost_wrapper.cpp
xgboost_io.o: ../../src/io/io.cpp
$(CXXOBJ) :
$(CXX) -c $(PKG_CPPFLAGS) -o $@ $(firstword $(filter %.cpp %.c, $^) )
clean:
rm -rf *.so *.o *~ *.dll


@ -0,0 +1,32 @@
# _*_ mode: Makefile; _*_
export CC = gcc
export CXX = g++
# expose these flags to R CMD SHLIB
PKG_CPPFLAGS = -O3 -Wno-unknown-pragmas -DXGBOOST_CUSTOMIZE_ERROR_ -fopenmp -fPIC $(SHLIB_OPENMP_CFLAGS)
PKG_LIBS = $(SHLIB_OPENMP_CFLAGS)
# add flag to build native code even in cross compiler
ifeq "$(WIN)" "64"
PKG_CPPFLAGS += -m64
endif
ifeq ($(no_omp),1)
PKG_CPPFLAGS += -DDISABLE_OPENMP
endif
CXXOBJ= xgboost_wrapper.o xgboost_io.o
OBJECTS= xgboost_R.o $(CXXOBJ)
.PHONY: all clean
all: $(SHLIB)
$(SHLIB): $(OBJECTS)
xgboost_wrapper.o: ../../wrapper/xgboost_wrapper.cpp
xgboost_io.o: ../../src/io/io.cpp
$(CXXOBJ) :
$(CXX) -c $(PKG_CPPFLAGS) -o $@ $(firstword $(filter %.cpp %.c, $^) )
clean:
rm -rf *.so *.o *~ *.dll

R-package/src/xgboost_R.cpp

@ -0,0 +1,221 @@
#include <vector>
#include <string>
#include <utility>
#include <cstring>
#include "xgboost_R.h"
#include "../../wrapper/xgboost_wrapper.h"
#include "../../src/utils/utils.h"
#include "../../src/utils/omp.h"
#include "../../src/utils/matrix_csr.h"
using namespace xgboost;
// implements error handling
namespace xgboost {
namespace utils {
void HandleAssertError(const char *msg) {
error("%s", msg);
}
void HandleCheckError(const char *msg) {
error("%s", msg);
}
} // namespace utils
} // namespace xgboost
extern "C" {
void _DMatrixFinalizer(SEXP ext) {
if (R_ExternalPtrAddr(ext) == NULL) return;
XGDMatrixFree(R_ExternalPtrAddr(ext));
R_ClearExternalPtr(ext);
}
SEXP XGDMatrixCreateFromFile_R(SEXP fname, SEXP silent) {
void *handle = XGDMatrixCreateFromFile(CHAR(asChar(fname)), asInteger(silent));
SEXP ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue));
R_RegisterCFinalizerEx(ret, _DMatrixFinalizer, TRUE);
UNPROTECT(1);
return ret;
}
SEXP XGDMatrixCreateFromMat_R(SEXP mat,
SEXP missing) {
SEXP dim = getAttrib(mat, R_DimSymbol);
int nrow = INTEGER(dim)[0];
int ncol = INTEGER(dim)[1];
double *din = REAL(mat);
std::vector<float> data(nrow * ncol);
#pragma omp parallel for schedule(static)
for (int i = 0; i < nrow; ++i) {
for (int j = 0; j < ncol; ++j) {
data[i * ncol +j] = din[i + nrow * j];
}
}
void *handle = XGDMatrixCreateFromMat(&data[0], nrow, ncol, asReal(missing));
SEXP ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue));
R_RegisterCFinalizerEx(ret, _DMatrixFinalizer, TRUE);
UNPROTECT(1);
return ret;
}
SEXP XGDMatrixCreateFromCSC_R(SEXP indptr,
SEXP indices,
SEXP data) {
const int *col_ptr = INTEGER(indptr);
const int *row_index = INTEGER(indices);
const double *col_data = REAL(data);
int ncol = length(indptr) - 1;
int ndata = length(data);
// transform into CSR format
std::vector<bst_ulong> row_ptr;
std::vector< std::pair<unsigned, float> > csr_data;
utils::SparseCSRMBuilder<std::pair<unsigned,float>, false, bst_ulong> builder(row_ptr, csr_data);
builder.InitBudget();
for (int i = 0; i < ncol; ++i) {
for (int j = col_ptr[i]; j < col_ptr[i+1]; ++j) {
builder.AddBudget(row_index[j]);
}
}
builder.InitStorage();
for (int i = 0; i < ncol; ++i) {
for (int j = col_ptr[i]; j < col_ptr[i+1]; ++j) {
builder.PushElem(row_index[j], std::make_pair(i, col_data[j]));
}
}
utils::Assert(csr_data.size() == static_cast<size_t>(ndata), "BUG CreateFromCSC");
std::vector<float> row_data(ndata);
std::vector<unsigned> col_index(ndata);
#pragma omp parallel for schedule(static)
for (int i = 0; i < ndata; ++i) {
col_index[i] = csr_data[i].first;
row_data[i] = csr_data[i].second;
}
void *handle = XGDMatrixCreateFromCSR(&row_ptr[0], &col_index[0], &row_data[0], row_ptr.size(), ndata );
SEXP ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue));
R_RegisterCFinalizerEx(ret, _DMatrixFinalizer, TRUE);
UNPROTECT(1);
return ret;
}
void XGDMatrixSaveBinary_R(SEXP handle, SEXP fname, SEXP silent) {
XGDMatrixSaveBinary(R_ExternalPtrAddr(handle),
CHAR(asChar(fname)), asInteger(silent));
}
void XGDMatrixSetInfo_R(SEXP handle, SEXP field, SEXP array) {
int len = length(array);
const char *name = CHAR(asChar(field));
if (!strcmp("group", name)) {
std::vector<unsigned> vec(len);
#pragma omp parallel for schedule(static)
for (int i = 0; i < len; ++i) {
vec[i] = static_cast<unsigned>(INTEGER(array)[i]);
}
XGDMatrixSetGroup(R_ExternalPtrAddr(handle), &vec[0], len);
return;
}
{
std::vector<float> vec(len);
#pragma omp parallel for schedule(static)
for (int i = 0; i < len; ++i) {
vec[i] = REAL(array)[i];
}
XGDMatrixSetFloatInfo(R_ExternalPtrAddr(handle),
CHAR(asChar(field)),
&vec[0], len);
}
}
SEXP XGDMatrixGetInfo_R(SEXP handle, SEXP field) {
bst_ulong olen;
const float *res = XGDMatrixGetFloatInfo(R_ExternalPtrAddr(handle),
CHAR(asChar(field)), &olen);
SEXP ret = PROTECT(allocVector(REALSXP, olen));
for (size_t i = 0; i < olen; ++i) {
REAL(ret)[i] = res[i];
}
UNPROTECT(1);
return ret;
}
// functions related to booster
void _BoosterFinalizer(SEXP ext) {
if (R_ExternalPtrAddr(ext) == NULL) return;
XGBoosterFree(R_ExternalPtrAddr(ext));
R_ClearExternalPtr(ext);
}
SEXP XGBoosterCreate_R(SEXP dmats) {
int len = length(dmats);
std::vector<void*> dvec;
for (int i = 0; i < len; ++i){
dvec.push_back(R_ExternalPtrAddr(VECTOR_ELT(dmats, i)));
}
void *handle = XGBoosterCreate(&dvec[0], dvec.size());
SEXP ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue));
R_RegisterCFinalizerEx(ret, _BoosterFinalizer, TRUE);
UNPROTECT(1);
return ret;
}
void XGBoosterSetParam_R(SEXP handle, SEXP name, SEXP val) {
XGBoosterSetParam(R_ExternalPtrAddr(handle),
CHAR(asChar(name)),
CHAR(asChar(val)));
}
void XGBoosterUpdateOneIter_R(SEXP handle, SEXP iter, SEXP dtrain) {
XGBoosterUpdateOneIter(R_ExternalPtrAddr(handle),
asInteger(iter),
R_ExternalPtrAddr(dtrain));
}
void XGBoosterBoostOneIter_R(SEXP handle, SEXP dtrain, SEXP grad, SEXP hess) {
utils::Check(length(grad) == length(hess), "gradient and hess must have same length");
int len = length(grad);
std::vector<float> tgrad(len), thess(len);
#pragma omp parallel for schedule(static)
for (int j = 0; j < len; ++j) {
tgrad[j] = REAL(grad)[j];
thess[j] = REAL(hess)[j];
}
XGBoosterBoostOneIter(R_ExternalPtrAddr(handle),
R_ExternalPtrAddr(dtrain),
&tgrad[0], &thess[0], len);
}
SEXP XGBoosterEvalOneIter_R(SEXP handle, SEXP iter, SEXP dmats, SEXP evnames) {
utils::Check(length(dmats) == length(evnames), "dmats and evnames must have same length");
int len = length(dmats);
std::vector<void*> vec_dmats;
std::vector<std::string> vec_names;
std::vector<const char*> vec_sptr;
for (int i = 0; i < len; ++i) {
vec_dmats.push_back(R_ExternalPtrAddr(VECTOR_ELT(dmats, i)));
vec_names.push_back(std::string(CHAR(asChar(VECTOR_ELT(evnames, i)))));
}
for (int i = 0; i < len; ++i) {
vec_sptr.push_back(vec_names[i].c_str());
}
return mkString(XGBoosterEvalOneIter(R_ExternalPtrAddr(handle),
asInteger(iter),
&vec_dmats[0], &vec_sptr[0], len));
}
SEXP XGBoosterPredict_R(SEXP handle, SEXP dmat, SEXP output_margin) {
bst_ulong olen;
const float *res = XGBoosterPredict(R_ExternalPtrAddr(handle),
R_ExternalPtrAddr(dmat),
asInteger(output_margin),
&olen);
SEXP ret = PROTECT(allocVector(REALSXP, olen));
for (size_t i = 0; i < olen; ++i) {
REAL(ret)[i] = res[i];
}
UNPROTECT(1);
return ret;
}
void XGBoosterLoadModel_R(SEXP handle, SEXP fname) {
XGBoosterLoadModel(R_ExternalPtrAddr(handle), CHAR(asChar(fname)));
}
void XGBoosterSaveModel_R(SEXP handle, SEXP fname) {
XGBoosterSaveModel(R_ExternalPtrAddr(handle), CHAR(asChar(fname)));
}
void XGBoosterDumpModel_R(SEXP handle, SEXP fname, SEXP fmap) {
bst_ulong olen;
const char **res = XGBoosterDumpModel(R_ExternalPtrAddr(handle),
CHAR(asChar(fmap)),
&olen);
FILE *fo = utils::FopenCheck(CHAR(asChar(fname)), "w");
for (size_t i = 0; i < olen; ++i) {
fprintf(fo, "booster[%u]:\n", static_cast<unsigned>(i));
fprintf(fo, "%s", res[i]);
}
fclose(fo);
}
}

R-package/src/xgboost_R.h

@ -0,0 +1,124 @@
#ifndef XGBOOST_WRAPPER_R_H_
#define XGBOOST_WRAPPER_R_H_
/*!
* \file xgboost_R.h
* \author Tianqi Chen
* \brief R wrapper of xgboost
*/
extern "C" {
#include <Rinternals.h>
}
extern "C" {
/*!
* \brief load a data matrix
* \param fname name of the file to load
* \param silent whether to print messages
* \return a loaded data matrix
*/
SEXP XGDMatrixCreateFromFile_R(SEXP fname, SEXP silent);
/*!
* \brief create matrix content from dense matrix
* This assumes the matrix is stored in column major format
* \param data R Matrix object
* \param missing which value to represent missing value
* \return created dmatrix
*/
SEXP XGDMatrixCreateFromMat_R(SEXP mat,
SEXP missing);
/*!
* \brief create a matrix content from CSC format
* \param indptr pointer to column headers
* \param indices row indices
* \param data content of the data
* \return created dmatrix
*/
SEXP XGDMatrixCreateFromCSC_R(SEXP indptr,
SEXP indices,
SEXP data);
/*!
* \brief save a data matrix into a binary file
* \param handle an instance of data matrix
* \param fname file name
* \param silent whether to print statistics when saving
*/
void XGDMatrixSaveBinary_R(SEXP handle, SEXP fname, SEXP silent);
/*!
* \brief set information to dmatrix
* \param handle an instance of data matrix
* \param field field name, can be label, weight
* \param array pointer to float vector
*/
void XGDMatrixSetInfo_R(SEXP handle, SEXP field, SEXP array);
/*!
* \brief get info vector from matrix
* \param handle an instance of data matrix
* \param field field name
* \return info vector
*/
SEXP XGDMatrixGetInfo_R(SEXP handle, SEXP field);
/*!
* \brief create xgboost learner
* \param dmats a list of dmatrix handles that will be cached
*/
SEXP XGBoosterCreate_R(SEXP dmats);
/*!
* \brief set parameters
* \param handle handle
* \param name parameter name
* \param val value of parameter
*/
void XGBoosterSetParam_R(SEXP handle, SEXP name, SEXP val);
/*!
* \brief update the model in one round using dtrain
* \param handle handle
* \param iter current iteration rounds
* \param dtrain training data
*/
void XGBoosterUpdateOneIter_R(SEXP handle, SEXP iter, SEXP dtrain);
/*!
* \brief update the model, by directly specify gradient and second order gradient,
* this can be used to replace UpdateOneIter, to support customized loss function
* \param handle handle
* \param dtrain training data
* \param grad gradient statistics
* \param hess second order gradient statistics
*/
void XGBoosterBoostOneIter_R(SEXP handle, SEXP dtrain, SEXP grad, SEXP hess);
/*!
* \brief get evaluation statistics for xgboost
* \param handle handle
* \param iter current iteration rounds
* \param dmats list of handles to dmatrices
* \param evnames names of the evaluation data sets
* \return the string containing evaluation statistics
*/
SEXP XGBoosterEvalOneIter_R(SEXP handle, SEXP iter, SEXP dmats, SEXP evnames);
/*!
* \brief make prediction based on dmat
* \param handle handle
* \param dmat data matrix
* \param output_margin whether only output raw margin value
*/
SEXP XGBoosterPredict_R(SEXP handle, SEXP dmat, SEXP output_margin);
/*!
* \brief load model from existing file
* \param handle handle
* \param fname file name
*/
void XGBoosterLoadModel_R(SEXP handle, SEXP fname);
/*!
* \brief save model into existing file
* \param handle handle
* \param fname file name
*/
void XGBoosterSaveModel_R(SEXP handle, SEXP fname);
/*!
* \brief dump model into text file
* \param handle handle
* \param fname file name to dump the model into
* \param fmap feature map file name, can be an empty string
*/
void XGBoosterDumpModel_R(SEXP handle, SEXP fname, SEXP fmap);
};
#endif // XGBOOST_WRAPPER_R_H_


@ -6,13 +6,13 @@ objective = binary:logistic
# Tree Booster Parameters
# step size shrinkage
bst:eta = 1.0
eta = 1.0
# minimum loss reduction required to make a further partition
bst:gamma = 1.0
gamma = 1.0
# minimum sum of instance weight(hessian) needed in a child
bst:min_child_weight = 1
min_child_weight = 1
# maximum depth of a tree
bst:max_depth = 3
max_depth = 3
# Task Parameters
# the number of rounds to do boosting


@ -42,8 +42,8 @@ param = {}
param['objective'] = 'binary:logitraw'
# scale weight of positive examples
param['scale_pos_weight'] = sum_wneg/sum_wpos
param['bst:eta'] = 0.1
param['bst:max_depth'] = 6
param['eta'] = 0.1
param['max_depth'] = 6
param['eval_metric'] = 'auc'
param['silent'] = 1
param['nthread'] = 16


@ -25,8 +25,8 @@ param = {}
# use softmax multi-class classification
param['objective'] = 'multi:softmax'
# scale weight of positive examples
param['bst:eta'] = 0.1
param['bst:max_depth'] = 6
param['eta'] = 0.1
param['max_depth'] = 6
param['silent'] = 1
param['nthread'] = 4
param['num_class'] = 6


@ -5,13 +5,13 @@ objective="rank:pairwise"
# Tree Booster Parameters
# step size shrinkage
bst:eta = 0.1
eta = 0.1
# minimum loss reduction required to make a further partition
bst:gamma = 1.0
gamma = 1.0
# minimum sum of instance weight(hessian) needed in a child
bst:min_child_weight = 0.1
min_child_weight = 0.1
# maximum depth of a tree
bst:max_depth = 6
max_depth = 6
# Task parameters
# the number of rounds to do boosting


@ -7,13 +7,13 @@ objective = reg:linear
# Tree Booster Parameters
# step size shrinkage
bst:eta = 1.0
eta = 1.0
# minimum loss reduction required to make a further partition
bst:gamma = 1.0
gamma = 1.0
# minimum sum of instance weight(hessian) needed in a child
bst:min_child_weight = 1
min_child_weight = 1
# maximum depth of a tree
bst:max_depth = 3
max_depth = 3
# Task parameters
# the number of rounds to do boosting


@ -12,6 +12,7 @@
#include <cstring>
#include <algorithm>
#include "utils/io.h"
#include "utils/omp.h"
#include "utils/utils.h"
#include "utils/iterator.h"
#include "utils/random.h"
@ -44,6 +45,10 @@ struct bst_gpair {
* this information is not necessarily present, and can be empty
*/
struct BoosterInfo {
/*! \brief number of rows in the data */
size_t num_row;
/*! \brief number of columns in the data */
size_t num_col;
/*!
* \brief specified root index of each instance,
* can be used for multi task setting
@ -51,6 +56,9 @@ struct BoosterInfo {
std::vector<unsigned> root_index;
/*! \brief set fold indicator */
std::vector<unsigned> fold_index;
/*! \brief default constructor: zero rows and columns */
BoosterInfo(void) : num_row(0), num_col(0) {
}
/*! \brief get root of ith instance */
inline unsigned GetRoot(size_t i) const {
return root_index.size() == 0 ? 0 : root_index[i];
@ -96,7 +104,7 @@ struct SparseBatch {
const Entry *data_ptr;
/*! \brief get i-th row from the batch */
inline Inst operator[](size_t i) const {
return Inst(data_ptr + row_ptr[i], row_ptr[i+1] - row_ptr[i]);
return Inst(data_ptr + row_ptr[i], static_cast<bst_uint>(row_ptr[i+1] - row_ptr[i]));
}
};
@ -334,7 +342,7 @@ class FMatrixS : public FMatrixInterface<FMatrixS>{
const SparseBatch &batch = iter_->Value();
for (size_t i = 0; i < batch.size; ++i) {
if (pkeep == 1.0f || random::SampleBinary(pkeep)) {
buffered_rowset_.push_back(batch.base_rowid+i);
buffered_rowset_.push_back(static_cast<bst_uint>(batch.base_rowid+i));
SparseBatch::Inst inst = batch[i];
for (bst_uint j = 0; j < inst.length; ++j) {
builder.AddBudget(inst[j].findex);
@ -363,11 +371,11 @@ class FMatrixS : public FMatrixInterface<FMatrixS>{
}
// sort columns
unsigned ncol = static_cast<unsigned>(this->NumCol());
bst_omp_uint ncol = static_cast<bst_omp_uint>(this->NumCol());
#pragma omp parallel for schedule(static)
for (unsigned i = 0; i < ncol; ++i) {
std::sort(&col_data_[col_ptr_[i]],
&col_data_[col_ptr_[i + 1]], Entry::CmpValue);
for (bst_omp_uint i = 0; i < ncol; ++i) {
std::sort(&col_data_[0] + col_ptr_[i],
&col_data_[0] + col_ptr_[i + 1], Entry::CmpValue);
}
}


@ -51,20 +51,21 @@ class GBLinear : public IGradBooster<FMatrix> {
// for all the output group
for (int gid = 0; gid < ngroup; ++gid) {
double sum_grad = 0.0, sum_hess = 0.0;
const unsigned ndata = static_cast<unsigned>(rowset.size());
const bst_omp_uint ndata = static_cast<bst_omp_uint>(rowset.size());
#pragma omp parallel for schedule(static) reduction(+: sum_grad, sum_hess)
for (unsigned i = 0; i < ndata; ++i) {
for (bst_omp_uint i = 0; i < ndata; ++i) {
bst_gpair &p = gpair[rowset[i] * ngroup + gid];
if (p.hess >= 0.0f) {
sum_grad += p.grad; sum_hess += p.hess;
}
}
// remove bias effect
double dw = param.learning_rate * param.CalcDeltaBias(sum_grad, sum_hess, model.bias()[gid]);
bst_float dw = static_cast<bst_float>(
param.learning_rate * param.CalcDeltaBias(sum_grad, sum_hess, model.bias()[gid]));
model.bias()[gid] += dw;
// update grad value
#pragma omp parallel for schedule(static)
for (unsigned i = 0; i < ndata; ++i) {
for (bst_omp_uint i = 0; i < ndata; ++i) {
bst_gpair &p = gpair[rowset[i] * ngroup + gid];
if (p.hess >= 0.0f) {
p.grad += p.hess * dw;
@ -72,9 +73,9 @@ class GBLinear : public IGradBooster<FMatrix> {
}
}
// number of features
const unsigned nfeat = static_cast<unsigned>(feat_index.size());
const bst_omp_uint nfeat = static_cast<bst_omp_uint>(feat_index.size());
#pragma omp parallel for schedule(static)
for (unsigned i = 0; i < nfeat; ++i) {
for (bst_omp_uint i = 0; i < nfeat; ++i) {
const bst_uint fid = feat_index[i];
for (int gid = 0; gid < ngroup; ++gid) {
double sum_grad = 0.0, sum_hess = 0.0;
@ -86,7 +87,7 @@ class GBLinear : public IGradBooster<FMatrix> {
sum_hess += p.hess * v * v;
}
float &w = model[fid][gid];
double dw = param.learning_rate * param.CalcDelta(sum_grad, sum_hess, w);
bst_float dw = static_cast<bst_float>(param.learning_rate * param.CalcDelta(sum_grad, sum_hess, w));
w += dw;
// update grad value
for (typename FMatrix::ColIter it = fmat.GetSortedCol(fid); it.Next();) {
@ -116,9 +117,9 @@ class GBLinear : public IGradBooster<FMatrix> {
// k is number of group
preds.resize(preds.size() + batch.size * ngroup);
// parallel over local batch
const unsigned nsize = static_cast<unsigned>(batch.size);
const bst_omp_uint nsize = static_cast<bst_omp_uint>(batch.size);
#pragma omp parallel for schedule(static)
for (unsigned i = 0; i < nsize; ++i) {
for (bst_omp_uint i = 0; i < nsize; ++i) {
const size_t ridx = batch.base_rowid + i;
// loop over output groups
for (int gid = 0; gid < ngroup; ++gid) {


@ -94,8 +94,9 @@ class GBTree : public IGradBooster<FMatrix> {
"must have exactly ngroup*nrow gpairs");
std::vector<bst_gpair> tmp(gpair.size()/ngroup);
for (int gid = 0; gid < ngroup; ++gid) {
bst_omp_uint nsize = static_cast<bst_omp_uint>(tmp.size());
#pragma omp parallel for schedule(static)
for (size_t i = 0; i < tmp.size(); ++i) {
for (bst_omp_uint i = 0; i < nsize; ++i) {
tmp[i] = gpair[i * ngroup + gid];
}
this->BoostNewTrees(tmp, fmat, info, gid);
@ -129,13 +130,13 @@ class GBTree : public IGradBooster<FMatrix> {
// k is number of group
preds.resize(preds.size() + batch.size * mparam.num_output_group);
// parallel over local batch
const unsigned nsize = static_cast<unsigned>(batch.size);
const bst_omp_uint nsize = static_cast<bst_omp_uint>(batch.size);
#pragma omp parallel for schedule(static)
for (unsigned i = 0; i < nsize; ++i) {
for (bst_omp_uint i = 0; i < nsize; ++i) {
const int tid = omp_get_thread_num();
tree::RegTree::FVec &feats = thread_temp[tid];
const size_t ridx = batch.base_rowid + i;
const unsigned root_idx = info.GetRoot(i);
int64_t ridx = static_cast<int64_t>(batch.base_rowid + i);
const unsigned root_idx = info.GetRoot(ridx);
// loop over output groups
for (int gid = 0; gid < mparam.num_output_group; ++gid) {
preds[ridx * mparam.num_output_group + gid] =
@ -172,15 +173,15 @@ class GBTree : public IGradBooster<FMatrix> {
}
updaters.clear();
std::string tval = tparam.updater_seq;
char *saveptr, *pstr;
pstr = strtok_r(&tval[0], ",", &saveptr);
char *pstr;
pstr = strtok(&tval[0], ",");
while (pstr != NULL) {
updaters.push_back(tree::CreateUpdater<FMatrix>(pstr));
for (size_t j = 0; j < cfg.size(); ++j) {
// set parameters
updaters.back()->SetParam(cfg[j].first.c_str(), cfg[j].second.c_str());
}
pstr = strtok_r(NULL, ",", &saveptr);
pstr = strtok(NULL, ",");
}
tparam.updater_initialized = 1;
}
@ -218,7 +219,7 @@ class GBTree : public IGradBooster<FMatrix> {
tree::RegTree::FVec *p_feats) {
size_t itop = 0;
float psum = 0.0f;
const int bid = mparam.BufferOffset(buffer_index, bst_group);
const int64_t bid = mparam.BufferOffset(buffer_index, bst_group);
// load buffered results if any
if (bid >= 0) {
itop = pred_counter[bid];
@ -320,7 +321,7 @@ class GBTree : public IGradBooster<FMatrix> {
* \brief get the buffer offset given a buffer index and group id
* \return calculated buffer offset
*/
inline size_t BufferOffset(int64_t buffer_index, int bst_group) const {
inline int64_t BufferOffset(int64_t buffer_index, int bst_group) const {
if (buffer_index < 0) return -1;
utils::Check(buffer_index < num_pbuffer, "buffer_index exceed num_pbuffer");
return buffer_index + num_pbuffer * bst_group;


@ -2,6 +2,7 @@
#define _CRT_SECURE_NO_DEPRECATE
#include <string>
#include "./io.h"
#include "../utils/io.h"
#include "../utils/utils.h"
#include "simple_dmatrix-inl.hpp"
// implements data loads using dmatrix simple for now
@ -9,6 +10,19 @@
namespace xgboost {
namespace io {
DataMatrix* LoadDataMatrix(const char *fname, bool silent, bool savebuffer) {
int magic;
utils::FileStream fs(utils::FopenCheck(fname, "rb"));
utils::Check(fs.Read(&magic, sizeof(magic)) != 0, "invalid input file format");
fs.Seek(0);
if (magic == DMatrixSimple::kMagic) {
DMatrixSimple *dmat = new DMatrixSimple();
dmat->LoadBinary(fs, silent, fname);
fs.Close();
return dmat;
}
fs.Close();
DMatrixSimple *dmat = new DMatrixSimple();
dmat->CacheLoad(fname, silent, savebuffer);
return dmat;


@ -62,10 +62,10 @@ class DMatrixSimple : public DataMatrix {
inline size_t AddRow(const std::vector<SparseBatch::Entry> &feats) {
for (size_t i = 0; i < feats.size(); ++i) {
row_data_.push_back(feats[i]);
info.num_col = std::max(info.num_col, static_cast<size_t>(feats[i].findex+1));
info.info.num_col = std::max(info.info.num_col, static_cast<size_t>(feats[i].findex+1));
}
row_ptr_.push_back(row_ptr_.back() + feats.size());
info.num_row += 1;
info.info.num_row += 1;
return row_ptr_.size() - 2;
}
/*!
@ -99,19 +99,19 @@ class DMatrixSimple : public DataMatrix {
if (!silent) {
printf("%lux%lu matrix with %lu entries is loaded from %s\n",
info.num_row, info.num_col, row_data_.size(), fname);
info.num_row(), info.num_col(), row_data_.size(), fname);
}
fclose(file);
// try to load in additional file
std::string name = fname;
std::string gname = name + ".group";
if (info.TryLoadGroup(gname.c_str(), silent)) {
utils::Check(info.group_ptr.back() == info.num_row,
utils::Check(info.group_ptr.back() == info.num_row(),
"DMatrix: group data does not match the number of rows in features");
}
std::string wname = name + ".weight";
if (info.TryLoadFloatInfo("weight", wname.c_str(), silent)) {
utils::Check(info.weights.size() == info.num_row,
utils::Check(info.weights.size() == info.num_row(),
"DMatrix: weight data does not match the number of rows in features");
}
std::string mname = name + ".base_margin";
@ -128,6 +128,17 @@ class DMatrixSimple : public DataMatrix {
FILE *fp = fopen64(fname, "rb");
if (fp == NULL) return false;
utils::FileStream fs(fp);
this->LoadBinary(fs, silent, fname);
fs.Close();
return true;
}
/*!
* \brief load from binary stream
* \param fs input file stream
* \param silent whether print information during loading
* \param fname file name, used to print message
*/
inline void LoadBinary(utils::IStream &fs, bool silent = false, const char *fname = NULL) {
int magic;
utils::Check(fs.Read(&magic, sizeof(magic)) != 0, "invalid input file format");
utils::Check(magic == kMagic, "invalid format,magic number mismatch");
@ -135,16 +146,19 @@ class DMatrixSimple : public DataMatrix {
info.LoadBinary(fs);
FMatrixS::LoadBinary(fs, &row_ptr_, &row_data_);
fmat.LoadColAccess(fs);
fs.Close();
if (!silent) {
printf("%lux%lu matrix with %lu entries is loaded from %s\n",
info.num_row, info.num_col, row_data_.size(), fname);
printf("%lux%lu matrix with %lu entries is loaded",
info.num_row(), info.num_col(), row_data_.size());
if (fname != NULL) {
printf(" from %s\n", fname);
} else {
printf("\n");
}
if (info.group_ptr.size() != 0) {
printf("data contains %u groups\n", (unsigned)info.group_ptr.size()-1);
}
}
return true;
}
/*!
* \brief save to binary file
@ -163,7 +177,7 @@ class DMatrixSimple : public DataMatrix {
if (!silent) {
printf("%lux%lu matrix with %lu entries is saved to %s\n",
info.num_row, info.num_col, row_data_.size(), fname);
info.num_row(), info.num_col(), row_data_.size(), fname);
if (info.group_ptr.size() != 0) {
printf("data contains %lu groups\n", info.group_ptr.size()-1);
}
@ -179,7 +193,7 @@ class DMatrixSimple : public DataMatrix {
* \param savebuffer whether do save binary buffer if it is text
*/
inline void CacheLoad(const char *fname, bool silent = false, bool savebuffer = true) {
int len = strlen(fname);
size_t len = strlen(fname);
if (len > 8 && !strcmp(fname + len - 7, ".buffer")) {
if (!this->LoadBinary(fname, silent)) {
utils::Error("can not open file \"%s\"", fname);


@ -15,10 +15,12 @@ namespace learner {
* \brief meta information needed in training, including label, weight
*/
struct MetaInfo {
/*! \brief number of rows in the data */
size_t num_row;
/*! \brief number of columns in the data */
size_t num_col;
/*!
* \brief information needed by booster
* BoosterInfo does not implement save and load,
* all serialization is done in MetaInfo
*/
BoosterInfo info;
/*! \brief label of each instance */
std::vector<float> labels;
/*!
@ -28,8 +30,6 @@ struct MetaInfo {
std::vector<bst_uint> group_ptr;
/*! \brief weights of each instance, optional */
std::vector<float> weights;
/*! \brief information needed by booster */
BoosterInfo info;
/*!
* \brief initialized margins,
* if specified, xgboost will start from this init margin
@ -39,7 +39,15 @@ struct MetaInfo {
/*! \brief version flag, used to check version of this info */
static const int kVersion = 0;
// constructor
MetaInfo(void) : num_row(0), num_col(0) {}
MetaInfo(void) {}
/*! \return number of rows in dataset */
inline size_t num_row(void) const {
return info.num_row;
}
/*! \return number of columns in dataset */
inline size_t num_col(void) const {
return info.num_col;
}
/*! \brief clear all the information */
inline void Clear(void) {
labels.clear();
@ -47,7 +55,7 @@ struct MetaInfo {
weights.clear();
info.root_index.clear();
base_margin.clear();
num_row = num_col = 0;
info.num_row = info.num_col = 0;
}
/*! \brief get weight of each instances */
inline float GetWeight(size_t i) const {
@ -60,8 +68,8 @@ struct MetaInfo {
inline void SaveBinary(utils::IStream &fo) const {
int version = kVersion;
fo.Write(&version, sizeof(version));
fo.Write(&num_row, sizeof(num_row));
fo.Write(&num_col, sizeof(num_col));
fo.Write(&info.num_row, sizeof(info.num_row));
fo.Write(&info.num_col, sizeof(info.num_col));
fo.Write(labels);
fo.Write(group_ptr);
fo.Write(weights);
@ -70,9 +78,9 @@ struct MetaInfo {
}
inline void LoadBinary(utils::IStream &fi) {
int version;
utils::Check(fi.Read(&version, sizeof(version)), "MetaInfo: invalid format");
utils::Check(fi.Read(&num_row, sizeof(num_row)), "MetaInfo: invalid format");
utils::Check(fi.Read(&num_col, sizeof(num_col)), "MetaInfo: invalid format");
utils::Check(fi.Read(&version, sizeof(version)) != 0, "MetaInfo: invalid format");
utils::Check(fi.Read(&info.num_row, sizeof(info.num_row)) != 0, "MetaInfo: invalid format");
utils::Check(fi.Read(&info.num_col, sizeof(info.num_col)) != 0, "MetaInfo: invalid format");
utils::Check(fi.Read(&labels), "MetaInfo: invalid format");
utils::Check(fi.Read(&group_ptr), "MetaInfo: invalid format");
utils::Check(fi.Read(&weights), "MetaInfo: invalid format");
@ -94,19 +102,28 @@ struct MetaInfo {
fclose(fi);
return true;
}
inline std::vector<float>& GetInfo(const char *field) {
inline std::vector<float>& GetFloatInfo(const char *field) {
if (!strcmp(field, "label")) return labels;
if (!strcmp(field, "weight")) return weights;
if (!strcmp(field, "base_margin")) return base_margin;
utils::Error("unknown field %s", field);
return labels;
}
inline const std::vector<float>& GetInfo(const char *field) const {
return ((MetaInfo*)this)->GetInfo(field);
inline const std::vector<float>& GetFloatInfo(const char *field) const {
return ((MetaInfo*)this)->GetFloatInfo(field);
}
inline std::vector<unsigned> &GetUIntInfo(const char *field) {
if (!strcmp(field, "root_index")) return info.root_index;
if (!strcmp(field, "fold_index")) return info.fold_index;
utils::Error("unknown field %s", field);
return info.root_index;
}
inline const std::vector<unsigned> &GetUIntInfo(const char *field) const {
return ((MetaInfo*)this)->GetUIntInfo(field);
}
// try to load float field information from file, if it exists
inline bool TryLoadFloatInfo(const char *field, const char* fname, bool silent = false) {
std::vector<float> &weights = this->GetInfo(field);
std::vector<float> &weights = this->GetFloatInfo(field);
FILE *fi = fopen64(fname, "r");
if (fi == NULL) return false;
float wt;
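
The net effect of the MetaInfo changes above: the row/column counts now live on the embedded BoosterInfo and are reached through accessors, and the typed getters are split by element type. A minimal sketch of the new call sites, assuming only the declarations shown above (the info variable is hypothetical):

learner::MetaInfo info;
size_t nrow = info.num_row();                                   // was the field info.num_row
std::vector<float> &labels = info.GetFloatInfo("label");        // was GetInfo("label")
std::vector<unsigned> &roots = info.GetUIntInfo("root_index");  // new: unsigned-typed fields
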


@ -26,10 +26,10 @@ struct EvalEWiseBase : public IEvaluator {
const MetaInfo &info) const {
utils::Check(preds.size() == info.labels.size(),
"label and prediction size not match");
const unsigned ndata = static_cast<unsigned>(preds.size());
const bst_omp_uint ndata = static_cast<bst_omp_uint>(preds.size());
float sum = 0.0, wsum = 0.0;
#pragma omp parallel for reduction(+: sum, wsum) schedule(static)
for (unsigned i = 0; i < ndata; ++i) {
for (bst_omp_uint i = 0; i < ndata; ++i) {
const float wt = info.GetWeight(i);
sum += Derived::EvalRow(info.labels[i], preds[i]) * wt;
wsum += wt;
@ -109,12 +109,12 @@ struct EvalAMS : public IEvaluator {
}
virtual float Eval(const std::vector<float> &preds,
const MetaInfo &info) const {
const unsigned ndata = static_cast<unsigned>(preds.size());
const bst_omp_uint ndata = static_cast<bst_omp_uint>(preds.size());
utils::Check(info.weights.size() == ndata, "we need weight to evaluate ams");
std::vector< std::pair<float, unsigned> > rec(ndata);
#pragma omp parallel for schedule(static)
for (unsigned i = 0; i < ndata; ++i) {
for (bst_omp_uint i = 0; i < ndata; ++i) {
rec[i] = std::make_pair(preds[i], i);
}
std::sort(rec.begin(), rec.end(), CmpFirst);
@ -123,7 +123,7 @@ struct EvalAMS : public IEvaluator {
const double br = 10.0;
unsigned thresindex = 0;
double s_tp = 0.0, b_fp = 0.0, tams = 0.0;
for (unsigned i = 0; i < ndata-1 && i < ntop; ++i) {
for (unsigned i = 0; i < static_cast<unsigned>(ndata-1) && i < ntop; ++i) {
const unsigned ridx = rec[i].second;
const float wt = info.weights[ridx];
if (info.labels[ridx] > 0.5f) {
@ -132,7 +132,7 @@ struct EvalAMS : public IEvaluator {
b_fp += wt;
}
if (rec[i].first != rec[i+1].first) {
double ams = sqrtf(2*((s_tp+b_fp+br) * log(1.0 + s_tp/(b_fp+br)) - s_tp));
double ams = sqrt(2*((s_tp+b_fp+br) * log(1.0 + s_tp/(b_fp+br)) - s_tp));
if (tams < ams) {
thresindex = i;
tams = ams;
@ -141,9 +141,9 @@ struct EvalAMS : public IEvaluator {
}
if (ntop == ndata) {
fprintf(stderr, "\tams-ratio=%g", static_cast<float>(thresindex) / ndata);
return tams;
return static_cast<float>(tams);
} else {
return sqrtf(2*((s_tp+b_fp+br) * log(1.0 + s_tp/(b_fp+br)) - s_tp));
return static_cast<float>(sqrt(2*((s_tp+b_fp+br) * log(1.0 + s_tp/(b_fp+br)) - s_tp)));
}
}
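
For reference, with s the weighted sum of true positives (s_tp above), b the weighted sum of false positives (b_fp), and the regularization term b_r = 10, the quantity this loop maximizes over thresholds is the approximate median significance:

\mathrm{AMS} = \sqrt{2\left((s + b + b_r)\,\ln\!\left(1 + \frac{s}{b + b_r}\right) - s\right)}
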
virtual const char *Name(void) const {
@ -171,7 +171,7 @@ struct EvalPrecisionRatio : public IEvaluator{
utils::Assert(preds.size() == info.labels.size(), "label size predict size not match");
std::vector< std::pair<float, unsigned> > rec;
for (size_t j = 0; j < preds.size(); ++j) {
rec.push_back(std::make_pair(preds[j], j));
rec.push_back(std::make_pair(preds[j], static_cast<unsigned>(j)));
}
std::sort(rec.begin(), rec.end(), CmpFirst);
double pratio = CalcPRatio(rec, info);
@ -207,11 +207,11 @@ struct EvalAuc : public IEvaluator {
virtual float Eval(const std::vector<float> &preds,
const MetaInfo &info) const {
utils::Check(preds.size() == info.labels.size(), "label size predict size not match");
std::vector<unsigned> tgptr(2, 0); tgptr[1] = preds.size();
std::vector<unsigned> tgptr(2, 0); tgptr[1] = static_cast<unsigned>(preds.size());
const std::vector<unsigned> &gptr = info.group_ptr.size() == 0 ? tgptr : info.group_ptr;
utils::Check(gptr.back() == preds.size(),
"EvalAuc: group structure must match number of prediction");
const unsigned ngroup = static_cast<unsigned>(gptr.size() - 1);
const bst_omp_uint ngroup = static_cast<bst_omp_uint>(gptr.size() - 1);
// sum statistics
double sum_auc = 0.0f;
#pragma omp parallel reduction(+:sum_auc)
@ -219,7 +219,7 @@ struct EvalAuc : public IEvaluator {
// each thread takes a local rec
std::vector< std::pair<float, unsigned> > rec;
#pragma omp for schedule(static)
for (unsigned k = 0; k < ngroup; ++k) {
for (bst_omp_uint k = 0; k < ngroup; ++k) {
rec.clear();
for (unsigned j = gptr[k]; j < gptr[k + 1]; ++j) {
rec.push_back(std::make_pair(preds[j], j));
@ -264,12 +264,12 @@ struct EvalRankList : public IEvaluator {
utils::Check(preds.size() == info.labels.size(),
"label size predict size not match");
// quick consistency when group is not available
std::vector<unsigned> tgptr(2, 0); tgptr[1] = preds.size();
std::vector<unsigned> tgptr(2, 0); tgptr[1] = static_cast<unsigned>(preds.size());
const std::vector<unsigned> &gptr = info.group_ptr.size() == 0 ? tgptr : info.group_ptr;
utils::Assert(gptr.size() != 0, "must specify group when constructing rank file");
utils::Assert(gptr.back() == preds.size(),
"EvalRanklist: group structure must match number of prediction");
const unsigned ngroup = static_cast<unsigned>(gptr.size() - 1);
const bst_omp_uint ngroup = static_cast<bst_omp_uint>(gptr.size() - 1);
// sum statistics
double sum_metric = 0.0f;
#pragma omp parallel reduction(+:sum_metric)
@ -277,7 +277,7 @@ struct EvalRankList : public IEvaluator {
// each thread takes a local rec
std::vector< std::pair<float, unsigned> > rec;
#pragma omp for schedule(static)
for (unsigned k = 0; k < ngroup; ++k) {
for (bst_omp_uint k = 0; k < ngroup; ++k) {
rec.clear();
for (unsigned j = gptr[k]; j < gptr[k + 1]; ++j) {
rec.push_back(std::make_pair(preds[j], static_cast<int>(info.labels[j])));
@ -339,7 +339,7 @@ struct EvalNDCG : public EvalRankList{
for (size_t i = 0; i < rec.size() && i < this->topn_; ++i) {
const unsigned rel = rec[i].second;
if (rel != 0) {
sumdcg += ((1 << rel) - 1) / logf(i + 2);
sumdcg += ((1 << rel) - 1) / log(i + 2.0);
}
}
return static_cast<float>(sumdcg);
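
The switch from logf(i + 2) to log(i + 2.0) above only changes the precision of the discount; the value computed is still the exponential-gain DCG with a natural-log discount, where rel_i is the integer relevance at 0-based position i:

\mathrm{DCG@n} = \sum_{i=0}^{n-1} \frac{2^{rel_i} - 1}{\ln(i + 2)}
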


@ -7,6 +7,7 @@
*/
#include <string>
#include <vector>
#include <cstdio>
#include "../utils/utils.h"
#include "./dmatrix.h"


@ -58,9 +58,9 @@ class BoostLearner {
if (dupilicate) continue;
// set mats[i]'s cache learner pointer to this
mats[i]->cache_learner_ptr_ = this;
cache_.push_back(CacheEntry(mats[i], buffer_size, mats[i]->info.num_row));
buffer_size += mats[i]->info.num_row;
num_feature = std::max(num_feature, static_cast<unsigned>(mats[i]->info.num_col));
cache_.push_back(CacheEntry(mats[i], buffer_size, mats[i]->info.num_row()));
buffer_size += mats[i]->info.num_row();
num_feature = std::max(num_feature, static_cast<unsigned>(mats[i]->info.num_col()));
}
char str_temp[25];
if (num_feature > mparam.num_feature) {
@ -79,6 +79,11 @@ class BoostLearner {
* \param val value of the parameter
*/
inline void SetParam(const char *name, const char *val) {
// in this version, bst: prefix is no longer required
if (strncmp(name, "bst:", 4) != 0) {
std::string n = "bst:"; n += name;
this->SetParam(n.c_str(), val);
}
if (!strcmp(name, "silent")) silent = atoi(val);
if (!strcmp(name, "prob_buffer_row")) prob_buffer_row = static_cast<float>(atof(val));
if (!strcmp(name, "eval_metric")) evaluator_.AddEval(val);
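
So the bst: prefix becomes optional: an unprefixed name is re-dispatched once with the prefix attached, and both spellings reach the same parameter. A sketch, assuming a learner instantiated as in the wrapper code later in this diff:

learner::BoostLearner<FMatrixS> bst;
bst.SetParam("bst:eta", "0.1");  // old spelling, still accepted
bst.SetParam("eta", "0.1");      // new spelling, forwarded internally as "bst:eta"
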
@ -91,7 +96,7 @@ class BoostLearner {
if (!strcmp(name, "objective")) name_obj_ = val;
if (!strcmp(name, "booster")) name_gbm_ = val;
mparam.SetParam(name, val);
}
}
if (gbm_ != NULL) gbm_->SetParam(name, val);
if (obj_ != NULL) obj_->SetParam(name, val);
if (gbm_ == NULL || obj_ == NULL) {
@ -248,17 +253,17 @@ class BoostLearner {
data.info.info, out_preds);
// add base margin
std::vector<float> &preds = *out_preds;
const unsigned ndata = static_cast<unsigned>(preds.size());
const bst_omp_uint ndata = static_cast<bst_omp_uint>(preds.size());
if (data.info.base_margin.size() != 0) {
utils::Check(preds.size() == data.info.base_margin.size(),
"base_margin.size does not match with prediction size");
#pragma omp parallel for schedule(static)
for (unsigned j = 0; j < ndata; ++j) {
for (bst_omp_uint j = 0; j < ndata; ++j) {
preds[j] += data.info.base_margin[j];
}
} else {
#pragma omp parallel for schedule(static)
for (unsigned j = 0; j < ndata; ++j) {
for (bst_omp_uint j = 0; j < ndata; ++j) {
preds[j] += mparam.base_score;
}
}
@ -329,8 +334,8 @@ class BoostLearner {
inline int64_t FindBufferOffset(const DMatrix<FMatrix> &mat) const {
for (size_t i = 0; i < cache_.size(); ++i) {
if (cache_[i].mat_ == &mat && mat.cache_learner_ptr_ == this) {
if (cache_[i].num_row_ == mat.info.num_row) {
return cache_[i].buffer_offset_;
if (cache_[i].num_row_ == mat.info.num_row()) {
return static_cast<int64_t>(cache_[i].buffer_offset_);
}
}
}


@ -116,9 +116,9 @@ class RegLossObj : public IObjFunction{
gpair.resize(preds.size());
// start calculating gradient
const unsigned nstep = static_cast<unsigned>(info.labels.size());
const unsigned ndata = static_cast<unsigned>(preds.size());
const bst_omp_uint ndata = static_cast<bst_omp_uint>(preds.size());
#pragma omp parallel for schedule(static)
for (unsigned i = 0; i < ndata; ++i) {
for (bst_omp_uint i = 0; i < ndata; ++i) {
const unsigned j = i % nstep;
float p = loss.PredTransform(preds[i]);
float w = info.GetWeight(j);
@ -132,9 +132,9 @@ class RegLossObj : public IObjFunction{
}
virtual void PredTransform(std::vector<float> *io_preds) {
std::vector<float> &preds = *io_preds;
const unsigned ndata = static_cast<unsigned>(preds.size());
const bst_omp_uint ndata = static_cast<bst_omp_uint>(preds.size());
#pragma omp parallel for schedule(static)
for (unsigned j = 0; j < ndata; ++j) {
for (bst_omp_uint j = 0; j < ndata; ++j) {
preds[j] = loss.PredTransform(preds[j]);
}
}
@ -169,12 +169,12 @@ class SoftmaxMultiClassObj : public IObjFunction {
std::vector<bst_gpair> &gpair = *out_gpair;
gpair.resize(preds.size());
const unsigned nstep = static_cast<unsigned>(info.labels.size() * nclass);
const unsigned ndata = static_cast<unsigned>(preds.size() / nclass);
const bst_omp_uint ndata = static_cast<bst_omp_uint>(preds.size() / nclass);
#pragma omp parallel
{
std::vector<float> rec(nclass);
#pragma omp for schedule(static)
for (unsigned i = 0; i < ndata; ++i) {
for (bst_omp_uint i = 0; i < ndata; ++i) {
for (int k = 0; k < nclass; ++k) {
rec[k] = preds[i * nclass + k];
}
@ -210,18 +210,18 @@ class SoftmaxMultiClassObj : public IObjFunction {
utils::Check(nclass != 0, "must set num_class to use softmax");
std::vector<float> &preds = *io_preds;
std::vector<float> tmp;
const unsigned ndata = static_cast<unsigned>(preds.size()/nclass);
const bst_omp_uint ndata = static_cast<bst_omp_uint>(preds.size()/nclass);
if (prob == 0) tmp.resize(ndata);
#pragma omp parallel
{
std::vector<float> rec(nclass);
#pragma omp for schedule(static)
for (unsigned j = 0; j < ndata; ++j) {
for (bst_omp_uint j = 0; j < ndata; ++j) {
for (int k = 0; k < nclass; ++k) {
rec[k] = preds[j * nclass + k];
}
if (prob == 0) {
tmp[j] = FindMaxIndex(rec);
tmp[j] = static_cast<float>(FindMaxIndex(rec));
} else {
Softmax(&rec);
for (int k = 0; k < nclass; ++k) {
@ -259,11 +259,11 @@ class LambdaRankObj : public IObjFunction {
std::vector<bst_gpair> &gpair = *out_gpair;
gpair.resize(preds.size());
// quick consistency when group is not available
std::vector<unsigned> tgptr(2, 0); tgptr[1] = info.labels.size();
std::vector<unsigned> tgptr(2, 0); tgptr[1] = static_cast<unsigned>(info.labels.size());
const std::vector<unsigned> &gptr = info.group_ptr.size() == 0 ? tgptr : info.group_ptr;
utils::Check(gptr.size() != 0 && gptr.back() == info.labels.size(),
"group structure not consistent with #rows");
const unsigned ngroup = static_cast<unsigned>(gptr.size() - 1);
const bst_omp_uint ngroup = static_cast<bst_omp_uint>(gptr.size() - 1);
#pragma omp parallel
{
// parallel construction: declare the random number generator here, so that each
@ -273,7 +273,7 @@ class LambdaRankObj : public IObjFunction {
std::vector<ListEntry> lst;
std::vector< std::pair<float, unsigned> > rec;
#pragma omp for schedule(static)
for (unsigned k = 0; k < ngroup; ++k) {
for (bst_omp_uint k = 0; k < ngroup; ++k) {
lst.clear(); pairs.clear();
for (unsigned j = gptr[k]; j < gptr[k+1]; ++j) {
lst.push_back(ListEntry(preds[j], info.labels[j], j));
@ -290,7 +290,7 @@ class LambdaRankObj : public IObjFunction {
unsigned j = i + 1;
while (j < rec.size() && rec[j].first == rec[i].first) ++j;
// bucket in [i,j), get a sample outside bucket
unsigned nleft = i, nright = rec.size() - j;
unsigned nleft = i, nright = static_cast<unsigned>(rec.size() - j);
if (nleft + nright != 0) {
int nsample = num_pairsample;
while (nsample --) {
@ -436,9 +436,9 @@ class LambdaRankObjNDCG : public LambdaRankObj {
inline static float CalcDCG(const std::vector<float> &labels) {
double sumdcg = 0.0;
for (size_t i = 0; i < labels.size(); ++i) {
const unsigned rel = labels[i];
const unsigned rel = static_cast<unsigned>(labels[i]);
if (rel != 0) {
sumdcg += ((1 << rel) - 1) / logf(i + 2);
sumdcg += ((1 << rel) - 1) / logf(static_cast<float>(i + 2));
}
}
return static_cast<float>(sumdcg);


@ -42,11 +42,17 @@ class TreeModel {
int max_depth;
/*! \brief number of features used for tree construction */
int num_feature;
/*!
* \brief leaf vector size, used for vector tree
* used to store more than one-dimensional information in the tree
*/
int size_leaf_vector;
/*! \brief reserved part */
int reserved[32];
int reserved[31];
/*! \brief constructor */
Param(void) {
max_depth = 0;
size_leaf_vector = 0;
memset(reserved, 0, sizeof(reserved));
}
/*!
@ -57,6 +63,7 @@ class TreeModel {
inline void SetParam(const char *name, const char *val) {
if (!strcmp("num_roots", name)) num_roots = atoi(val);
if (!strcmp("num_feature", name)) num_feature = atoi(val);
if (!strcmp("size_leaf_vector", name)) size_leaf_vector = atoi(val);
}
};
/*! \brief tree node */
@ -166,10 +173,12 @@ class TreeModel {
protected:
// vector of nodes
std::vector<Node> nodes;
// stats of nodes
std::vector<TNodeStat> stats;
// free node space, used during training process
std::vector<int> deleted_nodes;
// stats of nodes
std::vector<TNodeStat> stats;
// leaf vector, that is used to store additional information
std::vector<bst_float> leaf_vector;
// allocate a new node,
// !!!!!! NOTE: may cause BUG here, nodes.resize
inline int AllocNode(void) {
@ -184,6 +193,7 @@ class TreeModel {
"number of nodes in the tree exceed 2^31");
nodes.resize(param.num_nodes);
stats.resize(param.num_nodes);
leaf_vector.resize(param.num_nodes * param.size_leaf_vector);
return nd;
}
// delete a tree node
@ -247,6 +257,16 @@ class TreeModel {
inline NodeStat &stat(int nid) {
return stats[nid];
}
/*! \brief get leaf vector given nid */
inline bst_float* leafvec(int nid) {
if (leaf_vector.size() == 0) return NULL;
return &leaf_vector[nid * param.size_leaf_vector];
}
/*! \brief get leaf vector given nid */
inline const bst_float* leafvec(int nid) const{
if (leaf_vector.size() == 0) return NULL;
return &leaf_vector[nid * param.size_leaf_vector];
}
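
leaf_vector is kept as one flat array of num_nodes * size_leaf_vector values, so leafvec(nid) is simply a pointer to node nid's slice, or NULL when the feature is disabled (size_leaf_vector == 0 leaves the array empty). A sketch of reading it, with tree and nid hypothetical:

const bst_float *v = tree.leafvec(nid);
if (v != NULL) {
  for (int k = 0; k < tree.param.size_leaf_vector; ++k) {
    // v[k] is the k-th extra value stored at node nid
  }
}
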
/*! \brief initialize the model */
inline void InitModel(void) {
param.num_nodes = param.num_roots;


@ -11,45 +11,6 @@
namespace xgboost {
namespace tree {
/*! \brief core statistics used for tree construction */
struct GradStats {
/*! \brief sum gradient statistics */
double sum_grad;
/*! \brief sum hessian statistics */
double sum_hess;
/*! \brief constructor */
GradStats(void) {
this->Clear();
}
/*! \brief clear the statistics */
inline void Clear(void) {
sum_grad = sum_hess = 0.0f;
}
/*! \brief add statistics to the data */
inline void Add(double grad, double hess) {
sum_grad += grad; sum_hess += hess;
}
/*! \brief add statistics to the data */
inline void Add(const bst_gpair& b) {
this->Add(b.grad, b.hess);
}
/*! \brief add statistics to the data */
inline void Add(const GradStats &b) {
this->Add(b.sum_grad, b.sum_hess);
}
/*! \brief substract the statistics by b */
inline GradStats Substract(const GradStats &b) const {
GradStats res;
res.sum_grad = this->sum_grad - b.sum_grad;
res.sum_hess = this->sum_hess - b.sum_hess;
return res;
}
/*! \return whether the statistics is not used yet */
inline bool Empty(void) const {
return sum_hess == 0.0;
}
};
/*! \brief training parameters for regression tree */
struct TrainParam{
// learning step size for a time
@ -106,7 +67,7 @@ struct TrainParam{
if (!strcmp(name, "min_child_weight")) min_child_weight = static_cast<float>(atof(val));
if (!strcmp(name, "min_split_loss")) min_split_loss = static_cast<float>(atof(val));
if (!strcmp(name, "reg_lambda")) reg_lambda = static_cast<float>(atof(val));
if (!strcmp(name, "reg_method")) reg_method = static_cast<float>(atof(val));
if (!strcmp(name, "reg_method")) reg_method = atoi(val);
if (!strcmp(name, "subsample")) subsample = static_cast<float>(atof(val));
if (!strcmp(name, "colsample_bylevel")) colsample_bylevel = static_cast<float>(atof(val));
if (!strcmp(name, "colsample_bytree")) colsample_bytree = static_cast<float>(atof(val));
@ -165,13 +126,6 @@ struct TrainParam{
inline bool cannot_split(double sum_hess, int depth) const {
return sum_hess < this->min_child_weight * 2.0;
}
// code support for template data
inline double CalcWeight(const GradStats &d) const {
return this->CalcWeight(d.sum_grad, d.sum_hess);
}
inline double CalcGain(const GradStats &d) const {
return this->CalcGain(d.sum_grad, d.sum_hess);
}
protected:
// functions for L1 cost
@ -185,6 +139,63 @@ struct TrainParam{
}
};
/*! \brief core statistics used for tree construction */
struct GradStats {
/*! \brief sum gradient statistics */
double sum_grad;
/*! \brief sum hessian statistics */
double sum_hess;
/*! \brief constructor, the object must be cleared during construction */
explicit GradStats(const TrainParam &param) {
this->Clear();
}
/*! \brief clear the statistics */
inline void Clear(void) {
sum_grad = sum_hess = 0.0f;
}
/*!
* \brief accumulate statistics
* \param gpair the vector storing the gradient statistics
* \param info the additional information
* \param ridx index of the instance to accumulate
*/
inline void Add(const std::vector<bst_gpair> &gpair,
const BoosterInfo &info,
bst_uint ridx) {
const bst_gpair &b = gpair[ridx];
this->Add(b.grad, b.hess);
}
/*! \brief calculate leaf weight */
inline double CalcWeight(const TrainParam &param) const {
return param.CalcWeight(sum_grad, sum_hess);
}
/*! \brief calculate gain of the solution */
inline double CalcGain(const TrainParam &param) const {
return param.CalcGain(sum_grad, sum_hess);
}
/*! \brief add statistics to the data */
inline void Add(const GradStats &b) {
this->Add(b.sum_grad, b.sum_hess);
}
/*! \brief set current value to a - b */
inline void SetSubstract(const GradStats &a, const GradStats &b) {
sum_grad = a.sum_grad - b.sum_grad;
sum_hess = a.sum_hess - b.sum_hess;
}
/*! \return whether the statistics is not used yet */
inline bool Empty(void) const {
return sum_hess == 0.0;
}
/*! \brief set leaf vector value based on statistics */
inline void SetLeafVec(const TrainParam &param, bst_float *vec) const{
}
protected:
/*! \brief add statistics to the data */
inline void Add(double grad, double hess) {
sum_grad += grad; sum_hess += hess;
}
};
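
After this move, GradStats spells out the interface the templated updaters expect of any statistics type: construct from the parameters, accumulate by (gpair, info, ridx), and score against the parameters. A minimal sketch under the definitions above, where gpair, info and ridx are hypothetical variables of the documented types:

TrainParam param;
GradStats parent(param), left(param), right(param);
parent.Add(gpair, info, ridx);             // accumulate one instance into the node
right.SetSubstract(parent, left);          // right = parent - left, without a temporary
double weight = parent.CalcWeight(param);  // leaf weight under the regularization
double gain = parent.CalcGain(param);      // gain used in split scoring
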
/*!
* \brief statistics that are helpful to store
* and represent a split solution for the tree


@ -60,7 +60,7 @@ namespace tree {
template<typename FMatrix>
inline IUpdater<FMatrix>* CreateUpdater(const char *name) {
if (!strcmp(name, "prune")) return new TreePruner<FMatrix>();
if (!strcmp(name, "refresh")) return new TreeRefresher<FMatrix>();
if (!strcmp(name, "refresh")) return new TreeRefresher<FMatrix, GradStats>();
if (!strcmp(name, "grow_colmaker")) return new ColMaker<FMatrix, GradStats>();
utils::Error("unknown updater:%s", name);
return NULL;


@ -51,8 +51,8 @@ class ColMaker: public IUpdater<FMatrix> {
/*! \brief current best solution */
SplitEntry best;
// constructor
ThreadEntry(void) {
stats.Clear();
explicit ThreadEntry(const TrainParam &param)
: stats(param) {
}
};
struct NodeEntry {
@ -65,8 +65,8 @@ class ColMaker: public IUpdater<FMatrix> {
/*! \brief current best solution */
SplitEntry best;
// constructor
NodeEntry(void) : root_gain(0.0f), weight(0.0f){
stats.Clear();
explicit NodeEntry(const TrainParam &param)
: stats(param), root_gain(0.0f), weight(0.0f){
}
};
// actual builder that runs the algorithm
@ -80,13 +80,13 @@ class ColMaker: public IUpdater<FMatrix> {
const BoosterInfo &info,
RegTree *p_tree) {
this->InitData(gpair, fmat, info.root_index, *p_tree);
this->InitNewNode(qexpand, gpair, fmat, *p_tree);
this->InitNewNode(qexpand, gpair, fmat, info, *p_tree);
for (int depth = 0; depth < param.max_depth; ++depth) {
this->FindSplit(depth, this->qexpand, gpair, fmat, p_tree);
this->FindSplit(depth, this->qexpand, gpair, fmat, info, p_tree);
this->ResetPosition(this->qexpand, fmat, *p_tree);
this->UpdateQueueExpand(*p_tree, &this->qexpand);
this->InitNewNode(qexpand, gpair, fmat, *p_tree);
this->InitNewNode(qexpand, gpair, fmat, info, *p_tree);
// if nothing is left to expand, break
if (qexpand.size() == 0) break;
}
@ -100,6 +100,7 @@ class ColMaker: public IUpdater<FMatrix> {
p_tree->stat(nid).loss_chg = snode[nid].best.loss_chg;
p_tree->stat(nid).base_weight = snode[nid].weight;
p_tree->stat(nid).sum_hess = static_cast<float>(snode[nid].stats.sum_hess);
snode[nid].stats.SetLeafVec(param, p_tree->leafvec(nid));
}
}
@ -175,34 +176,35 @@ class ColMaker: public IUpdater<FMatrix> {
inline void InitNewNode(const std::vector<int> &qexpand,
const std::vector<bst_gpair> &gpair,
const FMatrix &fmat,
const BoosterInfo &info,
const RegTree &tree) {
{// setup statistics space for each tree node
for (size_t i = 0; i < stemp.size(); ++i) {
stemp[i].resize(tree.param.num_nodes, ThreadEntry());
stemp[i].resize(tree.param.num_nodes, ThreadEntry(param));
}
snode.resize(tree.param.num_nodes, NodeEntry());
snode.resize(tree.param.num_nodes, NodeEntry(param));
}
const std::vector<bst_uint> &rowset = fmat.buffered_rowset();
// setup position
const unsigned ndata = static_cast<unsigned>(rowset.size());
const bst_omp_uint ndata = static_cast<bst_omp_uint>(rowset.size());
#pragma omp parallel for schedule(static)
for (unsigned i = 0; i < ndata; ++i) {
for (bst_omp_uint i = 0; i < ndata; ++i) {
const bst_uint ridx = rowset[i];
const int tid = omp_get_thread_num();
if (position[ridx] < 0) continue;
stemp[tid][position[ridx]].stats.Add(gpair[ridx]);
stemp[tid][position[ridx]].stats.Add(gpair, info, ridx);
}
// sum the per thread statistics together
for (size_t j = 0; j < qexpand.size(); ++j) {
const int nid = qexpand[j];
TStats stats; stats.Clear();
TStats stats(param);
for (size_t tid = 0; tid < stemp.size(); ++tid) {
stats.Add(stemp[tid][nid].stats);
}
// update node statistics
snode[nid].stats = stats;
snode[nid].root_gain = param.CalcGain(stats);
snode[nid].weight = param.CalcWeight(stats);
snode[nid].root_gain = static_cast<float>(stats.CalcGain(param));
snode[nid].weight = static_cast<float>(stats.CalcWeight(param));
}
}
/*! \brief update the queue of nodes to expand, adding in new leaves */
@ -223,12 +225,15 @@ class ColMaker: public IUpdater<FMatrix> {
template<typename Iter>
inline void EnumerateSplit(Iter it, unsigned fid,
const std::vector<bst_gpair> &gpair,
const BoosterInfo &info,
std::vector<ThreadEntry> &temp,
bool is_forward_search) {
// clear all the temp statistics
for (size_t j = 0; j < qexpand.size(); ++j) {
temp[qexpand[j]].stats.Clear();
}
// statistics of the complement side of the split (parent minus scanned)
TStats c(param);
while (it.Next()) {
const bst_uint ridx = it.rindex();
const int nid = position[ridx];
@ -239,19 +244,19 @@ class ColMaker: public IUpdater<FMatrix> {
ThreadEntry &e = temp[nid];
// test if first hit, this is fine, because we set 0 during init
if (e.stats.Empty()) {
e.stats.Add(gpair[ridx]);
e.stats.Add(gpair, info, ridx);
e.last_fvalue = fvalue;
} else {
// try to find a split
if (fabsf(fvalue - e.last_fvalue) > rt_2eps && e.stats.sum_hess >= param.min_child_weight) {
TStats c = snode[nid].stats.Substract(e.stats);
c.SetSubstract(snode[nid].stats, e.stats);
if (c.sum_hess >= param.min_child_weight) {
double loss_chg = param.CalcGain(e.stats) + param.CalcGain(c) - snode[nid].root_gain;
bst_float loss_chg = static_cast<bst_float>(e.stats.CalcGain(param) + c.CalcGain(param) - snode[nid].root_gain);
e.best.Update(loss_chg, fid, (fvalue + e.last_fvalue) * 0.5f, !is_forward_search);
}
}
// update the statistics
e.stats.Add(gpair[ridx]);
e.stats.Add(gpair, info, ridx);
e.last_fvalue = fvalue;
}
}
@ -259,9 +264,9 @@ class ColMaker: public IUpdater<FMatrix> {
for (size_t i = 0; i < qexpand.size(); ++i) {
const int nid = qexpand[i];
ThreadEntry &e = temp[nid];
TStats c = snode[nid].stats.Substract(e.stats);
c.SetSubstract(snode[nid].stats, e.stats);
if (e.stats.sum_hess >= param.min_child_weight && c.sum_hess >= param.min_child_weight) {
const double loss_chg = param.CalcGain(e.stats) + param.CalcGain(c) - snode[nid].root_gain;
bst_float loss_chg = static_cast<bst_float>(e.stats.CalcGain(param) + c.CalcGain(param) - snode[nid].root_gain);
const float delta = is_forward_search ? rt_eps : -rt_eps;
e.best.Update(loss_chg, fid, e.last_fvalue + delta, !is_forward_search);
}
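
In both branches the score of a candidate split is CalcGain(left) + CalcGain(right) - root_gain. CalcGain itself is not shown in this hunk; assuming the usual L2-penalized form (reg_lambda = lambda, with G and H a node's gradient and hessian sums), the loss change being maximized is:

\mathit{loss\_chg} = \frac{G_L^2}{H_L + \lambda} + \frac{G_R^2}{H_R + \lambda} - \frac{(G_L + G_R)^2}{H_L + H_R + \lambda}
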
@ -269,7 +274,9 @@ class ColMaker: public IUpdater<FMatrix> {
}
// find splits at current level, do split per level
inline void FindSplit(int depth, const std::vector<int> &qexpand,
const std::vector<bst_gpair> &gpair, const FMatrix &fmat,
const std::vector<bst_gpair> &gpair,
const FMatrix &fmat,
const BoosterInfo &info,
RegTree *p_tree) {
std::vector<unsigned> feat_set = feat_index;
if (param.colsample_bylevel != 1.0f) {
@ -279,19 +286,19 @@ class ColMaker: public IUpdater<FMatrix> {
feat_set.resize(n);
}
// start enumeration
const unsigned nsize = static_cast<unsigned>(feat_set.size());
const bst_omp_uint nsize = static_cast<bst_omp_uint>(feat_set.size());
#if defined(_OPENMP)
const int batch_size = std::max(static_cast<int>(nsize / this->nthread / 32), 1);
#endif
#pragma omp parallel for schedule(dynamic, batch_size)
for (unsigned i = 0; i < nsize; ++i) {
for (bst_omp_uint i = 0; i < nsize; ++i) {
const unsigned fid = feat_set[i];
const int tid = omp_get_thread_num();
if (param.need_forward_search(fmat.GetColDensity(fid))) {
this->EnumerateSplit(fmat.GetSortedCol(fid), fid, gpair, stemp[tid], true);
this->EnumerateSplit(fmat.GetSortedCol(fid), fid, gpair, info, stemp[tid], true);
}
if (param.need_backward_search(fmat.GetColDensity(fid))) {
this->EnumerateSplit(fmat.GetReverseSortedCol(fid), fid, gpair, stemp[tid], false);
this->EnumerateSplit(fmat.GetReverseSortedCol(fid), fid, gpair, info, stemp[tid], false);
}
}
// after this each thread's stemp will get the best candidates, aggregate results
@ -314,9 +321,9 @@ class ColMaker: public IUpdater<FMatrix> {
inline void ResetPosition(const std::vector<int> &qexpand, const FMatrix &fmat, const RegTree &tree) {
const std::vector<bst_uint> &rowset = fmat.buffered_rowset();
// step 1: set nodes with a default direction to that default, and leaf nodes to -1
const unsigned ndata = static_cast<unsigned>(rowset.size());
const bst_omp_uint ndata = static_cast<bst_omp_uint>(rowset.size());
#pragma omp parallel for schedule(static)
for (unsigned i = 0; i < ndata; ++i) {
for (bst_omp_uint i = 0; i < ndata; ++i) {
const bst_uint ridx = rowset[i];
const int nid = position[ridx];
if (nid >= 0) {
@ -337,9 +344,9 @@ class ColMaker: public IUpdater<FMatrix> {
std::sort(fsplits.begin(), fsplits.end());
fsplits.resize(std::unique(fsplits.begin(), fsplits.end()) - fsplits.begin());
// start putting things into the right place
const unsigned nfeats = static_cast<unsigned>(fsplits.size());
const bst_omp_uint nfeats = static_cast<bst_omp_uint>(fsplits.size());
#pragma omp parallel for schedule(dynamic, 1)
for (unsigned i = 0; i < nfeats; ++i) {
for (bst_omp_uint i = 0; i < nfeats; ++i) {
const unsigned fid = fsplits[i];
for (typename FMatrix::ColIter it = fmat.GetSortedCol(fid); it.Next();) {
const bst_uint ridx = it.rindex();


@ -13,7 +13,7 @@
namespace xgboost {
namespace tree {
/*! \brief refresher that refreshes the statistics in a tree after it has been grown */
template<typename FMatrix>
template<typename FMatrix, typename TStats>
class TreeRefresher: public IUpdater<FMatrix> {
public:
virtual ~TreeRefresher(void) {}
@ -30,7 +30,7 @@ class TreeRefresher: public IUpdater<FMatrix> {
// number of threads
int nthread;
// per-thread temporary space
std::vector< std::vector<GradStats> > stemp;
std::vector< std::vector<TStats> > stemp;
std::vector<RegTree::FVec> fvec_temp;
// setup temp space for each thread
#pragma omp parallel
@ -38,14 +38,14 @@ class TreeRefresher: public IUpdater<FMatrix> {
nthread = omp_get_num_threads();
}
fvec_temp.resize(nthread, RegTree::FVec());
stemp.resize(trees.size() * nthread, std::vector<GradStats>());
stemp.resize(trees.size() * nthread, std::vector<TStats>());
#pragma omp parallel
{
int tid = omp_get_thread_num();
for (size_t i = 0; i < trees.size(); ++i) {
std::vector<GradStats> &vec = stemp[tid * trees.size() + i];
vec.resize(trees[i]->param.num_nodes);
std::fill(vec.begin(), vec.end(), GradStats());
std::vector<TStats> &vec = stemp[tid * trees.size() + i];
vec.resize(trees[i]->param.num_nodes, TStats(param));
std::fill(vec.begin(), vec.end(), TStats(param));
}
fvec_temp[tid].Init(trees[0]->param.num_feature);
}
@ -56,17 +56,16 @@ class TreeRefresher: public IUpdater<FMatrix> {
const SparseBatch &batch = iter->Value();
utils::Check(batch.size < std::numeric_limits<unsigned>::max(),
"too large batch size ");
const unsigned nbatch = static_cast<unsigned>(batch.size);
const bst_omp_uint nbatch = static_cast<bst_omp_uint>(batch.size);
#pragma omp parallel for schedule(static)
for (unsigned i = 0; i < nbatch; ++i) {
for (bst_omp_uint i = 0; i < nbatch; ++i) {
SparseBatch::Inst inst = batch[i];
const int tid = omp_get_thread_num();
const size_t ridx = batch.base_rowid + i;
const bst_uint ridx = static_cast<bst_uint>(batch.base_rowid + i);
RegTree::FVec &feats = fvec_temp[tid];
feats.Fill(inst);
for (size_t j = 0; j < trees.size(); ++j) {
AddStats(*trees[j], feats, gpair[ridx],
info.GetRoot(j),
AddStats(*trees[j], feats, gpair, info, ridx,
&stemp[tid * trees.size() + j]);
}
feats.Drop(inst);
@ -95,31 +94,34 @@ class TreeRefresher: public IUpdater<FMatrix> {
private:
inline static void AddStats(const RegTree &tree,
const RegTree::FVec &feat,
const bst_gpair &gpair, unsigned root_id,
std::vector<GradStats> *p_gstats) {
std::vector<GradStats> &gstats = *p_gstats;
const std::vector<bst_gpair> &gpair,
const BoosterInfo &info,
const bst_uint ridx,
std::vector<TStats> *p_gstats) {
std::vector<TStats> &gstats = *p_gstats;
// start from the root group that this instance belongs to
int pid = static_cast<int>(root_id);
gstats[pid].Add(gpair);
int pid = static_cast<int>(info.GetRoot(ridx));
gstats[pid].Add(gpair, info, ridx);
// traverse the tree
while (!tree[pid].is_leaf()) {
unsigned split_index = tree[pid].split_index();
pid = tree.GetNext(pid, feat.fvalue(split_index), feat.is_missing(split_index));
gstats[pid].Add(gpair);
gstats[pid].Add(gpair, info, ridx);
}
}
inline void Refresh(const std::vector<GradStats> &gstats,
inline void Refresh(const std::vector<TStats> &gstats,
int nid, RegTree *p_tree) {
RegTree &tree = *p_tree;
tree.stat(nid).base_weight = param.CalcWeight(gstats[nid]);
tree.stat(nid).base_weight = static_cast<float>(gstats[nid].CalcWeight(param));
tree.stat(nid).sum_hess = static_cast<float>(gstats[nid].sum_hess);
gstats[nid].SetLeafVec(param, tree.leafvec(nid));
if (tree[nid].is_leaf()) {
tree[nid].set_leaf(tree.stat(nid).base_weight * param.learning_rate);
} else {
tree.stat(nid).loss_chg =
param.CalcGain(gstats[tree[nid].cleft()]) +
param.CalcGain(gstats[tree[nid].cright()]) -
param.CalcGain(gstats[nid]);
tree.stat(nid).loss_chg = static_cast<float>(
gstats[tree[nid].cleft()].CalcGain(param) +
gstats[tree[nid].cright()].CalcGain(param) -
gstats[nid].CalcGain(param));
this->Refresh(gstats, tree[nid].cleft(), p_tree);
this->Refresh(gstats, tree[nid].cright(), p_tree);
}


@ -40,7 +40,7 @@ class IStream {
*/
template<typename T>
inline void Write(const std::vector<T> &vec) {
uint64_t sz = vec.size();
uint64_t sz = static_cast<uint64_t>(vec.size());
this->Write(&sz, sizeof(sz));
if (sz != 0) {
this->Write(&vec[0], sizeof(T) * sz);
@ -66,7 +66,7 @@ class IStream {
* \param str the string to be serialized
*/
inline void Write(const std::string &str) {
uint64_t sz = str.length();
uint64_t sz = static_cast<uint64_t>(str.length());
this->Write(&sz, sizeof(sz));
if (sz != 0) {
this->Write(&str[0], sizeof(char) * sz);
@ -102,6 +102,9 @@ class FileStream : public IStream {
virtual void Write(const void *ptr, size_t size) {
fwrite(ptr, size, 1, fp);
}
inline void Seek(size_t pos) {
fseek(fp, static_cast<long>(pos), SEEK_SET);
}
inline void Close(void) {
fclose(fp);
}
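
Both Write overloads above emit the same framing: a uint64 element count followed by the raw payload bytes, and any reader has to mirror it. A sketch of the matching read side, assuming an IStream fi whose Read(void*, size_t) returns nonzero on success, as MetaInfo::LoadBinary above relies on:

uint64_t sz;
utils::Check(fi.Read(&sz, sizeof(sz)) != 0, "bad stream");
std::vector<float> vec(static_cast<size_t>(sz));
if (sz != 0) {
  utils::Check(fi.Read(&vec[0], sizeof(float) * vec.size()) != 0, "bad stream");
}
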


@ -17,26 +17,26 @@ namespace utils {
* \tparam IndexType type of index used to store the index position, usually unsigned or size_t
* \tparam UseAcList whether to enable the usage of aclist; this option must be enabled manually
* \tparam SizeType type used to store the row pointers, default size_t
*/
template<typename IndexType, bool UseAcList = false>
template<typename IndexType, bool UseAcList = false, typename SizeType = size_t>
struct SparseCSRMBuilder {
private:
/*! \brief dummy variable used in the indicator matrix construction */
std::vector<size_t> dummy_aclist;
/*! \brief pointer to each of the row */
std::vector<size_t> &rptr;
std::vector<SizeType> &rptr;
/*! \brief index of nonzero entries in each row */
std::vector<IndexType> &findex;
/*! \brief a list of active rows, used when many rows are empty */
std::vector<size_t> &aclist;
public:
SparseCSRMBuilder(std::vector<size_t> &p_rptr,
SparseCSRMBuilder(std::vector<SizeType> &p_rptr,
std::vector<IndexType> &p_findex)
:rptr(p_rptr), findex(p_findex), aclist(dummy_aclist) {
Assert(!UseAcList, "enabling bug");
}
/*! \brief use with caution! rptr must be cleaned before use */
SparseCSRMBuilder(std::vector<size_t> &p_rptr,
SparseCSRMBuilder(std::vector<SizeType> &p_rptr,
std::vector<IndexType> &p_findex,
std::vector<size_t> &p_aclist)
:rptr(p_rptr), findex(p_findex), aclist(p_aclist) {
@ -62,7 +62,7 @@ struct SparseCSRMBuilder {
* \param row_id the id of the row
* \param nelem number of elements of budget to add to this row
*/
inline void AddBudget(size_t row_id, size_t nelem = 1) {
inline void AddBudget(size_t row_id, SizeType nelem = 1) {
if (rptr.size() < row_id + 2) {
rptr.resize(row_id + 2, 0);
}
@ -101,7 +101,7 @@ struct SparseCSRMBuilder {
* element to each row; the number of calls shall be exactly the same as AddBudget
*/
inline void PushElem(size_t row_id, IndexType col_id) {
size_t &rp = rptr[row_id + 1];
SizeType &rp = rptr[row_id + 1];
findex[rp++] = col_id;
}
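
The builder makes two passes over the same (row, col) entries: first count a budget per row, then push exactly that many elements. A sketch of the protocol, modeled on the R wrapper usage later in this diff; the InitStorage() call between the passes and the bst_ulong typedef are assumed from headers not shown in this hunk:

const size_t rows[] = {0, 0, 2};           // toy COO input: three entries
const unsigned cols[] = {1, 3, 0};
std::vector<bst_ulong> rptr;               // row pointers, SizeType = bst_ulong
std::vector<unsigned> findex;              // column indices
utils::SparseCSRMBuilder<unsigned, false, bst_ulong> builder(rptr, findex);
builder.InitBudget();
for (int i = 0; i < 3; ++i) builder.AddBudget(rows[i]);          // pass 1: count per row
builder.InitStorage();                     // assumed: turn budgets into row offsets
for (int i = 0; i < 3; ++i) builder.PushElem(rows[i], cols[i]);  // pass 2: fill
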
/*!


@ -9,10 +9,26 @@
#include <omp.h>
#else
#ifndef DISABLE_OPENMP
#warning "OpenMP is not available, compile to single thread code"
#ifndef _MSC_VER
#warning "OpenMP is not available, compile to single thread code."\
"You may want to ungrade your compiler to enable OpenMP support,"\
"to get benefit of multi-threading."
#else
// TODO add warning for msvc
#endif
#endif
inline int omp_get_thread_num() { return 0; }
inline int omp_get_num_threads() { return 1; }
inline void omp_set_num_threads(int nthread) {}
#endif
// loop variable used in openmp
namespace xgboost {
#ifdef _MSC_VER
typedef int bst_omp_uint;
#else
typedef unsigned bst_omp_uint;
#endif
} // namespace xgboost
#endif // XGBOOST_UTILS_OMP_H_
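
The reason for the typedef: MSVC implements only OpenMP 2.0, whose parallel for requires a signed loop index, while gcc and clang accept unsigned. Routing every parallel loop through bst_omp_uint keeps one spelling for both, which is what all the unsigned-to-bst_omp_uint changes in this diff are doing. A sketch (the function and data are hypothetical):

#include <vector>
#include "utils/omp.h"  // path assumed relative to src/

void ScalePreds(std::vector<float> *io_preds) {
  std::vector<float> &preds = *io_preds;
  const xgboost::bst_omp_uint ndata = static_cast<xgboost::bst_omp_uint>(preds.size());
  #pragma omp parallel for schedule(static)
  for (xgboost::bst_omp_uint j = 0; j < ndata; ++j) {  // int on MSVC, unsigned elsewhere
    preds[j] *= 0.5f;
  }
}
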


@ -88,11 +88,21 @@ inline void Shuffle(std::vector<T> &data) {
struct Random{
/*! \brief set random number seed */
inline void Seed(unsigned sd) {
this->rseed = sd;
this->rseed = sd;
#if defined(_MSC_VER)||defined(_WIN32)
srand(rseed);
#endif
}
/*! \brief return a real number uniform in [0,1) */
inline double RandDouble(void) {
return static_cast<double>( rand_r( &rseed ) ) / (static_cast<double>( RAND_MAX )+1.0);
// use rand instead of rand_r on Windows; for MSVC this is fine since rand is thread-safe there
// For cygwin and mingw this can slow down parallelism, but rand_r is only used in objective-inl.hpp, so it won't affect speed in general
// TODO: replace with another PRNG
#if defined(_MSC_VER)||defined(_WIN32)
return static_cast<double>(rand()) / (static_cast<double>(RAND_MAX) + 1.0);
#else
return static_cast<double>(rand_r(&rseed)) / (static_cast<double>(RAND_MAX) + 1.0);
#endif
}
// random number seed
unsigned rseed;


@ -6,8 +6,15 @@
* \author Tianqi Chen
*/
#define _CRT_SECURE_NO_WARNINGS
#include <cstdio>
#include <cstdarg>
#include <cstdlib>
#ifdef _MSC_VER
#define fopen64 fopen
// NOTE: sprintf_s is not equivalent to snprintf,
// but they behave the same on success, which is sufficient for our case
#define snprintf sprintf_s
#define vsnprintf vsprintf_s
#else
#ifdef _FILE_OFFSET_BITS
#if _FILE_OFFSET_BITS == 32
@ -36,49 +43,68 @@ typedef long int64_t;
#include <inttypes.h>
#endif
#include <cstdio>
#include <cstdarg>
#include <cstdlib>
namespace xgboost {
/*! \brief namespace for helper utils of the project */
namespace utils {
/*! \brief error message buffer length */
const int kErrorBuffer = 1 << 12;
#ifndef XGBOOST_CUSTOMIZE_ERROR_
/*!
* \brief handling of Assert error, caused by inappropriate input
* \param msg error message
*/
inline void HandleAssertError(const char *msg) {
fprintf(stderr, "AssertError:%s\n", msg);
exit(-1);
}
/*!
* \brief handling of Check error, caused by inappropriate input
* \param msg error message
*/
inline void HandleCheckError(const char *msg) {
fprintf(stderr, "%s\n", msg);
exit(-1);
}
#else
// declarations only; someone must implement these
void HandleAssertError(const char *msg);
void HandleCheckError(const char *msg);
#endif
/*! \brief assert a condition is true; use this to handle debug information */
inline void Assert(bool exp, const char *fmt, ...) {
if (!exp) {
std::string msg(kErrorBuffer, '\0');
va_list args;
va_start(args, fmt);
fprintf(stderr, "AssertError:");
vfprintf(stderr, fmt, args);
vsnprintf(&msg[0], kErrorBuffer, fmt, args);
va_end(args);
fprintf(stderr, "\n");
exit(-1);
HandleAssertError(msg.c_str());
}
}
/*! \brief same as assert, but intended to produce a message for the user */
inline void Check(bool exp, const char *fmt, ...) {
if (!exp) {
std::string msg(kErrorBuffer, '\0');
va_list args;
va_start(args, fmt);
vfprintf(stderr, fmt, args);
vsnprintf(&msg[0], kErrorBuffer, fmt, args);
va_end(args);
fprintf(stderr, "\n");
exit(-1);
HandleCheckError(msg.c_str());
}
}
/*! \brief report error message, same as check */
inline void Error(const char *fmt, ...) {
{
std::string msg(kErrorBuffer, '\0');
va_list args;
va_start(args, fmt);
vfprintf(stderr, fmt, args);
vsnprintf(&msg[0], kErrorBuffer, fmt, args);
va_end(args);
fprintf(stderr, "\n");
exit(-1);
HandleCheckError(msg.c_str());
}
}
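
With XGBOOST_CUSTOMIZE_ERROR_ defined, the message is now formatted into a buffer and handed to handlers the embedding application must supply, instead of being printed before exit(-1); the R wrapper later in this diff provides exactly such handlers via R's error(). A stand-alone sketch of one possible custom pair, compiled with -DXGBOOST_CUSTOMIZE_ERROR_:

#include <cstdio>
#include <cstdlib>
#include <stdexcept>
namespace xgboost {
namespace utils {
void HandleAssertError(const char *msg) {
  fprintf(stderr, "AssertError:%s\n", msg);
  std::abort();
}
void HandleCheckError(const char *msg) {
  throw std::runtime_error(msg);  // e.g. surface as a host-language exception
}
}  // namespace utils
}  // namespace xgboost
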

windows/README.md Normal file

@ -0,0 +1 @@
This is a test of the minimal set of files needed for the Windows version.

windows/xgboost.sln Normal file

@ -0,0 +1,26 @@

Microsoft Visual Studio Solution File, Format Version 11.00
# Visual Studio 2010
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "xgboost", "xgboost\xgboost.vcxproj", "{1D6A56A5-5557-4D20-9D50-3DE4C30BE00C}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Win32 = Debug|Win32
Debug|x64 = Debug|x64
Release|Win32 = Release|Win32
Release|x64 = Release|x64
EndGlobalSection
GlobalSection(ProjectConfigurationPlatforms) = postSolution
{1D6A56A5-5557-4D20-9D50-3DE4C30BE00C}.Debug|Win32.ActiveCfg = Debug|Win32
{1D6A56A5-5557-4D20-9D50-3DE4C30BE00C}.Debug|Win32.Build.0 = Debug|Win32
{1D6A56A5-5557-4D20-9D50-3DE4C30BE00C}.Debug|x64.ActiveCfg = Debug|x64
{1D6A56A5-5557-4D20-9D50-3DE4C30BE00C}.Debug|x64.Build.0 = Debug|x64
{1D6A56A5-5557-4D20-9D50-3DE4C30BE00C}.Release|Win32.ActiveCfg = Release|Win32
{1D6A56A5-5557-4D20-9D50-3DE4C30BE00C}.Release|Win32.Build.0 = Release|Win32
{1D6A56A5-5557-4D20-9D50-3DE4C30BE00C}.Release|x64.ActiveCfg = Release|x64
{1D6A56A5-5557-4D20-9D50-3DE4C30BE00C}.Release|x64.Build.0 = Release|x64
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
EndGlobalSection
EndGlobal


@ -0,0 +1,117 @@
<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup Label="ProjectConfigurations">
<ProjectConfiguration Include="Debug|Win32">
<Configuration>Debug</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Debug|x64">
<Configuration>Debug</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|Win32">
<Configuration>Release</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|x64">
<Configuration>Release</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
</ItemGroup>
<PropertyGroup Label="Globals">
<ProjectGuid>{1D6A56A5-5557-4D20-9D50-3DE4C30BE00C}</ProjectGuid>
<RootNamespace>xgboost</RootNamespace>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<UseDebugLibraries>true</UseDebugLibraries>
<CharacterSet>MultiByte</CharacterSet>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<UseDebugLibraries>true</UseDebugLibraries>
<CharacterSet>MultiByte</CharacterSet>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<UseDebugLibraries>false</UseDebugLibraries>
<WholeProgramOptimization>true</WholeProgramOptimization>
<CharacterSet>MultiByte</CharacterSet>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<UseDebugLibraries>false</UseDebugLibraries>
<WholeProgramOptimization>true</WholeProgramOptimization>
<CharacterSet>MultiByte</CharacterSet>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings">
</ImportGroup>
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<PropertyGroup Label="UserMacros" />
<PropertyGroup />
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<ClCompile>
<WarningLevel>Level3</WarningLevel>
<Optimization>Disabled</Optimization>
</ClCompile>
<Link>
<GenerateDebugInformation>true</GenerateDebugInformation>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
<ClCompile>
<WarningLevel>Level3</WarningLevel>
<Optimization>Disabled</Optimization>
</ClCompile>
<Link>
<GenerateDebugInformation>true</GenerateDebugInformation>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
<ClCompile>
<WarningLevel>Level3</WarningLevel>
<Optimization>MaxSpeed</Optimization>
<FunctionLevelLinking>true</FunctionLevelLinking>
<IntrinsicFunctions>true</IntrinsicFunctions>
</ClCompile>
<Link>
<GenerateDebugInformation>true</GenerateDebugInformation>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
<OptimizeReferences>true</OptimizeReferences>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
<ClCompile>
<WarningLevel>Level3</WarningLevel>
<Optimization>MaxSpeed</Optimization>
<FunctionLevelLinking>true</FunctionLevelLinking>
<IntrinsicFunctions>true</IntrinsicFunctions>
<OpenMPSupport>true</OpenMPSupport>
</ClCompile>
<Link>
<GenerateDebugInformation>true</GenerateDebugInformation>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
<OptimizeReferences>true</OptimizeReferences>
</Link>
</ItemDefinitionGroup>
<ItemGroup>
<ClCompile Include="..\..\src\io\io.cpp" />
<ClCompile Include="..\..\src\xgboost_main.cpp" />
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
</ImportGroup>
</Project>


@ -35,7 +35,7 @@ bst <- xgb.train(param, dtrain, nround=2, watchlist=watchlist)
# make prediction
preds <- xgb.predict(bst, dtest)
labels <- xgb.getinfo(dtest, "label")
err <- as.real(sum(as.integer(preds > 0.5) != labels)) / length(labels)
err <- as.numeric(sum(as.integer(preds > 0.5) != labels)) / length(labels)
# print error rate
print(paste("error=",err))
@ -100,7 +100,7 @@ logregobj <- function(preds, dtrain) {
# Keep this in mind when you use the customization; you may need to write a customized evaluation function
evalerror <- function(preds, dtrain) {
labels <- xgb.getinfo(dtrain, "label")
err <- as.real(sum(labels != (preds > 0.0))) / length(labels)
err <- as.numeric(sum(labels != (preds > 0.0))) / length(labels)
return(list(metric="error", value=err))
}


@ -13,7 +13,7 @@ dtrain = xgb.DMatrix('agaricus.txt.train')
dtest = xgb.DMatrix('agaricus.txt.test')
# specify parameters via map; definitions are the same as in the C++ version
param = {'bst:max_depth':2, 'bst:eta':1, 'silent':1, 'objective':'binary:logistic' }
param = {'max_depth':2, 'eta':1, 'silent':1, 'objective':'binary:logistic' }
# specify validation sets to watch performance
evallist = [(dtest,'eval'), (dtrain,'train')]
@ -75,7 +75,7 @@ print ('start running example to use customized objective function')
# note: for a customized objective function, we leave the objective as default
# note: what we are getting is the margin value of the prediction
# you must know what you are doing
param = {'bst:max_depth':2, 'bst:eta':1, 'silent':1 }
param = {'max_depth':2, 'eta':1, 'silent':1 }
# user-defined objective function: given prediction, return gradient and second-order gradient
# this is log-likelihood loss
@ -107,7 +107,7 @@ bst = xgb.train(param, dtrain, num_round, evallist, logregobj, evalerror)
#
print ('start running example to start from an initial prediction')
# specify parameters via map; definitions are the same as in the C++ version
param = {'bst:max_depth':2, 'bst:eta':1, 'silent':1, 'objective':'binary:logistic' }
param = {'max_depth':2, 'eta':1, 'silent':1, 'objective':'binary:logistic' }
# train xgboost for 1 round
bst = xgb.train( param, dtrain, 1, evallist )
# Note: we need the margin value instead of transformed prediction in set_base_margin


@ -48,15 +48,15 @@ xgb.setinfo <- function(dmat, name, info) {
stop("xgb.setinfo: first argument dtrain must be xgb.DMatrix");
}
if (name == "label") {
.Call("XGDMatrixSetInfo_R", dmat, name, as.real(info))
.Call("XGDMatrixSetInfo_R", dmat, name, as.numeric(info))
return(TRUE)
}
if (name == "weight") {
.Call("XGDMatrixSetInfo_R", dmat, name, as.real(info))
.Call("XGDMatrixSetInfo_R", dmat, name, as.numeric(info))
return(TRUE)
}
if (name == "base_margin") {
.Call("XGDMatrixSetInfo_R", dmat, name, as.real(info))
.Call("XGDMatrixSetInfo_R", dmat, name, as.numeric(info))
return(TRUE)
}
if (name == "group") {
@ -214,7 +214,7 @@ xgb.iter.eval <- function(booster, watchlist, iter) {
if (length(names(w)) == 0) {
stop("xgb.eval: name tag must be presented for every elements in watchlist")
}
evnames <- append(evnames, names(w))
evnames <- append(evnames, names(w))
}
}
msg <- .Call("XGBoosterEvalOneIter_R", booster, as.integer(iter), watchlist, evnames)


@ -19,6 +19,7 @@ xglib.XGDMatrixCreateFromCSR.restype = ctypes.c_void_p
xglib.XGDMatrixCreateFromMat.restype = ctypes.c_void_p
xglib.XGDMatrixSliceDMatrix.restype = ctypes.c_void_p
xglib.XGDMatrixGetFloatInfo.restype = ctypes.POINTER(ctypes.c_float)
xglib.XGDMatrixGetUIntInfo.restype = ctypes.POINTER(ctypes.c_uint)
xglib.XGDMatrixNumRow.restype = ctypes.c_ulong
xglib.XGBoosterCreate.restype = ctypes.c_void_p
@ -27,10 +28,10 @@ xglib.XGBoosterEvalOneIter.restype = ctypes.c_char_p
xglib.XGBoosterDumpModel.restype = ctypes.POINTER(ctypes.c_char_p)
def ctypes2numpy(cptr, length):
def ctypes2numpy(cptr, length, dtype):
# convert a ctypes pointer array to numpy
assert isinstance(cptr, ctypes.POINTER(ctypes.c_float))
res = numpy.zeros(length, dtype='float32')
res = numpy.zeros(length, dtype=dtype)
assert ctypes.memmove(res.ctypes.data, cptr, length * res.strides[0])
return res
@ -44,7 +45,7 @@ class DMatrix:
return
if isinstance(data, str):
self.handle = ctypes.c_void_p(
xglib.XGDMatrixCreateFromFile(ctypes.c_char_p(data.encode('utf-8')), 1))
xglib.XGDMatrixCreateFromFile(ctypes.c_char_p(data.encode('utf-8')), 0))
elif isinstance(data, scp.csr_matrix):
self.__init_from_csr(data)
elif isinstance(data, numpy.ndarray) and len(data.shape) == 2:
@ -76,23 +77,31 @@ class DMatrix:
# destructor
def __del__(self):
xglib.XGDMatrixFree(self.handle)
def __get_float_info(self, field):
def get_float_info(self, field):
length = ctypes.c_ulong()
ret = xglib.XGDMatrixGetFloatInfo(self.handle, ctypes.c_char_p(field.encode('utf-8')),
ctypes.byref(length))
return ctypes2numpy(ret, length.value)
def __set_float_info(self, field, data):
xglib.XGDMatrixSetFloatInfo(self.handle,ctypes.c_char_p(field.encode('utf-8')),
return ctypes2numpy(ret, length.value, 'float32')
def get_uint_info(self, field):
length = ctypes.c_ulong()
ret = xglib.XGDMatrixGetUIntInfo(self.handle, ctypes.c_char_p(field.encode('utf-8')),
ctypes.byref(length))
return ctypes2numpy(ret, length.value, 'uint32')
def set_float_info(self, field, data):
xglib.XGDMatrixSetFloatInfo(self.handle, ctypes.c_char_p(field.encode('utf-8')),
(ctypes.c_float*len(data))(*data), len(data))
def set_uint_info(self, field, data):
xglib.XGDMatrixSetUIntInfo(self.handle, ctypes.c_char_p(field.encode('utf-8')),
(ctypes.c_uint*len(data))(*data), len(data))
# save the DMatrix into a binary file
def save_binary(self, fname, silent=True):
xglib.XGDMatrixSaveBinary(self.handle, ctypes.c_char_p(fname.encode('utf-8')), int(silent))
# set label of dmatrix
def set_label(self, label):
self.__set_float_info('label', label)
self.set_float_info('label', label)
# set weight of each instance
def set_weight(self, weight):
self.__set_float_info('weight', weight)
self.set_float_info('weight', weight)
# set initialized margin prediction
def set_base_margin(self, margin):
"""
@ -103,19 +112,19 @@ class DMatrix:
e.g. for logistic regression: need to put in value before logistic transformation
see also example/demo.py
"""
self.__set_float_info('base_margin', margin)
self.set_float_info('base_margin', margin)
# set group size of dmatrix, used for rank
def set_group(self, group):
xglib.XGDMatrixSetGroup(self.handle, (ctypes.c_uint*len(group))(*group), len(group))
# get label from dmatrix
def get_label(self):
return self.__get_float_info('label')
return self.get_float_info('label')
# get weight from dmatrix
def get_weight(self):
return self.__get_float_info('weight')
return self.get_float_info('weight')
# get base_margin from dmatrix
def get_base_margin(self):
return self.__get_float_info('base_margin')
return self.get_float_info('base_margin')
def num_row(self):
return xglib.XGDMatrixNumRow(self.handle)
# slice the DMatrix to return a new DMatrix that only contains rindex
@ -189,7 +198,7 @@ class Booster:
length = ctypes.c_ulong()
preds = xglib.XGBoosterPredict(self.handle, data.handle,
int(output_margin), ctypes.byref(length))
return ctypes2numpy(preds, length.value)
return ctypes2numpy(preds, length.value, 'float32')
def save_model(self, fname):
""" save model to file """
xglib.XGBoosterSaveModel(self.handle, ctypes.c_char_p(fname.encode('utf-8')))


@ -2,13 +2,24 @@
#include <string>
#include <utility>
#include <cstring>
#include "xgboost_wrapper.h"
#include "xgboost_R.h"
#include "xgboost_wrapper.h"
#include "../src/utils/utils.h"
#include "../src/utils/omp.h"
#include "../src/utils/matrix_csr.h"
using namespace xgboost;
// implements error handling
namespace xgboost {
namespace utils {
void HandleAssertError(const char *msg) {
error("%s", msg);
}
void HandleCheckError(const char *msg) {
error("%s", msg);
}
} // namespace utils
} // namespace xgboost
extern "C" {
void _DMatrixFinalizer(SEXP ext) {
@ -51,9 +62,9 @@ extern "C" {
int ncol = length(indptr) - 1;
int ndata = length(data);
// transform into CSR format
std::vector<size_t> row_ptr;
std::vector<bst_ulong> row_ptr;
std::vector< std::pair<unsigned, float> > csr_data;
utils::SparseCSRMBuilder< std::pair<unsigned,float> > builder(row_ptr, csr_data);
utils::SparseCSRMBuilder<std::pair<unsigned,float>, false, bst_ulong> builder(row_ptr, csr_data);
builder.InitBudget();
for (int i = 0; i < ncol; ++i) {
for (int j = col_ptr[i]; j < col_ptr[i+1]; ++j) {
@ -108,7 +119,7 @@ extern "C" {
}
}
SEXP XGDMatrixGetInfo_R(SEXP handle, SEXP field) {
size_t olen;
bst_ulong olen;
const float *res = XGDMatrixGetFloatInfo(R_ExternalPtrAddr(handle),
CHAR(asChar(field)), &olen);
SEXP ret = PROTECT(allocVector(REALSXP, olen));
@ -165,17 +176,19 @@ extern "C" {
std::vector<void*> vec_dmats;
std::vector<std::string> vec_names;
std::vector<const char*> vec_sptr;
for (int i = 0; i < len; ++i){
for (int i = 0; i < len; ++i) {
vec_dmats.push_back(R_ExternalPtrAddr(VECTOR_ELT(dmats, i)));
vec_names.push_back(std::string(CHAR(asChar(VECTOR_ELT(evnames, i)))));
vec_sptr.push_back(vec_names.back().c_str());
}
for (int i = 0; i < len; ++i) {
vec_sptr.push_back(vec_names[i].c_str());
}
return mkString(XGBoosterEvalOneIter(R_ExternalPtrAddr(handle),
asInteger(iter),
&vec_dmats[0], &vec_sptr[0], len));
&vec_dmats[0], &vec_sptr[0], len));
}
SEXP XGBoosterPredict_R(SEXP handle, SEXP dmat, SEXP output_margin) {
size_t olen;
bst_ulong olen;
const float *res = XGBoosterPredict(R_ExternalPtrAddr(handle),
R_ExternalPtrAddr(dmat),
asInteger(output_margin),
@ -194,13 +207,13 @@ extern "C" {
XGBoosterSaveModel(R_ExternalPtrAddr(handle), CHAR(asChar(fname)));
}
void XGBoosterDumpModel_R(SEXP handle, SEXP fname, SEXP fmap) {
size_t olen;
bst_ulong olen;
const char **res = XGBoosterDumpModel(R_ExternalPtrAddr(handle),
CHAR(asChar(fmap)),
&olen);
FILE *fo = utils::FopenCheck(CHAR(asChar(fname)), "w");
for (size_t i = 0; i < olen; ++i) {
fprintf(fo, "booster[%lu]:\n", i);
fprintf(fo, "booster[%u]:\n", static_cast<unsigned>(i));
fprintf(fo, "%s", res[i]);
}
fclose(fo);


@ -23,18 +23,18 @@ class Booster: public learner::BoostLearner<FMatrixS> {
this->init_model = false;
this->SetCacheData(mats);
}
const float *Pred(const DataMatrix &dmat, int output_margin, size_t *len) {
const float *Pred(const DataMatrix &dmat, int output_margin, bst_ulong *len) {
this->CheckInitModel();
this->Predict(dmat, output_margin, &this->preds_);
*len = this->preds_.size();
return &this->preds_[0];
}
inline void BoostOneIter(const DataMatrix &train,
float *grad, float *hess, size_t len) {
float *grad, float *hess, bst_ulong len) {
this->gpair_.resize(len);
const unsigned ndata = static_cast<unsigned>(len);
const bst_omp_uint ndata = static_cast<bst_omp_uint>(len);
#pragma omp parallel for schedule(static)
for (unsigned j = 0; j < ndata; ++j) {
for (bst_omp_uint j = 0; j < ndata; ++j) {
gpair_[j] = bst_gpair(grad[j], hess[j]);
}
gbm_->DoBoost(train.fmat, train.info.info, &gpair_);
@ -48,7 +48,7 @@ class Booster: public learner::BoostLearner<FMatrixS> {
learner::BoostLearner<FMatrixS>::LoadModel(fname);
this->init_model = true;
}
inline const char** GetModelDump(const utils::FeatMap& fmap, bool with_stats, size_t *len) {
inline const char** GetModelDump(const utils::FeatMap& fmap, bool with_stats, bst_ulong *len) {
model_dump = this->DumpModel(fmap, with_stats);
model_dump_cptr.resize(model_dump.size());
for (size_t i = 0; i < model_dump.size(); ++i) {
@ -76,35 +76,37 @@ extern "C"{
void* XGDMatrixCreateFromFile(const char *fname, int silent) {
return LoadDataMatrix(fname, silent, false);
}
void* XGDMatrixCreateFromCSR(const size_t *indptr,
void* XGDMatrixCreateFromCSR(const bst_ulong *indptr,
const unsigned *indices,
const float *data,
size_t nindptr,
size_t nelem) {
bst_ulong nindptr,
bst_ulong nelem) {
DMatrixSimple *p_mat = new DMatrixSimple();
DMatrixSimple &mat = *p_mat;
mat.row_ptr_.resize(nindptr);
memcpy(&mat.row_ptr_[0], indptr, sizeof(size_t)*nindptr);
mat.row_data_.resize(nelem);
for (size_t i = 0; i < nelem; ++i) {
mat.row_data_[i] = SparseBatch::Entry(indices[i], data[i]);
mat.info.num_col = std::max(mat.info.num_col,
static_cast<size_t>(indices[i]+1));
for (bst_ulong i = 0; i < nindptr; ++i) {
mat.row_ptr_[i] = static_cast<size_t>(indptr[i]);
}
mat.info.num_row = nindptr - 1;
mat.row_data_.resize(nelem);
for (bst_ulong i = 0; i < nelem; ++i) {
mat.row_data_[i] = SparseBatch::Entry(indices[i], data[i]);
mat.info.info.num_col = std::max(mat.info.info.num_col,
static_cast<size_t>(indices[i]+1));
}
mat.info.info.num_row = nindptr - 1;
return p_mat;
}
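To make the CSR layout concrete, here is an illustrative call (matrix values made up) for the 2x3 matrix {{1,0,2},{0,3,0}}:

bst_ulong indptr[]  = {0, 2, 3};        // row i occupies [indptr[i], indptr[i+1])
unsigned  indices[] = {0, 2, 1};        // column index of each nonzero
float     data[]    = {1.f, 2.f, 3.f};  // value of each nonzero
void *dmat = XGDMatrixCreateFromCSR(indptr, indices, data, 3, 3);
// num_row = nindptr - 1 = 2; num_col is inferred as max(index) + 1 = 3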
void* XGDMatrixCreateFromMat(const float *data,
size_t nrow,
size_t ncol,
bst_ulong nrow,
bst_ulong ncol,
float missing) {
DMatrixSimple *p_mat = new DMatrixSimple();
DMatrixSimple &mat = *p_mat;
mat.info.num_row = nrow;
mat.info.num_col = ncol;
for (size_t i = 0; i < nrow; ++i, data += ncol) {
size_t nelem = 0;
for (size_t j = 0; j < ncol; ++j) {
mat.info.info.num_row = nrow;
mat.info.info.num_col = ncol;
for (bst_ulong i = 0; i < nrow; ++i, data += ncol) {
bst_ulong nelem = 0;
for (bst_ulong j = 0; j < ncol; ++j) {
if (data[j] != missing) {
mat.row_data_.push_back(SparseBatch::Entry(j, data[j]));
++nelem;
@@ -116,7 +118,7 @@ extern "C"{
}
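Note that the missing-value check above is a plain data[j] != missing comparison, so the sentinel must be a finite value: NaN compares unequal to itself and would never be treated as missing here. An illustrative dense call with a finite sentinel (values made up):

const float kMiss = -999.0f;            // finite sentinel; matched with !=
float data[] = {1.0f, kMiss, 2.0f,
                kMiss, 3.0f, kMiss};    // 2x3, row-major
void *dmat = XGDMatrixCreateFromMat(data, 2, 3, kMiss);
// only the three non-sentinel entries become stored sparse entries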
void* XGDMatrixSliceDMatrix(void *handle,
const int *idxset,
size_t len) {
bst_ulong len) {
DMatrixSimple tmp;
DataMatrix &dsrc = *static_cast<DataMatrix*>(handle);
if (dsrc.magic != DMatrixSimple::kMagic) {
@@ -130,17 +132,17 @@ extern "C"{
utils::Check(src.info.group_ptr.size() == 0,
"slice does not support group structure");
ret.Clear();
ret.info.num_row = len;
ret.info.num_col = src.info.num_col;
ret.info.info.num_row = len;
ret.info.info.num_col = src.info.num_col();
utils::IIterator<SparseBatch> *iter = src.fmat.RowIterator();
iter->BeforeFirst();
utils::Assert(iter->Next(), "slice");
const SparseBatch &batch = iter->Value();
for (size_t i = 0; i < len; ++i) {
for (bst_ulong i = 0; i < len; ++i) {
const int ridx = idxset[i];
SparseBatch::Inst inst = batch[ridx];
utils::Check(static_cast<size_t>(ridx) < batch.size, "slice index exceed number of rows");
utils::Check(static_cast<bst_ulong>(ridx) < batch.size, "slice index exceed number of rows");
ret.row_data_.resize(ret.row_data_.size() + inst.length);
memcpy(&ret.row_data_[ret.row_ptr_.back()], inst.data,
sizeof(SparseBatch::Entry) * inst.length);
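Usage sketch (dmat being a handle created by one of the constructors above): selecting rows 0 and 2 into a new two-row matrix:

int idxset[] = {0, 2};
void *sub = XGDMatrixSliceDMatrix(dmat, idxset, 2);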
@@ -163,34 +165,46 @@ extern "C"{
void XGDMatrixSaveBinary(void *handle, const char *fname, int silent) {
SaveDataMatrix(*static_cast<DataMatrix*>(handle), fname, silent);
}
void XGDMatrixSetFloatInfo(void *handle, const char *field, const float *info, size_t len) {
void XGDMatrixSetFloatInfo(void *handle, const char *field, const float *info, bst_ulong len) {
std::vector<float> &vec =
static_cast<DataMatrix*>(handle)->info.GetInfo(field);
static_cast<DataMatrix*>(handle)->info.GetFloatInfo(field);
vec.resize(len);
memcpy(&vec[0], info, sizeof(float) * len);
}
void XGDMatrixSetGroup(void *handle, const unsigned *group, size_t len) {
void XGDMatrixSetUIntInfo(void *handle, const char *field, const unsigned *info, bst_ulong len) {
std::vector<unsigned> &vec =
static_cast<DataMatrix*>(handle)->info.GetUIntInfo(field);
vec.resize(len);
memcpy(&vec[0], info, sizeof(unsigned) * len);
}
void XGDMatrixSetGroup(void *handle, const unsigned *group, bst_ulong len) {
DataMatrix *pmat = static_cast<DataMatrix*>(handle);
pmat->info.group_ptr.resize(len + 1);
pmat->info.group_ptr[0] = 0;
for (size_t i = 0; i < len; ++i) {
for (bst_ulong i = 0; i < len; ++i) {
pmat->info.group_ptr[i+1] = pmat->info.group_ptr[i]+group[i];
}
}
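The loop turns per-group sizes into cumulative row offsets. For example, for ranking data with two query groups covering 3 and 2 rows (dmat is a placeholder handle):

unsigned group[] = {3, 2};
XGDMatrixSetGroup(dmat, group, 2);
// group_ptr becomes {0, 3, 5}: rows [0,3) form group 0, rows [3,5) form group 1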
const float* XGDMatrixGetFloatInfo(const void *handle, const char *field, size_t* len) {
const float* XGDMatrixGetFloatInfo(const void *handle, const char *field, bst_ulong* len) {
const std::vector<float> &vec =
static_cast<const DataMatrix*>(handle)->info.GetInfo(field);
static_cast<const DataMatrix*>(handle)->info.GetFloatInfo(field);
*len = vec.size();
return &vec[0];
}
size_t XGDMatrixNumRow(const void *handle) {
return static_cast<const DataMatrix*>(handle)->info.num_row;
const unsigned* XGDMatrixGetUIntInfo(const void *handle, const char *field, bst_ulong* len) {
const std::vector<unsigned> &vec =
static_cast<const DataMatrix*>(handle)->info.GetUIntInfo(field);
*len = vec.size();
return &vec[0];
}
bst_ulong XGDMatrixNumRow(const void *handle) {
return static_cast<const DataMatrix*>(handle)->info.num_row();
}
// xgboost implementation
void *XGBoosterCreate(void *dmats[], size_t len) {
void *XGBoosterCreate(void *dmats[], bst_ulong len) {
std::vector<DataMatrix*> mats;
for (size_t i = 0; i < len; ++i) {
for (bst_ulong i = 0; i < len; ++i) {
DataMatrix *dtr = static_cast<DataMatrix*>(dmats[i]);
mats.push_back(dtr);
}
@@ -210,7 +224,7 @@ extern "C"{
bst->UpdateOneIter(iter, *dtr);
}
void XGBoosterBoostOneIter(void *handle, void *dtrain,
float *grad, float *hess, size_t len) {
float *grad, float *hess, bst_ulong len) {
Booster *bst = static_cast<Booster*>(handle);
DataMatrix *dtr = static_cast<DataMatrix*>(dtrain);
bst->CheckInitModel();
@@ -218,11 +232,11 @@ extern "C"{
bst->BoostOneIter(*dtr, grad, hess, len);
}
const char* XGBoosterEvalOneIter(void *handle, int iter, void *dmats[],
const char *evnames[], size_t len) {
const char *evnames[], bst_ulong len) {
Booster *bst = static_cast<Booster*>(handle);
std::vector<std::string> names;
std::vector<const DataMatrix*> mats;
for (size_t i = 0; i < len; ++i) {
for (bst_ulong i = 0; i < len; ++i) {
mats.push_back(static_cast<DataMatrix*>(dmats[i]));
names.push_back(std::string(evnames[i]));
}
@@ -230,7 +244,7 @@ extern "C"{
bst->eval_str = bst->EvalOneIter(iter, mats, names);
return bst->eval_str.c_str();
}
const float *XGBoosterPredict(void *handle, void *dmat, int output_margin, size_t *len) {
const float *XGBoosterPredict(void *handle, void *dmat, int output_margin, bst_ulong *len) {
return static_cast<Booster*>(handle)->Pred(*static_cast<DataMatrix*>(dmat), output_margin, len);
}
void XGBoosterLoadModel(void *handle, const char *fname) {
@@ -239,7 +253,7 @@ extern "C"{
void XGBoosterSaveModel(const void *handle, const char *fname) {
static_cast<const Booster*>(handle)->SaveModel(fname);
}
const char** XGBoosterDumpModel(void *handle, const char *fmap, size_t *len){
const char** XGBoosterDumpModel(void *handle, const char *fmap, bst_ulong *len){
utils::FeatMap featmap;
if (strlen(fmap) != 0) {
featmap.LoadText(fmap);

wrapper/xgboost_wrapper.h
View File

@@ -7,13 +7,16 @@
* can be used to create wrappers for other languages
*/
#include <cstdio>
#define XGB_DLL
// manually define unsigned long
typedef unsigned long bst_ulong;
extern "C" {
/*!
* \brief load a data matrix
* \return a loaded data matrix
*/
void* XGDMatrixCreateFromFile(const char *fname, int silent);
XGB_DLL void* XGDMatrixCreateFromFile(const char *fname, int silent);
/*!
* \brief create a matrix content from csr format
* \param indptr pointer to row headers
@@ -23,11 +26,11 @@ extern "C" {
* \param nelem number of nonzero elements in the matrix
* \return created dmatrix
*/
void* XGDMatrixCreateFromCSR(const size_t *indptr,
const unsigned *indices,
const float *data,
size_t nindptr,
size_t nelem);
XGB_DLL void* XGDMatrixCreateFromCSR(const bst_ulong *indptr,
const unsigned *indices,
const float *data,
bst_ulong nindptr,
bst_ulong nelem);
/*!
* \brief create matrix content from dense matrix
* \param data pointer to the data space
@@ -36,10 +39,10 @@ extern "C" {
* \param missing the value that represents a missing entry
* \return created dmatrix
*/
void* XGDMatrixCreateFromMat(const float *data,
size_t nrow,
size_t ncol,
float missing);
XGB_DLL void* XGDMatrixCreateFromMat(const float *data,
bst_ulong nrow,
bst_ulong ncol,
float missing);
/*!
* \brief create a new dmatrix from sliced content of existing matrix
* \param handle instance of data matrix to be sliced
@@ -47,20 +50,20 @@ extern "C" {
* \param len length of index set
* \return a sliced new matrix
*/
void* XGDMatrixSliceDMatrix(void *handle,
const int *idxset,
size_t len);
XGB_DLL void* XGDMatrixSliceDMatrix(void *handle,
const int *idxset,
bst_ulong len);
/*!
* \brief free space in data matrix
*/
void XGDMatrixFree(void *handle);
XGB_DLL void XGDMatrixFree(void *handle);
/*!
* \brief save a data matrix into a binary file
* \param handle an instance of data matrix
* \param fname file name
* \param silent whether to suppress printing of statistics when saving
*/
void XGDMatrixSaveBinary(void *handle, const char *fname, int silent);
XGB_DLL void XGDMatrixSaveBinary(void *handle, const char *fname, int silent);
/*!
* \brief set a float vector as an information field of the matrix
* \param handle an instance of data matrix
@@ -68,52 +71,68 @@ extern "C" {
* \param array pointer to float vector
* \param len length of array
*/
void XGDMatrixSetFloatInfo(void *handle, const char *field, const float *array, size_t len);
XGB_DLL void XGDMatrixSetFloatInfo(void *handle, const char *field, const float *array, bst_ulong len);
/*!
* \brief set a uint32 vector as an information field of the matrix
* \param handle an instance of data matrix
* \param field field name
* \param array pointer to unsigned int vector
* \param len length of array
*/
XGB_DLL void XGDMatrixSetUIntInfo(void *handle, const char *field, const unsigned *array, bst_ulong len);
/*!
* \brief set the group boundaries of the training matrix (used for ranking)
* \param handle an instance of data matrix
* \param group pointer to the size of each group
* \param len length of array
*/
void XGDMatrixSetGroup(void *handle, const unsigned *group, size_t len);
XGB_DLL void XGDMatrixSetGroup(void *handle, const unsigned *group, bst_ulong len);
/*!
* \brief get float info vector from matrix
* \param handle an instance of data matrix
* \param field field name
* \param out_len used to set result length
* \return pointer to the label
* \return pointer to the result
*/
const float* XGDMatrixGetFloatInfo(const void *handle, const char *field, size_t* out_len);
XGB_DLL const float* XGDMatrixGetFloatInfo(const void *handle, const char *field, bst_ulong* out_len);
/*!
* \brief get uint32 info vector from matrix
* \param handle an instance of data matrix
* \param field field name
* \param out_len used to set result length
* \return pointer to the result
*/
XGB_DLL const unsigned* XGDMatrixGetUIntInfo(const void *handle, const char *field, bst_ulong* out_len);
/*!
* \brief return number of rows
*/
size_t XGDMatrixNumRow(const void *handle);
XGB_DLL bst_ulong XGDMatrixNumRow(const void *handle);
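Putting the DMatrix half of the API together, a typical caller sequence looks like this (a sketch; "train.txt" and the label values are placeholders):

void *dtrain = XGDMatrixCreateFromFile("train.txt", 1);
float labels[] = {0.0f, 1.0f, 0.0f};
XGDMatrixSetFloatInfo(dtrain, "label", labels, 3);
bst_ulong nrow = XGDMatrixNumRow(dtrain);  // query the row count back
XGDMatrixFree(dtrain);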
// --- start XGBoost class
/*!
* \brief create xgboost learner
* \param dmats matrices that are set to be cached
* \param len length of dmats
*/
void *XGBoosterCreate(void* dmats[], size_t len);
XGB_DLL void *XGBoosterCreate(void* dmats[], bst_ulong len);
/*!
* \brief free the object in handle
* \param handle handle to be freed
*/
void XGBoosterFree(void* handle);
XGB_DLL void XGBoosterFree(void* handle);
/*!
* \brief set parameters
* \param handle handle
* \param name parameter name
* \param value value of the parameter
*/
void XGBoosterSetParam(void *handle, const char *name, const char *value);
XGB_DLL void XGBoosterSetParam(void *handle, const char *name, const char *value);
/*!
* \brief update the model in one round using dtrain
* \param handle handle
* \param iter current iteration number
* \param dtrain training data
*/
void XGBoosterUpdateOneIter(void *handle, int iter, void *dtrain);
XGB_DLL void XGBoosterUpdateOneIter(void *handle, int iter, void *dtrain);
/*!
* \brief update the model by directly specifying the gradient and second-order gradient;
* this can be used in place of UpdateOneIter, to support customized loss functions
@@ -123,8 +142,8 @@ extern "C" {
* \param hess second order gradient statistics
* \param len length of grad/hess array
*/
void XGBoosterBoostOneIter(void *handle, void *dtrain,
float *grad, float *hess, size_t len);
XGB_DLL void XGBoosterBoostOneIter(void *handle, void *dtrain,
float *grad, float *hess, bst_ulong len);
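A hedged end-to-end sketch of the customized-objective path this function enables, again using squared error (dtrain, labels, and the round count are placeholders):

void *cache[] = {dtrain};
void *booster = XGBoosterCreate(cache, 1);
XGBoosterSetParam(booster, "eta", "0.1");
std::vector<float> grad, hess;
for (int iter = 0; iter < 10; ++iter) {
  bst_ulong n;
  const float *pred = XGBoosterPredict(booster, dtrain, 1, &n);  // raw margin
  grad.resize(n); hess.resize(n);
  for (bst_ulong i = 0; i < n; ++i) {
    grad[i] = pred[i] - labels[i];  // squared-error gradient
    hess[i] = 1.0f;                 // squared-error hessian
  }
  XGBoosterBoostOneIter(booster, dtrain, &grad[0], &hess[0], n);
}
XGBoosterFree(booster);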
/*!
* \brief get evaluation statistics for xgboost
* \param handle handle
@@ -134,8 +153,8 @@ extern "C" {
* \param len length of dmats
* \return the string containing the evaluation statistics
*/
const char *XGBoosterEvalOneIter(void *handle, int iter, void *dmats[],
const char *evnames[], size_t len);
XGB_DLL const char *XGBoosterEvalOneIter(void *handle, int iter, void *dmats[],
const char *evnames[], bst_ulong len);
/*!
* \brief make prediction based on dmat
* \param handle handle
@@ -143,19 +162,19 @@ extern "C" {
* \param output_margin whether to output only the raw margin value
* \param len used to store the length of the returned result
*/
const float *XGBoosterPredict(void *handle, void *dmat, int output_margin, size_t *len);
XGB_DLL const float *XGBoosterPredict(void *handle, void *dmat, int output_margin, bst_ulong *len);
/*!
* \brief load model from existing file
* \param handle handle
* \param fname file name
*/
void XGBoosterLoadModel(void *handle, const char *fname);
XGB_DLL void XGBoosterLoadModel(void *handle, const char *fname);
/*!
* \brief save model into existing file
* \param handle handle
* \param fname file name
*/
void XGBoosterSaveModel(const void *handle, const char *fname);
XGB_DLL void XGBoosterSaveModel(const void *handle, const char *fname);
/*!
* \brief dump model, return array of strings representing model dump
* \param handle handle
@@ -163,7 +182,7 @@ extern "C" {
* \param out_len length of output array
* \return char *data[], representing dump of each model
*/
const char **XGBoosterDumpModel(void *handle, const char *fmap,
size_t *out_len);
XGB_DLL const char **XGBoosterDumpModel(void *handle, const char *fmap,
bst_ulong *out_len);
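Usage sketch for the dump call, mirroring what the R wrapper above does when it writes its text dump (booster is a placeholder handle; the empty string means no feature map):

bst_ulong n;
const char **dump = XGBoosterDumpModel(booster, "", &n);
for (bst_ulong i = 0; i < n; ++i) {
  printf("booster[%u]:\n%s", static_cast<unsigned>(i), dump[i]);
}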
};
#endif // XGBOOST_WRAPPER_H_