diff --git a/.gitignore b/.gitignore
index 4551c79cc..220fc602a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -6,12 +6,15 @@
 # Compiled Dynamic libraries
 *.so
 *.dylib
-
+*.page
 # Compiled Static libraries
 *.lai
 *.la
 *.a
 *~
+*.Rcheck
+*.rds
+*.tar.gz
 *txt*
 *conf
 *buffer
diff --git a/Makefile b/Makefile
index 5599f3ab4..28a289ac6 100644
--- a/Makefile
+++ b/Makefile
@@ -15,7 +15,7 @@ BIN = xgboost
 OBJ = updater.o gbm.o io.o
 SLIB = wrapper/libxgboostwrapper.so 
 
-.PHONY: clean all python 
+.PHONY: clean all python Rpack
 
 all: $(BIN) $(OBJ) $(SLIB) 
 
@@ -40,19 +40,25 @@ $(OBJ) :
 install:
 	cp -f -r $(BIN)  $(INSTALL_PATH)
 
-R-package.tar.gz:
-	rm -rf xgboost-R
-	cp -r R-package xgboost-R
-	rm -rf xgboost-R/src/*.o xgboost-R/src/*.so xgboost-R/src/*.dll
-	cp -r src xgboost-R/src/src
-	mkdir xgboost-R/src/wrapper
-	cp  wrapper/xgboost_wrapper.h xgboost-R/src/wrapper
-	cp  wrapper/xgboost_wrapper.cpp xgboost-R/src/wrapper
-	cp ./LICENSE xgboost-R
-	cat R-package/src/Makevars|sed '2s/.*/PKGROOT=./' > xgboost-R/src/Makevars
-	cat R-package/src/Makevars.win|sed '2s/.*/PKGROOT=./' > xgboost-R/src/Makevars.win
-	tar czf $@ xgboost-R
-	rm -rf xgboost-R
+Rpack:  ## stage R-package into ./xgboost, build the tarball, then run R CMD check --as-cran
+	$(MAKE) clean
+	rm -rf xgboost xgboost*.tar.gz
+	cp -r R-package xgboost
+	rm -rf xgboost/inst/examples/*.buffer
+	rm -rf xgboost/inst/examples/*.model
+	rm -rf xgboost/inst/examples/dump*
+	rm -rf xgboost/src/*.o xgboost/src/*.so xgboost/src/*.dll
+	rm -rf xgboost/demo/*.model xgboost/demo/*.buffer
+	cp -r src xgboost/src/src
+	mkdir xgboost/src/wrapper
+	cp  wrapper/xgboost_wrapper.h xgboost/src/wrapper
+	cp  wrapper/xgboost_wrapper.cpp xgboost/src/wrapper
+	cp ./LICENSE xgboost
+	cat R-package/src/Makevars|sed '2s/.*/PKGROOT=./' > xgboost/src/Makevars
+	cat R-package/src/Makevars.win|sed '2s/.*/PKGROOT=./' > xgboost/src/Makevars.win
+	R CMD build xgboost
+	rm -rf xgboost
+	R CMD check --as-cran xgboost*.tar.gz
 
 clean:
-	$(RM) $(OBJ) $(BIN) $(SLIB) *.o *~ */*~ */*/*~
+	$(RM) $(OBJ) $(BIN) $(SLIB) *.o  */*.o */*/*.o *~ */*~ */*/*~
diff --git a/R-package/DESCRIPTION b/R-package/DESCRIPTION
index 7d60143bd..40705e317 100644
--- a/R-package/DESCRIPTION
+++ b/R-package/DESCRIPTION
@@ -1,12 +1,20 @@
 Package: xgboost
 Type: Package
-Title: R wrapper of xgboost
+Title: eXtreme Gradient Boosting
 Version: 0.3-0
 Date: 2014-08-23
-Author: Tianqi Chen, Tong He
-Maintainer: Tianqi Chen <tianqi.tchen@gmail.com>, Tong He <hetong007@gmail.com>
-Description: xgboost
-License: file LICENSE
+Author: Tianqi Chen <tianqi.tchen@gmail.com>, Tong He <hetong007@gmail.com>
+Maintainer: Tong He <hetong007@gmail.com>
+Description: This package is an R wrapper of xgboost, which is short for eXtreme
+    Gradient Boosting. It is an efficient and scalable implementation of the
+    gradient boosting framework. The package includes an efficient linear model
+    solver and a tree learning algorithm. The package can automatically do
+    parallel computation with OpenMP, and it can be more than 10 times faster
+    than existing gradient boosting packages such as gbm. It supports various
+    objective functions, including regression, classification and ranking. The
+    package is made to be extensible, so that users are also allowed to define
+    their own objectives easily.
+License: Apache License (== 2.0) | file LICENSE
 URL: https://github.com/tqchen/xgboost
 BugReports: https://github.com/tqchen/xgboost/issues
 Depends:
diff --git a/R-package/NAMESPACE b/R-package/NAMESPACE
index 3fc74663e..4a7cb9465 100644
--- a/R-package/NAMESPACE
+++ b/R-package/NAMESPACE
@@ -10,5 +10,6 @@ export(xgb.save)
 export(xgb.train)
 export(xgboost)
 exportMethods(predict)
+import(methods)
 importClassesFrom(Matrix,dgCMatrix)
 importClassesFrom(Matrix,dgeMatrix)
diff --git a/R-package/R/getinfo.xgb.DMatrix.R b/R-package/R/getinfo.xgb.DMatrix.R
index 5b438049c..3a79fd2fb 100644
--- a/R-package/R/getinfo.xgb.DMatrix.R
+++ b/R-package/R/getinfo.xgb.DMatrix.R
@@ -4,20 +4,23 @@ setClass('xgb.DMatrix')
 #' 
 #' Get information of an xgb.DMatrix object
 #' 
-#' @param object Object of class "xgb.DMatrix"
-#' @param name the name of the field to get
-#' 
 #' @examples
 #' data(iris)
 #' iris[,5] <- as.numeric(iris[,5])
 #' dtrain <- xgb.DMatrix(as.matrix(iris[,1:4]), label=iris[,5])
 #' labels <- getinfo(dtrain, "label")
+#' @rdname getinfo
 #' @export
 #' 
 getinfo <- function(object, ...){
     UseMethod("getinfo")
 }
 
+#' @param object Object of class "xgb.DMatrix"
+#' @param name the name of the field to get
+#' @param ... other parameters
+#' @rdname getinfo
+#' @method getinfo xgb.DMatrix
 setMethod("getinfo", signature = "xgb.DMatrix", 
           definition = function(object, name) {
               if (typeof(name) != "character") {
diff --git a/R-package/R/predict.xgb.Booster.R b/R-package/R/predict.xgb.Booster.R
index b51a1b19c..390ac689e 100644
--- a/R-package/R/predict.xgb.Booster.R
+++ b/R-package/R/predict.xgb.Booster.R
@@ -11,7 +11,8 @@ setClass("xgb.Booster")
 #'   value of sum of functions, when outputmargin=TRUE, the prediction is 
 #'   untransformed margin value. In logistic regression, outputmargin=T will
 #'   output value before logistic transformation.
-#' 
+#' @param ntreelimit limit on the number of trees used in prediction. This parameter is only valid for gbtree, not for gblinear.
+#'   When given, it must be at least 1. By default (\code{NULL}) all trees are used.
 #' @examples
 #' data(iris)
 #' bst <- xgboost(as.matrix(iris[,1:4]),as.numeric(iris[,5]), nrounds = 2)
@@ -19,11 +20,18 @@ setClass("xgb.Booster")
 #' @export
 #' 
 setMethod("predict", signature = "xgb.Booster", 
-          definition = function(object, newdata, outputmargin = FALSE) {
+          definition = function(object, newdata, outputmargin = FALSE, ntreelimit = NULL) {
   if (class(newdata) != "xgb.DMatrix") {
     newdata <- xgb.DMatrix(newdata)
   }
-  ret <- .Call("XGBoosterPredict_R", object, newdata, as.integer(outputmargin), PACKAGE = "xgboost")
+  if (is.null(ntreelimit)) {
+    ntreelimit <- 0
+  } else {
+    if (ntreelimit < 1){
+      stop("predict: ntreelimit must be equal to or greater than 1")
+    }
+  }
+  ret <- .Call("XGBoosterPredict_R", object, newdata, as.integer(outputmargin), as.integer(ntreelimit), PACKAGE = "xgboost")
   return(ret)
 })
  
diff --git a/R-package/R/slice.xgb.DMatrix.R b/R-package/R/slice.xgb.DMatrix.R
index 0c56829fa..8a93efc4d 100644
--- a/R-package/R/slice.xgb.DMatrix.R
+++ b/R-package/R/slice.xgb.DMatrix.R
@@ -6,22 +6,25 @@ setClass('xgb.DMatrix')
 #' Get a new DMatrix containing the specified rows of
 #' orginal xgb.DMatrix object
 #' 
-#' @param object Object of class "xgb.DMatrix"
-#' @param idxset a integer vector of indices of rows needed
-#' 
 #' @examples
 #' data(iris)
 #' iris[,5] <- as.numeric(iris[,5])
 #' dtrain <- xgb.DMatrix(as.matrix(iris[,1:4]), label=iris[,5])
 #' dsub <- slice(dtrain, 1:3)
+#' @rdname slice
 #' @export
 #' 
 slice <- function(object, ...){
     UseMethod("slice")
 }
 
+#' @param object Object of class "xgb.DMatrix"
+#' @param idxset a integer vector of indices of rows needed
+#' @param ... other parameters
+#' @rdname slice
+#' @method slice xgb.DMatrix
 setMethod("slice", signature = "xgb.DMatrix", 
-          definition = function(object, idxset) {
+          definition = function(object, idxset, ...) {
               if (class(object) != "xgb.DMatrix") {
                   stop("slice: first argument dtrain must be xgb.DMatrix")
               }
diff --git a/R-package/R/utils.R b/R-package/R/utils.R
index b3fb39748..da602478a 100644
--- a/R-package/R/utils.R
+++ b/R-package/R/utils.R
@@ -1,4 +1,5 @@
 #' @importClassesFrom Matrix dgCMatrix dgeMatrix
+#' @import methods
 
 # depends on matrix
 .onLoad <- function(libname, pkgname) {
@@ -48,7 +49,6 @@ xgb.Booster <- function(params = list(), cachelist = list(), modelfile = NULL) {
     }
   }
   handle <- .Call("XGBoosterCreate_R", cachelist, PACKAGE = "xgboost")
-  .Call("XGBoosterSetParam_R", handle, "seed", "0", PACKAGE = "xgboost")
   if (length(params) != 0) {
     for (i in 1:length(params)) {
       p <- params[i]
@@ -121,8 +121,8 @@ xgb.iter.eval <- function(booster, watchlist, iter) {
       stop("xgb.eval: watch list can only contain xgb.DMatrix")
     }
   }
-  evnames <- list()
   if (length(watchlist) != 0) {
+    evnames <- list()
     for (i in 1:length(watchlist)) {
       w <- watchlist[i]
       if (length(names(w)) == 0) {
@@ -130,8 +130,10 @@ xgb.iter.eval <- function(booster, watchlist, iter) {
       }
       evnames <- append(evnames, names(w))
     }
+    msg <- .Call("XGBoosterEvalOneIter_R", booster, as.integer(iter), watchlist, 
+                 evnames, PACKAGE = "xgboost")
+  } else {
+    msg <- ""
   }
-  msg <- .Call("XGBoosterEvalOneIter_R", booster, as.integer(iter), watchlist, 
-               evnames, PACKAGE = "xgboost")
   return(msg)
 } 
diff --git a/R-package/R/xgb.DMatrix.save.R b/R-package/R/xgb.DMatrix.save.R
index b108c2dad..4fcb71301 100644
--- a/R-package/R/xgb.DMatrix.save.R
+++ b/R-package/R/xgb.DMatrix.save.R
@@ -2,7 +2,7 @@
 #' 
 #' Save xgb.DMatrix object to binary file
 #' 
-#' @param model the model object.
+#' @param DMatrix the xgb.DMatrix object to save.
 #' @param fname the name of the binary file.
 #' 
 #' @examples
diff --git a/R-package/R/xgb.dump.R b/R-package/R/xgb.dump.R
index 2c7813712..09406dc99 100644
--- a/R-package/R/xgb.dump.R
+++ b/R-package/R/xgb.dump.R
@@ -4,9 +4,12 @@
 #' 
 #' @param model the model object.
 #' @param fname the name of the binary file.
-#' @param fmap feature map file representing the type of feature, to make it
-#'        look nice, run demo/demo.R for result and demo/featmap.txt for example
-#'        Format: https://github.com/tqchen/xgboost/wiki/Binary-Classification#dump-model
+#' @param fmap feature map file representing the type of feature. 
+#'        Detailed description could be found at 
+#'        \url{https://github.com/tqchen/xgboost/wiki/Binary-Classification#dump-model}.
+#'        Run inst/examples/demo.R for the result and inst/examples/featmap.txt 
+#'        for example Format.
+#'        
 #'
 #' @examples
 #' data(iris)
diff --git a/R-package/R/xgb.train.R b/R-package/R/xgb.train.R
index ceb87c1cb..58a575d03 100644
--- a/R-package/R/xgb.train.R
+++ b/R-package/R/xgb.train.R
@@ -15,7 +15,7 @@
 #' }
 #'
 #'   See \url{https://github.com/tqchen/xgboost/wiki/Parameters} for 
-#'   further details. See also demo/demo.R for walkthrough example in R.
+#'   further details. See also inst/examples/demo.R for walkthrough example in R.
 #' @param dtrain takes an \code{xgb.DMatrix} as the input.
 #' @param nrounds the max number of iterations
 #' @param watchlist what information should be printed when \code{verbose=1} or
@@ -24,10 +24,11 @@
 #'    watchlist=list(validation1=mat1, validation2=mat2) to watch
 #'    the performance of each round's model on mat1 and mat2
 #'
-#' @param obj customized objective function. Given prediction and dtrain, 
-#'   return gradient and second order gradient.
-#' @param feval custimized evaluation function. Given prediction and dtrain,
-#'   return a \code{list(metric='metric-name', value='metric-value')}.
+#' @param obj customized objective function. Returns gradient and second order
+#'   gradient with given prediction and dtrain.
+#' @param feval customized evaluation function. Returns
+#'   \code{list(metric='metric-name', value='metric-value')} with given
+#'   prediction and dtrain.
 #' @param ... other parameters to pass to \code{params}.
 #' 
 #' @details 
diff --git a/R-package/R/xgboost.R b/R-package/R/xgboost.R
index c6fc99980..6f4633fb8 100644
--- a/R-package/R/xgboost.R
+++ b/R-package/R/xgboost.R
@@ -19,7 +19,7 @@
 #' }
 #'
 #'   See \url{https://github.com/tqchen/xgboost/wiki/Parameters} for 
-#'   further details. See also demo/demo.R for walkthrough example in R.
+#'   further details. See also inst/examples/demo.R for walkthrough example in R.
 #' @param nrounds the max number of iterations
 #' @param verbose If 0, xgboost will stay silent. If 1, xgboost will print 
 #'   information of performance. If 2, xgboost will print information of both
diff --git a/R-package/README.md b/R-package/README.md
index 0fcf04981..60df71154 100644
--- a/R-package/README.md
+++ b/R-package/README.md
@@ -1,10 +1,21 @@
-This is subfolder for experimental version of R package.
+# R package for xgboost.
 
-Installation:
+## Installation
+
+For the up-to-date version (which is recommended), please install from GitHub. Windows users will need to install [RTools](http://cran.r-project.org/bin/windows/Rtools/) first.
 
 ```r
 require(devtools)
 install_github('xgboost','tqchen',subdir='R-package')
 ```
 
-Please visit [demo](https://github.com/tqchen/xgboost/blob/master/R-package/demo/demo.R) for more details.
+For stable version on CRAN, please run
+
+```r
+install.packages('xgboost')
+```
+
+## Examples
+
+* Please visit [demo](https://github.com/tqchen/xgboost/blob/master/R-package/inst/examples/demo.R) for a walkthrough example.
+* See also the [example scripts](https://github.com/tqchen/xgboost/tree/master/demo/kaggle-higgs) for Kaggle Higgs Challenge, including [speedtest script](https://github.com/tqchen/xgboost/blob/master/demo/kaggle-higgs/speedtest.R) on this dataset.
diff --git a/R-package/demo/00Index b/R-package/demo/00Index
deleted file mode 100644
index 2ca4abd32..000000000
--- a/R-package/demo/00Index
+++ /dev/null
@@ -1 +0,0 @@
-demo R code for xgboost usages on agaricus data
diff --git a/R-package/demo/agaricus.txt.test b/R-package/inst/examples/agaricus.txt.test
similarity index 100%
rename from R-package/demo/agaricus.txt.test
rename to R-package/inst/examples/agaricus.txt.test
diff --git a/R-package/demo/agaricus.txt.train b/R-package/inst/examples/agaricus.txt.train
similarity index 100%
rename from R-package/demo/agaricus.txt.train
rename to R-package/inst/examples/agaricus.txt.train
diff --git a/R-package/demo/demo.R b/R-package/inst/examples/demo.R
similarity index 100%
rename from R-package/demo/demo.R
rename to R-package/inst/examples/demo.R
diff --git a/R-package/demo/featmap.txt b/R-package/inst/examples/featmap.txt
similarity index 100%
rename from R-package/demo/featmap.txt
rename to R-package/inst/examples/featmap.txt
diff --git a/R-package/man/getinfo.Rd b/R-package/man/getinfo.Rd
index 4f63b5e92..05a25c152 100644
--- a/R-package/man/getinfo.Rd
+++ b/R-package/man/getinfo.Rd
@@ -1,14 +1,20 @@
 % Generated by roxygen2 (4.0.1): do not edit by hand
+\docType{methods}
 \name{getinfo}
 \alias{getinfo}
+\alias{getinfo,xgb.DMatrix-method}
 \title{Get information of an xgb.DMatrix object}
 \usage{
 getinfo(object, ...)
+
+\S4method{getinfo}{xgb.DMatrix}(object, name)
 }
 \arguments{
 \item{object}{Object of class "xgb.DMatrix"}
 
 \item{name}{the name of the field to get}
+
+\item{...}{other parameters}
 }
 \description{
 Get information of an xgb.DMatrix object
diff --git a/R-package/man/predict-xgb.Booster-method.Rd b/R-package/man/predict-xgb.Booster-method.Rd
index d43fd7362..d192997d2 100644
--- a/R-package/man/predict-xgb.Booster-method.Rd
+++ b/R-package/man/predict-xgb.Booster-method.Rd
@@ -4,7 +4,8 @@
 \alias{predict,xgb.Booster-method}
 \title{Predict method for eXtreme Gradient Boosting model}
 \usage{
-\S4method{predict}{xgb.Booster}(object, newdata, outputmargin = FALSE)
+\S4method{predict}{xgb.Booster}(object, newdata, outputmargin = FALSE,
+  ntreelimit = NULL)
 }
 \arguments{
 \item{object}{Object of class "xgb.Boost"}
@@ -13,9 +14,12 @@
 \code{xgb.DMatrix}.}
 
 \item{outputmargin}{whether the prediction should be shown in the original
-  value of sum of functions, when outputmargin=TRUE, the prediction is
-  untransformed margin value. In logistic regression, outputmargin=T will
-  output value before logistic transformation.}
+value of sum of functions, when outputmargin=TRUE, the prediction is
+untransformed margin value. In logistic regression, outputmargin=T will
+output value before logistic transformation.}
+
+\item{ntreelimit}{limit on the number of trees used in prediction. This parameter is only valid for gbtree, not for gblinear.
+When given, it must be at least 1. By default (\code{NULL}) all trees are used.}
 }
 \description{
 Predicted values based on xgboost model object.
diff --git a/R-package/man/slice.Rd b/R-package/man/slice.Rd
index 06d79f6c4..7acb14a32 100644
--- a/R-package/man/slice.Rd
+++ b/R-package/man/slice.Rd
@@ -1,15 +1,21 @@
 % Generated by roxygen2 (4.0.1): do not edit by hand
+\docType{methods}
 \name{slice}
 \alias{slice}
+\alias{slice,xgb.DMatrix-method}
 \title{Get a new DMatrix containing the specified rows of
 orginal xgb.DMatrix object}
 \usage{
 slice(object, ...)
+
+\S4method{slice}{xgb.DMatrix}(object, idxset, ...)
 }
 \arguments{
 \item{object}{Object of class "xgb.DMatrix"}
 
 \item{idxset}{a integer vector of indices of rows needed}
+
+\item{...}{other parameters}
 }
 \description{
 Get a new DMatrix containing the specified rows of
diff --git a/R-package/man/xgb.DMatrix.save.Rd b/R-package/man/xgb.DMatrix.save.Rd
index d4932fa42..e5e70501d 100644
--- a/R-package/man/xgb.DMatrix.save.Rd
+++ b/R-package/man/xgb.DMatrix.save.Rd
@@ -6,7 +6,7 @@
 xgb.DMatrix.save(DMatrix, fname)
 }
 \arguments{
-\item{model}{the model object.}
+\item{DMatrix}{the xgb.DMatrix object to save.}
 
 \item{fname}{the name of the binary file.}
 }
diff --git a/R-package/man/xgb.dump.Rd b/R-package/man/xgb.dump.Rd
index 1e0360b31..4d6933811 100644
--- a/R-package/man/xgb.dump.Rd
+++ b/R-package/man/xgb.dump.Rd
@@ -10,9 +10,11 @@ xgb.dump(model, fname, fmap = "")
 
 \item{fname}{the name of the binary file.}
 
-\item{fmap}{feature map file representing the type of feature, to make it
-       look nice, run demo/demo.R for result and demo/featmap.txt for example
-       Format: https://github.com/tqchen/xgboost/wiki/Binary-Classification#dump-model}
+\item{fmap}{feature map file representing the type of feature.
+       Detailed description could be found at
+       \url{https://github.com/tqchen/xgboost/wiki/Binary-Classification#dump-model}.
+       Run inst/examples/demo.R for the result and inst/examples/featmap.txt
+       for example Format.}
 }
 \description{
 Save a xgboost model to text file. Could be parsed later.
diff --git a/R-package/man/xgb.train.Rd b/R-package/man/xgb.train.Rd
index 1f29afa04..4da3b0013 100644
--- a/R-package/man/xgb.train.Rd
+++ b/R-package/man/xgb.train.Rd
@@ -20,7 +20,7 @@ xgb.train(params = list(), dtrain, nrounds, watchlist = list(),
 }
 
   See \url{https://github.com/tqchen/xgboost/wiki/Parameters} for
-  further details. See also demo/demo.R for walkthrough example in R.}
+  further details. See also inst/examples/demo.R for walkthrough example in R.}
 
 \item{dtrain}{takes an \code{xgb.DMatrix} as the input.}
 
@@ -32,11 +32,12 @@ xgb.train(params = list(), dtrain, nrounds, watchlist = list(),
    watchlist=list(validation1=mat1, validation2=mat2) to watch
    the performance of each round's model on mat1 and mat2}
 
-\item{obj}{customized objective function. Given prediction and dtrain,
-return gradient and second order gradient.}
+\item{obj}{customized objective function. Returns gradient and second order
+gradient with given prediction and dtrain.}
 
-\item{feval}{custimized evaluation function. Given prediction and dtrain,
-return a \code{list(metric='metric-name', value='metric-value')}.}
+\item{feval}{customized evaluation function. Returns
+\code{list(metric='metric-name', value='metric-value')} with given
+prediction and dtrain.}
 
 \item{...}{other parameters to pass to \code{params}.}
 }
diff --git a/R-package/man/xgboost.Rd b/R-package/man/xgboost.Rd
index a76ce5b3d..2b6c1a124 100644
--- a/R-package/man/xgboost.Rd
+++ b/R-package/man/xgboost.Rd
@@ -25,7 +25,7 @@ xgboost(data = NULL, label = NULL, params = list(), nrounds,
 }
 
   See \url{https://github.com/tqchen/xgboost/wiki/Parameters} for
-  further details. See also demo/demo.R for walkthrough example in R.}
+  further details. See also inst/examples/demo.R for walkthrough example in R.}
 
 \item{nrounds}{the max number of iterations}
 
diff --git a/R-package/src/Makevars b/R-package/src/Makevars
index 3539a913d..289f1a15a 100644
--- a/R-package/src/Makevars
+++ b/R-package/src/Makevars
@@ -1,32 +1,7 @@
 # package root
 PKGROOT=../../
 # _*_ mode: Makefile; _*_
-CXX=`R CMD config CXX`
-TCFLAGS=`R CMD config CFLAGS` 
-# expose these flags to R CMD SHLIB
-PKG_CPPFLAGS= -DXGBOOST_CUSTOMIZE_ERROR_ -I$(PKGROOT)  $(SHLIB_OPENMP_CFLAGS)
-PKG_CPPFLAGS+= $(SHLIB_PTHREAD_FLAGS)
-XGBFLAG= $(TCFLAGS) -DXGBOOST_CUSTOMIZE_ERROR_ -fPIC  $(SHLIB_OPENMP_CFLAGS) $(SHLIB_PTHREAD_FLAGS)
-
-ifeq ($(no_omp),1)
-	PKG_CPPFLAGS += -DDISABLE_OPENMP 
-endif
-
-CXXOBJ= xgboost_wrapper.o xgboost_io.o xgboost_gbm.o xgboost_updater.o
-OBJECTS= xgboost_R.o $(CXXOBJ)
-
-.PHONY: all clean
-all: $(SHLIB)
-$(SHLIB): $(OBJECTS)
-
-xgboost_wrapper.o: $(PKGROOT)/wrapper/xgboost_wrapper.cpp
-xgboost_io.o: $(PKGROOT)/src/io/io.cpp 
-xgboost_gbm.o: $(PKGROOT)/src/gbm/gbm.cpp
-xgboost_updater.o: $(PKGROOT)/src/tree/updater.cpp
-
-$(CXXOBJ) :
-	$(CXX) -c $(XGBFLAG) -o $@ $(firstword $(filter %.cpp %.c, $^) )
-
-clean:
-	rm -rf *.so *.o *~ *.dll
-
+PKG_CPPFLAGS= -DXGBOOST_CUSTOMIZE_MSG_ -DXGBOOST_CUSTOMIZE_PRNG_ -DXGBOOST_STRICT_CXX98_ -I$(PKGROOT)
+PKG_CXXFLAGS= $(SHLIB_OPENMP_CXXFLAGS)  # C++ sources: use the C++ OpenMP macro, not the C one
+PKG_LIBS = $(SHLIB_OPENMP_CXXFLAGS)  # link with the same OpenMP flags used to compile
+OBJECTS= xgboost_R.o xgboost_assert.o $(PKGROOT)/wrapper/xgboost_wrapper.o $(PKGROOT)/src/io/io.o $(PKGROOT)/src/gbm/gbm.o $(PKGROOT)/src/tree/updater.o
diff --git a/R-package/src/Makevars.win b/R-package/src/Makevars.win
index ae599fbf3..289f1a15a 100644
--- a/R-package/src/Makevars.win
+++ b/R-package/src/Makevars.win
@@ -1,33 +1,7 @@
 # package root
 PKGROOT=../../
 # _*_ mode: Makefile; _*_
-CXX=`Rcmd config CXX`
-TCFLAGS=`Rcmd config CFLAGS` 
-# expose these flags to R CMD SHLIB
-PKG_CPPFLAGS= -DXGBOOST_CUSTOMIZE_ERROR_ -I$(PKGROOT)  $(SHLIB_OPENMP_CFLAGS)
-PKG_CPPFLAGS+= $(SHLIB_PTHREAD_FLAGS)
-XGBFLAG= -O3 -DXGBOOST_CUSTOMIZE_ERROR_ -fPIC  $(SHLIB_OPENMP_CFLAGS) $(SHLIB_PTHREAD_FLAGS)
-PKG_LIBS = $(SHLIB_OPENMP_CFLAGS) $(SHLIB_PTHREAD_FLAGS)
-
-ifeq ($(no_omp),1)
-	PKG_CPPFLAGS += -DDISABLE_OPENMP 
-endif
-
-CXXOBJ= xgboost_wrapper.o xgboost_io.o xgboost_gbm.o xgboost_updater.o
-OBJECTS= xgboost_R.o $(CXXOBJ)
-
-.PHONY: all clean
-all: $(SHLIB)
-$(SHLIB): $(OBJECTS)
-
-xgboost_wrapper.o: $(PKGROOT)/wrapper/xgboost_wrapper.cpp
-xgboost_io.o: $(PKGROOT)/src/io/io.cpp 
-xgboost_gbm.o: $(PKGROOT)/src/gbm/gbm.cpp
-xgboost_updater.o: $(PKGROOT)/src/tree/updater.cpp
-
-$(CXXOBJ) :
-	$(CXX) -c $(XGBFLAG) -o $@ $(firstword $(filter %.cpp %.c, $^) )
-
-clean:
-	rm -rf *.so *.o *~ *.dll
-
+PKG_CPPFLAGS= -DXGBOOST_CUSTOMIZE_MSG_ -DXGBOOST_CUSTOMIZE_PRNG_ -DXGBOOST_STRICT_CXX98_ -I$(PKGROOT)
+PKG_CXXFLAGS= $(SHLIB_OPENMP_CXXFLAGS)  # C++ sources: use the C++ OpenMP macro, not the C one
+PKG_LIBS = $(SHLIB_OPENMP_CXXFLAGS)  # link with the same OpenMP flags used to compile
+OBJECTS= xgboost_R.o xgboost_assert.o $(PKGROOT)/wrapper/xgboost_wrapper.o $(PKGROOT)/src/io/io.o $(PKGROOT)/src/gbm/gbm.o $(PKGROOT)/src/tree/updater.o
diff --git a/R-package/src/xgboost_R.cpp b/R-package/src/xgboost_R.cpp
index b358ef4ae..a7753dfa5 100644
--- a/R-package/src/xgboost_R.cpp
+++ b/R-package/src/xgboost_R.cpp
@@ -2,25 +2,55 @@
 #include <string>
 #include <utility>
 #include <cstring>
+#include <cstdio>
 #include "xgboost_R.h"
 #include "wrapper/xgboost_wrapper.h"
 #include "src/utils/utils.h"
 #include "src/utils/omp.h"
 #include "src/utils/matrix_csr.h"
-
+using namespace std;
 using namespace xgboost;
+
+extern "C" {
+  void XGBoostAssert_R(int exp, const char *fmt, ...);
+  void XGBoostCheck_R(int exp, const char *fmt, ...);
+  int XGBoostSPrintf_R(char *buf, size_t size, const char *fmt, ...);
+}
+
 // implements error handling
 namespace xgboost {
 namespace utils {
-void HandleAssertError(const char *msg) {
-  error("%s", msg);
-}
-void HandleCheckError(const char *msg) {
-  error("%s", msg);
+extern "C" {
+  void (*Printf)(const char *fmt, ...) = Rprintf;
+  int (*SPrintf)(char *buf, size_t size, const char *fmt, ...) = XGBoostSPrintf_R;
+  void (*Assert)(int exp, const char *fmt, ...) = XGBoostAssert_R;
+  void (*Check)(int exp, const char *fmt, ...) = XGBoostCheck_R;
+  void (*Error)(const char *fmt, ...) = error;
 }
 }  // namespace utils
+
+namespace random {
+void Seed(unsigned seed) {
+  warning("parameter seed is ignored, please set random seed using set.seed");
+}
+double Uniform(void) {
+  return unif_rand();
+}
+double Normal(void) {
+  return norm_rand();
+}
+}  // namespace random
 }  // namespace xgboost
 
+// call before wrapper starts
+inline void _WrapperBegin(void) {
+  GetRNGstate();
+}
+// call after wrapper ends
+inline void _WrapperEnd(void) {
+  PutRNGstate();
+}
+
 extern "C" {
   void _DMatrixFinalizer(SEXP ext) {    
     if (R_ExternalPtrAddr(ext) == NULL) return;
@@ -28,14 +58,17 @@ extern "C" {
     R_ClearExternalPtr(ext);
   }
   SEXP XGDMatrixCreateFromFile_R(SEXP fname, SEXP silent) {
+    _WrapperBegin();
     void *handle = XGDMatrixCreateFromFile(CHAR(asChar(fname)), asInteger(silent));
     SEXP ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue));
     R_RegisterCFinalizerEx(ret, _DMatrixFinalizer, TRUE);
+    _WrapperEnd();  // PutRNGstate() allocates; run it while ret is still protected
     UNPROTECT(1);
     return ret;
   }
   SEXP XGDMatrixCreateFromMat_R(SEXP mat, 
                                 SEXP missing) {
+    _WrapperBegin();
     SEXP dim = getAttrib(mat, R_DimSymbol);
     int nrow = INTEGER(dim)[0];
     int ncol = INTEGER(dim)[1];    
@@ -47,15 +80,17 @@ extern "C" {
         data[i * ncol +j] = din[i + nrow * j];
       }
     }
-    void *handle = XGDMatrixCreateFromMat(&data[0], nrow, ncol, asReal(missing));
+    void *handle = XGDMatrixCreateFromMat(BeginPtr(data), nrow, ncol, asReal(missing));
     SEXP ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue));
     R_RegisterCFinalizerEx(ret, _DMatrixFinalizer, TRUE);
     UNPROTECT(1);
+    _WrapperEnd();
     return ret;    
   }
   SEXP XGDMatrixCreateFromCSC_R(SEXP indptr,
                                 SEXP indices,
                                 SEXP data) {
+    _WrapperBegin();
     const int *col_ptr = INTEGER(indptr);
     const int *row_index = INTEGER(indices);
     const double *col_data = REAL(data);
@@ -85,29 +120,36 @@ extern "C" {
       col_index[i] = csr_data[i].first;
       row_data[i] = csr_data[i].second;      
     }
-    void *handle = XGDMatrixCreateFromCSR(&row_ptr[0], &col_index[0], &row_data[0], row_ptr.size(), ndata );
+    void *handle = XGDMatrixCreateFromCSR(BeginPtr(row_ptr), BeginPtr(col_index),
+                                          BeginPtr(row_data), row_ptr.size(), ndata );
     SEXP ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue));
     R_RegisterCFinalizerEx(ret, _DMatrixFinalizer, TRUE);
     UNPROTECT(1);
+    _WrapperEnd();
     return ret;
   }
   SEXP XGDMatrixSliceDMatrix_R(SEXP handle, SEXP idxset) {
+    _WrapperBegin();
     int len = length(idxset);
     std::vector<int> idxvec(len);
     for (int i = 0; i < len; ++i) {
       idxvec[i] = INTEGER(idxset)[i] - 1;
     }
-    void *res = XGDMatrixSliceDMatrix(R_ExternalPtrAddr(handle),  &idxvec[0], len);
+    void *res = XGDMatrixSliceDMatrix(R_ExternalPtrAddr(handle), BeginPtr(idxvec), len);
     SEXP ret = PROTECT(R_MakeExternalPtr(res, R_NilValue, R_NilValue));
     R_RegisterCFinalizerEx(ret, _DMatrixFinalizer, TRUE);
+    _WrapperEnd();  // PutRNGstate() allocates; run it while ret is still protected
     UNPROTECT(1);
     return ret;        
   }
   void XGDMatrixSaveBinary_R(SEXP handle, SEXP fname, SEXP silent) {
+    _WrapperBegin();
     XGDMatrixSaveBinary(R_ExternalPtrAddr(handle),
                         CHAR(asChar(fname)), asInteger(silent));
+    _WrapperEnd();
   }
   void XGDMatrixSetInfo_R(SEXP handle, SEXP field, SEXP array) {
+    _WrapperBegin();
     int len = length(array);
     const char *name = CHAR(asChar(field));
     if (!strcmp("group", name)) {
@@ -116,7 +158,8 @@ extern "C" {
       for (int i = 0; i < len; ++i) {
         vec[i] = static_cast<unsigned>(INTEGER(array)[i]);
       }
-      XGDMatrixSetGroup(R_ExternalPtrAddr(handle), &vec[0], len);
+      XGDMatrixSetGroup(R_ExternalPtrAddr(handle), BeginPtr(vec), len);
+      _WrapperEnd();
       return;
     }
     {
@@ -127,10 +170,12 @@ extern "C" {
       }
       XGDMatrixSetFloatInfo(R_ExternalPtrAddr(handle), 
                             CHAR(asChar(field)),
-                            &vec[0], len);
+                            BeginPtr(vec), len);
     }
+    _WrapperEnd();
   }
   SEXP XGDMatrixGetInfo_R(SEXP handle, SEXP field) {
+    _WrapperBegin();
     bst_ulong olen;
     const float *res = XGDMatrixGetFloatInfo(R_ExternalPtrAddr(handle),
                                              CHAR(asChar(field)), &olen);
@@ -139,6 +184,7 @@ extern "C" {
       REAL(ret)[i] = res[i];
     }
     UNPROTECT(1);
+    _WrapperEnd();
     return ret;
   }
   // functions related to booster
@@ -148,28 +194,35 @@ extern "C" {
     R_ClearExternalPtr(ext);
   }
   SEXP XGBoosterCreate_R(SEXP dmats) {
+    _WrapperBegin();
     int len = length(dmats);
     std::vector<void*> dvec;
     for (int i = 0; i < len; ++i){
       dvec.push_back(R_ExternalPtrAddr(VECTOR_ELT(dmats, i)));
     }
-    void *handle = XGBoosterCreate(&dvec[0], dvec.size());
+    void *handle = XGBoosterCreate(BeginPtr(dvec), dvec.size());
     SEXP ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue));
     R_RegisterCFinalizerEx(ret, _BoosterFinalizer, TRUE);
+    _WrapperEnd();  // PutRNGstate() allocates; run it while ret is still protected
     UNPROTECT(1);
     return ret;
   }
   void XGBoosterSetParam_R(SEXP handle, SEXP name, SEXP val) {
+    _WrapperBegin();
     XGBoosterSetParam(R_ExternalPtrAddr(handle),
                       CHAR(asChar(name)),
                       CHAR(asChar(val)));
+    _WrapperEnd();
   }
   void XGBoosterUpdateOneIter_R(SEXP handle, SEXP iter, SEXP dtrain) {
+    _WrapperBegin();
     XGBoosterUpdateOneIter(R_ExternalPtrAddr(handle),
                            asInteger(iter),
                            R_ExternalPtrAddr(dtrain));
+    _WrapperEnd();
   }
   void XGBoosterBoostOneIter_R(SEXP handle, SEXP dtrain, SEXP grad, SEXP hess) {
+    _WrapperBegin();
     utils::Check(length(grad) == length(hess), "gradient and hess must have same length");
     int len = length(grad);
     std::vector<float> tgrad(len), thess(len);
@@ -180,9 +233,11 @@ extern "C" {
     }
     XGBoosterBoostOneIter(R_ExternalPtrAddr(handle),
                           R_ExternalPtrAddr(dtrain),
-                          &tgrad[0], &thess[0], len);
+                          BeginPtr(tgrad), BeginPtr(thess), len);
+    _WrapperEnd();
   }
   SEXP XGBoosterEvalOneIter_R(SEXP handle, SEXP iter, SEXP dmats, SEXP evnames) {
+    _WrapperBegin();
     utils::Check(length(dmats) == length(evnames), "dmats and evnams must have same length");
     int len = length(dmats);
     std::vector<void*> vec_dmats;
@@ -197,28 +252,37 @@ extern "C" {
     }
     return mkString(XGBoosterEvalOneIter(R_ExternalPtrAddr(handle),
                                          asInteger(iter),
-                                         &vec_dmats[0], &vec_sptr[0], len));
+                                         BeginPtr(vec_dmats), BeginPtr(vec_sptr), len));
+    _WrapperEnd();
   }
-  SEXP XGBoosterPredict_R(SEXP handle, SEXP dmat, SEXP output_margin) {
+  SEXP XGBoosterPredict_R(SEXP handle, SEXP dmat, SEXP output_margin, SEXP ntree_limit) {
+    _WrapperBegin();
     bst_ulong olen;
     const float *res = XGBoosterPredict(R_ExternalPtrAddr(handle),
                                         R_ExternalPtrAddr(dmat),
                                         asInteger(output_margin),
+                                        asInteger(ntree_limit),
                                         &olen);
     SEXP ret = PROTECT(allocVector(REALSXP, olen));
     for (size_t i = 0; i < olen; ++i) {
       REAL(ret)[i] = res[i];
     }
+    _WrapperEnd();  // PutRNGstate() allocates; run it while ret is still protected
     UNPROTECT(1);
     return ret;
   }
   void XGBoosterLoadModel_R(SEXP handle, SEXP fname) {
+    _WrapperBegin();
     XGBoosterLoadModel(R_ExternalPtrAddr(handle), CHAR(asChar(fname)));
+    _WrapperEnd();
   }
   void XGBoosterSaveModel_R(SEXP handle, SEXP fname) {
+    _WrapperBegin();
     XGBoosterSaveModel(R_ExternalPtrAddr(handle), CHAR(asChar(fname)));
+    _WrapperEnd();
   }
   void XGBoosterDumpModel_R(SEXP handle, SEXP fname, SEXP fmap) {
+    _WrapperBegin();
     bst_ulong olen;
     const char **res = XGBoosterDumpModel(R_ExternalPtrAddr(handle),
                                           CHAR(asChar(fmap)),
@@ -229,5 +293,6 @@ extern "C" {
       fprintf(fo, "%s", res[i]);
     }
     fclose(fo);
+    _WrapperEnd();
   }
 }
diff --git a/R-package/src/xgboost_R.h b/R-package/src/xgboost_R.h
index ecacdeced..c988ff1e5 100644
--- a/R-package/src/xgboost_R.h
+++ b/R-package/src/xgboost_R.h
@@ -7,6 +7,7 @@
  */
 extern "C" {
 #include <Rinternals.h>
+#include <R_ext/Random.h>
 }
 
 extern "C" {
@@ -106,8 +107,9 @@ extern "C" {
    * \param handle handle
    * \param dmat data matrix
    * \param output_margin whether only output raw margin value
+   * \param ntree_limit limit number of trees used in prediction
    */
-  SEXP XGBoosterPredict_R(SEXP handle, SEXP dmat, SEXP output_margin);
+  SEXP XGBoosterPredict_R(SEXP handle, SEXP dmat, SEXP output_margin, SEXP ntree_limit);
   /*!
    * \brief load model from existing file
    * \param handle handle
diff --git a/R-package/src/xgboost_assert.c b/R-package/src/xgboost_assert.c
new file mode 100644
index 000000000..20b789492
--- /dev/null
+++ b/R-package/src/xgboost_assert.c
@@ -0,0 +1,33 @@
+#include <stdio.h>
+#include <stdarg.h>
+#include <Rinternals.h>
+
+// implements error handling
+void XGBoostAssert_R(int exp, const char *fmt, ...) {  // when exp == 0, report "AssertError:<msg>" via error()
+  char buf[1024];  // fixed-size scratch space for the formatted message
+  if (exp == 0) {
+    va_list args;
+    va_start(args, fmt);
+    vsnprintf(buf, sizeof(buf), fmt, args);  // bounded: long messages are truncated, not overflowed
+    va_end(args);
+    error("AssertError:%s\n", buf);
+  }
+}
+void XGBoostCheck_R(int exp, const char *fmt, ...) {  // when exp == 0, report the formatted message via error()
+  char buf[1024];  // fixed-size scratch space for the formatted message
+  if (exp == 0) {
+    va_list args;
+    va_start(args, fmt);
+    vsnprintf(buf, sizeof(buf), fmt, args);  // bounded: long messages are truncated, not overflowed
+    va_end(args);
+    error("%s\n", buf);
+  }
+}
+int XGBoostSPrintf_R(char *buf, size_t size, const char *fmt, ...) {  // snprintf-style formatter into buf
+  va_list ap;
+  int nwritten;
+  va_start(ap, fmt);
+  nwritten = vsnprintf(buf, size, fmt, ap);  // never writes more than `size` bytes
+  va_end(ap);
+  return nwritten;  // pass through vsnprintf's result unchanged
+}
diff --git a/R-package/vignettes/xgboost.Rnw b/R-package/vignettes/xgboost.Rnw
index ed4447d57..19254abaf 100644
--- a/R-package/vignettes/xgboost.Rnw
+++ b/R-package/vignettes/xgboost.Rnw
@@ -173,7 +173,7 @@ objective function.
 We also have \verb@slice@ for row extraction. It is useful in 
 cross-validation.
 
-For a walkthrough demo, please see \verb@R-package/demo/demo.R@ for further 
+For a walkthrough demo, please see \verb@R-package/inst/examples/demo.R@ for further 
 details.
 
 \section{The Higgs Boson competition}
diff --git a/README.md b/README.md
index ba4b08bfd..38291b09d 100644
--- a/README.md
+++ b/README.md
@@ -35,11 +35,11 @@ Version
 ======
 * This version xgboost-0.3, the code has been refactored from 0.2x to be cleaner and more flexibility
 * This version of xgboost is not compatible with 0.2x, due to huge amount of changes in code structure
-  - This means the model and buffer file of previous version can not be loaded in xgboost-unity
+  - This means the model and buffer files of previous versions can not be loaded in xgboost-0.3
 * For legacy 0.2x code, refer to [Here](https://github.com/tqchen/xgboost/releases/tag/v0.22)
 * Change log in [CHANGES.md](CHANGES.md)
 
 XGBoost in Graphlab Create
 ======
-* XGBoost is adopted as part of boosted tree toolkit in Graphlab Create (GLC). Graphlab Create is a powerful python toolkit that allows you to data manipulation, graph processing, hyper-parameter search, and visualization of big data in one framework. Try the Graphlab Create in http://graphlab.com/products/create/quick-start-guide.html
+* XGBoost is adopted as part of boosted tree toolkit in Graphlab Create (GLC). Graphlab Create is a powerful python toolkit that allows you to do data manipulation, graph processing, hyper-parameter search, and visualization of terabyte-scale data in one framework. Try Graphlab Create at http://graphlab.com/products/create/quick-start-guide.html
 * Nice blogpost by Jay Gu using GLC boosted tree to solve kaggle bike sharing challenge: http://blog.graphlab.com/using-gradient-boosted-trees-to-predict-bike-sharing-demand
diff --git a/src/gbm/gblinear-inl.hpp b/src/gbm/gblinear-inl.hpp
index e9566f87e..a9d4c8d62 100644
--- a/src/gbm/gblinear-inl.hpp
+++ b/src/gbm/gblinear-inl.hpp
@@ -105,7 +105,10 @@ class GBLinear : public IGradBooster {
   virtual void Predict(IFMatrix *p_fmat,
                        int64_t buffer_offset,
                        const BoosterInfo &info,
-                       std::vector<float> *out_preds) {
+                       std::vector<float> *out_preds,
+                       unsigned ntree_limit = 0) {
+    utils::Check(ntree_limit == 0,
+                 "GBLinear::Predict ntrees is only valid for gbtree predictor");
     std::vector<float> &preds = *out_preds;
     preds.resize(0);
     // start collecting the prediction
diff --git a/src/gbm/gbm.cpp b/src/gbm/gbm.cpp
index ae0e4af94..4713838e9 100644
--- a/src/gbm/gbm.cpp
+++ b/src/gbm/gbm.cpp
@@ -1,6 +1,7 @@
 #define _CRT_SECURE_NO_WARNINGS
 #define _CRT_SECURE_NO_DEPRECATE
 #include <cstring>
+using namespace std;
 #include "./gbm.h"
 #include "./gbtree-inl.hpp"
 #include "./gblinear-inl.hpp"
diff --git a/src/gbm/gbm.h b/src/gbm/gbm.h
index c548cab94..07dade4ac 100644
--- a/src/gbm/gbm.h
+++ b/src/gbm/gbm.h
@@ -57,11 +57,14 @@ class IGradBooster {
    *  the size of buffer is set by convention using IGradBooster.SetParam("num_pbuffer","size")
    * \param info extra side information that may be needed for prediction
    * \param out_preds output vector to hold the predictions
+   * \param ntree_limit limit the number of trees used in prediction, when it equals 0, this means 
+   *    we do not limit number of trees, this parameter is only valid for gbtree, but not for gblinear
    */
   virtual void Predict(IFMatrix *p_fmat,
                        int64_t buffer_offset,
                        const BoosterInfo &info,
-                       std::vector<float> *out_preds) = 0;
+                       std::vector<float> *out_preds,
+                       unsigned ntree_limit = 0) = 0;
   /*!
    * \brief dump the model in text format
    * \param fmap feature map that may help give interpretations of feature
diff --git a/src/gbm/gbtree-inl.hpp b/src/gbm/gbtree-inl.hpp
index f66b49d00..8fea28727 100644
--- a/src/gbm/gbtree-inl.hpp
+++ b/src/gbm/gbtree-inl.hpp
@@ -105,7 +105,8 @@ class GBTree : public IGradBooster {
   virtual void Predict(IFMatrix *p_fmat,
                        int64_t buffer_offset,
                        const BoosterInfo &info,
-                       std::vector<float> *out_preds) {
+                       std::vector<float> *out_preds,
+                       unsigned ntree_limit = 0) {
     int nthread;
     #pragma omp parallel
     {
@@ -137,7 +138,8 @@ class GBTree : public IGradBooster {
           this->Pred(batch[i],
                      buffer_offset < 0 ? -1 : buffer_offset + ridx,
                      gid, info.GetRoot(ridx), &feats,
-                     &preds[ridx * mparam.num_output_group + gid], stride);
+                     &preds[ridx * mparam.num_output_group + gid], stride, 
+                     ntree_limit);
         }
       }
     }
@@ -212,14 +214,16 @@ class GBTree : public IGradBooster {
                    int bst_group,
                    unsigned root_index,
                    tree::RegTree::FVec *p_feats,
-                   float *out_pred, size_t stride) {
+                   float *out_pred, size_t stride, unsigned ntree_limit) {
     size_t itop = 0;
     float  psum = 0.0f;
     // sum of leaf vector 
     std::vector<float> vec_psum(mparam.size_leaf_vector, 0.0f);
     const int64_t bid = mparam.BufferOffset(buffer_index, bst_group);
+    // number of valid trees
+    unsigned treeleft = ntree_limit == 0 ? std::numeric_limits<unsigned>::max() : ntree_limit;
     // load buffered results if any
-    if (bid >= 0) {
+    if (bid >= 0 && ntree_limit == 0) {
       itop = pred_counter[bid];
       psum = pred_buffer[bid];
       for (int i = 0; i < mparam.size_leaf_vector; ++i) {
@@ -235,12 +239,13 @@ class GBTree : public IGradBooster {
           for (int j = 0; j < mparam.size_leaf_vector; ++j) {
             vec_psum[j] += trees[i]->leafvec(tid)[j];
           }
+          if(--treeleft == 0) break;
         }
       }
       p_feats->Drop(inst);
     }
     // updated the buffered results
-    if (bid >= 0) {
+    if (bid >= 0 && ntree_limit == 0) {
       pred_counter[bid] = static_cast<unsigned>(trees.size());
       pred_buffer[bid] = psum;
       for (int i = 0; i < mparam.size_leaf_vector; ++i) {
diff --git a/src/io/io.cpp b/src/io/io.cpp
index e413b2799..c2d9e26d3 100644
--- a/src/io/io.cpp
+++ b/src/io/io.cpp
@@ -1,6 +1,7 @@
 #define _CRT_SECURE_NO_WARNINGS
 #define _CRT_SECURE_NO_DEPRECATE
 #include <string>
+using namespace std;
 #include "./io.h"
 #include "../utils/io.h"
 #include "../utils/utils.h"
diff --git a/src/io/simple_dmatrix-inl.hpp b/src/io/simple_dmatrix-inl.hpp
index bd18f0476..59d5093d7 100644
--- a/src/io/simple_dmatrix-inl.hpp
+++ b/src/io/simple_dmatrix-inl.hpp
@@ -54,8 +54,10 @@ class DMatrixSimple : public DataMatrix {
       for (size_t i = 0; i < batch.size; ++i) {
         RowBatch::Inst inst = batch[i];
         row_data_.resize(row_data_.size() + inst.length);
-        memcpy(&row_data_[row_ptr_.back()], inst.data,
-               sizeof(RowBatch::Entry) * inst.length);
+        if (inst.length != 0) {
+          memcpy(&row_data_[row_ptr_.back()], inst.data,
+                 sizeof(RowBatch::Entry) * inst.length);
+        }
         row_ptr_.push_back(row_ptr_.back() + inst.length);
       }
     }
@@ -104,10 +106,10 @@ class DMatrixSimple : public DataMatrix {
     this->AddRow(feats);
 
     if (!silent) {
-      printf("%lux%lu matrix with %lu entries is loaded from %s\n",
-             static_cast<unsigned long>(info.num_row()),
-             static_cast<unsigned long>(info.num_col()),
-             static_cast<unsigned long>(row_data_.size()), fname);
+      utils::Printf("%lux%lu matrix with %lu entries is loaded from %s\n",
+                    static_cast<unsigned long>(info.num_row()),
+                    static_cast<unsigned long>(info.num_col()),
+                    static_cast<unsigned long>(row_data_.size()), fname);
     }
     fclose(file);
     // try to load in additional file
@@ -147,26 +149,26 @@ class DMatrixSimple : public DataMatrix {
    * \param fname file name, used to print message
    */
   inline void LoadBinary(utils::IStream &fs, bool silent = false, const char *fname = NULL) {
-    int magic;
-    utils::Check(fs.Read(&magic, sizeof(magic)) != 0, "invalid input file format");
-    utils::Check(magic == kMagic, "invalid format,magic number mismatch");
+    int tmagic;
+    utils::Check(fs.Read(&tmagic, sizeof(tmagic)) != 0, "invalid input file format");
+    utils::Check(tmagic == kMagic, "invalid format,magic number mismatch");
 
     info.LoadBinary(fs);
     FMatrixS::LoadBinary(fs, &row_ptr_, &row_data_);
     fmat_->LoadColAccess(fs);
 
     if (!silent) {
-      printf("%lux%lu matrix with %lu entries is loaded",
-             static_cast<unsigned long>(info.num_row()),
-             static_cast<unsigned long>(info.num_col()),
-             static_cast<unsigned long>(row_data_.size()));
+      utils::Printf("%lux%lu matrix with %lu entries is loaded",
+                    static_cast<unsigned long>(info.num_row()),
+                    static_cast<unsigned long>(info.num_col()),
+                    static_cast<unsigned long>(row_data_.size()));
       if (fname != NULL) {
-        printf(" from %s\n", fname);
+        utils::Printf(" from %s\n", fname);
       } else {
-        printf("\n");
+        utils::Printf("\n");
       }
       if (info.group_ptr.size() != 0) {
-        printf("data contains %u groups\n", (unsigned)info.group_ptr.size()-1);
+        utils::Printf("data contains %u groups\n", (unsigned)info.group_ptr.size()-1);
       }
     }
   }
@@ -177,8 +179,8 @@ class DMatrixSimple : public DataMatrix {
    */
   inline void SaveBinary(const char* fname, bool silent = false) const {
     utils::FileStream fs(utils::FopenCheck(fname, "wb"));
-    int magic = kMagic;
-    fs.Write(&magic, sizeof(magic));
+    int tmagic = kMagic;
+    fs.Write(&tmagic, sizeof(tmagic));
 
     info.SaveBinary(fs);
     FMatrixS::SaveBinary(fs, row_ptr_, row_data_);
@@ -186,13 +188,13 @@ class DMatrixSimple : public DataMatrix {
     fs.Close();
 
     if (!silent) {
-      printf("%lux%lu matrix with %lu entries is saved to %s\n",
-             static_cast<unsigned long>(info.num_row()),
-             static_cast<unsigned long>(info.num_col()),
-             static_cast<unsigned long>(row_data_.size()), fname);
+      utils::Printf("%lux%lu matrix with %lu entries is saved to %s\n",
+                    static_cast<unsigned long>(info.num_row()),
+                    static_cast<unsigned long>(info.num_col()),
+                    static_cast<unsigned long>(row_data_.size()), fname);
       if (info.group_ptr.size() != 0) {
-        printf("data contains %u groups\n",
-               static_cast<unsigned>(info.group_ptr.size()-1));
+        utils::Printf("data contains %u groups\n",
+                      static_cast<unsigned>(info.group_ptr.size()-1));
       }
     }
   }
@@ -244,8 +246,8 @@ class DMatrixSimple : public DataMatrix {
       at_first_ = false;
       batch_.size = parent_->row_ptr_.size() - 1;
       batch_.base_rowid = 0;
-      batch_.ind_ptr = &parent_->row_ptr_[0];
-      batch_.data_ptr = &parent_->row_data_[0];
+      batch_.ind_ptr = BeginPtr(parent_->row_ptr_);
+      batch_.data_ptr = BeginPtr(parent_->row_data_);
       return true;
     }
     virtual const RowBatch &Value(void) const {
diff --git a/src/io/simple_fmatrix-inl.hpp b/src/io/simple_fmatrix-inl.hpp
index f099eb1a9..997268ff3 100644
--- a/src/io/simple_fmatrix-inl.hpp
+++ b/src/io/simple_fmatrix-inl.hpp
@@ -110,9 +110,9 @@ class FMatrixS : public IFMatrix{
                                 const std::vector<RowBatch::Entry> &data) {
     size_t nrow = ptr.size() - 1;
     fo.Write(&nrow, sizeof(size_t));
-    fo.Write(&ptr[0], ptr.size() * sizeof(size_t));
+    fo.Write(BeginPtr(ptr), ptr.size() * sizeof(size_t));
     if (data.size() != 0) {
-      fo.Write(&data[0], data.size() * sizeof(RowBatch::Entry));
+      fo.Write(BeginPtr(data), data.size() * sizeof(RowBatch::Entry));
     }
   }
   /*!
@@ -127,11 +127,11 @@ class FMatrixS : public IFMatrix{
     size_t nrow;
     utils::Check(fi.Read(&nrow, sizeof(size_t)) != 0, "invalid input file format");
     out_ptr->resize(nrow + 1);
-    utils::Check(fi.Read(&(*out_ptr)[0], out_ptr->size() * sizeof(size_t)) != 0,
+    utils::Check(fi.Read(BeginPtr(*out_ptr), out_ptr->size() * sizeof(size_t)) != 0,
                   "invalid input file format");
     out_data->resize(out_ptr->back());
     if (out_data->size() != 0) {
-      utils::Assert(fi.Read(&(*out_data)[0], out_data->size() * sizeof(RowBatch::Entry)) != 0,
+      utils::Assert(fi.Read(BeginPtr(*out_data), out_data->size() * sizeof(RowBatch::Entry)) != 0,
                     "invalid input file format");
     }
   }
@@ -213,8 +213,8 @@ class FMatrixS : public IFMatrix{
         col_data_[i] = SparseBatch::Inst(&data[0] + ptr[ridx],
                                          static_cast<bst_uint>(ptr[ridx+1] - ptr[ridx]));
       }
-      batch_.col_index = &col_index_[0];
-      batch_.col_data = &col_data_[0];
+      batch_.col_index = BeginPtr(col_index_);
+      batch_.col_data = BeginPtr(col_data_);
       this->BeforeFirst();
     }
     // data content
diff --git a/src/learner/dmatrix.h b/src/learner/dmatrix.h
index 542b6f6f5..bef84900a 100644
--- a/src/learner/dmatrix.h
+++ b/src/learner/dmatrix.h
@@ -98,8 +98,8 @@ struct MetaInfo {
       group_ptr.push_back(group_ptr.back()+nline);
     }
     if (!silent) {
-      printf("%u groups are loaded from %s\n",
-			  static_cast<unsigned>(group_ptr.size()-1), fname);
+      utils::Printf("%u groups are loaded from %s\n",
+                    static_cast<unsigned>(group_ptr.size()-1), fname);
     }
     fclose(fi);
     return true;
@@ -125,15 +125,15 @@ struct MetaInfo {
   }
   // try to load weight information from file, if exists
   inline bool TryLoadFloatInfo(const char *field, const char* fname, bool silent = false) {
-    std::vector<float> &weights = this->GetFloatInfo(field);
+    std::vector<float> &data = this->GetFloatInfo(field);
     FILE *fi = fopen64(fname, "r");
     if (fi == NULL) return false;
     float wt;
     while (fscanf(fi, "%f", &wt) == 1) {
-      weights.push_back(wt);
+      data.push_back(wt);
     }
     if (!silent) {
-      printf("loading %s from %s\n", field, fname);
+      utils::Printf("loading %s from %s\n", field, fname);
     }
     fclose(fi);
     return true;
diff --git a/src/learner/evaluation-inl.hpp b/src/learner/evaluation-inl.hpp
index 3058cf06b..52877e17b 100644
--- a/src/learner/evaluation-inl.hpp
+++ b/src/learner/evaluation-inl.hpp
@@ -8,8 +8,8 @@
 #include <vector>
 #include <utility>
 #include <string>
-#include <climits>
 #include <cmath>
+#include <climits>
 #include <algorithm>
 #include "./evaluation.h"
 #include "./helper_utils.h"
@@ -183,7 +183,7 @@ struct EvalAMS : public IEvaluator {
       }
     }
     if (ntop == ndata) {
-      fprintf(stderr, "\tams-ratio=%g", static_cast<float>(thresindex) / ndata);
+      utils::Printf("\tams-ratio=%g", static_cast<float>(thresindex) / ndata);
       return static_cast<float>(tams);
     } else {
       return static_cast<float>(sqrt(2*((s_tp+b_fp+br) * log(1.0 + s_tp/(b_fp+br)) - s_tp)));
diff --git a/src/learner/evaluation.h b/src/learner/evaluation.h
index 90f4a5839..ec37e1f4a 100644
--- a/src/learner/evaluation.h
+++ b/src/learner/evaluation.h
@@ -73,7 +73,7 @@ class EvalSet{
     for (size_t i = 0; i < evals_.size(); ++i) {
       float res = evals_[i]->Eval(preds, info);
       char tmp[1024];
-      snprintf(tmp, sizeof(tmp), "\t%s-%s:%f", evname, evals_[i]->Name(), res);
+      utils::SPrintf(tmp, sizeof(tmp), "\t%s-%s:%f", evname, evals_[i]->Name(), res);
       result += tmp;
     }
     return result;
diff --git a/src/learner/helper_utils.h b/src/learner/helper_utils.h
index e2f8a3574..aa1e66bbc 100644
--- a/src/learner/helper_utils.h
+++ b/src/learner/helper_utils.h
@@ -7,6 +7,7 @@
  */
 #include <utility>
 #include <vector>
+#include <cmath>
 #include <algorithm>
 namespace xgboost {
 namespace learner {
diff --git a/src/learner/learner-inl.hpp b/src/learner/learner-inl.hpp
index 8e7bce0a8..5d7c9d06a 100644
--- a/src/learner/learner-inl.hpp
+++ b/src/learner/learner-inl.hpp
@@ -63,14 +63,14 @@ class BoostLearner {
     }
     char str_temp[25];
     if (num_feature > mparam.num_feature) {
-      snprintf(str_temp, sizeof(str_temp), "%u", num_feature);
+      utils::SPrintf(str_temp, sizeof(str_temp), "%u", num_feature);
       this->SetParam("bst:num_feature", str_temp);
     }
-    snprintf(str_temp, sizeof(str_temp), "%lu",
+    utils::SPrintf(str_temp, sizeof(str_temp), "%lu",
 			 static_cast<unsigned long>(buffer_size));
     this->SetParam("num_pbuffer", str_temp);
     if (!silent) {
-      printf("buffer_size=%ld\n", static_cast<long>(buffer_size));
+      utils::Printf("buffer_size=%ld\n", static_cast<long>(buffer_size));
     }
   }
   /*!
@@ -183,7 +183,7 @@ class BoostLearner {
                                  const std::vector<std::string> &evname) {
     std::string res;
     char tmp[256];
-    snprintf(tmp, sizeof(tmp), "[%d]", iter);
+    utils::SPrintf(tmp, sizeof(tmp), "[%d]", iter);
     res = tmp;
     for (size_t i = 0; i < evals.size(); ++i) {
       this->PredictRaw(*evals[i], &preds_);
@@ -212,11 +212,14 @@ class BoostLearner {
    * \param data input data
    * \param output_margin whether to only predict margin value instead of transformed prediction
    * \param out_preds output vector that stores the prediction
+   * \param ntree_limit limit number of trees used for boosted tree
+   *   predictor, when it equals 0, this means we are using all the trees
    */
   inline void Predict(const DMatrix &data,
                       bool output_margin,
-                      std::vector<float> *out_preds) const {
-    this->PredictRaw(data, out_preds);
+                      std::vector<float> *out_preds,
+                      unsigned ntree_limit = 0) const {
+    this->PredictRaw(data, out_preds, ntree_limit);
     if (!output_margin) {
       obj_->PredTransform(out_preds);
     }
@@ -246,11 +249,14 @@ class BoostLearner {
    * \brief get un-transformed prediction
    * \param data training data matrix
    * \param out_preds output vector that stores the prediction
+   * \param ntree_limit limit number of trees used for boosted tree
+   *   predictor, when it equals 0, this means we are using all the trees   
    */
   inline void PredictRaw(const DMatrix &data,
-                         std::vector<float> *out_preds) const {
+                         std::vector<float> *out_preds,
+                         unsigned ntree_limit = 0) const {
     gbm_->Predict(data.fmat(), this->FindBufferOffset(data),
-                  data.info.info, out_preds);
+                  data.info.info, out_preds, ntree_limit);
     // add base margin
     std::vector<float> &preds = *out_preds;
     const bst_omp_uint ndata = static_cast<bst_omp_uint>(preds.size());
diff --git a/src/learner/objective-inl.hpp b/src/learner/objective-inl.hpp
index 9e338a6b2..576549eac 100644
--- a/src/learner/objective-inl.hpp
+++ b/src/learner/objective-inl.hpp
@@ -6,9 +6,9 @@
  * \author Tianqi Chen, Kailong Chen
  */
 #include <vector>
-#include <cmath>
 #include <algorithm>
 #include <utility>
+#include <cmath>
 #include <functional>
 #include "../data.h"
 #include "./objective.h"
@@ -37,7 +37,7 @@ struct LossType {
       case kLogisticRaw:
       case kLinearSquare: return x;
       case kLogisticClassify:
-      case kLogisticNeglik: return 1.0f / (1.0f + expf(-x));
+      case kLogisticNeglik: return 1.0f / (1.0f + std::exp(-x));
       default: utils::Error("unknown loss_type"); return 0.0f;
     }
   }
@@ -50,7 +50,7 @@ struct LossType {
   inline float FirstOrderGradient(float predt, float label) const {
     switch (loss_type) {
       case kLinearSquare: return predt - label;
-      case kLogisticRaw: predt = 1.0f / (1.0f + expf(-predt));
+      case kLogisticRaw: predt = 1.0f / (1.0f + std::exp(-predt));
       case kLogisticClassify:
       case kLogisticNeglik: return predt - label;
       default: utils::Error("unknown loss_type"); return 0.0f;
@@ -65,7 +65,7 @@ struct LossType {
   inline float SecondOrderGradient(float predt, float label) const {
     switch (loss_type) {
       case kLinearSquare: return 1.0f;
-      case kLogisticRaw: predt = 1.0f / (1.0f + expf(-predt));
+      case kLogisticRaw: predt = 1.0f / (1.0f + std::exp(-predt));
       case kLogisticClassify:
       case kLogisticNeglik: return predt * (1 - predt);
       default: utils::Error("unknown loss_type"); return 0.0f;
@@ -80,7 +80,7 @@ struct LossType {
         loss_type == kLogisticNeglik ) {
       utils::Check(base_score > 0.0f && base_score < 1.0f,
                    "base_score must be in (0,1) for logistic loss");
-      base_score = -logf(1.0f / base_score - 1.0f);
+      base_score = -std::log(1.0f / base_score - 1.0f);
     }
     return base_score;
   }
@@ -419,8 +419,8 @@ class LambdaRankObjNDCG : public LambdaRankObj {
       for (size_t i = 0; i < pairs.size(); ++i) {
         unsigned pos_idx = pairs[i].pos_index;
         unsigned neg_idx = pairs[i].neg_index;
-        float pos_loginv = 1.0f / logf(pos_idx + 2.0f);
-        float neg_loginv = 1.0f / logf(neg_idx + 2.0f);
+        float pos_loginv = 1.0f / std::log(pos_idx + 2.0f);
+        float neg_loginv = 1.0f / std::log(neg_idx + 2.0f);
         int pos_label = static_cast<int>(sorted_list[pos_idx].label);
         int neg_label = static_cast<int>(sorted_list[neg_idx].label);
         float original =
@@ -438,7 +438,7 @@ class LambdaRankObjNDCG : public LambdaRankObj {
     for (size_t i = 0; i < labels.size(); ++i) {
       const unsigned rel = static_cast<unsigned>(labels[i]);
       if (rel != 0) {
-        sumdcg += ((1 << rel) - 1) / logf(static_cast<float>(i + 2));
+        sumdcg += ((1 << rel) - 1) / std::log(static_cast<float>(i + 2));
       }
     }
     return static_cast<float>(sumdcg);
diff --git a/src/tree/param.h b/src/tree/param.h
index 92bc1c990..4b0f154f8 100644
--- a/src/tree/param.h
+++ b/src/tree/param.h
@@ -302,11 +302,11 @@ struct SplitEntry{
    * \param loss_chg the loss reduction get through the split
    * \param split_index the feature index where the split is on 
    */
-  inline bool NeedReplace(bst_float loss_chg, unsigned split_index) const {
+  inline bool NeedReplace(bst_float new_loss_chg, unsigned split_index) const {
     if (this->split_index() <= split_index) {
-      return loss_chg > this->loss_chg;
+      return new_loss_chg > this->loss_chg;  // stored index is not larger: replace only on a strictly greater loss change
     } else {
-      return !(this->loss_chg > loss_chg);
+      return !(this->loss_chg > new_loss_chg);  // candidate has the smaller index: it also wins ties
     }
   }
   /*! 
@@ -332,13 +332,13 @@ struct SplitEntry{
    * \param default_left whether the missing value goes to left
    * \return whether the proposed split is better and can replace current split
    */
-  inline bool Update(bst_float loss_chg, unsigned split_index,
-                     float split_value, bool default_left) {
-    if (this->NeedReplace(loss_chg, split_index)) {
-      this->loss_chg = loss_chg;
+  inline bool Update(bst_float new_loss_chg, unsigned split_index,
+                     float new_split_value, bool default_left) {
+    if (this->NeedReplace(new_loss_chg, split_index)) {
+      this->loss_chg = new_loss_chg;
       if (default_left) split_index |= (1U << 31);
       this->sindex = split_index;
-      this->split_value = split_value;
+      this->split_value = new_split_value;
       return true;
     } else {
       return false;
diff --git a/src/tree/updater.cpp b/src/tree/updater.cpp
index 25bee7922..09b63eb49 100644
--- a/src/tree/updater.cpp
+++ b/src/tree/updater.cpp
@@ -1,6 +1,7 @@
 #define _CRT_SECURE_NO_WARNINGS
 #define _CRT_SECURE_NO_DEPRECATE
 #include <cstring>
+using namespace std;
 #include "./updater.h"
 #include "./updater_prune-inl.hpp"
 #include "./updater_refresh-inl.hpp"
diff --git a/src/tree/updater_prune-inl.hpp b/src/tree/updater_prune-inl.hpp
index e3d5be11a..98fdf5ee4 100644
--- a/src/tree/updater_prune-inl.hpp
+++ b/src/tree/updater_prune-inl.hpp
@@ -63,8 +63,8 @@ class TreePruner: public IUpdater {
       }
     }
     if (silent == 0) {
-      printf("tree prunning end, %d roots, %d extra nodes, %d pruned nodes ,max_depth=%d\n",
-             tree.param.num_roots, tree.num_extra_nodes(), npruned, tree.MaxDepth());
+      utils::Printf("tree prunning end, %d roots, %d extra nodes, %d pruned nodes ,max_depth=%d\n",
+                    tree.param.num_roots, tree.num_extra_nodes(), npruned, tree.MaxDepth());
     }
   }
 
diff --git a/src/tree/updater_refresh-inl.hpp b/src/tree/updater_refresh-inl.hpp
index d184dcb39..a37630333 100644
--- a/src/tree/updater_refresh-inl.hpp
+++ b/src/tree/updater_refresh-inl.hpp
@@ -26,14 +26,14 @@ class TreeRefresher: public IUpdater {
   virtual void Update(const std::vector<bst_gpair> &gpair,
                       IFMatrix *p_fmat,
                       const BoosterInfo &info,
-                      const std::vector<RegTree*> &trees) {
+                      const std::vector<RegTree*> &trees) {    
     if (trees.size() == 0) return;
     // number of threads
-    int nthread;
     // thread temporal space
     std::vector< std::vector<TStats> > stemp;
     std::vector<RegTree::FVec> fvec_temp;
     // setup temp space for each thread
+    int nthread;
     #pragma omp parallel
     {
       nthread = omp_get_num_threads();
@@ -127,8 +127,6 @@ class TreeRefresher: public IUpdater {
       this->Refresh(gstats, tree[nid].cright(), p_tree);
     }
   }
-  // number of thread in the data
-  int nthread;
   // training parameter
   TrainParam param;
 };
diff --git a/src/utils/io.h b/src/utils/io.h
index 276dd7312..d98b3e4dc 100644
--- a/src/utils/io.h
+++ b/src/utils/io.h
@@ -100,12 +100,10 @@ class ISeekStream: public IStream {
 /*! \brief implementation of file i/o stream */
 class FileStream : public ISeekStream {
  public:
+  explicit FileStream(FILE *fp) : fp(fp) {}
   explicit FileStream(void) {
     this->fp = NULL;
   }
-  explicit FileStream(FILE *fp) {
-    this->fp = fp;
-  }
   virtual size_t Read(void *ptr, size_t size) {
     return fread(ptr, size, 1, fp);
   }
diff --git a/src/utils/matrix_csr.h b/src/utils/matrix_csr.h
index 44a3b8818..b2768b2ea 100644
--- a/src/utils/matrix_csr.h
+++ b/src/utils/matrix_csr.h
@@ -163,7 +163,7 @@ struct SparseCSRFileBuilder {
     fo->Write(rptr);
     // setup buffer space
     buffer_rptr.resize(rptr.size());
-    buffer.reserve(buffer_size);
+    buffer_temp.reserve(buffer_size);
     buffer_data.resize(buffer_size);
     saved_offset.clear();
     saved_offset.resize(rptr.size() - 1, 0);
diff --git a/src/utils/omp.h b/src/utils/omp.h
index 0380ebd67..5eb5612e0 100644
--- a/src/utils/omp.h
+++ b/src/utils/omp.h
@@ -9,13 +9,8 @@
 #include <omp.h>
 #else
 #ifndef DISABLE_OPENMP
-#ifndef _MSC_VER
-#warning "OpenMP is not available, compile to single thread code."\
-		 "You may want to ungrade your compiler to enable OpenMP support,"\
-		 "to get benefit of multi-threading."
-#else
-// TODO add warning for msvc
-#endif
+// use pragma message instead of warning
+#pragma message ("Warning: OpenMP is not available, xgboost will be compiled into single-thread code. Use OpenMP-enabled compiler to get benefit of multi-threading")
 #endif
 inline int omp_get_thread_num() { return 0; }
 inline int omp_get_num_threads() { return 1; }
diff --git a/src/utils/random.h b/src/utils/random.h
index bf8b04d9d..57e1f243d 100644
--- a/src/utils/random.h
+++ b/src/utils/random.h
@@ -16,30 +16,21 @@
 /*! namespace of PRNG */
 namespace xgboost {
 namespace random {
-
+#ifndef XGBOOST_CUSTOMIZE_PRNG_
 /*! \brief seed the PRNG */
-inline void Seed(uint32_t seed) {
+inline void Seed(unsigned seed) {
   srand(seed);
 }
-/*! \brief return a real number uniform in [0,1) */
-inline double NextDouble(void) {
+/*! \brief basic function, uniform */
+inline double Uniform(void) {
   return static_cast<double>(rand()) / (static_cast<double>(RAND_MAX)+1.0);
 }
 /*! \brief return a real numer uniform in (0,1) */
 inline double NextDouble2(void) {
   return (static_cast<double>(rand()) + 1.0) / (static_cast<double>(RAND_MAX)+2.0);
 }
-
-/*! \brief return a random number */
-inline uint32_t NextUInt32(void) {
-  return (uint32_t)rand();
-}
-/*! \brief return a random number in n */
-inline uint32_t NextUInt32(uint32_t n) {
-  return (uint32_t)floor(NextDouble() * n);
-}
 /*! \brief return  x~N(0,1) */
-inline double SampleNormal() {
+inline double Normal(void) {
   double x, y, s;
   do {
     x = 2 * NextDouble2() - 1.0;
@@ -49,22 +40,24 @@ inline double SampleNormal() {
 
   return x * sqrt(-2.0 * log(s) / s);
 }
+#else
+// include declarations, to be implemented
+void Seed(unsigned seed);
+double Uniform(void);
+double Normal(void);
+#endif
 
-/*! \brief return iid x,y ~N(0,1) */
-inline void SampleNormal2D(double &xx, double &yy) {
-  double x, y, s;
-  do {
-    x = 2 * NextDouble2() - 1.0;
-    y = 2 * NextDouble2() - 1.0;
-    s = x*x + y*y;
-  } while (s >= 1.0 || s == 0.0);
-  double t = sqrt(-2.0 * log(s) / s);
-  xx = x * t;
-  yy = y * t;
+/*! \brief return a real number uniform in [0,1) */
+inline double NextDouble(void) {
+  return Uniform();
+}
+/*! \brief return a random number in n */
+inline uint32_t NextUInt32(uint32_t n) {
+  return (uint32_t)floor(NextDouble() * n);
 }
 /*! \brief return  x~N(mu,sigma^2) */
 inline double SampleNormal(double mu, double sigma) {
-  return SampleNormal() * sigma + mu;
+  return Normal() * sigma + mu;
 }
 /*! \brief  return 1 with probability p, coin flip */
 inline int SampleBinary(double p) {
@@ -90,7 +83,7 @@ struct Random{
   inline void Seed(unsigned sd) {
 	 this->rseed = sd;
 #if defined(_MSC_VER)||defined(_WIN32)
-    srand(rseed);
+     ::xgboost::random::Seed(sd);  // NOTE(review): the Uniform() fallback below also applies under XGBOOST_STRICT_CXX98_, which this guard does not cover -- confirm intended
 #endif
   }
   /*! \brief return a real number uniform in [0,1) */
@@ -98,8 +91,8 @@ struct Random{
 	// use rand instead of rand_r in windows, for MSVC it is fine since rand is threadsafe
 	// For cygwin and mingw, this can slows down parallelism, but rand_r is only used in objective-inl.hpp, won't affect speed in general
 	// todo, replace with another PRNG
-#if defined(_MSC_VER)||defined(_WIN32)
-    return static_cast<double>(rand()) / (static_cast<double>(RAND_MAX) + 1.0);
+#if defined(_MSC_VER)||defined(_WIN32)||defined(XGBOOST_STRICT_CXX98_)
+    return Uniform();
 #else
     return static_cast<double>(rand_r(&rseed)) / (static_cast<double>(RAND_MAX) + 1.0);
 #endif
diff --git a/src/utils/utils.h b/src/utils/utils.h
index 501895224..5c3342d8e 100644
--- a/src/utils/utils.h
+++ b/src/utils/utils.h
@@ -7,11 +7,18 @@
  */
 #define _CRT_SECURE_NO_WARNINGS
 #include <cstdio>
-#include <cstdarg>
 #include <string>
 #include <cstdlib>
+#include <vector>
+
+#ifndef XGBOOST_STRICT_CXX98_
+#include <cstdarg>
+#endif
+
+#if !defined(__GNUC__)
+#define fopen64 std::fopen
+#endif
 #ifdef _MSC_VER
-#define fopen64 fopen
 // NOTE: sprintf_s is not equivalent to snprintf, 
 // they are equivalent when success, which is sufficient for our case
 #define snprintf sprintf_s
@@ -19,16 +26,15 @@
 #else
 #ifdef _FILE_OFFSET_BITS
 #if _FILE_OFFSET_BITS == 32
-#warning "FILE OFFSET BITS defined to be 32 bit"
+#pragma message ("Warning: FILE OFFSET BITS defined to be 32 bit")
 #endif
 #endif
 
-#ifdef __APPLE__
+#ifdef __APPLE__ 
 #define off64_t off_t
-#define fopen64 fopen
+#define fopen64 std::fopen
 #endif
 
-#define _FILE_OFFSET_BITS 64
 extern "C" {
 #include <sys/types.h>
 }
@@ -47,10 +53,11 @@ typedef long int64_t;
 namespace xgboost {
 /*! \brief namespace for helper utils of the project */
 namespace utils {
-/*! \brief error message buffer length */
-const int kErrorBuffer = 1 << 12;
 
-#ifndef XGBOOST_CUSTOMIZE_ERROR_
+/*! \brief error message buffer length */
+const int kPrintBuffer = 1 << 12;
+
+#ifndef XGBOOST_CUSTOMIZE_MSG_
 /*! 
  * \brief handling of Assert error, caused by in-apropriate input
  * \param msg error message 
@@ -67,19 +74,50 @@ inline void HandleCheckError(const char *msg) {
   fprintf(stderr, "%s\n", msg);
   exit(-1);
 }
+inline void HandlePrint(const char *msg) {  // default print handler: writes msg to stdout as-is
+  printf("%s", msg);
+}
 #else
+#ifndef XGBOOST_STRICT_CXX98_
+// declarations only; someone must implement these
 void HandleAssertError(const char *msg);
 void HandleCheckError(const char *msg);
+void HandlePrint(const char *msg);
 #endif
+#endif
+#ifdef XGBOOST_STRICT_CXX98_
+// these function pointers are only declared here; they must be defined and assigned elsewhere
+extern "C" void (*Printf)(const char *fmt, ...);
+extern "C" int (*SPrintf)(char *buf, size_t size, const char *fmt, ...);
+extern "C" void (*Assert)(int exp, const char *fmt, ...);
+extern "C" void (*Check)(int exp, const char *fmt, ...);
+extern "C" void (*Error)(const char *fmt, ...);
+#else
+/*! \brief format a message printf-style and forward it to HandlePrint */
+inline void Printf(const char *fmt, ...) {
+  std::string text(kPrintBuffer, '\0');
+  va_list ap;
+  va_start(ap, fmt);
+  vsnprintf(&text[0], text.size(), fmt, ap);
+  va_end(ap);
+  HandlePrint(text.c_str());
+}
+/*! \brief snprintf-style formatting into buf (at most size bytes); returns vsnprintf's result */
+inline int SPrintf(char *buf, size_t size, const char *fmt, ...) {
+  va_list ap;
+  va_start(ap, fmt);
+  const int written = vsnprintf(buf, size, fmt, ap);
+  va_end(ap);
+  return written;
+}
 
 /*! \brief assert an condition is true, use this to handle debug information */
 inline void Assert(bool exp, const char *fmt, ...) {
   if (!exp) {
-    std::string msg(kErrorBuffer, '\0');
+    std::string msg(kPrintBuffer, '\0');
     va_list args;
     va_start(args, fmt);
-    vsnprintf(&msg[0], kErrorBuffer, fmt, args);
+    vsnprintf(&msg[0], kPrintBuffer, fmt, args);
     va_end(args);
     HandleAssertError(msg.c_str());
   }
@@ -88,10 +126,10 @@ inline void Assert(bool exp, const char *fmt, ...) {
 /*!\brief same as assert, but this is intended to be used as message for user*/
 inline void Check(bool exp, const char *fmt, ...) {
   if (!exp) {
-    std::string msg(kErrorBuffer, '\0');
+    std::string msg(kPrintBuffer, '\0');
     va_list args;
     va_start(args, fmt);
-    vsnprintf(&msg[0], kErrorBuffer, fmt, args);
+    vsnprintf(&msg[0], kPrintBuffer, fmt, args);
     va_end(args);
     HandleCheckError(msg.c_str());
   }
@@ -100,14 +138,15 @@ inline void Check(bool exp, const char *fmt, ...) {
 /*! \brief report error message, same as check */
 inline void Error(const char *fmt, ...) {
   {
-    std::string msg(kErrorBuffer, '\0');
+    std::string msg(kPrintBuffer, '\0');
     va_list args;
     va_start(args, fmt);
-    vsnprintf(&msg[0], kErrorBuffer, fmt, args);
+    vsnprintf(&msg[0], kPrintBuffer, fmt, args);
     va_end(args);
     HandleCheckError(msg.c_str());
   }
 }
+#endif
 
 /*! \brief replace fopen, report error when the file open fails */
 inline FILE *FopenCheck(const char *fname, const char *flag) {
@@ -115,7 +154,25 @@ inline FILE *FopenCheck(const char *fname, const char *flag) {
   Check(fp != NULL, "can not open file \"%s\"\n", fname);
   return fp;
 }
-
-}  // namespace utils
+} // namespace utils
+// easy utils that can be directly accessed in xgboost
+/*!
+ * \brief pointer to the first element of a vector
+ * \return NULL when vec is empty, otherwise the address of vec[0]
+ */
+template<typename T>
+inline T *BeginPtr(std::vector<T> &vec) {
+  if (vec.empty()) return NULL;
+  return &vec[0];
+}
+/*!
+ * \brief pointer to the first element of a const vector
+ * \return NULL when vec is empty, otherwise the address of vec[0]
+ */
+template<typename T>
+inline const T *BeginPtr(const std::vector<T> &vec) {
+  if (vec.empty()) return NULL;
+  return &vec[0];
+}
 }  // namespace xgboost
 #endif  // XGBOOST_UTILS_UTILS_H_
diff --git a/src/xgboost_main.cpp b/src/xgboost_main.cpp
index 13acacda2..75544dd0e 100644
--- a/src/xgboost_main.cpp
+++ b/src/xgboost_main.cpp
@@ -50,6 +50,7 @@ class BoostLearnTask{
     if (!strcmp("use_buffer", name)) use_buffer = atoi(val);
     if (!strcmp("num_round", name)) num_round = atoi(val);
     if (!strcmp("pred_margin", name)) pred_margin = atoi(val);
+    if (!strcmp("ntree_limit", name)) ntree_limit = atoi(val);
     if (!strcmp("save_period", name)) save_period = atoi(val);
     if (!strcmp("eval_train", name)) eval_train = atoi(val);
     if (!strcmp("task", name)) task = val;
@@ -79,6 +80,7 @@ class BoostLearnTask{
     save_period = 0;
     eval_train = 0;
     pred_margin = 0;
+    ntree_limit = 0;
     dump_model_stats = 0;
     task = "train";
     model_in = "NULL";
@@ -186,7 +188,7 @@ class BoostLearnTask{
   inline void TaskPred(void) {
     std::vector<float> preds;
     if (!silent) printf("start prediction...\n");
-    learner.Predict(*data, pred_margin != 0, &preds);
+    learner.Predict(*data, pred_margin != 0, &preds, ntree_limit);
     if (!silent) printf("writing prediction to %s\n", name_pred.c_str());
     FILE *fo = utils::FopenCheck(name_pred.c_str(), "w");
     for (size_t i = 0; i < preds.size(); i++) {
@@ -217,6 +219,8 @@ class BoostLearnTask{
   std::string task;
   /*! \brief name of predict file */
   std::string name_pred;
+  /*!\brief limit number of trees in prediction */
+  int ntree_limit;
   /*!\brief whether to directly output margin value */
   int pred_margin;
   /*! \brief whether dump statistics along with model */
diff --git a/wrapper/xgboost.py b/wrapper/xgboost.py
index e4338e0cd..b7bc0ab66 100644
--- a/wrapper/xgboost.py
+++ b/wrapper/xgboost.py
@@ -365,7 +365,7 @@ class Booster:
         return xglib.XGBoosterEvalOneIter(self.handle, it, dmats, evnames, len(evals))
     def eval(self, mat, name = 'eval', it = 0):
         return self.eval_set( [(mat,name)], it)
-    def predict(self, data, output_margin=False):
+    def predict(self, data, output_margin=False, ntree_limit=0):
         """
         predict with data
             Args:
@@ -373,12 +373,14 @@ class Booster:
                       the dmatrix storing the input
                 output_margin: bool
                                whether output raw margin value that is untransformed
+                ntree_limit: int
+                             limit number of trees in prediction; defaults to 0, which means use all trees
             Returns:
                 numpy array of prediction
         """
         length = ctypes.c_ulong()
         preds = xglib.XGBoosterPredict(self.handle, data.handle,
-                                       int(output_margin), ctypes.byref(length))
+                                       int(output_margin), ntree_limit, ctypes.byref(length))
         return ctypes2numpy(preds, length.value, 'float32')
     def save_model(self, fname):
         """ save model to file
diff --git a/wrapper/xgboost_wrapper.cpp b/wrapper/xgboost_wrapper.cpp
index 70c7e87b0..abb844bce 100644
--- a/wrapper/xgboost_wrapper.cpp
+++ b/wrapper/xgboost_wrapper.cpp
@@ -6,10 +6,14 @@
 #include <string>
 #include <cstring>
 #include <algorithm>
+// include all std functions
+using namespace std;
+
 #include "./xgboost_wrapper.h"
 #include "../src/data.h"
 #include "../src/learner/learner-inl.hpp"
 #include "../src/io/io.h"
+#include "../src/utils/utils.h"
 #include "../src/io/simple_dmatrix-inl.hpp"
 
 using namespace xgboost;
@@ -25,11 +29,11 @@ class Booster: public learner::BoostLearner {
     this->init_model = false;
     this->SetCacheData(mats);
   }
-  const float *Pred(const DataMatrix &dmat, int output_margin, bst_ulong *len) {
+  inline const float *Pred(const DataMatrix &dmat, int output_margin, unsigned ntree_limit, bst_ulong *len) {
     this->CheckInitModel();
-    this->Predict(dmat, output_margin != 0, &this->preds_);
+    this->Predict(dmat, output_margin != 0, &this->preds_, ntree_limit);
     *len = static_cast<bst_ulong>(this->preds_.size());
-    return &this->preds_[0];
+    return BeginPtr(this->preds_);  // NULL when preds_ is empty; the pointer refers to this object's preds_ buffer
   }
   inline void BoostOneIter(const DataMatrix &train,
                            float *grad, float *hess, bst_ulong len) {
@@ -57,7 +61,7 @@ class Booster: public learner::BoostLearner {
       model_dump_cptr[i] = model_dump[i].c_str();
     }
     *len = static_cast<bst_ulong>(model_dump.size());
-    return &model_dump_cptr[0];
+    return BeginPtr(model_dump_cptr);
   }
   // temporal fields
   // temporal data to save evaluation dump
@@ -174,13 +178,13 @@ extern "C"{
     std::vector<float> &vec = 
         static_cast<DataMatrix*>(handle)->info.GetFloatInfo(field);
     vec.resize(len);
-    memcpy(&vec[0], info, sizeof(float) * len);
+    memcpy(BeginPtr(vec), info, sizeof(float) * len);
   }
   void XGDMatrixSetUIntInfo(void *handle, const char *field, const unsigned *info, bst_ulong len) {
     std::vector<unsigned> &vec =
         static_cast<DataMatrix*>(handle)->info.GetUIntInfo(field);
     vec.resize(len);
-    memcpy(&vec[0], info, sizeof(unsigned) * len);
+    memcpy(BeginPtr(vec), info, sizeof(unsigned) * len);
   }
   void XGDMatrixSetGroup(void *handle, const unsigned *group, bst_ulong len) {
     DataMatrix *pmat = static_cast<DataMatrix*>(handle);
@@ -194,13 +198,13 @@ extern "C"{
     const std::vector<float> &vec =
         static_cast<const DataMatrix*>(handle)->info.GetFloatInfo(field);
     *len = static_cast<bst_ulong>(vec.size());
-    return &vec[0];
+    return BeginPtr(vec);
   }
   const unsigned* XGDMatrixGetUIntInfo(const void *handle, const char *field, bst_ulong* len) {
     const std::vector<unsigned> &vec =
         static_cast<const DataMatrix*>(handle)->info.GetUIntInfo(field);
     *len = static_cast<bst_ulong>(vec.size());
-    return &vec[0];
+    return BeginPtr(vec);
   }
   bst_ulong XGDMatrixNumRow(const void *handle) {
     return static_cast<bst_ulong>(static_cast<const DataMatrix*>(handle)->info.num_row());
@@ -249,8 +253,8 @@ extern "C"{
     bst->eval_str = bst->EvalOneIter(iter, mats, names);
     return bst->eval_str.c_str();
   }
-  const float *XGBoosterPredict(void *handle, void *dmat, int output_margin, bst_ulong *len) {
-    return static_cast<Booster*>(handle)->Pred(*static_cast<DataMatrix*>(dmat), output_margin, len);
+  const float *XGBoosterPredict(void *handle, void *dmat, int output_margin, unsigned ntree_limit, bst_ulong *len) {
+    return static_cast<Booster*>(handle)->Pred(*static_cast<DataMatrix*>(dmat), output_margin, ntree_limit, len);
   }
   void XGBoosterLoadModel(void *handle, const char *fname) {
     static_cast<Booster*>(handle)->LoadModel(fname);
diff --git a/wrapper/xgboost_wrapper.h b/wrapper/xgboost_wrapper.h
index 65446aea6..9687ec0a3 100644
--- a/wrapper/xgboost_wrapper.h
+++ b/wrapper/xgboost_wrapper.h
@@ -165,9 +165,11 @@ extern "C" {
    * \param handle handle
    * \param dmat data matrix
    * \param output_margin whether only output raw margin value
+   * \param ntree_limit limit number of trees used for prediction, this is only valid for boosted trees
+   *    when the parameter is set to 0, we will use all the trees
    * \param len used to store length of returning result
    */
-  XGB_DLL const float *XGBoosterPredict(void *handle, void *dmat, int output_margin, bst_ulong *len);
+  XGB_DLL const float *XGBoosterPredict(void *handle, void *dmat, int output_margin, unsigned ntree_limit, bst_ulong *len);
   /*!
    * \brief load model from existing file
    * \param handle handle