Merge branch 'master' of https://github.com/pommedeterresautee/xgboost

2015-06-16 21:40:09 +02:00
parent ad2e93f6c5 ab219d3331
commit 37714eb331
42 changed files with 1072 additions and 298 deletions
--- a/R-package/DESCRIPTION
+++ b/R-package/DESCRIPTION
@@ -1,8 +1,8 @@
 Package: xgboost
 Type: Package
 Title: eXtreme Gradient Boosting
-Version: 0.3-4
-Date: 2014-12-28
+Version: 0.4-0
+Date: 2015-05-11
 Author: Tianqi Chen <tianqi.tchen@gmail.com>, Tong He <hetong007@gmail.com>, Michael Benesty <michael@benesty.fr>
 Maintainer: Tong He <hetong007@gmail.com>
 Description: Xgboost is short for eXtreme Gradient Boosting, which is an 
--- a/R-package/R/xgb.cv.R
+++ b/R-package/R/xgb.cv.R
@@ -54,6 +54,13 @@
 #' @param folds \code{list} provides a possibility of using a list of pre-defined CV folds (each element must be a vector of fold's indices).
 #'   If folds are supplied, the nfold and stratified parameters would be ignored.
 #' @param verbose \code{boolean}, print the statistics during the process
+#' @param print.every.n Print every N progress messages when \code{verbose>0}. Default is 1 which means all messages are printed.
+#' @param early.stop.round If \code{NULL}, the early stopping function is not triggered. 
+#'     If set to an integer \code{k}, training with a validation set will stop if the evaluation score
+#'     has not improved on its best value for \code{k} consecutive rounds.
+#' @param maximize If \code{feval} and \code{early.stop.round} are set, then \code{maximize} must be set as well.
+#'     \code{maximize=TRUE} means the larger the evaluation score the better.
+#'     
 #' @param ... other parameters to pass to \code{params}.
 #' 
 #' @return
@@ -86,7 +93,8 @@
 #'
 xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, missing = NULL, 
                   prediction = FALSE, showsd = TRUE, metrics=list(), 
-                   obj = NULL, feval = NULL, stratified = TRUE, folds = NULL, verbose = T,...) {
+                   obj = NULL, feval = NULL, stratified = TRUE, folds = NULL, verbose = T, print.every.n=1L,
+                   early.stop.round = NULL, maximize = NULL, ...) {
  if (typeof(params) != "list") {
    stop("xgb.cv: first argument params must be list")
  }
@@ -109,7 +117,50 @@ xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, missing =
  for (mc in metrics) {
    params <- append(params, list("eval_metric"=mc))
  }
-
+  
+  # customized objective and evaluation metric interface
+  if (!is.null(params$objective) && !is.null(obj))
+    stop("xgb.cv: cannot assign two different objectives")
+  if (!is.null(params$objective))
+    if (class(params$objective)=='function') {
+      obj = params$objective
+      params$objective = NULL
+    }
+  if (!is.null(params$eval_metric) && !is.null(feval))
+    stop("xgb.cv: cannot assign two different evaluation metrics")
+  if (!is.null(params$eval_metric))
+    if (class(params$eval_metric)=='function') {
+      feval = params$eval_metric
+      params$eval_metric = NULL
+    }
+  
+  # Early Stopping
+  if (!is.null(early.stop.round)){
+    if (!is.null(feval) && is.null(maximize))
+      stop('Please set maximize to note whether the model is maximizing the evaluation or not.')
+    if (is.null(maximize) && is.null(params$eval_metric))
+      stop('Please set maximize to note whether the model is maximizing the evaluation or not.')
+    if (is.null(maximize))
+    {
+      if (params$eval_metric %in% c('rmse','logloss','error','merror','mlogloss')) {
+        maximize = FALSE
+      } else {
+        maximize = TRUE
+      }
+    }
+    
+    if (maximize) {
+      bestScore = 0
+    } else {
+      bestScore = Inf
+    }
+    bestInd = 0
+    earlyStopflag = FALSE
+    
+    if (length(metrics)>1)
+      warning('Only the first metric is used for early stopping process.')
+  }
+  
  xgb_folds <- xgb.cv.mknfold(dtrain, nfold, params, stratified, folds)
  obj_type = params[['objective']]
  mat_pred = FALSE
@@ -124,6 +175,7 @@ xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, missing =
  else
    predictValues <- rep(0,xgb.numrow(dtrain))
  history <- c()
+  print.every.n = max(as.integer(print.every.n), 1L)
  for (i in 1:nrounds) {
    msg <- list()
    for (k in 1:nfold) {
@@ -148,7 +200,27 @@ xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, missing =
    }
    ret <- xgb.cv.aggcv(msg, showsd)
    history <- c(history, ret)
-    if(verbose) paste(ret, "\n", sep="") %>% cat
+    if(verbose)
+      if (0==(i-1L)%%print.every.n)
+        cat(ret, "\n", sep="")
+    
+    # early_Stopping
+    if (!is.null(early.stop.round)){
+      score = strsplit(ret,'\\s+')[[1]][1+length(metrics)+2]
+      score = strsplit(score,'\\+|:')[[1]][[2]]
+      score = as.numeric(score)
+      if ((maximize && score>bestScore) || (!maximize && score<bestScore)) {
+        bestScore = score
+        bestInd = i
+      } else {
+        if (i-bestInd>=early.stop.round) {
+          earlyStopflag = TRUE
+          cat('Stopping. Best iteration:',bestInd)
+          break
+        }
+      }
+    }
+    
  }
  
  colnames <- str_split(string = history[1], pattern = "\t")[[1]] %>% .[2:length(.)] %>% str_extract(".*:") %>% str_replace(":","") %>% str_replace("-", ".")
--- a/R-package/R/xgb.train.R
+++ b/R-package/R/xgb.train.R
@@ -36,7 +36,7 @@
 #' 3. Task Parameters 
 #' 
 #' \itemize{
-#' \item \code{objective} specify the learning task and the corresponding learning objective, and the objective options are below:
+#' \item \code{objective} specify the learning task and the corresponding learning objective. Users can pass a self-defined function to it. The default objective options are below:
 #'   \itemize{
 #'     \item \code{reg:linear} linear regression (Default).
 #'     \item \code{reg:logistic} logistic regression.
@@ -48,7 +48,7 @@
 #'     \item \code{rank:pairwise} set xgboost to do ranking task by minimizing the pairwise loss.
 #'   }
 #'   \item \code{base_score} the initial prediction score of all instances, global bias. Default: 0.5
-#'   \item \code{eval_metric} evaluation metrics for validation data. Default: metric will be assigned according to objective(rmse for regression, and error for classification, mean average precision for ranking). List is provided in detail section.
+#'   \item \code{eval_metric} evaluation metrics for validation data. Users can pass a self-defined function to it. Default: metric will be assigned according to objective(rmse for regression, and error for classification, mean average precision for ranking). List is provided in detail section.
 #' }
 #' 
 #' @param data takes an \code{xgb.DMatrix} as the input.
@@ -66,7 +66,12 @@
 #'   prediction and dtrain,
 #' @param verbose If 0, xgboost will stay silent. If 1, xgboost will print 
 #'   information of performance. If 2, xgboost will print information of both
-#'
+#' @param print.every.n Print every N progress messages when \code{verbose>0}. Default is 1 which means all messages are printed.
+#' @param early.stop.round If \code{NULL}, the early stopping function is not triggered. 
+#'     If set to an integer \code{k}, training with a validation set will stop if the evaluation score
+#'     has not improved on its best value for \code{k} consecutive rounds.
+#' @param maximize If \code{feval} and \code{early.stop.round} are set, then \code{maximize} must be set as well.
+#'     \code{maximize=TRUE} means the larger the evaluation score the better.
 #' @param ... other parameters to pass to \code{params}.
 #' 
 #' @details 
@@ -98,7 +103,6 @@
 #' dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
 #' dtest <- dtrain
 #' watchlist <- list(eval = dtest, train = dtrain)
-#' param <- list(max.depth = 2, eta = 1, silent = 1)
 #' logregobj <- function(preds, dtrain) {
 #'    labels <- getinfo(dtrain, "label")
 #'    preds <- 1/(1 + exp(-preds))
@@ -111,11 +115,13 @@
 #'   err <- as.numeric(sum(labels != (preds > 0)))/length(labels)
 #'   return(list(metric = "error", value = err))
 #' }
-#' bst <- xgb.train(param, dtrain, nthread = 2, nround = 2, watchlist, logregobj, evalerror)
+#' param <- list(max.depth = 2, eta = 1, silent = 1, objective=logregobj,eval_metric=evalerror)
+#' bst <- xgb.train(param, dtrain, nthread = 2, nround = 2, watchlist)
 #' @export
 #' 
 xgb.train <- function(params=list(), data, nrounds, watchlist = list(), 
-                      obj = NULL, feval = NULL, verbose = 1, ...) {
+                      obj = NULL, feval = NULL, verbose = 1, print.every.n=1L,
+                      early.stop.round = NULL, maximize = NULL, ...) {
  dtrain <- data
  if (typeof(params) != "list") {
    stop("xgb.train: first argument params must be list")
@@ -130,19 +136,85 @@ xgb.train <- function(params=list(), data, nrounds, watchlist = list(),
  }
  if (length(watchlist) != 0 && verbose == 0) {
    warning('watchlist is provided but verbose=0, no evaluation information will be printed')
-    watchlist <- list()
  }
  params = append(params, list(...))
  
+  # customized objective and evaluation metric interface
+  if (!is.null(params$objective) && !is.null(obj))
+    stop("xgb.train: cannot assign two different objectives")
+  if (!is.null(params$objective))
+    if (class(params$objective)=='function') {
+      obj = params$objective
+      params$objective = NULL
+    }
+  if (!is.null(params$eval_metric) && !is.null(feval))
+    stop("xgb.train: cannot assign two different evaluation metrics")
+  if (!is.null(params$eval_metric))
+    if (class(params$eval_metric)=='function') {
+      feval = params$eval_metric
+      params$eval_metric = NULL
+    }
+    
+  # Early stopping
+  if (!is.null(early.stop.round)){
+    if (!is.null(feval) && is.null(maximize))
+      stop('Please set maximize to note whether the model is maximizing the evaluation or not.')
+    if (length(watchlist) == 0)
+      stop('For early stopping you need at least one set in watchlist.')
+    if (is.null(maximize) && is.null(params$eval_metric))
+      stop('Please set maximize to note whether the model is maximizing the evaluation or not.')
+    if (is.null(maximize))
+    {
+      if (params$eval_metric %in% c('rmse','logloss','error','merror','mlogloss')) {
+        maximize = FALSE
+      } else {
+        maximize = TRUE
+      }
+    }
+    
+    if (maximize) {
+      bestScore = 0
+    } else {
+      bestScore = Inf
+    }
+    bestInd = 0
+    earlyStopflag = FALSE
+    
+    if (length(watchlist)>1)
+      warning('Only the first data set in watchlist is used for early stopping process.')
+  }
+  
+  
  handle <- xgb.Booster(params, append(watchlist, dtrain))
  bst <- xgb.handleToBooster(handle)
+  print.every.n=max( as.integer(print.every.n), 1L)
  for (i in 1:nrounds) {
    succ <- xgb.iter.update(bst$handle, dtrain, i - 1, obj)
    if (length(watchlist) != 0) {
      msg <- xgb.iter.eval(bst$handle, watchlist, i - 1, feval)
-      cat(paste(msg, "\n", sep=""))
+      if (0== ( (i-1) %% print.every.n))
+	    cat(paste(msg, "\n", sep=""))
+      if (!is.null(early.stop.round))
+      {
+        score = strsplit(msg,':|\\s+')[[1]][3]
+        score = as.numeric(score)
+        if ((maximize && score>bestScore) || (!maximize && score<bestScore)) {
+          bestScore = score
+          bestInd = i
+        } else {
+          if (i-bestInd>=early.stop.round) {
+            earlyStopflag = TRUE
+            cat('Stopping. Best iteration:',bestInd)
+            break
+          }
+        }
+      }
    }
  }
  bst <- xgb.Booster.check(bst)
+  if (!is.null(early.stop.round)) {
+    bst$bestScore = bestScore
+    bst$bestInd = bestInd
+  }
  return(bst)
 } 
--- a/R-package/R/xgboost.R
+++ b/R-package/R/xgboost.R
@@ -28,8 +28,14 @@
 #' @param verbose If 0, xgboost will stay silent. If 1, xgboost will print 
 #'   information of performance. If 2, xgboost will print information of both
 #'   performance and construction progress information
+#' @param print.every.n Print every N progress messages when \code{verbose>0}. Default is 1 which means all messages are printed.
 #' @param missing Missing is only used when input is dense matrix, pick a float 
 #'     value that represents missing value. Sometimes a data use 0 or other extreme value to represents missing values.
+#' @param early.stop.round If \code{NULL}, the early stopping function is not triggered. 
+#'     If set to an integer \code{k}, training with a validation set will stop if the evaluation score
+#'     has not improved on its best value for \code{k} consecutive rounds.
+#' @param maximize If \code{feval} and \code{early.stop.round} are set, then \code{maximize} must be set as well.
+#'     \code{maximize=TRUE} means the larger the evaluation score the better.
 #' @param ... other parameters to pass to \code{params}.
 #' 
 #' @details 
@@ -51,7 +57,8 @@
 #' @export
 #' 
 xgboost <- function(data = NULL, label = NULL, missing = NULL, params = list(), nrounds, 
-                    verbose = 1, ...) {
+                    verbose = 1, print.every.n = 1L, early.stop.round = NULL,
+                    maximize = NULL, ...) {
  if (is.null(missing)) {
    dtrain <- xgb.get.DMatrix(data, label)
  } else {
@@ -66,7 +73,8 @@ xgboost <- function(data = NULL, label = NULL, missing = NULL, params = list(),
    watchlist <- list()
  }
  
-  bst <- xgb.train(params, dtrain, nrounds, watchlist, verbose=verbose)
+  bst <- xgb.train(params, dtrain, nrounds, watchlist, verbose = verbose, print.every.n=print.every.n,
+                   early.stop.round = early.stop.round)
  
  return(bst)
 } 
--- a/R-package/README.md
+++ b/R-package/README.md
@@ -8,11 +8,6 @@ For up-to-date version (which is recommended), please install from github. Windo
 devtools::install_github('dmlc/xgboost',subdir='R-package')
 ```

-For stable version on CRAN, please run
-
-```r
-install.packages('xgboost')
-```

 ## Examples

--- a/R-package/demo/00Index
+++ b/R-package/demo/00Index
@@ -6,3 +6,5 @@ generalized_linear_model        Generalized Linear Model
 cross_validation                Cross validation
 create_sparse_matrix            Create Sparse Matrix
 predict_leaf_indices            Predicting the corresponding leaves
+early_stopping                  Early stopping in training
+poisson_regression              Poisson Regression on count data
--- a/R-package/demo/cross_validation.R
+++ b/R-package/demo/cross_validation.R
@@ -40,10 +40,10 @@ evalerror <- function(preds, dtrain) {
  return(list(metric = "error", value = err))
 }

-param <- list(max.depth=2,eta=1,silent=1)
+param <- list(max.depth=2,eta=1,silent=1,
+              objective = logregobj, eval_metric = evalerror)
 # train with customized objective
-xgb.cv(param, dtrain, nround, nfold = 5,
-       obj = logregobj, feval=evalerror)
+xgb.cv(param, dtrain, nround, nfold = 5)

 # do cross validation with prediction values for each fold
 res <- xgb.cv(param, dtrain, nround, nfold=5, prediction = TRUE)
--- a/R-package/demo/custom_objective.R
+++ b/R-package/demo/custom_objective.R
@@ -8,7 +8,6 @@ dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label)
 # note: for customized objective function, we leave objective as default
 # note: what we are getting is margin value in prediction
 # you must know what you are doing
-param <- list(max.depth=2,eta=1,nthread = 2, silent=1)
 watchlist <- list(eval = dtest, train = dtrain)
 num_round <- 2

@@ -33,10 +32,13 @@ evalerror <- function(preds, dtrain) {
  err <- as.numeric(sum(labels != (preds > 0)))/length(labels)
  return(list(metric = "error", value = err))
 }
+
+param <- list(max.depth=2,eta=1,nthread = 2, silent=1, 
+              objective=logregobj, eval_metric=evalerror)
 print ('start training with user customized objective')
 # training with customized objective, we can also do step by step training
 # simply look at xgboost.py's implementation of train
-bst <- xgb.train(param, dtrain, num_round, watchlist, logregobj, evalerror)
+bst <- xgb.train(param, dtrain, num_round, watchlist)

 #
 # there can be cases where you want additional information 
@@ -59,4 +61,5 @@ logregobjattr <- function(preds, dtrain) {
 print ('start training with user customized objective, with additional attributes in DMatrix')
 # training with customized objective, we can also do step by step training
 # simply look at xgboost.py's implementation of train
-bst <- xgb.train(param, dtrain, num_round, watchlist, logregobjattr, evalerror)
+bst <- xgb.train(param, dtrain, num_round, watchlist, 
+                 objective=logregobj, eval_metric=evalerror)
--- a/R-package/demo/early_stopping.R
+++ b/R-package/demo/early_stopping.R
@@ -0,0 +1,40 @@
+require(xgboost)
+# load in the agaricus dataset
+data(agaricus.train, package='xgboost')
+data(agaricus.test, package='xgboost')
+dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
+dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label)
+# note: for customized objective function, we leave objective as default
+# note: what we are getting is margin value in prediction
+# you must know what you are doing
+param <- list(max.depth=2,eta=1,nthread = 2, silent=1)
+watchlist <- list(eval = dtest)
+num_round <- 20
+# user-defined objective function: given the predictions, return the gradient and second-order gradient
+# this is the log-likelihood loss
+logregobj <- function(preds, dtrain) {
+  labels <- getinfo(dtrain, "label")
+  preds <- 1/(1 + exp(-preds))
+  grad <- preds - labels
+  hess <- preds * (1 - preds)
+  return(list(grad = grad, hess = hess))
+}
+# user-defined evaluation function: returns a pair (metric name, value)
+# NOTE: when you use a customized loss function, the default prediction value is the margin,
+# which may cause the built-in evaluation metrics to not work properly.
+# For example, with logistic loss the prediction is the score before the logistic transformation,
+# while the built-in evaluation error assumes the input is after the logistic transformation.
+# Keep this in mind when you use the customization; you may need to write a customized evaluation function.
+evalerror <- function(preds, dtrain) {
+  labels <- getinfo(dtrain, "label")
+  err <- as.numeric(sum(labels != (preds > 0)))/length(labels)
+  return(list(metric = "error", value = err))
+}
+print ('start training with early Stopping setting')
+
+bst <- xgb.train(param, dtrain, num_round, watchlist, 
+                 objective = logregobj, eval_metric = evalerror, maximize = FALSE,
+                 early.stop.round = 3)
+bst <- xgb.cv(param, dtrain, num_round, nfold = 5, 
+              objective = logregobj, eval_metric = evalerror,
+              maximize = FALSE, early.stop.round = 3)
--- a/R-package/demo/poisson_regression.R
+++ b/R-package/demo/poisson_regression.R
@@ -0,0 +1,7 @@
+data(mtcars)
+head(mtcars)
+bst = xgboost(data=as.matrix(mtcars[,-11]),label=mtcars[,11],
+              objective='count:poisson',nrounds=5)
+pred = predict(bst,as.matrix(mtcars[,-11]))
+sqrt(mean((pred-mtcars[,11])^2))
+
--- a/R-package/demo/runall.R
+++ b/R-package/demo/runall.R
@@ -7,3 +7,5 @@ demo(generalized_linear_model)
 demo(cross_validation)
 demo(create_sparse_matrix)
 demo(predict_leaf_indices)
+demo(early_stopping)
+demo(poisson_regression)
--- a/R-package/man/xgb.cv.Rd
+++ b/R-package/man/xgb.cv.Rd
@@ -7,7 +7,8 @@
 xgb.cv(params = list(), data, nrounds, nfold, label = NULL,
  missing = NULL, prediction = FALSE, showsd = TRUE, metrics = list(),
  obj = NULL, feval = NULL, stratified = TRUE, folds = NULL,
-  verbose = T, ...)
+  verbose = T, print.every.n = 1L, early.stop.round = NULL,
+  maximize = NULL, ...)
 }
 \arguments{
 \item{params}{the list of parameters. Commonly used ones are:
@@ -65,6 +66,15 @@ If folds are supplied, the nfold and stratified parameters would be ignored.}

 \item{verbose}{\code{boolean}, print the statistics during the process}

+\item{print.every.n}{Print every N progress messages when \code{verbose>0}. Default is 1 which means all messages are printed.}
+
+\item{early.stop.round}{If \code{NULL}, the early stopping function is not triggered.
+If set to an integer \code{k}, training with a validation set will stop if the evaluation score
+has not improved on its best value for \code{k} consecutive rounds.}
+
+\item{maximize}{If \code{feval} and \code{early.stop.round} are set, then \code{maximize} must be set as well.
+    \code{maximize=TRUE} means the larger the evaluation score the better.}
+
 \item{...}{other parameters to pass to \code{params}.}
 }
 \value{
--- a/R-package/man/xgb.train.Rd
+++ b/R-package/man/xgb.train.Rd
@@ -5,7 +5,8 @@
 \title{eXtreme Gradient Boosting Training}
 \usage{
 xgb.train(params = list(), data, nrounds, watchlist = list(), obj = NULL,
-  feval = NULL, verbose = 1, ...)
+  feval = NULL, verbose = 1, print.every.n = 1L,
+  early.stop.round = NULL, maximize = NULL, ...)
 }
 \arguments{
 \item{params}{the list of parameters.
@@ -42,7 +43,7 @@ xgb.train(params = list(), data, nrounds, watchlist = list(), obj = NULL,
 3. Task Parameters

 \itemize{
-\item \code{objective} specify the learning task and the corresponding learning objective, and the objective options are below:
+\item \code{objective} specify the learning task and the corresponding learning objective. Users can pass a self-defined function to it. The default objective options are below:
  \itemize{
    \item \code{reg:linear} linear regression (Default).
    \item \code{reg:logistic} logistic regression.
@@ -54,7 +55,7 @@ xgb.train(params = list(), data, nrounds, watchlist = list(), obj = NULL,
    \item \code{rank:pairwise} set xgboost to do ranking task by minimizing the pairwise loss.
  }
  \item \code{base_score} the initial prediction score of all instances, global bias. Default: 0.5
-  \item \code{eval_metric} evaluation metrics for validation data. Default: metric will be assigned according to objective(rmse for regression, and error for classification, mean average precision for ranking). List is provided in detail section.
+  \item \code{eval_metric} evaluation metrics for validation data. Users can pass a self-defined function to it. Default: metric will be assigned according to objective(rmse for regression, and error for classification, mean average precision for ranking). List is provided in detail section.
 }}

 \item{data}{takes an \code{xgb.DMatrix} as the input.}
@@ -75,7 +76,16 @@ gradient with given prediction and dtrain,}
 prediction and dtrain,}

 \item{verbose}{If 0, xgboost will stay silent. If 1, xgboost will print
-  information of performance. If 2, xgboost will print information of both}
+information of performance. If 2, xgboost will print information of both}
+
+\item{print.every.n}{Print every N progress messages when \code{verbose>0}. Default is 1 which means all messages are printed.}
+
+\item{early.stop.round}{If \code{NULL}, the early stopping function is not triggered.
+If set to an integer \code{k}, training with a validation set will stop if the evaluation score
+has not improved on its best value for \code{k} consecutive rounds.}
+
+\item{maximize}{If \code{feval} and \code{early.stop.round} are set, then \code{maximize} must be set as well.
+\code{maximize=TRUE} means the larger the evaluation score the better.}

 \item{...}{other parameters to pass to \code{params}.}
 }
@@ -110,7 +120,6 @@ data(agaricus.train, package='xgboost')
 dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
 dtest <- dtrain
 watchlist <- list(eval = dtest, train = dtrain)
-param <- list(max.depth = 2, eta = 1, silent = 1)
 logregobj <- function(preds, dtrain) {
   labels <- getinfo(dtrain, "label")
   preds <- 1/(1 + exp(-preds))
@@ -123,6 +132,7 @@ evalerror <- function(preds, dtrain) {
  err <- as.numeric(sum(labels != (preds > 0)))/length(labels)
  return(list(metric = "error", value = err))
 }
-bst <- xgb.train(param, dtrain, nthread = 2, nround = 2, watchlist, logregobj, evalerror)
+param <- list(max.depth = 2, eta = 1, silent = 1, objective=logregobj,eval_metric=evalerror)
+bst <- xgb.train(param, dtrain, nthread = 2, nround = 2, watchlist)
 }

--- a/R-package/man/xgboost.Rd
+++ b/R-package/man/xgboost.Rd
@@ -5,7 +5,8 @@
 \title{eXtreme Gradient Boosting (Tree) library}
 \usage{
 xgboost(data = NULL, label = NULL, missing = NULL, params = list(),
-  nrounds, verbose = 1, ...)
+  nrounds, verbose = 1, print.every.n = 1L, early.stop.round = NULL,
+  maximize = NULL, ...)
 }
 \arguments{
 \item{data}{takes \code{matrix}, \code{dgCMatrix}, local data file or
@@ -41,6 +42,15 @@ Commonly used ones are:
 information of performance. If 2, xgboost will print information of both
 performance and construction progress information}

+\item{print.every.n}{Print every N progress messages when \code{verbose>0}. Default is 1 which means all messages are printed.}
+
+\item{early.stop.round}{If \code{NULL}, the early stopping function is not triggered.
+If set to an integer \code{k}, training with a validation set will stop if the evaluation score
+has not improved on its best value for \code{k} consecutive rounds.}
+
+\item{maximize}{If \code{feval} and \code{early.stop.round} are set, then \code{maximize} must be set as well.
+\code{maximize=TRUE} means the larger the evaluation score the better.}
+
 \item{...}{other parameters to pass to \code{params}.}
 }
 \description{
--- a/R-package/src/xgboost_R.cpp
+++ b/R-package/src/xgboost_R.cpp
@@ -70,10 +70,10 @@ extern "C" {
  SEXP XGDMatrixCreateFromFile_R(SEXP fname, SEXP silent) {
    _WrapperBegin();
    void *handle = XGDMatrixCreateFromFile(CHAR(asChar(fname)), asInteger(silent));
+    _WrapperEnd();
    SEXP ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue));
    R_RegisterCFinalizerEx(ret, _DMatrixFinalizer, TRUE);
    UNPROTECT(1);
-    _WrapperEnd();
    return ret;
  }
  SEXP XGDMatrixCreateFromMat_R(SEXP mat, 
@@ -91,10 +91,10 @@ extern "C" {
      }
    }
    void *handle = XGDMatrixCreateFromMat(BeginPtr(data), nrow, ncol, asReal(missing));
+    _WrapperEnd();
    SEXP ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue));
    R_RegisterCFinalizerEx(ret, _DMatrixFinalizer, TRUE);
    UNPROTECT(1);
-    _WrapperEnd();
    return ret;    
  }
  SEXP XGDMatrixCreateFromCSC_R(SEXP indptr,
@@ -120,10 +120,10 @@ extern "C" {
    }
    void *handle = XGDMatrixCreateFromCSC(BeginPtr(col_ptr_), BeginPtr(indices_),
                                          BeginPtr(data_), nindptr, ndata);
+    _WrapperEnd();
    SEXP ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue));
    R_RegisterCFinalizerEx(ret, _DMatrixFinalizer, TRUE);
    UNPROTECT(1);
-    _WrapperEnd();
    return ret;
  }
  SEXP XGDMatrixSliceDMatrix_R(SEXP handle, SEXP idxset) {
@@ -134,10 +134,10 @@ extern "C" {
      idxvec[i] = INTEGER(idxset)[i] - 1;
    }
    void *res = XGDMatrixSliceDMatrix(R_ExternalPtrAddr(handle), BeginPtr(idxvec), len);
+    _WrapperEnd();
    SEXP ret = PROTECT(R_MakeExternalPtr(res, R_NilValue, R_NilValue));
    R_RegisterCFinalizerEx(ret, _DMatrixFinalizer, TRUE);
    UNPROTECT(1);
-    _WrapperEnd();
    return ret;        
  }
  void XGDMatrixSaveBinary_R(SEXP handle, SEXP fname, SEXP silent) {
@@ -157,10 +157,7 @@ extern "C" {
        vec[i] = static_cast<unsigned>(INTEGER(array)[i]);
      }
      XGDMatrixSetGroup(R_ExternalPtrAddr(handle), BeginPtr(vec), len);
-      _WrapperEnd();
-      return;
-    }
-    {
+    } else {
      std::vector<float> vec(len);
      #pragma omp parallel for schedule(static)
      for (int i = 0; i < len; ++i) {
@@ -177,12 +174,12 @@ extern "C" {
    bst_ulong olen;
    const float *res = XGDMatrixGetFloatInfo(R_ExternalPtrAddr(handle),
                                             CHAR(asChar(field)), &olen);
+    _WrapperEnd();
    SEXP ret = PROTECT(allocVector(REALSXP, olen));
    for (size_t i = 0; i < olen; ++i) {
      REAL(ret)[i] = res[i];
    }
    UNPROTECT(1);
-    _WrapperEnd();
    return ret;
  }
  SEXP XGDMatrixNumRow_R(SEXP handle) {
@@ -203,10 +200,10 @@ extern "C" {
      dvec.push_back(R_ExternalPtrAddr(VECTOR_ELT(dmats, i)));
    }
    void *handle = XGBoosterCreate(BeginPtr(dvec), dvec.size());
+    _WrapperEnd();
    SEXP ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue));
    R_RegisterCFinalizerEx(ret, _BoosterFinalizer, TRUE);
    UNPROTECT(1);
-    _WrapperEnd();
    return ret;
  }
  void XGBoosterSetParam_R(SEXP handle, SEXP name, SEXP val) {
@@ -252,10 +249,12 @@ extern "C" {
    for (int i = 0; i < len; ++i) {
      vec_sptr.push_back(vec_names[i].c_str());
    }
+    const char *ret =
+        XGBoosterEvalOneIter(R_ExternalPtrAddr(handle),
+                             asInteger(iter),
+                             BeginPtr(vec_dmats), BeginPtr(vec_sptr), len);  
    _WrapperEnd();
-    return mkString(XGBoosterEvalOneIter(R_ExternalPtrAddr(handle),
-                                         asInteger(iter),
-                                         BeginPtr(vec_dmats), BeginPtr(vec_sptr), len));
+    return mkString(ret);
  }
  SEXP XGBoosterPredict_R(SEXP handle, SEXP dmat, SEXP option_mask, SEXP ntree_limit) {
    _WrapperBegin();
@@ -265,12 +264,12 @@ extern "C" {
                                        asInteger(option_mask),
                                        asInteger(ntree_limit),
                                        &olen);
+    _WrapperEnd();
    SEXP ret = PROTECT(allocVector(REALSXP, olen));
    for (size_t i = 0; i < olen; ++i) {
      REAL(ret)[i] = res[i];
    }
    UNPROTECT(1);
-    _WrapperEnd();
    return ret;
  }
  void XGBoosterLoadModel_R(SEXP handle, SEXP fname) {
@@ -305,17 +304,18 @@ extern "C" {
  SEXP XGBoosterDumpModel_R(SEXP handle, SEXP fmap, SEXP with_stats) {
    _WrapperBegin();
    bst_ulong olen;
-    const char **res = XGBoosterDumpModel(R_ExternalPtrAddr(handle),
-    CHAR(asChar(fmap)),
-    asInteger(with_stats),
-    &olen);
+    const char **res =
+        XGBoosterDumpModel(R_ExternalPtrAddr(handle),
+                           CHAR(asChar(fmap)),
+                           asInteger(with_stats),
+                           &olen);
+    _WrapperEnd();
    SEXP out = PROTECT(allocVector(STRSXP, olen));    
    for (size_t i = 0; i < olen; ++i) {     
      stringstream stream;
      stream <<  "booster["<<i<<"]\n" << res[i];
      SET_STRING_ELT(out, i, mkChar(stream.str().c_str()));
    }
-    _WrapperEnd();
    UNPROTECT(1);
    return out;
  }
--- a/R-package/vignettes/xgboostPresentation.Rmd
+++ b/R-package/vignettes/xgboostPresentation.Rmd
@@ -57,11 +57,9 @@ devtools::install_github('dmlc/xgboost', subdir='R-package')
 Cran version
 ------------

-For stable version on *CRAN*, run:
+As of 2015-03-13, ‘xgboost’ was removed from the CRAN repository.

-```{r installCran, eval=FALSE}
-install.packages('xgboost')
-```
+Formerly available versions can be obtained from the CRAN [archive](http://cran.r-project.org/src/contrib/Archive/xgboost).

 Learning
 ========