[R] various R code maintenance (#1964)
* [R] xgb.save must work when handle is nil but raw exists
* [R] print.xgb.Booster should still print other info when handle is nil
* [R] rename internal function xgb.Booster to xgb.Booster.handle to make its intent clear
* [R] rename xgb.Booster.check to xgb.Booster.complete and make it visible; more docs
* [R] storing evaluation_log should depend only on watchlist, not on verbose
* [R] reduce the excessive chattiness of unit tests
* [R] only disable some tests on Windows when it's not 64-bit
* [R] clean up xgb.DMatrix
* [R] test xgb.DMatrix loading from a libsvm text file
* [R] store feature_names in xgb.Booster, use them from utility functions
* [R] remove non-functional co-occurence computation from xgb.importance
* [R] verbose=0 is enough without a callback
* [R] added forgotten xgb.Booster.complete.Rd; CRAN check fixes
* [R] update installation instructions
This commit is contained in:
parent a073a2c3d4
commit 2b5b96d760
@@ -24,6 +24,7 @@ export(cb.save.model)
 export(getinfo)
 export(setinfo)
 export(slice)
+export(xgb.Booster.complete)
 export(xgb.DMatrix)
 export(xgb.DMatrix.save)
 export(xgb.attr)
@@ -507,7 +507,7 @@ cb.cv.predict <- function(save_models = FALSE) {
     if (save_models) {
       env$basket$models <- lapply(env$bst_folds, function(fd) {
         xgb.attr(fd$bst, 'niter') <- env$end_iteration - 1
-        xgb.Booster.check(xgb.handleToBooster(fd$bst), saveraw = TRUE)
+        xgb.Booster.complete(xgb.handleToBooster(fd$bst), saveraw = TRUE)
       })
     }
   }
@@ -1,6 +1,6 @@
-# Construct a Booster from cachelist
+# Construct an internal xgboost Booster and return a handle to it
 # internal utility function
-xgb.Booster <- function(params = list(), cachelist = list(), modelfile = NULL) {
+xgb.Booster.handle <- function(params = list(), cachelist = list(), modelfile = NULL) {
   if (typeof(cachelist) != "list" ||
       any(sapply(cachelist, class) != 'xgb.DMatrix')) {
     stop("xgb.Booster only accepts list of DMatrix as cachelist")
@@ -13,8 +13,8 @@ xgb.Booster <- function(params = list(), cachelist = list(), modelfile = NULL) {
   } else if (typeof(modelfile) == "raw") {
     .Call("XGBoosterLoadModelFromRaw_R", handle, modelfile, PACKAGE = "xgboost")
   } else if (class(modelfile) == "xgb.Booster") {
-    modelfile <- xgb.Booster.check(modelfile, saveraw=TRUE)
-    .Call("XGBoosterLoadModelFromRaw_R", handle, modelfile$raw, PACKAGE = "xgboost")
+    bst <- xgb.Booster.complete(modelfile, saveraw=TRUE)
+    .Call("XGBoosterLoadModelFromRaw_R", handle, bst$raw, PACKAGE = "xgboost")
   } else {
     stop("modelfile must be either character filename, or raw booster dump, or xgb.Booster object")
   }
@@ -34,6 +34,17 @@ xgb.handleToBooster <- function(handle, raw = NULL) {
   return(bst)
 }

+# Check whether xgb.Booster.handle is null
+# internal utility function
+is.null.handle <- function(handle) {
+  if (class(handle) != "xgb.Booster.handle")
+    stop("argument type must be xgb.Booster.handle")
+
+  if (is.null(handle) || .Call("XGCheckNullPtr_R", handle, PACKAGE="xgboost"))
+    return(TRUE)
+  return(FALSE)
+}
+
 # Return a verified to be valid handle out of either xgb.Booster.handle or xgb.Booster
 # internal utility function
 xgb.get.handle <- function(object) {
@@ -42,32 +53,65 @@ xgb.get.handle <- function(object) {
     xgb.Booster.handle = object,
     stop("argument must be of either xgb.Booster or xgb.Booster.handle class")
   )
-  if (is.null(handle) || .Call("XGCheckNullPtr_R", handle, PACKAGE="xgboost")) {
+  if (is.null.handle(handle)) {
     stop("invalid xgb.Booster.handle")
   }
   handle
 }

-# Check whether an xgb.Booster object is complete
-# internal utility function
-xgb.Booster.check <- function(bst, saveraw = TRUE) {
-  if (class(bst) != "xgb.Booster")
+#' Restore missing parts of an incomplete xgb.Booster object.
+#'
+#' It attempts to complete an \code{xgb.Booster} object by restoring either its missing
+#' raw model memory dump (when it has no \code{raw} data but its \code{xgb.Booster.handle} is valid)
+#' or its missing internal handle (when its \code{xgb.Booster.handle} is not valid
+#' but it has a raw Booster memory dump).
+#'
+#' @param object object of class \code{xgb.Booster}
+#' @param saveraw a flag indicating whether to append \code{raw} Booster memory dump data
+#' when it doesn't already exist.
+#'
+#' @details
+#'
+#' While this method is primarily for internal use, it might be useful in some practical situations.
+#'
+#' E.g., when an \code{xgb.Booster} model is saved as an R object and then is loaded as an R object,
+#' its handle (pointer) to an internal xgboost model would be invalid. The majority of xgboost methods
+#' should still work for such a model object since those methods would be using
+#' \code{xgb.Booster.complete} internally. However, one might find it to be more efficient to call the
+#' \code{xgb.Booster.complete} function once after loading a model as an R-object. That which would
+#' prevent further reconstruction (potentially, multiple times) of an internal booster model.
+#'
+#' @return
+#' An object of \code{xgb.Booster} class.
+#'
+#' @examples
+#'
+#' data(agaricus.train, package='xgboost')
+#' bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label, max_depth = 2,
+#' eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic")
+#' saveRDS(bst, "xgb.model.rds")
+#'
+#' bst1 <- readRDS("xgb.model.rds")
+#' # the handle is invalid:
+#' print(bst1$handle)
+#' bst1 <- xgb.Booster.complete(bst1)
+#' # now the handle points to a valid internal booster model:
+#' print(bst1$handle)
+#'
+#' @export
+xgb.Booster.complete <- function(object, saveraw = TRUE) {
+  if (class(object) != "xgb.Booster")
     stop("argument type must be xgb.Booster")

-  isnull <- is.null(bst$handle)
-  if (!isnull) {
-    isnull <- .Call("XGCheckNullPtr_R", bst$handle, PACKAGE="xgboost")
-  }
-  if (isnull) {
-    bst$handle <- xgb.Booster(modelfile = bst$raw)
+  if (is.null.handle(object$handle)) {
+    object$handle <- xgb.Booster.handle(modelfile = object$raw)
   } else {
-    if (is.null(bst$raw) && saveraw)
-      bst$raw <- xgb.save.raw(bst$handle)
+    if (is.null(object$raw) && saveraw)
+      object$raw <- xgb.save.raw(object$handle)
   }
-  return(bst)
+  return(object)
 }


 #' Predict method for eXtreme Gradient Boosting model
 #'
 #' Predicted values based on either xgboost model or model handle object.
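For reference, a minimal sketch of the workflow the new xgb.Booster.complete() targets, adapted from the roxygen example above (the .rds file name is only illustrative):

    library(xgboost)
    data(agaricus.train, package = 'xgboost')
    bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label,
                   max_depth = 2, eta = 1, nthread = 2, nrounds = 2,
                   objective = "binary:logistic")
    saveRDS(bst, "xgb.model.rds")      # serializes the raw memory dump, not the handle

    bst1 <- readRDS("xgb.model.rds")   # handle (external pointer) is now nil
    bst1 <- xgb.Booster.complete(bst1) # rebuilds the handle from bst1$raw once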
@@ -180,7 +224,7 @@ xgb.Booster.check <- function(bst, saveraw = TRUE) {
 predict.xgb.Booster <- function(object, newdata, missing = NA,
                                 outputmargin = FALSE, ntreelimit = NULL, predleaf = FALSE, reshape = FALSE, ...) {

-  object <- xgb.Booster.check(object, saveraw = FALSE)
+  object <- xgb.Booster.complete(object, saveraw = FALSE)
   if (class(newdata) != "xgb.DMatrix")
     newdata <- xgb.DMatrix(newdata, missing = missing)
   if (is.null(ntreelimit))
@@ -429,10 +473,9 @@ xgb.ntree <- function(bst) {
 print.xgb.Booster <- function(x, verbose=FALSE, ...) {
   cat('##### xgb.Booster\n')

-  if (is.null(x$handle) || .Call("XGCheckNullPtr_R", x$handle, PACKAGE="xgboost")) {
-    cat("handle is invalid\n")
-    return(x)
-  }
+  valid_handle <- !is.null.handle(x$handle)
+  if (!valid_handle)
+    cat("Handle is invalid! Suggest using xgb.Booster.complete\n")

   cat('raw: ')
   if (!is.null(x$raw)) {
@@ -454,6 +497,8 @@ print.xgb.Booster <- function(x, verbose=FALSE, ...) {
   }
   # TODO: need an interface to access all the xgboosts parameters

+  attrs <- character(0)
+  if (valid_handle)
     attrs <- xgb.attributes(x)
   if (length(attrs) > 0) {
     cat('xgb.attributes:\n')
@@ -474,11 +519,15 @@ print.xgb.Booster <- function(x, verbose=FALSE, ...) {
     })
   }

+  if (!is.null(x$feature_names))
+    cat('# of features:', length(x$feature_names), '\n')
+
   cat('niter: ', x$niter, '\n', sep='')
   # TODO: uncomment when faster xgb.ntree is implemented
   #cat('ntree: ', xgb.ntree(x), '\n', sep='')

-  for (n in setdiff(names(x), c('handle', 'raw', 'call', 'params', 'callbacks','evaluation_log','niter'))) {
+  for (n in setdiff(names(x), c('handle', 'raw', 'call', 'params', 'callbacks',
+                                'evaluation_log','niter','feature_names'))) {
     if (is.atomic(x[[n]])) {
       cat(n, ':', x[[n]], '\n', sep=' ')
     } else {
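A small sketch of the intended behaviour of the reworked print method (exact output wording aside): printing a deserialized model no longer returns early on a nil handle; it only notes the problem and still reports the remaining fields.

    bst1 <- readRDS("xgb.model.rds")   # handle is nil after deserialization
    print(bst1)                        # notes the invalid handle, then still prints
                                       # raw size, niter, # of features, params, etc.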
@@ -31,18 +31,13 @@ xgb.DMatrix <- function(data, info = list(), missing = NA, ...) {
                    PACKAGE = "xgboost")
     cnames <- colnames(data)
   } else {
-    stop(paste("xgb.DMatrix: does not support to construct from ",
-               typeof(data)))
+    stop("xgb.DMatrix does not support construction from ", typeof(data))
   }
   dmat <- handle
   attributes(dmat) <- list(.Dimnames = list(NULL, cnames), class = "xgb.DMatrix")
-  #dmat <- list(handle = handle, colnames = cnames)
-  #attr(dmat, 'class') <- "xgb.DMatrix"

   info <- append(info, list(...))
-  if (length(info) == 0)
-    return(dmat)
-  for (i in 1:length(info)) {
+  for (i in seq_along(info)) {
     p <- info[i]
     setinfo(dmat, names(p), p[[1]])
   }
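As a quick illustration of the simplified info loop above, any extra named arguments (or entries of the info list) are applied through setinfo(); for example, assuming the agaricus data used in the other examples:

    dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
    head(getinfo(dtrain, 'label'))   # the label was attached via setinfo() in the loop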
@@ -70,11 +65,10 @@ xgb.get.DMatrix <- function(data, label = NULL, missing = NA, weight = NULL) {
       dtrain <- xgb.DMatrix(data)
     } else if (inClass == "xgb.DMatrix") {
       dtrain <- data
-    } else if (inClass == "data.frame") {
-      stop("xgboost only support numerical matrix input,
-           use 'data.matrix' to transform the data.")
+    } else if ("data.frame" %in% inClass) {
+      stop("xgboost doesn't support data.frame as input. Convert it to matrix first.")
     } else {
-      stop("xgboost: Invalid input of data")
+      stop("xgboost: invalid input data")
     }
   }
   return (dtrain)
@@ -190,7 +184,7 @@ getinfo.xgb.DMatrix <- function(object, name, ...) {
   if (typeof(name) != "character" ||
       length(name) != 1 ||
       !name %in% c('label', 'weight', 'base_margin', 'nrow')) {
-    stop("getinfo: name must one of the following\n",
+    stop("getinfo: name must be one of the following\n",
         " 'label', 'weight', 'base_margin', 'nrow'")
   }
   if (name != "nrow"){
@@ -266,7 +260,7 @@ setinfo.xgb.DMatrix <- function(object, name, info, ...) {
           PACKAGE = "xgboost")
     return(TRUE)
   }
-  stop(paste("setinfo: unknown info name", name))
+  stop("setinfo: unknown info name ", name)
   return(FALSE)
 }

@@ -181,8 +181,8 @@ xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, missing =
   bst_folds <- lapply(1:length(folds), function(k) {
     dtest <- slice(dall, folds[[k]])
     dtrain <- slice(dall, unlist(folds[-k]))
-    bst <- xgb.Booster(params, list(dtrain, dtest))
-    list(dtrain=dtrain, bst=bst, watchlist=list(train=dtrain, test=dtest), index=folds[[k]])
+    handle <- xgb.Booster.handle(params, list(dtrain, dtest))
+    list(dtrain=dtrain, bst=handle, watchlist=list(train=dtrain, test=dtest), index=folds[[k]])
   })
   # a "basket" to collect some results from callbacks
   basket <- list()
@@ -1,24 +1,26 @@
-#' Save xgboost model to text file
+#' Dump an xgboost model in text format.
 #'
-#' Save a xgboost model to text file. Could be parsed later.
+#' Dump an xgboost model in text format.
 #'
 #' @param model the model object.
-#' @param fname the name of the text file where to save the model text dump. If not provided or set to \code{NULL} the function will return the model as a \code{character} vector.
-#' @param fmap feature map file representing the type of feature.
+#' @param fname the name of the text file where to save the model text dump.
+#' If not provided or set to \code{NULL}, the model is returned as a \code{character} vector.
+#' @param fmap feature map file representing feature types.
 #' Detailed description could be found at
 #' \url{https://github.com/dmlc/xgboost/wiki/Binary-Classification#dump-model}.
 #' See demo/ for walkthrough example in R, and
 #' \url{https://github.com/dmlc/xgboost/blob/master/demo/data/featmap.txt}
 #' for example Format.
-#' @param with_stats whether dump statistics of splits
-#' When this option is on, the model dump comes with two additional statistics:
+#' @param with_stats whether to dump some additional statistics about the splits.
+#' When this option is on, the model dump contains two additional values:
 #' gain is the approximate loss function gain we get in each split;
 #' cover is the sum of second order gradient in each node.
 #' @param dump_format either 'text' or 'json' format could be specified.
 #' @param ... currently not used
 #'
 #' @return
-#' if fname is not provided or set to \code{NULL} the function will return the model as a \code{character} vector. Otherwise it will return \code{TRUE}.
+#' If fname is not provided or set to \code{NULL} the function will return the model
+#' as a \code{character} vector. Otherwise it will return \code{TRUE}.
 #'
 #' @examples
 #' data(agaricus.train, package='xgboost')
@@ -37,7 +39,8 @@
 #' cat(xgb.dump(bst, with_stats = TRUE, dump_format='json'))
 #'
 #' @export
-xgb.dump <- function(model = NULL, fname = NULL, fmap = "", with_stats=FALSE, dump_format = c("text", "json"), ...) {
+xgb.dump <- function(model = NULL, fname = NULL, fmap = "", with_stats=FALSE,
+                     dump_format = c("text", "json"), ...) {
   check.deprecation(...)
   dump_format <- match.arg(dump_format)
   if (class(model) != "xgb.Booster")
@@ -47,7 +50,7 @@ xgb.dump <- function(model = NULL, fname = NULL, fmap = "", with_stats=FALSE, du
   if (!(class(fmap) %in% c("character", "NULL") && length(fmap) <= 1))
     stop("fmap: argument must be of type character (when provided)")

-  model <- xgb.Booster.check(model)
+  model <- xgb.Booster.complete(model)
   model_dump <- .Call("XGBoosterDumpModel_R", model$handle, fmap, as.integer(with_stats),
                       as.character(dump_format), PACKAGE = "xgboost")

@@ -1,102 +1,92 @@
-#' Show importance of features in a model
+#' Importance of features in a model.
 #'
-#' Create a \code{data.table} of the most important features of a model.
+#' Creates a \code{data.table} of feature importances in a model.
 #'
-#' @param feature_names names of each feature as a \code{character} vector. Can be extracted from a sparse matrix (see example). If model dump already contains feature names, this argument should be \code{NULL}.
-#' @param model generated by the \code{xgb.train} function.
-#' @param data the dataset used for the training step. Will be used with \code{label} parameter for co-occurence computation. More information in \code{Detail} part. This parameter is optional.
-#' @param label the label vector used for the training step. Will be used with \code{data} parameter for co-occurence computation. More information in \code{Detail} part. This parameter is optional.
-#' @param target a function which returns \code{TRUE} or \code{1} when an observation should be count as a co-occurence and \code{FALSE} or \code{0} otherwise. Default function is provided for computing co-occurences in a binary classification. The \code{target} function should have only one parameter. This parameter will be used to provide each important feature vector after having applied the split condition, therefore these vector will be only made of 0 and 1 only, whatever was the information before. More information in \code{Detail} part. This parameter is optional.
-#'
-#' @return A \code{data.table} of the features used in the model with their average gain (and their weight for boosted tree model) in the model.
+#' @param feature_names character vector of feature names. If the model already
+#' contains feature names, those would be used when \code{feature_names=NULL} (default value).
+#' Non-null \code{feature_names} could be provided to override those in the model.
+#' @param model object of class \code{xgb.Booster}.
+#' @param data deprecated.
+#' @param label deprecated.
+#' @param target deprecated.
 #'
 #' @details
-#' This function is for both linear and tree models.
 #'
-#' \code{data.table} is returned by the function.
-#' The columns are:
+#' This function works for both linear and tree models.
+#'
+#' For linear models, the importance is the absolute magnitude of linear coefficients.
+#' For that reason, in order to obtain a meaningful ranking by importance for a linear model,
+#' the features need to be on the same scale (which you also would want to do when using either
+#' L1 or L2 regularization).
+#'
+#' @return
+#'
+#' For a tree model, a \code{data.table} with the following columns:
 #' \itemize{
-#' \item \code{Features} name of the features as provided in \code{feature_names} or already present in the model dump;
-#' \item \code{Gain} contribution of each feature to the model. For boosted tree model, each gain of each feature of each tree is taken into account, then average per feature to give a vision of the entire model. Highest percentage means important feature to predict the \code{label} used for the training (only available for tree models);
-#' \item \code{Cover} metric of the number of observation related to this feature (only available for tree models);
-#' \item \code{Weight} percentage representing the relative number of times a feature have been taken into trees.
+#' \item \code{Features} names of the features used in the model;
+#' \item \code{Gain} represents fractional contribution of each feature to the model based on
+#' the total gain of this feature's splits. Higher percentage means a more important
+#' predictive feature.
+#' \item \code{Cover} metric of the number of observation related to this feature;
+#' \item \code{Frequency} percentage representing the relative number of times
+#' a feature have been used in trees.
 #' }
 #'
-#' If you don't provide \code{feature_names}, index of the features will be used instead.
+#' A linear model's importance \code{data.table} has only two columns:
+#' \itemize{
+#' \item \code{Features} names of the features used in the model;
+#' \item \code{Weight} the linear coefficient of this feature.
+#' }
 #'
-#' Because the index is extracted from the model dump (made on the C++ side), it starts at 0 (usual in C++) instead of 1 (usual in R).
-#'
-#' Co-occurence count
-#' ------------------
-#'
-#' The gain gives you indication about the information of how a feature is important in making a branch of a decision tree more pure. However, with this information only, you can't know if this feature has to be present or not to get a specific classification. In the example code, you may wonder if odor=none should be \code{TRUE} to not eat a mushroom.
-#'
-#' Co-occurence computation is here to help in understanding this relation between a predictor and a specific class. It will count how many observations are returned as \code{TRUE} by the \code{target} function (see parameters). When you execute the example below, there are 92 times only over the 3140 observations of the train dataset where a mushroom have no odor and can be eaten safely.
-#'
-#' If you need to remember only one thing: unless you want to leave us early, don't eat a mushroom which has no odor :-)
+#' If you don't provide or \code{model} doesn't have \code{feature_names},
+#' index of the features will be used instead. Because the index is extracted from the model dump
+#' (based on C++ code), it starts at 0 (as in C/C++ or Python) instead of 1 (usual in R).
 #'
 #' @examples
+#'
 #' data(agaricus.train, package='xgboost')
 #'
 #' bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label, max_depth = 2,
 #' eta = 1, nthread = 2, nrounds = 2,objective = "binary:logistic")
 #'
-#' xgb.importance(colnames(agaricus.train$data), model = bst)
-#'
-#' # Same thing with co-occurence computation this time
-#' xgb.importance(colnames(agaricus.train$data), model = bst,
-#' data = agaricus.train$data, label = agaricus.train$label)
+#' xgb.importance(model = bst)
 #'
 #' @export
-xgb.importance <- function(feature_names = NULL, model = NULL, data = NULL, label = NULL, target = function(x) ( (x + label) == 2)){
-  if (!class(feature_names) %in% c("character", "NULL")) {
-    stop("feature_names: Has to be a vector of character or NULL if the model already contains feature name. Look at this function documentation to see where to get feature names.")
-  }
+xgb.importance <- function(feature_names = NULL, model = NULL,
+                           data = NULL, label = NULL, target = NULL){

-  if (class(model) != "xgb.Booster") {
-    stop("model: Has to be an object of class xgb.Booster model generaged by the xgb.train function.")
-  }
+  if (!(is.null(data) && is.null(label) && is.null(target)))
+    warning("xgb.importance: parameters 'data', 'label' and 'target' are deprecated")

-  if((is.null(data) & !is.null(label)) | (!is.null(data) & is.null(label))) {
-    stop("data/label: Provide the two arguments if you want co-occurence computation or none of them if you are not interested but not one of them only.")
-  }
+  if (class(model) != "xgb.Booster")
+    stop("Either 'model' has to be an object of class xgb.Booster")

-  if(class(label) == "numeric"){
-    if(sum(label == 0) / length(label) > 0.5) label <- as(label, "sparseVector")
-  }
+  if (is.null(feature_names) && !is.null(model$feature_names))
+    feature_names <- model$feature_names

-  treeDump <- function(feature_names, text, keepDetail){
-    if(keepDetail) groupBy <- c("Feature", "Split", "MissingNo") else groupBy <- "Feature"
-    xgb.model.dt.tree(feature_names = feature_names, text = text)[,"MissingNo" := Missing == No ][Feature != "Leaf",.(Gain = sum(Quality), Cover = sum(Cover), Frequency = .N), by = groupBy, with = T][,`:=`(Gain = Gain / sum(Gain), Cover = Cover / sum(Cover), Frequency = Frequency / sum(Frequency))][order(Gain, decreasing = T)]
-  }
+  if (!class(feature_names) %in% c("character", "NULL"))
+    stop("feature_names: Has to be a character vector")

-  linearDump <- function(feature_names, text){
-    weights <- which(text == "weight:") %>% {a =. + 1; text[a:length(text)]} %>% as.numeric
-    if(is.null(feature_names)) feature_names <- seq(to = length(weights))
-    data.table(Feature = feature_names, Weight = weights)
-  }
+  model_text_dump <- xgb.dump(model = model, with_stats = TRUE)

-  model.text.dump <- xgb.dump(model = model, with_stats = T)
-
-  if(model.text.dump[2] == "bias:"){
-    result <- model.text.dump %>% linearDump(feature_names, .)
-    if(!is.null(data) | !is.null(label)) warning("data/label: these parameters should only be provided with decision tree based models.")
+  # linear model
+  if(model_text_dump[2] == "bias:"){
+    weights <- which(model_text_dump == "weight:") %>%
+               {model_text_dump[(. + 1):length(model_text_dump)]} %>%
+               as.numeric
+    if(is.null(feature_names))
+      feature_names <- seq(to = length(weights))
+    result <- data.table(Feature = feature_names, Weight = weights)[order(-abs(Weight))]
   } else {
-    result <- treeDump(feature_names, text = model.text.dump, keepDetail = !is.null(data))
-
-    # Co-occurence computation
-    if(!is.null(data) & !is.null(label) & nrow(result) > 0) {
-      # Take care of missing column
-      a <- data[, result[MissingNo == T,Feature], drop=FALSE] != 0
-      # Bind the two Matrix and reorder columns
-      c <- data[, result[MissingNo == F,Feature], drop=FALSE] %>% cBind(a,.) %>% .[,result[,Feature]]
-      rm(a)
-      # Apply split
-      d <- data[, result[,Feature], drop=FALSE] < as.numeric(result[,Split])
-      apply(c & d, 2, . %>% target %>% sum) -> vec
-
-      result <- result[, "RealCover" := as.numeric(vec), with = F][, "RealCover %" := RealCover / sum(label)][, MissingNo := NULL]
-    }
+    # tree model
+    result <- xgb.model.dt.tree(feature_names = feature_names, text = model_text_dump)[
+                Feature != "Leaf", .(Gain = sum(Quality),
+                                     Cover = sum(Cover),
+                                     Frequency = .N), by = Feature][
+                ,`:=`(Gain = Gain / sum(Gain),
+                      Cover = Cover / sum(Cover),
+                      Frequency = Frequency / sum(Frequency))][
+                order(Gain, decreasing = TRUE)]
   }
   result
 }
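A short sketch of the simplified interface documented above: with feature names now stored in the booster, the deprecated data/label/target arguments are no longer needed.

    bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label,
                   max_depth = 2, eta = 1, nthread = 2, nrounds = 2,
                   objective = "binary:logistic")
    xgb.importance(model = bst)   # tree model: Feature, Gain, Cover, Frequency columns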
@@ -104,4 +94,4 @@ xgb.importance <- function(feature_names = NULL, model = NULL, data = NULL, labe
 # Avoid error messages during CRAN check.
 # The reason is that these variables are never declared
 # They are mainly column names inferred by Data.table...
-globalVariables(c(".", ".N", "Gain", "Frequency", "Feature", "Split", "No", "Missing", "MissingNo", "RealCover"))
+globalVariables(c(".", ".N", "Gain", "Cover", "Frequency", "Feature"))
@@ -1,8 +1,23 @@
 #' Load xgboost model from binary file
 #'
-#' Load xgboost model from the binary model file
+#' Load xgboost model from the binary model file.
 #'
-#' @param modelfile the name of the binary file.
+#' @param modelfile the name of the binary input file.
+#'
+#' @details
+#' The input file is expected to contain a model saved in an xgboost-internal binary format
+#' using either \code{\link{xgb.save}} or \code{\link{cb.save.model}} in R, or using some
+#' appropriate methods from other xgboost interfaces. E.g., a model trained in Python and
+#' saved from there in xgboost format, could be loaded from R.
+#'
+#' Note: a model saved as an R-object, has to be loaded using corresponding R-methods,
+#' not \code{xgb.load}.
+#'
+#' @return
+#' An object of \code{xgb.Booster} class.
+#'
+#' @seealso
+#' \code{\link{xgb.save}}, \code{\link{xgb.Booster.complete}}.
 #'
 #' @examples
 #' data(agaricus.train, package='xgboost')
@@ -19,13 +34,13 @@ xgb.load <- function(modelfile) {
   if (is.null(modelfile))
     stop("xgb.load: modelfile cannot be NULL")

-  handle <- xgb.Booster(modelfile = modelfile)
+  handle <- xgb.Booster.handle(modelfile = modelfile)
   # re-use modelfile if it is raw so we do not need to serialize
   if (typeof(modelfile) == "raw") {
     bst <- xgb.handleToBooster(handle, modelfile)
   } else {
     bst <- xgb.handleToBooster(handle, NULL)
   }
-  bst <- xgb.Booster.check(bst, saveraw = TRUE)
+  bst <- xgb.Booster.complete(bst, saveraw = TRUE)
   return(bst)
 }
@@ -3,14 +3,16 @@
 #' Parse a boosted tree model text dump into a \code{data.table} structure.
 #'
 #' @param feature_names character vector of feature names. If the model already
-#' contains feature names, this argument should be \code{NULL} (default value)
+#' contains feature names, those would be used when \code{feature_names=NULL} (default value).
+#' Non-null \code{feature_names} could be provided to override those in the model.
 #' @param model object of class \code{xgb.Booster}
 #' @param text \code{character} vector previously generated by the \code{xgb.dump}
 #' function (where parameter \code{with_stats = TRUE} should have been set).
+#' \code{text} takes precedence over \code{model}.
 #' @param trees an integer vector of tree indices that should be parsed.
 #' If set to \code{NULL}, all trees of the model are parsed.
 #' It could be useful, e.g., in multiclass classification to get only
-#' the trees of one certain class. IMPORTANT: the tree index in xgboost model
+#' the trees of one certain class. IMPORTANT: the tree index in xgboost models
 #' is zero-based (e.g., use \code{trees = 0:4} for first 5 trees).
 #' @param ... currently not used.
 #'
@@ -43,7 +45,9 @@
 #' eta = 1, nthread = 2, nrounds = 2,objective = "binary:logistic")
 #'
 #' (dt <- xgb.model.dt.tree(colnames(agaricus.train$data), bst))
-#'
+#' # This bst has feature_names stored in it, so those would be used when
+#' # the feature_names parameter is not provided:
+#' (dt <- xgb.model.dt.tree(model = bst))
 #'
 #' # How to match feature names of splits that are following a current 'Yes' branch:
 #'
@@ -53,11 +57,6 @@
 xgb.model.dt.tree <- function(feature_names = NULL, model = NULL, text = NULL,
                               trees = NULL, ...){
   check.deprecation(...)
-  if (!class(feature_names) %in% c("character", "NULL")) {
-    stop("feature_names: Has to be a vector of character\n",
-         " or NULL if the model dump already contains feature names.\n",
-         " Look at this function documentation to see where to get feature names.")
-  }

   if (class(model) != "xgb.Booster" & class(text) != "character") {
     stop("Either 'model' has to be an object of class xgb.Booster\n",
@@ -65,12 +64,19 @@ xgb.model.dt.tree <- function(feature_names = NULL, model = NULL, text = NULL,
          " (or NULL if the model was provided).")
   }

+  if (is.null(feature_names) && !is.null(model) && !is.null(model$feature_names))
+    feature_names <- model$feature_names
+
+  if (!class(feature_names) %in% c("character", "NULL")) {
+    stop("feature_names: Has to be a character vector")
+  }
+
   if (!class(trees) %in% c("integer", "numeric", "NULL")) {
     stop("trees: Has to be a vector of integers.")
   }

   if (is.null(text)){
-    text <- xgb.dump(model = model, with_stats = T)
+    text <- xgb.dump(model = model, with_stats = TRUE)
   }

   if (length(text) < 2 ||
@@ -126,4 +126,4 @@ xgb.plot.tree <- function(feature_names = NULL, model = NULL, trees = NULL, plot
 # Avoid error messages during CRAN check.
 # The reason is that these variables are never declared
 # They are mainly column names inferred by Data.table...
-globalVariables(c("Feature", "ID", "Cover", "Quality", "Split", "Yes", "No", ".", "shape", "filledcolor", "label"))
+globalVariables(c("Feature", "ID", "Cover", "Quality", "Split", "Yes", "No", "Missing", ".", "shape", "filledcolor", "label"))
@@ -1,9 +1,22 @@
 #' Save xgboost model to binary file
 #'
-#' Save xgboost model from xgboost or xgb.train
+#' Save xgboost model to a file in binary format.
 #'
-#' @param model the model object.
-#' @param fname the name of the file to write.
+#' @param model model object of \code{xgb.Booster} class.
+#' @param fname name of the file to write.
+#'
+#' @details
+#' This methods allows to save a model in an xgboost-internal binary format which is universal
+#' among the various xgboost interfaces. In R, the saved model file could be read-in later
+#' using either the \code{\link{xgb.load}} function or the \code{xgb_model} parameter
+#' of \code{\link{xgb.train}}.
+#'
+#' Note: a model can also be saved as an R-object (e.g., by using \code{\link[base]{readRDS}}
+#' or \code{\link[base]{save}}). However, it would then only be compatible with R, and
+#' corresponding R-methods would need to be used to load it.
+#'
+#' @seealso
+#' \code{\link{xgb.load}}, \code{\link{xgb.Booster.complete}}.
 #'
 #' @examples
 #' data(agaricus.train, package='xgboost')
@@ -22,6 +35,7 @@ xgb.save <- function(model, fname) {
   if (class(model) != "xgb.Booster")
     stop("the input must be xgb.Booster. Use xgb.DMatrix.save to save xgb.DMatrix object.")

+  model <- xgb.Booster.complete(model, saveraw = FALSE)
   .Call("XGBoosterSaveModel_R", model$handle, fname, PACKAGE = "xgboost")
   return(TRUE)
 }
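A minimal sketch of what the added xgb.Booster.complete() call enables: a booster whose handle is nil (e.g., one that was just read back with readRDS) can still be saved in the binary format; the file names are illustrative.

    bst1 <- readRDS("xgb.model.rds")   # handle is nil, but bst1$raw is present
    xgb.save(bst1, "xgboost.model")    # works: the handle is restored internally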
@@ -1,6 +1,7 @@
 #' eXtreme Gradient Boosting Training
 #'
-#' \code{xgb.train} is an advanced interface for training an xgboost model. The \code{xgboost} function provides a simpler interface.
+#' \code{xgb.train} is an advanced interface for training an xgboost model.
+#' The \code{xgboost} function is a simpler wrapper for \code{xgb.train}.
 #'
 #' @param params the list of parameters.
 #' The complete list of parameters is available at \url{http://xgboost.readthedocs.io/en/latest/parameter.html}.
@@ -9,8 +10,7 @@
 #' 1. General Parameters
 #'
 #' \itemize{
-#' \item \code{booster} which booster to use, can be \code{gbtree} or \code{gblinear}. Default: \code{gbtree}
-#' \item \code{silent} 0 means printing running messages, 1 means silent mode. Default: 0
+#' \item \code{booster} which booster to use, can be \code{gbtree} or \code{gblinear}. Default: \code{gbtree}.
 #' }
 #'
 #' 2. Booster Parameters
@@ -54,24 +54,26 @@
 #' \item \code{eval_metric} evaluation metrics for validation data. Users can pass a self-defined function to it. Default: metric will be assigned according to objective(rmse for regression, and error for classification, mean average precision for ranking). List is provided in detail section.
 #' }
 #'
-#' @param data input dataset. \code{xgb.train} takes only an \code{xgb.DMatrix} as the input.
-#' \code{xgboost}, in addition, also accepts \code{matrix}, \code{dgCMatrix}, or local data file.
-#' @param nrounds the max number of iterations
-#' @param watchlist what information should be printed when \code{verbose=1} or
-#' \code{verbose=2}. Watchlist is used to specify validation set monitoring
-#' during training. For example user can specify
-#' watchlist=list(validation1=mat1, validation2=mat2) to watch
-#' the performance of each round's model on mat1 and mat2
-#'
+#' @param data training dataset. \code{xgb.train} accepts only an \code{xgb.DMatrix} as the input.
+#' \code{xgboost}, in addition, also accepts \code{matrix}, \code{dgCMatrix}, or name of a local data file.
+#' @param nrounds max number of boosting iterations.
+#' @param watchlist named list of xgb.DMatrix datasets to use for evaluating model performance.
+#' Metrics specified in either \code{eval_metric} or \code{feval} will be computed for each
+#' of these datasets during each boosting iteration, and stored in the end as a field named
+#' \code{evaluation_log} in the resulting object. When either \code{verbose>=1} or
+#' \code{\link{cb.print.evaluation}} callback is engaged, the performance results are continuously
+#' printed out during the training.
+#' E.g., specifying \code{watchlist=list(validation1=mat1, validation2=mat2)} allows to track
+#' the performance of each round's model on mat1 and mat2.
 #' @param obj customized objective function. Returns gradient and second order
 #' gradient with given prediction and dtrain.
 #' @param feval custimized evaluation function. Returns
 #' \code{list(metric='metric-name', value='metric-value')} with given
 #' prediction and dtrain.
-#' @param verbose If 0, xgboost will stay silent. If 1, xgboost will print
-#' information of performance. If 2, xgboost will print some additional information.
-#' Setting \code{verbose > 0} automatically engages the \code{\link{cb.evaluation.log}} and
-#' \code{\link{cb.print.evaluation}} callback functions.
+#' @param verbose If 0, xgboost will stay silent. If 1, it will print information about performance.
+#' If 2, some additional information will be printed out.
+#' Note that setting \code{verbose > 0} automatically engages the
+#' \code{cb.print.evaluation(period=1)} callback function.
 #' @param print_every_n Print each n-th iteration evaluation messages when \code{verbose>0}.
 #' Default is 1 which means all messages are printed. This parameter is passed to the
 #' \code{\link{cb.print.evaluation}} callback.
@@ -106,7 +108,7 @@
 #'
 #' The \code{xgb.train} interface supports advanced features such as \code{watchlist},
 #' customized objective and evaluation metric functions, therefore it is more flexible
-#' than the \code{\link{xgboost}} interface.
+#' than the \code{xgboost} interface.
 #'
 #' Parallelization is automatically enabled if \code{OpenMP} is present.
 #' Number of threads can also be manually specified via \code{nthread} parameter.
@@ -132,7 +134,7 @@
 #' \itemize{
 #' \item \code{cb.print.evaluation} is turned on when \code{verbose > 0};
 #' and the \code{print_every_n} parameter is passed to it.
-#' \item \code{cb.evaluation.log} is on when \code{verbose > 0} and \code{watchlist} is present.
+#' \item \code{cb.evaluation.log} is on when \code{watchlist} is present.
 #' \item \code{cb.early.stop}: when \code{early_stopping_rounds} is set.
 #' \item \code{cb.save.model}: when \code{save_period > 0} is set.
 #' }
@@ -158,6 +160,8 @@
 #' (only available with early stopping).
 #' \item \code{best_score} the best evaluation metric value during early stopping.
 #' (only available with early stopping).
+#' \item \code{feature_names} names of the training dataset features
+#' (only when comun names were defined in training data).
 #' }
 #'
 #' @seealso
@@ -171,7 +175,7 @@
 #'
 #' dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
 #' dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label)
-#' watchlist <- list(eval = dtest, train = dtrain)
+#' watchlist <- list(train = dtrain, eval = dtest)
 #'
 #' ## A simple xgb.train example:
 #' param <- list(max_depth = 2, eta = 1, silent = 1, nthread = 2,
@@ -210,17 +214,15 @@
 #'
 #'
 #' ## An xgb.train example of using variable learning rates at each iteration:
-#' param <- list(max_depth = 2, eta = 1, silent = 1, nthread = 2)
+#' param <- list(max_depth = 2, eta = 1, silent = 1, nthread = 2,
+#' objective = "binary:logistic", eval_metric = "auc")
 #' my_etas <- list(eta = c(0.5, 0.1))
 #' bst <- xgb.train(param, dtrain, nrounds = 2, watchlist,
 #' callbacks = list(cb.reset.parameters(my_etas)))
 #'
-#'
-#' ## Explicit use of the cb.evaluation.log callback allows to run
-#' ## xgb.train silently but still store the evaluation results:
-#' bst <- xgb.train(param, dtrain, nrounds = 2, watchlist,
-#' verbose = 0, callbacks = list(cb.evaluation.log()))
-#' print(bst$evaluation_log)
+#' ## Early stopping:
+#' bst <- xgb.train(param, dtrain, nrounds = 25, watchlist,
+#' early_stopping_rounds = 3)
 #'
 #' ## An 'xgboost' interface example:
 #' bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label,
@@ -259,13 +261,13 @@ xgb.train <- function(params = list(), data, nrounds, watchlist = list(),
   # evaluation printing callback
   params <- c(params, list(silent = ifelse(verbose > 1, 0, 1)))
   print_every_n <- max( as.integer(print_every_n), 1L)
-  if (!has.callbacks(callbacks, 'cb.print.evaluation') && verbose) {
+  if (!has.callbacks(callbacks, 'cb.print.evaluation') &&
+      verbose) {
     callbacks <- add.cb(callbacks, cb.print.evaluation(print_every_n))
   }
-  # evaluation log callback: it is automatically enabled only when verbose > 0
+  # evaluation log callback: it is automatically enabled when watchlist is provided
   evaluation_log <- list()
-  if (verbose > 0 &&
-      !has.callbacks(callbacks, 'cb.evaluation.log') &&
+  if (!has.callbacks(callbacks, 'cb.evaluation.log') &&
       length(watchlist) > 0) {
     callbacks <- add.cb(callbacks, cb.evaluation.log())
   }
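A sketch of the behaviour change above, reusing param, dtrain and watchlist from the roxygen examples: with a watchlist present, the evaluation log is now kept even for a completely silent run, without adding cb.evaluation.log() by hand.

    bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0)
    print(bst$evaluation_log)   # per-iteration train/eval metrics are still stored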
@@ -288,7 +290,7 @@ xgb.train <- function(params = list(), data, nrounds, watchlist = list(),
   is_update <- NVL(params[['process_type']], '.') == 'update'

   # Construct a booster (either a new one or load from xgb_model)
-  handle <- xgb.Booster(params, append(watchlist, dtrain), xgb_model)
+  handle <- xgb.Booster.handle(params, append(watchlist, dtrain), xgb_model)
   bst <- xgb.handleToBooster(handle)

   # extract parameters that can affect the relationship b/w #trees and #iterations
@@ -332,7 +334,7 @@ xgb.train <- function(params = list(), data, nrounds, watchlist = list(),
   }
   for (f in cb$finalize) f(finalize=TRUE)

-  bst <- xgb.Booster.check(bst, saveraw = TRUE)
+  bst <- xgb.Booster.complete(bst, saveraw = TRUE)

   # store the total number of boosting iterations
   bst$niter = end_iteration
@@ -354,6 +356,8 @@ xgb.train <- function(params = list(), data, nrounds, watchlist = list(),
   bst$call <- match.call()
   bst$params <- params
   bst$callbacks <- callbacks
+  if (!is.null(colnames(dtrain)))
+    bst$feature_names <- colnames(dtrain)

   return(bst)
 }
@@ -1,4 +1,4 @@
-# Simple interface for training an xgboost model.
+# Simple interface for training an xgboost model that wraps \code{xgb.train}
 # Its documentation is combined with xgb.train.
 #
 #' @rdname xgb.train
@@ -12,9 +12,7 @@ xgboost <- function(data = NULL, label = NULL, missing = NA, weight = NULL,

   dtrain <- xgb.get.DMatrix(data, label, missing, weight)

-  watchlist <- list()
-  if (verbose > 0)
-    watchlist$train = dtrain
+  watchlist <- list(train = dtrain)

   bst <- xgb.train(params, dtrain, nrounds, watchlist, verbose = verbose, print_every_n = print_every_n,
                    early_stopping_rounds = early_stopping_rounds, maximize = maximize,
R-package/man/xgb.Booster.complete.Rd (new file, 49 lines)
@@ -0,0 +1,49 @@
|
|||||||
|
% Generated by roxygen2: do not edit by hand
|
||||||
|
% Please edit documentation in R/xgb.Booster.R
|
||||||
|
\name{xgb.Booster.complete}
|
||||||
|
\alias{xgb.Booster.complete}
|
||||||
|
\title{Restore missing parts of an incomplete xgb.Booster object.}
|
||||||
|
\usage{
|
||||||
|
xgb.Booster.complete(object, saveraw = TRUE)
|
||||||
|
}
|
||||||
|
\arguments{
|
||||||
|
\item{object}{object of class \code{xgb.Booster}}
|
||||||
|
|
||||||
|
\item{saveraw}{a flag indicating whether to append \code{raw} Booster memory dump data
|
||||||
|
when it doesn't already exist.}
|
||||||
|
}
|
||||||
|
\value{
|
||||||
|
An object of \code{xgb.Booster} class.
|
||||||
|
}
|
||||||
|
\description{
|
||||||
|
It attempts to complete an \code{xgb.Booster} object by restoring either its missing
|
||||||
|
raw model memory dump (when it has no \code{raw} data but its \code{xgb.Booster.handle} is valid)
|
||||||
|
or its missing internal handle (when its \code{xgb.Booster.handle} is not valid
|
||||||
|
but it has a raw Booster memory dump).
|
||||||
|
}
|
||||||
|
\details{
|
||||||
|
While this method is primarily for internal use, it might be useful in some practical situations.
|
||||||
|
|
||||||
|
E.g., when an \code{xgb.Booster} model is saved as an R object and then is loaded as an R object,
|
||||||
|
its handle (pointer) to an internal xgboost model would be invalid. The majority of xgboost methods
|
||||||
|
should still work for such a model object since those methods would be using
|
||||||
|
\code{xgb.Booster.complete} internally. However, one might find it to be more efficient to call the
|
||||||
|
\code{xgb.Booster.complete} function once after loading a model as an R-object. That which would
|
||||||
|
prevent further reconstruction (potentially, multiple times) of an internal booster model.
|
||||||
|
}
|
||||||
|
\examples{
|
||||||
|
|
||||||
|
data(agaricus.train, package='xgboost')
|
||||||
|
bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label, max_depth = 2,
|
||||||
|
eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic")
|
||||||
|
saveRDS(bst, "xgb.model.rds")
|
||||||
|
|
||||||
|
bst1 <- readRDS("xgb.model.rds")
|
||||||
|
# the handle is invalid:
|
||||||
|
print(bst1$handle)
|
||||||
|
bst1 <- xgb.Booster.complete(bst1)
|
||||||
|
# now the handle points to a valid internal booster model:
|
||||||
|
print(bst1$handle)
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
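A follow-on usage sketch for the help page above (the file name is reused from its example; the efficiency benefit is only hinted at, not measured here):

```r
bst1 <- readRDS("xgb.model.rds")

# completing once rebuilds the internal booster from bst1$raw a single time;
# otherwise each method call on the nil-handle object would redo that work
bst1 <- xgb.Booster.complete(bst1)

data(agaricus.test, package = 'xgboost')
pred <- predict(bst1, agaricus.test$data)
```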
@@ -2,7 +2,7 @@
% Please edit documentation in R/xgb.dump.R
\name{xgb.dump}
\alias{xgb.dump}
-\title{Save xgboost model to text file}
+\title{Dump an xgboost model in text format.}
\usage{
xgb.dump(model = NULL, fname = NULL, fmap = "", with_stats = FALSE,
dump_format = c("text", "json"), ...)
@@ -10,17 +10,18 @@ xgb.dump(model = NULL, fname = NULL, fmap = "", with_stats = FALSE,
\arguments{
\item{model}{the model object.}

-\item{fname}{the name of the text file where to save the model text dump. If not provided or set to \code{NULL} the function will return the model as a \code{character} vector.}
+\item{fname}{the name of the text file where to save the model text dump.
+If not provided or set to \code{NULL}, the model is returned as a \code{character} vector.}

-\item{fmap}{feature map file representing the type of feature.
+\item{fmap}{feature map file representing feature types.
Detailed description could be found at
\url{https://github.com/dmlc/xgboost/wiki/Binary-Classification#dump-model}.
See demo/ for walkthrough example in R, and
\url{https://github.com/dmlc/xgboost/blob/master/demo/data/featmap.txt}
for example Format.}

-\item{with_stats}{whether dump statistics of splits
-When this option is on, the model dump comes with two additional statistics:
+\item{with_stats}{whether to dump some additional statistics about the splits.
+When this option is on, the model dump contains two additional values:
gain is the approximate loss function gain we get in each split;
cover is the sum of second order gradient in each node.}

@@ -29,10 +30,11 @@ cover is the sum of second order gradient in each node.}
\item{...}{currently not used}
}
\value{
-if fname is not provided or set to \code{NULL} the function will return the model as a \code{character} vector. Otherwise it will return \code{TRUE}.
+If fname is not provided or set to \code{NULL} the function will return the model
+as a \code{character} vector. Otherwise it will return \code{TRUE}.
}
\description{
-Save a xgboost model to text file. Could be parsed later.
+Dump an xgboost model in text format.
}
\examples{
data(agaricus.train, package='xgboost')
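A quick sketch of the two return modes documented above (assumes a trained booster `bst`; the output file name is illustrative):

```r
# no fname: the dump comes back as a character vector
dump_lines <- xgb.dump(bst, with_stats = TRUE)
head(dump_lines)

# with fname: the dump is written to disk and TRUE is returned
xgb.dump(bst, fname = "model.dump.txt", with_stats = TRUE)
```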
@@ -2,64 +2,65 @@
% Please edit documentation in R/xgb.importance.R
\name{xgb.importance}
\alias{xgb.importance}
-\title{Show importance of features in a model}
+\title{Importance of features in a model.}
\usage{
xgb.importance(feature_names = NULL, model = NULL, data = NULL,
-label = NULL, target = function(x) ((x + label) == 2))
+label = NULL, target = NULL)
}
\arguments{
-\item{feature_names}{names of each feature as a \code{character} vector. Can be extracted from a sparse matrix (see example). If model dump already contains feature names, this argument should be \code{NULL}.}
+\item{feature_names}{character vector of feature names. If the model already
+contains feature names, those would be used when \code{feature_names=NULL} (default value).
+Non-null \code{feature_names} could be provided to override those in the model.}

-\item{model}{generated by the \code{xgb.train} function.}
+\item{model}{object of class \code{xgb.Booster}.}

-\item{data}{the dataset used for the training step. Will be used with \code{label} parameter for co-occurence computation. More information in \code{Detail} part. This parameter is optional.}
+\item{data}{deprecated.}

-\item{label}{the label vector used for the training step. Will be used with \code{data} parameter for co-occurence computation. More information in \code{Detail} part. This parameter is optional.}
+\item{label}{deprecated.}

-\item{target}{a function which returns \code{TRUE} or \code{1} when an observation should be count as a co-occurence and \code{FALSE} or \code{0} otherwise. Default function is provided for computing co-occurences in a binary classification. The \code{target} function should have only one parameter. This parameter will be used to provide each important feature vector after having applied the split condition, therefore these vector will be only made of 0 and 1 only, whatever was the information before. More information in \code{Detail} part. This parameter is optional.}
+\item{target}{deprecated.}
}
\value{
-A \code{data.table} of the features used in the model with their average gain (and their weight for boosted tree model) in the model.
+For a tree model, a \code{data.table} with the following columns:
+\itemize{
+\item \code{Features} names of the features used in the model;
+\item \code{Gain} represents fractional contribution of each feature to the model based on
+the total gain of this feature's splits. Higher percentage means a more important
+predictive feature.
+\item \code{Cover} metric of the number of observations related to this feature;
+\item \code{Frequency} percentage representing the relative number of times
+a feature has been used in trees.
+}
+
+A linear model's importance \code{data.table} has only two columns:
+\itemize{
+\item \code{Features} names of the features used in the model;
+\item \code{Weight} the linear coefficient of this feature.
+}
+
+If you don't provide or \code{model} doesn't have \code{feature_names},
+index of the features will be used instead. Because the index is extracted from the model dump
+(based on C++ code), it starts at 0 (as in C/C++ or Python) instead of 1 (usual in R).
}
\description{
-Create a \code{data.table} of the most important features of a model.
+Creates a \code{data.table} of feature importances in a model.
}
\details{
-This function is for both linear and tree models.
+This function works for both linear and tree models.

-\code{data.table} is returned by the function.
-The columns are:
-\itemize{
-\item \code{Features} name of the features as provided in \code{feature_names} or already present in the model dump;
-\item \code{Gain} contribution of each feature to the model. For boosted tree model, each gain of each feature of each tree is taken into account, then average per feature to give a vision of the entire model. Highest percentage means important feature to predict the \code{label} used for the training (only available for tree models);
-\item \code{Cover} metric of the number of observation related to this feature (only available for tree models);
-\item \code{Weight} percentage representing the relative number of times a feature have been taken into trees.
-}
-
-If you don't provide \code{feature_names}, index of the features will be used instead.
-
-Because the index is extracted from the model dump (made on the C++ side), it starts at 0 (usual in C++) instead of 1 (usual in R).
-
-Co-occurence count
-------------------
-
-The gain gives you indication about the information of how a feature is important in making a branch of a decision tree more pure. However, with this information only, you can't know if this feature has to be present or not to get a specific classification. In the example code, you may wonder if odor=none should be \code{TRUE} to not eat a mushroom.
-
-Co-occurence computation is here to help in understanding this relation between a predictor and a specific class. It will count how many observations are returned as \code{TRUE} by the \code{target} function (see parameters). When you execute the example below, there are 92 times only over the 3140 observations of the train dataset where a mushroom have no odor and can be eaten safely.
-
-If you need to remember only one thing: unless you want to leave us early, don't eat a mushroom which has no odor :-)
+For linear models, the importance is the absolute magnitude of linear coefficients.
+For that reason, in order to obtain a meaningful ranking by importance for a linear model,
+the features need to be on the same scale (which you also would want to do when using either
+L1 or L2 regularization).
}
\examples{

data(agaricus.train, package='xgboost')

bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label, max_depth = 2,
eta = 1, nthread = 2, nrounds = 2,objective = "binary:logistic")

-xgb.importance(colnames(agaricus.train$data), model = bst)
+xgb.importance(model = bst)

-# Same thing with co-occurence computation this time
-xgb.importance(colnames(agaricus.train$data), model = bst,
-data = agaricus.train$data, label = agaricus.train$label)
-
}

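To contrast the two output shapes described in the \value section, a sketch (the gblinear fit is an illustrative assumption, not from the patch):

```r
data(agaricus.train, package = 'xgboost')

bst.tree <- xgboost(data = agaricus.train$data, label = agaricus.train$label,
                    max_depth = 2, eta = 1, nrounds = 2, nthread = 2,
                    objective = "binary:logistic", verbose = 0)
# tree model: per-feature Gain, Cover and Frequency
xgb.importance(model = bst.tree)

bst.lin <- xgboost(data = agaricus.train$data, label = agaricus.train$label,
                   booster = "gblinear", nrounds = 2, nthread = 2,
                   objective = "binary:logistic", verbose = 0)
# linear model: per-feature Weight (the linear coefficient)
xgb.importance(model = bst.lin)
```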
@@ -7,10 +7,22 @@
xgb.load(modelfile)
}
\arguments{
-\item{modelfile}{the name of the binary file.}
+\item{modelfile}{the name of the binary input file.}
+}
+\value{
+An object of \code{xgb.Booster} class.
}
\description{
-Load xgboost model from the binary model file
+Load xgboost model from the binary model file.
+}
+\details{
+The input file is expected to contain a model saved in an xgboost-internal binary format
+using either \code{\link{xgb.save}} or \code{\link{cb.save.model}} in R, or using some
+appropriate methods from other xgboost interfaces. E.g., a model trained in Python and
+saved from there in xgboost format, could be loaded from R.
+
+Note: a model saved as an R-object has to be loaded using corresponding R-methods,
+not \code{xgb.load}.
}
\examples{
data(agaricus.train, package='xgboost')
@@ -23,4 +35,7 @@ xgb.save(bst, 'xgb.model')
bst <- xgb.load('xgb.model')
pred <- predict(bst, test$data)
}
+\seealso{
+\code{\link{xgb.save}}, \code{\link{xgb.Booster.complete}}.
+}

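A sketch separating the two load paths that the new details and note distinguish (file names are illustrative):

```r
# xgboost-internal binary format: written by xgb.save() (or another xgboost interface)
xgb.save(bst, "xgb.model")
bst2 <- xgb.load("xgb.model")

# R serialization: must be read back with R methods, not with xgb.load()
saveRDS(bst, "xgb.model.rds")
bst3 <- readRDS("xgb.model.rds")
```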
@@ -9,17 +9,19 @@ xgb.model.dt.tree(feature_names = NULL, model = NULL, text = NULL,
}
\arguments{
\item{feature_names}{character vector of feature names. If the model already
-contains feature names, this argument should be \code{NULL} (default value)}
+contains feature names, those would be used when \code{feature_names=NULL} (default value).
+Non-null \code{feature_names} could be provided to override those in the model.}

\item{model}{object of class \code{xgb.Booster}}

\item{text}{\code{character} vector previously generated by the \code{xgb.dump}
-function (where parameter \code{with_stats = TRUE} should have been set).}
+function (where parameter \code{with_stats = TRUE} should have been set).
+\code{text} takes precedence over \code{model}.}

\item{trees}{an integer vector of tree indices that should be parsed.
If set to \code{NULL}, all trees of the model are parsed.
It could be useful, e.g., in multiclass classification to get only
-the trees of one certain class. IMPORTANT: the tree index in xgboost model
+the trees of one certain class. IMPORTANT: the tree index in xgboost models
is zero-based (e.g., use \code{trees = 0:4} for first 5 trees).}

\item{...}{currently not used.}
@@ -56,7 +58,9 @@ bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label, max_dep
eta = 1, nthread = 2, nrounds = 2,objective = "binary:logistic")

(dt <- xgb.model.dt.tree(colnames(agaricus.train$data), bst))
+# This bst has feature_names stored in it, so those would be used when
+# the feature_names parameter is not provided:
+(dt <- xgb.model.dt.tree(model = bst))

# How to match feature names of splits that are following a current 'Yes' branch:

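A brief sketch of the other input routes mentioned in the updated arguments (reusing the `bst` and data from the example above):

```r
# parse only the first two trees; tree indices are zero-based
xgb.model.dt.tree(model = bst, trees = 0:1)

# parse from a text dump produced with statistics; text takes precedence over model
dump_with_stats <- xgb.dump(bst, with_stats = TRUE)
xgb.model.dt.tree(feature_names = colnames(agaricus.train$data), text = dump_with_stats)
```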
@@ -7,12 +7,22 @@
xgb.save(model, fname)
}
\arguments{
-\item{model}{the model object.}
+\item{model}{model object of \code{xgb.Booster} class.}

-\item{fname}{the name of the file to write.}
+\item{fname}{name of the file to write.}
}
\description{
-Save xgboost model from xgboost or xgb.train
+Save xgboost model to a file in binary format.
+}
+\details{
+This method allows saving a model in an xgboost-internal binary format which is universal
+among the various xgboost interfaces. In R, the saved model file could be read-in later
+using either the \code{\link{xgb.load}} function or the \code{xgb_model} parameter
+of \code{\link{xgb.train}}.
+
+Note: a model can also be saved as an R-object (e.g., by using \code{\link[base]{saveRDS}}
+or \code{\link[base]{save}}). However, it would then only be compatible with R, and
+corresponding R-methods would need to be used to load it.
}
\examples{
data(agaricus.train, package='xgboost')
@@ -25,4 +35,7 @@ xgb.save(bst, 'xgb.model')
bst <- xgb.load('xgb.model')
pred <- predict(bst, test$data)
}
+\seealso{
+\code{\link{xgb.load}}, \code{\link{xgb.Booster.complete}}.
+}

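A sketch of the two read-back routes named in the new details section (the `dtrain` xgb.DMatrix and the parameter list are assumed for illustration):

```r
xgb.save(bst, "xgb.model")

# read the binary file back directly
bst2 <- xgb.load("xgb.model")

# or continue boosting from it via the xgb_model parameter of xgb.train
bst3 <- xgb.train(params = list(objective = "binary:logistic", max_depth = 2),
                  data = dtrain, nrounds = 2, xgb_model = "xgb.model")
```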
@@ -23,8 +23,7 @@ xgboost(data = NULL, label = NULL, missing = NA, weight = NULL,
1. General Parameters

\itemize{
-\item \code{booster} which booster to use, can be \code{gbtree} or \code{gblinear}. Default: \code{gbtree}
-\item \code{silent} 0 means printing running messages, 1 means silent mode. Default: 0
+\item \code{booster} which booster to use, can be \code{gbtree} or \code{gblinear}. Default: \code{gbtree}.
}

2. Booster Parameters
@@ -68,16 +67,19 @@ xgboost(data = NULL, label = NULL, missing = NA, weight = NULL,
\item \code{eval_metric} evaluation metrics for validation data. Users can pass a self-defined function to it. Default: metric will be assigned according to objective(rmse for regression, and error for classification, mean average precision for ranking). List is provided in detail section.
}}

-\item{data}{input dataset. \code{xgb.train} takes only an \code{xgb.DMatrix} as the input.
-\code{xgboost}, in addition, also accepts \code{matrix}, \code{dgCMatrix}, or local data file.}
+\item{data}{training dataset. \code{xgb.train} accepts only an \code{xgb.DMatrix} as the input.
+\code{xgboost}, in addition, also accepts \code{matrix}, \code{dgCMatrix}, or name of a local data file.}

-\item{nrounds}{the max number of iterations}
+\item{nrounds}{max number of boosting iterations.}

-\item{watchlist}{what information should be printed when \code{verbose=1} or
-\code{verbose=2}. Watchlist is used to specify validation set monitoring
-during training. For example user can specify
-watchlist=list(validation1=mat1, validation2=mat2) to watch
-the performance of each round's model on mat1 and mat2}
+\item{watchlist}{named list of xgb.DMatrix datasets to use for evaluating model performance.
+Metrics specified in either \code{eval_metric} or \code{feval} will be computed for each
+of these datasets during each boosting iteration, and stored in the end as a field named
+\code{evaluation_log} in the resulting object. When either \code{verbose>=1} or
+\code{\link{cb.print.evaluation}} callback is engaged, the performance results are continuously
+printed out during the training.
+E.g., specifying \code{watchlist=list(validation1=mat1, validation2=mat2)} allows tracking
+the performance of each round's model on mat1 and mat2.}

\item{obj}{customized objective function. Returns gradient and second order
gradient with given prediction and dtrain.}
@@ -86,10 +88,10 @@ gradient with given prediction and dtrain.}
\code{list(metric='metric-name', value='metric-value')} with given
prediction and dtrain.}

-\item{verbose}{If 0, xgboost will stay silent. If 1, xgboost will print
-information of performance. If 2, xgboost will print some additional information.
-Setting \code{verbose > 0} automatically engages the \code{\link{cb.evaluation.log}} and
-\code{\link{cb.print.evaluation}} callback functions.}
+\item{verbose}{If 0, xgboost will stay silent. If 1, it will print information about performance.
+If 2, some additional information will be printed out.
+Note that setting \code{verbose > 0} automatically engages the
+\code{cb.print.evaluation(period=1)} callback function.}

\item{print_every_n}{Print each n-th iteration evaluation messages when \code{verbose>0}.
Default is 1 which means all messages are printed. This parameter is passed to the
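To illustrate the reworded watchlist/verbose semantics, a sketch (it assumes the agaricus demo data is loaded, as in the examples further down):

```r
dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
dtest  <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label)

# metrics for every watchlist entry are computed each round; with verbose = 0
# nothing is printed, but the results are still stored in evaluation_log
bst <- xgb.train(list(objective = "binary:logistic", max_depth = 2, eta = 1, nthread = 2),
                 dtrain, nrounds = 2,
                 watchlist = list(train = dtrain, eval = dtest), verbose = 0)
print(bst$evaluation_log)
```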
@@ -151,17 +153,20 @@ An object of class \code{xgb.Booster} with the following elements:
(only available with early stopping).
\item \code{best_score} the best evaluation metric value during early stopping.
(only available with early stopping).
+\item \code{feature_names} names of the training dataset features
+(only when column names were defined in training data).
}
}
\description{
-\code{xgb.train} is an advanced interface for training an xgboost model. The \code{xgboost} function provides a simpler interface.
+\code{xgb.train} is an advanced interface for training an xgboost model.
+The \code{xgboost} function is a simpler wrapper for \code{xgb.train}.
}
\details{
These are the training functions for \code{xgboost}.

The \code{xgb.train} interface supports advanced features such as \code{watchlist},
customized objective and evaluation metric functions, therefore it is more flexible
-than the \code{\link{xgboost}} interface.
+than the \code{xgboost} interface.

Parallelization is automatically enabled if \code{OpenMP} is present.
Number of threads can also be manually specified via \code{nthread} parameter.
@@ -187,7 +192,7 @@ The following callbacks are automatically created when certain parameters are se
\itemize{
\item \code{cb.print.evaluation} is turned on when \code{verbose > 0};
and the \code{print_every_n} parameter is passed to it.
-\item \code{cb.evaluation.log} is on when \code{verbose > 0} and \code{watchlist} is present.
+\item \code{cb.evaluation.log} is on when \code{watchlist} is present.
\item \code{cb.early.stop}: when \code{early_stopping_rounds} is set.
\item \code{cb.save.model}: when \code{save_period > 0} is set.
}
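To make the callback mapping concrete, a sketch that triggers two of them through plain parameters (values mirror the examples and tests elsewhere in this change; `param`, `dtrain` and `watchlist` are assumed):

```r
# save_period/save_name engage cb.save.model: here, one model file per iteration
bst <- xgb.train(param, dtrain, nrounds = 2, watchlist,
                 save_period = 1, save_name = "xgboost_%02d.model")

# early_stopping_rounds engages cb.early.stop
bst <- xgb.train(param, dtrain, nrounds = 25, watchlist,
                 early_stopping_rounds = 3)
```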
@@ -198,7 +203,7 @@ data(agaricus.test, package='xgboost')

dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label)
-watchlist <- list(eval = dtest, train = dtrain)
+watchlist <- list(train = dtrain, eval = dtest)

## A simple xgb.train example:
param <- list(max_depth = 2, eta = 1, silent = 1, nthread = 2,
@@ -237,17 +242,15 @@ bst <- xgb.train(param, dtrain, nrounds = 2, watchlist,


## An xgb.train example of using variable learning rates at each iteration:
-param <- list(max_depth = 2, eta = 1, silent = 1, nthread = 2)
+param <- list(max_depth = 2, eta = 1, silent = 1, nthread = 2,
+objective = "binary:logistic", eval_metric = "auc")
my_etas <- list(eta = c(0.5, 0.1))
bst <- xgb.train(param, dtrain, nrounds = 2, watchlist,
callbacks = list(cb.reset.parameters(my_etas)))

-## Explicit use of the cb.evaluation.log callback allows to run
-## xgb.train silently but still store the evaluation results:
-bst <- xgb.train(param, dtrain, nrounds = 2, watchlist,
-verbose = 0, callbacks = list(cb.evaluation.log()))
-print(bst$evaluation_log)
+## Early stopping:
+bst <- xgb.train(param, dtrain, nrounds = 25, watchlist,
+early_stopping_rounds = 3)

## An 'xgboost' interface example:
bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label,
@@ -8,7 +8,9 @@ train <- agaricus.train
test <- agaricus.test
set.seed(1994)

-windows_flag = grepl('Windows', Sys.info()[['sysname']])
+# disable some tests for Win32
+windows_flag = .Platform$OS.type == "windows" &&
+.Machine$sizeof.pointer != 8

test_that("train and predict binary classification", {
nrounds = 2
@@ -109,7 +111,7 @@ test_that("train and predict RF with softprob", {
set.seed(11)
bst <- xgboost(data = as.matrix(iris[, -5]), label = lb,
max_depth = 3, eta = 0.9, nthread = 2, nrounds = nrounds,
-objective = "multi:softprob", num_class=3,
+objective = "multi:softprob", num_class=3, verbose = 0,
num_parallel_tree = 4, subsample = 0.5, colsample_bytree = 0.5)
expect_equal(bst$niter, 15)
expect_equal(xgb.ntree(bst), 15*3*4)
@@ -144,25 +146,25 @@ test_that("training continuation works", {

# for the reference, use 4 iterations at once:
set.seed(11)
-bst <- xgb.train(param, dtrain, nrounds = 4, watchlist)
+bst <- xgb.train(param, dtrain, nrounds = 4, watchlist, verbose = 0)
# first two iterations:
set.seed(11)
-bst1 <- xgb.train(param, dtrain, nrounds = 2, watchlist)
+bst1 <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0)
# continue for two more:
-bst2 <- xgb.train(param, dtrain, nrounds = 2, watchlist, xgb_model = bst1)
+bst2 <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0, xgb_model = bst1)
if (!windows_flag)
expect_equal(bst$raw, bst2$raw)
expect_false(is.null(bst2$evaluation_log))
expect_equal(dim(bst2$evaluation_log), c(4, 2))
expect_equal(bst2$evaluation_log, bst$evaluation_log)
# test continuing from raw model data
-bst2 <- xgb.train(param, dtrain, nrounds = 2, watchlist, xgb_model = bst1$raw)
+bst2 <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0, xgb_model = bst1$raw)
if (!windows_flag)
expect_equal(bst$raw, bst2$raw)
expect_equal(dim(bst2$evaluation_log), c(2, 2))
# test continuing from a model in file
xgb.save(bst1, "xgboost.model")
-bst2 <- xgb.train(param, dtrain, nrounds = 2, watchlist, xgb_model = "xgboost.model")
+bst2 <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0, xgb_model = "xgboost.model")
if (!windows_flag)
expect_equal(bst$raw, bst2$raw)
expect_equal(dim(bst2$evaluation_log), c(2, 2))
@@ -171,9 +173,11 @@ test_that("training continuation works", {

test_that("xgb.cv works", {
set.seed(11)
+expect_output(
cv <- xgb.cv(data = train$data, label = train$label, max_depth = 2, nfold = 5,
eta = 1., nthread = 2, nrounds = 2, objective = "binary:logistic",
verbose=TRUE)
+, "train-error:")
expect_is(cv, 'xgb.cv.synchronous')
expect_false(is.null(cv$evaluation_log))
expect_lt(cv$evaluation_log[, min(test_error_mean)], 0.03)
@@ -107,18 +107,27 @@ test_that("cb.evaluation.log works as expected", {

param <- list(objective = "binary:logistic", max_depth = 4, nthread = 2)

+test_that("can store evaluation_log without printing", {
+expect_silent(
+bst <- xgb.train(param, dtrain, nrounds = 10, watchlist, eta = 1, verbose = 0)
+)
+expect_false(is.null(bst$evaluation_log))
+expect_false(is.null(bst$evaluation_log$train_error))
+expect_lt(bst$evaluation_log[, min(train_error)], 0.2)
+})
+
test_that("cb.reset.parameters works as expected", {

# fixed eta
set.seed(111)
-bst0 <- xgb.train(param, dtrain, nrounds = 2, watchlist, eta = 0.9)
+bst0 <- xgb.train(param, dtrain, nrounds = 2, watchlist, eta = 0.9, verbose = 0)
expect_false(is.null(bst0$evaluation_log))
expect_false(is.null(bst0$evaluation_log$train_error))

# same eta but re-set as a vector parameter in the callback
set.seed(111)
my_par <- list(eta = c(0.9, 0.9))
-bst1 <- xgb.train(param, dtrain, nrounds = 2, watchlist,
+bst1 <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0,
callbacks = list(cb.reset.parameters(my_par)))
expect_false(is.null(bst1$evaluation_log$train_error))
expect_equal(bst0$evaluation_log$train_error,
@@ -127,7 +136,7 @@ test_that("cb.reset.parameters works as expected", {
# same eta but re-set via a function in the callback
set.seed(111)
my_par <- list(eta = function(itr, itr_end) 0.9)
-bst2 <- xgb.train(param, dtrain, nrounds = 2, watchlist,
+bst2 <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0,
callbacks = list(cb.reset.parameters(my_par)))
expect_false(is.null(bst2$evaluation_log$train_error))
expect_equal(bst0$evaluation_log$train_error,
@@ -136,7 +145,7 @@ test_that("cb.reset.parameters works as expected", {
# different eta re-set as a vector parameter in the callback
set.seed(111)
my_par <- list(eta = c(0.6, 0.5))
-bst3 <- xgb.train(param, dtrain, nrounds = 2, watchlist,
+bst3 <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0,
callbacks = list(cb.reset.parameters(my_par)))
expect_false(is.null(bst3$evaluation_log$train_error))
expect_false(all(bst0$evaluation_log$train_error == bst3$evaluation_log$train_error))
@@ -144,18 +153,18 @@ test_that("cb.reset.parameters works as expected", {
# resetting multiple parameters at the same time runs with no error
my_par <- list(eta = c(1., 0.5), gamma = c(1, 2), max_depth = c(4, 8))
expect_error(
-bst4 <- xgb.train(param, dtrain, nrounds = 2, watchlist,
+bst4 <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0,
callbacks = list(cb.reset.parameters(my_par)))
, NA) # NA = no error
# CV works as well
expect_error(
-bst4 <- xgb.cv(param, dtrain, nfold = 2, nrounds = 2,
+bst4 <- xgb.cv(param, dtrain, nfold = 2, nrounds = 2, verbose = 0,
callbacks = list(cb.reset.parameters(my_par)))
, NA) # NA = no error

# expect no learning with 0 learning rate
my_par <- list(eta = c(0., 0.))
-bstX <- xgb.train(param, dtrain, nrounds = 2, watchlist,
+bstX <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0,
callbacks = list(cb.reset.parameters(my_par)))
expect_false(is.null(bstX$evaluation_log$train_error))
er <- unique(bstX$evaluation_log$train_error)
@@ -167,7 +176,7 @@ test_that("cb.save.model works as expected", {
files <- c('xgboost_01.model', 'xgboost_02.model', 'xgboost.model')
for (f in files) if (file.exists(f)) file.remove(f)

-bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, eta = 1,
+bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, eta = 1, verbose = 0,
save_period = 1, save_name = "xgboost_%02d.model")
expect_true(file.exists('xgboost_01.model'))
expect_true(file.exists('xgboost_02.model'))
@@ -178,7 +187,8 @@ test_that("cb.save.model works as expected", {
expect_equal(bst$raw, b2$raw)

# save_period = 0 saves the last iteration's model
-bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, eta = 1, save_period = 0)
+bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, eta = 1, verbose = 0,
+save_period = 0)
expect_true(file.exists('xgboost.model'))
b2 <- xgb.load('xgboost.model')
expect_equal(bst$raw, b2$raw)
@@ -186,16 +196,6 @@ test_that("cb.save.model works as expected", {
for (f in files) if (file.exists(f)) file.remove(f)
})

-test_that("can store evaluation_log without printing", {
-expect_silent(
-bst <- xgb.train(param, dtrain, nrounds = 10, watchlist, eta = 1,
-verbose = 0, callbacks = list(cb.evaluation.log()))
-)
-expect_false(is.null(bst$evaluation_log))
-expect_false(is.null(bst$evaluation_log$train_error))
-expect_lt(bst$evaluation_log[, min(train_error)], 0.2)
-})
-
test_that("early stopping xgb.train works", {
set.seed(11)
expect_output(
@@ -211,6 +211,13 @@ test_that("early stopping xgb.train works", {
err_pred <- err(ltest, pred)
err_log <- bst$evaluation_log[bst$best_iteration, test_error]
expect_equal(err_log, err_pred, tolerance = 5e-6)
+
+set.seed(11)
+expect_silent(
+bst0 <- xgb.train(param, dtrain, nrounds = 20, watchlist, eta = 0.3,
+early_stopping_rounds = 3, maximize = FALSE, verbose = 0)
+)
+expect_equal(bst$evaluation_log, bst0$evaluation_log)
})

test_that("early stopping using a specific metric works", {
@@ -248,7 +255,7 @@ test_that("early stopping xgb.cv works", {
test_that("prediction in xgb.cv works", {
set.seed(11)
nrounds = 4
-cv <- xgb.cv(param, dtrain, nfold = 5, eta = 0.5, nrounds = nrounds, prediction = TRUE)
+cv <- xgb.cv(param, dtrain, nfold = 5, eta = 0.5, nrounds = nrounds, prediction = TRUE, verbose = 0)
expect_false(is.null(cv$evaluation_log))
expect_false(is.null(cv$pred))
expect_length(cv$pred, nrow(train$data))
@@ -258,7 +265,7 @@ test_that("prediction in xgb.cv works", {

# save CV models
set.seed(11)
-cvx <- xgb.cv(param, dtrain, nfold = 5, eta = 0.5, nrounds = nrounds, prediction = TRUE,
+cvx <- xgb.cv(param, dtrain, nfold = 5, eta = 0.5, nrounds = nrounds, prediction = TRUE, verbose = 0,
callbacks = list(cb.cv.predict(save_models = TRUE)))
expect_equal(cv$evaluation_log, cvx$evaluation_log)
expect_length(cvx$models, 5)
@@ -268,7 +275,7 @@ test_that("prediction in xgb.cv works", {
test_that("prediction in xgb.cv works for gblinear too", {
set.seed(11)
p <- list(booster = 'gblinear', objective = "reg:logistic", nthread = 2)
-cv <- xgb.cv(p, dtrain, nfold = 5, eta = 0.5, nrounds = 2, prediction = TRUE)
+cv <- xgb.cv(p, dtrain, nfold = 5, eta = 0.5, nrounds = 2, prediction = TRUE, verbose = 0)
expect_false(is.null(cv$evaluation_log))
expect_false(is.null(cv$pred))
expect_length(cv$pred, nrow(train$data))
@@ -300,7 +307,7 @@ test_that("prediction in xgb.cv for softprob works", {
expect_warning(
cv <- xgb.cv(data = as.matrix(iris[, -5]), label = lb, nfold = 4,
eta = 0.5, nrounds = 5, max_depth = 3, nthread = 2,
-subsample = 0.8, gamma = 2,
+subsample = 0.8, gamma = 2, verbose = 0,
prediction = TRUE, objective = "multi:softprob", num_class = 3)
, NA)
expect_false(is.null(cv$pred))
@@ -21,6 +21,15 @@ test_that("xgb.DMatrix: basic construction, saving, loading", {
dtest3 <- xgb.DMatrix(tmp_file)
unlink(tmp_file)
expect_equal(getinfo(dtest1, 'label'), getinfo(dtest3, 'label'))
+
+# from a libsvm text file
+tmp <- c("0 1:1 2:1","1 3:1","0 1:1")
+tmp_file <- 'tmp.libsvm'
+writeLines(tmp, tmp_file)
+dtest4 <- xgb.DMatrix(tmp_file)
+expect_equal(dim(dtest4), c(3, 4))
+expect_equal(getinfo(dtest4, 'label'), c(0,1,0))
+unlink(tmp_file)
})

test_that("xgb.DMatrix: getinfo & setinfo", {
@@ -3,7 +3,7 @@ context('Test helper functions')
require(xgboost)
require(data.table)
require(Matrix)
-require(vcd)
+require(vcd, quietly = TRUE)

set.seed(1982)
data(Arthritis)
@@ -15,10 +15,12 @@ sparse_matrix <- sparse.model.matrix(Improved~.-1, data = df)
label <- df[, ifelse(Improved == "Marked", 1, 0)]

bst.Tree <- xgboost(data = sparse_matrix, label = label, max_depth = 9,
-eta = 1, nthread = 2, nrounds = 10, objective = "binary:logistic", booster = "gbtree")
+eta = 1, nthread = 2, nrounds = 10, verbose = 0,
+objective = "binary:logistic", booster = "gbtree")

bst.GLM <- xgboost(data = sparse_matrix, label = label,
-eta = 1, nthread = 2, nrounds = 10, objective = "binary:logistic", booster = "gblinear")
+eta = 1, nthread = 2, nrounds = 10, verbose = 0,
+objective = "binary:logistic", booster = "gblinear")

feature.names <- colnames(sparse_matrix)

@@ -100,12 +102,37 @@ if (grepl('Windows', Sys.info()[['sysname']]) || grepl('Linux', Sys.info()[['sys
})
}

+test_that("xgb.Booster serializing as R object works", {
+saveRDS(bst.Tree, 'xgb.model.rds')
+bst <- readRDS('xgb.model.rds')
+dtrain <- xgb.DMatrix(sparse_matrix, label = label)
+expect_equal(predict(bst.Tree, dtrain), predict(bst, dtrain))
+expect_equal(xgb.dump(bst.Tree), xgb.dump(bst))
+xgb.save(bst, 'xgb.model')
+nil_ptr <- new("externalptr")
+class(nil_ptr) <- "xgb.Booster.handle"
+expect_true(identical(bst$handle, nil_ptr))
+bst <- xgb.Booster.complete(bst)
+expect_true(!identical(bst$handle, nil_ptr))
+expect_equal(predict(bst.Tree, dtrain), predict(bst, dtrain))
+})
+
test_that("xgb.model.dt.tree works with and without feature names", {
names.dt.trees <- c("Tree", "Node", "ID", "Feature", "Split", "Yes", "No", "Missing", "Quality", "Cover")
dt.tree <- xgb.model.dt.tree(feature_names = feature.names, model = bst.Tree)
expect_equal(names.dt.trees, names(dt.tree))
expect_equal(dim(dt.tree), c(162, 10))
-expect_output(str(xgb.model.dt.tree(model = bst.Tree)), 'Feature.*\\"3\\"')
+expect_output(str(dt.tree), 'Feature.*\\"Age\\"')
+
+dt.tree.0 <- xgb.model.dt.tree(model = bst.Tree)
+expect_equal(dt.tree, dt.tree.0)
+
+# when model contains no feature names:
+bst.Tree.x <- bst.Tree
+bst.Tree.x$feature_names <- NULL
+dt.tree.x <- xgb.model.dt.tree(model = bst.Tree.x)
+expect_output(str(dt.tree.x), 'Feature.*\\"3\\"')
+expect_equal(dt.tree[, -4, with=FALSE], dt.tree.x[, -4, with=FALSE])
})

test_that("xgb.model.dt.tree throws error for gblinear", {
@@ -116,7 +143,17 @@ test_that("xgb.importance works with and without feature names", {
importance.Tree <- xgb.importance(feature_names = feature.names, model = bst.Tree)
expect_equal(dim(importance.Tree), c(7, 4))
expect_equal(colnames(importance.Tree), c("Feature", "Gain", "Cover", "Frequency"))
-expect_output(str(xgb.importance(model = bst.Tree)), 'Feature.*\\"3\\"')
+expect_output(str(importance.Tree), 'Feature.*\\"Age\\"')
+
+importance.Tree.0 <- xgb.importance(model = bst.Tree)
+expect_equal(importance.Tree, importance.Tree.0)
+
+# when model contains no feature names:
+bst.Tree.x <- bst.Tree
+bst.Tree.x$feature_names <- NULL
+importance.Tree.x <- xgb.importance(model = bst.Tree)
+expect_equal(importance.Tree[, -1, with=FALSE], importance.Tree.x[, -1, with=FALSE])
+
imp2plot <- xgb.plot.importance(importance_matrix = importance.Tree)
expect_equal(colnames(imp2plot), c("Feature", "Gain", "Cover", "Frequency", "Importance"))
xgb.ggplot.importance(importance_matrix = importance.Tree)
@@ -10,7 +10,7 @@ train = matrix(x, ncol = 1)

test_that("monotone constraints for regression", {
bst = xgboost(data = train, label = y, max_depth = 2,
-eta = 0.1, nthread = 2, nrounds = 100,
+eta = 0.1, nthread = 2, nrounds = 100, verbose = 0,
monotone_constraints = -1)

pred = predict(bst, train)
@@ -9,24 +9,23 @@ dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label)

test_that("updating the model works", {
watchlist = list(train = dtrain, test = dtest)
-cb = list(cb.evaluation.log()) # to run silent, but store eval. log

# no-subsampling
p1 <- list(objective = "binary:logistic", max_depth = 2, eta = 0.05, nthread = 2)
set.seed(11)
-bst1 <- xgb.train(p1, dtrain, nrounds = 10, watchlist, verbose = 0, callbacks = cb)
+bst1 <- xgb.train(p1, dtrain, nrounds = 10, watchlist, verbose = 0)
tr1 <- xgb.model.dt.tree(model = bst1)

# with subsampling
p2 <- modifyList(p1, list(subsample = 0.1))
set.seed(11)
-bst2 <- xgb.train(p2, dtrain, nrounds = 10, watchlist, verbose = 0, callbacks = cb)
+bst2 <- xgb.train(p2, dtrain, nrounds = 10, watchlist, verbose = 0)
tr2 <- xgb.model.dt.tree(model = bst2)

# the same no-subsampling boosting with an extra 'refresh' updater:
p1r <- modifyList(p1, list(updater = 'grow_colmaker,prune,refresh', refresh_leaf = FALSE))
set.seed(11)
-bst1r <- xgb.train(p1r, dtrain, nrounds = 10, watchlist, verbose = 0, callbacks = cb)
+bst1r <- xgb.train(p1r, dtrain, nrounds = 10, watchlist, verbose = 0)
tr1r <- xgb.model.dt.tree(model = bst1r)
# all should be the same when no subsampling
expect_equal(bst1$evaluation_log, bst1r$evaluation_log)
@@ -35,7 +34,7 @@ test_that("updating the model works", {
# the same boosting with subsampling with an extra 'refresh' updater:
p2r <- modifyList(p2, list(updater = 'grow_colmaker,prune,refresh', refresh_leaf = FALSE))
set.seed(11)
-bst2r <- xgb.train(p2r, dtrain, nrounds = 10, watchlist, verbose = 0, callbacks = cb)
+bst2r <- xgb.train(p2r, dtrain, nrounds = 10, watchlist, verbose = 0)
tr2r <- xgb.model.dt.tree(model = bst2r)
# should be the same evaluation but different gains and larger cover
expect_equal(bst2$evaluation_log, bst2r$evaluation_log)
@@ -45,7 +44,7 @@ test_that("updating the model works", {

# process type 'update' for no-subsampling model, refreshing the tree stats AND leaves from training data:
p1u <- modifyList(p1, list(process_type = 'update', updater = 'refresh', refresh_leaf = TRUE))
-bst1u <- xgb.train(p1u, dtrain, nrounds = 10, watchlist, verbose = 0, callbacks = cb, xgb_model = bst1)
+bst1u <- xgb.train(p1u, dtrain, nrounds = 10, watchlist, verbose = 0, xgb_model = bst1)
tr1u <- xgb.model.dt.tree(model = bst1u)
# all should be the same when no subsampling
expect_equal(bst1$evaluation_log, bst1u$evaluation_log)
@@ -53,7 +52,7 @@ test_that("updating the model works", {

# process type 'update' for model with subsampling, refreshing only the tree stats from training data:
p2u <- modifyList(p2, list(process_type = 'update', updater = 'refresh', refresh_leaf = FALSE))
-bst2u <- xgb.train(p2u, dtrain, nrounds = 10, watchlist, verbose = 0, callbacks = cb, xgb_model = bst2)
+bst2u <- xgb.train(p2u, dtrain, nrounds = 10, watchlist, verbose = 0, xgb_model = bst2)
tr2u <- xgb.model.dt.tree(model = bst2u)
# should be the same evaluation but different gains and larger cover
expect_equal(bst2$evaluation_log, bst2u$evaluation_log)
@@ -66,7 +65,7 @@ test_that("updating the model works", {

# process type 'update' for no-subsampling model, refreshing only the tree stats from TEST data:
p1ut <- modifyList(p1, list(process_type = 'update', updater = 'refresh', refresh_leaf = FALSE))
-bst1ut <- xgb.train(p1ut, dtest, nrounds = 10, watchlist, verbose = 0, callbacks = cb, xgb_model = bst1)
+bst1ut <- xgb.train(p1ut, dtest, nrounds = 10, watchlist, verbose = 0, xgb_model = bst1)
tr1ut <- xgb.model.dt.tree(model = bst1ut)
# should be the same evaluations but different gains and smaller cover (test data is smaller)
expect_equal(bst1$evaluation_log, bst1ut$evaluation_log)
47
doc/build.md
47
doc/build.md
@ -189,7 +189,15 @@ There are several ways to install the package:
 
 ## R Package Installation
 
-You can install R package from cran just like other packages, or you can install from our weekly updated drat repo:
+### Installing pre-packaged version
+
+You can install xgboost from CRAN just like any other R package:
+
+```r
+install.packages("xgboost")
+```
+
+Or you can install it from our weekly updated drat repo:
+
 ```r
 install.packages("drat", repos="https://cran.rstudio.com")
@ -197,10 +205,8 @@ drat:::addRepo("dmlc")
 install.packages("xgboost", repos="http://dmlc.ml/drat/", type = "source")
 ```
 
-If you would like to use the latest xgboost version and already compiled xgboost, use `library(devtools); install('xgboost/R-package')` to install manually xgboost package (change the path accordingly to where you compiled xgboost).
-
-For OSX users, single threaded version will be installed, to install multi-threaded version.
-First follow [Building on OSX](#building-on-osx) to get the OpenMP enabled compiler, then:
+For OSX users, single threaded version will be installed. To install multi-threaded version,
+first follow [Building on OSX](#building-on-osx) to get the OpenMP enabled compiler, then:
 
 - Set the `Makevars` file in highest piority for R.
 
@ -214,24 +220,35 @@ First follow [Building on OSX](#building-on-osx) to get the OpenMP enabled compi
 install.packages("xgboost", repos="http://dmlc.ml/drat/", type = "source")
 ```
 
-Due to the usage of submodule, `install_github` is no longer support to install the
-latest version of R package. To install the latest version run the following bash script,
+### Installing the development version
+
+Make sure you have installed git and a recent C++ compiler supporting C++11 (e.g., g++-4.6 or higher).
+On Windows, Rtools must be installed, and its bin directory has to be added to PATH during the installation.
+And see the previous subsection for an OSX tip.
+
+Due to the use of git-submodules, `devtools::install_github` can no longer be used to install the latest version of R package.
+Thus, one has to run git to check out the code first:
 
 ```bash
 git clone --recursive https://github.com/dmlc/xgboost
 cd xgboost
 git submodule init
 git submodule update
-alias make='mingw32-make'
-cd dmlc-core
-make -j4
-cd ../rabit
-make lib/librabit_empty.a -j4
-cd ..
-cp make/mingw64.mk config.mk
-make -j4
+cd R-package
+R CMD INSTALL .
 ```
 
+If the last line fails because of "R: command not found", it means that R was not set up to run from command line.
+In this case, just start R as you would normally do and run the following:
+
+```r
+setwd('wherever/you/cloned/it/xgboost/R-package/')
+install.packages('.', repos = NULL, type="source")
+```
+
+If all fails, try [building the shared library](#build-the-shared-library) to see whether a problem is specific to R package or not.
+
 
 ## Trouble Shooting
 
 1. **Compile failed after `git pull`**
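Not part of the diff above: once the package is installed by any of the routes described in build.md, a quick smoke test such as the following can confirm that the installation works (and, via `nthread`, whether a multi-threaded build is active). The dataset and parameter choices are illustrative assumptions, not taken from build.md.

```r
library(xgboost)

# Train a tiny model on the demo data shipped with the package.
data(agaricus.train, package = 'xgboost')
bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label,
               nrounds = 2, objective = 'binary:logistic',
               nthread = 2)  # >1 only has an effect with an OpenMP-enabled build

# A working install trains two rounds and produces predictions without error.
head(predict(bst, agaricus.train$data))
```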