From a0aa305268566390144a6684fc1c7c521b5bdc0c Mon Sep 17 00:00:00 2001 From: Vadim Khotilovich Date: Mon, 27 Jun 2016 01:59:58 -0500 Subject: [PATCH] [R] docs update - callbacks and parameter style --- R-package/man/callbacks.Rd | 21 ++--- R-package/man/cb.cv.predict.Rd | 43 ++++++++++ R-package/man/cb.early.stop.Rd | 63 ++++++++++++++ R-package/man/cb.evaluation.log.Rd | 32 +++++++ R-package/man/cb.print.evaluation.Rd | 28 ++++++ R-package/man/cb.reset.parameters.Rd | 37 ++++++++ R-package/man/cb.save.model.Rd | 34 ++++++++ R-package/man/get.paths.to.leaf.Rd | 2 +- R-package/man/predict.xgb.Booster.Rd | 117 ++++++++++++++++++++----- R-package/man/print.xgb.Booster.Rd | 4 +- R-package/man/print.xgb.cv.Rd | 4 +- R-package/man/xgb.DMatrix.save.Rd | 6 +- R-package/man/xgb.attr.Rd | 10 ++- R-package/man/xgb.create.features.Rd | 8 +- R-package/man/xgb.cv.Rd | 82 ++++++++++++------ R-package/man/xgb.dump.Rd | 12 +-- R-package/man/xgb.importance.Rd | 9 +- R-package/man/xgb.load.Rd | 4 +- R-package/man/xgb.model.dt.tree.Rd | 6 +- R-package/man/xgb.parameters.Rd | 4 +- R-package/man/xgb.plot.deepness.Rd | 6 +- R-package/man/xgb.plot.importance.Rd | 13 +-- R-package/man/xgb.plot.multi.trees.Rd | 20 +++-- R-package/man/xgb.plot.tree.Rd | 15 ++-- R-package/man/xgb.save.Rd | 6 +- R-package/man/xgb.save.raw.Rd | 5 +- R-package/man/xgb.train.Rd | 118 +++++++++++++++++--------- R-package/man/xgboost-deprecated.Rd | 17 ++++ 28 files changed, 564 insertions(+), 162 deletions(-) create mode 100644 R-package/man/cb.cv.predict.Rd create mode 100644 R-package/man/cb.early.stop.Rd create mode 100644 R-package/man/cb.evaluation.log.Rd create mode 100644 R-package/man/cb.print.evaluation.Rd create mode 100644 R-package/man/cb.reset.parameters.Rd create mode 100644 R-package/man/cb.save.model.Rd create mode 100644 R-package/man/xgboost-deprecated.Rd diff --git a/R-package/man/callbacks.Rd b/R-package/man/callbacks.Rd index 9a7adeb77..d49f104f2 100644 --- a/R-package/man/callbacks.Rd +++ b/R-package/man/callbacks.Rd @@ -19,18 +19,19 @@ WARNING: side-effects!!! Be aware that these callback functions access and modif the environment from which they are called from, which is a fairly uncommon thing to do in R. To write a custom callback closure, make sure you first understand the main concepts about R envoronments. -Check either the R docs on \code{\link[base]{environment}} or the -\href{http://adv-r.had.co.nz/Environments.html}{Environments chapter} from Hadley Wickham's "Advanced R" book. -Then take a look at the code of \code{cb.reset_learning_rate} for a simple example, -and see the \code{cb.log_evaluation} code for something more involved. -Also, you would need to get familiar with the objects available inside of the \code{xgb.train} internal environment. +Check either R documentation on \code{\link[base]{environment}} or the +\href{http://adv-r.had.co.nz/Environments.html}{Environments chapter} from the "Advanced R" +book by Hadley Wickham. Further, the best option is to read the code of some of the existing callbacks - +choose ones that do something similar to what you want to achieve. Also, you would need to get familiar +with the objects available inside of the \code{xgb.train} and \code{xgb.cv} internal environments. 
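As a concrete illustration of the closure pattern described above (a sketch added for review, not part of this patch): the callback below relies only on the calling-frame objects named in these help pages (iteration, bst_evaluation); the 'call' and 'name' attributes mirror what the built-in callbacks appear to set and are an assumption here, not a documented requirement.

# Sketch of a user-defined callback closure; the function name is hypothetical.
cb.track.best <- function() {
  best <- -Inf
  callback <- function(env = parent.frame()) {
    # read the current evaluation from xgb.train's internal environment
    score <- env$bst_evaluation[1]
    if (score > best) {
      best <<- score
      cat("iteration ", env$iteration, ": new best score ", score, "\n", sep = "")
    }
  }
  attr(callback, 'call') <- match.call()
  attr(callback, 'name') <- 'cb.track.best'
  callback
}
# usage (assuming an existing dtrain and params list):
# xgb.train(params, dtrain, nrounds = 10, watchlist = list(train = dtrain),
#           callbacks = list(cb.track.best()))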
} \seealso{ -\code{\link{cb.print_evaluation}}, -\code{\link{cb.log_evaluation}}, -\code{\link{cb.reset_parameters}}, -\code{\link{cb.early_stop}}, -\code{\link{cb.save_model}}, +\code{\link{cb.print.evaluation}}, +\code{\link{cb.evaluation.log}}, +\code{\link{cb.reset.parameters}}, +\code{\link{cb.early.stop}}, +\code{\link{cb.save.model}}, +\code{\link{cb.cv.predict}}, \code{\link{xgb.train}}, \code{\link{xgb.cv}} } diff --git a/R-package/man/cb.cv.predict.Rd b/R-package/man/cb.cv.predict.Rd new file mode 100644 index 000000000..34e9f813e --- /dev/null +++ b/R-package/man/cb.cv.predict.Rd @@ -0,0 +1,43 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/callbacks.R +\name{cb.cv.predict} +\alias{cb.cv.predict} +\title{Callback closure for returning cross-validation based predictions.} +\usage{ +cb.cv.predict(save_models = FALSE) +} +\arguments{ +\item{save_models}{a flag for whether to save the folds' models.} +} +\value{ +Predictions are returned inside of the \code{pred} element, which is either a vector or a matrix, +depending on the number of prediction outputs per data row. The order of predictions corresponds +to the order of rows in the original dataset. Note that when a custom \code{folds} list is +provided in \code{xgb.cv}, the predictions would only be returned properly when this list is a +non-overlapping list of k sets of indices, as in a standard k-fold CV. The predictions would not be +meaningful when user-profided folds have overlapping indices as in, e.g., random sampling splits. +When some of the indices in the training dataset are not included into user-provided \code{folds}, +their prediction value would be \code{NA}. +} +\description{ +Callback closure for returning cross-validation based predictions. +} +\details{ +This callback function saves predictions for all of the test folds, +and also allows to save the folds' models. + +It is a "finalizer" callback and it uses early stopping information whenever it is available, +thus it must be run after the early stopping callback if the early stopping is used. + +Callback function expects the following values to be set in its calling frame: +\code{bst_folds}, +\code{basket}, +\code{data}, +\code{end_iteration}, +\code{num_parallel_tree}, +\code{num_class}. +} +\seealso{ +\code{\link{callbacks}} +} + diff --git a/R-package/man/cb.early.stop.Rd b/R-package/man/cb.early.stop.Rd new file mode 100644 index 000000000..eec30d7b5 --- /dev/null +++ b/R-package/man/cb.early.stop.Rd @@ -0,0 +1,63 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/callbacks.R +\name{cb.early.stop} +\alias{cb.early.stop} +\title{Callback closure to activate the early stopping.} +\usage{ +cb.early.stop(stopping_rounds, maximize = FALSE, metric_name = NULL, + verbose = TRUE) +} +\arguments{ +\item{stopping_rounds}{The number of rounds with no improvement in +the evaluation metric in order to stop the training.} + +\item{maximize}{whether to maximize the evaluation metric} + +\item{metric_name}{the name of an evaluation column to use as a criteria for early +stopping. If not set, the last column would be used. +Let's say the test data in \code{watchlist} was labelled as \code{dtest}, +and one wants to use the AUC in test data for early stopping regardless of where +it is in the \code{watchlist}, then one of the following would need to be set: +\code{metric_name='dtest-auc'} or \code{metric_name='dtest_auc'}. 
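A short usage sketch for cb.cv.predict as documented above; the data and parameter values are arbitrary, and prediction = TRUE would be the equivalent shortcut when the fold models are not needed.

library(xgboost)
data(agaricus.train, package = 'xgboost')
dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
cv <- xgb.cv(params = list(max_depth = 2, eta = 1, objective = "binary:logistic"),
             data = dtrain, nrounds = 3, nfold = 5,
             callbacks = list(cb.cv.predict(save_models = TRUE)))
str(cv$pred)       # out-of-fold predictions, in the row order of dtrain
length(cv$models)  # one booster per fold, kept because save_models = TRUE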
+All dash '-' characters in metric names are considered equivalent to '_'.} + +\item{verbose}{whether to print the early stopping information.} +} +\description{ +Callback closure to activate the early stopping. +} +\details{ +This callback function determines the condition for early stopping +by setting the \code{stop_condition = TRUE} flag in its calling frame. + +The following additional fields are assigned to the model's R object: +\itemize{ +\item \code{best_score} the evaluation score at the best iteration +\item \code{best_iteration} at which boosting iteration the best score has occurred (1-based index) +\item \code{best_ntreelimit} to use with the \code{ntreelimit} parameter in \code{predict}. + It differs from \code{best_iteration} in multiclass or random forest settings. +} + +The Same values are also stored as xgb-attributes: +\itemize{ +\item \code{best_iteration} is stored as a 0-based iteration index (for interoperability of binary models) +\item \code{best_msg} message string is also stored. +} + +At least one data element is required in the evaluation watchlist for early stopping to work. + +Callback function expects the following values to be set in its calling frame: +\code{stop_condition}, +\code{bst_evaluation}, +\code{rank}, +\code{bst} (or \code{bst_folds} and \code{basket}), +\code{iteration}, +\code{begin_iteration}, +\code{end_iteration}, +\code{num_parallel_tree}. +} +\seealso{ +\code{\link{callbacks}}, +\code{\link{xgb.attr}} +} + diff --git a/R-package/man/cb.evaluation.log.Rd b/R-package/man/cb.evaluation.log.Rd new file mode 100644 index 000000000..a71b7f8d3 --- /dev/null +++ b/R-package/man/cb.evaluation.log.Rd @@ -0,0 +1,32 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/callbacks.R +\name{cb.evaluation.log} +\alias{cb.evaluation.log} +\title{Callback closure for logging the evaluation history} +\usage{ +cb.evaluation.log() +} +\description{ +Callback closure for logging the evaluation history +} +\details{ +This callback function appends the current iteration evaluation results \code{bst_evaluation} +available in the calling parent frame to the \code{evaluation_log} list in a calling frame. + +The finalizer callback (called with \code{finalize = TURE} in the end) converts +the \code{evaluation_log} list into a final data.table. + +The iteration evaluation result \code{bst_evaluation} must be a named numeric vector. + +Note: in the column names of the final data.table, the dash '-' character is replaced with +the underscore '_' in order to make the column names more like regular R identifiers. + +Callback function expects the following values to be set in its calling frame: +\code{evaluation_log}, +\code{bst_evaluation}, +\code{iteration}. +} +\seealso{ +\code{\link{callbacks}} +} + diff --git a/R-package/man/cb.print.evaluation.Rd b/R-package/man/cb.print.evaluation.Rd new file mode 100644 index 000000000..aec57fe2d --- /dev/null +++ b/R-package/man/cb.print.evaluation.Rd @@ -0,0 +1,28 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/callbacks.R +\name{cb.print.evaluation} +\alias{cb.print.evaluation} +\title{Callback closure for printing the result of evaluation} +\usage{ +cb.print.evaluation(period = 1) +} +\arguments{ +\item{period}{results would be printed every number of periods} +} +\description{ +Callback closure for printing the result of evaluation +} +\details{ +The callback function prints the result of evaluation at every \code{period} iterations. 
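The early stopping, evaluation-log and printing callbacks documented here are normally engaged indirectly through xgb.train / xgb.cv arguments; a sketch with arbitrary data and settings:

library(xgboost)
data(agaricus.train, package = 'xgboost')
data(agaricus.test, package = 'xgboost')
dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
dtest  <- xgb.DMatrix(agaricus.test$data,  label = agaricus.test$label)
bst <- xgb.train(params = list(max_depth = 2, eta = 0.3,
                               objective = "binary:logistic", eval_metric = "auc"),
                 data = dtrain, nrounds = 50,
                 watchlist = list(train = dtrain, dtest = dtest),
                 print_every_n = 5,          # passed to cb.print.evaluation
                 early_stopping_rounds = 3,  # engages cb.early.stop
                 maximize = TRUE)
bst$best_iteration   # fields assigned by cb.early.stop
bst$best_score
bst$evaluation_log   # data.table built by cb.evaluation.log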
+The initial and the last iteration's evaluations are always printed.
+
+Callback function expects the following values to be set in its calling frame:
+\code{bst_evaluation} (also \code{bst_evaluation_err} when available),
+\code{iteration},
+\code{begin_iteration},
+\code{end_iteration}.
+}
+\seealso{
+\code{\link{callbacks}}
+}
+
diff --git a/R-package/man/cb.reset.parameters.Rd b/R-package/man/cb.reset.parameters.Rd
new file mode 100644
index 000000000..24965c815
--- /dev/null
+++ b/R-package/man/cb.reset.parameters.Rd
@@ -0,0 +1,37 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/callbacks.R
+\name{cb.reset.parameters}
+\alias{cb.reset.parameters}
+\title{Callback closure for resetting the booster's parameters at each iteration.}
+\usage{
+cb.reset.parameters(new_params)
+}
+\arguments{
+\item{new_params}{a list where each element corresponds to a parameter that needs to be reset.
+Each element's value must be either a vector of values of length \code{nrounds}
+to be set at each iteration,
+or a function of two parameters \code{learning_rates(iteration, nrounds)}
+which returns a new parameter value by using the current iteration number
+and the total number of boosting rounds.}
+}
+\description{
+Callback closure for resetting the booster's parameters at each iteration.
+}
+\details{
+This is a "pre-iteration" callback function used to reset the booster's parameters
+at the beginning of each iteration.
+
+Note that when training is resumed from some previous model, and a function is used to
+reset a parameter value, the \code{nrounds} argument in this function would be the
+number of boosting rounds in the current training.
+
+Callback function expects the following values to be set in its calling frame:
+\code{bst} or \code{bst_folds},
+\code{iteration},
+\code{begin_iteration},
+\code{end_iteration}.
+}
+\seealso{
+\code{\link{callbacks}}
+}
+
diff --git a/R-package/man/cb.save.model.Rd b/R-package/man/cb.save.model.Rd
new file mode 100644
index 000000000..eef9b6295
--- /dev/null
+++ b/R-package/man/cb.save.model.Rd
@@ -0,0 +1,34 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/callbacks.R
+\name{cb.save.model}
+\alias{cb.save.model}
+\title{Callback closure for saving a model file.}
+\usage{
+cb.save.model(save_period = 0, save_name = "xgboost.model")
+}
+\arguments{
+\item{save_period}{save the model to disk after every
+\code{save_period} iterations; 0 means save the model at the end.}
+
+\item{save_name}{the name or path for the saved model file.
+It can contain a \code{\link[base]{sprintf}} formatting specifier
+to include the integer iteration number in the file name.
+E.g., with \code{save_name} = 'xgboost_%04d.model',
+the file saved at iteration 50 would be named "xgboost_0050.model".}
+}
+\description{
+Callback closure for saving a model file.
+}
+\details{
+This callback function allows saving the xgboost model file, either periodically after every \code{save_period} iterations or at the end of training.
+
+Callback function expects the following values to be set in its calling frame:
+\code{bst},
+\code{iteration},
+\code{begin_iteration},
+\code{end_iteration}.
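Besides the vector form shown later in the xgb.train examples, new_params elements for cb.reset.parameters can be functions of (iteration, nrounds); a sketch with an arbitrary decay schedule:

library(xgboost)
data(agaricus.train, package = 'xgboost')
dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
# halve the learning rate every 5 iterations (an arbitrary illustrative schedule)
my_eta <- function(iteration, nrounds) 0.5 * 0.5 ^ ((iteration - 1) %/% 5)
bst <- xgb.train(params = list(max_depth = 2, objective = "binary:logistic"),
                 data = dtrain, nrounds = 20,
                 watchlist = list(train = dtrain),
                 callbacks = list(cb.reset.parameters(list(eta = my_eta))))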
+} +\seealso{ +\code{\link{callbacks}} +} + diff --git a/R-package/man/get.paths.to.leaf.Rd b/R-package/man/get.paths.to.leaf.Rd index 1fdcfd5d7..8b19ae6d8 100644 --- a/R-package/man/get.paths.to.leaf.Rd +++ b/R-package/man/get.paths.to.leaf.Rd @@ -4,7 +4,7 @@ \alias{get.paths.to.leaf} \title{Extract path from root to leaf from data.table} \usage{ -get.paths.to.leaf(dt.tree) +get.paths.to.leaf(dt_tree) } \arguments{ \item{dt.tree}{data.table containing the nodes and edges of the trees} diff --git a/R-package/man/predict.xgb.Booster.Rd b/R-package/man/predict.xgb.Booster.Rd index 504037937..2dc537112 100644 --- a/R-package/man/predict.xgb.Booster.Rd +++ b/R-package/man/predict.xgb.Booster.Rd @@ -6,53 +6,124 @@ \title{Predict method for eXtreme Gradient Boosting model} \usage{ \method{predict}{xgb.Booster}(object, newdata, missing = NA, - outputmargin = FALSE, ntreelimit = NULL, predleaf = FALSE) + outputmargin = FALSE, ntreelimit = NULL, predleaf = FALSE, + reshape = FALSE) \method{predict}{xgb.Booster.handle}(object, ...) } \arguments{ \item{object}{Object of class \code{xgb.Booster} or \code{xgb.Booster.handle}} -\item{newdata}{takes \code{matrix}, \code{dgCMatrix}, local data file or -\code{xgb.DMatrix}.} +\item{newdata}{takes \code{matrix}, \code{dgCMatrix}, local data file or \code{xgb.DMatrix}.} -\item{missing}{Missing is only used when input is dense matrix, pick a float -value that represents missing value. Sometime a data use 0 or other extreme value to represents missing values.} +\item{missing}{Missing is only used when input is dense matrix. Pick a float value that represents +missing values in data (e.g., sometimes 0 or some other extreme value is used).} -\item{outputmargin}{whether the prediction should be shown in the original -value of sum of functions, when outputmargin=TRUE, the prediction is -untransformed margin value. In logistic regression, outputmargin=T will -output value before logistic transformation.} +\item{outputmargin}{whether the prediction should be returned in the for of original untransformed +sum of predictions from boosting iterations' results. E.g., setting \code{outputmargin=TRUE} for +logistic regression would result in predictions for log-odds instead of probabilities.} -\item{ntreelimit}{limit number of trees used in prediction, this parameter is -only valid for gbtree, but not for gblinear. set it to be value bigger -than 0. It will use all trees by default.} +\item{ntreelimit}{limit the number of model's trees or boosting iterations used in prediction (see Details). +It will use all the trees by default (\code{NULL} value).} -\item{predleaf}{whether predict leaf index instead. If set to TRUE, the output will be a matrix object.} +\item{predleaf}{whether predict leaf index instead.} -\item{...}{Parameters pass to \code{predict.xgb.Booster}} +\item{reshape}{whether to reshape the vector of predictions to a matrix form when there are several +prediction outputs per case. This option has no effect when \code{predleaf = TRUE}.} + +\item{...}{Parameters passed to \code{predict.xgb.Booster}} +} +\value{ +For regression or binary classification, it returns a vector of length \code{nrows(newdata)}. +For multiclass classification, either a \code{num_class * nrows(newdata)} vector or +a \code{(nrows(newdata), num_class)} dimension matrix is returned, depending on +the \code{reshape} value. + +When \code{predleaf = TRUE}, the output is a matrix object with the +number of columns corresponding to the number of trees. 
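A sketch of the new reshape argument of predict in a multiclass setting (iris is used only for illustration; it complements the manual matrix() reshaping shown in the examples below):

library(xgboost)
lb <- as.numeric(iris$Species) - 1
bst <- xgboost(data = as.matrix(iris[, -5]), label = lb,
               max_depth = 4, eta = 0.5, nrounds = 10,
               objective = "multi:softprob", num_class = 3)
# reshape = TRUE returns an nrow(newdata) x num_class matrix directly
pred_matrix <- predict(bst, as.matrix(iris[, -5]), reshape = TRUE)
dim(pred_matrix)
head(max.col(pred_matrix) - 1)   # most probable class labels (0-based)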
} \description{ Predicted values based on either xgboost model or model handle object. } \details{ -The option \code{ntreelimit} purpose is to let the user train a model with lots -of trees but use only the first trees for prediction to avoid overfitting -(without having to train a new model with less trees). +Note that \code{ntreelimit} is not necesserily equal to the number of boosting iterations +and it is not necesserily equal to the number of trees in a model. +E.g., in a random forest-like model, \code{ntreelimit} would limit the number of trees. +But for multiclass classification, there are multiple trees per iteration, +but \code{ntreelimit} limits the number of boosting iterations. -The option \code{predleaf} purpose is inspired from ยง3.1 of the paper -\code{Practical Lessons from Predicting Clicks on Ads at Facebook}. -The idea is to use the model as a generator of new features which capture non linear link -from original features. +Also note that \code{ntreelimit} would currently do nothing for predictions from gblinear, +since gblinear doesn't keep its boosting history. + +One possible practical applications of the \code{predleaf} option is to use the model +as a generator of new features which capture non-linearity and interactions, +e.g., as implemented in \code{\link{xgb.create.features}}. } \examples{ +## binary classification: + data(agaricus.train, package='xgboost') data(agaricus.test, package='xgboost') train <- agaricus.train test <- agaricus.test -bst <- xgboost(data = train$data, label = train$label, max.depth = 2, - eta = 1, nthread = 2, nround = 2,objective = "binary:logistic") +bst <- xgboost(data = train$data, label = train$label, max_depth = 2, + eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic") +# use all trees by default pred <- predict(bst, test$data) +# use only the 1st tree +pred <- predict(bst, test$data, ntreelimit = 1) + + +## multiclass classification in iris dataset: + +lb <- as.numeric(iris$Species) - 1 +num_class <- 3 +set.seed(11) +bst <- xgboost(data = as.matrix(iris[, -5]), label = lb, + max_depth = 4, eta = 0.5, nthread = 2, nrounds = 10, subsample = 0.5, + objective = "multi:softprob", num_class = num_class) +# predict for softmax returns num_class probability numbers per case: +pred <- predict(bst, as.matrix(iris[, -5])) +str(pred) +# reshape it to a num_class-columns matrix +pred <- matrix(pred, ncol=num_class, byrow=TRUE) +# convert the probabilities to softmax labels +pred_labels <- max.col(pred) - 1 +# the following should result in the same error as seen in the last iteration +sum(pred_labels != lb)/length(lb) + +# compare that to the predictions from softmax: +set.seed(11) +bst <- xgboost(data = as.matrix(iris[, -5]), label = lb, + max_depth = 4, eta = 0.5, nthread = 2, nrounds = 10, subsample = 0.5, + objective = "multi:softmax", num_class = num_class) +pred <- predict(bst, as.matrix(iris[, -5])) +str(pred) +all.equal(pred, pred_labels) +# prediction from using only 5 iterations should result +# in the same error as seen in iteration 5: +pred5 <- predict(bst, as.matrix(iris[, -5]), ntreelimit=5) +sum(pred5 != lb)/length(lb) + + +## random forest-like model of 25 trees for binary classification: + +set.seed(11) +bst <- xgboost(data = train$data, label = train$label, max_depth = 5, + nthread = 2, nrounds = 1, objective = "binary:logistic", + num_parallel_tree = 25, subsample = 0.6, colsample_bytree = 0.1) +# Inspect the prediction error vs number of trees: +lb <- test$label +dtest <- xgb.DMatrix(test$data, label=lb) +err 
<- sapply(1:25, function(n) { + pred <- predict(bst, dtest, ntreelimit=n) + sum((pred > 0.5) != lb)/length(lb) +}) +plot(err, type='l', ylim=c(0,0.1), xlab='#trees') + +} +\seealso{ +\code{\link{xgb.train}}. } diff --git a/R-package/man/print.xgb.Booster.Rd b/R-package/man/print.xgb.Booster.Rd index 73c91295e..7f13c328c 100644 --- a/R-package/man/print.xgb.Booster.Rd +++ b/R-package/man/print.xgb.Booster.Rd @@ -19,8 +19,8 @@ Print information about xgb.Booster. \examples{ data(agaricus.train, package='xgboost') train <- agaricus.train -bst <- xgboost(data = train$data, label = train$label, max.depth = 2, - eta = 1, nthread = 2, nround = 2, objective = "binary:logistic") +bst <- xgboost(data = train$data, label = train$label, max_depth = 2, + eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic") attr(bst, 'myattr') <- 'memo' print(bst) diff --git a/R-package/man/print.xgb.cv.Rd b/R-package/man/print.xgb.cv.Rd index 00282ce07..cfe8878c6 100644 --- a/R-package/man/print.xgb.cv.Rd +++ b/R-package/man/print.xgb.cv.Rd @@ -23,8 +23,8 @@ including the best iteration (when available). \examples{ data(agaricus.train, package='xgboost') train <- agaricus.train -cv <- xgb.cv(data = train$data, label = train$label, nfold = 5, max.depth = 2, - eta = 1, nthread = 2, nround = 2, objective = "binary:logistic") +cv <- xgb.cv(data = train$data, label = train$label, nfold = 5, max_depth = 2, + eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic") print(cv) print(cv, verbose=TRUE) diff --git a/R-package/man/xgb.DMatrix.save.Rd b/R-package/man/xgb.DMatrix.save.Rd index 78348c3fa..9b0e835be 100644 --- a/R-package/man/xgb.DMatrix.save.Rd +++ b/R-package/man/xgb.DMatrix.save.Rd @@ -4,12 +4,12 @@ \alias{xgb.DMatrix.save} \title{Save xgb.DMatrix object to binary file} \usage{ -xgb.DMatrix.save(DMatrix, fname) +xgb.DMatrix.save(dmatrix, fname) } \arguments{ -\item{DMatrix}{the DMatrix object} +\item{dmatrix}{the \code{xgb.DMatrix} object} -\item{fname}{the name of the binary file.} +\item{fname}{the name of the file to write.} } \description{ Save xgb.DMatrix object to binary file diff --git a/R-package/man/xgb.attr.Rd b/R-package/man/xgb.attr.Rd index 3429da958..e8992e714 100644 --- a/R-package/man/xgb.attr.Rd +++ b/R-package/man/xgb.attr.Rd @@ -52,16 +52,20 @@ Use \code{\link{`xgb.parameters<-`}} to set or change model parameters. The attribute setters would usually work more efficiently for \code{xgb.Booster.handle} than for \code{xgb.Booster}, since only just a handle (pointer) would need to be copied. +That would only matter if attributes need to be set many times. +Note, however, that when feeding a handle of an \code{xgb.Booster} object to the attribute setters, +the raw model cache of an \code{xgb.Booster} object would not be automatically updated, +and it would be user's responsibility to call \code{xgb.save.raw} to update it. The \code{xgb.attributes<-} setter either updates the existing or adds one or several attributes, -but doesn't delete the existing attributes which don't have their names in \code{names(attributes)}. +but it doesn't delete the other existing attributes. 
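Returning to the predleaf option of predict.xgb.Booster described earlier, a sketch of using leaf indices as generated features; the encoding step is only hinted at in a comment, since xgb.create.features automates it:

library(xgboost)
data(agaricus.train, package = 'xgboost')
bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label,
               max_depth = 2, eta = 1, nrounds = 4, objective = "binary:logistic")
leaves <- predict(bst, agaricus.train$data, predleaf = TRUE)
dim(leaves)    # one column of leaf indices per tree
head(leaves)
# one-hot encoding these indices and appending them to the original features
# is what xgb.create.features automates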
} \examples{ data(agaricus.train, package='xgboost') train <- agaricus.train -bst <- xgboost(data = train$data, label = train$label, max.depth = 2, - eta = 1, nthread = 2, nround = 2, objective = "binary:logistic") +bst <- xgboost(data = train$data, label = train$label, max_depth = 2, + eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic") xgb.attr(bst, "my_attribute") <- "my attribute value" print(xgb.attr(bst, "my_attribute")) diff --git a/R-package/man/xgb.create.features.Rd b/R-package/man/xgb.create.features.Rd index cab2ab654..4d1b8a152 100644 --- a/R-package/man/xgb.create.features.Rd +++ b/R-package/man/xgb.create.features.Rd @@ -4,12 +4,14 @@ \alias{xgb.create.features} \title{Create new features from a previously learned model} \usage{ -xgb.create.features(model, training.data) +xgb.create.features(model, data, ...) } \arguments{ \item{model}{decision tree boosting model learned on the original data} -\item{training.data}{original data (usually provided as a \code{dgCMatrix} matrix)} +\item{data}{original data (usually provided as a \code{dgCMatrix} matrix)} + +\item{...}{currently not used} } \value{ \code{dgCMatrix} matrix including both the original data and the new features. @@ -60,7 +62,7 @@ data(agaricus.test, package='xgboost') dtrain <- xgb.DMatrix(data = agaricus.train$data, label = agaricus.train$label) dtest <- xgb.DMatrix(data = agaricus.test$data, label = agaricus.test$label) -param <- list(max.depth=2, eta=1, silent=1, objective='binary:logistic') +param <- list(max_depth=2, eta=1, silent=1, objective='binary:logistic') nround = 4 bst = xgb.train(params = param, data = dtrain, nrounds = nround, nthread = 2) diff --git a/R-package/man/xgb.cv.Rd b/R-package/man/xgb.cv.Rd index 8a54f287d..954702d34 100644 --- a/R-package/man/xgb.cv.Rd +++ b/R-package/man/xgb.cv.Rd @@ -7,7 +7,7 @@ xgb.cv(params = list(), data, nrounds, nfold, label = NULL, missing = NA, prediction = FALSE, showsd = TRUE, metrics = list(), obj = NULL, feval = NULL, stratified = TRUE, folds = NULL, verbose = TRUE, - print.every.n = 1L, early.stop.round = NULL, maximize = NULL, + print_every_n = 1L, early_stopping_rounds = NULL, maximize = NULL, callbacks = list(), ...) } \arguments{ @@ -19,11 +19,11 @@ xgb.cv(params = list(), data, nrounds, nfold, label = NULL, missing = NA, \item \code{binary:logistic} logistic regression for classification } \item \code{eta} step size of each boosting step - \item \code{max.depth} maximum depth of the tree + \item \code{max_depth} maximum depth of the tree \item \code{nthread} number of thread used in training, if not set, all threads are used } - See \link{xgb.train} for further details. + See \code{\link{xgb.train}} for further details. See also demo/ for walkthrough example in R.} \item{data}{takes an \code{xgb.DMatrix} or \code{Matrix} as the input.} @@ -32,14 +32,16 @@ xgb.cv(params = list(), data, nrounds, nfold, label = NULL, missing = NA, \item{nfold}{the original dataset is randomly partitioned into \code{nfold} equal size subsamples.} -\item{label}{option field, when data is \code{Matrix}} +\item{label}{vector of response values. Should be provided only when data is \code{DMatrix}.} -\item{missing}{Missing is only used when input is dense matrix, pick a float -value that represents missing value. Sometime a data use 0 or other extreme value to represents missing values.} +\item{missing}{is only used when input is a dense matrix. By default is set to NA, which means +that NA values should be considered as 'missing' by the algorithm. 
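A sketch of the missing argument with a dense matrix in which zeros stand for unrecorded values (synthetic data; in most cases the default missing = NA is what you want):

library(xgboost)
set.seed(1)
x <- matrix(runif(500), ncol = 5)
x[sample(length(x), 50)] <- 0     # pretend 0 means "value not recorded"
y <- as.numeric(rowSums(x) > 2.5)
cv <- xgb.cv(params = list(max_depth = 2, objective = "binary:logistic"),
             data = x, label = y, missing = 0, nrounds = 5, nfold = 3)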
+Sometimes, 0 or other extreme value might be used to represent missing values.} -\item{prediction}{A logical value indicating whether to return the prediction vector.} +\item{prediction}{A logical value indicating whether to return the test fold predictions +from each CV model. This parameter engages the \code{\link{cb.cv.predict}} callback.} -\item{showsd}{\code{boolean}, whether show standard deviation of cross validation} +\item{showsd}{\code{boolean}, whether to show standard deviation of cross validation} \item{metrics, }{list of evaluation metrics to be used in cross validation, when it is not specified, the evaluation metric is chosen according to objective function. @@ -59,34 +61,61 @@ gradient with given prediction and dtrain.} \code{list(metric='metric-name', value='metric-value')} with given prediction and dtrain.} -\item{stratified}{\code{boolean} whether sampling of folds should be stratified by the values of labels in \code{data}} +\item{stratified}{a \code{boolean} indicating whether sampling of folds should be stratified +by the values of outcome labels.} -\item{folds}{\code{list} provides a possibility of using a list of pre-defined CV folds (each element must be a vector of fold's indices). -If folds are supplied, the nfold and stratified parameters would be ignored.} +\item{folds}{\code{list} provides a possibility to use a list of pre-defined CV folds +(each element must be a vector of test fold's indices). When folds are supplied, +the \code{nfold} and \code{stratified} parameters are ignored.} \item{verbose}{\code{boolean}, print the statistics during the process} -\item{print.every.n}{Print every N progress messages when \code{verbose>0}. Default is 1 which means all messages are printed.} +\item{print_every_n}{Print each n-th iteration evaluation messages when \code{verbose>0}. +Default is 1 which means all messages are printed. This parameter is passed to the +\code{\link{cb.print.evaluation}} callback.} -\item{early.stop.round}{If \code{NULL}, the early stopping function is not triggered. +\item{early_stopping_rounds}{If \code{NULL}, the early stopping function is not triggered. If set to an integer \code{k}, training with a validation set will stop if the performance -doesn't improve for \code{k} rounds.} +doesn't improve for \code{k} rounds. +Setting this parameter engages the \code{\link{cb.early.stop}} callback.} -\item{maximize}{If \code{feval} and \code{early.stop.round} are set, then \code{maximize} must be set as well. -\code{maximize=TRUE} means the larger the evaluation score the better.} +\item{maximize}{If \code{feval} and \code{early_stopping_rounds} are set, +then this parameter must be set as well. +When it is \code{TRUE}, it means the larger the evaluation score the better. +This parameter is passed to the \code{\link{cb.early.stop}} callback.} + +\item{callbacks}{a list of callback functions to perform various task during boosting. +See \code{\link{callbacks}}. Some of the callbacks are automatically created depending on the +parameters' values. User can provide either existing or their own callback methods in order +to customize the training process.} \item{...}{other parameters to pass to \code{params}.} } \value{ -TODO: update this... 
- -If \code{prediction = TRUE}, a list with the following elements is returned: +An object of class \code{xgb.cv.synchronous} with the following elements: \itemize{ - \item \code{dt} a \code{data.table} with each mean and standard deviation stat for training set and test set - \item \code{pred} an array or matrix (for multiclass classification) with predictions for each CV-fold for the model having been trained on the data in all other folds. + \item \code{call} a function call. + \item \code{params} parameters that were passed to the xgboost library. Note that it does not + capture parameters changed by the \code{\link{cb.reset.parameters}} callback. + \item \code{callbacks} callback functions that were either automatically assigned or + explicitely passed. + \item \code{evaluation_log} evaluation history storead as a \code{data.table} with the + first column corresponding to iteration number and the rest corresponding to the + CV-based evaluation means and standard deviations for the training and test CV-sets. + It is created by the \code{\link{cb.evaluation.log}} callback. + \item \code{niter} number of boosting iterations. + \item \code{folds} the list of CV folds' indices - either those passed through the \code{folds} + parameter or randomly generated. + \item \code{best_iteration} iteration number with the best evaluation metric value + (only available with early stopping). + \item \code{best_ntreelimit} the \code{ntreelimit} value corresponding to the best iteration, + which could further be used in \code{predict} method + (only available with early stopping). + \item \code{pred} CV prediction values available when \code{prediction} is set. + It is either vector or matrix (see \code{\link{cb.cv.predict}}). + \item \code{models} a liost of the CV folds' models. It is only available with the explicit + setting of the \code{cb.cv.predict(save_models = TRUE)} callback. } - -If \code{prediction = FALSE}, just a \code{data.table} with each mean and standard deviation stat for training set and test set is returned. } \description{ The cross valudation function of xgboost @@ -105,9 +134,10 @@ Adapted from \url{http://en.wikipedia.org/wiki/Cross-validation_\%28statistics\% \examples{ data(agaricus.train, package='xgboost') dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label) -history <- xgb.cv(data = dtrain, nround=3, nthread = 2, nfold = 5, metrics=list("rmse","auc"), - max.depth =3, eta = 1, objective = "binary:logistic") -print(history) +cv <- xgb.cv(data = dtrain, nrounds = 3, nthread = 2, nfold = 5, metrics = list("rmse","auc"), + max_depth = 3, eta = 1, objective = "binary:logistic") +print(cv) +print(cv, verbose=TRUE) } diff --git a/R-package/man/xgb.dump.Rd b/R-package/man/xgb.dump.Rd index cafa8ac14..efbf8b629 100644 --- a/R-package/man/xgb.dump.Rd +++ b/R-package/man/xgb.dump.Rd @@ -4,7 +4,7 @@ \alias{xgb.dump} \title{Save xgboost model to text file} \usage{ -xgb.dump(model = NULL, fname = NULL, fmap = "", with.stats = FALSE) +xgb.dump(model = NULL, fname = NULL, fmap = "", with_stats = FALSE, ...) 
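A sketch showing how the elements of the returned xgb.cv.synchronous object listed above can be accessed when early stopping and predictions are requested (arbitrary data and settings):

library(xgboost)
data(agaricus.train, package = 'xgboost')
dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
cv <- xgb.cv(params = list(max_depth = 3, eta = 0.3, objective = "binary:logistic"),
             data = dtrain, nrounds = 50, nfold = 5, metrics = "error",
             early_stopping_rounds = 3, maximize = FALSE, prediction = TRUE)
cv$evaluation_log[cv$best_iteration]  # CV means and sd at the best iteration
cv$best_ntreelimit                    # value to pass to predict's ntreelimit
str(cv$pred)                          # out-of-fold predictions from cb.cv.predict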
} \arguments{ \item{model}{the model object.} @@ -18,10 +18,12 @@ See demo/ for walkthrough example in R, and \url{https://github.com/dmlc/xgboost/blob/master/demo/data/featmap.txt} for example Format.} -\item{with.stats}{whether dump statistics of splits +\item{with_stats}{whether dump statistics of splits When this option is on, the model dump comes with two additional statistics: gain is the approximate loss function gain we get in each split; cover is the sum of second order gradient in each node.} + +\item{...}{currently not used} } \value{ if fname is not provided or set to \code{NULL} the function will return the model as a \code{character} vector. Otherwise it will return \code{TRUE}. @@ -34,10 +36,10 @@ data(agaricus.train, package='xgboost') data(agaricus.test, package='xgboost') train <- agaricus.train test <- agaricus.test -bst <- xgboost(data = train$data, label = train$label, max.depth = 2, - eta = 1, nthread = 2, nround = 2,objective = "binary:logistic") +bst <- xgboost(data = train$data, label = train$label, max_depth = 2, + eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic") # save the model in file 'xgb.model.dump' -xgb.dump(bst, 'xgb.model.dump', with.stats = TRUE) +xgb.dump(bst, 'xgb.model.dump', with_stats = TRUE) # print the model without saving it to a file print(xgb.dump(bst)) diff --git a/R-package/man/xgb.importance.Rd b/R-package/man/xgb.importance.Rd index f30f8149a..10258a07d 100644 --- a/R-package/man/xgb.importance.Rd +++ b/R-package/man/xgb.importance.Rd @@ -52,14 +52,13 @@ If you need to remember one thing only: until you want to leave us early, don't \examples{ data(agaricus.train, package='xgboost') -bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label, max.depth = 2, - eta = 1, nthread = 2, nround = 2,objective = "binary:logistic") +bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label, max_depth = 2, + eta = 1, nthread = 2, nrounds = 2,objective = "binary:logistic") -# agaricus.train$data@Dimnames[[2]] represents the column names of the sparse matrix. 
-xgb.importance(agaricus.train$data@Dimnames[[2]], model = bst) +xgb.importance(colnames(agaricus.train$data), model = bst) # Same thing with co-occurence computation this time -xgb.importance(agaricus.train$data@Dimnames[[2]], model = bst, data = agaricus.train$data, label = agaricus.train$label) +xgb.importance(colnames(agaricus.train$data), model = bst, data = agaricus.train$data, label = agaricus.train$label) } diff --git a/R-package/man/xgb.load.Rd b/R-package/man/xgb.load.Rd index 92576ad95..1499df2d4 100644 --- a/R-package/man/xgb.load.Rd +++ b/R-package/man/xgb.load.Rd @@ -17,8 +17,8 @@ data(agaricus.train, package='xgboost') data(agaricus.test, package='xgboost') train <- agaricus.train test <- agaricus.test -bst <- xgboost(data = train$data, label = train$label, max.depth = 2, - eta = 1, nthread = 2, nround = 2,objective = "binary:logistic") +bst <- xgboost(data = train$data, label = train$label, max_depth = 2, + eta = 1, nthread = 2, nrounds = 2,objective = "binary:logistic") xgb.save(bst, 'xgb.model') bst <- xgb.load('xgb.model') pred <- predict(bst, test$data) diff --git a/R-package/man/xgb.model.dt.tree.Rd b/R-package/man/xgb.model.dt.tree.Rd index 6b5193bc0..8c839be20 100644 --- a/R-package/man/xgb.model.dt.tree.Rd +++ b/R-package/man/xgb.model.dt.tree.Rd @@ -14,7 +14,7 @@ contains feature names, this argument should be \code{NULL} (default value)} \item{model}{object of class \code{xgb.Booster}} \item{text}{\code{character} vector previously generated by the \code{xgb.dump} -function (where parameter \code{with.stats = TRUE} should have been set).} +function (where parameter \code{with_stats = TRUE} should have been set).} \item{n_first_tree}{limit the parsing to the \code{n} first trees. If set to \code{NULL}, all trees of the model are parsed.} @@ -47,8 +47,8 @@ Parse a boosted tree model text dump into a \code{data.table} structure. data(agaricus.train, package='xgboost') -bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label, max.depth = 2, - eta = 1, nthread = 2, nround = 2,objective = "binary:logistic") +bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label, max_depth = 2, + eta = 1, nthread = 2, nrounds = 2,objective = "binary:logistic") (dt <- xgb.model.dt.tree(colnames(agaricus.train$data), bst)) diff --git a/R-package/man/xgb.parameters.Rd b/R-package/man/xgb.parameters.Rd index e531b5668..3df866816 100644 --- a/R-package/man/xgb.parameters.Rd +++ b/R-package/man/xgb.parameters.Rd @@ -23,8 +23,8 @@ than for \code{xgb.Booster}, since only just a handle would need to be copied. data(agaricus.train, package='xgboost') train <- agaricus.train -bst <- xgboost(data = train$data, label = train$label, max.depth = 2, - eta = 1, nthread = 2, nround = 2, objective = "binary:logistic") +bst <- xgboost(data = train$data, label = train$label, max_depth = 2, + eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic") xgb.parameters(bst) <- list(eta = 0.1) diff --git a/R-package/man/xgb.plot.deepness.Rd b/R-package/man/xgb.plot.deepness.Rd index e11a7495e..71d43ec8f 100644 --- a/R-package/man/xgb.plot.deepness.Rd +++ b/R-package/man/xgb.plot.deepness.Rd @@ -20,7 +20,7 @@ Display both the number of \code{leaf} and the distribution of \code{weighted ob by tree deepness level. The purpose of this function is to help the user to find the best trade-off to set -the \code{max.depth} and \code{min_child_weight} parameters according to the bias / variance trade-off. 
+the \code{max_depth} and \code{min_child_weight} parameters according to the bias / variance trade-off. See \link{xgb.train} for more information about these parameters. @@ -36,8 +36,8 @@ This function is inspired by the blog post \url{http://aysent.github.io/2015/11/ \examples{ data(agaricus.train, package='xgboost') -bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label, max.depth = 15, - eta = 1, nthread = 2, nround = 30, objective = "binary:logistic", +bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label, max_depth = 15, + eta = 1, nthread = 2, nrounds = 30, objective = "binary:logistic", min_child_weight = 50) xgb.plot.deepness(model = bst) diff --git a/R-package/man/xgb.plot.importance.Rd b/R-package/man/xgb.plot.importance.Rd index 2f9d5651d..0f7ec588a 100644 --- a/R-package/man/xgb.plot.importance.Rd +++ b/R-package/man/xgb.plot.importance.Rd @@ -4,12 +4,14 @@ \alias{xgb.plot.importance} \title{Plot feature importance bar graph} \usage{ -xgb.plot.importance(importance_matrix = NULL, numberOfClusters = c(1:10)) +xgb.plot.importance(importance_matrix = NULL, n_clusters = c(1:10), ...) } \arguments{ \item{importance_matrix}{a \code{data.table} returned by the \code{xgb.importance} function.} -\item{numberOfClusters}{a \code{numeric} vector containing the min and the max range of the possible number of clusters of bars.} +\item{n_clusters}{a \code{numeric} vector containing the min and the max range of the possible number of clusters of bars.} + +\item{...}{currently not used} } \value{ A \code{ggplot2} bar graph representing each feature by a horizontal bar. Longer is the bar, more important is the feature. Features are classified by importance and clustered by importance. The group is represented through the color of the bar. @@ -29,11 +31,10 @@ data(agaricus.train, package='xgboost') #(labels = outcome column which will be learned). #Each column of the sparse Matrix is a feature in one hot encoding format. -bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label, max.depth = 2, - eta = 1, nthread = 2, nround = 2,objective = "binary:logistic") +bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label, max_depth = 2, + eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic") -#agaricus.train$data@Dimnames[[2]] represents the column names of the sparse matrix. -importance_matrix <- xgb.importance(agaricus.train$data@Dimnames[[2]], model = bst) +importance_matrix <- xgb.importance(colnames(agaricus.train$data), model = bst) xgb.plot.importance(importance_matrix) } diff --git a/R-package/man/xgb.plot.multi.trees.Rd b/R-package/man/xgb.plot.multi.trees.Rd index 4d97c58b4..c7186ce92 100644 --- a/R-package/man/xgb.plot.multi.trees.Rd +++ b/R-package/man/xgb.plot.multi.trees.Rd @@ -4,19 +4,21 @@ \alias{xgb.plot.multi.trees} \title{Project all trees on one tree and plot it} \usage{ -xgb.plot.multi.trees(model, feature_names = NULL, features.keep = 5, - plot.width = NULL, plot.height = NULL) +xgb.plot.multi.trees(model, feature_names = NULL, features_keep = 5, + plot_width = NULL, plot_height = NULL, ...) } \arguments{ \item{model}{dump generated by the \code{xgb.train} function.} \item{feature_names}{names of each feature as a \code{character} vector. Can be extracted from a sparse matrix (see example). 
If model dump already contains feature names, this argument should be \code{NULL}.} -\item{features.keep}{number of features to keep in each position of the multi trees.} +\item{features_keep}{number of features to keep in each position of the multi trees.} -\item{plot.width}{width in pixels of the graph to produce} +\item{plot_width}{width in pixels of the graph to produce} -\item{plot.height}{height in pixels of the graph to produce} +\item{plot_height}{height in pixels of the graph to produce} + +\item{...}{currently not used} } \value{ Two graphs showing the distribution of the model deepness. @@ -39,7 +41,7 @@ its deepness (therefore in a boosting model, all trees have the same shape). Moreover, the trees tend to reuse the same features. The function will project each tree on one, and keep for each position the -\code{features.keep} first features (based on Gain per feature measure). +\code{features_keep} first features (based on Gain per feature measure). This function is inspired by this blog post: \url{https://wellecks.wordpress.com/2015/02/21/peering-into-the-black-box-visualizing-lambdamart/} @@ -47,11 +49,11 @@ This function is inspired by this blog post: \examples{ data(agaricus.train, package='xgboost') -bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label, max.depth = 15, - eta = 1, nthread = 2, nround = 30, objective = "binary:logistic", +bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label, max_depth = 15, + eta = 1, nthread = 2, nrounds = 30, objective = "binary:logistic", min_child_weight = 50) -p <- xgb.plot.multi.trees(model = bst, feature_names = agaricus.train$data@Dimnames[[2]], features.keep = 3) +p <- xgb.plot.multi.trees(model = bst, feature_names = colnames(agaricus.train$data), features_keep = 3) print(p) } diff --git a/R-package/man/xgb.plot.tree.Rd b/R-package/man/xgb.plot.tree.Rd index c087059e0..3620699bd 100644 --- a/R-package/man/xgb.plot.tree.Rd +++ b/R-package/man/xgb.plot.tree.Rd @@ -5,7 +5,7 @@ \title{Plot a boosted tree model} \usage{ xgb.plot.tree(feature_names = NULL, model = NULL, n_first_tree = NULL, - plot.width = NULL, plot.height = NULL) + plot_width = NULL, plot_height = NULL, ...) } \arguments{ \item{feature_names}{names of each feature as a \code{character} vector. Can be extracted from a sparse matrix (see example). If model dump already contains feature names, this argument should be \code{NULL}.} @@ -14,9 +14,11 @@ xgb.plot.tree(feature_names = NULL, model = NULL, n_first_tree = NULL, \item{n_first_tree}{limit the plot to the n first trees. If \code{NULL}, all trees of the model are plotted. Performance can be low for huge models.} -\item{plot.width}{the width of the diagram in pixels.} +\item{plot_width}{the width of the diagram in pixels.} -\item{plot.height}{the height of the diagram in pixels.} +\item{plot_height}{the height of the diagram in pixels.} + +\item{...}{currently not used.} } \value{ A \code{DiagrammeR} of the model. @@ -38,11 +40,10 @@ The function uses \href{http://www.graphviz.org/}{GraphViz} library for that pur \examples{ data(agaricus.train, package='xgboost') -bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label, max.depth = 2, - eta = 1, nthread = 2, nround = 2,objective = "binary:logistic") +bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label, max_depth = 2, + eta = 1, nthread = 2, nrounds = 2,objective = "binary:logistic") -# agaricus.train$data@Dimnames[[2]] represents the column names of the sparse matrix. 
-xgb.plot.tree(feature_names = agaricus.train$data@Dimnames[[2]], model = bst) +xgb.plot.tree(feature_names = colnames(agaricus.train$data), model = bst) } diff --git a/R-package/man/xgb.save.Rd b/R-package/man/xgb.save.Rd index db335105c..85acdecd0 100644 --- a/R-package/man/xgb.save.Rd +++ b/R-package/man/xgb.save.Rd @@ -9,7 +9,7 @@ xgb.save(model, fname) \arguments{ \item{model}{the model object.} -\item{fname}{the name of the binary file.} +\item{fname}{the name of the file to write.} } \description{ Save xgboost model from xgboost or xgb.train @@ -19,8 +19,8 @@ data(agaricus.train, package='xgboost') data(agaricus.test, package='xgboost') train <- agaricus.train test <- agaricus.test -bst <- xgboost(data = train$data, label = train$label, max.depth = 2, - eta = 1, nthread = 2, nround = 2,objective = "binary:logistic") +bst <- xgboost(data = train$data, label = train$label, max_depth = 2, + eta = 1, nthread = 2, nrounds = 2,objective = "binary:logistic") xgb.save(bst, 'xgb.model') bst <- xgb.load('xgb.model') pred <- predict(bst, test$data) diff --git a/R-package/man/xgb.save.raw.Rd b/R-package/man/xgb.save.raw.Rd index 1e9f4a4db..7f808529e 100644 --- a/R-package/man/xgb.save.raw.Rd +++ b/R-package/man/xgb.save.raw.Rd @@ -18,10 +18,11 @@ data(agaricus.train, package='xgboost') data(agaricus.test, package='xgboost') train <- agaricus.train test <- agaricus.test -bst <- xgboost(data = train$data, label = train$label, max.depth = 2, - eta = 1, nthread = 2, nround = 2,objective = "binary:logistic") +bst <- xgboost(data = train$data, label = train$label, max_depth = 2, + eta = 1, nthread = 2, nrounds = 2,objective = "binary:logistic") raw <- xgb.save.raw(bst) bst <- xgb.load(raw) pred <- predict(bst, test$data) + } diff --git a/R-package/man/xgb.train.Rd b/R-package/man/xgb.train.Rd index d74e42153..a06e21b7b 100644 --- a/R-package/man/xgb.train.Rd +++ b/R-package/man/xgb.train.Rd @@ -6,13 +6,13 @@ \title{eXtreme Gradient Boosting Training} \usage{ xgb.train(params = list(), data, nrounds, watchlist = list(), obj = NULL, - feval = NULL, verbose = 1, print.every.n = 1L, - early.stop.round = NULL, maximize = NULL, save_period = NULL, + feval = NULL, verbose = 1, print_every_n = 1L, + early_stopping_rounds = NULL, maximize = NULL, save_period = NULL, save_name = "xgboost.model", xgb_model = NULL, callbacks = list(), ...) xgboost(data = NULL, label = NULL, missing = NA, weight = NULL, - params = list(), nrounds, verbose = 1, print.every.n = 1L, - early.stop.round = NULL, maximize = NULL, save_period = 0, + params = list(), nrounds, verbose = 1, print_every_n = 1L, + early_stopping_rounds = NULL, maximize = NULL, save_period = 0, save_name = "xgboost.model", xgb_model = NULL, callbacks = list(), ...) } \arguments{ @@ -59,8 +59,8 @@ xgboost(data = NULL, label = NULL, missing = NA, weight = NULL, \item \code{binary:logistic} logistic regression for binary classification. Output probability. \item \code{binary:logitraw} logistic regression for binary classification, output score before logistic transformation. \item \code{num_class} set the number of classes. To use only with multiclass objectives. - \item \code{multi:softmax} set xgboost to do multiclass classification using the softmax objective. Class is represented by a number and should be from 0 to \code{num_class}. - \item \code{multi:softprob} same as softmax, but output a vector of ndata * nclass, which can be further reshaped to ndata, nclass matrix. The result contains predicted probabilities of each data point belonging to each class. 
+ \item \code{multi:softmax} set xgboost to do multiclass classification using the softmax objective. Class is represented by a number and should be from 0 to \code{num_class - 1}. + \item \code{multi:softprob} same as softmax, but prediction outputs a vector of ndata * nclass elements, which can be further reshaped to ndata, nclass matrix. The result contains predicted probabilities of each data point belonging to each class. \item \code{rank:pairwise} set xgboost to do ranking task by minimizing the pairwise loss. } \item \code{base_score} the initial prediction score of all instances, global bias. Default: 0.5 @@ -79,51 +79,78 @@ watchlist=list(validation1=mat1, validation2=mat2) to watch the performance of each round's model on mat1 and mat2} \item{obj}{customized objective function. Returns gradient and second order -gradient with given prediction and dtrain,} +gradient with given prediction and dtrain.} \item{feval}{custimized evaluation function. Returns \code{list(metric='metric-name', value='metric-value')} with given -prediction and dtrain,} +prediction and dtrain.} \item{verbose}{If 0, xgboost will stay silent. If 1, xgboost will print -information of performance. If 2, xgboost will print information of both} +information of performance. If 2, xgboost will print some additional information. +Setting \code{verbose > 0} automatically engages the \code{\link{cb.evaluation.log}} and +\code{\link{cb.print.evaluation}} callback functions.} -\item{print.every.n}{Print every N progress messages when \code{verbose>0}. -Default is 1 which means all messages are printed.} +\item{print_every_n}{Print each n-th iteration evaluation messages when \code{verbose>0}. +Default is 1 which means all messages are printed. This parameter is passed to the +\code{\link{cb.print.evaluation}} callback.} -\item{early.stop.round}{If \code{NULL}, the early stopping function is not triggered. +\item{early_stopping_rounds}{If \code{NULL}, the early stopping function is not triggered. If set to an integer \code{k}, training with a validation set will stop if the performance -keeps getting worse consecutively for \code{k} rounds.} +doesn't improve for \code{k} rounds. +Setting this parameter engages the \code{\link{cb.early.stop}} callback.} -\item{maximize}{If \code{feval} and \code{early.stop.round} are set, -then \code{maximize} must be set as well. -\code{maximize=TRUE} means the larger the evaluation score the better.} +\item{maximize}{If \code{feval} and \code{early_stopping_rounds} are set, +then this parameter must be set as well. +When it is \code{TRUE}, it means the larger the evaluation score the better. +This parameter is passed to the \code{\link{cb.early.stop}} callback.} -\item{save_period}{save the model to the disk after every \code{save_period} rounds, 0 means save at the end.} +\item{save_period}{when it is non-NULL, model is saved to disk after every \code{save_period} rounds, +0 means save at the end. The saving is handled by the \code{\link{cb.save.model}} callback.} \item{save_name}{the name or path for periodically saved model file.} -\item{xgb_model}{the previously built model to continue the trainig from. +\item{xgb_model}{a previously built model to continue the trainig from. Could be either an object of class \code{xgb.Booster}, or its raw data, or the name of a file with a previously saved model.} -\item{callbacks}{a list of callback functions to perform various task during boosting. -See \code{\link{callbacks}}. 
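The xgb_model argument described in this argument list allows resuming training from a previous model; a sketch (the niter value printed at the end simply reflects what the continued booster records):

library(xgboost)
data(agaricus.train, package = 'xgboost')
dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
param <- list(max_depth = 2, eta = 1, objective = "binary:logistic")
bst1 <- xgb.train(param, dtrain, nrounds = 2, watchlist = list(train = dtrain))
# continue for two more rounds; an xgb.Booster, its raw vector,
# or a saved model file name can all be supplied to xgb_model
bst2 <- xgb.train(param, dtrain, nrounds = 2, watchlist = list(train = dtrain),
                  xgb_model = bst1)
bst2$niter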
Some of the callbacks are currently automatically -created when specific parameters are set.} +\item{callbacks}{a list of callback functions to perform various task during boosting. +See \code{\link{callbacks}}. Some of the callbacks are automatically created depending on the +parameters' values. User can provide either existing or their own callback methods in order +to customize the training process.} \item{...}{other parameters to pass to \code{params}.} -\item{label}{the response variable. User should not set this field, -if data is local data file or \code{xgb.DMatrix}.} +\item{label}{vector of response values. Should not be provided when data is +a local data file name or an \code{xgb.DMatrix}.} \item{missing}{by default is set to NA, which means that NA values should be considered as 'missing' by the algorithm. Sometimes, 0 or other extreme value might be used to represent missing values. -This parameter is only used when input is dense matrix,} +This parameter is only used when input is a dense matrix.} \item{weight}{a vector indicating the weight for each row of the input.} } \value{ -TODO +An object of class \code{xgb.Booster} with the following elements: +\itemize{ + \item \code{handle} a handle (pointer) to the xgboost model in memory. + \item \code{raw} a cached memory dump of the xgboost model saved as R's \code{raw} type. + \item \code{niter} number of boosting iterations. + \item \code{evaluation_log} evaluation history storead as a \code{data.table} with the + first column corresponding to iteration number and the rest corresponding to evaluation + metrics' values. It is created by the \code{\link{cb.evaluation.log}} callback. + \item \code{call} a function call. + \item \code{params} parameters that were passed to the xgboost library. Note that it does not + capture parameters changed by the \code{\link{cb.reset.parameters}} callback. + \item \code{callbacks} callback functions that were either automatically assigned or + explicitely passed. + \item \code{best_iteration} iteration number with the best evaluation metric value + (only available with early stopping). + \item \code{best_ntreelimit} the \code{ntreelimit} value corresponding to the best iteration, + which could further be used in \code{predict} method + (only available with early stopping). + \item \code{best_score} the best evaluation metric value during early stopping. + (only available with early stopping). +} } \description{ \code{xgb.train} is an advanced interface for training an xgboost model. The \code{xgboost} function provides a simpler interface. @@ -147,21 +174,21 @@ The folloiwing is the list of built-in metrics for which Xgboost provides optimi \item \code{rmse} root mean square error. \url{http://en.wikipedia.org/wiki/Root_mean_square_error} \item \code{logloss} negative log-likelihood. \url{http://en.wikipedia.org/wiki/Log-likelihood} \item \code{mlogloss} multiclass logloss. \url{https://www.kaggle.com/wiki/MultiClassLogLoss} - \item \code{error} Binary classification error rate. It is calculated as \code{(wrong cases) / (all cases)}. + \item \code{error} Binary classification error rate. It is calculated as \code{(# wrong cases) / (# all cases)}. By default, it uses the 0.5 threshold for predicted values to define negative and positive instances. Different threshold (e.g., 0.) could be specified as "error@0." - \item \code{merror} Multiclass classification error rate. It is calculated as \code{(wrong cases) / (all cases)}. + \item \code{merror} Multiclass classification error rate. 
It is calculated as \code{(# wrong cases) / (# all cases)}. \item \code{auc} Area under the curve. \url{http://en.wikipedia.org/wiki/Receiver_operating_characteristic#'Area_under_curve} for ranking evaluation. \item \code{ndcg} Normalized Discounted Cumulative Gain (for ranking task). \url{http://en.wikipedia.org/wiki/NDCG} } The following callbacks are automatically created when certain parameters are set: \itemize{ - \item \code{cb.print_evaluation} is turned on when \code{verbose > 0}; - and the \code{print.every.n} parameter is passed to it. - \item \code{cb.log_evaluation} is on when \code{verbose > 0} and \code{watchlist} is present. - \item \code{cb.early_stop}: when \code{early.stop.round} is set. - \item \code{cb.save_model}: when \code{save_period > 0} is set. + \item \code{cb.print.evaluation} is turned on when \code{verbose > 0}; + and the \code{print_every_n} parameter is passed to it. + \item \code{cb.evaluation.log} is on when \code{verbose > 0} and \code{watchlist} is present. + \item \code{cb.early.stop}: when \code{early_stopping_rounds} is set. + \item \code{cb.save.model}: when \code{save_period > 0} is set. } } \examples{ @@ -173,8 +200,9 @@ dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label) watchlist <- list(eval = dtest, train = dtrain) ## A simple xgb.train example: -param <- list(max.depth = 2, eta = 1, silent = 1, objective="binary:logistic", eval_metric="auc") -bst <- xgb.train(param, dtrain, nthread = 2, nround = 2, watchlist) +param <- list(max_depth = 2, eta = 1, silent = 1, + objective = "binary:logistic", eval_metric = "auc") +bst <- xgb.train(param, dtrain, nthread = 2, nrounds = 2, watchlist) ## An xgb.train example where custom objective and evaluation metric are used: logregobj <- function(preds, dtrain) { @@ -189,23 +217,29 @@ evalerror <- function(preds, dtrain) { err <- as.numeric(sum(labels != (preds > 0)))/length(labels) return(list(metric = "error", value = err)) } -bst <- xgb.train(param, dtrain, nthread = 2, nround = 2, watchlist) +bst <- xgb.train(param, dtrain, nthread = 2, nrounds = 2, watchlist) ## An xgb.train example of using variable learning rates at each iteration: my_etas <- list(eta = c(0.5, 0.1)) -bst <- xgb.train(param, dtrain, nthread = 2, nround = 2, watchlist, - callbacks = list(cb.reset_parameters(my_etas))) +bst <- xgb.train(param, dtrain, nthread = 2, nrounds = 2, watchlist, + callbacks = list(cb.reset.parameters(my_etas))) -## Explicit use of the cb.log_evaluation callback allows to run +## Explicit use of the cb.evaluation.log callback allows to run ## xgb.train silently but still store the evaluation results: -bst <- xgb.train(param, dtrain, nthread = 2, nround = 2, watchlist, - verbose = 0, callbacks = list(cb.log_evaluation())) +bst <- xgb.train(param, dtrain, nthread = 2, nrounds = 2, watchlist, + verbose = 0, callbacks = list(cb.evaluation.log())) print(bst$evaluation_log) ## An 'xgboost' interface example: -bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label, max.depth = 2, - eta = 1, nthread = 2, nround = 2, objective = "binary:logistic") +bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label, + max_depth = 2, eta = 1, nthread = 2, nrounds = 2, + objective = "binary:logistic") pred <- predict(bst, agaricus.test$data) } +\seealso{ +\code{\link{callbacks}}, +\code{\link{predict.xgb.Booster}}, +\code{\link{xgb.cv}} +} diff --git a/R-package/man/xgboost-deprecated.Rd b/R-package/man/xgboost-deprecated.Rd new file mode 100644 index 000000000..2cb546212 --- /dev/null 
+++ b/R-package/man/xgboost-deprecated.Rd
@@ -0,0 +1,17 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/utils.R
+\name{xgboost-deprecated}
+\alias{xgboost-deprecated}
+\title{Deprecation notices.}
+\description{
+Some of the parameter names were recently changed in order to make the code style more uniform.
+The deprecated parameter names will be removed in the next release.
+}
+\details{
+To see all the current deprecated and new parameters, check the \code{xgboost:::depr_par_lut} table.
+
+A deprecation warning is shown when any of the deprecated parameters is used in a call.
+An additional warning is shown when there is a partial match to a deprecated parameter
+(as R is able to partially match parameter names).
+}
+
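A sketch of the deprecation mechanism described above: old-style names still work for now but trigger a warning pointing to the new spelling (max.depth and nround are among the names renamed by this patch).

library(xgboost)
data(agaricus.train, package = 'xgboost')
# 'max.depth' and 'nround' are deprecated spellings of 'max_depth' and 'nrounds';
# this call is expected to run, but with deprecation warnings
bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label,
               max.depth = 2, nround = 2, objective = "binary:logistic")
# the lookup table of deprecated -> current names:
# xgboost:::depr_par_lut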