[R] improve docstrings for "xgb.Booster.R" (#9906)

Michael Mayer 2023-12-21 03:01:30 +01:00 committed by GitHub
parent 252e018275
commit b807f3e30c
28 changed files with 661 additions and 533 deletions

View File

@@ -63,7 +63,8 @@ Imports:
     Matrix (>= 1.1-0),
     methods,
     data.table (>= 1.9.6),
-    jsonlite (>= 1.0),
+    jsonlite (>= 1.0)
+Roxygen: list(markdown = TRUE)
 RoxygenNote: 7.2.3
 Encoding: UTF-8
 SystemRequirements: GNU make, C++17

View File

@@ -79,36 +79,45 @@ xgb.get.handle <- function(object) {
   handle
 }
-#' Restore missing parts of an incomplete xgb.Booster object.
+#' Restore missing parts of an incomplete xgb.Booster object
 #'
-#' It attempts to complete an \code{xgb.Booster} object by restoring either its missing
-#' raw model memory dump (when it has no \code{raw} data but its \code{xgb.Booster.handle} is valid)
-#' or its missing internal handle (when its \code{xgb.Booster.handle} is not valid
+#' It attempts to complete an `xgb.Booster` object by restoring either its missing
+#' raw model memory dump (when it has no `raw` data but its `xgb.Booster.handle` is valid)
+#' or its missing internal handle (when its `xgb.Booster.handle` is not valid
 #' but it has a raw Booster memory dump).
 #'
-#' @param object object of class \code{xgb.Booster}
-#' @param saveraw a flag indicating whether to append \code{raw} Booster memory dump data
+#' @param object Object of class `xgb.Booster`.
+#' @param saveraw A flag indicating whether to append `raw` Booster memory dump data
 #'   when it doesn't already exist.
 #'
 #' @details
 #'
 #' While this method is primarily for internal use, it might be useful in some practical situations.
 #'
-#' E.g., when an \code{xgb.Booster} model is saved as an R object and then is loaded as an R object,
+#' E.g., when an `xgb.Booster` model is saved as an R object and then is loaded as an R object,
 #' its handle (pointer) to an internal xgboost model would be invalid. The majority of xgboost methods
 #' should still work for such a model object since those methods would be using
-#' \code{xgb.Booster.complete} internally. However, one might find it to be more efficient to call the
-#' \code{xgb.Booster.complete} function explicitly once after loading a model as an R-object.
+#' `xgb.Booster.complete()` internally. However, one might find it to be more efficient to call the
+#' `xgb.Booster.complete()` function explicitly once after loading a model as an R-object.
 #' That would prevent further repeated implicit reconstruction of an internal booster model.
 #'
 #' @return
-#' An object of \code{xgb.Booster} class.
+#' An object of `xgb.Booster` class.
 #'
 #' @examples
 #'
-#' data(agaricus.train, package='xgboost')
-#' bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label, max_depth = 2,
-#'                eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic")
+#' data(agaricus.train, package = "xgboost")
+#'
+#' bst <- xgboost(
+#'   data = agaricus.train$data,
+#'   label = agaricus.train$label,
+#'   max_depth = 2,
+#'   eta = 1,
+#'   nthread = 2,
+#'   nrounds = 2,
+#'   objective = "binary:logistic"
+#' )
+#'
 #' saveRDS(bst, "xgb.model.rds")
 #'
 #' # Warning: The resulting RDS file is only compatible with the current XGBoost version.
@@ -161,112 +170,100 @@ xgb.Booster.complete <- function(object, saveraw = TRUE) {
   return(object)
 }
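
For orientation, here is a minimal illustrative sketch of the save/reload workflow the docstring above describes; it assumes only the xgboost package and its bundled agaricus.train data:

# Fit a small model, save it as an R object, reload it, and restore the
# internal handle explicitly instead of relying on repeated implicit
# reconstruction inside every later method call.
library(xgboost)
data(agaricus.train, package = "xgboost")

bst <- xgboost(
  data = agaricus.train$data,
  label = agaricus.train$label,
  max_depth = 2,
  eta = 1,
  nthread = 2,
  nrounds = 2,
  objective = "binary:logistic"
)

saveRDS(bst, "xgb.model.rds")
bst2 <- readRDS("xgb.model.rds")   # handle (pointer) is now invalid
bst2 <- xgb.Booster.complete(bst2) # restore it once, up front
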
-#' Predict method for eXtreme Gradient Boosting model
+#' Predict method for XGBoost model
 #'
 #' Predicted values based on either xgboost model or model handle object.
 #'
-#' @param object Object of class \code{xgb.Booster} or \code{xgb.Booster.handle}
-#' @param newdata takes \code{matrix}, \code{dgCMatrix}, \code{dgRMatrix}, \code{dsparseVector},
-#' local data file or \code{xgb.DMatrix}.
-#'
-#' For single-row predictions on sparse data, it's recommended to use CSR format. If passing
-#' a sparse vector, it will take it as a row vector.
-#' @param missing Missing is only used when input is dense matrix. Pick a float value that represents
-#' missing values in data (e.g., sometimes 0 or some other extreme value is used).
-#' @param outputmargin whether the prediction should be returned in the for of original untransformed
-#' sum of predictions from boosting iterations' results. E.g., setting \code{outputmargin=TRUE} for
-#' logistic regression would result in predictions for log-odds instead of probabilities.
-#' @param ntreelimit Deprecated, use \code{iterationrange} instead.
-#' @param predleaf whether predict leaf index.
-#' @param predcontrib whether to return feature contributions to individual predictions (see Details).
-#' @param approxcontrib whether to use a fast approximation for feature contributions (see Details).
-#' @param predinteraction whether to return contributions of feature interactions to individual predictions (see Details).
-#' @param reshape whether to reshape the vector of predictions to a matrix form when there are several
-#' prediction outputs per case. This option has no effect when either of predleaf, predcontrib,
-#' or predinteraction flags is TRUE.
-#' @param training whether is the prediction result used for training. For dart booster,
+#' @param object Object of class `xgb.Booster` or `xgb.Booster.handle`.
+#' @param newdata Takes `matrix`, `dgCMatrix`, `dgRMatrix`, `dsparseVector`,
+#'   local data file, or `xgb.DMatrix`.
+#'   For single-row predictions on sparse data, it is recommended to use the CSR format.
+#'   If passing a sparse vector, it will take it as a row vector.
+#' @param missing Only used when input is a dense matrix. Pick a float value that represents
+#'   missing values in data (e.g., 0 or some other extreme value).
+#' @param outputmargin Whether the prediction should be returned in the form of original untransformed
+#'   sum of predictions from boosting iterations' results. E.g., setting `outputmargin=TRUE` for
+#'   logistic regression would return log-odds instead of probabilities.
+#' @param ntreelimit Deprecated, use `iterationrange` instead.
+#' @param predleaf Whether to predict per-tree leaf indices.
+#' @param predcontrib Whether to return feature contributions to individual predictions (see Details).
+#' @param approxcontrib Whether to use a fast approximation for feature contributions (see Details).
+#' @param predinteraction Whether to return contributions of feature interactions to individual predictions (see Details).
+#' @param reshape Whether to reshape the vector of predictions to matrix form when there are several
+#'   prediction outputs per case. No effect if `predleaf`, `predcontrib`,
+#'   or `predinteraction` is `TRUE`.
+#' @param training Whether the predictions are used for training. For dart booster,
 #' training predicting will perform dropout.
-#' @param iterationrange Specifies which layer of trees are used in prediction. For
-#' example, if a random forest is trained with 100 rounds. Specifying
-#' `iterationrange=(1, 21)`, then only the forests built during [1, 21) (half open set)
-#' rounds are used in this prediction. It's 1-based index just like R vector. When set
-#' to \code{c(1, 1)} XGBoost will use all trees.
-#' @param strict_shape Default is \code{FALSE}. When it's set to \code{TRUE}, output
-#' type and shape of prediction are invariant to model type.
-#'
+#' @param iterationrange Specifies which trees are used in prediction. For
+#'   example, take a random forest with 100 rounds.
+#'   With `iterationrange=c(1, 21)`, only the trees built during `[1, 21)` (half open set)
+#'   rounds are used in this prediction. The index is 1-based just like an R vector. When set
+#'   to `c(1, 1)`, XGBoost will use all trees.
+#' @param strict_shape Default is `FALSE`. When set to `TRUE`, the output
+#'   type and shape of predictions are invariant to the model type.
 #' @param ... Not used.
 #'
 #' @details
 #'
-#' Note that \code{iterationrange} would currently do nothing for predictions from gblinear,
-#' since gblinear doesn't keep its boosting history.
+#' Note that `iterationrange` would currently do nothing for predictions from "gblinear",
+#' since "gblinear" doesn't keep its boosting history.
 #'
-#' One possible practical applications of the \code{predleaf} option is to use the model
+#' One possible practical application of the `predleaf` option is to use the model
 #' as a generator of new features which capture non-linearity and interactions,
-#' e.g., as implemented in \code{\link{xgb.create.features}}.
+#' e.g., as implemented in [xgb.create.features()].
 #'
-#' Setting \code{predcontrib = TRUE} allows to calculate contributions of each feature to
+#' Setting `predcontrib = TRUE` allows calculating contributions of each feature to
 #' individual predictions. For "gblinear" booster, feature contributions are simply linear terms
 #' (feature_beta * feature_value). For "gbtree" booster, feature contributions are SHAP
 #' values (Lundberg 2017) that sum to the difference between the expected output
 #' of the model and the current prediction (where the hessian weights are used to compute the expectations).
-#' Setting \code{approxcontrib = TRUE} approximates these values following the idea explained
+#' Setting `approxcontrib = TRUE` approximates these values following the idea explained
 #' in \url{http://blog.datadive.net/interpreting-random-forests/}.
 #'
-#' With \code{predinteraction = TRUE}, SHAP values of contributions of interaction of each pair of features
+#' With `predinteraction = TRUE`, SHAP values of contributions of interaction of each pair of features
 #' are computed. Note that this operation might be rather expensive in terms of compute and memory.
 #' Since it quadratically depends on the number of features, it is recommended to perform selection
 #' of the most important features first. See below about the format of the returned results.
 #'
-#' The \code{predict()} method uses as many threads as defined in \code{xgb.Booster} object (all by default).
-#' If you want to change their number, then assign a new number to \code{nthread} using \code{\link{xgb.parameters<-}}.
-#' Note also that converting a matrix to \code{\link{xgb.DMatrix}} uses multiple threads too.
+#' The `predict()` method uses as many threads as defined in `xgb.Booster` object (all by default).
+#' If you want to change their number, assign a new number to `nthread` using [xgb.parameters<-()].
+#' Note that converting a matrix to [xgb.DMatrix()] uses multiple threads too.
 #'
 #' @return
-#' The return type is different depending whether \code{strict_shape} is set to \code{TRUE}. By default,
-#' for regression or binary classification, it returns a vector of length \code{nrows(newdata)}.
-#' For multiclass classification, either a \code{num_class * nrows(newdata)} vector or
-#' a \code{(nrows(newdata), num_class)} dimension matrix is returned, depending on
-#' the \code{reshape} value.
-#'
-#' When \code{predleaf = TRUE}, the output is a matrix object with the
-#' number of columns corresponding to the number of trees.
-#'
-#' When \code{predcontrib = TRUE} and it is not a multiclass setting, the output is a matrix object with
-#' \code{num_features + 1} columns. The last "+ 1" column in a matrix corresponds to bias.
-#' For a multiclass case, a list of \code{num_class} elements is returned, where each element is
-#' such a matrix. The contribution values are on the scale of untransformed margin
-#' (e.g., for binary classification would mean that the contributions are log-odds deviations from bias).
-#'
-#' When \code{predinteraction = TRUE} and it is not a multiclass setting, the output is a 3d array with
-#' dimensions \code{c(nrow, num_features + 1, num_features + 1)}. The off-diagonal (in the last two dimensions)
-#' elements represent different features interaction contributions. The array is symmetric WRT the last
-#' two dimensions. The "+ 1" columns corresponds to bias. Summing this array along the last dimension should
-#' produce practically the same result as predict with \code{predcontrib = TRUE}.
-#' For a multiclass case, a list of \code{num_class} elements is returned, where each element is
-#' such an array.
-#'
-#' When \code{strict_shape} is set to \code{TRUE}, the output is always an array. For
-#' normal prediction, the output is a 2-dimension array \code{(num_class, nrow(newdata))}.
-#'
-#' For \code{predcontrib = TRUE}, output is \code{(ncol(newdata) + 1, num_class, nrow(newdata))}
-#' For \code{predinteraction = TRUE}, output is \code{(ncol(newdata) + 1, ncol(newdata) + 1, num_class, nrow(newdata))}
-#' For \code{predleaf = TRUE}, output is \code{(n_trees_in_forest, num_class, n_iterations, nrow(newdata))}
-#'
-#' @seealso
-#' \code{\link{xgb.train}}.
-#'
+#' The return type depends on `strict_shape`. If `FALSE` (default):
+#' - For regression or binary classification: A vector of length `nrows(newdata)`.
+#' - For multiclass classification: A vector of length `num_class * nrows(newdata)` or
+#'   a `(nrows(newdata), num_class)` matrix, depending on the `reshape` value.
+#' - When `predleaf = TRUE`: A matrix with one column per tree.
+#' - When `predcontrib = TRUE`: When not multiclass, a matrix with
+#'   `num_features + 1` columns. The last "+ 1" column corresponds to the baseline value.
+#'   In the multiclass case, a list of `num_class` such matrices.
+#'   The contribution values are on the scale of untransformed margin
+#'   (e.g., for binary classification, the values are log-odds deviations from the baseline).
+#' - When `predinteraction = TRUE`: When not multiclass, the output is a 3d array of
+#'   dimension `c(nrow, num_features + 1, num_features + 1)`. The off-diagonal (in the last two dimensions)
+#'   elements represent different feature interaction contributions. The array is symmetric WRT the last
+#'   two dimensions. The "+ 1" columns correspond to the baselines. Summing this array along the last dimension should
+#'   produce practically the same result as `predcontrib = TRUE`.
+#'   In the multiclass case, a list of `num_class` such arrays.
+#'
+#' When `strict_shape = TRUE`, the output is always an array:
+#' - For normal predictions, the output has dimension `(num_class, nrow(newdata))`.
+#' - For `predcontrib = TRUE`, the dimension is `(ncol(newdata) + 1, num_class, nrow(newdata))`.
+#' - For `predinteraction = TRUE`, the dimension is `(ncol(newdata) + 1, ncol(newdata) + 1, num_class, nrow(newdata))`.
+#' - For `predleaf = TRUE`, the dimension is `(n_trees_in_forest, num_class, n_iterations, nrow(newdata))`.
+#' @seealso [xgb.train()]
 #' @references
-#'
-#' Scott M. Lundberg, Su-In Lee, "A Unified Approach to Interpreting Model Predictions", NIPS Proceedings 2017, \url{https://arxiv.org/abs/1705.07874}
-#'
-#' Scott M. Lundberg, Su-In Lee, "Consistent feature attribution for tree ensembles", \url{https://arxiv.org/abs/1706.06060}
+#' 1. Scott M. Lundberg, Su-In Lee, "A Unified Approach to Interpreting Model Predictions",
+#'    NIPS Proceedings 2017, \url{https://arxiv.org/abs/1705.07874}
+#' 2. Scott M. Lundberg, Su-In Lee, "Consistent feature attribution for tree ensembles",
+#'    \url{https://arxiv.org/abs/1706.06060}
 #'
 #' @examples
 #' ## binary classification:
 #'
-#' data(agaricus.train, package='xgboost')
-#' data(agaricus.test, package='xgboost')
+#' data(agaricus.train, package = "xgboost")
+#' data(agaricus.test, package = "xgboost")
 #'
 #' ## Keep the number of threads to 2 for examples
 #' nthread <- 2
@@ -275,8 +272,16 @@ xgb.Booster.complete <- function(object, saveraw = TRUE) {
 #' train <- agaricus.train
 #' test <- agaricus.test
 #'
-#' bst <- xgboost(data = train$data, label = train$label, max_depth = 2,
-#'                eta = 0.5, nthread = nthread, nrounds = 5, objective = "binary:logistic")
+#' bst <- xgboost(
+#'   data = train$data,
+#'   label = train$label,
+#'   max_depth = 2,
+#'   eta = 0.5,
+#'   nthread = nthread,
+#'   nrounds = 5,
+#'   objective = "binary:logistic"
+#' )
+#'
 #' # use all trees by default
 #' pred <- predict(bst, test$data)
 #' # use only the 1st tree
@@ -308,10 +313,21 @@ xgb.Booster.complete <- function(object, saveraw = TRUE) {
 #'
 #' lb <- as.numeric(iris$Species) - 1
 #' num_class <- 3
+#'
 #' set.seed(11)
-#' bst <- xgboost(data = as.matrix(iris[, -5]), label = lb,
-#'                max_depth = 4, eta = 0.5, nthread = 2, nrounds = 10, subsample = 0.5,
-#'                objective = "multi:softprob", num_class = num_class)
+#'
+#' bst <- xgboost(
+#'   data = as.matrix(iris[, -5]),
+#'   label = lb,
+#'   max_depth = 4,
+#'   eta = 0.5,
+#'   nthread = 2,
+#'   nrounds = 10,
+#'   subsample = 0.5,
+#'   objective = "multi:softprob",
+#'   num_class = num_class
+#' )
+#'
 #' # predict for softmax returns num_class probability numbers per case:
 #' pred <- predict(bst, as.matrix(iris[, -5]))
 #' str(pred)
@@ -322,11 +338,21 @@ xgb.Booster.complete <- function(object, saveraw = TRUE) {
 #' # the following should result in the same error as seen in the last iteration
 #' sum(pred_labels != lb) / length(lb)
 #'
-#' # compare that to the predictions from softmax:
+#' # compare with predictions from softmax:
 #' set.seed(11)
-#' bst <- xgboost(data = as.matrix(iris[, -5]), label = lb,
-#'                max_depth = 4, eta = 0.5, nthread = 2, nrounds = 10, subsample = 0.5,
-#'                objective = "multi:softmax", num_class = num_class)
+#'
+#' bst <- xgboost(
+#'   data = as.matrix(iris[, -5]),
+#'   label = lb,
+#'   max_depth = 4,
+#'   eta = 0.5,
+#'   nthread = 2,
+#'   nrounds = 10,
+#'   subsample = 0.5,
+#'   objective = "multi:softmax",
+#'   num_class = num_class
+#' )
+#'
 #' pred <- predict(bst, as.matrix(iris[, -5]))
 #' str(pred)
 #' all.equal(pred, pred_labels)
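
As a quick illustration of the predcontrib contract documented above (a sketch, assuming bst and test are the binary-classification model and data from the first example): the per-row feature contributions plus the baseline column sum to the untransformed margin prediction.

shap <- predict(bst, test$data, predcontrib = TRUE)
margin <- predict(bst, test$data, outputmargin = TRUE)
# The last "+ 1" column of `shap` is the baseline; row sums reproduce the margin.
all.equal(unname(rowSums(shap)), margin, tolerance = 1e-5)
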
@@ -497,63 +523,69 @@ predict.xgb.Booster.handle <- function(object, ...) {
 }
-#' Accessors for serializable attributes of a model.
+#' Accessors for serializable attributes of a model
 #'
 #' These methods allow to manipulate the key-value attribute strings of an xgboost model.
 #'
-#' @param object Object of class \code{xgb.Booster} or \code{xgb.Booster.handle}.
-#' @param name a non-empty character string specifying which attribute is to be accessed.
-#' @param value a value of an attribute for \code{xgb.attr<-}; for \code{xgb.attributes<-}
-#' it's a list (or an object coercible to a list) with the names of attributes to set
-#' and the elements corresponding to attribute values.
-#' Non-character values are converted to character.
-#' When attribute value is not a scalar, only the first index is used.
-#' Use \code{NULL} to remove an attribute.
+#' @param object Object of class `xgb.Booster` or `xgb.Booster.handle`.
+#' @param name A non-empty character string specifying which attribute is to be accessed.
+#' @param value For `xgb.attr<-`, a value of an attribute; for `xgb.attributes<-`,
+#'   it is a list (or an object coercible to a list) with the names of attributes to set
+#'   and the elements corresponding to attribute values.
+#'   Non-character values are converted to character.
+#'   When an attribute value is not a scalar, only the first index is used.
+#'   Use `NULL` to remove an attribute.
 #'
 #' @details
-#' The primary purpose of xgboost model attributes is to store some meta-data about the model.
+#' The primary purpose of xgboost model attributes is to store some metadata about the model.
 #' Note that they are a separate concept from the object attributes in R.
 #' Specifically, they refer to key-value strings that can be attached to an xgboost model,
 #' stored together with the model's binary representation, and accessed later
 #' (from R or any other interface).
-#' In contrast, any R-attribute assigned to an R-object of \code{xgb.Booster} class
-#' would not be saved by \code{xgb.save} because an xgboost model is an external memory object
+#' In contrast, any R attribute assigned to an R object of `xgb.Booster` class
+#' would not be saved by [xgb.save()] because an xgboost model is an external memory object
 #' and its serialization is handled externally.
 #' Also, setting an attribute that has the same name as one of xgboost's parameters wouldn't
 #' change the value of that parameter for a model.
-#' Use \code{\link{xgb.parameters<-}} to set or change model parameters.
+#' Use [xgb.parameters<-()] to set or change model parameters.
 #'
-#' The attribute setters would usually work more efficiently for \code{xgb.Booster.handle}
-#' than for \code{xgb.Booster}, since only just a handle (pointer) would need to be copied.
+#' The attribute setters would usually work more efficiently for `xgb.Booster.handle`
+#' than for `xgb.Booster`, since only a handle (pointer) would need to be copied.
 #' That would only matter if attributes need to be set many times.
-#' Note, however, that when feeding a handle of an \code{xgb.Booster} object to the attribute setters,
-#' the raw model cache of an \code{xgb.Booster} object would not be automatically updated,
-#' and it would be user's responsibility to call \code{xgb.serialize} to update it.
+#' Note, however, that when feeding a handle of an `xgb.Booster` object to the attribute setters,
+#' the raw model cache of an `xgb.Booster` object would not be automatically updated,
+#' and it would be the user's responsibility to call [xgb.serialize()] to update it.
 #'
-#' The \code{xgb.attributes<-} setter either updates the existing or adds one or several attributes,
+#' The `xgb.attributes<-` setter either updates the existing or adds one or several attributes,
 #' but it doesn't delete the other existing attributes.
 #'
 #' @return
-#' \code{xgb.attr} returns either a string value of an attribute
-#' or \code{NULL} if an attribute wasn't stored in a model.
-#'
-#' \code{xgb.attributes} returns a list of all attribute stored in a model
-#' or \code{NULL} if a model has no stored attributes.
+#' - `xgb.attr()` returns either a string value of an attribute
+#'   or `NULL` if an attribute wasn't stored in a model.
+#' - `xgb.attributes()` returns a list of all attributes stored in a model
+#'   or `NULL` if a model has no stored attributes.
 #'
 #' @examples
-#' data(agaricus.train, package='xgboost')
+#' data(agaricus.train, package = "xgboost")
 #' train <- agaricus.train
 #'
-#' bst <- xgboost(data = train$data, label = train$label, max_depth = 2,
-#'                eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic")
+#' bst <- xgboost(
+#'   data = train$data,
+#'   label = train$label,
+#'   max_depth = 2,
+#'   eta = 1,
+#'   nthread = 2,
+#'   nrounds = 2,
+#'   objective = "binary:logistic"
+#' )
 #'
 #' xgb.attr(bst, "my_attribute") <- "my attribute value"
 #' print(xgb.attr(bst, "my_attribute"))
 #' xgb.attributes(bst) <- list(a = 123, b = "abc")
 #'
-#' xgb.save(bst, 'xgb.model')
-#' bst1 <- xgb.load('xgb.model')
-#' if (file.exists('xgb.model')) file.remove('xgb.model')
+#' xgb.save(bst, "xgb.model")
+#' bst1 <- xgb.load("xgb.model")
+#' if (file.exists("xgb.model")) file.remove("xgb.model")
 #' print(xgb.attr(bst1, "my_attribute"))
 #' print(xgb.attributes(bst1))
 #'
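
A small sketch of the removal semantics mentioned in the value parameter description, continuing from the example above: assigning NULL deletes a single attribute while leaving the rest in place.

xgb.attr(bst, "my_attribute") <- NULL # remove one attribute
print(xgb.attributes(bst))            # "a" and "b" are still present
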
@@ -632,22 +664,29 @@ xgb.attributes <- function(object) {
   object
 }
-#' Accessors for model parameters as JSON string.
+#' Accessors for model parameters as JSON string
 #'
-#' @param object Object of class \code{xgb.Booster}
+#' @param object Object of class `xgb.Booster`.
 #' @param value A JSON string.
 #'
 #' @examples
-#' data(agaricus.train, package='xgboost')
+#' data(agaricus.train, package = "xgboost")
+#'
 #' ## Keep the number of threads to 1 for examples
 #' nthread <- 1
 #' data.table::setDTthreads(nthread)
 #' train <- agaricus.train
 #'
 #' bst <- xgboost(
-#'   data = train$data, label = train$label, max_depth = 2,
-#'   eta = 1, nthread = nthread, nrounds = 2, objective = "binary:logistic"
+#'   data = train$data,
+#'   label = train$label,
+#'   max_depth = 2,
+#'   eta = 1,
+#'   nthread = nthread,
+#'   nrounds = 2,
+#'   objective = "binary:logistic"
 #' )
+#'
 #' config <- xgb.config(bst)
 #'
 #' @rdname xgb.config
@@ -667,24 +706,31 @@ xgb.config <- function(object) {
   object
 }
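
For context, an illustrative sketch of the accessor pair this section documents; the setter form xgb.config(bst) <- value is implied by the documented value parameter (a JSON string), and jsonlite is already an import of the package:

config <- xgb.config(bst)            # JSON string holding all model parameters
params <- jsonlite::fromJSON(config) # inspect the configuration as an R list
xgb.config(bst) <- config            # assign a (possibly modified) JSON string back
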
-#' Accessors for model parameters.
+#' Accessors for model parameters
 #'
 #' Only the setter for xgboost parameters is currently implemented.
 #'
-#' @param object Object of class \code{xgb.Booster} or \code{xgb.Booster.handle}.
-#' @param value a list (or an object coercible to a list) with the names of parameters to set
+#' @param object Object of class `xgb.Booster` or `xgb.Booster.handle`.
+#' @param value A list (or an object coercible to a list) with the names of parameters to set
 #' and the elements corresponding to parameter values.
 #'
 #' @details
-#' Note that the setter would usually work more efficiently for \code{xgb.Booster.handle}
-#' than for \code{xgb.Booster}, since only just a handle would need to be copied.
+#' Note that the setter would usually work more efficiently for `xgb.Booster.handle`
+#' than for `xgb.Booster`, since only a handle would need to be copied.
 #'
 #' @examples
-#' data(agaricus.train, package='xgboost')
+#' data(agaricus.train, package = "xgboost")
 #' train <- agaricus.train
 #'
-#' bst <- xgboost(data = train$data, label = train$label, max_depth = 2,
-#'                eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic")
+#' bst <- xgboost(
+#'   data = train$data,
+#'   label = train$label,
+#'   max_depth = 2,
+#'   eta = 1,
+#'   nthread = 2,
+#'   nrounds = 2,
+#'   objective = "binary:logistic"
+#' )
 #'
 #' xgb.parameters(bst) <- list(eta = 0.1)
 #'
@@ -724,23 +770,31 @@ xgb.ntree <- function(bst) {
 #' Print xgb.Booster
 #'
-#' Print information about xgb.Booster.
+#' Print information about `xgb.Booster`.
 #'
-#' @param x an xgb.Booster object
-#' @param verbose whether to print detailed data (e.g., attribute values)
-#' @param ... not currently used
+#' @param x An `xgb.Booster` object.
+#' @param verbose Whether to print detailed data (e.g., attribute values).
+#' @param ... Not currently used.
 #'
 #' @examples
-#' data(agaricus.train, package='xgboost')
+#' data(agaricus.train, package = "xgboost")
 #' train <- agaricus.train
-#' bst <- xgboost(data = train$data, label = train$label, max_depth = 2,
-#'                eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic")
-#' attr(bst, 'myattr') <- 'memo'
+#'
+#' bst <- xgboost(
+#'   data = train$data,
+#'   label = train$label,
+#'   max_depth = 2,
+#'   eta = 1,
+#'   nthread = 2,
+#'   nrounds = 2,
+#'   objective = "binary:logistic"
+#' )
+#'
+#' attr(bst, "myattr") <- "memo"
 #'
 #' print(bst)
 #' print(bst, verbose = TRUE)
 #'
+#' @method print xgb.Booster
 #' @export
 print.xgb.Booster <- function(x, verbose = FALSE, ...) {
   cat('##### xgb.Booster\n')

View File

@@ -51,7 +51,7 @@
 #' dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))
 #' dtest <- with(agaricus.test, xgb.DMatrix(data, label = label, nthread = 2))
 #'
-#' param <- list(max_depth=2, eta=1, silent=1, objective='binary:logistic')
+#' param <- list(max_depth=2, eta=1, objective='binary:logistic')
 #' nrounds = 4
 #'
 #' bst = xgb.train(params = param, data = dtrain, nrounds = nrounds, nthread = 2)

View File

@@ -7,7 +7,7 @@
 #' \code{data}. When it is NULL, it is computed internally using \code{model} and \code{data}.
 #' @param features a vector of either column indices or of feature names to plot. When it is NULL,
 #' feature importance is calculated, and \code{top_n} high ranked features are taken.
-#' @param top_n when \code{features} is NULL, top_n [1, 100] most important features in a model are taken.
+#' @param top_n when \code{features} is NULL, top_n `[1, 100]` most important features in a model are taken.
 #' @param model an \code{xgb.Booster} model. It has to be provided when either \code{shap_contrib}
 #' or \code{features} is missing.
 #' @param trees passed to \code{\link{xgb.importance}} when \code{features = NULL}.
@@ -197,7 +197,7 @@ xgb.plot.shap <- function(data, shap_contrib = NULL, features = NULL, top_n = 1,
 #' hence allows us to see which features have a negative / positive contribution
 #' on the model prediction, and whether the contribution is different for larger
 #' or smaller values of the feature. We effectively try to replicate the
-#' \code{summary_plot} function from https://github.com/shap/shap.
+#' \code{summary_plot} function from <https://github.com/shap/shap>.
 #'
 #' @inheritParams xgb.plot.shap
 #'
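
A usage sketch for the signature shown above, assuming a fitted binary-classification booster bst and its training matrix: with features = NULL, importance is computed from model and the top_n highest-ranked features are plotted.

xgb.plot.shap(
  data = agaricus.train$data, # data used to compute the SHAP values
  model = bst,                # required here because `features` is missing
  top_n = 3                   # plot the 3 most important features
)
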

View File

@@ -40,10 +40,10 @@ xgboost <- function(data = NULL, label = NULL, missing = NA, weight = NULL,
 #' }
 #'
 #' @references
-#' https://archive.ics.uci.edu/ml/datasets/Mushroom
+#' <https://archive.ics.uci.edu/ml/datasets/Mushroom>
 #'
 #' Bache, K. & Lichman, M. (2013). UCI Machine Learning Repository
-#' [http://archive.ics.uci.edu/ml]. Irvine, CA: University of California,
+#' <http://archive.ics.uci.edu/ml>. Irvine, CA: University of California,
 #' School of Information and Computer Science.
 #'
 #' @docType data
@@ -67,10 +67,10 @@ NULL
 #' }
 #'
 #' @references
-#' https://archive.ics.uci.edu/ml/datasets/Mushroom
+#' <https://archive.ics.uci.edu/ml/datasets/Mushroom>
 #'
 #' Bache, K. & Lichman, M. (2013). UCI Machine Learning Repository
-#' [http://archive.ics.uci.edu/ml]. Irvine, CA: University of California,
+#' <http://archive.ics.uci.edu/ml>. Irvine, CA: University of California,
 #' School of Information and Computer Science.
 #'
 #' @docType data

View File

@@ -24,10 +24,10 @@ This data set includes the following fields:
   }
 }
 \references{
-https://archive.ics.uci.edu/ml/datasets/Mushroom
+\url{https://archive.ics.uci.edu/ml/datasets/Mushroom}
 Bache, K. & Lichman, M. (2013). UCI Machine Learning Repository
-[http://archive.ics.uci.edu/ml]. Irvine, CA: University of California,
+\url{http://archive.ics.uci.edu/ml}. Irvine, CA: University of California,
 School of Information and Computer Science.
 }
 \keyword{datasets}

View File

@@ -24,10 +24,10 @@ This data set includes the following fields:
   }
 }
 \references{
-https://archive.ics.uci.edu/ml/datasets/Mushroom
+\url{https://archive.ics.uci.edu/ml/datasets/Mushroom}
 Bache, K. & Lichman, M. (2013). UCI Machine Learning Repository
-[http://archive.ics.uci.edu/ml]. Irvine, CA: University of California,
+\url{http://archive.ics.uci.edu/ml}. Irvine, CA: University of California,
 School of Information and Computer Science.
 }
 \keyword{datasets}

View File

@@ -13,7 +13,7 @@ cb.save.model(save_period = 0, save_name = "xgboost.model")
 \item{save_name}{the name or path for the saved model file.
 It can contain a \code{\link[base]{sprintf}} formatting specifier
 to include the integer iteration number in the file name.
-E.g., with \code{save_name} = 'xgboost_%04d.model',
+E.g., with \code{save_name} = 'xgboost_\%04d.model',
 the file saved at iteration 50 would be named "xgboost_0050.model".}
 }
 \description{

View File

@@ -3,7 +3,7 @@
 \name{predict.xgb.Booster}
 \alias{predict.xgb.Booster}
 \alias{predict.xgb.Booster.handle}
-\title{Predict method for eXtreme Gradient Boosting model}
+\title{Predict method for XGBoost model}
 \usage{
 \method{predict}{xgb.Booster}(
   object,
@@ -25,90 +25,86 @@
 \method{predict}{xgb.Booster.handle}(object, ...)
 }
 \arguments{
-\item{object}{Object of class \code{xgb.Booster} or \code{xgb.Booster.handle}}
+\item{object}{Object of class \code{xgb.Booster} or \code{xgb.Booster.handle}.}
 
-\item{newdata}{takes \code{matrix}, \code{dgCMatrix}, \code{dgRMatrix}, \code{dsparseVector},
-local data file or \code{xgb.DMatrix}.
-
-For single-row predictions on sparse data, it's recommended to use CSR format. If passing
-a sparse vector, it will take it as a row vector.}
+\item{newdata}{Takes \code{matrix}, \code{dgCMatrix}, \code{dgRMatrix}, \code{dsparseVector},
+local data file, or \code{xgb.DMatrix}.
+For single-row predictions on sparse data, it is recommended to use the CSR format.
+If passing a sparse vector, it will take it as a row vector.}
 
-\item{missing}{Missing is only used when input is dense matrix. Pick a float value that represents
-missing values in data (e.g., sometimes 0 or some other extreme value is used).}
+\item{missing}{Only used when input is a dense matrix. Pick a float value that represents
+missing values in data (e.g., 0 or some other extreme value).}
 
-\item{outputmargin}{whether the prediction should be returned in the for of original untransformed
+\item{outputmargin}{Whether the prediction should be returned in the form of original untransformed
 sum of predictions from boosting iterations' results. E.g., setting \code{outputmargin=TRUE} for
-logistic regression would result in predictions for log-odds instead of probabilities.}
+logistic regression would return log-odds instead of probabilities.}
 
 \item{ntreelimit}{Deprecated, use \code{iterationrange} instead.}
 
-\item{predleaf}{whether predict leaf index.}
+\item{predleaf}{Whether to predict per-tree leaf indices.}
 
-\item{predcontrib}{whether to return feature contributions to individual predictions (see Details).}
+\item{predcontrib}{Whether to return feature contributions to individual predictions (see Details).}
 
-\item{approxcontrib}{whether to use a fast approximation for feature contributions (see Details).}
+\item{approxcontrib}{Whether to use a fast approximation for feature contributions (see Details).}
 
-\item{predinteraction}{whether to return contributions of feature interactions to individual predictions (see Details).}
+\item{predinteraction}{Whether to return contributions of feature interactions to individual predictions (see Details).}
 
-\item{reshape}{whether to reshape the vector of predictions to a matrix form when there are several
-prediction outputs per case. This option has no effect when either of predleaf, predcontrib,
-or predinteraction flags is TRUE.}
+\item{reshape}{Whether to reshape the vector of predictions to matrix form when there are several
+prediction outputs per case. No effect if \code{predleaf}, \code{predcontrib},
+or \code{predinteraction} is \code{TRUE}.}
 
-\item{training}{whether is the prediction result used for training. For dart booster,
+\item{training}{Whether the predictions are used for training. For dart booster,
 training predicting will perform dropout.}
 
-\item{iterationrange}{Specifies which layer of trees are used in prediction. For
-example, if a random forest is trained with 100 rounds. Specifying
-`iterationrange=(1, 21)`, then only the forests built during [1, 21) (half open set)
-rounds are used in this prediction. It's 1-based index just like R vector. When set
-to \code{c(1, 1)} XGBoost will use all trees.}
+\item{iterationrange}{Specifies which trees are used in prediction. For
+example, take a random forest with 100 rounds.
+With \code{iterationrange=c(1, 21)}, only the trees built during \verb{[1, 21)} (half open set)
+rounds are used in this prediction. The index is 1-based just like an R vector. When set
+to \code{c(1, 1)}, XGBoost will use all trees.}
 
-\item{strict_shape}{Default is \code{FALSE}. When it's set to \code{TRUE}, output
-type and shape of prediction are invariant to model type.}
+\item{strict_shape}{Default is \code{FALSE}. When set to \code{TRUE}, the output
+type and shape of predictions are invariant to the model type.}
 
 \item{...}{Not used.}
 }
 \value{
-The return type is different depending whether \code{strict_shape} is set to \code{TRUE}. By default,
-for regression or binary classification, it returns a vector of length \code{nrows(newdata)}.
-For multiclass classification, either a \code{num_class * nrows(newdata)} vector or
-a \code{(nrows(newdata), num_class)} dimension matrix is returned, depending on
-the \code{reshape} value.
-
-When \code{predleaf = TRUE}, the output is a matrix object with the
-number of columns corresponding to the number of trees.
-
-When \code{predcontrib = TRUE} and it is not a multiclass setting, the output is a matrix object with
-\code{num_features + 1} columns. The last "+ 1" column in a matrix corresponds to bias.
-For a multiclass case, a list of \code{num_class} elements is returned, where each element is
-such a matrix. The contribution values are on the scale of untransformed margin
-(e.g., for binary classification would mean that the contributions are log-odds deviations from bias).
-
-When \code{predinteraction = TRUE} and it is not a multiclass setting, the output is a 3d array with
-dimensions \code{c(nrow, num_features + 1, num_features + 1)}. The off-diagonal (in the last two dimensions)
-elements represent different features interaction contributions. The array is symmetric WRT the last
-two dimensions. The "+ 1" columns corresponds to bias. Summing this array along the last dimension should
-produce practically the same result as predict with \code{predcontrib = TRUE}.
-For a multiclass case, a list of \code{num_class} elements is returned, where each element is
-such an array.
-
-When \code{strict_shape} is set to \code{TRUE}, the output is always an array. For
-normal prediction, the output is a 2-dimension array \code{(num_class, nrow(newdata))}.
-
-For \code{predcontrib = TRUE}, output is \code{(ncol(newdata) + 1, num_class, nrow(newdata))}
-For \code{predinteraction = TRUE}, output is \code{(ncol(newdata) + 1, ncol(newdata) + 1, num_class, nrow(newdata))}
-For \code{predleaf = TRUE}, output is \code{(n_trees_in_forest, num_class, n_iterations, nrow(newdata))}
+The return type depends on \code{strict_shape}. If \code{FALSE} (default):
+\itemize{
+\item For regression or binary classification: A vector of length \code{nrows(newdata)}.
+\item For multiclass classification: A vector of length \code{num_class * nrows(newdata)} or
+a \verb{(nrows(newdata), num_class)} matrix, depending on the \code{reshape} value.
+\item When \code{predleaf = TRUE}: A matrix with one column per tree.
+\item When \code{predcontrib = TRUE}: When not multiclass, a matrix with
+\code{num_features + 1} columns. The last "+ 1" column corresponds to the baseline value.
+In the multiclass case, a list of \code{num_class} such matrices.
+The contribution values are on the scale of untransformed margin
+(e.g., for binary classification, the values are log-odds deviations from the baseline).
+\item When \code{predinteraction = TRUE}: When not multiclass, the output is a 3d array of
+dimension \code{c(nrow, num_features + 1, num_features + 1)}. The off-diagonal (in the last two dimensions)
+elements represent different feature interaction contributions. The array is symmetric WRT the last
+two dimensions. The "+ 1" columns correspond to the baselines. Summing this array along the last dimension should
+produce practically the same result as \code{predcontrib = TRUE}.
+In the multiclass case, a list of \code{num_class} such arrays.
+}
+
+When \code{strict_shape = TRUE}, the output is always an array:
+\itemize{
+\item For normal predictions, the output has dimension \verb{(num_class, nrow(newdata))}.
+\item For \code{predcontrib = TRUE}, the dimension is \verb{(ncol(newdata) + 1, num_class, nrow(newdata))}.
+\item For \code{predinteraction = TRUE}, the dimension is \verb{(ncol(newdata) + 1, ncol(newdata) + 1, num_class, nrow(newdata))}.
+\item For \code{predleaf = TRUE}, the dimension is \verb{(n_trees_in_forest, num_class, n_iterations, nrow(newdata))}.
+}
 }
 \description{
 Predicted values based on either xgboost model or model handle object.
 }
 \details{
-Note that \code{iterationrange} would currently do nothing for predictions from gblinear,
-since gblinear doesn't keep its boosting history.
+Note that \code{iterationrange} would currently do nothing for predictions from "gblinear",
+since "gblinear" doesn't keep its boosting history.
 
-One possible practical applications of the \code{predleaf} option is to use the model
+One possible practical application of the \code{predleaf} option is to use the model
 as a generator of new features which capture non-linearity and interactions,
-e.g., as implemented in \code{\link{xgb.create.features}}.
+e.g., as implemented in \code{\link[=xgb.create.features]{xgb.create.features()}}.
 
-Setting \code{predcontrib = TRUE} allows to calculate contributions of each feature to
+Setting \code{predcontrib = TRUE} allows calculating contributions of each feature to
 individual predictions. For "gblinear" booster, feature contributions are simply linear terms
@@ -124,14 +120,14 @@ Since it quadratically depends on the number of features, it is recommended to p
 of the most important features first. See below about the format of the returned results.
 
 The \code{predict()} method uses as many threads as defined in \code{xgb.Booster} object (all by default).
-If you want to change their number, then assign a new number to \code{nthread} using \code{\link{xgb.parameters<-}}.
-Note also that converting a matrix to \code{\link{xgb.DMatrix}} uses multiple threads too.
+If you want to change their number, assign a new number to \code{nthread} using \code{\link[=xgb.parameters<-]{xgb.parameters<-()}}.
+Note that converting a matrix to \code{\link[=xgb.DMatrix]{xgb.DMatrix()}} uses multiple threads too.
 }
 \examples{
 ## binary classification:
 
-data(agaricus.train, package='xgboost')
-data(agaricus.test, package='xgboost')
+data(agaricus.train, package = "xgboost")
+data(agaricus.test, package = "xgboost")
 
 ## Keep the number of threads to 2 for examples
 nthread <- 2
@@ -140,8 +136,16 @@ data.table::setDTthreads(nthread)
 train <- agaricus.train
 test <- agaricus.test
 
-bst <- xgboost(data = train$data, label = train$label, max_depth = 2,
-               eta = 0.5, nthread = nthread, nrounds = 5, objective = "binary:logistic")
+bst <- xgboost(
+  data = train$data,
+  label = train$label,
+  max_depth = 2,
+  eta = 0.5,
+  nthread = nthread,
+  nrounds = 5,
+  objective = "binary:logistic"
+)
+
 # use all trees by default
 pred <- predict(bst, test$data)
 # use only the 1st tree
@@ -173,10 +177,21 @@ par(mar = old_mar)
 
 lb <- as.numeric(iris$Species) - 1
 num_class <- 3
+
 set.seed(11)
-bst <- xgboost(data = as.matrix(iris[, -5]), label = lb,
-               max_depth = 4, eta = 0.5, nthread = 2, nrounds = 10, subsample = 0.5,
-               objective = "multi:softprob", num_class = num_class)
+
+bst <- xgboost(
+  data = as.matrix(iris[, -5]),
+  label = lb,
+  max_depth = 4,
+  eta = 0.5,
+  nthread = 2,
+  nrounds = 10,
+  subsample = 0.5,
+  objective = "multi:softprob",
+  num_class = num_class
+)
+
 # predict for softmax returns num_class probability numbers per case:
 pred <- predict(bst, as.matrix(iris[, -5]))
 str(pred)
@@ -187,11 +202,21 @@ pred_labels <- max.col(pred) - 1
 # the following should result in the same error as seen in the last iteration
 sum(pred_labels != lb) / length(lb)
 
-# compare that to the predictions from softmax:
+# compare with predictions from softmax:
 set.seed(11)
-bst <- xgboost(data = as.matrix(iris[, -5]), label = lb,
-               max_depth = 4, eta = 0.5, nthread = 2, nrounds = 10, subsample = 0.5,
-               objective = "multi:softmax", num_class = num_class)
+
+bst <- xgboost(
+  data = as.matrix(iris[, -5]),
+  label = lb,
+  max_depth = 4,
+  eta = 0.5,
+  nthread = 2,
+  nrounds = 10,
+  subsample = 0.5,
+  objective = "multi:softmax",
+  num_class = num_class
+)
+
 pred <- predict(bst, as.matrix(iris[, -5]))
 str(pred)
 all.equal(pred, pred_labels)
@@ -202,10 +227,13 @@ sum(pred5 != lb)/length(lb)
 }
 \references{
-Scott M. Lundberg, Su-In Lee, "A Unified Approach to Interpreting Model Predictions", NIPS Proceedings 2017, \url{https://arxiv.org/abs/1705.07874}
-
-Scott M. Lundberg, Su-In Lee, "Consistent feature attribution for tree ensembles", \url{https://arxiv.org/abs/1706.06060}
+\enumerate{
+\item Scott M. Lundberg, Su-In Lee, "A Unified Approach to Interpreting Model Predictions",
+NIPS Proceedings 2017, \url{https://arxiv.org/abs/1705.07874}
+\item Scott M. Lundberg, Su-In Lee, "Consistent feature attribution for tree ensembles",
+\url{https://arxiv.org/abs/1706.06060}
+}
 }
 \seealso{
-\code{\link{xgb.train}}.
+\code{\link[=xgb.train]{xgb.train()}}
 }

View File

@@ -7,21 +7,30 @@
 \method{print}{xgb.Booster}(x, verbose = FALSE, ...)
 }
 \arguments{
-\item{x}{an xgb.Booster object}
+\item{x}{An \code{xgb.Booster} object.}
 
-\item{verbose}{whether to print detailed data (e.g., attribute values)}
+\item{verbose}{Whether to print detailed data (e.g., attribute values).}
 
-\item{...}{not currently used}
+\item{...}{Not currently used.}
 }
 \description{
-Print information about xgb.Booster.
+Print information about \code{xgb.Booster}.
 }
 \examples{
-data(agaricus.train, package='xgboost')
+data(agaricus.train, package = "xgboost")
 train <- agaricus.train
-bst <- xgboost(data = train$data, label = train$label, max_depth = 2,
-               eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic")
-attr(bst, 'myattr') <- 'memo'
+
+bst <- xgboost(
+  data = train$data,
+  label = train$label,
+  max_depth = 2,
+  eta = 1,
+  nthread = 2,
+  nrounds = 2,
+  objective = "binary:logistic"
+)
+
+attr(bst, "myattr") <- "memo"
 
 print(bst)
 print(bst, verbose = TRUE)

View File

@@ -2,14 +2,14 @@
 % Please edit documentation in R/xgb.Booster.R
 \name{xgb.Booster.complete}
 \alias{xgb.Booster.complete}
-\title{Restore missing parts of an incomplete xgb.Booster object.}
+\title{Restore missing parts of an incomplete xgb.Booster object}
 \usage{
 xgb.Booster.complete(object, saveraw = TRUE)
 }
 \arguments{
-\item{object}{object of class \code{xgb.Booster}}
+\item{object}{Object of class \code{xgb.Booster}.}
 
-\item{saveraw}{a flag indicating whether to append \code{raw} Booster memory dump data
+\item{saveraw}{A flag indicating whether to append \code{raw} Booster memory dump data
 when it doesn't already exist.}
 }
 \value{
@@ -27,15 +27,24 @@ While this method is primarily for internal use, it might be useful in some prac
 E.g., when an \code{xgb.Booster} model is saved as an R object and then is loaded as an R object,
 its handle (pointer) to an internal xgboost model would be invalid. The majority of xgboost methods
 should still work for such a model object since those methods would be using
-\code{xgb.Booster.complete} internally. However, one might find it to be more efficient to call the
-\code{xgb.Booster.complete} function explicitly once after loading a model as an R-object.
+\code{xgb.Booster.complete()} internally. However, one might find it to be more efficient to call the
+\code{xgb.Booster.complete()} function explicitly once after loading a model as an R-object.
 That would prevent further repeated implicit reconstruction of an internal booster model.
 }
 \examples{
-data(agaricus.train, package='xgboost')
-bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label, max_depth = 2,
-               eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic")
+data(agaricus.train, package = "xgboost")
+
+bst <- xgboost(
+  data = agaricus.train$data,
+  label = agaricus.train$label,
+  max_depth = 2,
+  eta = 1,
+  nthread = 2,
+  nrounds = 2,
+  objective = "binary:logistic"
+)
+
 saveRDS(bst, "xgb.model.rds")
 
 # Warning: The resulting RDS file is only compatible with the current XGBoost version.

View File

@@ -38,7 +38,8 @@ so it doesn't make sense to assign weights to individual data points.}
 \item{base_margin}{Base margin used for boosting from existing model.
-In the case of multi-output models, one can also pass multi-dimensional base_margin.}
+\if{html}{\out{<div class="sourceCode">}}\preformatted{ In the case of multi-output models, one can also pass multi-dimensional base_margin.
+}\if{html}{\out{</div>}}}
 
 \item{missing}{a float value to represents missing values in data (used only when input is a dense matrix).
 It is useful when a 0 or some other extreme value represents missing values in data.}
@@ -62,7 +63,7 @@ frame and matrix.}
 \item{enable_categorical}{Experimental support of specializing for categorical features.
-If passing 'TRUE' and 'data' is a data frame,
+\if{html}{\out{<div class="sourceCode">}}\preformatted{ If passing 'TRUE' and 'data' is a data frame,
 columns of categorical types will automatically
 be set to be of categorical type (feature_type='c') in the resulting DMatrix.
@@ -71,7 +72,8 @@ frame and matrix.}
 If 'data' is not a data frame, this argument is ignored.
-JSON/UBJSON serialization format is required for this.}
+JSON/UBJSON serialization format is required for this.
+}\if{html}{\out{</div>}}}
 }
 \description{
 Construct xgb.DMatrix object from either a dense matrix, a sparse matrix, or a local file.
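
A construction sketch matching the arguments documented above; the inputs are the package's bundled agaricus data, and the missing value shown is illustrative:

data(agaricus.train, package = "xgboost")
dtrain <- xgb.DMatrix(
  data = agaricus.train$data,   # dense or sparse matrix, or a local file path
  label = agaricus.train$label,
  missing = NA,                 # float standing in for missing entries in dense input
  nthread = 2
)
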

View File

@ -5,7 +5,7 @@
\alias{xgb.attr<-} \alias{xgb.attr<-}
\alias{xgb.attributes} \alias{xgb.attributes}
\alias{xgb.attributes<-} \alias{xgb.attributes<-}
\title{Accessors for serializable attributes of a model.} \title{Accessors for serializable attributes of a model}
\usage{ \usage{
xgb.attr(object, name) xgb.attr(object, name)
@ -18,62 +18,70 @@ xgb.attributes(object) <- value
\arguments{ \arguments{
\item{object}{Object of class \code{xgb.Booster} or \code{xgb.Booster.handle}.} \item{object}{Object of class \code{xgb.Booster} or \code{xgb.Booster.handle}.}
\item{name}{a non-empty character string specifying which attribute is to be accessed.} \item{name}{A non-empty character string specifying which attribute is to be accessed.}
\item{value}{a value of an attribute for \code{xgb.attr<-}; for \code{xgb.attributes<-} \item{value}{For \verb{xgb.attr<-}, a value of an attribute; for \verb{xgb.attributes<-},
it's a list (or an object coercible to a list) with the names of attributes to set it is a list (or an object coercible to a list) with the names of attributes to set
and the elements corresponding to attribute values.
Non-character values are converted to character.
When attribute value is not a scalar, only the first index is used. When an attribute value is not a scalar, only the first index is used.
Use \code{NULL} to remove an attribute.}
}
\value{
\code{xgb.attr} returns either a string value of an attribute \itemize{
\item \code{xgb.attr()} returns either a string value of an attribute
or \code{NULL} if an attribute wasn't stored in a model.
\item \code{xgb.attributes()} returns a list of all attributes stored in a model
\code{xgb.attributes} returns a list of all attribute stored in a model
or \code{NULL} if a model has no stored attributes.
}
}
\description{
These methods allow manipulating the key-value attribute strings of an xgboost model.
}
\details{
The primary purpose of xgboost model attributes is to store some meta-data about the model. The primary purpose of xgboost model attributes is to store some meta data about the model.
Note that they are a separate concept from the object attributes in R.
Specifically, they refer to key-value strings that can be attached to an xgboost model,
stored together with the model's binary representation, and accessed later
(from R or any other interface).
In contrast, any R-attribute assigned to an R-object of \code{xgb.Booster} class In contrast, any R attribute assigned to an R object of \code{xgb.Booster} class
would not be saved by \code{xgb.save} because an xgboost model is an external memory object would not be saved by \code{\link[=xgb.save]{xgb.save()}} because an xgboost model is an external memory object
and its serialization is handled externally.
Also, setting an attribute that has the same name as one of xgboost's parameters wouldn't
change the value of that parameter for a model.
Use \code{\link{xgb.parameters<-}} to set or change model parameters. Use \code{\link[=xgb.parameters<-]{xgb.parameters<-()}} to set or change model parameters.
The attribute setters would usually work more efficiently for \code{xgb.Booster.handle}
than for \code{xgb.Booster}, since only a handle (pointer) would need to be copied.
That would only matter if attributes need to be set many times.
Note, however, that when feeding a handle of an \code{xgb.Booster} object to the attribute setters,
the raw model cache of an \code{xgb.Booster} object would not be automatically updated,
and it would be user's responsibility to call \code{xgb.serialize} to update it. and it would be the user's responsibility to call \code{\link[=xgb.serialize]{xgb.serialize()}} to update it.
The \code{xgb.attributes<-} setter either updates the existing or adds one or several attributes, The \verb{xgb.attributes<-} setter either updates the existing or adds one or several attributes,
but it doesn't delete the other existing attributes.
}
\examples{
data(agaricus.train, package='xgboost') data(agaricus.train, package = "xgboost")
train <- agaricus.train
bst <- xgboost(data = train$data, label = train$label, max_depth = 2, bst <- xgboost(
eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic") data = train$data,
label = train$label,
max_depth = 2,
eta = 1,
nthread = 2,
nrounds = 2,
objective = "binary:logistic"
)
xgb.attr(bst, "my_attribute") <- "my attribute value"
print(xgb.attr(bst, "my_attribute"))
xgb.attributes(bst) <- list(a = 123, b = "abc")
xgb.save(bst, 'xgb.model') xgb.save(bst, "xgb.model")
bst1 <- xgb.load('xgb.model') bst1 <- xgb.load("xgb.model")
if (file.exists('xgb.model')) file.remove('xgb.model') if (file.exists("xgb.model")) file.remove("xgb.model")
print(xgb.attr(bst1, "my_attribute"))
print(xgb.attributes(bst1))
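The handle-based path from the details section, as a hedged sketch; it assumes the booster object exposes `$handle` and `$raw` fields, as in this generation of the R package:

# Setting an attribute through the handle copies only a pointer, but the
# raw model cache is then the user's responsibility to refresh:
xgb.attr(bst$handle, "my_attribute") <- "set via handle"
bst$raw <- xgb.serialize(bst$handle)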

View File

@ -3,31 +3,38 @@
\name{xgb.config}
\alias{xgb.config}
\alias{xgb.config<-}
\title{Accessors for model parameters as JSON string.} \title{Accessors for model parameters as JSON string}
\usage{
xgb.config(object)
xgb.config(object) <- value
}
\arguments{
\item{object}{Object of class \code{xgb.Booster}} \item{object}{Object of class \code{xgb.Booster}.}
\item{value}{A JSON string.}
}
\description{
Accessors for model parameters as JSON string. Accessors for model parameters as JSON string
}
\examples{
data(agaricus.train, package='xgboost') data(agaricus.train, package = "xgboost")
## Keep the number of threads to 1 for examples
nthread <- 1
data.table::setDTthreads(nthread)
train <- agaricus.train
bst <- xgboost(
data = train$data, label = train$label, max_depth = 2, data = train$data,
eta = 1, nthread = nthread, nrounds = 2, objective = "binary:logistic" label = train$label,
max_depth = 2,
eta = 1,
nthread = nthread,
nrounds = 2,
objective = "binary:logistic"
)
config <- xgb.config(bst)
}
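The setter side of the accessor pair, as a hedged sketch; the exact configuration fields vary across XGBoost versions, and `jsonlite` is already among the package's imports:

# Round-trip the JSON configuration: parse it, optionally tweak a field,
# and assign the string back to the booster.
cfg <- jsonlite::fromJSON(config, simplifyVector = FALSE)
xgb.config(bst) <- jsonlite::toJSON(cfg, auto_unbox = TRUE)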

View File

@ -48,7 +48,7 @@ be the binary vector \code{[0, 1, 0, 1, 0]}, where the first 3 entries
correspond to the leaves of the first subtree and the last 2 to
those of the second subtree.
[...] \link{...}
We can understand boosted decision tree
based transformation as a supervised feature encoding that
@ -62,7 +62,7 @@ data(agaricus.test, package='xgboost')
dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))
dtest <- with(agaricus.test, xgb.DMatrix(data, label = label, nthread = 2))
param <- list(max_depth=2, eta=1, silent=1, objective='binary:logistic') param <- list(max_depth=2, eta=1, objective='binary:logistic')
nrounds <- 4
bst <- xgb.train(params = param, data = dtrain, nrounds = nrounds, nthread = 2)
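A hedged sketch of the encoding described above, built directly from leaf-index predictions rather than via the package's own `xgb.create.features()`:

# One leaf index per observation and per tree:
pred_leaf <- predict(bst, dtrain, predleaf = TRUE)  # n x nrounds matrix
# Shift each tree's leaf ids into a disjoint column range, then one-hot
# encode every (tree, leaf) pair as a column of a sparse matrix:
offsets <- c(0, cumsum(apply(pred_leaf, 2, max) + 1))[seq_len(ncol(pred_leaf))]
cols <- sweep(pred_leaf, 2, offsets, "+") + 1
new_features <- Matrix::sparseMatrix(
  i = rep(seq_len(nrow(cols)), times = ncol(cols)),
  j = as.vector(cols),
  x = 1
)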

View File

@ -2,14 +2,14 @@
% Please edit documentation in R/xgb.Booster.R
\name{xgb.parameters<-}
\alias{xgb.parameters<-}
\title{Accessors for model parameters.} \title{Accessors for model parameters}
\usage{
xgb.parameters(object) <- value
}
\arguments{
\item{object}{Object of class \code{xgb.Booster} or \code{xgb.Booster.handle}.}
\item{value}{a list (or an object coercible to a list) with the names of parameters to set \item{value}{A list (or an object coercible to a list) with the names of parameters to set
and the elements corresponding to parameter values.}
}
\description{
@ -20,11 +20,18 @@ Note that the setter would usually work more efficiently for \code{xgb.Booster.h
than for \code{xgb.Booster}, since only a handle would need to be copied.
}
\examples{
data(agaricus.train, package='xgboost') data(agaricus.train, package = "xgboost")
train <- agaricus.train
bst <- xgboost(data = train$data, label = train$label, max_depth = 2, bst <- xgboost(
eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic") data = train$data,
label = train$label,
max_depth = 2,
eta = 1,
nthread = 2,
nrounds = 2,
objective = "binary:logistic"
)
xgb.parameters(bst) <- list(eta = 0.1)
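A hedged follow-up: several parameters may be set in one call, and the effect can be checked through the JSON configuration (field names vary by XGBoost version):

xgb.parameters(bst) <- list(eta = 0.1, max_depth = 3)
cat(xgb.config(bst))  # inspect the updated learner configuration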

View File

@ -41,7 +41,7 @@ xgb.plot.shap(
\item{features}{a vector of either column indices or of feature names to plot. When it is NULL,
feature importance is calculated, and \code{top_n} high ranked features are taken.}
\item{top_n}{when \code{features} is NULL, top_n [1, 100] most important features in a model are taken.} \item{top_n}{when \code{features} is NULL, top_n \verb{[1, 100]} most important features in a model are taken.}
\item{model}{an \code{xgb.Booster} model. It has to be provided when either \code{shap_contrib}
or \code{features} is missing.}

View File

@ -38,7 +38,7 @@ xgb.plot.shap.summary(
\item{features}{a vector of either column indices or of feature names to plot. When it is NULL,
feature importance is calculated, and \code{top_n} high ranked features are taken.}
\item{top_n}{when \code{features} is NULL, top_n [1, 100] most important features in a model are taken.} \item{top_n}{when \code{features} is NULL, top_n \verb{[1, 100]} most important features in a model are taken.}
\item{model}{an \code{xgb.Booster} model. It has to be provided when either \code{shap_contrib}
or \code{features} is missing.}
@ -67,7 +67,7 @@ Each point (observation) is coloured based on its feature value. The plot
hence allows us to see which features have a negative / positive contribution
on the model prediction, and whether the contribution is different for larger
or smaller values of the feature. We effectively try to replicate the
\code{summary_plot} function from https://github.com/shap/shap. \code{summary_plot} function from \url{https://github.com/shap/shap}.
}
\examples{
# See \code{\link{xgb.plot.shap}}.
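A hedged usage sketch in the spirit of the `xgb.plot.shap()` examples, assuming a fitted binary classifier `bst` on the agaricus data:

data(agaricus.test, package = "xgboost")
# Beeswarm-style summary of SHAP values for the 5 most important features:
xgb.plot.shap.summary(agaricus.test$data, model = bst, top_n = 5)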

View File

@ -27,7 +27,7 @@ xgb.shap.data(
\item{features}{a vector of either column indices or of feature names to plot. When it is NULL,
feature importance is calculated, and \code{top_n} high ranked features are taken.}
\item{top_n}{when \code{features} is NULL, top_n [1, 100] most important features in a model are taken.} \item{top_n}{when \code{features} is NULL, top_n \verb{[1, 100]} most important features in a model are taken.}
\item{model}{an \code{xgb.Booster} model. It has to be provided when either \code{shap_contrib}
or \code{features} is missing.}

View File

@ -45,14 +45,16 @@ xgboost(
\item{params}{the list of parameters. The complete list of parameters is
available in the \href{http://xgboost.readthedocs.io/en/latest/parameter.html}{online documentation}. Below
is a shorter summary:
\enumerate{
1. General Parameters \item General Parameters
}
\itemize{
\item \code{booster} which booster to use, can be \code{gbtree} or \code{gblinear}. Default: \code{gbtree}.
}
\enumerate{
2. Booster Parameters \item Booster Parameters
}
2.1. Parameters for Tree Booster
@ -97,8 +99,9 @@ xgboost(
\item \code{lambda_bias} L2 regularization term on bias. Default: 0
\item \code{alpha} L1 regularization term on weights. (there is no L1 reg on bias because it is not important). Default: 0
}
\enumerate{
3. Task Parameters \item Task Parameters
}
\itemize{
\item{ \code{objective} specify the learning task and the corresponding learning objective, users can pass a self-defined function to it.
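Tying the three parameter groups above together, a hedged sketch; the values are illustrative only, with `train` as in the earlier agaricus examples:

params <- list(
  booster = "gbtree",               # 1. general parameter
  max_depth = 2, eta = 1,           # 2. tree booster parameters
  objective = "binary:logistic"     # 3. task parameter
)
bst <- xgboost(data = train$data, label = train$label,
               params = params, nrounds = 2, nthread = 2)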

View File

@ -11,7 +11,7 @@ xgb.unserialize(buffer, handle = NULL)
\item{handle}{An \code{xgb.Booster.handle} object which will be overwritten with
the new deserialized object. Must be a null handle (e.g. when loading the model through
`readRDS`). If not provided, a new handle will be created.} \code{readRDS}). If not provided, a new handle will be created.}
}
\value{
An \code{xgb.Booster.handle} object.
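A hedged round-trip sketch, assuming a fitted booster `bst` as in the earlier examples:

buf <- xgb.serialize(bst)       # raw vector holding the complete booster state
handle <- xgb.unserialize(buf)  # yields an xgb.Booster.handle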