[R] Rename watchlist -> evals (#10032)
commit b023a253b4
parent 2c13f90384
@@ -56,10 +56,10 @@
 #' For \link{xgb.cv}, folds are a list with a structure as follows:\itemize{
 #' \item `dtrain`: The training data for the fold (as an `xgb.DMatrix` object).
 #' \item `bst`: Rhe `xgb.Booster` object for the fold.
-#' \item `watchlist`: A list with two DMatrices, with names `train` and `test`
+#' \item `evals`: A list containing two DMatrices, with names `train` and `test`
 #' (`test` is the held-out data for the fold).
 #' \item `index`: The indices of the hold-out data for that fold (base-1 indexing),
-#' from which the `test` entry in the watchlist was obtained.
+#' from which the `test` entry in `evals` was obtained.
 #' }
 #'
 #' This object should \bold{not} be in-place modified in ways that conflict with the
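For reference, a minimal sketch (not part of this diff) of reading the per-fold structure described above from a custom callback during xgb.cv; the callback name and body are illustrative assumptions grounded only on the documented fold fields (`dtrain`, `bst`, `evals`, `index`):

library(xgboost)
data(agaricus.train, package = "xgboost")
dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
fold_sizes_cb <- xgb.Callback(
  cb_name = "fold_sizes",
  f_after_training = function(env, model, data, evals, iteration, final_feval, prev_cb_res) {
    # under xgb.cv, 'model' is the list of folds described above
    vapply(model, function(fd) nrow(fd$evals$test), numeric(1))
  }
)
cv_res <- xgb.cv(
  params = list(objective = "binary:logistic", nthread = 2),
  data = dtrain, nrounds = 2, nfold = 3,
  callbacks = list(fold_sizes_cb)
)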
@@ -78,7 +78,7 @@
 #' Note that, for \link{xgb.cv}, this will be the full data, while data for the specific
 #' folds can be found in the `model` object.
 #'
-#' \item watchlist The evaluation watchlist, as passed under argument `watchlist` to
+#' \item evals The evaluation data, as passed under argument `evals` to
 #' \link{xgb.train}.
 #'
 #' For \link{xgb.cv}, this will always be `NULL`.

@@ -101,15 +101,15 @@
 #' \item iteration Index of the iteration number that is being executed (first iteration
 #' will be the same as parameter `begin_iteration`, then next one will add +1, and so on).
 #'
-#' \item iter_feval Evaluation metrics for the `watchlist` that was supplied, either
+#' \item iter_feval Evaluation metrics for `evals` that were supplied, either
 #' determined by the objective, or by parameter `feval`.
 #'
 #' For \link{xgb.train}, this will be a named vector with one entry per element in
-#' `watchlist`, where the names are determined as 'watchlist name' + '-' + 'metric name' - for
+#' `evals`, where the names are determined as 'evals name' + '-' + 'metric name' - for
-#' example, if `watchlist` contains an entry named "tr" and the metric is "rmse",
+#' example, if `evals` contains an entry named "tr" and the metric is "rmse",
 #' this will be a one-element vector with name "tr-rmse".
 #'
-#' For \link{xgb.cv}, this will be a 2d matrix with dimensions `[length(watchlist), nfolds]`,
+#' For \link{xgb.cv}, this will be a 2d matrix with dimensions `[length(evals), nfolds]`,
 #' where the row names will follow the same naming logic as the one-dimensional vector
 #' that is passed in \link{xgb.train}.
 #'
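For context, a minimal sketch (not part of this diff) of the naming convention described above: an `evals` entry named "tr" evaluated with the "rmse" metric is reported as "tr-rmse".

library(xgboost)
data(agaricus.train, package = "xgboost")
dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
params <- list(objective = "binary:logistic", eval_metric = "rmse", nthread = 2)
# with the default verbosity this prints per-iteration lines such as "[1]  tr-rmse:..."
bst <- xgb.train(params, dtrain, nrounds = 2, evals = list(tr = dtrain))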
@@ -169,18 +169,18 @@
 #' }
 #' @examples
 #' # Example constructing a custom callback that calculates
-#' # squared error on the training data, without a watchlist,
+#' # squared error on the training data (no separate test set),
 #' # and outputs the per-iteration results.
 #' ssq_callback <- xgb.Callback(
 #' cb_name = "ssq",
-#' f_before_training = function(env, model, data, watchlist,
+#' f_before_training = function(env, model, data, evals,
 #' begin_iteration, end_iteration) {
 #' # A vector to keep track of a number at each iteration
 #' env$logs <- rep(NA_real_, end_iteration - begin_iteration + 1)
 #' },
-#' f_after_iter = function(env, model, data, watchlist, iteration, iter_feval) {
+#' f_after_iter = function(env, model, data, evals, iteration, iter_feval) {
 #' # This calculates the sum of squared errors on the training data.
-#' # Note that this can be better done by passing a 'watchlist' entry,
+#' # Note that this can be better done by passing an 'evals' entry,
 #' # but this demonstrates a way in which callbacks can be structured.
 #' pred <- predict(model, data)
 #' err <- pred - getinfo(data, "label")

@@ -196,7 +196,7 @@
 #' # A return value of 'TRUE' here would signal to finalize the training
 #' return(FALSE)
 #' },
-#' f_after_training = function(env, model, data, watchlist, iteration,
+#' f_after_training = function(env, model, data, evals, iteration,
 #' final_feval, prev_cb_res) {
 #' return(env$logs)
 #' }

@@ -220,10 +220,10 @@
 xgb.Callback <- function(
 cb_name = "custom_callback",
 env = new.env(),
-f_before_training = function(env, model, data, watchlist, begin_iteration, end_iteration) NULL,
+f_before_training = function(env, model, data, evals, begin_iteration, end_iteration) NULL,
-f_before_iter = function(env, model, data, watchlist, iteration) NULL,
+f_before_iter = function(env, model, data, evals, iteration) NULL,
-f_after_iter = function(env, model, data, watchlist, iteration, iter_feval) NULL,
+f_after_iter = function(env, model, data, evals, iteration, iter_feval) NULL,
-f_after_training = function(env, model, data, watchlist, iteration, final_feval, prev_cb_res) NULL
+f_after_training = function(env, model, data, evals, iteration, final_feval, prev_cb_res) NULL
 ) {
 stopifnot(is.null(f_before_training) || is.function(f_before_training))
 stopifnot(is.null(f_before_iter) || is.function(f_before_iter))
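A minimal sketch (not part of this diff) of a callback written against the renamed signature above; the callback name and behaviour are illustrative assumptions. It records which evaluation datasets were passed and returns their names after training:

eval_names_cb <- xgb.Callback(
  cb_name = "eval_names",
  f_before_training = function(env, model, data, evals, begin_iteration, end_iteration) {
    env$seen <- names(evals)
  },
  f_after_training = function(env, model, data, evals, iteration, final_feval, prev_cb_res) {
    env$seen
  }
)
# attached to training via, e.g., xgb.train(..., callbacks = list(eval_names_cb))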
@@ -251,7 +251,7 @@ xgb.Callback <- function(
 callbacks,
 model,
 data,
-watchlist,
+evals,
 begin_iteration,
 end_iteration
 ) {

@@ -261,7 +261,7 @@ xgb.Callback <- function(
 callback$env,
 model,
 data,
-watchlist,
+evals,
 begin_iteration,
 end_iteration
 )

@@ -273,7 +273,7 @@ xgb.Callback <- function(
 callbacks,
 model,
 data,
-watchlist,
+evals,
 iteration
 ) {
 if (!length(callbacks)) {

@@ -287,7 +287,7 @@ xgb.Callback <- function(
 cb$env,
 model,
 data,
-watchlist,
+evals,
 iteration
 )
 if (!NROW(should_stop)) {

@@ -304,7 +304,7 @@ xgb.Callback <- function(
 callbacks,
 model,
 data,
-watchlist,
+evals,
 iteration,
 iter_feval
 ) {

@@ -319,7 +319,7 @@ xgb.Callback <- function(
 cb$env,
 model,
 data,
-watchlist,
+evals,
 iteration,
 iter_feval
 )

@@ -337,7 +337,7 @@ xgb.Callback <- function(
 callbacks,
 model,
 data,
-watchlist,
+evals,
 iteration,
 final_feval,
 prev_cb_res

@@ -355,7 +355,7 @@ xgb.Callback <- function(
 cb$env,
 model,
 data,
-watchlist,
+evals,
 iteration,
 final_feval,
 getElement(old_cb_res, cb$cb_name)

@@ -428,7 +428,7 @@ xgb.cb.print.evaluation <- function(period = 1, showsd = TRUE) {
 env = as.environment(list(period = period, showsd = showsd, is_first_call = TRUE)),
 f_before_training = NULL,
 f_before_iter = NULL,
-f_after_iter = function(env, model, data, watchlist, iteration, iter_feval) {
+f_after_iter = function(env, model, data, evals, iteration, iter_feval) {
 if (is.null(iter_feval)) {
 return(FALSE)
 }

@@ -439,7 +439,7 @@ xgb.cb.print.evaluation <- function(period = 1, showsd = TRUE) {
 env$is_first_call <- FALSE
 return(FALSE)
 },
-f_after_training = function(env, model, data, watchlist, iteration, final_feval, prev_cb_res) {
+f_after_training = function(env, model, data, evals, iteration, final_feval, prev_cb_res) {
 if (is.null(final_feval)) {
 return(NULL)
 }

@@ -453,7 +453,7 @@ xgb.cb.print.evaluation <- function(period = 1, showsd = TRUE) {
 #' @title Callback for logging the evaluation history
 #' @return An `xgb.Callback` object, which can be passed to \link{xgb.train} or \link{xgb.cv}.
 #' @details This callback creates a table with per-iteration evaluation metrics (see parameters
-#' `watchlist` and `feval` in \link{xgb.train}).
+#' `evals` and `feval` in \link{xgb.train}).
 #' @details
 #' Note: in the column names of the final data.table, the dash '-' character is replaced with
 #' the underscore '_' in order to make the column names more like regular R identifiers.
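For context, a minimal sketch (not part of this diff) of the logged table this callback produces, with the dash-to-underscore renaming mentioned above; the exact column names are an assumption based on that rule:

library(xgboost)
data(agaricus.train, package = "xgboost")
dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
cv_res <- xgb.cv(
  params = list(objective = "binary:logistic", eval_metric = "logloss", nthread = 2),
  data = dtrain, nrounds = 3, nfold = 3
)
# columns look like: iter, train_logloss_mean, train_logloss_std, test_logloss_mean, test_logloss_std
print(cv_res$evaluation_log)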
@@ -462,18 +462,18 @@ xgb.cb.print.evaluation <- function(period = 1, showsd = TRUE) {
 xgb.cb.evaluation.log <- function() {
 xgb.Callback(
 cb_name = "evaluation_log",
-f_before_training = function(env, model, data, watchlist, begin_iteration, end_iteration) {
+f_before_training = function(env, model, data, evals, begin_iteration, end_iteration) {
 env$evaluation_log <- vector("list", end_iteration - begin_iteration + 1)
 env$next_log <- 1
 },
 f_before_iter = NULL,
-f_after_iter = function(env, model, data, watchlist, iteration, iter_feval) {
+f_after_iter = function(env, model, data, evals, iteration, iter_feval) {
 tmp <- .summarize.feval(iter_feval, TRUE)
 env$evaluation_log[[env$next_log]] <- list(iter = iteration, metrics = tmp$feval, sds = tmp$stdev)
 env$next_log <- env$next_log + 1
 return(FALSE)
 },
-f_after_training = function(env, model, data, watchlist, iteration, final_feval, prev_cb_res) {
+f_after_training = function(env, model, data, evals, iteration, final_feval, prev_cb_res) {
 if (!NROW(env$evaluation_log)) {
 return(prev_cb_res)
 }

@@ -543,7 +543,7 @@ xgb.cb.reset.parameters <- function(new_params) {
 xgb.Callback(
 cb_name = "reset_parameters",
 env = as.environment(list(new_params = new_params)),
-f_before_training = function(env, model, data, watchlist, begin_iteration, end_iteration) {
+f_before_training = function(env, model, data, evals, begin_iteration, end_iteration) {
 env$end_iteration <- end_iteration
 
 pnames <- gsub(".", "_", names(env$new_params), fixed = TRUE)

@@ -560,7 +560,7 @@ xgb.cb.reset.parameters <- function(new_params) {
 }
 }
 },
-f_before_iter = function(env, model, data, watchlist, iteration) {
+f_before_iter = function(env, model, data, evals, iteration) {
 pars <- lapply(env$new_params, function(p) {
 if (is.function(p)) {
 return(p(iteration, env$end_iteration))

@@ -589,9 +589,9 @@ xgb.cb.reset.parameters <- function(new_params) {
 #' @param maximize Whether to maximize the evaluation metric.
 #' @param metric_name The name of an evaluation column to use as a criteria for early
 #' stopping. If not set, the last column would be used.
-#' Let's say the test data in \code{watchlist} was labelled as \code{dtest},
+#' Let's say the test data in \code{evals} was labelled as \code{dtest},
 #' and one wants to use the AUC in test data for early stopping regardless of where
-#' it is in the \code{watchlist}, then one of the following would need to be set:
+#' it is in the \code{evals}, then one of the following would need to be set:
 #' \code{metric_name='dtest-auc'} or \code{metric_name='dtest_auc'}.
 #' All dash '-' characters in metric names are considered equivalent to '_'.
 #' @param verbose Whether to print the early stopping information.
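A minimal sketch (not part of this diff) of the early-stopping setup described above, stopping on the AUC of the `evals` entry named "dtest"; names and parameter values are illustrative assumptions:

library(xgboost)
data(agaricus.train, package = "xgboost")
data(agaricus.test, package = "xgboost")
dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label)
params <- list(objective = "binary:logistic", eval_metric = "auc", nthread = 2)
bst <- xgb.train(
  params, dtrain, nrounds = 50,
  evals = list(dtrain = dtrain, dtest = dtest),
  callbacks = list(
    xgb.cb.early.stop(stopping_rounds = 3, maximize = TRUE, metric_name = "dtest-auc")
  )
)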
@@ -615,7 +615,7 @@ xgb.cb.reset.parameters <- function(new_params) {
 #' base-1 indexing, so it will be larger by '1' than the C-level 'best_iteration' that is accessed
 #' through \link{xgb.attr} or \link{xgb.attributes}.
 #'
-#' At least one data element is required in the evaluation watchlist for early stopping to work.
+#' At least one dataset is required in `evals` for early stopping to work.
 #' @export
 xgb.cb.early.stop <- function(
 stopping_rounds,

@@ -642,15 +642,15 @@ xgb.cb.early.stop <- function(
 stopped_by_max_rounds = FALSE
 )
 ),
-f_before_training = function(env, model, data, watchlist, begin_iteration, end_iteration) {
+f_before_training = function(env, model, data, evals, begin_iteration, end_iteration) {
-if (inherits(model, "xgb.Booster") && !length(watchlist)) {
+if (inherits(model, "xgb.Booster") && !length(evals)) {
-stop("For early stopping, watchlist must have at least one element")
+stop("For early stopping, 'evals' must have at least one element")
 }
 env$begin_iteration <- begin_iteration
 return(NULL)
 },
-f_before_iter = function(env, model, data, watchlist, iteration) NULL,
+f_before_iter = function(env, model, data, evals, iteration) NULL,
-f_after_iter = function(env, model, data, watchlist, iteration, iter_feval) {
+f_after_iter = function(env, model, data, evals, iteration, iter_feval) {
 sds <- NULL
 if (NCOL(iter_feval) > 1) {
 tmp <- .summarize.feval(iter_feval, TRUE)

@@ -729,7 +729,7 @@ xgb.cb.early.stop <- function(
 }
 return(FALSE)
 },
-f_after_training = function(env, model, data, watchlist, iteration, final_feval, prev_cb_res) {
+f_after_training = function(env, model, data, evals, iteration, final_feval, prev_cb_res) {
 if (inherits(model, "xgb.Booster") && !env$keep_all_iter && env$best_iteration < iteration) {
 # Note: it loses the attributes after being sliced,
 # so they have to be re-assigned afterwards.

@@ -798,18 +798,18 @@ xgb.cb.save.model <- function(save_period = 0, save_name = "xgboost.ubj") {
 xgb.Callback(
 cb_name = "save_model",
 env = as.environment(list(save_period = save_period, save_name = save_name, last_save = 0)),
-f_before_training = function(env, model, data, watchlist, begin_iteration, end_iteration) {
+f_before_training = function(env, model, data, evals, begin_iteration, end_iteration) {
 env$begin_iteration <- begin_iteration
 },
 f_before_iter = NULL,
-f_after_iter = function(env, model, data, watchlist, iteration, iter_feval) {
+f_after_iter = function(env, model, data, evals, iteration, iter_feval) {
 if (env$save_period > 0 && (iteration - env$begin_iteration) %% env$save_period == 0) {
 .save.model.w.formatted.name(model, env$save_name, iteration)
 env$last_save <- iteration
 }
 return(FALSE)
 },
-f_after_training = function(env, model, data, watchlist, iteration, final_feval, prev_cb_res) {
+f_after_training = function(env, model, data, evals, iteration, final_feval, prev_cb_res) {
 if (env$save_period == 0 && iteration > env$last_save) {
 .save.model.w.formatted.name(model, env$save_name, iteration)
 }
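A minimal sketch (not part of this diff) of attaching this callback under the renamed interface, using its default `save_name` and writing a snapshot every 5 iterations; exact file-writing behaviour follows the callback's own defaults rather than anything shown here:

library(xgboost)
data(agaricus.train, package = "xgboost")
dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
bst <- xgb.train(
  params = list(objective = "binary:logistic", nthread = 2),
  data = dtrain, nrounds = 10,
  evals = list(train = dtrain),
  callbacks = list(xgb.cb.save.model(save_period = 5))
)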
@@ -840,19 +840,19 @@ xgb.cb.cv.predict <- function(save_models = FALSE, outputmargin = FALSE) {
 xgb.Callback(
 cb_name = "cv_predict",
 env = as.environment(list(save_models = save_models, outputmargin = outputmargin)),
-f_before_training = function(env, model, data, watchlist, begin_iteration, end_iteration) {
+f_before_training = function(env, model, data, evals, begin_iteration, end_iteration) {
 if (inherits(model, "xgb.Booster")) {
 stop("'cv.predict' callback is only for 'xgb.cv'.")
 }
 },
 f_before_iter = NULL,
 f_after_iter = NULL,
-f_after_training = function(env, model, data, watchlist, iteration, final_feval, prev_cb_res) {
+f_after_training = function(env, model, data, evals, iteration, final_feval, prev_cb_res) {
 pred <- NULL
 for (fd in model) {
 pr <- predict(
 fd$bst,
-fd$watchlist[[2L]],
+fd$evals[[2L]],
 outputmargin = env$outputmargin,
 reshape = TRUE
 )
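A minimal sketch (not part of this diff) of using this callback with xgb.cv under the renamed interface; where the collected out-of-fold predictions land in the returned object is left to the callback's documentation rather than assumed here:

library(xgboost)
data(agaricus.train, package = "xgboost")
dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
cv_res <- xgb.cv(
  params = list(objective = "binary:logistic", nthread = 2),
  data = dtrain, nrounds = 3, nfold = 3,
  callbacks = list(xgb.cb.cv.predict(save_models = TRUE))
)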
@@ -1002,7 +1002,7 @@ xgb.cb.gblinear.history <- function(sparse = FALSE) {
 xgb.Callback(
 cb_name = "gblinear_history",
 env = as.environment(list(sparse = sparse)),
-f_before_training = function(env, model, data, watchlist, begin_iteration, end_iteration) {
+f_before_training = function(env, model, data, evals, begin_iteration, end_iteration) {
 if (!inherits(model, "xgb.Booster")) {
 model <- model[[1L]]$bst
 }

@@ -1013,7 +1013,7 @@ xgb.cb.gblinear.history <- function(sparse = FALSE) {
 env$next_idx <- 1
 },
 f_before_iter = NULL,
-f_after_iter = function(env, model, data, watchlist, iteration, iter_feval) {
+f_after_iter = function(env, model, data, evals, iteration, iter_feval) {
 if (inherits(model, "xgb.Booster")) {
 coef_this <- .extract.coef(model, env$sparse)
 } else {

@@ -1023,7 +1023,7 @@ xgb.cb.gblinear.history <- function(sparse = FALSE) {
 env$next_idx <- env$next_idx + 1
 return(FALSE)
 },
-f_after_training = function(env, model, data, watchlist, iteration, final_feval, prev_cb_res) {
+f_after_training = function(env, model, data, evals, iteration, final_feval, prev_cb_res) {
 # in case of early stopping
 if (env$next_idx <= length(env$coef_hist)) {
 env$coef_hist <- head(env$coef_hist, env$next_idx - 1)
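A minimal sketch (not part of this diff) of recording per-iteration gblinear coefficients with this callback and retrieving them afterwards via xgb.gblinear.history():

library(xgboost)
data(agaricus.train, package = "xgboost")
dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
params <- list(booster = "gblinear", objective = "binary:logistic", nthread = 2)
bst <- xgb.train(
  params, dtrain, nrounds = 5,
  evals = list(train = dtrain),
  callbacks = list(xgb.cb.gblinear.history())
)
coef_path <- xgb.gblinear.history(bst)  # one row of coefficients per boosting round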
@@ -193,20 +193,20 @@ xgb.iter.update <- function(bst, dtrain, iter, obj) {
 # Evaluate one iteration.
 # Returns a named vector of evaluation metrics
 # with the names in a 'datasetname-metricname' format.
-xgb.iter.eval <- function(bst, watchlist, iter, feval) {
+xgb.iter.eval <- function(bst, evals, iter, feval) {
 handle <- xgb.get.handle(bst)
 
-if (length(watchlist) == 0)
+if (length(evals) == 0)
 return(NULL)
 
-evnames <- names(watchlist)
+evnames <- names(evals)
 if (is.null(feval)) {
-msg <- .Call(XGBoosterEvalOneIter_R, handle, as.integer(iter), watchlist, as.list(evnames))
+msg <- .Call(XGBoosterEvalOneIter_R, handle, as.integer(iter), evals, as.list(evnames))
 mat <- matrix(strsplit(msg, '\\s+|:')[[1]][-1], nrow = 2)
 res <- structure(as.numeric(mat[2, ]), names = mat[1, ])
 } else {
-res <- sapply(seq_along(watchlist), function(j) {
+res <- sapply(seq_along(evals), function(j) {
-w <- watchlist[[j]]
+w <- evals[[j]]
 ## predict using all trees
 preds <- predict(bst, w, outputmargin = TRUE, iterationrange = "all")
 eval_res <- feval(preds, w)
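For context, a minimal sketch (not part of this diff) of the feval path exercised by the function above: a custom evaluation function receives raw margin predictions together with each DMatrix in `evals` and returns a named metric value.

library(xgboost)
data(agaricus.train, package = "xgboost")
dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
evalerror <- function(preds, dtrain) {
  labels <- getinfo(dtrain, "label")
  # 'preds' are margins here, so classify by their sign
  list(metric = "error", value = mean(as.numeric(preds > 0) != labels))
}
params <- list(objective = "binary:logistic", nthread = 2)
bst <- xgb.train(params, dtrain, nrounds = 2,
                 evals = list(train = dtrain), feval = evalerror)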
@@ -71,7 +71,6 @@
 #' new.dtest <- xgb.DMatrix(
 #' data = new.features.test, label = agaricus.test$label, nthread = 2
 #' )
-#' watchlist <- list(train = new.dtrain)
 #' bst <- xgb.train(params = param, data = new.dtrain, nrounds = nrounds, nthread = 2)
 #'
 #' # Model accuracy with new features

@@ -215,7 +215,7 @@ xgb.cv <- function(params = list(), data, nrounds, nfold, label = NULL, missing
 modelfile = NULL
 )
 bst <- bst$bst
-list(dtrain = dtrain, bst = bst, watchlist = list(train = dtrain, test = dtest), index = folds[[k]])
+list(dtrain = dtrain, bst = bst, evals = list(train = dtrain, test = dtest), index = folds[[k]])
 })
 
 # extract parameters that can affect the relationship b/w #trees and #iterations

@@ -254,7 +254,7 @@ xgb.cv <- function(params = list(), data, nrounds, nfold, label = NULL, missing
 )
 xgb.iter.eval(
 bst = fd$bst,
-watchlist = fd$watchlist,
+evals = fd$evals,
 iter = iteration - 1,
 feval = feval
 )
@@ -114,13 +114,13 @@
 #' @param data training dataset. \code{xgb.train} accepts only an \code{xgb.DMatrix} as the input.
 #' \code{xgboost}, in addition, also accepts \code{matrix}, \code{dgCMatrix}, or name of a local data file.
 #' @param nrounds max number of boosting iterations.
-#' @param watchlist named list of xgb.DMatrix datasets to use for evaluating model performance.
+#' @param evals Named list of `xgb.DMatrix` datasets to use for evaluating model performance.
 #' Metrics specified in either \code{eval_metric} or \code{feval} will be computed for each
 #' of these datasets during each boosting iteration, and stored in the end as a field named
 #' \code{evaluation_log} in the resulting object. When either \code{verbose>=1} or
 #' \code{\link{xgb.cb.print.evaluation}} callback is engaged, the performance results are continuously
 #' printed out during the training.
-#' E.g., specifying \code{watchlist=list(validation1=mat1, validation2=mat2)} allows to track
+#' E.g., specifying \code{evals=list(validation1=mat1, validation2=mat2)} allows to track
 #' the performance of each round's model on mat1 and mat2.
 #' @param obj customized objective function. Returns gradient and second order
 #' gradient with given prediction and dtrain.
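A minimal sketch (not part of this diff) of the multi-dataset form mentioned above; the dataset names are illustrative:

library(xgboost)
data(agaricus.train, package = "xgboost")
data(agaricus.test, package = "xgboost")
dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label)
params <- list(objective = "binary:logistic", eval_metric = "logloss", nthread = 2)
# both datasets are evaluated every round and reported as train-logloss / test-logloss
bst <- xgb.train(params, dtrain, nrounds = 2,
                 evals = list(train = dtrain, test = dtest))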
@@ -171,7 +171,7 @@
 #' @details
 #' These are the training functions for \code{xgboost}.
 #'
-#' The \code{xgb.train} interface supports advanced features such as \code{watchlist},
+#' The \code{xgb.train} interface supports advanced features such as \code{evals},
 #' customized objective and evaluation metric functions, therefore it is more flexible
 #' than the \code{xgboost} interface.
 #'

@@ -209,7 +209,7 @@
 #' \itemize{
 #' \item \code{xgb.cb.print.evaluation} is turned on when \code{verbose > 0};
 #' and the \code{print_every_n} parameter is passed to it.
-#' \item \code{xgb.cb.evaluation.log} is on when \code{watchlist} is present.
+#' \item \code{xgb.cb.evaluation.log} is on when \code{evals} is present.
 #' \item \code{xgb.cb.early.stop}: when \code{early_stopping_rounds} is set.
 #' \item \code{xgb.cb.save.model}: when \code{save_period > 0} is set.
 #' }

@@ -254,12 +254,12 @@
 #' dtest <- with(
 #' agaricus.test, xgb.DMatrix(data, label = label, nthread = nthread)
 #' )
-#' watchlist <- list(train = dtrain, eval = dtest)
+#' evals <- list(train = dtrain, eval = dtest)
 #'
 #' ## A simple xgb.train example:
 #' param <- list(max_depth = 2, eta = 1, nthread = nthread,
 #' objective = "binary:logistic", eval_metric = "auc")
-#' bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0)
+#' bst <- xgb.train(param, dtrain, nrounds = 2, evals = evals, verbose = 0)
 #'
 #' ## An xgb.train example where custom objective and evaluation metric are
 #' ## used:

@@ -280,15 +280,15 @@
 #' # as 'objective' and 'eval_metric' parameters in the params list:
 #' param <- list(max_depth = 2, eta = 1, nthread = nthread,
 #' objective = logregobj, eval_metric = evalerror)
-#' bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0)
+#' bst <- xgb.train(param, dtrain, nrounds = 2, evals = evals, verbose = 0)
 #'
 #' # or through the ... arguments:
 #' param <- list(max_depth = 2, eta = 1, nthread = nthread)
-#' bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0,
+#' bst <- xgb.train(param, dtrain, nrounds = 2, evals = evals, verbose = 0,
 #' objective = logregobj, eval_metric = evalerror)
 #'
 #' # or as dedicated 'obj' and 'feval' parameters of xgb.train:
-#' bst <- xgb.train(param, dtrain, nrounds = 2, watchlist,
+#' bst <- xgb.train(param, dtrain, nrounds = 2, evals = evals,
 #' obj = logregobj, feval = evalerror)
 #'
 #'

@@ -296,11 +296,11 @@
 #' param <- list(max_depth = 2, eta = 1, nthread = nthread,
 #' objective = "binary:logistic", eval_metric = "auc")
 #' my_etas <- list(eta = c(0.5, 0.1))
-#' bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0,
+#' bst <- xgb.train(param, dtrain, nrounds = 2, evals = evals, verbose = 0,
 #' callbacks = list(xgb.cb.reset.parameters(my_etas)))
 #'
 #' ## Early stopping:
-#' bst <- xgb.train(param, dtrain, nrounds = 25, watchlist,
+#' bst <- xgb.train(param, dtrain, nrounds = 25, evals = evals,
 #' early_stopping_rounds = 3)
 #'
 #' ## An 'xgboost' interface example:

@@ -311,7 +311,7 @@
 #'
 #' @rdname xgb.train
 #' @export
-xgb.train <- function(params = list(), data, nrounds, watchlist = list(),
+xgb.train <- function(params = list(), data, nrounds, evals = list(),
 obj = NULL, feval = NULL, verbose = 1, print_every_n = 1L,
 early_stopping_rounds = NULL, maximize = NULL,
 save_period = NULL, save_name = "xgboost.model",
@@ -324,17 +324,17 @@ xgb.train <- function(params = list(), data, nrounds, watchlist = list(),
 check.custom.obj()
 check.custom.eval()
 
-# data & watchlist checks
+# data & evals checks
 dtrain <- data
 if (!inherits(dtrain, "xgb.DMatrix"))
 stop("second argument dtrain must be xgb.DMatrix")
-if (length(watchlist) > 0) {
+if (length(evals) > 0) {
-if (typeof(watchlist) != "list" ||
+if (typeof(evals) != "list" ||
-!all(vapply(watchlist, inherits, logical(1), what = 'xgb.DMatrix')))
+!all(vapply(evals, inherits, logical(1), what = 'xgb.DMatrix')))
-stop("watchlist must be a list of xgb.DMatrix elements")
+stop("'evals' must be a list of xgb.DMatrix elements")
-evnames <- names(watchlist)
+evnames <- names(evals)
 if (is.null(evnames) || any(evnames == ""))
-stop("each element of the watchlist must have a name tag")
+stop("each element of 'evals' must have a name tag")
 }
 # Handle multiple evaluation metrics given as a list
 for (m in params$eval_metric) {

@@ -370,8 +370,8 @@ xgb.train <- function(params = list(), data, nrounds, watchlist = list(),
 if (verbose && !("print_evaluation" %in% cb_names)) {
 callbacks <- add.callback(callbacks, xgb.cb.print.evaluation(print_every_n))
 }
-# evaluation log callback: it is automatically enabled when watchlist is provided
+# evaluation log callback: it is automatically enabled when 'evals' is provided
-if (length(watchlist) && !("evaluation_log" %in% cb_names)) {
+if (length(evals) && !("evaluation_log" %in% cb_names)) {
 callbacks <- add.callback(callbacks, xgb.cb.evaluation.log())
 }
 # Model saving callback
@@ -385,7 +385,7 @@ xgb.train <- function(params = list(), data, nrounds, watchlist = list(),
 # Construct a booster (either a new one or load from xgb_model)
 bst <- xgb.Booster(
 params = params,
-cachelist = append(watchlist, dtrain),
+cachelist = append(evals, dtrain),
 modelfile = xgb_model
 )
 niter_init <- bst$niter

@@ -407,7 +407,7 @@ xgb.train <- function(params = list(), data, nrounds, watchlist = list(),
 callbacks,
 bst,
 dtrain,
-watchlist,
+evals,
 begin_iteration,
 end_iteration
 )

@@ -419,7 +419,7 @@ xgb.train <- function(params = list(), data, nrounds, watchlist = list(),
 callbacks,
 bst,
 dtrain,
-watchlist,
+evals,
 iteration
 )
 

@@ -431,10 +431,10 @@ xgb.train <- function(params = list(), data, nrounds, watchlist = list(),
 )
 
 bst_evaluation <- NULL
-if (length(watchlist) > 0) {
+if (length(evals) > 0) {
 bst_evaluation <- xgb.iter.eval(
 bst = bst,
-watchlist = watchlist,
+evals = evals,
 iter = iteration - 1,
 feval = feval
 )

@@ -444,7 +444,7 @@ xgb.train <- function(params = list(), data, nrounds, watchlist = list(),
 callbacks,
 bst,
 dtrain,
-watchlist,
+evals,
 iteration,
 bst_evaluation
 )

@@ -456,7 +456,7 @@ xgb.train <- function(params = list(), data, nrounds, watchlist = list(),
 callbacks,
 bst,
 dtrain,
-watchlist,
+evals,
 iteration,
 bst_evaluation
 )
@@ -18,9 +18,9 @@ xgboost <- function(data = NULL, label = NULL, missing = NA, weight = NULL,
 nthread = merged$nthread
 )
 
-watchlist <- list(train = dtrain)
+evals <- list(train = dtrain)
 
-bst <- xgb.train(params, dtrain, nrounds, watchlist, verbose = verbose, print_every_n = print_every_n,
+bst <- xgb.train(params, dtrain, nrounds, evals, verbose = verbose, print_every_n = print_every_n,
 early_stopping_rounds = early_stopping_rounds, maximize = maximize,
 save_period = save_period, save_name = save_name,
 xgb_model = xgb_model, callbacks = callbacks, ...)

@@ -74,17 +74,17 @@ print(paste("sum(abs(pred3-pred))=", sum(abs(pred3 - pred))))
 # to use advanced features, we need to put data in xgb.DMatrix
 dtrain <- xgb.DMatrix(data = train$data, label = train$label)
 dtest <- xgb.DMatrix(data = test$data, label = test$label)
-#---------------Using watchlist----------------
+#---------------Using an evaluation set----------------
-# watchlist is a list of xgb.DMatrix, each of them is tagged with name
+# 'evals' is a list of xgb.DMatrix, each of them is tagged with name
-watchlist <- list(train = dtrain, test = dtest)
+evals <- list(train = dtrain, test = dtest)
-# to train with watchlist, use xgb.train, which contains more advanced features
+# to train with an evaluation set, use xgb.train, which contains more advanced features
-# watchlist allows us to monitor the evaluation result on all data in the list
+# 'evals' argument allows us to monitor the evaluation result on all data in the list
-print("Train xgboost using xgb.train with watchlist")
+print("Train xgboost using xgb.train with evaluation data")
-bst <- xgb.train(data = dtrain, max_depth = 2, eta = 1, nrounds = 2, watchlist = watchlist,
+bst <- xgb.train(data = dtrain, max_depth = 2, eta = 1, nrounds = 2, evals = evals,
 nthread = 2, objective = "binary:logistic")
 # we can change evaluation metrics, or use multiple evaluation metrics
-print("train xgboost using xgb.train with watchlist, watch logloss and error")
+print("train xgboost using xgb.train with evaluation data, watch logloss and error")
-bst <- xgb.train(data = dtrain, max_depth = 2, eta = 1, nrounds = 2, watchlist = watchlist,
+bst <- xgb.train(data = dtrain, max_depth = 2, eta = 1, nrounds = 2, evals = evals,
 eval_metric = "error", eval_metric = "logloss",
 nthread = 2, objective = "binary:logistic")
 
@@ -92,7 +92,7 @@ bst <- xgb.train(data = dtrain, max_depth = 2, eta = 1, nrounds = 2, watchlist =
 xgb.DMatrix.save(dtrain, "dtrain.buffer")
 # to load it in, simply call xgb.DMatrix
 dtrain2 <- xgb.DMatrix("dtrain.buffer")
-bst <- xgb.train(data = dtrain2, max_depth = 2, eta = 1, nrounds = 2, watchlist = watchlist,
+bst <- xgb.train(data = dtrain2, max_depth = 2, eta = 1, nrounds = 2, evals = evals,
 nthread = 2, objective = "binary:logistic")
 # information can be extracted from xgb.DMatrix using getinfo
 label <- getinfo(dtest, "label")

@@ -5,14 +5,14 @@ data(agaricus.test, package = 'xgboost')
 dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
 dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label)
 
-watchlist <- list(eval = dtest, train = dtrain)
+evals <- list(eval = dtest, train = dtrain)
 ###
 # advanced: start from a initial base prediction
 #
 print('start running example to start from a initial prediction')
 # train xgboost for 1 round
 param <- list(max_depth = 2, eta = 1, nthread = 2, objective = 'binary:logistic')
-bst <- xgb.train(param, dtrain, 1, watchlist)
+bst <- xgb.train(param, dtrain, 1, evals)
 # Note: we need the margin value instead of transformed prediction in set_base_margin
 # do predict with output_margin=TRUE, will always give you margin values before logistic transformation
 ptrain <- predict(bst, dtrain, outputmargin = TRUE)

@@ -23,4 +23,4 @@ setinfo(dtrain, "base_margin", ptrain)
 setinfo(dtest, "base_margin", ptest)
 
 print('this is result of boost from initial prediction')
-bst <- xgb.train(params = param, data = dtrain, nrounds = 1, watchlist = watchlist)
+bst <- xgb.train(params = param, data = dtrain, nrounds = 1, evals = evals)
@@ -8,7 +8,7 @@ dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label)
 # note: for customized objective function, we leave objective as default
 # note: what we are getting is margin value in prediction
 # you must know what you are doing
-watchlist <- list(eval = dtest, train = dtrain)
+evals <- list(eval = dtest, train = dtrain)
 num_round <- 2
 
 # user define objective function, given prediction, return gradient and second order gradient

@@ -38,7 +38,7 @@ param <- list(max_depth = 2, eta = 1, nthread = 2, verbosity = 0,
 print('start training with user customized objective')
 # training with customized objective, we can also do step by step training
 # simply look at xgboost.py's implementation of train
-bst <- xgb.train(param, dtrain, num_round, watchlist)
+bst <- xgb.train(param, dtrain, num_round, evals)
 
 #
 # there can be cases where you want additional information

@@ -62,4 +62,4 @@ param <- list(max_depth = 2, eta = 1, nthread = 2, verbosity = 0,
 print('start training with user customized objective, with additional attributes in DMatrix')
 # training with customized objective, we can also do step by step training
 # simply look at xgboost.py's implementation of train
-bst <- xgb.train(param, dtrain, num_round, watchlist)
+bst <- xgb.train(param, dtrain, num_round, evals)

@@ -8,7 +8,7 @@ dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label)
 # note: what we are getting is margin value in prediction
 # you must know what you are doing
 param <- list(max_depth = 2, eta = 1, nthread = 2, verbosity = 0)
-watchlist <- list(eval = dtest)
+evals <- list(eval = dtest)
 num_round <- 20
 # user define objective function, given prediction, return gradient and second order gradient
 # this is log likelihood loss

@@ -32,7 +32,7 @@ evalerror <- function(preds, dtrain) {
 }
 print('start training with early Stopping setting')
 
-bst <- xgb.train(param, dtrain, num_round, watchlist,
+bst <- xgb.train(param, dtrain, num_round, evals,
 objective = logregobj, eval_metric = evalerror, maximize = FALSE,
 early_stopping_round = 3)
 bst <- xgb.cv(param, dtrain, num_round, nfold = 5,
@@ -25,9 +25,9 @@ param <- list(objective = "binary:logistic", booster = "gblinear",
 ##
 # the rest of settings are the same
 ##
-watchlist <- list(eval = dtest, train = dtrain)
+evals <- list(eval = dtest, train = dtrain)
 num_round <- 2
-bst <- xgb.train(param, dtrain, num_round, watchlist)
+bst <- xgb.train(param, dtrain, num_round, evals)
 ypred <- predict(bst, dtest)
 labels <- getinfo(dtest, 'label')
 cat('error of preds=', mean(as.numeric(ypred > 0.5) != labels), '\n')

@@ -23,7 +23,7 @@ y <- rbinom(N, 1, plogis(m))
 tr <- sample.int(N, N * 0.75)
 dtrain <- xgb.DMatrix(X[tr, ], label = y[tr])
 dtest <- xgb.DMatrix(X[-tr, ], label = y[-tr])
-wl <- list(train = dtrain, test = dtest)
+evals <- list(train = dtrain, test = dtest)
 
 # An example of running 'gpu_hist' algorithm
 # which is

@@ -35,11 +35,11 @@ wl <- list(train = dtrain, test = dtest)
 param <- list(objective = 'reg:logistic', eval_metric = 'auc', subsample = 0.5, nthread = 4,
 max_bin = 64, tree_method = 'gpu_hist')
 pt <- proc.time()
-bst_gpu <- xgb.train(param, dtrain, watchlist = wl, nrounds = 50)
+bst_gpu <- xgb.train(param, dtrain, evals = evals, nrounds = 50)
 proc.time() - pt
 
 # Compare to the 'hist' algorithm:
 param$tree_method <- 'hist'
 pt <- proc.time()
-bst_hist <- xgb.train(param, dtrain, watchlist = wl, nrounds = 50)
+bst_hist <- xgb.train(param, dtrain, evals = evals, nrounds = 50)
 proc.time() - pt
@@ -6,11 +6,11 @@ dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
 dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label)
 
 param <- list(max_depth = 2, eta = 1, objective = 'binary:logistic')
-watchlist <- list(eval = dtest, train = dtrain)
+evals <- list(eval = dtest, train = dtrain)
 nrounds <- 2
 
 # training the model for two rounds
-bst <- xgb.train(param, dtrain, nrounds, nthread = 2, watchlist)
+bst <- xgb.train(param, dtrain, nrounds, nthread = 2, evals = evals)
 cat('start testing prediction from first n trees\n')
 labels <- getinfo(dtest, 'label')
 

@@ -43,7 +43,6 @@ colnames(new.features.test) <- colnames(new.features.train)
 # learning with new features
 new.dtrain <- xgb.DMatrix(data = new.features.train, label = agaricus.train$label)
 new.dtest <- xgb.DMatrix(data = new.features.test, label = agaricus.test$label)
-watchlist <- list(train = new.dtrain)
 bst <- xgb.train(params = param, data = new.dtrain, nrounds = nrounds, nthread = 2)
 
 # Model accuracy with new features

@@ -39,7 +39,7 @@ bst <- xgb.train(
 data = d_train,
 params = params,
 maximize = FALSE,
-watchlist = list(train = d_train),
+evals = list(train = d_train),
 nrounds = 20)
 
 var_imp <- xgb.importance(attr(x, 'Dimnames')[[2]], model = bst)
@ -7,11 +7,11 @@
|
|||||||
xgb.Callback(
|
xgb.Callback(
|
||||||
cb_name = "custom_callback",
|
cb_name = "custom_callback",
|
||||||
env = new.env(),
|
env = new.env(),
|
||||||
f_before_training = function(env, model, data, watchlist, begin_iteration,
|
f_before_training = function(env, model, data, evals, begin_iteration, end_iteration)
|
||||||
end_iteration) NULL,
|
NULL,
|
||||||
f_before_iter = function(env, model, data, watchlist, iteration) NULL,
|
f_before_iter = function(env, model, data, evals, iteration) NULL,
|
||||||
f_after_iter = function(env, model, data, watchlist, iteration, iter_feval) NULL,
|
f_after_iter = function(env, model, data, evals, iteration, iter_feval) NULL,
|
||||||
f_after_training = function(env, model, data, watchlist, iteration, final_feval,
|
f_after_training = function(env, model, data, evals, iteration, final_feval,
|
||||||
prev_cb_res) NULL
|
prev_cb_res) NULL
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
@ -82,10 +82,10 @@ not be kept after the model fitting function terminates (see parameter \code{f_a
|
|||||||
For \link{xgb.cv}, folds are a list with a structure as follows:\itemize{
|
For \link{xgb.cv}, folds are a list with a structure as follows:\itemize{
|
||||||
\item \code{dtrain}: The training data for the fold (as an \code{xgb.DMatrix} object).
|
\item \code{dtrain}: The training data for the fold (as an \code{xgb.DMatrix} object).
|
||||||
\item \code{bst}: Rhe \code{xgb.Booster} object for the fold.
|
\item \code{bst}: Rhe \code{xgb.Booster} object for the fold.
|
||||||
\item \code{watchlist}: A list with two DMatrices, with names \code{train} and \code{test}
|
\item \code{evals}: A list containing two DMatrices, with names \code{train} and \code{test}
|
||||||
(\code{test} is the held-out data for the fold).
|
(\code{test} is the held-out data for the fold).
|
||||||
\item \code{index}: The indices of the hold-out data for that fold (base-1 indexing),
|
\item \code{index}: The indices of the hold-out data for that fold (base-1 indexing),
|
||||||
from which the \code{test} entry in the watchlist was obtained.
|
from which the \code{test} entry in \code{evals} was obtained.
|
||||||
}
|
}
|
||||||
|
|
||||||
This object should \bold{not} be in-place modified in ways that conflict with the
|
This object should \bold{not} be in-place modified in ways that conflict with the
|
||||||
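As an illustration of the fold structure described in this hunk, here is a minimal sketch (not part of the commit). It assumes that, under \link{xgb.cv}, the callback functions receive this folds list as their `model` argument, and that `dtrain`, `bst`, `evals` and `index` are the per-fold fields named above; the callback name `fold_sizes` is made up for the example.

```r
# Hypothetical sketch: summarise the held-out rows of each fold at the end of
# an xgb.cv run, assuming `model` is the folds list described above.
library(xgboost)

fold_sizes <- xgb.Callback(
  cb_name = "fold_sizes",
  f_after_training = function(env, model, data, evals, iteration,
                              final_feval, prev_cb_res) {
    # `index` holds the base-1 indices of the held-out rows for each fold
    vapply(model, function(fold) length(fold$index), integer(1))
  }
)
```

Passed via `callbacks = list(fold_sizes)`, the returned vector would be kept with the fitted result, in the way the surrounding documentation describes for callback return values.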
@ -104,7 +104,7 @@ For keeping variables across iterations, it's recommended to use \code{env} inst
|
|||||||
Note that, for \link{xgb.cv}, this will be the full data, while data for the specific
|
Note that, for \link{xgb.cv}, this will be the full data, while data for the specific
|
||||||
folds can be found in the \code{model} object.
|
folds can be found in the \code{model} object.
|
||||||
|
|
||||||
\item watchlist The evaluation watchlist, as passed under argument \code{watchlist} to
|
\item evals The evaluation data, as passed under argument \code{evals} to
|
||||||
\link{xgb.train}.
|
\link{xgb.train}.
|
||||||
|
|
||||||
For \link{xgb.cv}, this will always be \code{NULL}.
|
For \link{xgb.cv}, this will always be \code{NULL}.
|
||||||
@ -127,15 +127,15 @@ example by using the early stopping callback \link{xgb.cb.early.stop}.
|
|||||||
\item iteration Index of the iteration number that is being executed (first iteration
|
\item iteration Index of the iteration number that is being executed (first iteration
|
||||||
will be the same as parameter \code{begin_iteration}, then next one will add +1, and so on).
|
will be the same as parameter \code{begin_iteration}, then next one will add +1, and so on).
|
||||||
|
|
||||||
\item iter_feval Evaluation metrics for the \code{watchlist} that was supplied, either
|
\item iter_feval Evaluation metrics for \code{evals} that were supplied, either
|
||||||
determined by the objective, or by parameter \code{feval}.
|
determined by the objective, or by parameter \code{feval}.
|
||||||
|
|
||||||
For \link{xgb.train}, this will be a named vector with one entry per element in
|
For \link{xgb.train}, this will be a named vector with one entry per element in
|
||||||
\code{watchlist}, where the names are determined as 'watchlist name' + '-' + 'metric name' - for
|
\code{evals}, where the names are determined as 'evals name' + '-' + 'metric name' - for
|
||||||
example, if \code{watchlist} contains an entry named "tr" and the metric is "rmse",
|
example, if \code{evals} contains an entry named "tr" and the metric is "rmse",
|
||||||
this will be a one-element vector with name "tr-rmse".
|
this will be a one-element vector with name "tr-rmse".
|
||||||
|
|
||||||
For \link{xgb.cv}, this will be a 2d matrix with dimensions \verb{[length(watchlist), nfolds]},
|
For \link{xgb.cv}, this will be a 2d matrix with dimensions \verb{[length(evals), nfolds]},
|
||||||
where the row names will follow the same naming logic as the one-dimensional vector
|
where the row names will follow the same naming logic as the one-dimensional vector
|
||||||
that is passed in \link{xgb.train}.
|
that is passed in \link{xgb.train}.
|
||||||
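To make the naming convention above concrete, a minimal sketch (not part of the commit): a callback that only prints the names of `iter_feval`. The callback name `inspect_feval` and the `tr` label are illustrative.

```r
# Hypothetical sketch: print the "<evals name>-<metric name>" names of iter_feval.
library(xgboost)

inspect_feval <- xgb.Callback(
  cb_name = "inspect_feval",
  f_after_iter = function(env, model, data, evals, iteration, iter_feval) {
    # e.g. "tr-rmse" when evals = list(tr = ...) and the metric is RMSE
    print(names(iter_feval))
    return(FALSE)  # FALSE = do not stop training early
  }
)
```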
|
|
||||||
@ -187,18 +187,18 @@ the order in which the callbacks are passed to the model fitting function.
|
|||||||
}
|
}
|
||||||
\examples{
|
\examples{
|
||||||
# Example constructing a custom callback that calculates
|
# Example constructing a custom callback that calculates
|
||||||
# squared error on the training data, without a watchlist,
|
# squared error on the training data (no separate test set),
|
||||||
# and outputs the per-iteration results.
|
# and outputs the per-iteration results.
|
||||||
ssq_callback <- xgb.Callback(
|
ssq_callback <- xgb.Callback(
|
||||||
cb_name = "ssq",
|
cb_name = "ssq",
|
||||||
f_before_training = function(env, model, data, watchlist,
|
f_before_training = function(env, model, data, evals,
|
||||||
begin_iteration, end_iteration) {
|
begin_iteration, end_iteration) {
|
||||||
# A vector to keep track of a number at each iteration
|
# A vector to keep track of a number at each iteration
|
||||||
env$logs <- rep(NA_real_, end_iteration - begin_iteration + 1)
|
env$logs <- rep(NA_real_, end_iteration - begin_iteration + 1)
|
||||||
},
|
},
|
||||||
f_after_iter = function(env, model, data, watchlist, iteration, iter_feval) {
|
f_after_iter = function(env, model, data, evals, iteration, iter_feval) {
|
||||||
# This calculates the sum of squared errors on the training data.
|
# This calculates the sum of squared errors on the training data.
|
||||||
# Note that this can be better done by passing a 'watchlist' entry,
|
# Note that this can be better done by passing an 'evals' entry,
|
||||||
# but this demonstrates a way in which callbacks can be structured.
|
# but this demonstrates a way in which callbacks can be structured.
|
||||||
pred <- predict(model, data)
|
pred <- predict(model, data)
|
||||||
err <- pred - getinfo(data, "label")
|
err <- pred - getinfo(data, "label")
|
||||||
@ -214,7 +214,7 @@ ssq_callback <- xgb.Callback(
|
|||||||
# A return value of 'TRUE' here would signal to finalize the training
|
# A return value of 'TRUE' here would signal to finalize the training
|
||||||
return(FALSE)
|
return(FALSE)
|
||||||
},
|
},
|
||||||
f_after_training = function(env, model, data, watchlist, iteration,
|
f_after_training = function(env, model, data, evals, iteration,
|
||||||
final_feval, prev_cb_res) {
|
final_feval, prev_cb_res) {
|
||||||
return(env$logs)
|
return(env$logs)
|
||||||
}
|
}
|
||||||
|
|||||||
@ -20,9 +20,9 @@ the evaluation metric in order to stop the training.}
|
|||||||
|
|
||||||
\item{metric_name}{The name of an evaluation column to use as a criterion for early
|
\item{metric_name}{The name of an evaluation column to use as a criterion for early
|
||||||
stopping. If not set, the last column will be used.
|
stopping. If not set, the last column will be used.
|
||||||
Let's say the test data in \code{watchlist} was labelled as \code{dtest},
|
Let's say the test data in \code{evals} was labelled as \code{dtest},
|
||||||
and one wants to use the AUC in test data for early stopping regardless of where
|
and one wants to use the AUC in test data for early stopping regardless of where
|
||||||
it is in the \code{watchlist}, then one of the following would need to be set:
|
it is in the \code{evals}, then one of the following would need to be set:
|
||||||
\code{metric_name='dtest-auc'} or \code{metric_name='dtest_auc'}.
|
\code{metric_name='dtest-auc'} or \code{metric_name='dtest_auc'}.
|
||||||
All dash '-' characters in metric names are considered equivalent to '_'.}
|
All dash '-' characters in metric names are considered equivalent to '_'.}
|
||||||
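A minimal sketch of the `metric_name` usage described above (not part of the commit), assuming `dtrain` and `dtest` already exist as `xgb.DMatrix` objects:

```r
library(xgboost)

params <- list(objective = "binary:logistic", eval_metric = "auc", nthread = 2)
bst <- xgb.train(
  params, dtrain,
  nrounds = 50,
  evals = list(train = dtrain, dtest = dtest),
  callbacks = list(
    # stop on the test AUC no matter where the `dtest` entry sits inside `evals`
    xgb.cb.early.stop(stopping_rounds = 3, maximize = TRUE, metric_name = "dtest-auc")
  )
)
```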
|
|
||||||
@ -51,5 +51,5 @@ condition occurred. Note that the \code{best_iteration} that is stored under R a
|
|||||||
base-1 indexing, so it will be larger by '1' than the C-level 'best_iteration' that is accessed
|
base-1 indexing, so it will be larger by '1' than the C-level 'best_iteration' that is accessed
|
||||||
through \link{xgb.attr} or \link{xgb.attributes}.
|
through \link{xgb.attr} or \link{xgb.attributes}.
|
||||||
|
|
||||||
At least one data element is required in the evaluation watchlist for early stopping to work.
|
At least one dataset is required in \code{evals} for early stopping to work.
|
||||||
}
|
}
|
||||||
|
|||||||
@ -14,7 +14,7 @@ Callback for logging the evaluation history
|
|||||||
}
|
}
|
||||||
\details{
|
\details{
|
||||||
This callback creates a table with per-iteration evaluation metrics (see parameters
|
This callback creates a table with per-iteration evaluation metrics (see parameters
|
||||||
\code{watchlist} and \code{feval} in \link{xgb.train}).
|
\code{evals} and \code{feval} in \link{xgb.train}).
|
||||||
|
|
||||||
Note: in the column names of the final data.table, the dash '-' character is replaced with
|
Note: in the column names of the final data.table, the dash '-' character is replaced with
|
||||||
the underscore '_' in order to make the column names more like regular R identifiers.
|
the underscore '_' in order to make the column names more like regular R identifiers.
|
||||||
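A minimal sketch of attaching this callback explicitly and reading the resulting table (not part of the commit; `dtrain` and `dtest` are assumed to exist):

```r
library(xgboost)

bst <- xgb.train(
  params = list(objective = "binary:logistic", eval_metric = "error", nthread = 2),
  data = dtrain,
  nrounds = 5,
  verbose = 0,
  evals = list(train = dtrain, test = dtest),
  callbacks = list(xgb.cb.evaluation.log())
)
# "train-error" becomes the column `train_error` in the logged table:
attributes(bst)$evaluation_log$train_error
```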
|
|||||||
@ -82,7 +82,6 @@ new.dtrain <- xgb.DMatrix(
|
|||||||
new.dtest <- xgb.DMatrix(
|
new.dtest <- xgb.DMatrix(
|
||||||
data = new.features.test, label = agaricus.test$label, nthread = 2
|
data = new.features.test, label = agaricus.test$label, nthread = 2
|
||||||
)
|
)
|
||||||
watchlist <- list(train = new.dtrain)
|
|
||||||
bst <- xgb.train(params = param, data = new.dtrain, nrounds = nrounds, nthread = 2)
|
bst <- xgb.train(params = param, data = new.dtrain, nrounds = nrounds, nthread = 2)
|
||||||
|
|
||||||
# Model accuracy with new features
|
# Model accuracy with new features
|
||||||
|
|||||||
@ -9,7 +9,7 @@ xgb.train(
|
|||||||
params = list(),
|
params = list(),
|
||||||
data,
|
data,
|
||||||
nrounds,
|
nrounds,
|
||||||
watchlist = list(),
|
evals = list(),
|
||||||
obj = NULL,
|
obj = NULL,
|
||||||
feval = NULL,
|
feval = NULL,
|
||||||
verbose = 1,
|
verbose = 1,
|
||||||
@ -158,13 +158,13 @@ List is provided in detail section.}
|
|||||||
|
|
||||||
\item{nrounds}{max number of boosting iterations.}
|
\item{nrounds}{max number of boosting iterations.}
|
||||||
|
|
||||||
\item{watchlist}{named list of xgb.DMatrix datasets to use for evaluating model performance.
|
\item{evals}{Named list of \code{xgb.DMatrix} datasets to use for evaluating model performance.
|
||||||
Metrics specified in either \code{eval_metric} or \code{feval} will be computed for each
|
Metrics specified in either \code{eval_metric} or \code{feval} will be computed for each
|
||||||
of these datasets during each boosting iteration, and stored in the end as a field named
|
of these datasets during each boosting iteration, and stored in the end as a field named
|
||||||
\code{evaluation_log} in the resulting object. When either \code{verbose>=1} or
|
\code{evaluation_log} in the resulting object. When either \code{verbose>=1} or
|
||||||
\code{\link{xgb.cb.print.evaluation}} callback is engaged, the performance results are continuously
|
\code{\link{xgb.cb.print.evaluation}} callback is engaged, the performance results are continuously
|
||||||
printed out during the training.
|
printed out during the training.
|
||||||
E.g., specifying \code{watchlist=list(validation1=mat1, validation2=mat2)} allows tracking
|
E.g., specifying \code{evals=list(validation1=mat1, validation2=mat2)} allows tracking
|
||||||
the performance of each round's model on mat1 and mat2.}
|
the performance of each round's model on mat1 and mat2.}
|
||||||
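A minimal sketch of the `evals` argument documented above (not part of the commit), assuming `dtrain`, `dvalid1` and `dvalid2` are `xgb.DMatrix` objects:

```r
library(xgboost)

bst <- xgb.train(
  params = list(objective = "binary:logistic", eval_metric = "auc", nthread = 2),
  data = dtrain,
  nrounds = 10,
  verbose = 1,  # print the per-round metrics for each entry of `evals`
  evals = list(validation1 = dvalid1, validation2 = dvalid2)
)
# per-iteration metrics stored on the booster (dashes turned into underscores):
attributes(bst)$evaluation_log
```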
|
|
||||||
\item{obj}{customized objective function. Returns gradient and second order
|
\item{obj}{customized objective function. Returns gradient and second order
|
||||||
@ -234,7 +234,7 @@ The \code{xgboost} function is a simpler wrapper for \code{xgb.train}.
|
|||||||
\details{
|
\details{
|
||||||
These are the training functions for \code{xgboost}.
|
These are the training functions for \code{xgboost}.
|
||||||
|
|
||||||
The \code{xgb.train} interface supports advanced features such as \code{watchlist},
|
The \code{xgb.train} interface supports advanced features such as \code{evals},
|
||||||
customized objective and evaluation metric functions, and is therefore more flexible
|
customized objective and evaluation metric functions, and is therefore more flexible
|
||||||
than the \code{xgboost} interface.
|
than the \code{xgboost} interface.
|
||||||
|
|
||||||
@ -272,7 +272,7 @@ The following callbacks are automatically created when certain parameters are se
|
|||||||
\itemize{
|
\itemize{
|
||||||
\item \code{xgb.cb.print.evaluation} is turned on when \code{verbose > 0};
|
\item \code{xgb.cb.print.evaluation} is turned on when \code{verbose > 0};
|
||||||
and the \code{print_every_n} parameter is passed to it.
|
and the \code{print_every_n} parameter is passed to it.
|
||||||
\item \code{xgb.cb.evaluation.log} is on when \code{watchlist} is present.
|
\item \code{xgb.cb.evaluation.log} is on when \code{evals} is present.
|
||||||
\item \code{xgb.cb.early.stop}: when \code{early_stopping_rounds} is set.
|
\item \code{xgb.cb.early.stop}: when \code{early_stopping_rounds} is set.
|
||||||
\item \code{xgb.cb.save.model}: when \code{save_period > 0} is set.
|
\item \code{xgb.cb.save.model}: when \code{save_period > 0} is set.
|
||||||
}
|
}
|
||||||
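To tie the list above together, a minimal sketch (not part of the commit) in which each argument triggers one of the automatic callbacks; `dtrain` and `dtest` are assumed to exist:

```r
library(xgboost)

bst <- xgb.train(
  params = list(objective = "binary:logistic", eval_metric = "error", nthread = 2),
  data = dtrain,
  nrounds = 50,
  evals = list(train = dtrain, eval = dtest),            # -> xgb.cb.evaluation.log
  verbose = 1, print_every_n = 5,                        # -> xgb.cb.print.evaluation
  early_stopping_rounds = 3,                             # -> xgb.cb.early.stop
  save_period = 10,
  save_name = file.path(tempdir(), "xgboost_%02d.json")  # -> xgb.cb.save.model
)
```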
@ -307,12 +307,12 @@ dtrain <- with(
|
|||||||
dtest <- with(
|
dtest <- with(
|
||||||
agaricus.test, xgb.DMatrix(data, label = label, nthread = nthread)
|
agaricus.test, xgb.DMatrix(data, label = label, nthread = nthread)
|
||||||
)
|
)
|
||||||
watchlist <- list(train = dtrain, eval = dtest)
|
evals <- list(train = dtrain, eval = dtest)
|
||||||
|
|
||||||
## A simple xgb.train example:
|
## A simple xgb.train example:
|
||||||
param <- list(max_depth = 2, eta = 1, nthread = nthread,
|
param <- list(max_depth = 2, eta = 1, nthread = nthread,
|
||||||
objective = "binary:logistic", eval_metric = "auc")
|
objective = "binary:logistic", eval_metric = "auc")
|
||||||
bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0)
|
bst <- xgb.train(param, dtrain, nrounds = 2, evals = evals, verbose = 0)
|
||||||
|
|
||||||
## An xgb.train example where custom objective and evaluation metric are
|
## An xgb.train example where custom objective and evaluation metric are
|
||||||
## used:
|
## used:
|
||||||
@ -333,15 +333,15 @@ evalerror <- function(preds, dtrain) {
|
|||||||
# as 'objective' and 'eval_metric' parameters in the params list:
|
# as 'objective' and 'eval_metric' parameters in the params list:
|
||||||
param <- list(max_depth = 2, eta = 1, nthread = nthread,
|
param <- list(max_depth = 2, eta = 1, nthread = nthread,
|
||||||
objective = logregobj, eval_metric = evalerror)
|
objective = logregobj, eval_metric = evalerror)
|
||||||
bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0)
|
bst <- xgb.train(param, dtrain, nrounds = 2, evals = evals, verbose = 0)
|
||||||
|
|
||||||
# or through the ... arguments:
|
# or through the ... arguments:
|
||||||
param <- list(max_depth = 2, eta = 1, nthread = nthread)
|
param <- list(max_depth = 2, eta = 1, nthread = nthread)
|
||||||
bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0,
|
bst <- xgb.train(param, dtrain, nrounds = 2, evals = evals, verbose = 0,
|
||||||
objective = logregobj, eval_metric = evalerror)
|
objective = logregobj, eval_metric = evalerror)
|
||||||
|
|
||||||
# or as dedicated 'obj' and 'feval' parameters of xgb.train:
|
# or as dedicated 'obj' and 'feval' parameters of xgb.train:
|
||||||
bst <- xgb.train(param, dtrain, nrounds = 2, watchlist,
|
bst <- xgb.train(param, dtrain, nrounds = 2, evals = evals,
|
||||||
obj = logregobj, feval = evalerror)
|
obj = logregobj, feval = evalerror)
|
||||||
|
|
||||||
|
|
||||||
@ -349,11 +349,11 @@ bst <- xgb.train(param, dtrain, nrounds = 2, watchlist,
|
|||||||
param <- list(max_depth = 2, eta = 1, nthread = nthread,
|
param <- list(max_depth = 2, eta = 1, nthread = nthread,
|
||||||
objective = "binary:logistic", eval_metric = "auc")
|
objective = "binary:logistic", eval_metric = "auc")
|
||||||
my_etas <- list(eta = c(0.5, 0.1))
|
my_etas <- list(eta = c(0.5, 0.1))
|
||||||
bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0,
|
bst <- xgb.train(param, dtrain, nrounds = 2, evals = evals, verbose = 0,
|
||||||
callbacks = list(xgb.cb.reset.parameters(my_etas)))
|
callbacks = list(xgb.cb.reset.parameters(my_etas)))
|
||||||
|
|
||||||
## Early stopping:
|
## Early stopping:
|
||||||
bst <- xgb.train(param, dtrain, nrounds = 25, watchlist,
|
bst <- xgb.train(param, dtrain, nrounds = 25, evals = evals,
|
||||||
early_stopping_rounds = 3)
|
early_stopping_rounds = 3)
|
||||||
|
|
||||||
## An 'xgboost' interface example:
|
## An 'xgboost' interface example:
|
||||||
|
|||||||
@ -20,7 +20,7 @@ test_that("train and predict binary classification", {
|
|||||||
data = xgb.DMatrix(train$data, label = train$label), max_depth = 2,
|
data = xgb.DMatrix(train$data, label = train$label), max_depth = 2,
|
||||||
eta = 1, nthread = n_threads, nrounds = nrounds,
|
eta = 1, nthread = n_threads, nrounds = nrounds,
|
||||||
objective = "binary:logistic", eval_metric = "error",
|
objective = "binary:logistic", eval_metric = "error",
|
||||||
watchlist = list(train = xgb.DMatrix(train$data, label = train$label))
|
evals = list(train = xgb.DMatrix(train$data, label = train$label))
|
||||||
),
|
),
|
||||||
"train-error"
|
"train-error"
|
||||||
)
|
)
|
||||||
@ -152,7 +152,7 @@ test_that("train and predict softprob", {
|
|||||||
data = xgb.DMatrix(as.matrix(iris[, -5]), label = lb),
|
data = xgb.DMatrix(as.matrix(iris[, -5]), label = lb),
|
||||||
max_depth = 3, eta = 0.5, nthread = n_threads, nrounds = 5,
|
max_depth = 3, eta = 0.5, nthread = n_threads, nrounds = 5,
|
||||||
objective = "multi:softprob", num_class = 3, eval_metric = "merror",
|
objective = "multi:softprob", num_class = 3, eval_metric = "merror",
|
||||||
watchlist = list(train = xgb.DMatrix(as.matrix(iris[, -5]), label = lb))
|
evals = list(train = xgb.DMatrix(as.matrix(iris[, -5]), label = lb))
|
||||||
),
|
),
|
||||||
"train-merror"
|
"train-merror"
|
||||||
)
|
)
|
||||||
@ -203,7 +203,7 @@ test_that("train and predict softmax", {
|
|||||||
data = xgb.DMatrix(as.matrix(iris[, -5]), label = lb),
|
data = xgb.DMatrix(as.matrix(iris[, -5]), label = lb),
|
||||||
max_depth = 3, eta = 0.5, nthread = n_threads, nrounds = 5,
|
max_depth = 3, eta = 0.5, nthread = n_threads, nrounds = 5,
|
||||||
objective = "multi:softmax", num_class = 3, eval_metric = "merror",
|
objective = "multi:softmax", num_class = 3, eval_metric = "merror",
|
||||||
watchlist = list(train = xgb.DMatrix(as.matrix(iris[, -5]), label = lb))
|
evals = list(train = xgb.DMatrix(as.matrix(iris[, -5]), label = lb))
|
||||||
),
|
),
|
||||||
"train-merror"
|
"train-merror"
|
||||||
)
|
)
|
||||||
@ -226,7 +226,7 @@ test_that("train and predict RF", {
|
|||||||
nthread = n_threads,
|
nthread = n_threads,
|
||||||
nrounds = 1, objective = "binary:logistic", eval_metric = "error",
|
nrounds = 1, objective = "binary:logistic", eval_metric = "error",
|
||||||
num_parallel_tree = 20, subsample = 0.6, colsample_bytree = 0.1,
|
num_parallel_tree = 20, subsample = 0.6, colsample_bytree = 0.1,
|
||||||
watchlist = list(train = xgb.DMatrix(train$data, label = lb))
|
evals = list(train = xgb.DMatrix(train$data, label = lb))
|
||||||
)
|
)
|
||||||
expect_equal(xgb.get.num.boosted.rounds(bst), 1)
|
expect_equal(xgb.get.num.boosted.rounds(bst), 1)
|
||||||
|
|
||||||
@ -250,7 +250,7 @@ test_that("train and predict RF with softprob", {
|
|||||||
objective = "multi:softprob", eval_metric = "merror",
|
objective = "multi:softprob", eval_metric = "merror",
|
||||||
num_class = 3, verbose = 0,
|
num_class = 3, verbose = 0,
|
||||||
num_parallel_tree = 4, subsample = 0.5, colsample_bytree = 0.5,
|
num_parallel_tree = 4, subsample = 0.5, colsample_bytree = 0.5,
|
||||||
watchlist = list(train = xgb.DMatrix(as.matrix(iris[, -5]), label = lb))
|
evals = list(train = xgb.DMatrix(as.matrix(iris[, -5]), label = lb))
|
||||||
)
|
)
|
||||||
expect_equal(xgb.get.num.boosted.rounds(bst), 15)
|
expect_equal(xgb.get.num.boosted.rounds(bst), 15)
|
||||||
# predict for all iterations:
|
# predict for all iterations:
|
||||||
@ -271,7 +271,7 @@ test_that("use of multiple eval metrics works", {
|
|||||||
data = xgb.DMatrix(train$data, label = train$label), max_depth = 2,
|
data = xgb.DMatrix(train$data, label = train$label), max_depth = 2,
|
||||||
eta = 1, nthread = n_threads, nrounds = 2, objective = "binary:logistic",
|
eta = 1, nthread = n_threads, nrounds = 2, objective = "binary:logistic",
|
||||||
eval_metric = "error", eval_metric = "auc", eval_metric = "logloss",
|
eval_metric = "error", eval_metric = "auc", eval_metric = "logloss",
|
||||||
watchlist = list(train = xgb.DMatrix(train$data, label = train$label))
|
evals = list(train = xgb.DMatrix(train$data, label = train$label))
|
||||||
),
|
),
|
||||||
"train-error.*train-auc.*train-logloss"
|
"train-error.*train-auc.*train-logloss"
|
||||||
)
|
)
|
||||||
@ -283,7 +283,7 @@ test_that("use of multiple eval metrics works", {
|
|||||||
data = xgb.DMatrix(train$data, label = train$label), max_depth = 2,
|
data = xgb.DMatrix(train$data, label = train$label), max_depth = 2,
|
||||||
eta = 1, nthread = n_threads, nrounds = 2, objective = "binary:logistic",
|
eta = 1, nthread = n_threads, nrounds = 2, objective = "binary:logistic",
|
||||||
eval_metric = list("error", "auc", "logloss"),
|
eval_metric = list("error", "auc", "logloss"),
|
||||||
watchlist = list(train = xgb.DMatrix(train$data, label = train$label))
|
evals = list(train = xgb.DMatrix(train$data, label = train$label))
|
||||||
),
|
),
|
||||||
"train-error.*train-auc.*train-logloss"
|
"train-error.*train-auc.*train-logloss"
|
||||||
)
|
)
|
||||||
@ -295,19 +295,19 @@ test_that("use of multiple eval metrics works", {
|
|||||||
|
|
||||||
test_that("training continuation works", {
|
test_that("training continuation works", {
|
||||||
dtrain <- xgb.DMatrix(train$data, label = train$label, nthread = n_threads)
|
dtrain <- xgb.DMatrix(train$data, label = train$label, nthread = n_threads)
|
||||||
watchlist <- list(train = dtrain)
|
evals <- list(train = dtrain)
|
||||||
param <- list(
|
param <- list(
|
||||||
objective = "binary:logistic", max_depth = 2, eta = 1, nthread = n_threads
|
objective = "binary:logistic", max_depth = 2, eta = 1, nthread = n_threads
|
||||||
)
|
)
|
||||||
|
|
||||||
# for the reference, use 4 iterations at once:
|
# for the reference, use 4 iterations at once:
|
||||||
set.seed(11)
|
set.seed(11)
|
||||||
bst <- xgb.train(param, dtrain, nrounds = 4, watchlist, verbose = 0)
|
bst <- xgb.train(param, dtrain, nrounds = 4, evals = evals, verbose = 0)
|
||||||
# first two iterations:
|
# first two iterations:
|
||||||
set.seed(11)
|
set.seed(11)
|
||||||
bst1 <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0)
|
bst1 <- xgb.train(param, dtrain, nrounds = 2, evals = evals, verbose = 0)
|
||||||
# continue for two more:
|
# continue for two more:
|
||||||
bst2 <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0, xgb_model = bst1)
|
bst2 <- xgb.train(param, dtrain, nrounds = 2, evals = evals, verbose = 0, xgb_model = bst1)
|
||||||
if (!windows_flag && !solaris_flag) {
|
if (!windows_flag && !solaris_flag) {
|
||||||
expect_equal(xgb.save.raw(bst), xgb.save.raw(bst2))
|
expect_equal(xgb.save.raw(bst), xgb.save.raw(bst2))
|
||||||
}
|
}
|
||||||
@ -315,7 +315,7 @@ test_that("training continuation works", {
|
|||||||
expect_equal(dim(attributes(bst2)$evaluation_log), c(4, 2))
|
expect_equal(dim(attributes(bst2)$evaluation_log), c(4, 2))
|
||||||
expect_equal(attributes(bst2)$evaluation_log, attributes(bst)$evaluation_log)
|
expect_equal(attributes(bst2)$evaluation_log, attributes(bst)$evaluation_log)
|
||||||
# test continuing from raw model data
|
# test continuing from raw model data
|
||||||
bst2 <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0, xgb_model = xgb.save.raw(bst1))
|
bst2 <- xgb.train(param, dtrain, nrounds = 2, evals = evals, verbose = 0, xgb_model = xgb.save.raw(bst1))
|
||||||
if (!windows_flag && !solaris_flag) {
|
if (!windows_flag && !solaris_flag) {
|
||||||
expect_equal(xgb.save.raw(bst), xgb.save.raw(bst2))
|
expect_equal(xgb.save.raw(bst), xgb.save.raw(bst2))
|
||||||
}
|
}
|
||||||
@ -323,7 +323,7 @@ test_that("training continuation works", {
|
|||||||
# test continuing from a model in file
|
# test continuing from a model in file
|
||||||
fname <- file.path(tempdir(), "xgboost.json")
|
fname <- file.path(tempdir(), "xgboost.json")
|
||||||
xgb.save(bst1, fname)
|
xgb.save(bst1, fname)
|
||||||
bst2 <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0, xgb_model = fname)
|
bst2 <- xgb.train(param, dtrain, nrounds = 2, evals = evals, verbose = 0, xgb_model = fname)
|
||||||
if (!windows_flag && !solaris_flag) {
|
if (!windows_flag && !solaris_flag) {
|
||||||
expect_equal(xgb.save.raw(bst), xgb.save.raw(bst2))
|
expect_equal(xgb.save.raw(bst), xgb.save.raw(bst2))
|
||||||
}
|
}
|
||||||
@ -417,7 +417,7 @@ test_that("max_delta_step works", {
|
|||||||
dtrain <- xgb.DMatrix(
|
dtrain <- xgb.DMatrix(
|
||||||
agaricus.train$data, label = agaricus.train$label, nthread = n_threads
|
agaricus.train$data, label = agaricus.train$label, nthread = n_threads
|
||||||
)
|
)
|
||||||
watchlist <- list(train = dtrain)
|
evals <- list(train = dtrain)
|
||||||
param <- list(
|
param <- list(
|
||||||
objective = "binary:logistic", eval_metric = "logloss", max_depth = 2,
|
objective = "binary:logistic", eval_metric = "logloss", max_depth = 2,
|
||||||
nthread = n_threads,
|
nthread = n_threads,
|
||||||
@ -425,9 +425,9 @@ test_that("max_delta_step works", {
|
|||||||
)
|
)
|
||||||
nrounds <- 5
|
nrounds <- 5
|
||||||
# model with no restriction on max_delta_step
|
# model with no restriction on max_delta_step
|
||||||
bst1 <- xgb.train(param, dtrain, nrounds, watchlist, verbose = 1)
|
bst1 <- xgb.train(param, dtrain, nrounds, evals = evals, verbose = 1)
|
||||||
# model with restricted max_delta_step
|
# model with restricted max_delta_step
|
||||||
bst2 <- xgb.train(param, dtrain, nrounds, watchlist, verbose = 1, max_delta_step = 1)
|
bst2 <- xgb.train(param, dtrain, nrounds, evals = evals, verbose = 1, max_delta_step = 1)
|
||||||
# the no-restriction model is expected to have consistently lower loss during the initial iterations
|
# the no-restriction model is expected to have consistently lower loss during the initial iterations
|
||||||
expect_true(all(attributes(bst1)$evaluation_log$train_logloss < attributes(bst2)$evaluation_log$train_logloss))
|
expect_true(all(attributes(bst1)$evaluation_log$train_logloss < attributes(bst2)$evaluation_log$train_logloss))
|
||||||
expect_lt(mean(attributes(bst1)$evaluation_log$train_logloss) / mean(attributes(bst2)$evaluation_log$train_logloss), 0.8)
|
expect_lt(mean(attributes(bst1)$evaluation_log$train_logloss) / mean(attributes(bst2)$evaluation_log$train_logloss), 0.8)
|
||||||
@ -444,7 +444,7 @@ test_that("colsample_bytree works", {
|
|||||||
colnames(test_x) <- paste0("Feature_", sprintf("%03d", 1:100))
|
colnames(test_x) <- paste0("Feature_", sprintf("%03d", 1:100))
|
||||||
dtrain <- xgb.DMatrix(train_x, label = train_y, nthread = n_threads)
|
dtrain <- xgb.DMatrix(train_x, label = train_y, nthread = n_threads)
|
||||||
dtest <- xgb.DMatrix(test_x, label = test_y, nthread = n_threads)
|
dtest <- xgb.DMatrix(test_x, label = test_y, nthread = n_threads)
|
||||||
watchlist <- list(train = dtrain, eval = dtest)
|
evals <- list(train = dtrain, eval = dtest)
|
||||||
## Use colsample_bytree = 0.01, so that roughly one out of 100 features is chosen for
|
## Use colsample_bytree = 0.01, so that roughly one out of 100 features is chosen for
|
||||||
## each tree
|
## each tree
|
||||||
param <- list(
|
param <- list(
|
||||||
@ -453,7 +453,7 @@ test_that("colsample_bytree works", {
|
|||||||
eval_metric = "auc"
|
eval_metric = "auc"
|
||||||
)
|
)
|
||||||
set.seed(2)
|
set.seed(2)
|
||||||
bst <- xgb.train(param, dtrain, nrounds = 100, watchlist, verbose = 0)
|
bst <- xgb.train(param, dtrain, nrounds = 100, evals = evals, verbose = 0)
|
||||||
xgb.importance(model = bst)
|
xgb.importance(model = bst)
|
||||||
# If colsample_bytree works properly, a variety of features should be used
|
# If colsample_bytree works properly, a variety of features should be used
|
||||||
# in the 100 trees
|
# in the 100 trees
|
||||||
|
|||||||
@ -19,7 +19,7 @@ ltrain <- add.noise(train$label, 0.2)
|
|||||||
ltest <- add.noise(test$label, 0.2)
|
ltest <- add.noise(test$label, 0.2)
|
||||||
dtrain <- xgb.DMatrix(train$data, label = ltrain, nthread = n_threads)
|
dtrain <- xgb.DMatrix(train$data, label = ltrain, nthread = n_threads)
|
||||||
dtest <- xgb.DMatrix(test$data, label = ltest, nthread = n_threads)
|
dtest <- xgb.DMatrix(test$data, label = ltest, nthread = n_threads)
|
||||||
watchlist <- list(train = dtrain, test = dtest)
|
evals <- list(train = dtrain, test = dtest)
|
||||||
|
|
||||||
|
|
||||||
err <- function(label, pr) sum((pr > 0.5) != label) / length(label)
|
err <- function(label, pr) sum((pr > 0.5) != label) / length(label)
|
||||||
@ -39,7 +39,7 @@ test_that("xgb.cb.print.evaluation works as expected for xgb.train", {
|
|||||||
nthread = n_threads
|
nthread = n_threads
|
||||||
),
|
),
|
||||||
nrounds = 10,
|
nrounds = 10,
|
||||||
watchlist = list(train = dtrain, test = dtest),
|
evals = list(train = dtrain, test = dtest),
|
||||||
callbacks = list(xgb.cb.print.evaluation(period = 1))
|
callbacks = list(xgb.cb.print.evaluation(period = 1))
|
||||||
)
|
)
|
||||||
})
|
})
|
||||||
@ -57,7 +57,7 @@ test_that("xgb.cb.print.evaluation works as expected for xgb.train", {
|
|||||||
nthread = n_threads
|
nthread = n_threads
|
||||||
),
|
),
|
||||||
nrounds = 10,
|
nrounds = 10,
|
||||||
watchlist = list(train = dtrain, test = dtest),
|
evals = list(train = dtrain, test = dtest),
|
||||||
callbacks = list(xgb.cb.print.evaluation(period = 2))
|
callbacks = list(xgb.cb.print.evaluation(period = 2))
|
||||||
)
|
)
|
||||||
})
|
})
|
||||||
@ -117,7 +117,7 @@ test_that("xgb.cb.evaluation.log works as expected for xgb.train", {
|
|||||||
),
|
),
|
||||||
nrounds = 10,
|
nrounds = 10,
|
||||||
verbose = FALSE,
|
verbose = FALSE,
|
||||||
watchlist = list(train = dtrain, test = dtest),
|
evals = list(train = dtrain, test = dtest),
|
||||||
callbacks = list(xgb.cb.evaluation.log())
|
callbacks = list(xgb.cb.evaluation.log())
|
||||||
)
|
)
|
||||||
logs <- attributes(model)$evaluation_log
|
logs <- attributes(model)$evaluation_log
|
||||||
@ -155,7 +155,7 @@ param <- list(objective = "binary:logistic", eval_metric = "error",
|
|||||||
|
|
||||||
test_that("can store evaluation_log without printing", {
|
test_that("can store evaluation_log without printing", {
|
||||||
expect_silent(
|
expect_silent(
|
||||||
bst <- xgb.train(param, dtrain, nrounds = 10, watchlist, eta = 1, verbose = 0)
|
bst <- xgb.train(param, dtrain, nrounds = 10, evals = evals, eta = 1, verbose = 0)
|
||||||
)
|
)
|
||||||
expect_false(is.null(attributes(bst)$evaluation_log))
|
expect_false(is.null(attributes(bst)$evaluation_log))
|
||||||
expect_false(is.null(attributes(bst)$evaluation_log$train_error))
|
expect_false(is.null(attributes(bst)$evaluation_log$train_error))
|
||||||
@ -166,14 +166,14 @@ test_that("xgb.cb.reset.parameters works as expected", {
|
|||||||
|
|
||||||
# fixed eta
|
# fixed eta
|
||||||
set.seed(111)
|
set.seed(111)
|
||||||
bst0 <- xgb.train(param, dtrain, nrounds = 2, watchlist, eta = 0.9, verbose = 0)
|
bst0 <- xgb.train(param, dtrain, nrounds = 2, evals = evals, eta = 0.9, verbose = 0)
|
||||||
expect_false(is.null(attributes(bst0)$evaluation_log))
|
expect_false(is.null(attributes(bst0)$evaluation_log))
|
||||||
expect_false(is.null(attributes(bst0)$evaluation_log$train_error))
|
expect_false(is.null(attributes(bst0)$evaluation_log$train_error))
|
||||||
|
|
||||||
# same eta but re-set as a vector parameter in the callback
|
# same eta but re-set as a vector parameter in the callback
|
||||||
set.seed(111)
|
set.seed(111)
|
||||||
my_par <- list(eta = c(0.9, 0.9))
|
my_par <- list(eta = c(0.9, 0.9))
|
||||||
bst1 <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0,
|
bst1 <- xgb.train(param, dtrain, nrounds = 2, evals = evals, verbose = 0,
|
||||||
callbacks = list(xgb.cb.reset.parameters(my_par)))
|
callbacks = list(xgb.cb.reset.parameters(my_par)))
|
||||||
expect_false(is.null(attributes(bst1)$evaluation_log$train_error))
|
expect_false(is.null(attributes(bst1)$evaluation_log$train_error))
|
||||||
expect_equal(attributes(bst0)$evaluation_log$train_error,
|
expect_equal(attributes(bst0)$evaluation_log$train_error,
|
||||||
@ -182,7 +182,7 @@ test_that("xgb.cb.reset.parameters works as expected", {
|
|||||||
# same eta but re-set via a function in the callback
|
# same eta but re-set via a function in the callback
|
||||||
set.seed(111)
|
set.seed(111)
|
||||||
my_par <- list(eta = function(itr, itr_end) 0.9)
|
my_par <- list(eta = function(itr, itr_end) 0.9)
|
||||||
bst2 <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0,
|
bst2 <- xgb.train(param, dtrain, nrounds = 2, evals = evals, verbose = 0,
|
||||||
callbacks = list(xgb.cb.reset.parameters(my_par)))
|
callbacks = list(xgb.cb.reset.parameters(my_par)))
|
||||||
expect_false(is.null(attributes(bst2)$evaluation_log$train_error))
|
expect_false(is.null(attributes(bst2)$evaluation_log$train_error))
|
||||||
expect_equal(attributes(bst0)$evaluation_log$train_error,
|
expect_equal(attributes(bst0)$evaluation_log$train_error,
|
||||||
@ -191,7 +191,7 @@ test_that("xgb.cb.reset.parameters works as expected", {
|
|||||||
# different eta re-set as a vector parameter in the callback
|
# different eta re-set as a vector parameter in the callback
|
||||||
set.seed(111)
|
set.seed(111)
|
||||||
my_par <- list(eta = c(0.6, 0.5))
|
my_par <- list(eta = c(0.6, 0.5))
|
||||||
bst3 <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0,
|
bst3 <- xgb.train(param, dtrain, nrounds = 2, evals = evals, verbose = 0,
|
||||||
callbacks = list(xgb.cb.reset.parameters(my_par)))
|
callbacks = list(xgb.cb.reset.parameters(my_par)))
|
||||||
expect_false(is.null(attributes(bst3)$evaluation_log$train_error))
|
expect_false(is.null(attributes(bst3)$evaluation_log$train_error))
|
||||||
expect_false(all(attributes(bst0)$evaluation_log$train_error == attributes(bst3)$evaluation_log$train_error))
|
expect_false(all(attributes(bst0)$evaluation_log$train_error == attributes(bst3)$evaluation_log$train_error))
|
||||||
@ -199,7 +199,7 @@ test_that("xgb.cb.reset.parameters works as expected", {
|
|||||||
# resetting multiple parameters at the same time runs with no error
|
# resetting multiple parameters at the same time runs with no error
|
||||||
my_par <- list(eta = c(1., 0.5), gamma = c(1, 2), max_depth = c(4, 8))
|
my_par <- list(eta = c(1., 0.5), gamma = c(1, 2), max_depth = c(4, 8))
|
||||||
expect_error(
|
expect_error(
|
||||||
bst4 <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0,
|
bst4 <- xgb.train(param, dtrain, nrounds = 2, evals = evals, verbose = 0,
|
||||||
callbacks = list(xgb.cb.reset.parameters(my_par)))
|
callbacks = list(xgb.cb.reset.parameters(my_par)))
|
||||||
, NA) # NA = no error
|
, NA) # NA = no error
|
||||||
# CV works as well
|
# CV works as well
|
||||||
@ -210,7 +210,7 @@ test_that("xgb.cb.reset.parameters works as expected", {
|
|||||||
|
|
||||||
# expect no learning with 0 learning rate
|
# expect no learning with 0 learning rate
|
||||||
my_par <- list(eta = c(0., 0.))
|
my_par <- list(eta = c(0., 0.))
|
||||||
bstX <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0,
|
bstX <- xgb.train(param, dtrain, nrounds = 2, evals = evals, verbose = 0,
|
||||||
callbacks = list(xgb.cb.reset.parameters(my_par)))
|
callbacks = list(xgb.cb.reset.parameters(my_par)))
|
||||||
expect_false(is.null(attributes(bstX)$evaluation_log$train_error))
|
expect_false(is.null(attributes(bstX)$evaluation_log$train_error))
|
||||||
er <- unique(attributes(bstX)$evaluation_log$train_error)
|
er <- unique(attributes(bstX)$evaluation_log$train_error)
|
||||||
@ -223,7 +223,7 @@ test_that("xgb.cb.save.model works as expected", {
|
|||||||
files <- unname(sapply(files, function(f) file.path(tempdir(), f)))
|
files <- unname(sapply(files, function(f) file.path(tempdir(), f)))
|
||||||
for (f in files) if (file.exists(f)) file.remove(f)
|
for (f in files) if (file.exists(f)) file.remove(f)
|
||||||
|
|
||||||
bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, eta = 1, verbose = 0,
|
bst <- xgb.train(param, dtrain, nrounds = 2, evals = evals, eta = 1, verbose = 0,
|
||||||
save_period = 1, save_name = file.path(tempdir(), "xgboost_%02d.json"))
|
save_period = 1, save_name = file.path(tempdir(), "xgboost_%02d.json"))
|
||||||
expect_true(file.exists(files[1]))
|
expect_true(file.exists(files[1]))
|
||||||
expect_true(file.exists(files[2]))
|
expect_true(file.exists(files[2]))
|
||||||
@ -239,7 +239,7 @@ test_that("xgb.cb.save.model works as expected", {
|
|||||||
expect_equal(xgb.save.raw(bst), xgb.save.raw(b2))
|
expect_equal(xgb.save.raw(bst), xgb.save.raw(b2))
|
||||||
|
|
||||||
# save_period = 0 saves the last iteration's model
|
# save_period = 0 saves the last iteration's model
|
||||||
bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, eta = 1, verbose = 0,
|
bst <- xgb.train(param, dtrain, nrounds = 2, evals = evals, eta = 1, verbose = 0,
|
||||||
save_period = 0, save_name = file.path(tempdir(), 'xgboost.json'))
|
save_period = 0, save_name = file.path(tempdir(), 'xgboost.json'))
|
||||||
expect_true(file.exists(files[3]))
|
expect_true(file.exists(files[3]))
|
||||||
b2 <- xgb.load(files[3])
|
b2 <- xgb.load(files[3])
|
||||||
@ -252,7 +252,7 @@ test_that("xgb.cb.save.model works as expected", {
|
|||||||
test_that("early stopping xgb.train works", {
|
test_that("early stopping xgb.train works", {
|
||||||
set.seed(11)
|
set.seed(11)
|
||||||
expect_output(
|
expect_output(
|
||||||
bst <- xgb.train(param, dtrain, nrounds = 20, watchlist, eta = 0.3,
|
bst <- xgb.train(param, dtrain, nrounds = 20, evals = evals, eta = 0.3,
|
||||||
early_stopping_rounds = 3, maximize = FALSE)
|
early_stopping_rounds = 3, maximize = FALSE)
|
||||||
, "Stopping. Best iteration")
|
, "Stopping. Best iteration")
|
||||||
expect_false(is.null(xgb.attr(bst, "best_iteration")))
|
expect_false(is.null(xgb.attr(bst, "best_iteration")))
|
||||||
@ -266,7 +266,7 @@ test_that("early stopping xgb.train works", {
|
|||||||
|
|
||||||
set.seed(11)
|
set.seed(11)
|
||||||
expect_silent(
|
expect_silent(
|
||||||
bst0 <- xgb.train(param, dtrain, nrounds = 20, watchlist, eta = 0.3,
|
bst0 <- xgb.train(param, dtrain, nrounds = 20, evals = evals, eta = 0.3,
|
||||||
early_stopping_rounds = 3, maximize = FALSE, verbose = 0)
|
early_stopping_rounds = 3, maximize = FALSE, verbose = 0)
|
||||||
)
|
)
|
||||||
expect_equal(attributes(bst)$evaluation_log, attributes(bst0)$evaluation_log)
|
expect_equal(attributes(bst)$evaluation_log, attributes(bst0)$evaluation_log)
|
||||||
@ -282,7 +282,7 @@ test_that("early stopping xgb.train works", {
|
|||||||
test_that("early stopping using a specific metric works", {
|
test_that("early stopping using a specific metric works", {
|
||||||
set.seed(11)
|
set.seed(11)
|
||||||
expect_output(
|
expect_output(
|
||||||
bst <- xgb.train(param[-2], dtrain, nrounds = 20, watchlist, eta = 0.6,
|
bst <- xgb.train(param[-2], dtrain, nrounds = 20, evals = evals, eta = 0.6,
|
||||||
eval_metric = "logloss", eval_metric = "auc",
|
eval_metric = "logloss", eval_metric = "auc",
|
||||||
callbacks = list(xgb.cb.early.stop(stopping_rounds = 3, maximize = FALSE,
|
callbacks = list(xgb.cb.early.stop(stopping_rounds = 3, maximize = FALSE,
|
||||||
metric_name = 'test_logloss')))
|
metric_name = 'test_logloss')))
|
||||||
@ -315,7 +315,7 @@ test_that("early stopping works with titanic", {
|
|||||||
nrounds = 100,
|
nrounds = 100,
|
||||||
early_stopping_rounds = 3,
|
early_stopping_rounds = 3,
|
||||||
nthread = n_threads,
|
nthread = n_threads,
|
||||||
watchlist = list(train = xgb.DMatrix(dtx, label = dty))
|
evals = list(train = xgb.DMatrix(dtx, label = dty))
|
||||||
)
|
)
|
||||||
|
|
||||||
expect_true(TRUE) # should not crash
|
expect_true(TRUE) # should not crash
|
||||||
|
|||||||
@ -12,7 +12,7 @@ dtrain <- xgb.DMatrix(
|
|||||||
dtest <- xgb.DMatrix(
|
dtest <- xgb.DMatrix(
|
||||||
agaricus.test$data, label = agaricus.test$label, nthread = n_threads
|
agaricus.test$data, label = agaricus.test$label, nthread = n_threads
|
||||||
)
|
)
|
||||||
watchlist <- list(eval = dtest, train = dtrain)
|
evals <- list(eval = dtest, train = dtrain)
|
||||||
|
|
||||||
logregobj <- function(preds, dtrain) {
|
logregobj <- function(preds, dtrain) {
|
||||||
labels <- getinfo(dtrain, "label")
|
labels <- getinfo(dtrain, "label")
|
||||||
@ -33,7 +33,7 @@ param <- list(max_depth = 2, eta = 1, nthread = n_threads,
|
|||||||
num_round <- 2
|
num_round <- 2
|
||||||
|
|
||||||
test_that("custom objective works", {
|
test_that("custom objective works", {
|
||||||
bst <- xgb.train(param, dtrain, num_round, watchlist)
|
bst <- xgb.train(param, dtrain, num_round, evals)
|
||||||
expect_equal(class(bst), "xgb.Booster")
|
expect_equal(class(bst), "xgb.Booster")
|
||||||
expect_false(is.null(attributes(bst)$evaluation_log))
|
expect_false(is.null(attributes(bst)$evaluation_log))
|
||||||
expect_false(is.null(attributes(bst)$evaluation_log$eval_error))
|
expect_false(is.null(attributes(bst)$evaluation_log$eval_error))
|
||||||
@ -48,7 +48,7 @@ test_that("custom objective in CV works", {
|
|||||||
})
|
})
|
||||||
|
|
||||||
test_that("custom objective with early stop works", {
|
test_that("custom objective with early stop works", {
|
||||||
bst <- xgb.train(param, dtrain, 10, watchlist)
|
bst <- xgb.train(param, dtrain, 10, evals)
|
||||||
expect_equal(class(bst), "xgb.Booster")
|
expect_equal(class(bst), "xgb.Booster")
|
||||||
train_log <- attributes(bst)$evaluation_log$train_error
|
train_log <- attributes(bst)$evaluation_log$train_error
|
||||||
expect_true(all(diff(train_log) <= 0))
|
expect_true(all(diff(train_log) <= 0))
|
||||||
@ -66,7 +66,7 @@ test_that("custom objective using DMatrix attr works", {
|
|||||||
return(list(grad = grad, hess = hess))
|
return(list(grad = grad, hess = hess))
|
||||||
}
|
}
|
||||||
param$objective <- logregobjattr
|
param$objective <- logregobjattr
|
||||||
bst <- xgb.train(param, dtrain, num_round, watchlist)
|
bst <- xgb.train(param, dtrain, num_round, evals)
|
||||||
expect_equal(class(bst), "xgb.Booster")
|
expect_equal(class(bst), "xgb.Booster")
|
||||||
})
|
})
|
||||||
|
|
||||||
|
|||||||
@ -41,13 +41,13 @@ test_that("xgb.DMatrix: basic construction", {
|
|||||||
|
|
||||||
params <- list(tree_method = "hist", nthread = n_threads)
|
params <- list(tree_method = "hist", nthread = n_threads)
|
||||||
bst_fd <- xgb.train(
|
bst_fd <- xgb.train(
|
||||||
params, nrounds = 8, fd, watchlist = list(train = fd)
|
params, nrounds = 8, fd, evals = list(train = fd)
|
||||||
)
|
)
|
||||||
bst_dgr <- xgb.train(
|
bst_dgr <- xgb.train(
|
||||||
params, nrounds = 8, fdgr, watchlist = list(train = fdgr)
|
params, nrounds = 8, fdgr, evals = list(train = fdgr)
|
||||||
)
|
)
|
||||||
bst_dgc <- xgb.train(
|
bst_dgc <- xgb.train(
|
||||||
params, nrounds = 8, fdgc, watchlist = list(train = fdgc)
|
params, nrounds = 8, fdgc, evals = list(train = fdgc)
|
||||||
)
|
)
|
||||||
|
|
||||||
raw_fd <- xgb.save.raw(bst_fd, raw_format = "ubj")
|
raw_fd <- xgb.save.raw(bst_fd, raw_format = "ubj")
|
||||||
|
|||||||
@ -14,19 +14,19 @@ test_that("gblinear works", {
|
|||||||
|
|
||||||
param <- list(objective = "binary:logistic", eval_metric = "error", booster = "gblinear",
|
param <- list(objective = "binary:logistic", eval_metric = "error", booster = "gblinear",
|
||||||
nthread = n_threads, eta = 0.8, alpha = 0.0001, lambda = 0.0001)
|
nthread = n_threads, eta = 0.8, alpha = 0.0001, lambda = 0.0001)
|
||||||
watchlist <- list(eval = dtest, train = dtrain)
|
evals <- list(eval = dtest, train = dtrain)
|
||||||
|
|
||||||
n <- 5 # iterations
|
n <- 5 # iterations
|
||||||
ERR_UL <- 0.005 # upper limit for the test set error
|
ERR_UL <- 0.005 # upper limit for the test set error
|
||||||
VERB <- 0 # chatterbox switch
|
VERB <- 0 # chatterbox switch
|
||||||
|
|
||||||
param$updater <- 'shotgun'
|
param$updater <- 'shotgun'
|
||||||
bst <- xgb.train(param, dtrain, n, watchlist, verbose = VERB, feature_selector = 'shuffle')
|
bst <- xgb.train(param, dtrain, n, evals, verbose = VERB, feature_selector = 'shuffle')
|
||||||
ypred <- predict(bst, dtest)
|
ypred <- predict(bst, dtest)
|
||||||
expect_equal(length(getinfo(dtest, 'label')), 1611)
|
expect_equal(length(getinfo(dtest, 'label')), 1611)
|
||||||
expect_lt(attributes(bst)$evaluation_log$eval_error[n], ERR_UL)
|
expect_lt(attributes(bst)$evaluation_log$eval_error[n], ERR_UL)
|
||||||
|
|
||||||
bst <- xgb.train(param, dtrain, n, watchlist, verbose = VERB, feature_selector = 'cyclic',
|
bst <- xgb.train(param, dtrain, n, evals, verbose = VERB, feature_selector = 'cyclic',
|
||||||
callbacks = list(xgb.cb.gblinear.history()))
|
callbacks = list(xgb.cb.gblinear.history()))
|
||||||
expect_lt(attributes(bst)$evaluation_log$eval_error[n], ERR_UL)
|
expect_lt(attributes(bst)$evaluation_log$eval_error[n], ERR_UL)
|
||||||
h <- xgb.gblinear.history(bst)
|
h <- xgb.gblinear.history(bst)
|
||||||
@ -34,16 +34,16 @@ test_that("gblinear works", {
|
|||||||
expect_is(h, "matrix")
|
expect_is(h, "matrix")
|
||||||
|
|
||||||
param$updater <- 'coord_descent'
|
param$updater <- 'coord_descent'
|
||||||
bst <- xgb.train(param, dtrain, n, watchlist, verbose = VERB, feature_selector = 'cyclic')
|
bst <- xgb.train(param, dtrain, n, evals, verbose = VERB, feature_selector = 'cyclic')
|
||||||
expect_lt(attributes(bst)$evaluation_log$eval_error[n], ERR_UL)
|
expect_lt(attributes(bst)$evaluation_log$eval_error[n], ERR_UL)
|
||||||
|
|
||||||
bst <- xgb.train(param, dtrain, n, watchlist, verbose = VERB, feature_selector = 'shuffle')
|
bst <- xgb.train(param, dtrain, n, evals, verbose = VERB, feature_selector = 'shuffle')
|
||||||
expect_lt(attributes(bst)$evaluation_log$eval_error[n], ERR_UL)
|
expect_lt(attributes(bst)$evaluation_log$eval_error[n], ERR_UL)
|
||||||
|
|
||||||
bst <- xgb.train(param, dtrain, 2, watchlist, verbose = VERB, feature_selector = 'greedy')
|
bst <- xgb.train(param, dtrain, 2, evals, verbose = VERB, feature_selector = 'greedy')
|
||||||
expect_lt(attributes(bst)$evaluation_log$eval_error[2], ERR_UL)
|
expect_lt(attributes(bst)$evaluation_log$eval_error[2], ERR_UL)
|
||||||
|
|
||||||
bst <- xgb.train(param, dtrain, n, watchlist, verbose = VERB, feature_selector = 'thrifty',
|
bst <- xgb.train(param, dtrain, n, evals, verbose = VERB, feature_selector = 'thrifty',
|
||||||
top_k = 50, callbacks = list(xgb.cb.gblinear.history(sparse = TRUE)))
|
top_k = 50, callbacks = list(xgb.cb.gblinear.history(sparse = TRUE)))
|
||||||
expect_lt(attributes(bst)$evaluation_log$eval_error[n], ERR_UL)
|
expect_lt(attributes(bst)$evaluation_log$eval_error[n], ERR_UL)
|
||||||
h <- xgb.gblinear.history(bst)
|
h <- xgb.gblinear.history(bst)
|
||||||
|
|||||||
@ -15,7 +15,7 @@ test_that('Test ranking with unweighted data', {
|
|||||||
|
|
||||||
params <- list(eta = 1, tree_method = 'exact', objective = 'rank:pairwise', max_depth = 1,
|
params <- list(eta = 1, tree_method = 'exact', objective = 'rank:pairwise', max_depth = 1,
|
||||||
eval_metric = 'auc', eval_metric = 'aucpr', nthread = n_threads)
|
eval_metric = 'auc', eval_metric = 'aucpr', nthread = n_threads)
|
||||||
bst <- xgb.train(params, dtrain, nrounds = 10, watchlist = list(train = dtrain))
|
bst <- xgb.train(params, dtrain, nrounds = 10, evals = list(train = dtrain))
|
||||||
# Check if the metric is monotone increasing
|
# Check if the metric is monotone increasing
|
||||||
expect_true(all(diff(attributes(bst)$evaluation_log$train_auc) >= 0))
|
expect_true(all(diff(attributes(bst)$evaluation_log$train_auc) >= 0))
|
||||||
expect_true(all(diff(attributes(bst)$evaluation_log$train_aucpr) >= 0))
|
expect_true(all(diff(attributes(bst)$evaluation_log$train_aucpr) >= 0))
|
||||||
@ -39,7 +39,7 @@ test_that('Test ranking with weighted data', {
|
|||||||
eta = 1, tree_method = "exact", objective = "rank:pairwise", max_depth = 1,
|
eta = 1, tree_method = "exact", objective = "rank:pairwise", max_depth = 1,
|
||||||
eval_metric = "auc", eval_metric = "aucpr", nthread = n_threads
|
eval_metric = "auc", eval_metric = "aucpr", nthread = n_threads
|
||||||
)
|
)
|
||||||
bst <- xgb.train(params, dtrain, nrounds = 10, watchlist = list(train = dtrain))
|
bst <- xgb.train(params, dtrain, nrounds = 10, evals = list(train = dtrain))
|
||||||
# Check if the metric is monotone increasing
|
# Check if the metric is monotone increasing
|
||||||
expect_true(all(diff(attributes(bst)$evaluation_log$train_auc) >= 0))
|
expect_true(all(diff(attributes(bst)$evaluation_log$train_auc) >= 0))
|
||||||
expect_true(all(diff(attributes(bst)$evaluation_log$train_aucpr) >= 0))
|
expect_true(all(diff(attributes(bst)$evaluation_log$train_aucpr) >= 0))
|
||||||
|
|||||||
@ -17,7 +17,7 @@ dtest <- xgb.DMatrix(
|
|||||||
win32_flag <- .Platform$OS.type == "windows" && .Machine$sizeof.pointer != 8
|
win32_flag <- .Platform$OS.type == "windows" && .Machine$sizeof.pointer != 8
|
||||||
|
|
||||||
test_that("updating the model works", {
|
test_that("updating the model works", {
|
||||||
watchlist <- list(train = dtrain, test = dtest)
|
evals <- list(train = dtrain, test = dtest)
|
||||||
|
|
||||||
# no-subsampling
|
# no-subsampling
|
||||||
p1 <- list(
|
p1 <- list(
|
||||||
@ -25,19 +25,19 @@ test_that("updating the model works", {
|
|||||||
updater = "grow_colmaker,prune"
|
updater = "grow_colmaker,prune"
|
||||||
)
|
)
|
||||||
set.seed(11)
|
set.seed(11)
|
||||||
bst1 <- xgb.train(p1, dtrain, nrounds = 10, watchlist, verbose = 0)
|
bst1 <- xgb.train(p1, dtrain, nrounds = 10, evals = evals, verbose = 0)
|
||||||
tr1 <- xgb.model.dt.tree(model = bst1)
|
tr1 <- xgb.model.dt.tree(model = bst1)
|
||||||
|
|
||||||
# with subsampling
|
# with subsampling
|
||||||
p2 <- modifyList(p1, list(subsample = 0.1))
|
p2 <- modifyList(p1, list(subsample = 0.1))
|
||||||
set.seed(11)
|
set.seed(11)
|
||||||
bst2 <- xgb.train(p2, dtrain, nrounds = 10, watchlist, verbose = 0)
|
bst2 <- xgb.train(p2, dtrain, nrounds = 10, evals = evals, verbose = 0)
|
||||||
tr2 <- xgb.model.dt.tree(model = bst2)
|
tr2 <- xgb.model.dt.tree(model = bst2)
|
||||||
|
|
||||||
# the same no-subsampling boosting with an extra 'refresh' updater:
|
# the same no-subsampling boosting with an extra 'refresh' updater:
|
||||||
p1r <- modifyList(p1, list(updater = 'grow_colmaker,prune,refresh', refresh_leaf = FALSE))
|
p1r <- modifyList(p1, list(updater = 'grow_colmaker,prune,refresh', refresh_leaf = FALSE))
|
||||||
set.seed(11)
|
set.seed(11)
|
||||||
bst1r <- xgb.train(p1r, dtrain, nrounds = 10, watchlist, verbose = 0)
|
bst1r <- xgb.train(p1r, dtrain, nrounds = 10, evals = evals, verbose = 0)
|
||||||
tr1r <- xgb.model.dt.tree(model = bst1r)
|
tr1r <- xgb.model.dt.tree(model = bst1r)
|
||||||
# all should be the same when no subsampling
|
# all should be the same when no subsampling
|
||||||
expect_equal(attributes(bst1)$evaluation_log, attributes(bst1r)$evaluation_log)
|
expect_equal(attributes(bst1)$evaluation_log, attributes(bst1r)$evaluation_log)
|
||||||
@ -53,7 +53,7 @@ test_that("updating the model works", {
|
|||||||
# the same boosting with subsampling with an extra 'refresh' updater:
|
# the same boosting with subsampling with an extra 'refresh' updater:
|
||||||
p2r <- modifyList(p2, list(updater = 'grow_colmaker,prune,refresh', refresh_leaf = FALSE))
|
p2r <- modifyList(p2, list(updater = 'grow_colmaker,prune,refresh', refresh_leaf = FALSE))
|
||||||
set.seed(11)
|
set.seed(11)
|
||||||
bst2r <- xgb.train(p2r, dtrain, nrounds = 10, watchlist, verbose = 0)
|
bst2r <- xgb.train(p2r, dtrain, nrounds = 10, evals = evals, verbose = 0)
|
||||||
tr2r <- xgb.model.dt.tree(model = bst2r)
|
tr2r <- xgb.model.dt.tree(model = bst2r)
|
||||||
# should be the same evaluation but different gains and larger cover
|
# should be the same evaluation but different gains and larger cover
|
||||||
expect_equal(attributes(bst2)$evaluation_log, attributes(bst2r)$evaluation_log)
|
expect_equal(attributes(bst2)$evaluation_log, attributes(bst2r)$evaluation_log)
|
||||||
@ -66,7 +66,7 @@ test_that("updating the model works", {
|
|||||||
# process type 'update' for no-subsampling model, refreshing the tree stats AND leaves from training data:
|
# process type 'update' for no-subsampling model, refreshing the tree stats AND leaves from training data:
|
||||||
set.seed(123)
|
set.seed(123)
|
||||||
p1u <- modifyList(p1, list(process_type = 'update', updater = 'refresh', refresh_leaf = TRUE))
|
p1u <- modifyList(p1, list(process_type = 'update', updater = 'refresh', refresh_leaf = TRUE))
|
||||||
bst1u <- xgb.train(p1u, dtrain, nrounds = 10, watchlist, verbose = 0, xgb_model = bst1)
|
bst1u <- xgb.train(p1u, dtrain, nrounds = 10, evals = evals, verbose = 0, xgb_model = bst1)
|
||||||
tr1u <- xgb.model.dt.tree(model = bst1u)
|
tr1u <- xgb.model.dt.tree(model = bst1u)
|
||||||
# all should be the same when no subsampling
|
# all should be the same when no subsampling
|
||||||
expect_equal(attributes(bst1)$evaluation_log, attributes(bst1u)$evaluation_log)
|
expect_equal(attributes(bst1)$evaluation_log, attributes(bst1u)$evaluation_log)
|
||||||
@ -79,7 +79,7 @@ test_that("updating the model works", {
|
|||||||
|
|
||||||
# same thing but with a serialized model
|
# same thing but with a serialized model
|
||||||
set.seed(123)
|
set.seed(123)
|
||||||
bst1u <- xgb.train(p1u, dtrain, nrounds = 10, watchlist, verbose = 0, xgb_model = xgb.save.raw(bst1))
|
bst1u <- xgb.train(p1u, dtrain, nrounds = 10, evals = evals, verbose = 0, xgb_model = xgb.save.raw(bst1))
|
||||||
tr1u <- xgb.model.dt.tree(model = bst1u)
|
tr1u <- xgb.model.dt.tree(model = bst1u)
|
||||||
# all should be the same when no subsampling
|
# all should be the same when no subsampling
|
||||||
expect_equal(attributes(bst1)$evaluation_log, attributes(bst1u)$evaluation_log)
|
expect_equal(attributes(bst1)$evaluation_log, attributes(bst1u)$evaluation_log)
|
||||||
@ -87,7 +87,7 @@ test_that("updating the model works", {
|
|||||||
|
|
||||||
# process type 'update' for model with subsampling, refreshing only the tree stats from training data:
|
# process type 'update' for model with subsampling, refreshing only the tree stats from training data:
|
||||||
p2u <- modifyList(p2, list(process_type = 'update', updater = 'refresh', refresh_leaf = FALSE))
|
p2u <- modifyList(p2, list(process_type = 'update', updater = 'refresh', refresh_leaf = FALSE))
|
||||||
bst2u <- xgb.train(p2u, dtrain, nrounds = 10, watchlist, verbose = 0, xgb_model = bst2)
|
bst2u <- xgb.train(p2u, dtrain, nrounds = 10, evals = evals, verbose = 0, xgb_model = bst2)
|
||||||
tr2u <- xgb.model.dt.tree(model = bst2u)
|
tr2u <- xgb.model.dt.tree(model = bst2u)
|
||||||
# should be the same evaluation but different gains and larger cover
|
# should be the same evaluation but different gains and larger cover
|
||||||
expect_equal(attributes(bst2)$evaluation_log, attributes(bst2u)$evaluation_log)
|
expect_equal(attributes(bst2)$evaluation_log, attributes(bst2u)$evaluation_log)
|
||||||
@ -102,7 +102,7 @@ test_that("updating the model works", {
|
|||||||
|
|
||||||
# process type 'update' for no-subsampling model, refreshing only the tree stats from TEST data:
|
# process type 'update' for no-subsampling model, refreshing only the tree stats from TEST data:
|
||||||
p1ut <- modifyList(p1, list(process_type = 'update', updater = 'refresh', refresh_leaf = FALSE))
|
p1ut <- modifyList(p1, list(process_type = 'update', updater = 'refresh', refresh_leaf = FALSE))
|
||||||
bst1ut <- xgb.train(p1ut, dtest, nrounds = 10, watchlist, verbose = 0, xgb_model = bst1)
|
bst1ut <- xgb.train(p1ut, dtest, nrounds = 10, evals = evals, verbose = 0, xgb_model = bst1)
|
||||||
tr1ut <- xgb.model.dt.tree(model = bst1ut)
|
tr1ut <- xgb.model.dt.tree(model = bst1ut)
|
||||||
# should be the same evaluations but different gains and smaller cover (test data is smaller)
|
# should be the same evaluations but different gains and smaller cover (test data is smaller)
|
||||||
expect_equal(attributes(bst1)$evaluation_log, attributes(bst1ut)$evaluation_log)
|
expect_equal(attributes(bst1)$evaluation_log, attributes(bst1ut)$evaluation_log)
|
||||||
@ -115,18 +115,18 @@ test_that("updating works for multiclass & multitree", {
 dtr <- xgb.DMatrix(
 as.matrix(iris[, -5]), label = as.numeric(iris$Species) - 1, nthread = n_threads
 )
-watchlist <- list(train = dtr)
+evals <- list(train = dtr)
 p0 <- list(max_depth = 2, eta = 0.5, nthread = n_threads, subsample = 0.6,
 objective = "multi:softprob", num_class = 3, num_parallel_tree = 2,
 base_score = 0)
 set.seed(121)
-bst0 <- xgb.train(p0, dtr, 5, watchlist, verbose = 0)
+bst0 <- xgb.train(p0, dtr, 5, evals = evals, verbose = 0)
 tr0 <- xgb.model.dt.tree(model = bst0)

 # run update process for an original model with subsampling
 p0u <- modifyList(p0, list(process_type = 'update', updater = 'refresh', refresh_leaf = FALSE))
 bst0u <- xgb.train(p0u, dtr, nrounds = xgb.get.num.boosted.rounds(bst0),
-watchlist, xgb_model = bst0, verbose = 0)
+evals = evals, xgb_model = bst0, verbose = 0)
 tr0u <- xgb.model.dt.tree(model = bst0u)

 # should be the same evaluation but different gains and larger cover
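The test hunks above all exercise the same workflow: an existing booster is passed back to `xgb.train()` with `process_type = 'update'` and `updater = 'refresh'`, so the tree structure is kept and only the per-node statistics are recomputed from whatever data is supplied. Below is a minimal, self-contained sketch of that workflow under the renamed `evals` argument; the agaricus toy data and the concrete parameter values are illustrative assumptions rather than the test fixtures.

```r
library(xgboost)

# toy data shipped with the package (an assumption; the tests use their own fixtures)
data(agaricus.train, package = "xgboost")
dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
evals <- list(train = dtrain)

# an ordinary training run
params <- list(max_depth = 2, eta = 0.3, subsample = 0.6, objective = "binary:logistic")
bst <- xgb.train(params, dtrain, nrounds = 10, evals = evals, verbose = 0)

# 'update' run over the same number of rounds: the splits are reused, and with
# refresh_leaf = FALSE only the internal node statistics (gain, cover) are refreshed
params_u <- modifyList(params, list(process_type = "update", updater = "refresh", refresh_leaf = FALSE))
bst_u <- xgb.train(params_u, dtrain, nrounds = 10, evals = evals, verbose = 0, xgb_model = bst)

# tree dumps should show the same splits but updated statistics
tr   <- xgb.model.dt.tree(model = bst)
tr_u <- xgb.model.dt.tree(model = bst_u)
```

Because `refresh_leaf = FALSE` leaves the leaf values untouched, the refreshed booster predicts exactly as before, which is why the `expect_equal()` calls in the hunks above can compare the two evaluation logs.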
@ -341,10 +341,10 @@ One way to measure progress in learning of a model is to provide to **XGBoost**

 > in some way it is similar to what we have done above with the average error. The main difference is that below it was after building the model, and now it is during the construction that we measure errors.

-For the purpose of this example, we use `watchlist` parameter. It is a list of `xgb.DMatrix`, each of them tagged with a name.
+For the purpose of this example, we use the `evals` parameter. It is a list of `xgb.DMatrix` objects, each of them tagged with a name.

-```{r watchlist, message=F, warning=F}
+```{r evals, message=F, warning=F}
-watchlist <- list(train = dtrain, test = dtest)
+evals <- list(train = dtrain, test = dtest)

 bst <- xgb.train(
 data = dtrain
@ -355,7 +355,7 @@ bst <- xgb.train(
 , objective = "binary:logistic"
 )
 , nrounds = 2
-, watchlist = watchlist
+, evals = evals
 )
 ```

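The chunk that these vignette hunks come from is only partially visible here. As a self-contained illustration (not a reconstruction of the chunk itself; the agaricus data and the parameter values are assumptions), passing a named `evals` list makes `xgb.train()` evaluate every listed DMatrix at each boosting round, so progress on held-out data is visible during construction rather than only after the fact:

```r
library(xgboost)

# assumed toy data; the vignette builds dtrain/dtest earlier, outside this excerpt
data(agaricus.train, package = "xgboost")
data(agaricus.test, package = "xgboost")
dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
dtest  <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label)

# a list of xgb.DMatrix objects, each tagged with a name
evals <- list(train = dtrain, test = dtest)

params <- list(max_depth = 2, eta = 1, objective = "binary:logistic")  # values assumed
bst <- xgb.train(params, dtrain, nrounds = 2, evals = evals)
```

With the default `verbose = 1`, each round prints the evaluation result for both the `train` and the `test` entry.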
@ -367,7 +367,7 @@ If with your own dataset you have not such results, you should think about how y

 For a better understanding of the learning progression, you may want to have some specific metric or even use multiple evaluation metrics.

-```{r watchlist2, message=F, warning=F}
+```{r evals2, message=F, warning=F}
 bst <- xgb.train(
 data = dtrain
 , max_depth = 2
@ -379,7 +379,7 @@ bst <- xgb.train(
 , eval_metric = "logloss"
 )
 , nrounds = 2
-, watchlist = watchlist
+, evals = evals
 )
 ```

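The vignette paragraph above mentions picking a specific metric or several at once, but only the tail of that chunk is visible in the hunks. Here is a hedged sketch of one way to request multiple metrics, assuming a package version that accepts repeated `eval_metric` entries in the parameter list; the `"error"` metric and the `eta` value are illustrative assumptions, and `dtrain`/`evals` are the objects from the previous sketch.

```r
# request several metrics at once by repeating eval_metric in the parameter list
params <- list(
  max_depth = 2,
  eta = 1,                     # assumed
  objective = "binary:logistic",
  eval_metric = "error",       # assumed companion metric
  eval_metric = "logloss"
)
bst <- xgb.train(params, dtrain, nrounds = 2, evals = evals)
# each round then reports every metric for every entry of `evals`,
# e.g. train-error, train-logloss, test-error, test-logloss
```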
@ -401,7 +401,7 @@ bst <- xgb.train(
 , eval_metric = "logloss"
 )
 , nrounds = 2
-, watchlist = watchlist
+, evals = evals
 )
 ```

@ -430,7 +430,7 @@ bst <- xgb.train(
 , objective = "binary:logistic"
 )
 , nrounds = 2
-, watchlist = watchlist
+, evals = evals
 )
 ```
