merge latest changes

Hui Liu
2024-01-24 13:30:08 -08:00
83 changed files with 1408 additions and 1273 deletions

View File

@@ -65,6 +65,6 @@ Imports:
data.table (>= 1.9.6),
jsonlite (>= 1.0)
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.2.3
RoxygenNote: 7.3.0
Encoding: UTF-8
SystemRequirements: GNU make, C++17

View File

@@ -1,5 +1,6 @@
# Generated by roxygen2: do not edit by hand
S3method("[",xgb.Booster)
S3method("[",xgb.DMatrix)
S3method("dimnames<-",xgb.DMatrix)
S3method(coef,xgb.Booster)
@@ -7,6 +8,7 @@ S3method(dim,xgb.DMatrix)
S3method(dimnames,xgb.DMatrix)
S3method(getinfo,xgb.Booster)
S3method(getinfo,xgb.DMatrix)
S3method(length,xgb.Booster)
S3method(predict,xgb.Booster)
S3method(print,xgb.Booster)
S3method(print,xgb.DMatrix)
@@ -62,6 +64,7 @@ export(xgb.plot.tree)
export(xgb.save)
export(xgb.save.raw)
export(xgb.set.config)
export(xgb.slice.Booster)
export(xgb.train)
export(xgboost)
import(methods)

View File

@@ -280,7 +280,6 @@ cb.reset.parameters <- function(new_params) {
#' \code{iteration},
#' \code{begin_iteration},
#' \code{end_iteration},
#' \code{num_parallel_tree}.
#'
#' @seealso
#' \code{\link{callbacks}},
@@ -291,7 +290,6 @@ cb.early.stop <- function(stopping_rounds, maximize = FALSE,
metric_name = NULL, verbose = TRUE) {
# state variables
best_iteration <- -1
best_ntreelimit <- -1
best_score <- Inf
best_msg <- NULL
metric_idx <- 1
@@ -358,12 +356,10 @@ cb.early.stop <- function(stopping_rounds, maximize = FALSE,
# If the difference is due to floating-point truncation, update best_score
best_score <- attr_best_score
}
xgb.attr(env$bst, "best_iteration") <- best_iteration
xgb.attr(env$bst, "best_ntreelimit") <- best_ntreelimit
xgb.attr(env$bst, "best_iteration") <- best_iteration - 1
xgb.attr(env$bst, "best_score") <- best_score
} else {
env$basket$best_iteration <- best_iteration
env$basket$best_ntreelimit <- best_ntreelimit
}
}
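A sketch of the indexing convention this change establishes, assuming a booster `bst` whose best round was the 7th in R's 1-based terms: the `best_iteration` attribute serialized with the model is now 0-based (libxgboost's convention), while R-side values such as the CV basket's `best_iteration` stay 1-based.

    xgb.attr(bst, "best_iteration")                       # 6 (0-based)
    # evaluation logs are 1-based, hence the + 1 when indexing by attribute:
    attributes(bst)$evaluation_log[xgb.attr(bst, "best_iteration") + 1, ]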
@@ -385,14 +381,13 @@ cb.early.stop <- function(stopping_rounds, maximize = FALSE,
)
best_score <<- score
best_iteration <<- i
best_ntreelimit <<- best_iteration * env$num_parallel_tree
# save the properties to attributes, so they will be present in the checkpoint
if (!is.null(env$bst)) {
xgb.attributes(env$bst) <- list(
best_iteration = best_iteration - 1, # convert to 0-based index
best_score = best_score,
best_msg = best_msg,
best_ntreelimit = best_ntreelimit)
best_msg = best_msg
)
}
} else if (i - best_iteration >= stopping_rounds) {
env$stop_condition <- TRUE
@@ -475,8 +470,6 @@ cb.save.model <- function(save_period = 0, save_name = "xgboost.ubj") {
#' \code{data},
#' \code{end_iteration},
#' \code{params},
#' \code{num_parallel_tree},
#' \code{num_class}.
#'
#' @return
#' Predictions are returned inside of the \code{pred} element, which is either a vector or a matrix,
@@ -499,19 +492,21 @@ cb.cv.predict <- function(save_models = FALSE) {
stop("'cb.cv.predict' callback requires 'basket' and 'bst_folds' lists in its calling frame")
N <- nrow(env$data)
pred <-
if (env$num_class > 1) {
matrix(NA_real_, N, env$num_class)
} else {
rep(NA_real_, N)
}
pred <- NULL
iterationrange <- c(1, NVL(env$basket$best_iteration, env$end_iteration) + 1)
iterationrange <- c(1, NVL(env$basket$best_iteration, env$end_iteration))
if (NVL(env$params[['booster']], '') == 'gblinear') {
iterationrange <- c(1, 1) # must be 0 for gblinear
iterationrange <- "all"
}
for (fd in env$bst_folds) {
pr <- predict(fd$bst, fd$watchlist[[2]], iterationrange = iterationrange, reshape = TRUE)
if (is.null(pred)) {
if (NCOL(pr) > 1L) {
pred <- matrix(NA_real_, N, ncol(pr))
} else {
pred <- matrix(NA_real_, N)
}
}
if (is.matrix(pred)) {
pred[fd$index, ] <- pr
} else {

View File

@@ -208,7 +208,7 @@ xgb.iter.eval <- function(bst, watchlist, iter, feval) {
res <- sapply(seq_along(watchlist), function(j) {
w <- watchlist[[j]]
## predict using all trees
preds <- predict(bst, w, outputmargin = TRUE, iterationrange = c(1, 1))
preds <- predict(bst, w, outputmargin = TRUE, iterationrange = "all")
eval_res <- feval(preds, w)
out <- eval_res$value
names(out) <- paste0(evnames[j], "-", eval_res$metric)

View File

@@ -89,7 +89,6 @@ xgb.get.handle <- function(object) {
#' @param outputmargin Whether the prediction should be returned in the form of original untransformed
#' sum of predictions from boosting iterations' results. E.g., setting `outputmargin=TRUE` for
#' logistic regression would return log-odds instead of probabilities.
#' @param ntreelimit Deprecated, use `iterationrange` instead.
#' @param predleaf Whether to predict per-tree leaf indices.
#' @param predcontrib Whether to return feature contributions to individual predictions (see Details).
#' @param approxcontrib Whether to use a fast approximation for feature contributions (see Details).
@@ -99,11 +98,17 @@ xgb.get.handle <- function(object) {
#' or `predinteraction` is `TRUE`.
#' @param training Whether the predictions are used for training. For dart booster,
#' training predicting will perform dropout.
#' @param iterationrange Specifies which trees are used in prediction. For
#' example, take a random forest with 100 rounds.
#' With `iterationrange=c(1, 21)`, only the trees built during `[1, 21)` (half open set)
#' rounds are used in this prediction. The index is 1-based just like an R vector. When set
#' to `c(1, 1)`, XGBoost will use all trees.
#' @param iterationrange Sequence of rounds/iterations from the model to use for prediction, specified by passing
#' a vector with two elements giving the start and end numbers in the sequence (same format as R's `seq` - i.e.
#' base-1 indexing, and inclusive of both ends).
#'
#' For example, passing `c(1,20)` will predict using the first twenty iterations, while passing `c(1,1)` will
#' predict using only the first one.
#'
#' If passing `NULL`, will either stop at the best iteration if the model used early stopping, or use all
#' of the iterations (rounds) otherwise.
#'
#' If passing "all", will use all of the rounds regardless of whether the model had early stopping or not.
#' @param strict_shape Default is `FALSE`. When set to `TRUE`, the output
#' type and shape of predictions are invariant to the model type.
#' @param ... Not used.
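A brief usage sketch of these `iterationrange` semantics, assuming a fitted booster `bst` and a feature matrix `x` (both hypothetical):

    predict(bst, x, iterationrange = c(1, 5))  # rounds 1 through 5, both ends inclusive
    predict(bst, x, iterationrange = NULL)     # best iteration if early stopping was used, else all
    predict(bst, x, iterationrange = "all")    # every round, regardless of early stopping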
@@ -189,7 +194,7 @@ xgb.get.handle <- function(object) {
#' # use all trees by default
#' pred <- predict(bst, test$data)
#' # use only the 1st tree
#' pred1 <- predict(bst, test$data, iterationrange = c(1, 2))
#' pred1 <- predict(bst, test$data, iterationrange = c(1, 1))
#'
#' # Predicting tree leaves:
#' # the result is an nsamples X ntrees matrix
@@ -260,11 +265,11 @@ xgb.get.handle <- function(object) {
#' all.equal(pred, pred_labels)
#' # prediction from using only 5 iterations should result
#' # in the same error as seen in iteration 5:
#' pred5 <- predict(bst, as.matrix(iris[, -5]), iterationrange = c(1, 6))
#' pred5 <- predict(bst, as.matrix(iris[, -5]), iterationrange = c(1, 5))
#' sum(pred5 != lb) / length(lb)
#'
#' @export
predict.xgb.Booster <- function(object, newdata, missing = NA, outputmargin = FALSE, ntreelimit = NULL,
predict.xgb.Booster <- function(object, newdata, missing = NA, outputmargin = FALSE,
predleaf = FALSE, predcontrib = FALSE, approxcontrib = FALSE, predinteraction = FALSE,
reshape = FALSE, training = FALSE, iterationrange = NULL, strict_shape = FALSE, ...) {
if (!inherits(newdata, "xgb.DMatrix")) {
@@ -275,25 +280,21 @@ predict.xgb.Booster <- function(object, newdata, missing = NA, outputmargin = FA
)
}
if (NVL(xgb.booster_type(object), '') == 'gblinear' || is.null(ntreelimit))
ntreelimit <- 0
if (ntreelimit != 0 && is.null(iterationrange)) {
## only ntreelimit, initialize iteration range
iterationrange <- c(0, 0)
} else if (ntreelimit == 0 && !is.null(iterationrange)) {
## only iteration range, handle 1-based indexing
iterationrange <- c(iterationrange[1] - 1, iterationrange[2] - 1)
} else if (ntreelimit != 0 && !is.null(iterationrange)) {
## both are specified, let libxgboost throw an error
if (!is.null(iterationrange)) {
if (is.character(iterationrange)) {
stopifnot(iterationrange == "all")
iterationrange <- c(0, 0)
} else {
iterationrange[1] <- iterationrange[1] - 1 # base-0 indexing
}
} else {
## no limit is supplied, use best
best_iteration <- xgb.best_iteration(object)
if (is.null(best_iteration)) {
iterationrange <- c(0, 0)
} else {
## We don't need to + 1 as R is 1-based index.
iterationrange <- c(0, as.integer(best_iteration))
iterationrange <- c(0, as.integer(best_iteration) + 1L)
}
}
## Handle the 0 length values.
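## To summarize the conversion above: the user-facing base-1 inclusive range
## [start, end] maps onto libxgboost's base-0 half-open [begin, end), so only
## the start needs decrementing -- a base-1 inclusive end and a base-0
## exclusive end are the same number. E.g. c(1, 5) becomes begin = 0, end = 5
## (rounds 1..5), and "all" becomes begin = 0, end = 0 (0 meaning all rounds).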
@@ -312,7 +313,6 @@ predict.xgb.Booster <- function(object, newdata, missing = NA, outputmargin = FA
strict_shape = box(TRUE),
iteration_begin = box(as.integer(iterationrange[1])),
iteration_end = box(as.integer(iterationrange[2])),
ntree_limit = box(as.integer(ntreelimit)),
type = box(as.integer(0))
)
@@ -343,24 +343,24 @@ predict.xgb.Booster <- function(object, newdata, missing = NA, outputmargin = FA
)
names(predts) <- c("shape", "results")
shape <- predts$shape
ret <- predts$results
arr <- predts$results
n_ret <- length(ret)
n_ret <- length(arr)
n_row <- nrow(newdata)
if (n_row != shape[1]) {
stop("Incorrect predict shape.")
}
arr <- array(data = ret, dim = rev(shape))
.Call(XGSetArrayDimInplace_R, arr, rev(shape))
cnames <- if (!is.null(colnames(newdata))) c(colnames(newdata), "BIAS") else NULL
n_groups <- shape[2]
## Needed regardless of whether strict shape is being used.
if (predcontrib) {
dimnames(arr) <- list(cnames, NULL, NULL)
.Call(XGSetArrayDimNamesInplace_R, arr, list(cnames, NULL, NULL))
} else if (predinteraction) {
dimnames(arr) <- list(cnames, cnames, NULL, NULL)
.Call(XGSetArrayDimNamesInplace_R, arr, list(cnames, cnames, NULL, NULL))
}
if (strict_shape) {
return(arr) # strict shape is calculated by libxgboost uniformly.
@@ -368,43 +368,51 @@ predict.xgb.Booster <- function(object, newdata, missing = NA, outputmargin = FA
if (predleaf) {
## Predict leaf
arr <- if (n_ret == n_row) {
matrix(arr, ncol = 1)
if (n_ret == n_row) {
.Call(XGSetArrayDimInplace_R, arr, c(n_row, 1L))
} else {
matrix(arr, nrow = n_row, byrow = TRUE)
arr <- matrix(arr, nrow = n_row, byrow = TRUE)
}
} else if (predcontrib) {
## Predict contribution
arr <- aperm(a = arr, perm = c(2, 3, 1)) # [group, row, col]
arr <- if (n_ret == n_row) {
matrix(arr, ncol = 1, dimnames = list(NULL, cnames))
if (n_ret == n_row) {
.Call(XGSetArrayDimInplace_R, arr, c(n_row, 1L))
.Call(XGSetArrayDimNamesInplace_R, arr, list(NULL, cnames))
} else if (n_groups != 1) {
## turns array into list of matrices
lapply(seq_len(n_groups), function(g) arr[g, , ])
arr <- lapply(seq_len(n_groups), function(g) arr[g, , ])
} else {
## remove the first axis (group)
dn <- dimnames(arr)
matrix(arr[1, , ], nrow = dim(arr)[2], ncol = dim(arr)[3], dimnames = c(dn[2], dn[3]))
newdim <- dim(arr)[2:3]
newdn <- dimnames(arr)[2:3]
arr <- arr[1, , ]
.Call(XGSetArrayDimInplace_R, arr, newdim)
.Call(XGSetArrayDimNamesInplace_R, arr, newdn)
}
} else if (predinteraction) {
## Predict interaction
arr <- aperm(a = arr, perm = c(3, 4, 1, 2)) # [group, row, col, col]
arr <- if (n_ret == n_row) {
matrix(arr, ncol = 1, dimnames = list(NULL, cnames))
if (n_ret == n_row) {
.Call(XGSetArrayDimInplace_R, arr, c(n_row, 1L))
.Call(XGSetArrayDimNamesInplace_R, arr, list(NULL, cnames))
} else if (n_groups != 1) {
## turns array into list of matrices
lapply(seq_len(n_groups), function(g) arr[g, , , ])
arr <- lapply(seq_len(n_groups), function(g) arr[g, , , ])
} else {
## remove the first axis (group)
arr <- arr[1, , , , drop = FALSE]
array(arr, dim = dim(arr)[2:4], dimnames(arr)[2:4])
newdim <- dim(arr)[2:4]
newdn <- dimnames(arr)[2:4]
.Call(XGSetArrayDimInplace_R, arr, newdim)
.Call(XGSetArrayDimNamesInplace_R, arr, newdn)
}
} else {
## Normal prediction
arr <- if (reshape && n_groups != 1) {
matrix(arr, ncol = n_groups, byrow = TRUE)
if (reshape && n_groups != 1) {
arr <- matrix(arr, ncol = n_groups, byrow = TRUE)
} else {
as.vector(ret)
.Call(XGSetArrayDimInplace_R, arr, NULL)
}
}
return(arr)
@@ -492,7 +500,7 @@ xgb.attr <- function(object, name) {
return(NULL)
}
if (!is.null(out)) {
if (name %in% c("best_iteration", "best_ntreelimit", "best_score")) {
if (name %in% c("best_iteration", "best_score")) {
out <- as.numeric(out)
}
}
@@ -685,16 +693,94 @@ setinfo.xgb.Booster <- function(object, name, info) {
}
#' @title Get number of boosting rounds in a fitted booster
#' @param model A fitted `xgb.Booster` model.
#' @param model,x A fitted `xgb.Booster` model.
#' @return The number of rounds saved in the model, as an integer.
#' @details Note that setting booster parameters related to training
#' continuation / updates through \link{xgb.parameters<-} will reset the
#' number of rounds to zero.
#' @export
#' @rdname xgb.get.num.boosted.rounds
xgb.get.num.boosted.rounds <- function(model) {
return(.Call(XGBoosterBoostedRounds_R, xgb.get.handle(model)))
}
#' @rdname xgb.get.num.boosted.rounds
#' @export
length.xgb.Booster <- function(x) {
return(xgb.get.num.boosted.rounds(x))
}
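## Usage sketch (assuming a booster trained for 5 rounds):
##   xgb.get.num.boosted.rounds(model)  # 5
##   length(model)                      # also 5, via the S3 method above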
#' @title Slice Booster by Rounds
#' @description Creates a new booster including only a selected range of rounds / iterations
#' from an existing booster, as given by the sequence `seq(start, end, step)`.
#' @details Note that any R attributes that the booster might have will not be copied into
#' the resulting object.
#' @param model,x A fitted `xgb.Booster` object, which is to be sliced by taking only a subset
#' of its rounds / iterations.
#' @param start Start of the slice (base-1 and inclusive, like R's \link{seq}).
#' @param end End of the slice (base-1 and inclusive, like R's \link{seq}).
#'
#' Passing a value of zero here is equivalent to passing the full number of rounds in the
#' booster object.
#' @param step Step size of the slice. Passing '1' will take every round in the sequence defined by
#' `(start, end)`, while passing '2' will take every second value, and so on.
#' @return A sliced booster object containing only the requested rounds.
#' @examples
#' data(mtcars)
#' y <- mtcars$mpg
#' x <- as.matrix(mtcars[, -1])
#' dm <- xgb.DMatrix(x, label = y, nthread = 1)
#' model <- xgb.train(data = dm, params = list(nthread = 1), nrounds = 5)
#' model_slice <- xgb.slice.Booster(model, 1, 3)
#' # Prediction for first three rounds
#' predict(model, x, predleaf = TRUE)[, 1:3]
#'
#' # The new model has only those rounds, so
#' # a full prediction from it is equivalent
#' predict(model_slice, x, predleaf = TRUE)
#' @export
#' @rdname xgb.slice.Booster
xgb.slice.Booster <- function(model, start, end = xgb.get.num.boosted.rounds(model), step = 1L) {
# This makes the slice mimic the behavior of R's 'seq',
# which truncates the end of the slice when the step
# doesn't land exactly on it.
if (end > start && step > 1) {
d <- (end - start + 1) / step
if (d != floor(d)) {
end <- start + step * ceiling(d) - 1
}
}
return(
.Call(
XGBoosterSlice_R,
xgb.get.handle(model),
start - 1,
end,
step
)
)
}
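# For instance (values purely illustrative): start = 1, end = 10, step = 4
# keeps rounds 1, 5, 9 -- the same set as seq(1, 10, 4) -- even though the
# step does not land exactly on 'end'.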
#' @export
#' @rdname xgb.slice.Booster
#' @param i The indices: must be an increasing sequence, as generated by e.g. `seq(...)`.
`[.xgb.Booster` <- function(x, i) {
if (missing(i)) {
return(xgb.slice.Booster(x, 1, 0))
}
if (length(i) == 1) {
return(xgb.slice.Booster(x, i, i))
}
steps <- diff(i)
if (any(steps < 0)) {
stop("Can only slice booster with ascending sequences.")
}
if (length(unique(steps)) > 1) {
stop("Can only slice booster with fixed-step sequences.")
}
return(xgb.slice.Booster(x, i[1L], i[length(i)], steps[1L]))
}
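# Assumed usage of this operator, for a booster with 20 rounds:
#   model[1:10]           # first ten rounds
#   model[seq(1, 19, 2)]  # every second round
#   model[]               # all rounds, same as xgb.slice.Booster(x, 1, 0)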
#' @title Get Feature Names from Booster
#' @description Returns the feature / variable / column names from a fitted
#' booster object, which are set automatically during the call to \link{xgb.train}
@@ -710,12 +796,6 @@ variable.names.xgb.Booster <- function(object, ...) {
return(getinfo(object, "feature_name"))
}
xgb.ntree <- function(bst) {
config <- xgb.config(bst)
out <- strtoi(config$learner$gradient_booster$gbtree_model_param$num_trees)
return(out)
}
xgb.nthread <- function(bst) {
config <- xgb.config(bst)
out <- strtoi(config$learner$generic_param$nthread)

View File

@@ -103,7 +103,6 @@
#' parameter or randomly generated.
#' \item \code{best_iteration} iteration number with the best evaluation metric value
#' (only available with early stopping).
#' \item \code{best_ntreelimit} and the \code{ntreelimit} Deprecated attributes, use \code{best_iteration} instead.
#' \item \code{pred} CV prediction values available when \code{prediction} is set.
#' It is either vector or matrix (see \code{\link{cb.cv.predict}}).
#' \item \code{models} a list of the CV folds' models. It is only available with the explicit
@@ -218,7 +217,6 @@ xgb.cv <- function(params = list(), data, nrounds, nfold, label = NULL, missing
# extract parameters that can affect the relationship b/w #trees and #iterations
num_class <- max(as.numeric(NVL(params[['num_class']], 1)), 1) # nolint
num_parallel_tree <- max(as.numeric(NVL(params[['num_parallel_tree']], 1)), 1) # nolint
# those are fixed for CV (no training continuation)
begin_iteration <- 1
@@ -318,7 +316,7 @@ print.xgb.cv.synchronous <- function(x, verbose = FALSE, ...) {
})
}
for (n in c('niter', 'best_iteration', 'best_ntreelimit')) {
for (n in c('niter', 'best_iteration')) {
if (is.null(x[[n]]))
next
cat(n, ': ', x[[n]], '\n', sep = '')

View File

@@ -113,19 +113,12 @@
#' xgb.importance(model = mbst)
#'
#' @export
xgb.importance <- function(feature_names = NULL, model = NULL, trees = NULL,
xgb.importance <- function(model = NULL, feature_names = getinfo(model, "feature_name"), trees = NULL,
data = NULL, label = NULL, target = NULL) {
if (!(is.null(data) && is.null(label) && is.null(target)))
warning("xgb.importance: parameters 'data', 'label' and 'target' are deprecated")
if (is.null(feature_names)) {
model_feature_names <- xgb.feature_names(model)
if (NROW(model_feature_names)) {
feature_names <- model_feature_names
}
}
if (!(is.null(feature_names) || is.character(feature_names)))
stop("feature_names: Has to be a character vector")

View File

@@ -2,11 +2,8 @@
#'
#' Parse a boosted tree model text dump into a `data.table` structure.
#'
#' @param feature_names Character vector of feature names. If the model already
#' contains feature names, those will be used when \code{feature_names=NULL} (default value).
#'
#' Note that, if the model already contains feature names, it's \bold{not} possible to override them here.
#' @param model Object of class `xgb.Booster`.
#' @param model Object of class `xgb.Booster`. If it contains feature names (they can be set through
#' \link{setinfo}), they will be used in the output from this function.
#' @param text Character vector previously generated by the function [xgb.dump()]
#' (called with parameter `with_stats = TRUE`). `text` takes precedence over `model`.
#' @param trees An integer vector of tree indices that should be used.
@@ -58,7 +55,7 @@
#'
#' # This bst model already has feature_names stored with it, so those would be used when
#' # feature_names is not set:
#' (dt <- xgb.model.dt.tree(model = bst))
#' dt <- xgb.model.dt.tree(bst)
#'
#' # How to match feature names of splits that are following a current 'Yes' branch:
#' merge(
@@ -69,7 +66,7 @@
#' ]
#'
#' @export
xgb.model.dt.tree <- function(feature_names = NULL, model = NULL, text = NULL,
xgb.model.dt.tree <- function(model = NULL, text = NULL,
trees = NULL, use_int_id = FALSE, ...) {
check.deprecation(...)
@@ -79,24 +76,15 @@ xgb.model.dt.tree <- function(feature_names = NULL, model = NULL, text = NULL,
" (or NULL if 'model' was provided).")
}
model_feature_names <- NULL
if (inherits(model, "xgb.Booster")) {
model_feature_names <- xgb.feature_names(model)
if (NROW(model_feature_names) && !is.null(feature_names)) {
stop("'model' contains feature names. Cannot override them.")
}
}
if (is.null(feature_names) && !is.null(model) && !is.null(model_feature_names))
feature_names <- model_feature_names
if (!(is.null(feature_names) || is.character(feature_names))) {
stop("feature_names: must be a character vector")
}
if (!(is.null(trees) || is.numeric(trees))) {
stop("trees: must be a vector of integers.")
}
feature_names <- NULL
if (inherits(model, "xgb.Booster")) {
feature_names <- xgb.feature_names(model)
}
from_text <- TRUE
if (is.null(text)) {
text <- xgb.dump(model = model, with_stats = TRUE)
@@ -134,7 +122,7 @@ xgb.model.dt.tree <- function(feature_names = NULL, model = NULL, text = NULL,
branch_rx_w_names <- paste0("\\d+:\\[(.+)<(", anynumber_regex, ")\\] yes=(\\d+),no=(\\d+),missing=(\\d+),",
"gain=(", anynumber_regex, "),cover=(", anynumber_regex, ")")
text_has_feature_names <- FALSE
if (NROW(model_feature_names)) {
if (NROW(feature_names)) {
branch_rx <- branch_rx_w_names
text_has_feature_names <- TRUE
} else {
@@ -148,9 +136,6 @@ xgb.model.dt.tree <- function(feature_names = NULL, model = NULL, text = NULL,
}
}
}
if (text_has_feature_names && is.null(model) && !is.null(feature_names)) {
stop("'text' contains feature names. Cannot override them.")
}
branch_cols <- c("Feature", "Split", "Yes", "No", "Missing", "Gain", "Cover")
td[
isLeaf == FALSE,

View File

@@ -62,13 +62,13 @@
#' }
#'
#' @export
xgb.plot.multi.trees <- function(model, feature_names = NULL, features_keep = 5, plot_width = NULL, plot_height = NULL,
xgb.plot.multi.trees <- function(model, features_keep = 5, plot_width = NULL, plot_height = NULL,
render = TRUE, ...) {
if (!requireNamespace("DiagrammeR", quietly = TRUE)) {
stop("DiagrammeR is required for xgb.plot.multi.trees")
}
check.deprecation(...)
tree.matrix <- xgb.model.dt.tree(feature_names = feature_names, model = model)
tree.matrix <- xgb.model.dt.tree(model = model)
# first number of the path represents the tree, then the following numbers are related to the path to follow
# root init

View File

@@ -2,9 +2,8 @@
#'
#' Read a tree model text dump and plot the model.
#'
#' @param feature_names Character vector used to overwrite the feature names
#' of the model. The default (`NULL`) uses the original feature names.
#' @param model Object of class `xgb.Booster`.
#' @param model Object of class `xgb.Booster`. If it contains feature names (they can be set through
#' \link{setinfo}), they will be used in the output from this function.
#' @param trees An integer vector of tree indices that should be used.
#' The default (`NULL`) uses all trees.
#' Useful, e.g., in multiclass classification to get only
@@ -103,7 +102,7 @@
#' }
#'
#' @export
xgb.plot.tree <- function(feature_names = NULL, model = NULL, trees = NULL, plot_width = NULL, plot_height = NULL,
xgb.plot.tree <- function(model = NULL, trees = NULL, plot_width = NULL, plot_height = NULL,
render = TRUE, show_node_id = FALSE, style = c("R", "xgboost"), ...) {
check.deprecation(...)
if (!inherits(model, "xgb.Booster")) {
@@ -120,17 +119,12 @@ xgb.plot.tree <- function(feature_names = NULL, model = NULL, trees = NULL, plot
if (NROW(trees) != 1L || !render || show_node_id) {
stop("style='xgboost' is only supported for single, rendered tree, without node IDs.")
}
if (!is.null(feature_names)) {
stop(
"style='xgboost' cannot override 'feature_names'. Will automatically take them from the model."
)
}
txt <- xgb.dump(model, dump_format = "dot")
return(DiagrammeR::grViz(txt[[trees + 1]], width = plot_width, height = plot_height))
}
dt <- xgb.model.dt.tree(feature_names = feature_names, model = model, trees = trees)
dt <- xgb.model.dt.tree(model = model, trees = trees)
dt[, label := paste0(Feature, "\nCover: ", Cover, ifelse(Feature == "Leaf", "\nValue: ", "\nGain: "), Gain)]
if (show_node_id)

View File

@@ -393,7 +393,6 @@ xgb.train <- function(params = list(), data, nrounds, watchlist = list(),
# Note: it might look like these aren't used, but they need to be defined in this
# environment for the callbacks to work correctly.
num_class <- max(as.numeric(NVL(params[['num_class']], 1)), 1) # nolint
num_parallel_tree <- max(as.numeric(NVL(params[['num_parallel_tree']], 1)), 1) # nolint
if (is_update && nrounds > niter_init)
stop("nrounds cannot be larger than ", niter_init, " (nrounds of xgb_model)")

View File

@@ -15,7 +15,7 @@ cat('start testing prediction from first n trees\n')
labels <- getinfo(dtest, 'label')
### predict using first 1 tree
ypred1 <- predict(bst, dtest, ntreelimit = 1)
ypred1 <- predict(bst, dtest, iterationrange = c(1, 1))
# by default, we predict using all the trees
ypred2 <- predict(bst, dtest)

View File

@@ -35,8 +35,6 @@ Callback function expects the following values to be set in its calling frame:
\code{data},
\code{end_iteration},
\code{params},
\code{num_parallel_tree},
\code{num_class}.
}
\seealso{
\code{\link{callbacks}}

View File

@@ -55,7 +55,6 @@ Callback function expects the following values to be set in its calling frame:
\code{iteration},
\code{begin_iteration},
\code{end_iteration},
\code{num_parallel_tree}.
}
\seealso{
\code{\link{callbacks}},

View File

@@ -9,7 +9,6 @@
newdata,
missing = NA,
outputmargin = FALSE,
ntreelimit = NULL,
predleaf = FALSE,
predcontrib = FALSE,
approxcontrib = FALSE,
@@ -36,8 +35,6 @@ missing values in data (e.g., 0 or some other extreme value).}
sum of predictions from boosting iterations' results. E.g., setting \code{outputmargin=TRUE} for
logistic regression would return log-odds instead of probabilities.}
\item{ntreelimit}{Deprecated, use \code{iterationrange} instead.}
\item{predleaf}{Whether to predict per-tree leaf indices.}
\item{predcontrib}{Whether to return feature contributions to individual predictions (see Details).}
@@ -53,11 +50,18 @@ or \code{predinteraction} is \code{TRUE}.}
\item{training}{Whether the predictions are used for training. For dart booster,
training predicting will perform dropout.}
\item{iterationrange}{Specifies which trees are used in prediction. For
example, take a random forest with 100 rounds.
With \code{iterationrange=c(1, 21)}, only the trees built during \verb{[1, 21)} (half open set)
rounds are used in this prediction. The index is 1-based just like an R vector. When set
to \code{c(1, 1)}, XGBoost will use all trees.}
\item{iterationrange}{Sequence of rounds/iterations from the model to use for prediction, specified by passing
a vector with two elements giving the start and end numbers in the sequence (same format as R's \code{seq} - i.e.
base-1 indexing, and inclusive of both ends).
\if{html}{\out{<div class="sourceCode">}}\preformatted{ For example, passing `c(1,20)` will predict using the first twenty iterations, while passing `c(1,1)` will
predict using only the first one.
If passing `NULL`, will either stop at the best iteration if the model used early stopping, or use all
of the iterations (rounds) otherwise.
If passing "all", will use all of the rounds regardless of whether the model had early stopping or not.
}\if{html}{\out{</div>}}}
\item{strict_shape}{Default is \code{FALSE}. When set to \code{TRUE}, the output
type and shape of predictions are invariant to the model type.}
@@ -145,7 +149,7 @@ bst <- xgb.train(
# use all trees by default
pred <- predict(bst, test$data)
# use only the 1st tree
pred1 <- predict(bst, test$data, iterationrange = c(1, 2))
pred1 <- predict(bst, test$data, iterationrange = c(1, 1))
# Predicting tree leaves:
# the result is an nsamples X ntrees matrix
@@ -216,7 +220,7 @@ str(pred)
all.equal(pred, pred_labels)
# prediction from using only 5 iterations should result
# in the same error as seen in iteration 5:
pred5 <- predict(bst, as.matrix(iris[, -5]), iterationrange = c(1, 6))
pred5 <- predict(bst, as.matrix(iris[, -5]), iterationrange = c(1, 5))
sum(pred5 != lb) / length(lb)
}

View File

@@ -135,7 +135,6 @@ It is created by the \code{\link{cb.evaluation.log}} callback.
parameter or randomly generated.
\item \code{best_iteration} iteration number with the best evaluation metric value
(only available with early stopping).
\item \code{best_ntreelimit} and the \code{ntreelimit} Deprecated attributes, use \code{best_iteration} instead.
\item \code{pred} CV prediction values available when \code{prediction} is set.
It is either vector or matrix (see \code{\link{cb.cv.predict}}).
\item \code{models} a list of the CV folds' models. It is only available with the explicit

View File

@@ -2,12 +2,15 @@
% Please edit documentation in R/xgb.Booster.R
\name{xgb.get.num.boosted.rounds}
\alias{xgb.get.num.boosted.rounds}
\alias{length.xgb.Booster}
\title{Get number of boosting rounds in a fitted booster}
\usage{
xgb.get.num.boosted.rounds(model)
\method{length}{xgb.Booster}(x)
}
\arguments{
\item{model}{A fitted \code{xgb.Booster} model.}
\item{model, x}{A fitted \code{xgb.Booster} model.}
}
\value{
The number of rounds saved in the model, as an integer.

View File

@@ -5,8 +5,8 @@
\title{Feature importance}
\usage{
xgb.importance(
feature_names = NULL,
model = NULL,
feature_names = getinfo(model, "feature_name"),
trees = NULL,
data = NULL,
label = NULL,
@@ -14,11 +14,11 @@ xgb.importance(
)
}
\arguments{
\item{model}{Object of class \code{xgb.Booster}.}
\item{feature_names}{Character vector used to overwrite the feature names
of the model. The default is \code{NULL} (use original feature names).}
\item{model}{Object of class \code{xgb.Booster}.}
\item{trees}{An integer vector of tree indices that should be included
into the importance calculation (only for the "gbtree" booster).
The default (\code{NULL}) parses all trees.

View File

@@ -5,7 +5,6 @@
\title{Parse model text dump}
\usage{
xgb.model.dt.tree(
feature_names = NULL,
model = NULL,
text = NULL,
trees = NULL,
@@ -14,13 +13,8 @@ xgb.model.dt.tree(
)
}
\arguments{
\item{feature_names}{Character vector of feature names. If the model already
contains feature names, those will be used when \code{feature_names=NULL} (default value).
\if{html}{\out{<div class="sourceCode">}}\preformatted{ Note that, if the model already contains feature names, it's \\bold\{not\} possible to override them here.
}\if{html}{\out{</div>}}}
\item{model}{Object of class \code{xgb.Booster}.}
\item{model}{Object of class \code{xgb.Booster}. If it contains feature names (they can be set through
\link{setinfo}), they will be used in the output from this function.}
\item{text}{Character vector previously generated by the function \code{\link[=xgb.dump]{xgb.dump()}}
(called with parameter \code{with_stats = TRUE}). \code{text} takes precedence over \code{model}.}
@@ -81,7 +75,7 @@ bst <- xgboost(
# This bst model already has feature_names stored with it, so those would be used when
# feature_names is not set:
(dt <- xgb.model.dt.tree(model = bst))
dt <- xgb.model.dt.tree(bst)
# How to match feature names of splits that are following a current 'Yes' branch:
merge(

View File

@@ -6,7 +6,6 @@
\usage{
xgb.plot.multi.trees(
model,
feature_names = NULL,
features_keep = 5,
plot_width = NULL,
plot_height = NULL,
@@ -15,10 +14,8 @@ xgb.plot.multi.trees(
)
}
\arguments{
\item{model}{Object of class \code{xgb.Booster}.}
\item{feature_names}{Character vector used to overwrite the feature names
of the model. The default (\code{NULL}) uses the original feature names.}
\item{model}{Object of class \code{xgb.Booster}. If it contains feature names (they can be set through
\link{setinfo}), they will be used in the output from this function.}
\item{features_keep}{Number of features to keep in each position of the multi trees,
by default 5.}

View File

@@ -5,7 +5,6 @@
\title{Plot boosted trees}
\usage{
xgb.plot.tree(
feature_names = NULL,
model = NULL,
trees = NULL,
plot_width = NULL,
@@ -17,10 +16,8 @@ xgb.plot.tree(
)
}
\arguments{
\item{feature_names}{Character vector used to overwrite the feature names
of the model. The default (\code{NULL}) uses the original feature names.}
\item{model}{Object of class \code{xgb.Booster}.}
\item{model}{Object of class \code{xgb.Booster}. If it contains feature names (they can be set through
\link{setinfo}), they will be used in the output from this function.}
\item{trees}{An integer vector of tree indices that should be used.
The default (\code{NULL}) uses all trees.

View File

@@ -0,0 +1,57 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/xgb.Booster.R
\name{xgb.slice.Booster}
\alias{xgb.slice.Booster}
\alias{[.xgb.Booster}
\title{Slice Booster by Rounds}
\usage{
xgb.slice.Booster(
model,
start,
end = xgb.get.num.boosted.rounds(model),
step = 1L
)
\method{[}{xgb.Booster}(x, i)
}
\arguments{
\item{model, x}{A fitted \code{xgb.Booster} object, which is to be sliced by taking only a subset
of its rounds / iterations.}
\item{start}{Start of the slice (base-1 and inclusive, like R's \link{seq}).}
\item{end}{End of the slice (base-1 and inclusive, like R's \link{seq}).
Passing a value of zero here is equivalent to passing the full number of rounds in the
booster object.}
\item{step}{Step size of the slice. Passing '1' will take every round in the sequence defined by
\verb{(start, end)}, while passing '2' will take every second value, and so on.}
\item{i}{The indices: must be an increasing sequence, as generated by e.g. \code{seq(...)}.}
}
\value{
A sliced booster object containing only the requested rounds.
}
\description{
Creates a new booster including only a selected range of rounds / iterations
from an existing booster, as given by the sequence \code{seq(start, end, step)}.
}
\details{
Note that any R attributes that the booster might have will not be copied into
the resulting object.
}
\examples{
data(mtcars)
y <- mtcars$mpg
x <- as.matrix(mtcars[, -1])
dm <- xgb.DMatrix(x, label = y, nthread = 1)
model <- xgb.train(data = dm, params = list(nthread = 1), nrounds = 5)
model_slice <- xgb.slice.Booster(model, 1, 3)
# Prediction for first three rounds
predict(model, x, predleaf = TRUE)[, 1:3]
# The new model has only those rounds, so
# a full prediction from it is equivalent
predict(model_slice, x, predleaf = TRUE)
}

View File

@@ -42,6 +42,8 @@ extern SEXP XGBoosterSetAttr_R(SEXP, SEXP, SEXP);
extern SEXP XGBoosterSetParam_R(SEXP, SEXP, SEXP);
extern SEXP XGBoosterUpdateOneIter_R(SEXP, SEXP, SEXP);
extern SEXP XGCheckNullPtr_R(SEXP);
extern SEXP XGSetArrayDimInplace_R(SEXP, SEXP);
extern SEXP XGSetArrayDimNamesInplace_R(SEXP, SEXP);
extern SEXP XGDMatrixCreateFromCSC_R(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP);
extern SEXP XGDMatrixCreateFromCSR_R(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP);
extern SEXP XGDMatrixCreateFromFile_R(SEXP, SEXP);
@@ -62,6 +64,7 @@ extern SEXP XGDMatrixSliceDMatrix_R(SEXP, SEXP);
extern SEXP XGBSetGlobalConfig_R(SEXP);
extern SEXP XGBGetGlobalConfig_R(void);
extern SEXP XGBoosterFeatureScore_R(SEXP, SEXP);
extern SEXP XGBoosterSlice_R(SEXP, SEXP, SEXP, SEXP);
static const R_CallMethodDef CallEntries[] = {
{"XGDuplicate_R", (DL_FUNC) &XGDuplicate_R, 1},
@@ -90,6 +93,8 @@ static const R_CallMethodDef CallEntries[] = {
{"XGBoosterSetParam_R", (DL_FUNC) &XGBoosterSetParam_R, 3},
{"XGBoosterUpdateOneIter_R", (DL_FUNC) &XGBoosterUpdateOneIter_R, 3},
{"XGCheckNullPtr_R", (DL_FUNC) &XGCheckNullPtr_R, 1},
{"XGSetArrayDimInplace_R", (DL_FUNC) &XGSetArrayDimInplace_R, 2},
{"XGSetArrayDimNamesInplace_R", (DL_FUNC) &XGSetArrayDimNamesInplace_R, 2},
{"XGDMatrixCreateFromCSC_R", (DL_FUNC) &XGDMatrixCreateFromCSC_R, 6},
{"XGDMatrixCreateFromCSR_R", (DL_FUNC) &XGDMatrixCreateFromCSR_R, 6},
{"XGDMatrixCreateFromFile_R", (DL_FUNC) &XGDMatrixCreateFromFile_R, 2},
@@ -110,6 +115,7 @@ static const R_CallMethodDef CallEntries[] = {
{"XGBSetGlobalConfig_R", (DL_FUNC) &XGBSetGlobalConfig_R, 1},
{"XGBGetGlobalConfig_R", (DL_FUNC) &XGBGetGlobalConfig_R, 0},
{"XGBoosterFeatureScore_R", (DL_FUNC) &XGBoosterFeatureScore_R, 2},
{"XGBoosterSlice_R", (DL_FUNC) &XGBoosterSlice_R, 4},
{NULL, NULL, 0}
};

View File

@@ -263,6 +263,16 @@ XGB_DLL SEXP XGCheckNullPtr_R(SEXP handle) {
return Rf_ScalarLogical(R_ExternalPtrAddr(handle) == nullptr);
}
XGB_DLL SEXP XGSetArrayDimInplace_R(SEXP arr, SEXP dims) {
Rf_setAttrib(arr, R_DimSymbol, dims);
return R_NilValue;
}
XGB_DLL SEXP XGSetArrayDimNamesInplace_R(SEXP arr, SEXP dim_names) {
Rf_setAttrib(arr, R_DimNamesSymbol, dim_names);
return R_NilValue;
}
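/* Rationale (assumed, not stated in the source): Rf_setAttrib mutates the
 * array without duplicating it, unlike R-level `dim(arr) <- d`, which may
 * copy a large prediction buffer; callers should therefore hold the only
 * reference to the array they pass in. */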
namespace {
void _DMatrixFinalizer(SEXP ext) {
R_API_BEGIN();
@@ -1279,3 +1289,18 @@ XGB_DLL SEXP XGBoosterFeatureScore_R(SEXP handle, SEXP json_config) {
return r_out;
}
XGB_DLL SEXP XGBoosterSlice_R(SEXP handle, SEXP begin_layer, SEXP end_layer, SEXP step) {
SEXP out = Rf_protect(XGBMakeEmptyAltrep());
R_API_BEGIN();
BoosterHandle handle_out = nullptr;
CHECK_CALL(XGBoosterSlice(R_ExternalPtrAddr(handle),
Rf_asInteger(begin_layer),
Rf_asInteger(end_layer),
Rf_asInteger(step),
&handle_out));
XGBAltrepSetPointer(out, handle_out);
R_API_END();
Rf_unprotect(1);
return out;
}
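/* Note: the R wrapper xgb.slice.Booster passes begin_layer already converted
 * to base-0 ('start - 1') and end_layer as-is, since a base-1 inclusive end
 * equals a base-0 exclusive end. */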

View File

@@ -23,6 +23,22 @@
*/
XGB_DLL SEXP XGCheckNullPtr_R(SEXP handle);
/*!
* \brief set the dimensions of an array in-place
* \param arr the array whose dimensions will be set in-place
* \param dims dimensions to set to the array
* \return NULL value
*/
XGB_DLL SEXP XGSetArrayDimInplace_R(SEXP arr, SEXP dims);
/*!
* \brief set the names of the dimensions of an array in-place
* \param arr the array whose dimension names will be set in-place
* \param dim_names names for the dimensions to set
* \return NULL value
*/
XGB_DLL SEXP XGSetArrayDimNamesInplace_R(SEXP arr, SEXP dim_names);
/*!
* \brief Set global configuration
* \param json_str a JSON string representing the list of key-value pairs
@@ -386,4 +402,14 @@ XGB_DLL SEXP XGBoosterGetAttrNames_R(SEXP handle);
*/
XGB_DLL SEXP XGBoosterFeatureScore_R(SEXP handle, SEXP json_config);
/*!
* \brief Slice a fitted booster model (by rounds)
* \param handle handle to the fitted booster
* \param begin_layer start of the slice
* \param end_layer end of the slice; end_layer=0 is equivalent to end_layer=num_boost_round
* \param step step size of the slice
* \return The sliced booster with the requested rounds only
*/
XGB_DLL SEXP XGBoosterSlice_R(SEXP handle, SEXP begin_layer, SEXP end_layer, SEXP step);
#endif // XGBOOST_WRAPPER_R_H_ // NOLINT(*)

View File

@@ -33,15 +33,11 @@ test_that("train and predict binary classification", {
pred <- predict(bst, test$data)
expect_length(pred, 1611)
pred1 <- predict(bst, train$data, ntreelimit = 1)
pred1 <- predict(bst, train$data, iterationrange = c(1, 1))
expect_length(pred1, 6513)
err_pred1 <- sum((pred1 > 0.5) != train$label) / length(train$label)
err_log <- attributes(bst)$evaluation_log[1, train_error]
expect_lt(abs(err_pred1 - err_log), 10e-6)
pred2 <- predict(bst, train$data, iterationrange = c(1, 2))
expect_length(pred1, 6513)
expect_equal(pred1, pred2)
})
test_that("parameter validation works", {
@@ -117,8 +113,8 @@ test_that("dart prediction works", {
nrounds = nrounds,
objective = "reg:squarederror"
)
pred_by_xgboost_0 <- predict(booster_by_xgboost, newdata = d, ntreelimit = 0)
pred_by_xgboost_1 <- predict(booster_by_xgboost, newdata = d, ntreelimit = nrounds)
pred_by_xgboost_0 <- predict(booster_by_xgboost, newdata = d, iterationrange = NULL)
pred_by_xgboost_1 <- predict(booster_by_xgboost, newdata = d, iterationrange = c(1, nrounds))
expect_true(all(matrix(pred_by_xgboost_0, byrow = TRUE) == matrix(pred_by_xgboost_1, byrow = TRUE)))
pred_by_xgboost_2 <- predict(booster_by_xgboost, newdata = d, training = TRUE)
@@ -139,8 +135,8 @@ test_that("dart prediction works", {
data = dtrain,
nrounds = nrounds
)
pred_by_train_0 <- predict(booster_by_train, newdata = dtrain, ntreelimit = 0)
pred_by_train_1 <- predict(booster_by_train, newdata = dtrain, ntreelimit = nrounds)
pred_by_train_0 <- predict(booster_by_train, newdata = dtrain, iterationrange = NULL)
pred_by_train_1 <- predict(booster_by_train, newdata = dtrain, iterationrange = c(1, nrounds))
pred_by_train_2 <- predict(booster_by_train, newdata = dtrain, training = TRUE)
expect_true(all(matrix(pred_by_train_0, byrow = TRUE) == matrix(pred_by_xgboost_0, byrow = TRUE)))
@@ -162,7 +158,7 @@ test_that("train and predict softprob", {
)
expect_false(is.null(attributes(bst)$evaluation_log))
expect_lt(attributes(bst)$evaluation_log[, min(train_merror)], 0.025)
expect_equal(xgb.get.num.boosted.rounds(bst) * 3, xgb.ntree(bst))
expect_equal(xgb.get.num.boosted.rounds(bst), 5)
pred <- predict(bst, as.matrix(iris[, -5]))
expect_length(pred, nrow(iris) * 3)
# row sums add up to total probability of 1:
@@ -174,12 +170,12 @@ test_that("train and predict softprob", {
err <- sum(pred_labels != lb) / length(lb)
expect_equal(attributes(bst)$evaluation_log[5, train_merror], err, tolerance = 5e-6)
# manually calculate error at the 1st iteration:
mpred <- predict(bst, as.matrix(iris[, -5]), reshape = TRUE, ntreelimit = 1)
mpred <- predict(bst, as.matrix(iris[, -5]), reshape = TRUE, iterationrange = c(1, 1))
pred_labels <- max.col(mpred) - 1
err <- sum(pred_labels != lb) / length(lb)
expect_equal(attributes(bst)$evaluation_log[1, train_merror], err, tolerance = 5e-6)
mpred1 <- predict(bst, as.matrix(iris[, -5]), reshape = TRUE, iterationrange = c(1, 2))
mpred1 <- predict(bst, as.matrix(iris[, -5]), reshape = TRUE, iterationrange = c(1, 1))
expect_equal(mpred, mpred1)
d <- cbind(
@@ -213,7 +209,7 @@ test_that("train and predict softmax", {
)
expect_false(is.null(attributes(bst)$evaluation_log))
expect_lt(attributes(bst)$evaluation_log[, min(train_merror)], 0.025)
expect_equal(xgb.get.num.boosted.rounds(bst) * 3, xgb.ntree(bst))
expect_equal(xgb.get.num.boosted.rounds(bst), 5)
pred <- predict(bst, as.matrix(iris[, -5]))
expect_length(pred, nrow(iris))
@@ -233,19 +229,15 @@ test_that("train and predict RF", {
watchlist = list(train = xgb.DMatrix(train$data, label = lb))
)
expect_equal(xgb.get.num.boosted.rounds(bst), 1)
expect_equal(xgb.ntree(bst), 20)
pred <- predict(bst, train$data)
pred_err <- sum((pred > 0.5) != lb) / length(lb)
expect_lt(abs(attributes(bst)$evaluation_log[1, train_error] - pred_err), 10e-6)
# expect_lt(pred_err, 0.03)
pred <- predict(bst, train$data, ntreelimit = 20)
pred <- predict(bst, train$data, iterationrange = c(1, 1))
pred_err_20 <- sum((pred > 0.5) != lb) / length(lb)
expect_equal(pred_err_20, pred_err)
pred1 <- predict(bst, train$data, iterationrange = c(1, 2))
expect_equal(pred, pred1)
})
test_that("train and predict RF with softprob", {
@@ -261,7 +253,6 @@ test_that("train and predict RF with softprob", {
watchlist = list(train = xgb.DMatrix(as.matrix(iris[, -5]), label = lb))
)
expect_equal(xgb.get.num.boosted.rounds(bst), 15)
expect_equal(xgb.ntree(bst), 15 * 3 * 4)
# predict for all iterations:
pred <- predict(bst, as.matrix(iris[, -5]), reshape = TRUE)
expect_equal(dim(pred), c(nrow(iris), 3))
@@ -269,7 +260,7 @@ test_that("train and predict RF with softprob", {
err <- sum(pred_labels != lb) / length(lb)
expect_equal(attributes(bst)$evaluation_log[nrounds, train_merror], err, tolerance = 5e-6)
# predict for 7 iterations and adjust for 4 parallel trees per iteration
pred <- predict(bst, as.matrix(iris[, -5]), reshape = TRUE, ntreelimit = 7 * 4)
pred <- predict(bst, as.matrix(iris[, -5]), reshape = TRUE, iterationrange = c(1, 7))
err <- sum((max.col(pred) - 1) != lb) / length(lb)
expect_equal(attributes(bst)$evaluation_log[7, train_merror], err, tolerance = 5e-6)
})

View File

@@ -0,0 +1,67 @@
context("testing xgb.Booster slicing")
data(agaricus.train, package = "xgboost")
dm <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label, nthread = 1)
# Note: large step sizes are needed here in order for the predictions
# to have substantially different leaf assignments on each tree
model <- xgb.train(
params = list(objective = "binary:logistic", nthread = 1, max_depth = 4, eta = 0.5),
data = dm,
nrounds = 20
)
pred <- predict(model, dm, predleaf = TRUE, reshape = TRUE)
test_that("Slicing full model", {
new_model <- xgb.slice.Booster(model, 1, 0)
expect_equal(xgb.save.raw(new_model), xgb.save.raw(model))
new_model <- model[]
expect_equal(xgb.save.raw(new_model), xgb.save.raw(model))
new_model <- model[1:length(model)] # nolint
expect_equal(xgb.save.raw(new_model), xgb.save.raw(model))
})
test_that("Slicing sequence from start", {
new_model <- xgb.slice.Booster(model, 1, 10)
new_pred <- predict(new_model, dm, predleaf = TRUE, reshape = TRUE)
expect_equal(new_pred, pred[, seq(1, 10)])
new_model <- model[1:10]
new_pred <- predict(new_model, dm, predleaf = TRUE, reshape = TRUE)
expect_equal(new_pred, pred[, seq(1, 10)])
})
test_that("Slicing sequence from middle", {
new_model <- xgb.slice.Booster(model, 5, 10)
new_pred <- predict(new_model, dm, predleaf = TRUE, reshape = TRUE)
expect_equal(new_pred, pred[, seq(5, 10)])
new_model <- model[5:10]
new_pred <- predict(new_model, dm, predleaf = TRUE, reshape = TRUE)
expect_equal(new_pred, pred[, seq(5, 10)])
})
test_that("Slicing with non-unit step", {
for (s in 2:5) {
new_model <- xgb.slice.Booster(model, 1, 17, s)
new_pred <- predict(new_model, dm, predleaf = TRUE, reshape = TRUE)
expect_equal(new_pred, pred[, seq(1, 17, s)])
new_model <- model[seq(1, 17, s)]
new_pred <- predict(new_model, dm, predleaf = TRUE, reshape = TRUE)
expect_equal(new_pred, pred[, seq(1, 17, s)])
}
})
test_that("Slicing with non-unit step from middle", {
for (s in 2:5) {
new_model <- xgb.slice.Booster(model, 4, 17, s)
new_pred <- predict(new_model, dm, predleaf = TRUE, reshape = TRUE)
expect_equal(new_pred, pred[, seq(4, 17, s)])
new_model <- model[seq(4, 17, s)]
new_pred <- predict(new_model, dm, predleaf = TRUE, reshape = TRUE)
expect_equal(new_pred, pred[, seq(4, 17, s)])
}
})

View File

@@ -211,12 +211,11 @@ test_that("early stopping xgb.train works", {
, "Stopping. Best iteration")
expect_false(is.null(xgb.attr(bst, "best_iteration")))
expect_lt(xgb.attr(bst, "best_iteration"), 19)
expect_equal(xgb.attr(bst, "best_iteration"), xgb.attr(bst, "best_ntreelimit"))
pred <- predict(bst, dtest)
expect_equal(length(pred), 1611)
err_pred <- err(ltest, pred)
err_log <- attributes(bst)$evaluation_log[xgb.attr(bst, "best_iteration"), test_error]
err_log <- attributes(bst)$evaluation_log[xgb.attr(bst, "best_iteration") + 1, test_error]
expect_equal(err_log, err_pred, tolerance = 5e-6)
set.seed(11)
@@ -231,8 +230,7 @@ test_that("early stopping xgb.train works", {
loaded <- xgb.load(fname)
expect_false(is.null(xgb.attr(loaded, "best_iteration")))
expect_equal(xgb.attr(loaded, "best_iteration"), xgb.attr(bst, "best_ntreelimit"))
expect_equal(xgb.attr(loaded, "best_ntreelimit"), xgb.attr(bst, "best_ntreelimit"))
expect_equal(xgb.attr(loaded, "best_iteration"), xgb.attr(bst, "best_iteration"))
})
test_that("early stopping using a specific metric works", {
@@ -245,12 +243,11 @@ test_that("early stopping using a specific metric works", {
, "Stopping. Best iteration")
expect_false(is.null(xgb.attr(bst, "best_iteration")))
expect_lt(xgb.attr(bst, "best_iteration"), 19)
expect_equal(xgb.attr(bst, "best_iteration"), xgb.attr(bst, "best_ntreelimit"))
pred <- predict(bst, dtest, ntreelimit = xgb.attr(bst, "best_ntreelimit"))
pred <- predict(bst, dtest, iterationrange = c(1, xgb.attr(bst, "best_iteration") + 1))
expect_equal(length(pred), 1611)
logloss_pred <- sum(-ltest * log(pred) - (1 - ltest) * log(1 - pred)) / length(ltest)
logloss_log <- attributes(bst)$evaluation_log[xgb.attr(bst, "best_iteration"), test_logloss]
logloss_log <- attributes(bst)$evaluation_log[xgb.attr(bst, "best_iteration") + 1, test_logloss]
expect_equal(logloss_log, logloss_pred, tolerance = 1e-5)
})
@@ -286,7 +283,6 @@ test_that("early stopping xgb.cv works", {
, "Stopping. Best iteration")
expect_false(is.null(cv$best_iteration))
expect_lt(cv$best_iteration, 19)
expect_equal(cv$best_iteration, cv$best_ntreelimit)
# the best error is min error:
expect_true(cv$evaluation_log[, test_error_mean[cv$best_iteration] == min(test_error_mean)])
})
@@ -354,3 +350,44 @@ test_that("prediction in xgb.cv for softprob works", {
expect_equal(dim(cv$pred), c(nrow(iris), 3))
expect_lt(diff(range(rowSums(cv$pred))), 1e-6)
})
test_that("prediction in xgb.cv works for multi-quantile", {
data(mtcars)
y <- mtcars$mpg
x <- as.matrix(mtcars[, -1])
dm <- xgb.DMatrix(x, label = y, nthread = 1)
cv <- xgb.cv(
data = dm,
params = list(
objective = "reg:quantileerror",
quantile_alpha = c(0.1, 0.2, 0.5, 0.8, 0.9),
nthread = 1
),
nrounds = 5,
nfold = 3,
prediction = TRUE,
verbose = 0
)
expect_equal(dim(cv$pred), c(nrow(x), 5))
})
test_that("prediction in xgb.cv works for multi-output", {
data(mtcars)
y <- mtcars$mpg
x <- as.matrix(mtcars[, -1])
dm <- xgb.DMatrix(x, label = cbind(y, -y), nthread = 1)
cv <- xgb.cv(
data = dm,
params = list(
tree_method = "hist",
multi_strategy = "multi_output_tree",
objective = "reg:squarederror",
nthread = n_threads
),
nrounds = 5,
nfold = 3,
prediction = TRUE,
verbose = 0
)
expect_equal(dim(cv$pred), c(nrow(x), 2))
})

View File

@@ -72,10 +72,10 @@ test_that("gblinear early stopping works", {
booster <- xgb.train(
param, dtrain, n, list(eval = dtest, train = dtrain), early_stopping_rounds = es_round
)
expect_equal(xgb.attr(booster, "best_iteration"), 5)
expect_equal(xgb.attr(booster, "best_iteration"), 4)
predt_es <- predict(booster, dtrain)
n <- xgb.attr(booster, "best_iteration") + es_round
n <- xgb.attr(booster, "best_iteration") + es_round + 1
booster <- xgb.train(
param, dtrain, n, list(eval = dtest, train = dtrain), early_stopping_rounds = es_round
)

View File

@@ -282,9 +282,6 @@ test_that("xgb.model.dt.tree works with and without feature names", {
expect_equal(dim(dt.tree), c(188, 10))
expect_output(str(dt.tree), 'Feature.*\\"Age\\"')
dt.tree.0 <- xgb.model.dt.tree(model = bst.Tree)
expect_equal(dt.tree, dt.tree.0)
# when model contains no feature names:
dt.tree.x <- xgb.model.dt.tree(model = bst.Tree.unnamed)
expect_output(str(dt.tree.x), 'Feature.*\\"3\\"')
@@ -304,7 +301,7 @@ test_that("xgb.model.dt.tree throws error for gblinear", {
test_that("xgb.importance works with and without feature names", {
.skip_if_vcd_not_available()
importance.Tree <- xgb.importance(feature_names = feature.names, model = bst.Tree)
importance.Tree <- xgb.importance(feature_names = feature.names, model = bst.Tree.unnamed)
if (!flag_32bit)
expect_equal(dim(importance.Tree), c(7, 4))
expect_equal(colnames(importance.Tree), c("Feature", "Gain", "Cover", "Frequency"))
@@ -330,9 +327,8 @@ test_that("xgb.importance works with and without feature names", {
importance <- xgb.importance(feature_names = feature.names, model = bst.Tree, trees = trees)
importance_from_dump <- function() {
model_text_dump <- xgb.dump(model = bst.Tree.unnamed, with_stats = TRUE, trees = trees)
model_text_dump <- xgb.dump(model = bst.Tree, with_stats = TRUE, trees = trees)
imp <- xgb.model.dt.tree(
feature_names = feature.names,
text = model_text_dump,
trees = trees
)[

View File

@@ -44,7 +44,7 @@ test_that('Test ranking with weighted data', {
expect_true(all(diff(attributes(bst)$evaluation_log$train_auc) >= 0))
expect_true(all(diff(attributes(bst)$evaluation_log$train_aucpr) >= 0))
for (i in 1:10) {
pred <- predict(bst, newdata = dtrain, ntreelimit = i)
pred <- predict(bst, newdata = dtrain, iterationrange = c(1, i))
# is_sorted[i]: is i-th group correctly sorted by the ranking predictor?
is_sorted <- lapply(seq(1, 20, by = 5),
function(k) {