merge latest changes

2024-01-24 13:30:08 -08:00 · 2024-01-24 13:30:08 -08:00 · 3fe874078c
commit 3fe874078c
parent 069cf1d019 d12cc1090a
83 changed files with 1408 additions and 1273 deletions
--- a/.github/workflows/r_tests.yml
+++ b/.github/workflows/r_tests.yml
@ -54,7 +54,6 @@ jobs:
      matrix:
        config:
          - {os: windows-latest, r: 'release', compiler: 'mingw', build: 'autotools'}
          - {os: windows-latest, r: '4.3.0', compiler: 'msvc', build: 'cmake'}
    env:
      R_REMOTES_NO_ERRORS_FROM_WARNINGS: true
      RSPM: ${{ matrix.config.rspm }}
--- a/R-package/DESCRIPTION
+++ b/R-package/DESCRIPTION
@ -65,6 +65,6 @@ Imports:
    data.table (>= 1.9.6),
    jsonlite (>= 1.0)
 Roxygen: list(markdown = TRUE)
-RoxygenNote: 7.2.3
+RoxygenNote: 7.3.0
 Encoding: UTF-8
 SystemRequirements: GNU make, C++17
--- a/R-package/NAMESPACE
+++ b/R-package/NAMESPACE
@ -1,5 +1,6 @@
 # Generated by roxygen2: do not edit by hand
 S3method("[",xgb.Booster)
 S3method("[",xgb.DMatrix)
 S3method("dimnames<-",xgb.DMatrix)
 S3method(coef,xgb.Booster)
@ -7,6 +8,7 @@ S3method(dim,xgb.DMatrix)
 S3method(dimnames,xgb.DMatrix)
 S3method(getinfo,xgb.Booster)
 S3method(getinfo,xgb.DMatrix)
 S3method(length,xgb.Booster)
 S3method(predict,xgb.Booster)
 S3method(print,xgb.Booster)
 S3method(print,xgb.DMatrix)
@ -62,6 +64,7 @@ export(xgb.plot.tree)
 export(xgb.save)
 export(xgb.save.raw)
 export(xgb.set.config)
 export(xgb.slice.Booster)
 export(xgb.train)
 export(xgboost)
 import(methods)
--- a/R-package/R/callbacks.R
+++ b/R-package/R/callbacks.R
@ -280,7 +280,6 @@ cb.reset.parameters <- function(new_params) {
 #' \code{iteration},
 #' \code{begin_iteration},
 #' \code{end_iteration},
 #' \code{num_parallel_tree}.
 #'
 #' @seealso
 #' \code{\link{callbacks}},
@ -291,7 +290,6 @@ cb.early.stop <- function(stopping_rounds, maximize = FALSE,
                          metric_name = NULL, verbose = TRUE) {
  # state variables
  best_iteration <- -1
  best_ntreelimit <- -1
  best_score <- Inf
  best_msg <- NULL
  metric_idx <- 1
@ -358,12 +356,10 @@ cb.early.stop <- function(stopping_rounds, maximize = FALSE,
        # If the difference is due to floating-point truncation, update best_score
        best_score <- attr_best_score
      }
-      xgb.attr(env$bst, "best_iteration") <- best_iteration
+      xgb.attr(env$bst, "best_iteration") <- best_iteration - 1
      xgb.attr(env$bst, "best_ntreelimit") <- best_ntreelimit
      xgb.attr(env$bst, "best_score") <- best_score
    } else {
      env$basket$best_iteration <- best_iteration
      env$basket$best_ntreelimit <- best_ntreelimit
    }
  }
@ -385,14 +381,13 @@ cb.early.stop <- function(stopping_rounds, maximize = FALSE,
      )
      best_score <<- score
      best_iteration <<- i
      best_ntreelimit <<- best_iteration * env$num_parallel_tree
      # save the property to attributes, so they will occur in checkpoint
      if (!is.null(env$bst)) {
        xgb.attributes(env$bst) <- list(
          best_iteration = best_iteration - 1, # convert to 0-based index
          best_score = best_score,
-          best_msg = best_msg,
+          best_msg = best_msg
-          best_ntreelimit = best_ntreelimit)
+        )
      }
    } else if (i - best_iteration >= stopping_rounds) {
      env$stop_condition <- TRUE
@ -475,8 +470,6 @@ cb.save.model <- function(save_period = 0, save_name = "xgboost.ubj") {
 #' \code{data},
 #' \code{end_iteration},
 #' \code{params},
 #' \code{num_parallel_tree},
 #' \code{num_class}.
 #'
 #' @return
 #' Predictions are returned inside of the \code{pred} element, which is either a vector or a matrix,
@ -499,19 +492,21 @@ cb.cv.predict <- function(save_models = FALSE) {
      stop("'cb.cv.predict' callback requires 'basket' and 'bst_folds' lists in its calling frame")
    N <- nrow(env$data)
-    pred <-
+    pred <- NULL
      if (env$num_class > 1) {
        matrix(NA_real_, N, env$num_class)
      } else {
        rep(NA_real_, N)
      }
-    iterationrange <- c(1, NVL(env$basket$best_iteration, env$end_iteration) + 1)
+    iterationrange <- c(1, NVL(env$basket$best_iteration, env$end_iteration))
    if (NVL(env$params[['booster']], '') == 'gblinear') {
-      iterationrange <- c(1, 1)  # must be 0 for gblinear
+      iterationrange <- "all"
    }
    for (fd in env$bst_folds) {
      pr <- predict(fd$bst, fd$watchlist[[2]], iterationrange = iterationrange, reshape = TRUE)
      if (is.null(pred)) {
        if (NCOL(pr) > 1L) {
          pred <- matrix(NA_real_, N, ncol(pr))
        } else {
          pred <- matrix(NA_real_, N)
        }
      }
      if (is.matrix(pred)) {
        pred[fd$index, ] <- pr
      } else {
--- a/R-package/R/utils.R
+++ b/R-package/R/utils.R
@ -208,7 +208,7 @@ xgb.iter.eval <- function(bst, watchlist, iter, feval) {
    res <- sapply(seq_along(watchlist), function(j) {
      w <- watchlist[[j]]
      ## predict using all trees
-      preds <- predict(bst, w, outputmargin = TRUE, iterationrange = c(1, 1))
+      preds <- predict(bst, w, outputmargin = TRUE, iterationrange = "all")
      eval_res <- feval(preds, w)
      out <- eval_res$value
      names(out) <- paste0(evnames[j], "-", eval_res$metric)
--- a/R-package/R/xgb.Booster.R
+++ b/R-package/R/xgb.Booster.R
@ -89,7 +89,6 @@ xgb.get.handle <- function(object) {
 #' @param outputmargin Whether the prediction should be returned in the form of original untransformed
 #'        sum of predictions from boosting iterations' results. E.g., setting `outputmargin=TRUE` for
 #'        logistic regression would return log-odds instead of probabilities.
 #' @param ntreelimit Deprecated, use `iterationrange` instead.
 #' @param predleaf Whether to predict per-tree leaf indices.
 #' @param predcontrib Whether to return feature contributions to individual predictions (see Details).
 #' @param approxcontrib Whether to use a fast approximation for feature contributions (see Details).
@ -99,11 +98,17 @@ xgb.get.handle <- function(object) {
 #'        or `predinteraction` is `TRUE`.
 #' @param training Whether the predictions are used for training. For dart booster,
 #'        training predicting will perform dropout.
-#' @param iterationrange Specifies which trees are used in prediction. For
+#' @param iterationrange Sequence of rounds/iterations from the model to use for prediction, specified by passing
-#'        example, take a random forest with 100 rounds.
+#'        a vector of length two with the start and end numbers in the sequence (same format as R's `seq` - i.e.
-#'        With `iterationrange=c(1, 21)`, only the trees built during `[1, 21)` (half open set)
+#'        base-1 indexing, and inclusive of both ends).
-#'        rounds are used in this prediction. The index is 1-based just like an R vector. When set
+#'
-#'        to `c(1, 1)`, XGBoost will use all trees.
+#'        For example, passing `c(1,20)` will predict using the first twenty iterations, while passing `c(1,1)` will
 #'        predict using only the first one.
 #'
 #'        If passing `NULL`, will either stop at the best iteration if the model used early stopping, or use all
 #'        of the iterations (rounds) otherwise.
 #'
 #'        If passing "all", will use all of the rounds regardless of whether the model had early stopping or not.
 #' @param strict_shape Default is `FALSE`. When set to `TRUE`, the output
 #'        type and shape of predictions are invariant to the model type.
 #' @param ... Not used.
@ -189,7 +194,7 @@ xgb.get.handle <- function(object) {
 #' # use all trees by default
 #' pred <- predict(bst, test$data)
 #' # use only the 1st tree
-#' pred1 <- predict(bst, test$data, iterationrange = c(1, 2))
+#' pred1 <- predict(bst, test$data, iterationrange = c(1, 1))
 #'
 #' # Predicting tree leafs:
 #' # the result is an nsamples X ntrees matrix
@ -260,11 +265,11 @@ xgb.get.handle <- function(object) {
 #' all.equal(pred, pred_labels)
 #' # prediction from using only 5 iterations should result
 #' # in the same error as seen in iteration 5:
-#' pred5 <- predict(bst, as.matrix(iris[, -5]), iterationrange = c(1, 6))
+#' pred5 <- predict(bst, as.matrix(iris[, -5]), iterationrange = c(1, 5))
 #' sum(pred5 != lb) / length(lb)
 #'
 #' @export
-predict.xgb.Booster <- function(object, newdata, missing = NA, outputmargin = FALSE, ntreelimit = NULL,
+predict.xgb.Booster <- function(object, newdata, missing = NA, outputmargin = FALSE,
                                predleaf = FALSE, predcontrib = FALSE, approxcontrib = FALSE, predinteraction = FALSE,
                                reshape = FALSE, training = FALSE, iterationrange = NULL, strict_shape = FALSE, ...) {
  if (!inherits(newdata, "xgb.DMatrix")) {
@ -275,25 +280,21 @@ predict.xgb.Booster <- function(object, newdata, missing = NA, outputmargin = FA
    )
  }
  if (NVL(xgb.booster_type(object), '') == 'gblinear' || is.null(ntreelimit))
    ntreelimit <- 0
-  if (ntreelimit != 0 && is.null(iterationrange)) {
+  if (!is.null(iterationrange)) {
-    ## only ntreelimit, initialize iteration range
+    if (is.character(iterationrange)) {
-    iterationrange <- c(0, 0)
+      stopifnot(iterationrange == "all")
-  } else if (ntreelimit == 0 && !is.null(iterationrange)) {
+      iterationrange <- c(0, 0)
-    ## only iteration range, handle 1-based indexing
+    } else {
-    iterationrange <- c(iterationrange[1] - 1, iterationrange[2] - 1)
+      iterationrange[1] <- iterationrange[1] - 1 # base-0 indexing
-  } else if (ntreelimit != 0 && !is.null(iterationrange)) {
+    }
    ## both are specified, let libgxgboost throw an error
  } else {
    ## no limit is supplied, use best
    best_iteration <- xgb.best_iteration(object)
    if (is.null(best_iteration)) {
      iterationrange <- c(0, 0)
    } else {
-      ## We don't need to + 1 as R is 1-based index.
+      iterationrange <- c(0, as.integer(best_iteration) + 1L)
      iterationrange <- c(0, as.integer(best_iteration))
    }
  }
  ## Handle the 0 length values.
@ -312,7 +313,6 @@ predict.xgb.Booster <- function(object, newdata, missing = NA, outputmargin = FA
    strict_shape = box(TRUE),
    iteration_begin = box(as.integer(iterationrange[1])),
    iteration_end = box(as.integer(iterationrange[2])),
    ntree_limit = box(as.integer(ntreelimit)),
    type = box(as.integer(0))
  )
@ -343,24 +343,24 @@ predict.xgb.Booster <- function(object, newdata, missing = NA, outputmargin = FA
  )
  names(predts) <- c("shape", "results")
  shape <- predts$shape
-  ret <- predts$results
+  arr <- predts$results
-  n_ret <- length(ret)
+  n_ret <- length(arr)
  n_row <- nrow(newdata)
  if (n_row != shape[1]) {
    stop("Incorrect predict shape.")
  }
-  arr <- array(data = ret, dim = rev(shape))
+  .Call(XGSetArrayDimInplace_R, arr, rev(shape))
  cnames <- if (!is.null(colnames(newdata))) c(colnames(newdata), "BIAS") else NULL
  n_groups <- shape[2]
  ## Needed regardless of whether strict shape is being used.
  if (predcontrib) {
-    dimnames(arr) <- list(cnames, NULL, NULL)
+    .Call(XGSetArrayDimNamesInplace_R, arr, list(cnames, NULL, NULL))
  } else if (predinteraction) {
-    dimnames(arr) <- list(cnames, cnames, NULL, NULL)
+    .Call(XGSetArrayDimNamesInplace_R, arr, list(cnames, cnames, NULL, NULL))
  }
  if (strict_shape) {
    return(arr) # strict shape is calculated by libxgboost uniformly.
@ -368,43 +368,51 @@ predict.xgb.Booster <- function(object, newdata, missing = NA, outputmargin = FA
  if (predleaf) {
    ## Predict leaf
-    arr <- if (n_ret == n_row) {
+    if (n_ret == n_row) {
-      matrix(arr, ncol = 1)
+      .Call(XGSetArrayDimInplace_R, arr, c(n_row, 1L))
    } else {
-      matrix(arr, nrow = n_row, byrow = TRUE)
+      arr <- matrix(arr, nrow = n_row, byrow = TRUE)
    }
  } else if (predcontrib) {
    ## Predict contribution
    arr <- aperm(a = arr, perm = c(2, 3, 1)) # [group, row, col]
-    arr <- if (n_ret == n_row) {
+    if (n_ret == n_row) {
-      matrix(arr, ncol =  1, dimnames = list(NULL, cnames))
+      .Call(XGSetArrayDimInplace_R, arr, c(n_row, 1L))
      .Call(XGSetArrayDimNamesInplace_R, arr, list(NULL, cnames))
    } else if (n_groups != 1) {
      ## turns array into list of matrices
-      lapply(seq_len(n_groups), function(g) arr[g, , ])
+      arr <- lapply(seq_len(n_groups), function(g) arr[g, , ])
    } else {
      ## remove the first axis (group)
-      dn <- dimnames(arr)
+      newdim <- dim(arr)[2:3]
-      matrix(arr[1, , ], nrow = dim(arr)[2], ncol = dim(arr)[3], dimnames = c(dn[2], dn[3]))
+      newdn <- dimnames(arr)[2:3]
      arr <- arr[1, , ]
      .Call(XGSetArrayDimInplace_R, arr, newdim)
      .Call(XGSetArrayDimNamesInplace_R, arr, newdn)
    }
  } else if (predinteraction) {
    ## Predict interaction
    arr <- aperm(a = arr, perm = c(3, 4, 1, 2)) # [group, row, col, col]
-    arr <- if (n_ret == n_row) {
+    if (n_ret == n_row) {
-      matrix(arr, ncol = 1, dimnames = list(NULL, cnames))
+      .Call(XGSetArrayDimInplace_R, arr, c(n_row, 1L))
      .Call(XGSetArrayDimNamesInplace_R, arr, list(NULL, cnames))
    } else if (n_groups != 1) {
      ## turns array into list of matrices
-      lapply(seq_len(n_groups), function(g) arr[g, , , ])
+      arr <- lapply(seq_len(n_groups), function(g) arr[g, , , ])
    } else {
      ## remove the first axis (group)
      arr <- arr[1, , , , drop = FALSE]
-      array(arr, dim = dim(arr)[2:4], dimnames(arr)[2:4])
+      newdim <- dim(arr)[2:4]
      newdn <- dimnames(arr)[2:4]
      .Call(XGSetArrayDimInplace_R, arr, newdim)
      .Call(XGSetArrayDimNamesInplace_R, arr, newdn)
    }
  } else {
    ## Normal prediction
-    arr <- if (reshape && n_groups != 1) {
+    if (reshape && n_groups != 1) {
-      matrix(arr, ncol = n_groups, byrow = TRUE)
+      arr <- matrix(arr, ncol = n_groups, byrow = TRUE)
    } else {
-      as.vector(ret)
+      .Call(XGSetArrayDimInplace_R, arr, NULL)
    }
  }
  return(arr)
@ -492,7 +500,7 @@ xgb.attr <- function(object, name) {
    return(NULL)
  }
  if (!is.null(out)) {
-    if (name %in% c("best_iteration", "best_ntreelimit", "best_score")) {
+    if (name %in% c("best_iteration", "best_score")) {
      out <- as.numeric(out)
    }
  }
@ -685,16 +693,94 @@ setinfo.xgb.Booster <- function(object, name, info) {
 }
 #' @title Get number of boosting rounds in a fitted booster
-#' @param model A fitted `xgb.Booster` model.
+#' @param model,x A fitted `xgb.Booster` model.
 #' @return The number of rounds saved in the model, as an integer.
 #' @details Note that setting booster parameters related to training
 #' continuation / updates through \link{xgb.parameters<-} will reset the
 #' number of rounds to zero.
 #' @export
 #' @rdname xgb.get.num.boosted.rounds
 xgb.get.num.boosted.rounds <- function(model) {
  # Resolve the booster handle first, then pass it to the compiled
  # XGBoosterBoostedRounds_R routine and return its result unchanged.
  booster_handle <- xgb.get.handle(model)
  .Call(XGBoosterBoostedRounds_R, booster_handle)
 }
 #' @rdname xgb.get.num.boosted.rounds
 #' @export
 length.xgb.Booster <- function(x) {
  # `length()` on a booster simply delegates to the boosted-rounds getter.
  xgb.get.num.boosted.rounds(x)
 }
 #' @title Slice Booster by Rounds
 #' @description Creates a new booster including only a selected range of rounds / iterations
 #' from an existing booster, as given by the sequence `seq(start, end, step)`.
 #' @details Note that any R attributes that the booster might have, will not be copied into
 #' the resulting object.
 #' @param model,x A fitted `xgb.Booster` object, which is to be sliced by taking only a subset
 #' of its rounds / iterations.
 #' @param start Start of the slice (base-1 and inclusive, like R's \link{seq}).
 #' @param end End of the slice (base-1 and inclusive, like R's \link{seq}).
 #'
 #' Passing a value of zero here is equivalent to passing the full number of rounds in the
 #' booster object.
 #' @param step Step size of the slice. Passing '1' will take every round in the sequence defined by
 #' `(start, end)`, while passing '2' will take every second value, and so on.
 #' @return A sliced booster object containing only the requested rounds.
 #' @examples
 #' data(mtcars)
 #' y <- mtcars$mpg
 #' x <- as.matrix(mtcars[, -1])
 #' dm <- xgb.DMatrix(x, label = y, nthread = 1)
 #' model <- xgb.train(data = dm, params = list(nthread = 1), nrounds = 5)
 #' model_slice <- xgb.slice.Booster(model, 1, 3)
 #' # Prediction for first three rounds
 #' predict(model, x, predleaf = TRUE)[, 1:3]
 #'
 #' # The new model has only those rounds, so
 #' # a full prediction from it is equivalent
 #' predict(model_slice, x, predleaf = TRUE)
 #' @export
 #' @rdname xgb.slice.Booster
 xgb.slice.Booster <- function(model, start, end = xgb.get.num.boosted.rounds(model), step = 1L) {
  # R's 'seq' silently stops before 'end' when the step does not land on it.
  # To mimic that, when the span from 'start' to 'end' is not a whole
  # multiple of 'step', 'end' is moved forward to the next multiple before
  # the values are handed to the compiled slicing routine.
  strided_range <- end > start && step > 1
  if (strided_range) {
    n_strides <- (end - start + 1) / step
    if (n_strides != floor(n_strides)) {
      end <- start + step * ceiling(n_strides) - 1
    }
  }
  # 'start' is shifted by one before the call; 'end' and 'step' go through as-is.
  booster_handle <- xgb.get.handle(model)
  .Call(XGBoosterSlice_R, booster_handle, start - 1, end, step)
 }
 #' @export
 #' @rdname xgb.slice.Booster
 #' @param i The indices - must be an increasing sequence as generated by e.g. `seq(...)`.
 `[.xgb.Booster` <- function(x, i) {
  # Subset a booster by round indices, delegating to xgb.slice.Booster().
  #
  # x: the booster to slice.
  # i: round indices; must form a strictly ascending, fixed-step sequence
  #    (as produced by e.g. `seq(...)`). When omitted, all rounds are kept.
  #
  # Returns the result of xgb.slice.Booster() for the requested range.
  if (missing(i)) {
    # An 'end' of zero is documented as "the full number of rounds".
    return(xgb.slice.Booster(x, 1, 0))
  }
  if (length(i) == 0L) {
    # Fail with a clear message instead of an obscure zero-length
    # condition error further down the call chain.
    stop("Cannot slice booster with an empty index.")
  }
  if (length(i) == 1L) {
    return(xgb.slice.Booster(x, i, i))
  }
  steps <- diff(i)
  # Repeated indices produce a step of zero, which is not an ascending
  # sequence either, so reject non-positive steps rather than only negative ones.
  if (any(steps <= 0)) {
    stop("Can only slice booster with ascending sequences.")
  }
  if (length(unique(steps)) > 1) {
    stop("Can only slice booster with fixed-step sequences.")
  }
  xgb.slice.Booster(x, i[1L], i[length(i)], steps[1L])
 }
 #' @title Get Features Names from Booster
 #' @description Returns the feature / variable / column names from a fitted
 #' booster object, which are set automatically during the call to \link{xgb.train}
@ -710,12 +796,6 @@ variable.names.xgb.Booster <- function(object, ...) {
  return(getinfo(object, "feature_name"))
 }
 xgb.ntree <- function(bst) {
  # Read the 'num_trees' entry of the gbtree model parameters from the
  # booster configuration and convert it with strtoi().
  tree_params <- xgb.config(bst)$learner$gradient_booster$gbtree_model_param
  strtoi(tree_params$num_trees)
 }
 xgb.nthread <- function(bst) {
  config <- xgb.config(bst)
  out <- strtoi(config$learner$generic_param$nthread)
--- a/R-package/R/xgb.cv.R
+++ b/R-package/R/xgb.cv.R
@ -103,7 +103,6 @@
 #'         parameter or randomly generated.
 #'   \item \code{best_iteration} iteration number with the best evaluation metric value
 #'         (only available with early stopping).
 #'   \item \code{best_ntreelimit} and the \code{ntreelimit} Deprecated attributes, use \code{best_iteration} instead.
 #'   \item \code{pred} CV prediction values available when \code{prediction} is set.
 #'         It is either vector or matrix (see \code{\link{cb.cv.predict}}).
 #'   \item \code{models} a list of the CV folds' models. It is only available with the explicit
@ -218,7 +217,6 @@ xgb.cv <- function(params = list(), data, nrounds, nfold, label = NULL, missing
  # extract parameters that can affect the relationship b/w #trees and #iterations
  num_class <- max(as.numeric(NVL(params[['num_class']], 1)), 1) # nolint
  num_parallel_tree <- max(as.numeric(NVL(params[['num_parallel_tree']], 1)), 1) # nolint
  # those are fixed for CV (no training continuation)
  begin_iteration <- 1
@ -318,7 +316,7 @@ print.xgb.cv.synchronous <- function(x, verbose = FALSE, ...) {
      })
    }
-    for (n in c('niter', 'best_iteration', 'best_ntreelimit')) {
+    for (n in c('niter', 'best_iteration')) {
      if (is.null(x[[n]]))
        next
      cat(n, ': ', x[[n]], '\n', sep = '')
--- a/R-package/R/xgb.importance.R
+++ b/R-package/R/xgb.importance.R
@ -113,19 +113,12 @@
 #' xgb.importance(model = mbst)
 #'
 #' @export
-xgb.importance <- function(feature_names = NULL, model = NULL, trees = NULL,
+xgb.importance <- function(model = NULL, feature_names = getinfo(model, "feature_name"), trees = NULL,
                           data = NULL, label = NULL, target = NULL) {
  if (!(is.null(data) && is.null(label) && is.null(target)))
    warning("xgb.importance: parameters 'data', 'label' and 'target' are deprecated")
  if (is.null(feature_names)) {
    model_feature_names <- xgb.feature_names(model)
    if (NROW(model_feature_names)) {
      feature_names <- model_feature_names
    }
  }
  if (!(is.null(feature_names) || is.character(feature_names)))
    stop("feature_names: Has to be a character vector")
--- a/R-package/R/xgb.model.dt.tree.R
+++ b/R-package/R/xgb.model.dt.tree.R
@ -2,11 +2,8 @@
 #'
 #' Parse a boosted tree model text dump into a `data.table` structure.
 #'
-#' @param feature_names Character vector of feature names. If the model already
+#' @param model Object of class `xgb.Booster`. If it contains feature names (they can be set through
-#'        contains feature names, those will be used when \code{feature_names=NULL} (default value).
+#'        \link{setinfo}), they will be used in the output from this function.
 #'
 #'        Note that, if the model already contains feature names, it's \bold{not} possible to override them here.
 #' @param model Object of class `xgb.Booster`.
 #' @param text Character vector previously generated by the function [xgb.dump()]
 #'        (called with parameter `with_stats = TRUE`). `text` takes precedence over `model`.
 #' @param trees An integer vector of tree indices that should be used.
@ -58,7 +55,7 @@
 #'
 #' # This bst model already has feature_names stored with it, so those would be used when
 #' # feature_names is not set:
-#' (dt <- xgb.model.dt.tree(model = bst))
+#' dt <- xgb.model.dt.tree(bst)
 #'
 #' # How to match feature names of splits that are following a current 'Yes' branch:
 #' merge(
@ -69,7 +66,7 @@
 #' ]
 #'
 #' @export
-xgb.model.dt.tree <- function(feature_names = NULL, model = NULL, text = NULL,
+xgb.model.dt.tree <- function(model = NULL, text = NULL,
                              trees = NULL, use_int_id = FALSE, ...) {
  check.deprecation(...)
@ -79,24 +76,15 @@ xgb.model.dt.tree <- function(feature_names = NULL, model = NULL, text = NULL,
         "  (or NULL if 'model' was provided).")
  }
  model_feature_names <- NULL
  if (inherits(model, "xgb.Booster")) {
    model_feature_names <- xgb.feature_names(model)
    if (NROW(model_feature_names) && !is.null(feature_names)) {
      stop("'model' contains feature names. Cannot override them.")
    }
  }
  if (is.null(feature_names) && !is.null(model) && !is.null(model_feature_names))
    feature_names <- model_feature_names
  if (!(is.null(feature_names) || is.character(feature_names))) {
    stop("feature_names: must be a character vector")
  }
  if (!(is.null(trees) || is.numeric(trees))) {
    stop("trees: must be a vector of integers.")
  }
  feature_names <- NULL
  if (inherits(model, "xgb.Booster")) {
    feature_names <- xgb.feature_names(model)
  }
  from_text <- TRUE
  if (is.null(text)) {
    text <- xgb.dump(model = model, with_stats = TRUE)
@ -134,7 +122,7 @@ xgb.model.dt.tree <- function(feature_names = NULL, model = NULL, text = NULL,
  branch_rx_w_names <- paste0("\\d+:\\[(.+)<(", anynumber_regex, ")\\] yes=(\\d+),no=(\\d+),missing=(\\d+),",
                              "gain=(", anynumber_regex, "),cover=(", anynumber_regex, ")")
  text_has_feature_names <- FALSE
-  if (NROW(model_feature_names)) {
+  if (NROW(feature_names)) {
    branch_rx <- branch_rx_w_names
    text_has_feature_names <- TRUE
  } else {
@ -148,9 +136,6 @@ xgb.model.dt.tree <- function(feature_names = NULL, model = NULL, text = NULL,
      }
    }
  }
  if (text_has_feature_names && is.null(model) && !is.null(feature_names)) {
    stop("'text' contains feature names. Cannot override them.")
  }
  branch_cols <- c("Feature", "Split", "Yes", "No", "Missing", "Gain", "Cover")
  td[
    isLeaf == FALSE,
--- a/R-package/R/xgb.plot.multi.trees.R
+++ b/R-package/R/xgb.plot.multi.trees.R
@ -62,13 +62,13 @@
 #' }
 #'
 #' @export
-xgb.plot.multi.trees <- function(model, feature_names = NULL, features_keep = 5, plot_width = NULL, plot_height = NULL,
+xgb.plot.multi.trees <- function(model, features_keep = 5, plot_width = NULL, plot_height = NULL,
                                 render = TRUE, ...) {
  if (!requireNamespace("DiagrammeR", quietly = TRUE)) {
    stop("DiagrammeR is required for xgb.plot.multi.trees")
  }
  check.deprecation(...)
-  tree.matrix <- xgb.model.dt.tree(feature_names = feature_names, model = model)
+  tree.matrix <- xgb.model.dt.tree(model = model)
  # first number of the path represents the tree, then the following numbers are related to the path to follow
  # root init
--- a/R-package/R/xgb.plot.tree.R
+++ b/R-package/R/xgb.plot.tree.R
@ -2,9 +2,8 @@
 #'
 #' Read a tree model text dump and plot the model.
 #'
-#' @param feature_names Character vector used to overwrite the feature names
+#' @param model Object of class `xgb.Booster`. If it contains feature names (they can be set through
-#'        of the model. The default (`NULL`) uses the original feature names.
+#'        \link{setinfo}), they will be used in the output from this function.
 #' @param model Object of class `xgb.Booster`.
 #' @param trees An integer vector of tree indices that should be used.
 #'        The default (`NULL`) uses all trees.
 #'        Useful, e.g., in multiclass classification to get only
@ -103,7 +102,7 @@
 #' }
 #'
 #' @export
-xgb.plot.tree <- function(feature_names = NULL, model = NULL, trees = NULL, plot_width = NULL, plot_height = NULL,
+xgb.plot.tree <- function(model = NULL, trees = NULL, plot_width = NULL, plot_height = NULL,
                          render = TRUE, show_node_id = FALSE, style = c("R", "xgboost"), ...) {
  check.deprecation(...)
  if (!inherits(model, "xgb.Booster")) {
@ -120,17 +119,12 @@ xgb.plot.tree <- function(feature_names = NULL, model = NULL, trees = NULL, plot
    if (NROW(trees) != 1L || !render || show_node_id) {
      stop("style='xgboost' is only supported for single, rendered tree, without node IDs.")
    }
    if (!is.null(feature_names)) {
      stop(
        "style='xgboost' cannot override 'feature_names'. Will automatically take them from the model."
      )
    }
    txt <- xgb.dump(model, dump_format = "dot")
    return(DiagrammeR::grViz(txt[[trees + 1]], width = plot_width, height = plot_height))
  }
-  dt <- xgb.model.dt.tree(feature_names = feature_names, model = model, trees = trees)
+  dt <- xgb.model.dt.tree(model = model, trees = trees)
  dt[, label := paste0(Feature, "\nCover: ", Cover, ifelse(Feature == "Leaf", "\nValue: ", "\nGain: "), Gain)]
  if (show_node_id)
--- a/R-package/R/xgb.train.R
+++ b/R-package/R/xgb.train.R
@ -393,7 +393,6 @@ xgb.train <- function(params = list(), data, nrounds, watchlist = list(),
  # Note: it might look like these aren't used, but they need to be defined in this
  # environment for the callbacks for work correctly.
  num_class <- max(as.numeric(NVL(params[['num_class']], 1)), 1) # nolint
  num_parallel_tree <- max(as.numeric(NVL(params[['num_parallel_tree']], 1)), 1) # nolint
  if (is_update && nrounds > niter_init)
    stop("nrounds cannot be larger than ", niter_init, " (nrounds of xgb_model)")
--- a/R-package/demo/predict_first_ntree.R
+++ b/R-package/demo/predict_first_ntree.R
@ -15,7 +15,7 @@ cat('start testing prediction from first n trees\n')
 labels <- getinfo(dtest, 'label')
 ### predict using first 1 tree
-ypred1 <- predict(bst, dtest, ntreelimit = 1)
+ypred1 <- predict(bst, dtest, iterationrange = c(1, 1))
 # by default, we predict using all the trees
 ypred2 <- predict(bst, dtest)
--- a/R-package/man/cb.cv.predict.Rd
+++ b/R-package/man/cb.cv.predict.Rd
@ -35,8 +35,6 @@ Callback function expects the following values to be set in its calling frame:
 \code{data},
 \code{end_iteration},
 \code{params},
 \code{num_parallel_tree},
 \code{num_class}.
 }
 \seealso{
 \code{\link{callbacks}}
--- a/R-package/man/cb.early.stop.Rd
+++ b/R-package/man/cb.early.stop.Rd
@ -55,7 +55,6 @@ Callback function expects the following values to be set in its calling frame:
 \code{iteration},
 \code{begin_iteration},
 \code{end_iteration},
 \code{num_parallel_tree}.
 }
 \seealso{
 \code{\link{callbacks}},
--- a/R-package/man/predict.xgb.Booster.Rd
+++ b/R-package/man/predict.xgb.Booster.Rd
@ -9,7 +9,6 @@
  newdata,
  missing = NA,
  outputmargin = FALSE,
  ntreelimit = NULL,
  predleaf = FALSE,
  predcontrib = FALSE,
  approxcontrib = FALSE,
@ -36,8 +35,6 @@ missing values in data (e.g., 0 or some other extreme value).}
 sum of predictions from boosting iterations' results. E.g., setting \code{outputmargin=TRUE} for
 logistic regression would return log-odds instead of probabilities.}
 \item{ntreelimit}{Deprecated, use \code{iterationrange} instead.}
 \item{predleaf}{Whether to predict per-tree leaf indices.}
 \item{predcontrib}{Whether to return feature contributions to individual predictions (see Details).}
@ -53,11 +50,18 @@ or \code{predinteraction} is \code{TRUE}.}
 \item{training}{Whether the predictions are used for training. For dart booster,
 training predicting will perform dropout.}
-\item{iterationrange}{Specifies which trees are used in prediction. For
+\item{iterationrange}{Sequence of rounds/iterations from the model to use for prediction, specified by passing
-example, take a random forest with 100 rounds.
+a vector of length two with the start and end numbers in the sequence (same format as R's \code{seq} - i.e.
-With \code{iterationrange=c(1, 21)}, only the trees built during \verb{[1, 21)} (half open set)
+base-1 indexing, and inclusive of both ends).
-rounds are used in this prediction. The index is 1-based just like an R vector. When set
+
-to \code{c(1, 1)}, XGBoost will use all trees.}
+\if{html}{\out{<div class="sourceCode">}}\preformatted{   For example, passing `c(1,20)` will predict using the first twenty iterations, while passing `c(1,1)` will
   predict using only the first one.
   If passing `NULL`, will either stop at the best iteration if the model used early stopping, or use all
   of the iterations (rounds) otherwise.
   If passing "all", will use all of the rounds regardless of whether the model had early stopping or not.
 }\if{html}{\out{</div>}}}
 \item{strict_shape}{Default is \code{FALSE}. When set to \code{TRUE}, the output
 type and shape of predictions are invariant to the model type.}
@ -145,7 +149,7 @@ bst <- xgb.train(
 # use all trees by default
 pred <- predict(bst, test$data)
 # use only the 1st tree
-pred1 <- predict(bst, test$data, iterationrange = c(1, 2))
+pred1 <- predict(bst, test$data, iterationrange = c(1, 1))
 # Predicting tree leafs:
 # the result is an nsamples X ntrees matrix
@ -216,7 +220,7 @@ str(pred)
 all.equal(pred, pred_labels)
 # prediction from using only 5 iterations should result
 # in the same error as seen in iteration 5:
-pred5 <- predict(bst, as.matrix(iris[, -5]), iterationrange = c(1, 6))
+pred5 <- predict(bst, as.matrix(iris[, -5]), iterationrange = c(1, 5))
 sum(pred5 != lb) / length(lb)
 }
--- a/R-package/man/xgb.cv.Rd
+++ b/R-package/man/xgb.cv.Rd
@ -135,7 +135,6 @@ It is created by the \code{\link{cb.evaluation.log}} callback.
 parameter or randomly generated.
 \item \code{best_iteration} iteration number with the best evaluation metric value
 (only available with early stopping).
 \item \code{best_ntreelimit} and the \code{ntreelimit} Deprecated attributes, use \code{best_iteration} instead.
 \item \code{pred} CV prediction values available when \code{prediction} is set.
 It is either vector or matrix (see \code{\link{cb.cv.predict}}).
 \item \code{models} a list of the CV folds' models. It is only available with the explicit
--- a/R-package/man/xgb.get.num.boosted.rounds.Rd
+++ b/R-package/man/xgb.get.num.boosted.rounds.Rd
@ -2,12 +2,15 @@
 % Please edit documentation in R/xgb.Booster.R
 \name{xgb.get.num.boosted.rounds}
 \alias{xgb.get.num.boosted.rounds}
 \alias{length.xgb.Booster}
 \title{Get number of boosting rounds in a fitted booster}
 \usage{
 xgb.get.num.boosted.rounds(model)
 \method{length}{xgb.Booster}(x)
 }
 \arguments{
-\item{model}{A fitted \code{xgb.Booster} model.}
+\item{model, x}{A fitted \code{xgb.Booster} model.}
 }
 \value{
 The number of rounds saved in the model, as an integer.
--- a/R-package/man/xgb.importance.Rd
+++ b/R-package/man/xgb.importance.Rd
@ -5,8 +5,8 @@
 \title{Feature importance}
 \usage{
 xgb.importance(
  feature_names = NULL,
  model = NULL,
  feature_names = getinfo(model, "feature_name"),
  trees = NULL,
  data = NULL,
  label = NULL,
@ -14,11 +14,11 @@ xgb.importance(
 )
 }
 \arguments{
 \item{model}{Object of class \code{xgb.Booster}.}
 \item{feature_names}{Character vector used to overwrite the feature names
 of the model. The default is \code{NULL} (use original feature names).}
 \item{model}{Object of class \code{xgb.Booster}.}
 \item{trees}{An integer vector of tree indices that should be included
 into the importance calculation (only for the "gbtree" booster).
 The default (\code{NULL}) parses all trees.
--- a/R-package/man/xgb.model.dt.tree.Rd
+++ b/R-package/man/xgb.model.dt.tree.Rd
@ -5,7 +5,6 @@
 \title{Parse model text dump}
 \usage{
 xgb.model.dt.tree(
  feature_names = NULL,
  model = NULL,
  text = NULL,
  trees = NULL,
@ -14,13 +13,8 @@ xgb.model.dt.tree(
 )
 }
 \arguments{
-\item{feature_names}{Character vector of feature names. If the model already
+\item{model}{Object of class \code{xgb.Booster}. If it contains feature names (they can be set through
-contains feature names, those will be used when \code{feature_names=NULL} (default value).
+\link{setinfo}), they will be used in the output from this function.}
 \if{html}{\out{<div class="sourceCode">}}\preformatted{   Note that, if the model already contains feature names, it's \\bold\{not\} possible to override them here.
 }\if{html}{\out{</div>}}}
 \item{model}{Object of class \code{xgb.Booster}.}
 \item{text}{Character vector previously generated by the function \code{\link[=xgb.dump]{xgb.dump()}}
 (called with parameter \code{with_stats = TRUE}). \code{text} takes precedence over \code{model}.}
@ -81,7 +75,7 @@ bst <- xgboost(
 # This bst model already has feature_names stored with it, so those would be used when
 # feature_names is not set:
-(dt <- xgb.model.dt.tree(model = bst))
+dt <- xgb.model.dt.tree(bst)
 # How to match feature names of splits that are following a current 'Yes' branch:
 merge(
--- a/R-package/man/xgb.plot.multi.trees.Rd
+++ b/R-package/man/xgb.plot.multi.trees.Rd
@ -6,7 +6,6 @@
 \usage{
 xgb.plot.multi.trees(
  model,
  feature_names = NULL,
  features_keep = 5,
  plot_width = NULL,
  plot_height = NULL,
@ -15,10 +14,8 @@ xgb.plot.multi.trees(
 )
 }
 \arguments{
-\item{model}{Object of class \code{xgb.Booster}.}
+\item{model}{Object of class \code{xgb.Booster}. If it contains feature names (they can be set through
-
+\link{setinfo}), they will be used in the output from this function.}
 \item{feature_names}{Character vector used to overwrite the feature names
 of the model. The default (\code{NULL}) uses the original feature names.}
 \item{features_keep}{Number of features to keep in each position of the multi trees,
 by default 5.}
--- a/R-package/man/xgb.plot.tree.Rd
+++ b/R-package/man/xgb.plot.tree.Rd
@ -5,7 +5,6 @@
 \title{Plot boosted trees}
 \usage{
 xgb.plot.tree(
  feature_names = NULL,
  model = NULL,
  trees = NULL,
  plot_width = NULL,
@ -17,10 +16,8 @@ xgb.plot.tree(
 )
 }
 \arguments{
-\item{feature_names}{Character vector used to overwrite the feature names
+\item{model}{Object of class \code{xgb.Booster}. If it contains feature names (they can be set through
-of the model. The default (\code{NULL}) uses the original feature names.}
+\link{setinfo}), they will be used in the output from this function.}
 \item{model}{Object of class \code{xgb.Booster}.}
 \item{trees}{An integer vector of tree indices that should be used.
 The default (\code{NULL}) uses all trees.
--- a/R-package/man/xgb.slice.Booster.Rd
+++ b/R-package/man/xgb.slice.Booster.Rd
@ -0,0 +1,57 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/xgb.Booster.R
 \name{xgb.slice.Booster}
 \alias{xgb.slice.Booster}
 \alias{[.xgb.Booster}
 \title{Slice Booster by Rounds}
 \usage{
 xgb.slice.Booster(
  model,
  start,
  end = xgb.get.num.boosted.rounds(model),
  step = 1L
 )
 \method{[}{xgb.Booster}(x, i)
 }
 \arguments{
 \item{model, x}{A fitted \code{xgb.Booster} object, which is to be sliced by taking only a subset
 of its rounds / iterations.}
 \item{start}{Start of the slice (base-1 and inclusive, like R's \link{seq}).}
 \item{end}{End of the slice (base-1 and inclusive, like R's \link{seq}).
 Passing a value of zero here is equivalent to passing the full number of rounds in the
 booster object.}
 \item{step}{Step size of the slice. Passing '1' will take every round in the sequence defined by
 \verb{(start, end)}, while passing '2' will take every second value, and so on.}
 \item{i}{The indices - must be an increasing sequence as generated by e.g. \code{seq(...)}.}
 }
 \value{
 A sliced booster object containing only the requested rounds.
 }
 \description{
 Creates a new booster including only a selected range of rounds / iterations
 from an existing booster, as given by the sequence \code{seq(start, end, step)}.
 }
 \details{
 Note that any R attributes that the booster might have, will not be copied into
 the resulting object.
 }
 \examples{
 data(mtcars)
 y <- mtcars$mpg
 x <- as.matrix(mtcars[, -1])
 dm <- xgb.DMatrix(x, label = y, nthread = 1)
 model <- xgb.train(data = dm, params = list(nthread = 1), nrounds = 5)
 model_slice <- xgb.slice.Booster(model, 1, 3)
 # Prediction for first three rounds
 predict(model, x, predleaf = TRUE)[, 1:3]
 # The new model has only those rounds, so
 # a full prediction from it is equivalent
 predict(model_slice, x, predleaf = TRUE)
 }
--- a/R-package/src/init.c
+++ b/R-package/src/init.c
@ -42,6 +42,8 @@ extern SEXP XGBoosterSetAttr_R(SEXP, SEXP, SEXP);
 extern SEXP XGBoosterSetParam_R(SEXP, SEXP, SEXP);
 extern SEXP XGBoosterUpdateOneIter_R(SEXP, SEXP, SEXP);
 extern SEXP XGCheckNullPtr_R(SEXP);
 extern SEXP XGSetArrayDimInplace_R(SEXP, SEXP);
 extern SEXP XGSetArrayDimNamesInplace_R(SEXP, SEXP);
 extern SEXP XGDMatrixCreateFromCSC_R(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP);
 extern SEXP XGDMatrixCreateFromCSR_R(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP);
 extern SEXP XGDMatrixCreateFromFile_R(SEXP, SEXP);
@ -62,6 +64,7 @@ extern SEXP XGDMatrixSliceDMatrix_R(SEXP, SEXP);
 extern SEXP XGBSetGlobalConfig_R(SEXP);
 extern SEXP XGBGetGlobalConfig_R(void);
 extern SEXP XGBoosterFeatureScore_R(SEXP, SEXP);
 extern SEXP XGBoosterSlice_R(SEXP, SEXP, SEXP, SEXP);
 static const R_CallMethodDef CallEntries[] = {
  {"XGDuplicate_R",               (DL_FUNC) &XGDuplicate_R,               1},
@ -90,6 +93,8 @@ static const R_CallMethodDef CallEntries[] = {
  {"XGBoosterSetParam_R",         (DL_FUNC) &XGBoosterSetParam_R,         3},
  {"XGBoosterUpdateOneIter_R",    (DL_FUNC) &XGBoosterUpdateOneIter_R,    3},
  {"XGCheckNullPtr_R",            (DL_FUNC) &XGCheckNullPtr_R,            1},
  {"XGSetArrayDimInplace_R",      (DL_FUNC) &XGSetArrayDimInplace_R,      2},
  {"XGSetArrayDimNamesInplace_R", (DL_FUNC) &XGSetArrayDimNamesInplace_R, 2},
  {"XGDMatrixCreateFromCSC_R",    (DL_FUNC) &XGDMatrixCreateFromCSC_R,    6},
  {"XGDMatrixCreateFromCSR_R",    (DL_FUNC) &XGDMatrixCreateFromCSR_R,    6},
  {"XGDMatrixCreateFromFile_R",   (DL_FUNC) &XGDMatrixCreateFromFile_R,   2},
@ -110,6 +115,7 @@ static const R_CallMethodDef CallEntries[] = {
  {"XGBSetGlobalConfig_R",        (DL_FUNC) &XGBSetGlobalConfig_R,        1},
  {"XGBGetGlobalConfig_R",        (DL_FUNC) &XGBGetGlobalConfig_R,        0},
  {"XGBoosterFeatureScore_R",     (DL_FUNC) &XGBoosterFeatureScore_R,     2},
  {"XGBoosterSlice_R",            (DL_FUNC) &XGBoosterSlice_R,            4},
  {NULL, NULL, 0}
 };
--- a/R-package/src/xgboost_R.cc
+++ b/R-package/src/xgboost_R.cc
@ -263,6 +263,16 @@ XGB_DLL SEXP XGCheckNullPtr_R(SEXP handle) {
  return Rf_ScalarLogical(R_ExternalPtrAddr(handle) == nullptr);
 }
 // Sets the "dim" attribute (R_DimSymbol) of `arr` to `dims` by calling
 // Rf_setAttrib directly on the object that was passed in, then returns
 // R's NULL value; the result is communicated through `arr`, not the return.
 XGB_DLL SEXP XGSetArrayDimInplace_R(SEXP arr, SEXP dims) {
  Rf_setAttrib(arr, R_DimSymbol, dims);
  return R_NilValue;
 }
 // Sets the "dimnames" attribute (R_DimNamesSymbol) of `arr` to `dim_names`
 // by calling Rf_setAttrib directly on the object that was passed in, then
 // returns R's NULL value; the result is communicated through `arr`.
 XGB_DLL SEXP XGSetArrayDimNamesInplace_R(SEXP arr, SEXP dim_names) {
  Rf_setAttrib(arr, R_DimNamesSymbol, dim_names);
  return R_NilValue;
 }
 namespace {
 void _DMatrixFinalizer(SEXP ext) {
  R_API_BEGIN();
@ -1279,3 +1289,18 @@ XGB_DLL SEXP XGBoosterFeatureScore_R(SEXP handle, SEXP json_config) {
  return r_out;
 }
 // R entry point for slicing a booster by rounds.
 //
 // Converts `begin_layer`, `end_layer` and `step` to C integers, forwards them
 // together with the native pointer held by `handle` to XGBoosterSlice, and
 // stores the resulting booster handle in a freshly created R object that is
 // returned to the caller.
 XGB_DLL SEXP XGBoosterSlice_R(SEXP handle, SEXP begin_layer, SEXP end_layer, SEXP step) {
  // Create the output object up front and protect it from R's garbage
  // collector while the native call runs.
  // NOTE(review): presumably XGBMakeEmptyAltrep returns an empty booster
  // wrapper with no handle attached yet -- confirm against its definition.
  SEXP out = Rf_protect(XGBMakeEmptyAltrep());
  R_API_BEGIN();
  BoosterHandle handle_out = nullptr;
  CHECK_CALL(XGBoosterSlice(R_ExternalPtrAddr(handle),
                            Rf_asInteger(begin_layer),
                            Rf_asInteger(end_layer),
                            Rf_asInteger(step),
                            &handle_out));
  // Attach the newly produced native handle to the output object.
  XGBAltrepSetPointer(out, handle_out);
  R_API_END();
  // Balances the single Rf_protect call above.
  Rf_unprotect(1);
  return out;
 }
--- a/R-package/src/xgboost_R.h
+++ b/R-package/src/xgboost_R.h
@ -23,6 +23,22 @@
 */
 XGB_DLL SEXP XGCheckNullPtr_R(SEXP handle);
 /*!
 * \brief set the dimensions of an array in-place
 * \param arr array whose dimensions are to be set (the object itself is modified)
 * \param dims dimensions to set to the array
 * \return NULL value
 */
 XGB_DLL SEXP XGSetArrayDimInplace_R(SEXP arr, SEXP dims);
 /*!
 * \brief set the names of the dimensions of an array in-place
 * \param arr array whose dimension names are to be set (the object itself is modified)
 * \param dim_names names for the dimensions to set
 * \return NULL value
 */
 XGB_DLL SEXP XGSetArrayDimNamesInplace_R(SEXP arr, SEXP dim_names);
 /*!
 * \brief Set global configuration
 * \param json_str a JSON string representing the list of key-value pairs
@ -386,4 +402,14 @@ XGB_DLL SEXP XGBoosterGetAttrNames_R(SEXP handle);
 */
 XGB_DLL SEXP XGBoosterFeatureScore_R(SEXP handle, SEXP json_config);
 /*!
 * \brief Slice a fitted booster model (by rounds)
 * \param handle handle to the fitted booster
 * \param begin_layer start of the slice
 * \param end_layer end of the slice; end_layer=0 is equivalent to end_layer=num_boost_round
 * \param step step size of the slice
 * \return The sliced booster with the requested rounds only
 */
 XGB_DLL SEXP XGBoosterSlice_R(SEXP handle, SEXP begin_layer, SEXP end_layer, SEXP step);
 #endif  // XGBOOST_WRAPPER_R_H_ // NOLINT(*)
--- a/R-package/tests/testthat/test_basic.R
+++ b/R-package/tests/testthat/test_basic.R
@ -33,15 +33,11 @@ test_that("train and predict binary classification", {
  pred <- predict(bst, test$data)
  expect_length(pred, 1611)
-  pred1 <- predict(bst, train$data, ntreelimit = 1)
+  pred1 <- predict(bst, train$data, iterationrange = c(1, 1))
  expect_length(pred1, 6513)
  err_pred1 <- sum((pred1 > 0.5) != train$label) / length(train$label)
  err_log <- attributes(bst)$evaluation_log[1, train_error]
  expect_lt(abs(err_pred1 - err_log), 10e-6)
  pred2 <- predict(bst, train$data, iterationrange = c(1, 2))
  expect_length(pred1, 6513)
  expect_equal(pred1, pred2)
 })
 test_that("parameter validation works", {
@ -117,8 +113,8 @@ test_that("dart prediction works", {
    nrounds = nrounds,
    objective = "reg:squarederror"
  )
-  pred_by_xgboost_0 <- predict(booster_by_xgboost, newdata = d, ntreelimit = 0)
+  pred_by_xgboost_0 <- predict(booster_by_xgboost, newdata = d, iterationrange = NULL)
-  pred_by_xgboost_1 <- predict(booster_by_xgboost, newdata = d, ntreelimit = nrounds)
+  pred_by_xgboost_1 <- predict(booster_by_xgboost, newdata = d, iterationrange = c(1, nrounds))
  expect_true(all(matrix(pred_by_xgboost_0, byrow = TRUE) == matrix(pred_by_xgboost_1, byrow = TRUE)))
  pred_by_xgboost_2 <- predict(booster_by_xgboost, newdata = d, training = TRUE)
@ -139,8 +135,8 @@ test_that("dart prediction works", {
    data = dtrain,
    nrounds = nrounds
  )
-  pred_by_train_0 <- predict(booster_by_train, newdata = dtrain, ntreelimit = 0)
+  pred_by_train_0 <- predict(booster_by_train, newdata = dtrain, iterationrange = NULL)
-  pred_by_train_1 <- predict(booster_by_train, newdata = dtrain, ntreelimit = nrounds)
+  pred_by_train_1 <- predict(booster_by_train, newdata = dtrain, iterationrange = c(1, nrounds))
  pred_by_train_2 <- predict(booster_by_train, newdata = dtrain, training = TRUE)
  expect_true(all(matrix(pred_by_train_0, byrow = TRUE) == matrix(pred_by_xgboost_0, byrow = TRUE)))
@ -162,7 +158,7 @@ test_that("train and predict softprob", {
  )
  expect_false(is.null(attributes(bst)$evaluation_log))
  expect_lt(attributes(bst)$evaluation_log[, min(train_merror)], 0.025)
-  expect_equal(xgb.get.num.boosted.rounds(bst) * 3, xgb.ntree(bst))
+  expect_equal(xgb.get.num.boosted.rounds(bst), 5)
  pred <- predict(bst, as.matrix(iris[, -5]))
  expect_length(pred, nrow(iris) * 3)
  # row sums add up to total probability of 1:
@ -174,12 +170,12 @@ test_that("train and predict softprob", {
  err <- sum(pred_labels != lb) / length(lb)
  expect_equal(attributes(bst)$evaluation_log[5, train_merror], err, tolerance = 5e-6)
  # manually calculate error at the 1st iteration:
-  mpred <- predict(bst, as.matrix(iris[, -5]), reshape = TRUE, ntreelimit = 1)
+  mpred <- predict(bst, as.matrix(iris[, -5]), reshape = TRUE, iterationrange = c(1, 1))
  pred_labels <- max.col(mpred) - 1
  err <- sum(pred_labels != lb) / length(lb)
  expect_equal(attributes(bst)$evaluation_log[1, train_merror], err, tolerance = 5e-6)
-  mpred1 <- predict(bst, as.matrix(iris[, -5]), reshape = TRUE, iterationrange = c(1, 2))
+  mpred1 <- predict(bst, as.matrix(iris[, -5]), reshape = TRUE, iterationrange = c(1, 1))
  expect_equal(mpred, mpred1)
  d <- cbind(
@ -213,7 +209,7 @@ test_that("train and predict softmax", {
  )
  expect_false(is.null(attributes(bst)$evaluation_log))
  expect_lt(attributes(bst)$evaluation_log[, min(train_merror)], 0.025)
-  expect_equal(xgb.get.num.boosted.rounds(bst) * 3, xgb.ntree(bst))
+  expect_equal(xgb.get.num.boosted.rounds(bst), 5)
  pred <- predict(bst, as.matrix(iris[, -5]))
  expect_length(pred, nrow(iris))
@ -233,19 +229,15 @@ test_that("train and predict RF", {
    watchlist = list(train = xgb.DMatrix(train$data, label = lb))
  )
  expect_equal(xgb.get.num.boosted.rounds(bst), 1)
  expect_equal(xgb.ntree(bst), 20)
  pred <- predict(bst, train$data)
  pred_err <- sum((pred > 0.5) != lb) / length(lb)
  expect_lt(abs(attributes(bst)$evaluation_log[1, train_error] - pred_err), 10e-6)
  # expect_lt(pred_err, 0.03)
-  pred <- predict(bst, train$data, ntreelimit = 20)
+  pred <- predict(bst, train$data, iterationrange = c(1, 1))
  pred_err_20 <- sum((pred > 0.5) != lb) / length(lb)
  expect_equal(pred_err_20, pred_err)
  pred1 <- predict(bst, train$data, iterationrange = c(1, 2))
  expect_equal(pred, pred1)
 })
 test_that("train and predict RF with softprob", {
@ -261,7 +253,6 @@ test_that("train and predict RF with softprob", {
    watchlist = list(train = xgb.DMatrix(as.matrix(iris[, -5]), label = lb))
  )
  expect_equal(xgb.get.num.boosted.rounds(bst), 15)
  expect_equal(xgb.ntree(bst), 15 * 3 * 4)
  # predict for all iterations:
  pred <- predict(bst, as.matrix(iris[, -5]), reshape = TRUE)
  expect_equal(dim(pred), c(nrow(iris), 3))
@ -269,7 +260,7 @@ test_that("train and predict RF with softprob", {
  err <- sum(pred_labels != lb) / length(lb)
  expect_equal(attributes(bst)$evaluation_log[nrounds, train_merror], err, tolerance = 5e-6)
  # predict for 7 iterations and adjust for 4 parallel trees per iteration
-  pred <- predict(bst, as.matrix(iris[, -5]), reshape = TRUE, ntreelimit = 7 * 4)
+  pred <- predict(bst, as.matrix(iris[, -5]), reshape = TRUE, iterationrange = c(1, 7))
  err <- sum((max.col(pred) - 1) != lb) / length(lb)
  expect_equal(attributes(bst)$evaluation_log[7, train_merror], err, tolerance = 5e-6)
 })
--- a/R-package/tests/testthat/test_booster_slicing.R
+++ b/R-package/tests/testthat/test_booster_slicing.R
@ -0,0 +1,67 @@
 context("testing xgb.Booster slicing")
 data(agaricus.train, package = "xgboost")
 dm <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label, nthread = 1)
 # Note: large step sizes are needed here in order for the predictions
 # to have substantially different leaf assignments on each tree
 model <- xgb.train(
  params = list(objective = "binary:logistic", nthread = 1, max_depth = 4, eta = 0.5),
  data = dm,
  nrounds = 20
 )
 pred <- predict(model, dm, predleaf = TRUE, reshape = TRUE)
 test_that("Slicing full model", {
  # Each way of requesting the complete range of rounds must serialize to
  # the same raw bytes as the original booster.
  sliced_zero_end <- xgb.slice.Booster(model, 1, 0)
  expect_equal(xgb.save.raw(sliced_zero_end), xgb.save.raw(model))

  sliced_empty_index <- model[]
  expect_equal(xgb.save.raw(sliced_empty_index), xgb.save.raw(model))

  sliced_all_rounds <- model[1:length(model)] # nolint
  expect_equal(xgb.save.raw(sliced_all_rounds), xgb.save.raw(model))
 })
 test_that("Slicing sequence from start", {
  # Leaf predictions of the sliced booster must match the corresponding
  # columns of the full model's leaf predictions.
  expected_leaves <- pred[, seq(1, 10)]

  via_function <- xgb.slice.Booster(model, 1, 10)
  expect_equal(
    predict(via_function, dm, predleaf = TRUE, reshape = TRUE),
    expected_leaves
  )

  via_bracket <- model[1:10]
  expect_equal(
    predict(via_bracket, dm, predleaf = TRUE, reshape = TRUE),
    expected_leaves
  )
 })
 test_that("Slicing sequence from middle", {
  # Same check as slicing from the start, but for rounds 5 through 10.
  expected_leaves <- pred[, seq(5, 10)]

  via_function <- xgb.slice.Booster(model, 5, 10)
  expect_equal(
    predict(via_function, dm, predleaf = TRUE, reshape = TRUE),
    expected_leaves
  )

  via_bracket <- model[5:10]
  expect_equal(
    predict(via_bracket, dm, predleaf = TRUE, reshape = TRUE),
    expected_leaves
  )
 })
 test_that("Slicing with non-unit step", {
  # For every step size, both slicing interfaces must reproduce the leaf
  # predictions of the rounds selected by seq(1, 17, step).
  for (step_size in 2:5) {
    expected_leaves <- pred[, seq(1, 17, step_size)]

    via_function <- xgb.slice.Booster(model, 1, 17, step_size)
    expect_equal(
      predict(via_function, dm, predleaf = TRUE, reshape = TRUE),
      expected_leaves
    )

    via_bracket <- model[seq(1, 17, step_size)]
    expect_equal(
      predict(via_bracket, dm, predleaf = TRUE, reshape = TRUE),
      expected_leaves
    )
  }
 })
 test_that("Slicing with non-unit step from middle", {
  # Same as the non-unit step check, but the sequence starts at round 4.
  for (step_size in 2:5) {
    expected_leaves <- pred[, seq(4, 17, step_size)]

    via_function <- xgb.slice.Booster(model, 4, 17, step_size)
    expect_equal(
      predict(via_function, dm, predleaf = TRUE, reshape = TRUE),
      expected_leaves
    )

    via_bracket <- model[seq(4, 17, step_size)]
    expect_equal(
      predict(via_bracket, dm, predleaf = TRUE, reshape = TRUE),
      expected_leaves
    )
  }
 })
--- a/R-package/tests/testthat/test_callbacks.R
+++ b/R-package/tests/testthat/test_callbacks.R
@ -211,12 +211,11 @@ test_that("early stopping xgb.train works", {
  , "Stopping. Best iteration")
  expect_false(is.null(xgb.attr(bst, "best_iteration")))
  expect_lt(xgb.attr(bst, "best_iteration"), 19)
  expect_equal(xgb.attr(bst, "best_iteration"), xgb.attr(bst, "best_ntreelimit"))
  pred <- predict(bst, dtest)
  expect_equal(length(pred), 1611)
  err_pred <- err(ltest, pred)
-  err_log <- attributes(bst)$evaluation_log[xgb.attr(bst, "best_iteration"), test_error]
+  err_log <- attributes(bst)$evaluation_log[xgb.attr(bst, "best_iteration") + 1, test_error]
  expect_equal(err_log, err_pred, tolerance = 5e-6)
  set.seed(11)
@ -231,8 +230,7 @@ test_that("early stopping xgb.train works", {
  loaded <- xgb.load(fname)
  expect_false(is.null(xgb.attr(loaded, "best_iteration")))
-  expect_equal(xgb.attr(loaded, "best_iteration"), xgb.attr(bst, "best_ntreelimit"))
+  expect_equal(xgb.attr(loaded, "best_iteration"), xgb.attr(bst, "best_iteration"))
  expect_equal(xgb.attr(loaded, "best_ntreelimit"), xgb.attr(bst, "best_ntreelimit"))
 })
 test_that("early stopping using a specific metric works", {
@ -245,12 +243,11 @@ test_that("early stopping using a specific metric works", {
  , "Stopping. Best iteration")
  expect_false(is.null(xgb.attr(bst, "best_iteration")))
  expect_lt(xgb.attr(bst, "best_iteration"), 19)
  expect_equal(xgb.attr(bst, "best_iteration"), xgb.attr(bst, "best_ntreelimit"))
-  pred <- predict(bst, dtest, ntreelimit = xgb.attr(bst, "best_ntreelimit"))
+  pred <- predict(bst, dtest, iterationrange = c(1, xgb.attr(bst, "best_iteration") + 1))
  expect_equal(length(pred), 1611)
  logloss_pred <- sum(-ltest * log(pred) - (1 - ltest) * log(1 - pred)) / length(ltest)
-  logloss_log <- attributes(bst)$evaluation_log[xgb.attr(bst, "best_iteration"), test_logloss]
+  logloss_log <- attributes(bst)$evaluation_log[xgb.attr(bst, "best_iteration") + 1, test_logloss]
  expect_equal(logloss_log, logloss_pred, tolerance = 1e-5)
 })
@ -286,7 +283,6 @@ test_that("early stopping xgb.cv works", {
  , "Stopping. Best iteration")
  expect_false(is.null(cv$best_iteration))
  expect_lt(cv$best_iteration, 19)
  expect_equal(cv$best_iteration, cv$best_ntreelimit)
  # the best error is min error:
  expect_true(cv$evaluation_log[, test_error_mean[cv$best_iteration] == min(test_error_mean)])
 })
@ -354,3 +350,44 @@ test_that("prediction in xgb.cv for softprob works", {
  expect_equal(dim(cv$pred), c(nrow(iris), 3))
  expect_lt(diff(range(rowSums(cv$pred))), 1e-6)
 })
 test_that("prediction in xgb.cv works for multi-quantile", {
  # Five quantiles are requested, so the CV predictions are expected to
  # have one row per observation and five columns.
  data(mtcars)
  target <- mtcars$mpg
  features <- as.matrix(mtcars[, -1])
  dtrain <- xgb.DMatrix(features, label = target, nthread = 1)
  quantile_params <- list(
    objective = "reg:quantileerror",
    quantile_alpha = c(0.1, 0.2, 0.5, 0.8, 0.9),
    nthread = 1
  )
  cv_result <- xgb.cv(
    data = dtrain,
    params = quantile_params,
    nrounds = 5,
    nfold = 3,
    prediction = TRUE,
    verbose = 0
  )
  expect_equal(dim(cv_result$pred), c(nrow(features), 5))
 })
 test_that("prediction in xgb.cv works for multi-output", {
  # The label has two columns (y and -y), so the CV predictions are
  # expected to have one row per observation and two columns.
  data(mtcars)
  target <- mtcars$mpg
  features <- as.matrix(mtcars[, -1])
  dtrain <- xgb.DMatrix(features, label = cbind(target, -target), nthread = 1)
  multi_output_params <- list(
    tree_method = "hist",
    multi_strategy = "multi_output_tree",
    objective = "reg:squarederror",
    nthread = n_threads
  )
  cv_result <- xgb.cv(
    data = dtrain,
    params = multi_output_params,
    nrounds = 5,
    nfold = 3,
    prediction = TRUE,
    verbose = 0
  )
  expect_equal(dim(cv_result$pred), c(nrow(features), 2))
 })
--- a/R-package/tests/testthat/test_glm.R
+++ b/R-package/tests/testthat/test_glm.R
@ -72,10 +72,10 @@ test_that("gblinear early stopping works", {
  booster <- xgb.train(
    param, dtrain, n, list(eval = dtest, train = dtrain), early_stopping_rounds = es_round
  )
-  expect_equal(xgb.attr(booster, "best_iteration"), 5)
+  expect_equal(xgb.attr(booster, "best_iteration"), 4)
  predt_es <- predict(booster, dtrain)
-  n <- xgb.attr(booster, "best_iteration") + es_round
+  n <- xgb.attr(booster, "best_iteration") + es_round + 1
  booster <- xgb.train(
    param, dtrain, n, list(eval = dtest, train = dtrain), early_stopping_rounds = es_round
  )
--- a/R-package/tests/testthat/test_helpers.R
+++ b/R-package/tests/testthat/test_helpers.R
@ -282,9 +282,6 @@ test_that("xgb.model.dt.tree works with and without feature names", {
    expect_equal(dim(dt.tree), c(188, 10))
  expect_output(str(dt.tree), 'Feature.*\\"Age\\"')
  dt.tree.0 <- xgb.model.dt.tree(model = bst.Tree)
  expect_equal(dt.tree, dt.tree.0)
  # when model contains no feature names:
  dt.tree.x <- xgb.model.dt.tree(model = bst.Tree.unnamed)
  expect_output(str(dt.tree.x), 'Feature.*\\"3\\"')
@ -304,7 +301,7 @@ test_that("xgb.model.dt.tree throws error for gblinear", {
 test_that("xgb.importance works with and without feature names", {
  .skip_if_vcd_not_available()
-  importance.Tree <- xgb.importance(feature_names = feature.names, model = bst.Tree)
+  importance.Tree <- xgb.importance(feature_names = feature.names, model = bst.Tree.unnamed)
  if (!flag_32bit)
    expect_equal(dim(importance.Tree), c(7, 4))
  expect_equal(colnames(importance.Tree), c("Feature", "Gain", "Cover", "Frequency"))
@ -330,9 +327,8 @@ test_that("xgb.importance works with and without feature names", {
  importance <- xgb.importance(feature_names = feature.names, model = bst.Tree, trees = trees)
  importance_from_dump <- function() {
-    model_text_dump <- xgb.dump(model = bst.Tree.unnamed, with_stats = TRUE, trees = trees)
+    model_text_dump <- xgb.dump(model = bst.Tree, with_stats = TRUE, trees = trees)
    imp <- xgb.model.dt.tree(
      feature_names = feature.names,
      text = model_text_dump,
      trees = trees
    )[
--- a/R-package/tests/testthat/test_ranking.R
+++ b/R-package/tests/testthat/test_ranking.R
@ -44,7 +44,7 @@ test_that('Test ranking with weighted data', {
  expect_true(all(diff(attributes(bst)$evaluation_log$train_auc) >= 0))
  expect_true(all(diff(attributes(bst)$evaluation_log$train_aucpr) >= 0))
  for (i in 1:10) {
-    pred <- predict(bst, newdata = dtrain, ntreelimit = i)
+    pred <- predict(bst, newdata = dtrain, iterationrange = c(1, i))
    # is_sorted[i]: is i-th group correctly sorted by the ranking predictor?
    is_sorted <- lapply(seq(1, 20, by = 5),
      function(k) {
--- a/demo/guide-python/continuation.py
+++ b/demo/guide-python/continuation.py
@ -16,14 +16,14 @@ def training_continuation(tmpdir: str, use_pickle: bool) -> None:
    """Basic training continuation."""
    # Train 128 iterations in 1 session
    X, y = load_breast_cancer(return_X_y=True)
-    clf = xgboost.XGBClassifier(n_estimators=128)
+    clf = xgboost.XGBClassifier(n_estimators=128, eval_metric="logloss")
-    clf.fit(X, y, eval_set=[(X, y)], eval_metric="logloss")
+    clf.fit(X, y, eval_set=[(X, y)])
    print("Total boosted rounds:", clf.get_booster().num_boosted_rounds())
    # Train 128 iterations in 2 sessions, with the first one running for 32 iterations
    # and the second one running for 96 iterations
-    clf = xgboost.XGBClassifier(n_estimators=32)
+    clf = xgboost.XGBClassifier(n_estimators=32, eval_metric="logloss")
-    clf.fit(X, y, eval_set=[(X, y)], eval_metric="logloss")
+    clf.fit(X, y, eval_set=[(X, y)])
    assert clf.get_booster().num_boosted_rounds() == 32
    # load back the model, this could be a checkpoint
@ -39,8 +39,8 @@ def training_continuation(tmpdir: str, use_pickle: bool) -> None:
        loaded = xgboost.XGBClassifier()
        loaded.load_model(path)
-    clf = xgboost.XGBClassifier(n_estimators=128 - 32)
+    clf = xgboost.XGBClassifier(n_estimators=128 - 32, eval_metric="logloss")
-    clf.fit(X, y, eval_set=[(X, y)], eval_metric="logloss", xgb_model=loaded)
+    clf.fit(X, y, eval_set=[(X, y)], xgb_model=loaded)
    print("Total boosted rounds:", clf.get_booster().num_boosted_rounds())
@ -56,19 +56,24 @@ def training_continuation_early_stop(tmpdir: str, use_pickle: bool) -> None:
    n_estimators = 512
    X, y = load_breast_cancer(return_X_y=True)
-    clf = xgboost.XGBClassifier(n_estimators=n_estimators)
+    clf = xgboost.XGBClassifier(
-    clf.fit(X, y, eval_set=[(X, y)], eval_metric="logloss", callbacks=[early_stop])
+        n_estimators=n_estimators, eval_metric="logloss", callbacks=[early_stop]
    )
    clf.fit(X, y, eval_set=[(X, y)])
    print("Total boosted rounds:", clf.get_booster().num_boosted_rounds())
    best = clf.best_iteration
    # Train 512 iterations in 2 sessions, with the first one running for 128 iterations
    # and the second one running until early stop.
-    clf = xgboost.XGBClassifier(n_estimators=128)
+    clf = xgboost.XGBClassifier(
        n_estimators=128, eval_metric="logloss", callbacks=[early_stop]
    )
    # Reinitialize the early stop callback
    early_stop = xgboost.callback.EarlyStopping(
        rounds=early_stopping_rounds, save_best=True
    )
-    clf.fit(X, y, eval_set=[(X, y)], eval_metric="logloss", callbacks=[early_stop])
+    clf.set_params(callbacks=[early_stop])
    clf.fit(X, y, eval_set=[(X, y)])
    assert clf.get_booster().num_boosted_rounds() == 128
    # load back the model, this could be a checkpoint
@ -87,13 +92,13 @@ def training_continuation_early_stop(tmpdir: str, use_pickle: bool) -> None:
    early_stop = xgboost.callback.EarlyStopping(
        rounds=early_stopping_rounds, save_best=True
    )
-    clf = xgboost.XGBClassifier(n_estimators=n_estimators - 128)
+    clf = xgboost.XGBClassifier(
        n_estimators=n_estimators - 128, eval_metric="logloss", callbacks=[early_stop]
    )
    clf.fit(
        X,
        y,
        eval_set=[(X, y)],
        eval_metric="logloss",
        callbacks=[early_stop],
        xgb_model=loaded,
    )
--- a/demo/guide-python/quantile_regression.py
+++ b/demo/guide-python/quantile_regression.py
@ -46,10 +46,11 @@ def quantile_loss(args: argparse.Namespace) -> None:
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=rng)
    # We will be using the `hist` tree method, quantile DMatrix can be used to preserve
-    # memory.
+    # memory (which has nothing to do with quantile regression itself; see its
    # documentation for details).
    # Do not use the `exact` tree method for quantile regression, otherwise the
    # performance might drop.
-    Xy = xgb.QuantileDMatrix(X, y)
+    Xy = xgb.QuantileDMatrix(X_train, y_train)
    # use Xy as a reference
    Xy_test = xgb.QuantileDMatrix(X_test, y_test, ref=Xy)
--- a/demo/guide-python/sklearn_evals_result.py
+++ b/demo/guide-python/sklearn_evals_result.py
@ -16,30 +16,35 @@ labels, y = np.unique(y, return_inverse=True)
 X_train, X_test = X[:1600], X[1600:]
 y_train, y_test = y[:1600], y[1600:]
-param_dist = {'objective':'binary:logistic', 'n_estimators':2}
+param_dist = {"objective": "binary:logistic", "n_estimators": 2}
-clf = xgb.XGBModel(**param_dist)
+clf = xgb.XGBModel(
    **param_dist,
    eval_metric="logloss",
 )
 # Or you can use: clf = xgb.XGBClassifier(**param_dist)
-clf.fit(X_train, y_train,
+clf.fit(
-        eval_set=[(X_train, y_train), (X_test, y_test)],
+    X_train,
-        eval_metric='logloss',
+    y_train,
-        verbose=True)
+    eval_set=[(X_train, y_train), (X_test, y_test)],
    verbose=True,
 )
 # Load evals result by calling the evals_result() function
 evals_result = clf.evals_result()
-print('Access logloss metric directly from validation_0:')
+print("Access logloss metric directly from validation_0:")
-print(evals_result['validation_0']['logloss'])
+print(evals_result["validation_0"]["logloss"])
-print('')
+print("")
-print('Access metrics through a loop:')
+print("Access metrics through a loop:")
 for e_name, e_mtrs in evals_result.items():
-    print('- {}'.format(e_name))
+    print("- {}".format(e_name))
    for e_mtr_name, e_mtr_vals in e_mtrs.items():
-        print('   - {}'.format(e_mtr_name))
+        print("   - {}".format(e_mtr_name))
-        print('      - {}'.format(e_mtr_vals))
+        print("      - {}".format(e_mtr_vals))
-print('')
+print("")
-print('Access complete dict:')
+print("Access complete dict:")
 print(evals_result)
--- a/demo/guide-python/sklearn_examples.py
+++ b/demo/guide-python/sklearn_examples.py
@ -1,4 +1,4 @@
-'''
+"""
 Collection of examples for using sklearn interface
 ==================================================
@ -8,7 +8,7 @@ For an introduction to XGBoost's scikit-learn estimator interface, see
 Created on 1 Apr 2015
@author: Jamie Hall
-'''
+"""
 import pickle
 import numpy as np
@ -22,8 +22,8 @@ rng = np.random.RandomState(31337)
 print("Zeros and Ones from the Digits dataset: binary classification")
 digits = load_digits(n_class=2)
-y = digits['target']
+y = digits["target"]
-X = digits['data']
+X = digits["data"]
 kf = KFold(n_splits=2, shuffle=True, random_state=rng)
 for train_index, test_index in kf.split(X):
    xgb_model = xgb.XGBClassifier(n_jobs=1).fit(X[train_index], y[train_index])
@ -33,8 +33,8 @@ for train_index, test_index in kf.split(X):
 print("Iris: multiclass classification")
 iris = load_iris()
-y = iris['target']
+y = iris["target"]
-X = iris['data']
+X = iris["data"]
 kf = KFold(n_splits=2, shuffle=True, random_state=rng)
 for train_index, test_index in kf.split(X):
    xgb_model = xgb.XGBClassifier(n_jobs=1).fit(X[train_index], y[train_index])
@ -53,9 +53,13 @@ for train_index, test_index in kf.split(X):
 print("Parameter optimization")
 xgb_model = xgb.XGBRegressor(n_jobs=1)
-clf = GridSearchCV(xgb_model,
+clf = GridSearchCV(
-                   {'max_depth': [2, 4],
+    xgb_model,
-                    'n_estimators': [50, 100]}, verbose=1, n_jobs=1, cv=3)
+    {"max_depth": [2, 4], "n_estimators": [50, 100]},
    verbose=1,
    n_jobs=1,
    cv=3,
 )
 clf.fit(X, y)
 print(clf.best_score_)
 print(clf.best_params_)
@ -69,9 +73,8 @@ print(np.allclose(clf.predict(X), clf2.predict(X)))
 # Early-stopping
-X = digits['data']
+X = digits["data"]
-y = digits['target']
+y = digits["target"]
 X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
-clf = xgb.XGBClassifier(n_jobs=1)
+clf = xgb.XGBClassifier(n_jobs=1, early_stopping_rounds=10, eval_metric="auc")
-clf.fit(X_train, y_train, early_stopping_rounds=10, eval_metric="auc",
+clf.fit(X_train, y_train, eval_set=[(X_test, y_test)])
        eval_set=[(X_test, y_test)])
--- a/demo/guide-python/sklearn_parallel.py
+++ b/demo/guide-python/sklearn_parallel.py
@ -12,6 +12,7 @@ import xgboost as xgb
 if __name__ == "__main__":
    print("Parallel Parameter optimization")
    X, y = fetch_california_housing(return_X_y=True)
    # Make sure the number of threads is balanced.
    xgb_model = xgb.XGBRegressor(
        n_jobs=multiprocessing.cpu_count() // 2, tree_method="hist"
    )
--- a/dev/change_scala_version.py
+++ b/dev/change_scala_version.py
@ -0,0 +1,79 @@
 import argparse
 import pathlib
 import re
 import shutil
 def main(args):
     """Rewrite the JVM packages' pom.xml files for the requested Scala version.

     Parameters
     ----------
     args :
         Parsed command-line namespace.  ``args.scala_version`` selects the
         target Scala binary version ("2.12" or "2.13").  When
         ``args.purge_artifacts`` is true, every ``target`` directory found
         under ``jvm-packages/`` is removed first.

     Raises
     ------
     ValueError
         If ``args.scala_version`` is not one of the supported versions.

     Notes
     -----
     ``jvm-packages/`` is resolved relative to the current working directory.
     """
     # Scala binary version -> full patch version written to <scala.version>.
     patch_versions = {"2.12": "2.12.18", "2.13": "2.13.11"}
     if args.scala_version not in patch_versions:
         raise ValueError(f"Unsupported Scala version: {args.scala_version}")
     scala_ver = args.scala_version
     scala_patchver = patch_versions[scala_ver]

     jvm_root = pathlib.Path("jvm-packages/")

     # Clean artifacts
     if args.purge_artifacts:
         for target in jvm_root.glob("**/target"):
             if target.is_dir():
                 print(f"Removing {target}...")
                 shutil.rmtree(target)

     # Build the substitutions once instead of once per line.  Names and tag
     # dots are escaped so that "." only ever matches a literal dot.
     artifact_subs = [
         (
             re.compile(f"<artifactId>{re.escape(artifact)}_[0-9.]*"),
             f"<artifactId>{artifact}_{scala_ver}",
         )
         for artifact in (
             "xgboost-jvm",
             "xgboost4j",
             "xgboost4j-gpu",
             "xgboost4j-spark",
             "xgboost4j-spark-gpu",
             "xgboost4j-flink",
             "xgboost4j-example",
         )
     ]
     scalaver_re = re.compile(r"<scala\.version>[0-9.]*")
     scala_binver_re = re.compile(r"<scala\.binary\.version>[0-9.]*")

     # Update pom.xml
     for pom in jvm_root.glob("**/pom.xml"):
         print(f"Updating {pom}...")
         with open(pom, "r", encoding="utf-8") as f:
             lines = f.readlines()

         replaced_scalaver = False
         replaced_scala_binver = False
         updated = []
         for line in lines:
             for pattern, replacement in artifact_subs:
                 line = pattern.sub(replacement, line)
             # Only replace the first occurrence of scala.version
             if not replaced_scalaver:
                 line, nsubs = scalaver_re.subn(
                     f"<scala.version>{scala_patchver}", line
                 )
                 replaced_scalaver = nsubs > 0
             # Only replace the first occurrence of scala.binary.version
             if not replaced_scala_binver:
                 line, nsubs = scala_binver_re.subn(
                     f"<scala.binary.version>{scala_ver}", line
                 )
                 replaced_scala_binver = nsubs > 0
             updated.append(line)

         # Reopen for writing only after every line has been transformed, so an
         # error during substitution cannot leave a truncated pom.xml behind.
         with open(pom, "w", encoding="utf-8") as f:
             f.writelines(updated)
 if __name__ == "__main__":
     # Script entry point: read the target Scala version (and the optional
     # artifact purge switch) from the command line and hand them to main().
     cli = argparse.ArgumentParser()
     cli.add_argument("--purge-artifacts", action="store_true")
     cli.add_argument(
         "--scala-version",
         choices=["2.12", "2.13"],
         help="Version of Scala to use in the JVM packages",
         required=True,
         type=str,
     )
     main(cli.parse_args())
--- a/dev/prepare_jvm_release.py
+++ b/dev/prepare_jvm_release.py
@ -2,7 +2,6 @@ import argparse
 import errno
 import glob
 import os
 import platform
 import re
 import shutil
 import subprocess
@ -88,10 +87,6 @@ def main():
        help="Version of the release being prepared",
    )
    args = parser.parse_args()
    if sys.platform != "darwin" or platform.machine() != "arm64":
        raise NotImplementedError("Please run this script using an M1 Mac")
    version = args.release_version
    expected_git_tag = "v" + version
    current_git_tag = get_current_git_tag()
@ -141,6 +136,7 @@ def main():
            ("linux", "x86_64"),
            ("windows", "x86_64"),
            ("macos", "x86_64"),
            ("macos", "aarch64"),
        ]:
            output_dir = f"xgboost4j/src/main/resources/lib/{os_ident}/{arch}"
            maybe_makedirs(output_dir)
@ -164,6 +160,10 @@ def main():
            url=f"{nightly_bucket_prefix}/{git_branch}/libxgboost4j/libxgboost4j_{commit_hash}.dylib",
            filename="xgboost4j/src/main/resources/lib/macos/x86_64/libxgboost4j.dylib",
        )
        retrieve(
            url=f"{nightly_bucket_prefix}/{git_branch}/libxgboost4j/libxgboost4j_m1_{commit_hash}.dylib",
            filename="xgboost4j/src/main/resources/lib/macos/aarch64/libxgboost4j.dylib",
        )
        with tempfile.TemporaryDirectory() as tempdir:
            # libxgboost4j.so for Linux x86_64, CPU only
@ -210,13 +210,31 @@ def main():
        "2. Store the Sonatype credentials in .m2/settings.xml. See insturctions in "
        "https://central.sonatype.org/publish/publish-maven/"
    )
-    print("3. Now on a Mac machine, run:")
+    print(
-    print("   GPG_TTY=$(tty) mvn deploy -Prelease -DskipTests")
+        "3. Now on a Linux machine, run the following to build Scala 2.12 artifacts. "
        "Make sure to use an Internet connection with fast upload speed:"
    )
    print(
        "   # Skip native build, since we have all needed native binaries from CI\n"
        "   export MAVEN_SKIP_NATIVE_BUILD=1\n"
        "   GPG_TTY=$(tty) mvn deploy -Prelease -DskipTests"
    )
    print(
        "4. Log into https://oss.sonatype.org/. On the left menu panel, click Staging "
-        "Repositories. Visit the URL https://oss.sonatype.org/content/repositories/mldmlc-1085 "
+        "Repositories. Visit the URL https://oss.sonatype.org/content/repositories/mldmlc-xxxx "
        "to inspect the staged JAR files. Finally, press Release button to publish the "
-        "artifacts to the Maven Central repository."
+        "artifacts to the Maven Central repository. The top-level metapackage should be "
        "named xgboost-jvm_2.12."
    )
    print(
        "5. Remove the Scala 2.12 artifacts and build Scala 2.13 artifacts:\n"
        "   export MAVEN_SKIP_NATIVE_BUILD=1\n"
        "   python dev/change_scala_version.py --scala-version 2.13 --purge-artifacts\n"
        "   GPG_TTY=$(tty) mvn deploy -Prelease -DskipTests"
    )
    print(
        "6. Go to https://oss.sonatype.org/ to release the Scala 2.13 artifacts. "
        "The top-level metapackage should be named xgboost-jvm_2.13."
    )
--- a/dev/release-artifacts.py
+++ b/dev/release-artifacts.py
@ -153,7 +153,7 @@ Following steps should be done manually:
 def download_r_packages(
    release: str, branch: str, rc: str, commit: str, outdir: str
 ) -> Tuple[Dict[str, str], List[str]]:
-    platforms = ["win64", "linux"]
+    platforms = ["linux"]
    dirname = os.path.join(outdir, "r-packages")
    if not os.path.exists(dirname):
        os.mkdir(dirname)
--- a/doc/tutorials/custom_metric_obj.rst
+++ b/doc/tutorials/custom_metric_obj.rst
@ -123,11 +123,11 @@ monitor our model's performance.  As mentioned above, the default metric for ``S
        elements = np.power(np.log1p(y) - np.log1p(predt), 2)
        return 'PyRMSLE', float(np.sqrt(np.sum(elements) / len(y)))
-Since we are demonstrating in Python, the metric or objective need not be a function,
+Since we are demonstrating in Python, the metric or objective need not be a function; any
-any callable object should suffice.  Similar to the objective function, our metric also
+callable object should suffice.  Similar to the objective function, our metric also
-accepts ``predt`` and ``dtrain`` as inputs, but returns the name of the metric itself and a
+accepts ``predt`` and ``dtrain`` as inputs, but returns the name of the metric itself and
-floating point value as the result.  After passing it into XGBoost as argument of ``feval``
+a floating point value as the result.  After passing it into XGBoost as the argument of the
-parameter:
+``custom_metric`` parameter:
 .. code-block:: python
@ -136,7 +136,7 @@ parameter:
              dtrain=dtrain,
              num_boost_round=10,
              obj=squared_log,
-              feval=rmsle,
+              custom_metric=rmsle,
              evals=[(dtrain, 'dtrain'), (dtest, 'dtest')],
              evals_result=results)
--- a/include/xgboost/tree_model.h
+++ b/include/xgboost/tree_model.h
@ -398,8 +398,8 @@ class RegTree : public Model {
      if (!func(nidx)) {
        return;
      }
-      auto left = self[nidx].LeftChild();
+      auto left = self.LeftChild(nidx);
-      auto right = self[nidx].RightChild();
+      auto right = self.RightChild(nidx);
      if (left != RegTree::kInvalidNodeId) {
        nodes.push(left);
      }
--- a/jvm-packages/create_jni.py
+++ b/jvm-packages/create_jni.py
@ -1,6 +1,6 @@
 #!/usr/bin/env python
 import errno
 import argparse
 import errno
 import glob
 import os
 import platform
@ -19,13 +19,12 @@ CONFIG = {
    "USE_HDFS": "OFF",
    "USE_AZURE": "OFF",
    "USE_S3": "OFF",
    "USE_CUDA": "OFF",
    "USE_NCCL": "OFF",
    "USE_HIP": "OFF",
    "USE_RCCL": "OFF",
    "JVM_BINDINGS": "ON",
-    "LOG_CAPI_INVOCATION": "OFF"
+    "LOG_CAPI_INVOCATION": "OFF",
 }
@ -72,18 +71,13 @@ def normpath(path):
        return normalized
-if __name__ == "__main__":
+def native_build(args):
    parser = argparse.ArgumentParser()
    parser.add_argument('--log-capi-invocation', type=str, choices=['ON', 'OFF'], default='OFF')
    parser.add_argument('--use-cuda', type=str, choices=['ON', 'OFF'], default='OFF')
    parser.add_argument('--use-hip', type=str, choices=['ON', 'OFF'], default='OFF')
    cli_args = parser.parse_args()
    if sys.platform == "darwin":
        # Enable of your compiler supports OpenMP.
        CONFIG["USE_OPENMP"] = "OFF"
-        os.environ["JAVA_HOME"] = subprocess.check_output(
+        os.environ["JAVA_HOME"] = (
-            "/usr/libexec/java_home").strip().decode()
+            subprocess.check_output("/usr/libexec/java_home").strip().decode()
        )
    print("building Java wrapper")
    with cd(".."):
@ -92,7 +86,7 @@ if __name__ == "__main__":
        with cd(build_dir):
            if sys.platform == "win32":
                # Force x64 build on Windows.
-                maybe_generator = ' -A x64'
+                maybe_generator = " -A x64"
            else:
                maybe_generator = ""
            if sys.platform == "linux":
@ -100,12 +94,12 @@ if __name__ == "__main__":
            else:
                maybe_parallel_build = ""
-            if cli_args.log_capi_invocation == 'ON':
+            if cli_args.log_capi_invocation == "ON":
-                CONFIG['LOG_CAPI_INVOCATION'] = 'ON'
+                CONFIG["LOG_CAPI_INVOCATION"] = "ON"
-            if cli_args.use_cuda == 'ON':
+            if cli_args.use_cuda == "ON":
-                CONFIG['USE_CUDA'] = 'ON'
+                CONFIG["USE_CUDA"] = "ON"
-                CONFIG['USE_NCCL'] = 'ON'
+                CONFIG["USE_NCCL"] = "ON"
                CONFIG["USE_DLOPEN_NCCL"] = "OFF"
            elif cli_args.use_hip== 'ON':
                CONFIG['USE_HIP'] = 'ON'
@ -123,7 +117,7 @@ if __name__ == "__main__":
            if gpu_arch_flag is not None:
                args.append("%s" % gpu_arch_flag)
-            lib_dir = os.path.join(os.pardir, 'lib')
+            lib_dir = os.path.join(os.pardir, "lib")
            if os.path.exists(lib_dir):
                shutil.rmtree(lib_dir)
            run("cmake .. " + " ".join(args) + maybe_generator)
@ -133,8 +127,10 @@ if __name__ == "__main__":
            run(f'"{sys.executable}" mapfeat.py')
            run(f'"{sys.executable}" mknfold.py machine.txt 1')
-    xgboost4j = 'xgboost4j-gpu' if cli_args.use_cuda == 'ON' or cli_args.use_hip== 'ON' else 'xgboost4j'
+    xgboost4j = "xgboost4j-gpu" if cli_args.use_cuda == "ON" or cli_args.use_hip== "ON" else "xgboost4j"
-    xgboost4j_spark = 'xgboost4j-spark-gpu' if cli_args.use_cuda == 'ON' or cli_args.use_hip == 'ON' else 'xgboost4j-spark'
+    xgboost4j_spark = (
        "xgboost4j-spark-gpu" if cli_args.use_cuda == "ON" or cli_args.use_hip == "ON" else "xgboost4j-spark"
    )
    print("copying native library")
    library_name, os_folder = {
@ -149,14 +145,19 @@ if __name__ == "__main__":
        "i86pc": "x86_64",  # on Solaris x86_64
        "sun4v": "sparc",  # on Solaris sparc
        "arm64": "aarch64",  # on macOS & Windows ARM 64-bit
-        "aarch64": "aarch64"
+        "aarch64": "aarch64",
    }[platform.machine().lower()]
-    output_folder = "{}/src/main/resources/lib/{}/{}".format(xgboost4j, os_folder, arch_folder)
+    output_folder = "{}/src/main/resources/lib/{}/{}".format(
        xgboost4j, os_folder, arch_folder
    )
    maybe_makedirs(output_folder)
    cp("../lib/" + library_name, output_folder)
    print("copying pure-Python tracker")
-    cp("../python-package/xgboost/tracker.py", "{}/src/main/resources".format(xgboost4j))
+    cp(
        "../python-package/xgboost/tracker.py",
        "{}/src/main/resources".format(xgboost4j),
    )
    print("copying train/test files")
    maybe_makedirs("{}/src/test/resources".format(xgboost4j_spark))
@ -172,3 +173,18 @@ if __name__ == "__main__":
    maybe_makedirs("{}/src/test/resources".format(xgboost4j))
    for file in glob.glob("../demo/data/agaricus.*"):
        cp(file, "{}/src/test/resources".format(xgboost4j))
 if __name__ == "__main__":
     # Script entry point.  When MAVEN_SKIP_NATIVE_BUILD is present in the
     # environment (with any value) nothing is built; otherwise the command-line
     # switches are parsed and forwarded to native_build().
     if "MAVEN_SKIP_NATIVE_BUILD" in os.environ:
         print("MAVEN_SKIP_NATIVE_BUILD is set. Skipping native build...")
     else:
         parser = argparse.ArgumentParser()
         parser.add_argument(
             "--log-capi-invocation", type=str, choices=["ON", "OFF"], default="OFF"
         )
         parser.add_argument(
             "--use-cuda", type=str, choices=["ON", "OFF"], default="OFF"
         )
         # native_build() reads cli_args.use_hip, so the switch has to be declared
         # here; without it the attribute lookup fails and `--use-hip` itself is
         # rejected as an unrecognized argument.
         parser.add_argument(
             "--use-hip", type=str, choices=["ON", "OFF"], default="OFF"
         )
         cli_args = parser.parse_args()
         native_build(cli_args)
--- a/jvm-packages/pom.xml
+++ b/jvm-packages/pom.xml
@ -5,7 +5,7 @@
    <modelVersion>4.0.0</modelVersion>
    <groupId>ml.dmlc</groupId>
-    <artifactId>xgboost-jvm</artifactId>
+    <artifactId>xgboost-jvm_2.12</artifactId>
    <version>2.1.0-SNAPSHOT</version>
    <packaging>pom</packaging>
    <name>XGBoost JVM Package</name>
@ -43,10 +43,10 @@
        <maven.wagon.http.retryHandler.count>5</maven.wagon.http.retryHandler.count>
        <log.capi.invocation>OFF</log.capi.invocation>
        <use.cuda>OFF</use.cuda>
-        <cudf.version>23.10.0</cudf.version>
+        <cudf.version>23.12.1</cudf.version>
-        <spark.rapids.version>23.10.0</spark.rapids.version>
+        <spark.rapids.version>23.12.1</spark.rapids.version>
        <cudf.classifier>cuda12</cudf.classifier>
        <use.hip>OFF</use.hip>
        <cudf.classifier>cuda11</cudf.classifier>
        <scalatest.version>3.2.17</scalatest.version>
        <scala-collection-compat.version>2.11.0</scala-collection-compat.version>
@ -91,14 +91,6 @@
            </modules>
        </profile>
        <profile>
            <id>scala-2.13</id>
            <properties>
                <scala.binary.version>2.13</scala.binary.version>
                <scala.version>2.13.11</scala.version>
            </properties>
        </profile>
        <!-- gpu profile with both cpu and gpu test suites -->
        <profile>
            <id>gpu</id>
--- a/jvm-packages/xgboost4j-example/pom.xml
+++ b/jvm-packages/xgboost4j-example/pom.xml
@ -5,11 +5,11 @@
    <modelVersion>4.0.0</modelVersion>
    <parent>
        <groupId>ml.dmlc</groupId>
-        <artifactId>xgboost-jvm</artifactId>
+        <artifactId>xgboost-jvm_2.12</artifactId>
        <version>2.1.0-SNAPSHOT</version>
    </parent>
    <name>xgboost4j-example</name>
-    <artifactId>xgboost4j-example_${scala.binary.version}</artifactId>
+    <artifactId>xgboost4j-example_2.12</artifactId>
    <version>2.1.0-SNAPSHOT</version>
    <packaging>jar</packaging>
    <build>
@ -26,7 +26,7 @@
    <dependencies>
        <dependency>
            <groupId>ml.dmlc</groupId>
-            <artifactId>xgboost4j-spark_${scala.binary.version}</artifactId>
+            <artifactId>xgboost4j-spark_2.12</artifactId>
            <version>${project.version}</version>
        </dependency>
        <dependency>
@ -37,7 +37,7 @@
        </dependency>
        <dependency>
            <groupId>ml.dmlc</groupId>
-            <artifactId>xgboost4j-flink_${scala.binary.version}</artifactId>
+            <artifactId>xgboost4j-flink_2.12</artifactId>
            <version>${project.version}</version>
        </dependency>
    </dependencies>
--- a/jvm-packages/xgboost4j-flink/pom.xml
+++ b/jvm-packages/xgboost4j-flink/pom.xml
@ -5,12 +5,12 @@
    <modelVersion>4.0.0</modelVersion>
    <parent>
        <groupId>ml.dmlc</groupId>
-        <artifactId>xgboost-jvm</artifactId>
+        <artifactId>xgboost-jvm_2.12</artifactId>
        <version>2.1.0-SNAPSHOT</version>
    </parent>
    <name>xgboost4j-flink</name>
-    <artifactId>xgboost4j-flink_${scala.binary.version}</artifactId>
+    <artifactId>xgboost4j-flink_2.12</artifactId>
    <version>2.1.0-SNAPSHOT</version>
    <properties>
      <flink-ml.version>2.2.0</flink-ml.version>
@ -30,7 +30,7 @@
    <dependencies>
        <dependency>
            <groupId>ml.dmlc</groupId>
-            <artifactId>xgboost4j_${scala.binary.version}</artifactId>
+            <artifactId>xgboost4j_2.12</artifactId>
            <version>${project.version}</version>
        </dependency>
        <dependency>
--- a/jvm-packages/xgboost4j-gpu/pom.xml
+++ b/jvm-packages/xgboost4j-gpu/pom.xml
@ -5,10 +5,10 @@
    <modelVersion>4.0.0</modelVersion>
    <parent>
        <groupId>ml.dmlc</groupId>
-        <artifactId>xgboost-jvm</artifactId>
+        <artifactId>xgboost-jvm_2.12</artifactId>
        <version>2.1.0-SNAPSHOT</version>
    </parent>
-    <artifactId>xgboost4j-gpu_${scala.binary.version}</artifactId>
+    <artifactId>xgboost4j-gpu_2.12</artifactId>
    <name>xgboost4j-gpu</name>
    <version>2.1.0-SNAPSHOT</version>
    <packaging>jar</packaging>
--- a/jvm-packages/xgboost4j-spark-gpu/pom.xml
+++ b/jvm-packages/xgboost4j-spark-gpu/pom.xml
@ -5,11 +5,11 @@
    <modelVersion>4.0.0</modelVersion>
    <parent>
        <groupId>ml.dmlc</groupId>
-        <artifactId>xgboost-jvm</artifactId>
+        <artifactId>xgboost-jvm_2.12</artifactId>
        <version>2.1.0-SNAPSHOT</version>
    </parent>
    <name>xgboost4j-spark-gpu</name>
-    <artifactId>xgboost4j-spark-gpu_${scala.binary.version}</artifactId>
+    <artifactId>xgboost4j-spark-gpu_2.12</artifactId>
    <build>
        <plugins>
            <plugin>
@ -24,7 +24,7 @@
    <dependencies>
        <dependency>
            <groupId>ml.dmlc</groupId>
-            <artifactId>xgboost4j-gpu_${scala.binary.version}</artifactId>
+            <artifactId>xgboost4j-gpu_2.12</artifactId>
            <version>${project.version}</version>
        </dependency>
        <dependency>
--- a/jvm-packages/xgboost4j-spark/pom.xml
+++ b/jvm-packages/xgboost4j-spark/pom.xml
@ -5,11 +5,11 @@
    <modelVersion>4.0.0</modelVersion>
    <parent>
        <groupId>ml.dmlc</groupId>
-        <artifactId>xgboost-jvm</artifactId>
+        <artifactId>xgboost-jvm_2.12</artifactId>
        <version>2.1.0-SNAPSHOT</version>
    </parent>
    <name>xgboost4j-spark</name>
-    <artifactId>xgboost4j-spark_${scala.binary.version}</artifactId>
+    <artifactId>xgboost4j-spark_2.12</artifactId>
    <build>
        <plugins>
            <plugin>
@ -24,7 +24,7 @@
    <dependencies>
        <dependency>
            <groupId>ml.dmlc</groupId>
-            <artifactId>xgboost4j_${scala.binary.version}</artifactId>
+            <artifactId>xgboost4j_2.12</artifactId>
            <version>${project.version}</version>
        </dependency>
        <dependency>
--- a/jvm-packages/xgboost4j/pom.xml
+++ b/jvm-packages/xgboost4j/pom.xml
@ -5,11 +5,11 @@
    <modelVersion>4.0.0</modelVersion>
    <parent>
        <groupId>ml.dmlc</groupId>
-        <artifactId>xgboost-jvm</artifactId>
+        <artifactId>xgboost-jvm_2.12</artifactId>
        <version>2.1.0-SNAPSHOT</version>
    </parent>
    <name>xgboost4j</name>
-    <artifactId>xgboost4j_${scala.binary.version}</artifactId>
+    <artifactId>xgboost4j_2.12</artifactId>
    <version>2.1.0-SNAPSHOT</version>
    <packaging>jar</packaging>
--- a/python-package/xgboost/dask/init.py
+++ b/python-package/xgboost/dask/init.py
@ -61,7 +61,7 @@ from typing import (
 import numpy
 from xgboost import collective, config
-from xgboost._typing import _T, FeatureNames, FeatureTypes, ModelIn
+from xgboost._typing import _T, FeatureNames, FeatureTypes
 from xgboost.callback import TrainingCallback
 from xgboost.compat import DataFrame, LazyLoader, concat, lazy_isinstance
 from xgboost.core import (
@ -1774,14 +1774,11 @@ class DaskXGBRegressor(DaskScikitLearnBase, XGBRegressorBase):
        sample_weight: Optional[_DaskCollection],
        base_margin: Optional[_DaskCollection],
        eval_set: Optional[Sequence[Tuple[_DaskCollection, _DaskCollection]]],
        eval_metric: Optional[Union[str, Sequence[str], Metric]],
        sample_weight_eval_set: Optional[Sequence[_DaskCollection]],
        base_margin_eval_set: Optional[Sequence[_DaskCollection]],
        early_stopping_rounds: Optional[int],
        verbose: Union[int, bool],
        xgb_model: Optional[Union[Booster, XGBModel]],
        feature_weights: Optional[_DaskCollection],
        callbacks: Optional[Sequence[TrainingCallback]],
    ) -> _DaskCollection:
        params = self.get_xgb_params()
        dtrain, evals = await _async_wrap_evaluation_matrices(
@ -1809,9 +1806,7 @@ class DaskXGBRegressor(DaskScikitLearnBase, XGBRegressorBase):
            obj: Optional[Callable] = _objective_decorator(self.objective)
        else:
            obj = None
-        model, metric, params, early_stopping_rounds, callbacks = self._configure_fit(
+        model, metric, params = self._configure_fit(xgb_model, params)
            xgb_model, eval_metric, params, early_stopping_rounds, callbacks
        )
        results = await self.client.sync(
            _train_async,
            asynchronous=True,
@ -1826,8 +1821,8 @@ class DaskXGBRegressor(DaskScikitLearnBase, XGBRegressorBase):
            feval=None,
            custom_metric=metric,
            verbose_eval=verbose,
-            early_stopping_rounds=early_stopping_rounds,
+            early_stopping_rounds=self.early_stopping_rounds,
-            callbacks=callbacks,
+            callbacks=self.callbacks,
            xgb_model=model,
        )
        self._Booster = results["booster"]
@ -1844,14 +1839,11 @@ class DaskXGBRegressor(DaskScikitLearnBase, XGBRegressorBase):
        sample_weight: Optional[_DaskCollection] = None,
        base_margin: Optional[_DaskCollection] = None,
        eval_set: Optional[Sequence[Tuple[_DaskCollection, _DaskCollection]]] = None,
        eval_metric: Optional[Union[str, Sequence[str], Callable]] = None,
        early_stopping_rounds: Optional[int] = None,
        verbose: Union[int, bool] = True,
        xgb_model: Optional[Union[Booster, XGBModel]] = None,
        sample_weight_eval_set: Optional[Sequence[_DaskCollection]] = None,
        base_margin_eval_set: Optional[Sequence[_DaskCollection]] = None,
        feature_weights: Optional[_DaskCollection] = None,
        callbacks: Optional[Sequence[TrainingCallback]] = None,
    ) -> "DaskXGBRegressor":
        _assert_dask_support()
        args = {k: v for k, v in locals().items() if k not in ("self", "__class__")}
@ -1871,14 +1863,11 @@ class DaskXGBClassifier(DaskScikitLearnBase, XGBClassifierBase):
        sample_weight: Optional[_DaskCollection],
        base_margin: Optional[_DaskCollection],
        eval_set: Optional[Sequence[Tuple[_DaskCollection, _DaskCollection]]],
        eval_metric: Optional[Union[str, Sequence[str], Metric]],
        sample_weight_eval_set: Optional[Sequence[_DaskCollection]],
        base_margin_eval_set: Optional[Sequence[_DaskCollection]],
        early_stopping_rounds: Optional[int],
        verbose: Union[int, bool],
        xgb_model: Optional[Union[Booster, XGBModel]],
        feature_weights: Optional[_DaskCollection],
        callbacks: Optional[Sequence[TrainingCallback]],
    ) -> "DaskXGBClassifier":
        params = self.get_xgb_params()
        dtrain, evals = await _async_wrap_evaluation_matrices(
@ -1924,9 +1913,7 @@ class DaskXGBClassifier(DaskScikitLearnBase, XGBClassifierBase):
            obj: Optional[Callable] = _objective_decorator(self.objective)
        else:
            obj = None
-        model, metric, params, early_stopping_rounds, callbacks = self._configure_fit(
+        model, metric, params = self._configure_fit(xgb_model, params)
            xgb_model, eval_metric, params, early_stopping_rounds, callbacks
        )
        results = await self.client.sync(
            _train_async,
            asynchronous=True,
@ -1941,8 +1928,8 @@ class DaskXGBClassifier(DaskScikitLearnBase, XGBClassifierBase):
            feval=None,
            custom_metric=metric,
            verbose_eval=verbose,
-            early_stopping_rounds=early_stopping_rounds,
+            early_stopping_rounds=self.early_stopping_rounds,
-            callbacks=callbacks,
+            callbacks=self.callbacks,
            xgb_model=model,
        )
        self._Booster = results["booster"]
@ -1960,14 +1947,11 @@ class DaskXGBClassifier(DaskScikitLearnBase, XGBClassifierBase):
        sample_weight: Optional[_DaskCollection] = None,
        base_margin: Optional[_DaskCollection] = None,
        eval_set: Optional[Sequence[Tuple[_DaskCollection, _DaskCollection]]] = None,
        eval_metric: Optional[Union[str, Sequence[str], Callable]] = None,
        early_stopping_rounds: Optional[int] = None,
        verbose: Union[int, bool] = True,
        xgb_model: Optional[Union[Booster, XGBModel]] = None,
        sample_weight_eval_set: Optional[Sequence[_DaskCollection]] = None,
        base_margin_eval_set: Optional[Sequence[_DaskCollection]] = None,
        feature_weights: Optional[_DaskCollection] = None,
        callbacks: Optional[Sequence[TrainingCallback]] = None,
    ) -> "DaskXGBClassifier":
        _assert_dask_support()
        args = {k: v for k, v in locals().items() if k not in ("self", "__class__")}
@ -2063,7 +2047,7 @@ class DaskXGBRanker(DaskScikitLearnBase, XGBRankerMixIn):
    def __init__(self, *, objective: str = "rank:pairwise", **kwargs: Any):
        if callable(objective):
            raise ValueError("Custom objective function not supported by XGBRanker.")
-        super().__init__(objective=objective, kwargs=kwargs)
+        super().__init__(objective=objective, **kwargs)
    async def _fit_async(
        self,
@ -2078,12 +2062,9 @@ class DaskXGBRanker(DaskScikitLearnBase, XGBRankerMixIn):
        base_margin_eval_set: Optional[Sequence[_DaskCollection]],
        eval_group: Optional[Sequence[_DaskCollection]],
        eval_qid: Optional[Sequence[_DaskCollection]],
        eval_metric: Optional[Union[str, Sequence[str], Metric]],
        early_stopping_rounds: Optional[int],
        verbose: Union[int, bool],
        xgb_model: Optional[Union[XGBModel, Booster]],
        feature_weights: Optional[_DaskCollection],
        callbacks: Optional[Sequence[TrainingCallback]],
    ) -> "DaskXGBRanker":
        msg = "Use `qid` instead of `group` on dask interface."
        if not (group is None and eval_group is None):
@ -2111,14 +2092,7 @@ class DaskXGBRanker(DaskScikitLearnBase, XGBRankerMixIn):
            enable_categorical=self.enable_categorical,
            feature_types=self.feature_types,
        )
-        if eval_metric is not None:
+        model, metric, params = self._configure_fit(xgb_model, params)
            if callable(eval_metric):
                raise ValueError(
                    "Custom evaluation metric is not yet supported for XGBRanker."
                )
        model, metric, params, early_stopping_rounds, callbacks = self._configure_fit(
            xgb_model, eval_metric, params, early_stopping_rounds, callbacks
        )
        results = await self.client.sync(
            _train_async,
            asynchronous=True,
@ -2133,8 +2107,8 @@ class DaskXGBRanker(DaskScikitLearnBase, XGBRankerMixIn):
            feval=None,
            custom_metric=metric,
            verbose_eval=verbose,
-            early_stopping_rounds=early_stopping_rounds,
+            early_stopping_rounds=self.early_stopping_rounds,
-            callbacks=callbacks,
+            callbacks=self.callbacks,
            xgb_model=model,
        )
        self._Booster = results["booster"]
@ -2155,14 +2129,11 @@ class DaskXGBRanker(DaskScikitLearnBase, XGBRankerMixIn):
        eval_set: Optional[Sequence[Tuple[_DaskCollection, _DaskCollection]]] = None,
        eval_group: Optional[Sequence[_DaskCollection]] = None,
        eval_qid: Optional[Sequence[_DaskCollection]] = None,
        eval_metric: Optional[Union[str, Sequence[str], Callable]] = None,
        early_stopping_rounds: Optional[int] = None,
        verbose: Union[int, bool] = False,
        xgb_model: Optional[Union[XGBModel, Booster]] = None,
        sample_weight_eval_set: Optional[Sequence[_DaskCollection]] = None,
        base_margin_eval_set: Optional[Sequence[_DaskCollection]] = None,
        feature_weights: Optional[_DaskCollection] = None,
        callbacks: Optional[Sequence[TrainingCallback]] = None,
    ) -> "DaskXGBRanker":
        _assert_dask_support()
        args = {k: v for k, v in locals().items() if k not in ("self", "__class__")}
@ -2221,18 +2192,15 @@ class DaskXGBRFRegressor(DaskXGBRegressor):
        sample_weight: Optional[_DaskCollection] = None,
        base_margin: Optional[_DaskCollection] = None,
        eval_set: Optional[Sequence[Tuple[_DaskCollection, _DaskCollection]]] = None,
        eval_metric: Optional[Union[str, Sequence[str], Callable]] = None,
        early_stopping_rounds: Optional[int] = None,
        verbose: Union[int, bool] = True,
        xgb_model: Optional[Union[Booster, XGBModel]] = None,
        sample_weight_eval_set: Optional[Sequence[_DaskCollection]] = None,
        base_margin_eval_set: Optional[Sequence[_DaskCollection]] = None,
        feature_weights: Optional[_DaskCollection] = None,
        callbacks: Optional[Sequence[TrainingCallback]] = None,
    ) -> "DaskXGBRFRegressor":
        _assert_dask_support()
        args = {k: v for k, v in locals().items() if k not in ("self", "__class__")}
-        _check_rf_callback(early_stopping_rounds, callbacks)
+        _check_rf_callback(self.early_stopping_rounds, self.callbacks)
        super().fit(**args)
        return self
@ -2285,17 +2253,14 @@ class DaskXGBRFClassifier(DaskXGBClassifier):
        sample_weight: Optional[_DaskCollection] = None,
        base_margin: Optional[_DaskCollection] = None,
        eval_set: Optional[Sequence[Tuple[_DaskCollection, _DaskCollection]]] = None,
        eval_metric: Optional[Union[str, Sequence[str], Callable]] = None,
        early_stopping_rounds: Optional[int] = None,
        verbose: Union[int, bool] = True,
        xgb_model: Optional[Union[Booster, XGBModel]] = None,
        sample_weight_eval_set: Optional[Sequence[_DaskCollection]] = None,
        base_margin_eval_set: Optional[Sequence[_DaskCollection]] = None,
        feature_weights: Optional[_DaskCollection] = None,
        callbacks: Optional[Sequence[TrainingCallback]] = None,
    ) -> "DaskXGBRFClassifier":
        _assert_dask_support()
        args = {k: v for k, v in locals().items() if k not in ("self", "__class__")}
-        _check_rf_callback(early_stopping_rounds, callbacks)
+        _check_rf_callback(self.early_stopping_rounds, self.callbacks)
        super().fit(**args)
        return self
--- a/python-package/xgboost/sklearn.py
+++ b/python-package/xgboost/sklearn.py
@ -349,12 +349,6 @@ __model_doc = f"""
        See :doc:`/tutorials/custom_metric_obj` and :ref:`custom-obj-metric` for more
        information.
        .. note::
             This parameter replaces `eval_metric` in :py:meth:`fit` method.  The old
             one receives un-transformed prediction regardless of whether custom
             objective is being used.
        .. code-block:: python
            from sklearn.datasets import load_diabetes
@ -389,10 +383,6 @@ __model_doc = f"""
          early stopping.  If there's more than one metric in **eval_metric**, the last
          metric will be used for early stopping.
        .. note::
            This parameter replaces `early_stopping_rounds` in :py:meth:`fit` method.
    callbacks : Optional[List[TrainingCallback]]
        List of callback functions that are applied at end of each iteration.
        It is possible to use predefined callbacks by using
@ -872,16 +862,11 @@ class XGBModel(XGBModelBase):
    def _configure_fit(
        self,
        booster: Optional[Union[Booster, "XGBModel", str]],
        eval_metric: Optional[Union[Callable, str, Sequence[str]]],
        params: Dict[str, Any],
        early_stopping_rounds: Optional[int],
        callbacks: Optional[Sequence[TrainingCallback]],
    ) -> Tuple[
        Optional[Union[Booster, str, "XGBModel"]],
        Optional[Metric],
        Dict[str, Any],
        Optional[int],
        Optional[Sequence[TrainingCallback]],
    ]:
        """Configure parameters for :py:meth:`fit`."""
        if isinstance(booster, XGBModel):
@ -903,49 +888,16 @@ class XGBModel(XGBModelBase):
                "or `set_params` instead."
            )
        # Configure evaluation metric.
        if eval_metric is not None:
            _deprecated("eval_metric")
        if self.eval_metric is not None and eval_metric is not None:
            _duplicated("eval_metric")
        # - track where does the evaluation metric come from
        if self.eval_metric is not None:
            from_fit = False
            eval_metric = self.eval_metric
        else:
            from_fit = True
        # - configure callable evaluation metric
        metric: Optional[Metric] = None
-        if eval_metric is not None:
+        if self.eval_metric is not None:
-            if callable(eval_metric) and from_fit:
+            if callable(self.eval_metric):
                # No need to wrap the evaluation function for old parameter.
                metric = eval_metric
            elif callable(eval_metric):
                # Parameter from constructor or set_params
                if self._get_type() == "ranker":
-                    metric = ltr_metric_decorator(eval_metric, self.n_jobs)
+                    metric = ltr_metric_decorator(self.eval_metric, self.n_jobs)
                else:
-                    metric = _metric_decorator(eval_metric)
+                    metric = _metric_decorator(self.eval_metric)
            else:
-                params.update({"eval_metric": eval_metric})
+                params.update({"eval_metric": self.eval_metric})
        # Configure early_stopping_rounds
        if early_stopping_rounds is not None:
            _deprecated("early_stopping_rounds")
        if early_stopping_rounds is not None and self.early_stopping_rounds is not None:
            _duplicated("early_stopping_rounds")
        early_stopping_rounds = (
            self.early_stopping_rounds
            if self.early_stopping_rounds is not None
            else early_stopping_rounds
        )
        # Configure callbacks
        if callbacks is not None:
            _deprecated("callbacks")
        if callbacks is not None and self.callbacks is not None:
            _duplicated("callbacks")
        callbacks = self.callbacks if self.callbacks is not None else callbacks
        tree_method = params.get("tree_method", None)
        if self.enable_categorical and tree_method == "exact":
@ -953,7 +905,7 @@ class XGBModel(XGBModelBase):
                "Experimental support for categorical data is not implemented for"
                " current tree method yet."
            )
-        return model, metric, params, early_stopping_rounds, callbacks
+        return model, metric, params
    def _create_dmatrix(self, ref: Optional[DMatrix], **kwargs: Any) -> DMatrix:
        # Use `QuantileDMatrix` to save memory.
@ -979,14 +931,11 @@ class XGBModel(XGBModelBase):
        sample_weight: Optional[ArrayLike] = None,
        base_margin: Optional[ArrayLike] = None,
        eval_set: Optional[Sequence[Tuple[ArrayLike, ArrayLike]]] = None,
        eval_metric: Optional[Union[str, Sequence[str], Metric]] = None,
        early_stopping_rounds: Optional[int] = None,
        verbose: Optional[Union[bool, int]] = True,
        xgb_model: Optional[Union[Booster, str, "XGBModel"]] = None,
        sample_weight_eval_set: Optional[Sequence[ArrayLike]] = None,
        base_margin_eval_set: Optional[Sequence[ArrayLike]] = None,
        feature_weights: Optional[ArrayLike] = None,
        callbacks: Optional[Sequence[TrainingCallback]] = None,
    ) -> "XGBModel":
        # pylint: disable=invalid-name,attribute-defined-outside-init
        """Fit gradient boosting model.
@ -1017,18 +966,6 @@ class XGBModel(XGBModelBase):
            metrics will be computed.
            Validation metrics will help us track the performance of the model.
        eval_metric : str, list of str, or callable, optional
            .. deprecated:: 1.6.0
            Use `eval_metric` in :py:meth:`__init__` or :py:meth:`set_params` instead.
        early_stopping_rounds : int
            .. deprecated:: 1.6.0
            Use `early_stopping_rounds` in :py:meth:`__init__` or :py:meth:`set_params`
            instead.
        verbose :
            If `verbose` is True and an evaluation set is used, the evaluation metric
            measured on the validation set is printed to stdout at each boosting stage.
@ -1049,10 +986,6 @@ class XGBModel(XGBModelBase):
            selected when colsample is being used.  All values must be greater than 0,
            otherwise a `ValueError` is thrown.
        callbacks :
            .. deprecated:: 1.6.0
                Use `callbacks` in :py:meth:`__init__` or :py:meth:`set_params` instead.
        """
        with config_context(verbosity=self.verbosity):
            evals_result: TrainingCallback.EvalsLog = {}
@ -1082,27 +1015,19 @@ class XGBModel(XGBModelBase):
            else:
                obj = None
-            (
+            model, metric, params = self._configure_fit(xgb_model, params)
                model,
                metric,
                params,
                early_stopping_rounds,
                callbacks,
            ) = self._configure_fit(
                xgb_model, eval_metric, params, early_stopping_rounds, callbacks
            )
            self._Booster = train(
                params,
                train_dmatrix,
                self.get_num_boosting_rounds(),
                evals=evals,
-                early_stopping_rounds=early_stopping_rounds,
+                early_stopping_rounds=self.early_stopping_rounds,
                evals_result=evals_result,
                obj=obj,
                custom_metric=metric,
                verbose_eval=verbose,
                xgb_model=model,
-                callbacks=callbacks,
+                callbacks=self.callbacks,
            )
            self._set_evaluation_result(evals_result)
@ -1437,14 +1362,11 @@ class XGBClassifier(XGBModel, XGBClassifierBase):
        sample_weight: Optional[ArrayLike] = None,
        base_margin: Optional[ArrayLike] = None,
        eval_set: Optional[Sequence[Tuple[ArrayLike, ArrayLike]]] = None,
        eval_metric: Optional[Union[str, Sequence[str], Metric]] = None,
        early_stopping_rounds: Optional[int] = None,
        verbose: Optional[Union[bool, int]] = True,
        xgb_model: Optional[Union[Booster, str, XGBModel]] = None,
        sample_weight_eval_set: Optional[Sequence[ArrayLike]] = None,
        base_margin_eval_set: Optional[Sequence[ArrayLike]] = None,
        feature_weights: Optional[ArrayLike] = None,
        callbacks: Optional[Sequence[TrainingCallback]] = None,
    ) -> "XGBClassifier":
        # pylint: disable = attribute-defined-outside-init,too-many-statements
        with config_context(verbosity=self.verbosity):
@ -1492,15 +1414,7 @@ class XGBClassifier(XGBModel, XGBClassifierBase):
                    params["objective"] = "multi:softprob"
                params["num_class"] = self.n_classes_
-            (
+            model, metric, params = self._configure_fit(xgb_model, params)
                model,
                metric,
                params,
                early_stopping_rounds,
                callbacks,
            ) = self._configure_fit(
                xgb_model, eval_metric, params, early_stopping_rounds, callbacks
            )
            train_dmatrix, evals = _wrap_evaluation_matrices(
                missing=self.missing,
                X=X,
@ -1525,13 +1439,13 @@ class XGBClassifier(XGBModel, XGBClassifierBase):
                train_dmatrix,
                self.get_num_boosting_rounds(),
                evals=evals,
-                early_stopping_rounds=early_stopping_rounds,
+                early_stopping_rounds=self.early_stopping_rounds,
                evals_result=evals_result,
                obj=obj,
                custom_metric=metric,
                verbose_eval=verbose,
                xgb_model=model,
-                callbacks=callbacks,
+                callbacks=self.callbacks,
            )
            if not callable(self.objective):
@ -1693,17 +1607,14 @@ class XGBRFClassifier(XGBClassifier):
        sample_weight: Optional[ArrayLike] = None,
        base_margin: Optional[ArrayLike] = None,
        eval_set: Optional[Sequence[Tuple[ArrayLike, ArrayLike]]] = None,
        eval_metric: Optional[Union[str, Sequence[str], Metric]] = None,
        early_stopping_rounds: Optional[int] = None,
        verbose: Optional[Union[bool, int]] = True,
        xgb_model: Optional[Union[Booster, str, XGBModel]] = None,
        sample_weight_eval_set: Optional[Sequence[ArrayLike]] = None,
        base_margin_eval_set: Optional[Sequence[ArrayLike]] = None,
        feature_weights: Optional[ArrayLike] = None,
        callbacks: Optional[Sequence[TrainingCallback]] = None,
    ) -> "XGBRFClassifier":
        args = {k: v for k, v in locals().items() if k not in ("self", "__class__")}
-        _check_rf_callback(early_stopping_rounds, callbacks)
+        _check_rf_callback(self.early_stopping_rounds, self.callbacks)
        super().fit(**args)
        return self
@ -1768,17 +1679,14 @@ class XGBRFRegressor(XGBRegressor):
        sample_weight: Optional[ArrayLike] = None,
        base_margin: Optional[ArrayLike] = None,
        eval_set: Optional[Sequence[Tuple[ArrayLike, ArrayLike]]] = None,
        eval_metric: Optional[Union[str, Sequence[str], Metric]] = None,
        early_stopping_rounds: Optional[int] = None,
        verbose: Optional[Union[bool, int]] = True,
        xgb_model: Optional[Union[Booster, str, XGBModel]] = None,
        sample_weight_eval_set: Optional[Sequence[ArrayLike]] = None,
        base_margin_eval_set: Optional[Sequence[ArrayLike]] = None,
        feature_weights: Optional[ArrayLike] = None,
        callbacks: Optional[Sequence[TrainingCallback]] = None,
    ) -> "XGBRFRegressor":
        args = {k: v for k, v in locals().items() if k not in ("self", "__class__")}
-        _check_rf_callback(early_stopping_rounds, callbacks)
+        _check_rf_callback(self.early_stopping_rounds, self.callbacks)
        super().fit(**args)
        return self
@ -1883,14 +1791,11 @@ class XGBRanker(XGBModel, XGBRankerMixIn):
        eval_set: Optional[Sequence[Tuple[ArrayLike, ArrayLike]]] = None,
        eval_group: Optional[Sequence[ArrayLike]] = None,
        eval_qid: Optional[Sequence[ArrayLike]] = None,
        eval_metric: Optional[Union[str, Sequence[str], Metric]] = None,
        early_stopping_rounds: Optional[int] = None,
        verbose: Optional[Union[bool, int]] = False,
        xgb_model: Optional[Union[Booster, str, XGBModel]] = None,
        sample_weight_eval_set: Optional[Sequence[ArrayLike]] = None,
        base_margin_eval_set: Optional[Sequence[ArrayLike]] = None,
        feature_weights: Optional[ArrayLike] = None,
        callbacks: Optional[Sequence[TrainingCallback]] = None,
    ) -> "XGBRanker":
        # pylint: disable = attribute-defined-outside-init,arguments-differ
        """Fit gradient boosting ranker
@ -1960,15 +1865,6 @@ class XGBRanker(XGBModel, XGBRankerMixIn):
            pair in **eval_set**. The special column convention in `X` applies to
            validation datasets as well.
        eval_metric : str, list of str, optional
            .. deprecated:: 1.6.0
                use `eval_metric` in :py:meth:`__init__` or :py:meth:`set_params` instead.
        early_stopping_rounds : int
            .. deprecated:: 1.6.0
                use `early_stopping_rounds` in :py:meth:`__init__` or
                :py:meth:`set_params` instead.
        verbose :
            If `verbose` is True and an evaluation set is used, the evaluation metric
            measured on the validation set is printed to stdout at each boosting stage.
@ -1996,10 +1892,6 @@ class XGBRanker(XGBModel, XGBRankerMixIn):
            selected when colsample is being used.  All values must be greater than 0,
            otherwise a `ValueError` is thrown.
        callbacks :
            .. deprecated:: 1.6.0
                Use `callbacks` in :py:meth:`__init__` or :py:meth:`set_params` instead.
        """
        with config_context(verbosity=self.verbosity):
            train_dmatrix, evals = _wrap_evaluation_matrices(
@ -2024,27 +1916,19 @@ class XGBRanker(XGBModel, XGBRankerMixIn):
            evals_result: TrainingCallback.EvalsLog = {}
            params = self.get_xgb_params()
-            (
+            model, metric, params = self._configure_fit(xgb_model, params)
                model,
                metric,
                params,
                early_stopping_rounds,
                callbacks,
            ) = self._configure_fit(
                xgb_model, eval_metric, params, early_stopping_rounds, callbacks
            )
            self._Booster = train(
                params,
                train_dmatrix,
                num_boost_round=self.get_num_boosting_rounds(),
-                early_stopping_rounds=early_stopping_rounds,
+                early_stopping_rounds=self.early_stopping_rounds,
                evals=evals,
                evals_result=evals_result,
                custom_metric=metric,
                verbose_eval=verbose,
                xgb_model=model,
-                callbacks=callbacks,
+                callbacks=self.callbacks,
            )
            self.objective = params["objective"]
--- a/python-package/xgboost/testing/continuation.py
+++ b/python-package/xgboost/testing/continuation.py
@ -0,0 +1,58 @@
 """Tests for training continuation."""
 import json
 from typing import Any, Dict, TypeVar
 import numpy as np
 import pytest
 import xgboost as xgb
 # pylint: disable=too-many-locals
 def run_training_continuation_model_output(device: str, tree_method: str) -> None:
    """Check that continued training reproduces a single uninterrupted run.

    One booster is trained for 64 rounds in a single call; a second one is trained
    for 32 rounds and then continued for another 32 via ``xgb_model``.  The JSON dumps
    of both boosters are compared tree by tree: floats with an absolute tolerance of
    1e-6, strings and integers exactly, and the ``"missing"`` entry of every
    dictionary is ignored.

    The test is skipped when ``sklearn.datasets`` cannot be imported.

    Parameters
    ----------
    device :
        Passed to XGBoost as the ``device`` training parameter.
    tree_method :
        Passed to XGBoost as the ``tree_method`` training parameter.
    """
    datasets = pytest.importorskip("sklearn.datasets")
    n_samples = 64
    n_features = 32
    X, y = datasets.make_regression(n_samples, n_features, random_state=1)
    dtrain = xgb.DMatrix(X, y)
    params = {
        "tree_method": tree_method,
        "max_depth": "2",
        "gamma": "0.1",
        "alpha": "0.01",
        "device": device,
    }
    # Reference model: all 64 rounds in one go.
    bst_0 = xgb.train(params, dtrain, num_boost_round=64)
    dump_0 = bst_0.get_dump(dump_format="json")
    # Continued model: 32 rounds, then 32 more on top of the first booster.
    bst_1 = xgb.train(params, dtrain, num_boost_round=32)
    bst_1 = xgb.train(params, dtrain, num_boost_round=32, xgb_model=bst_1)
    dump_1 = bst_1.get_dump(dump_format="json")
    T = TypeVar("T", Dict[str, Any], float, str, int, list)
    def recursive_compare(obj_0: T, obj_1: T) -> None:
        """Assert that two decoded JSON values match, descending into containers."""
        if isinstance(obj_0, float):
            assert np.isclose(obj_0, obj_1, atol=1e-6)
        elif isinstance(obj_0, (str, int)):
            assert obj_0 == obj_1
        elif isinstance(obj_0, dict):
            # Same keys in the same order on both sides, with nothing extra on the right.
            assert list(obj_0.keys()) == list(obj_1.keys())
            for key, lhs in obj_0.items():
                # Each value is checked against its own counterpart exactly once, so the
                # "missing" entry is really left out of the comparison.
                if key != "missing":
                    recursive_compare(lhs, obj_1[key])
        else:
            # Sequences must have the same length before comparing element-wise.
            assert len(obj_0) == len(obj_1)
            for lhs, rhs in zip(obj_0, obj_1):
                recursive_compare(lhs, rhs)
    assert len(dump_0) == len(dump_1)
    for i, lhs in enumerate(dump_0):
        obj_0 = json.loads(lhs)
        obj_1 = json.loads(dump_1[i])
        recursive_compare(obj_0, obj_1)
--- a/src/collective/coll.cc
+++ b/src/collective/coll.cc
@ -18,6 +18,8 @@
 #if defined(XGBOOST_USE_CUDA)
 #include "cuda_fp16.h"  // for __half
 #elif defined(XGBOOST_USE_HIP)
 #include <hip/hip_fp16.h>  // for __half
 #endif
 namespace xgboost::collective {
--- a/src/collective/loop.cc
+++ b/src/collective/loop.cc
@ -1,11 +1,19 @@
 /**
- * Copyright 2023, XGBoost Contributors
+ * Copyright 2023-2024, XGBoost Contributors
 */
 #include "loop.h"
-#include <queue>  // for queue
+#include <cstddef>    // for size_t
 #include <cstdint>    // for int32_t
 #include <exception>  // for exception, current_exception, rethrow_exception
 #include <mutex>      // for lock_guard, unique_lock
 #include <queue>      // for queue
 #include <string>     // for string
 #include <thread>     // for thread
 #include <utility>    // for move
 #include "rabit/internal/socket.h"      // for PollHelper
 #include "xgboost/collective/result.h"  // for Fail, Success
 #include "xgboost/collective/socket.h"  // for FailWithCode
 #include "xgboost/logging.h"            // for CHECK
@ -109,62 +117,94 @@ Result Loop::EmptyQueue(std::queue<Op>* p_queue) const {
 }
 void Loop::Process() {
-  // consumer
+  auto set_rc = [this](Result&& rc) {
-  while (true) {
+    std::lock_guard lock{rc_lock_};
-    std::unique_lock lock{mu_};
+    rc_ = std::forward<Result>(rc);
-    cv_.wait(lock, [this] { return !this->queue_.empty() || stop_; });
+  };
-    if (stop_) {
+
-      break;
+  // This loop cannot exit unless `stop_` is set to true. There must always be a thread to
-    }
+  // answer the blocking call even if there are errors, otherwise the blocking will wait
  // forever.
  while (true) {
    try {
      std::unique_lock lock{mu_};
      cv_.wait(lock, [this] { return !this->queue_.empty() || stop_; });
      if (stop_) {
        break;  // only point where this loop can exit.
      }
      // Move the global queue into a local variable to unblock it.
      std::queue<Op> qcopy;
      bool is_blocking = false;
      while (!queue_.empty()) {
        auto op = queue_.front();
        queue_.pop();
        if (op.code == Op::kBlock) {
          is_blocking = true;
          // Block must be the last op in the current batch since no further submit can be
          // issued until the blocking call is finished.
          CHECK(queue_.empty());
        } else {
          qcopy.push(op);
        }
      }
    auto unlock_notify = [&](bool is_blocking, bool stop) {
      if (!is_blocking) {
-        std::lock_guard guard{mu_};
+        // Unblock, we can write to the global queue again.
        stop_ = stop;
      } else {
        stop_ = stop;
        lock.unlock();
      }
      cv_.notify_one();
    };
-    // move the queue
+      // Clear the local queue, this is blocking the current worker thread (but not the
-    std::queue<Op> qcopy;
+      // client thread), wait until all operations are finished.
-    bool is_blocking = false;
+      auto rc = this->EmptyQueue(&qcopy);
-    while (!queue_.empty()) {
+
-      auto op = queue_.front();
+      if (is_blocking) {
-      queue_.pop();
+        // The unlock is delayed if this is a blocking call
-      if (op.code == Op::kBlock) {
+        lock.unlock();
        is_blocking = true;
      } else {
        qcopy.push(op);
      }
    }
    // unblock the queue
    if (!is_blocking) {
      lock.unlock();
    }
    // clear the queue
    auto rc = this->EmptyQueue(&qcopy);
    // Handle error
    if (!rc.OK()) {
      unlock_notify(is_blocking, true);
      std::lock_guard<std::mutex> guard{rc_lock_};
      this->rc_ = std::move(rc);
      return;
    }
-    CHECK(qcopy.empty());
+      // Notify the client thread who called block after all error conditions are set.
-    unlock_notify(is_blocking, false);
+      auto notify_if_block = [&] {
        if (is_blocking) {
          std::unique_lock lock{mu_};
          block_done_ = true;
          lock.unlock();
          block_cv_.notify_one();
        }
      };
      // Handle error
      if (!rc.OK()) {
        set_rc(std::move(rc));
      } else {
        CHECK(qcopy.empty());
      }
      notify_if_block();
    } catch (std::exception const& e) {
      curr_exce_ = std::current_exception();
      set_rc(Fail("Exception inside the event loop:" + std::string{e.what()}));
    } catch (...) {
      curr_exce_ = std::current_exception();
      set_rc(Fail("Unknown exception inside the event loop."));
    }
  }
 }
 Result Loop::Stop() {
  // Finish all remaining tasks
  CHECK_EQ(this->Block().OK(), this->rc_.OK());
  // Notify the loop to stop
  std::unique_lock lock{mu_};
  stop_ = true;
  lock.unlock();
  this->cv_.notify_one();
-  CHECK_EQ(this->Block().OK(), this->rc_.OK());
+  if (this->worker_.joinable()) {
    this->worker_.join();
  }
  if (curr_exce_) {
    std::rethrow_exception(curr_exce_);
@ -175,17 +215,29 @@ Result Loop::Stop() {
 [[nodiscard]] Result Loop::Block() {
  {
    // Check whether the last op was successful, stop if not.
    std::lock_guard<std::mutex> guard{rc_lock_};
    if (!rc_.OK()) {
-      return std::move(rc_);
+      stop_ = true;
    }
  }
-  this->Submit(Op{Op::kBlock});
+
-  {
+  if (!this->worker_.joinable()) {
-    std::unique_lock lock{mu_};
+    std::lock_guard<std::mutex> guard{rc_lock_};
-    cv_.wait(lock, [this] { return (this->queue_.empty()) || stop_; });
+    return Fail("Worker has stopped.", std::move(rc_));
  }
  this->Submit(Op{Op::kBlock});
  {
    // Wait for the block call to finish.
    std::unique_lock lock{mu_};
    block_cv_.wait(lock, [this] { return block_done_ || stop_; });
    block_done_ = false;
  }
  {
    // Transfer the rc.
    std::lock_guard<std::mutex> lock{rc_lock_};
    return std::move(rc_);
  }
@ -193,26 +245,6 @@ Result Loop::Stop() {
 Loop::Loop(std::chrono::seconds timeout) : timeout_{timeout} {
  timer_.Init(__func__);
-  worker_ = std::thread{[this] {
+  worker_ = std::thread{[this] { this->Process(); }};
    try {
      this->Process();
    } catch (std::exception const& e) {
      std::lock_guard<std::mutex> guard{mu_};
      if (!curr_exce_) {
        curr_exce_ = std::current_exception();
        rc_ = Fail("Exception was thrown");
      }
      stop_ = true;
      cv_.notify_all();
    } catch (...) {
      std::lock_guard<std::mutex> guard{mu_};
      if (!curr_exce_) {
        curr_exce_ = std::current_exception();
        rc_ = Fail("Exception was thrown");
      }
      stop_ = true;
      cv_.notify_all();
    }
  }};
 }
 }  // namespace xgboost::collective
--- a/src/collective/loop.h
+++ b/src/collective/loop.h
@ -1,5 +1,5 @@
 /**
- * Copyright 2023, XGBoost Contributors
+ * Copyright 2023-2024, XGBoost Contributors
 */
 #pragma once
 #include <chrono>              // for seconds
@ -10,7 +10,6 @@
 #include <mutex>               // for unique_lock, mutex
 #include <queue>               // for queue
 #include <thread>              // for thread
 #include <utility>             // for move
 #include "../common/timer.h"            // for Monitor
 #include "xgboost/collective/result.h"  // for Result
@ -37,10 +36,15 @@ class Loop {
  };
 private:
-  std::thread worker_;
+  std::thread worker_;  // thread worker to execute the tasks
-  std::condition_variable cv_;
+
-  std::mutex mu_;
+  std::condition_variable cv_;        // CV used to notify a new submit call
-  std::queue<Op> queue_;
+  std::condition_variable block_cv_;  // CV used to notify the blocking call
  bool block_done_{false};            // Flag to indicate whether the blocking call has finished.
  std::queue<Op> queue_;  // event queue
  std::mutex mu_;         // mutex to protect the queue, cv, and block_done
  std::chrono::seconds timeout_;
  Result rc_;
@ -51,29 +55,33 @@ class Loop {
  common::Monitor mutable timer_;
  Result EmptyQueue(std::queue<Op>* p_queue) const;
  // The consumer function that runs inside a worker thread.
  void Process();
 public:
  /**
   * @brief Stop the worker thread.
   */
  Result Stop();
  /**
   * @brief Producer side of the event queue: append an operation and wake up one
   *        thread waiting on the submit condition variable.
   *
   * @param op Operation to push onto the queue.
   */
  void Submit(Op op) {
    {
      // The queue mutex is held only for the push and released before notifying.
      std::lock_guard guard{mu_};
      queue_.push(op);
    }
    cv_.notify_one();
  }
  /**
   * @brief Block the event loop until all ops are finished. In the case of failure, this
   *        loop should not be used for new operations.
   */
  [[nodiscard]] Result Block();
  explicit Loop(std::chrono::seconds timeout);
  /**
   * @brief Shut down the event loop when the object is destroyed.
   *
   * Declared `noexcept(false)` so that an exception thrown by Stop() can propagate
   * out of the destructor.
   */
  ~Loop() noexcept(false) {
    // The worker will be joined in the stop function.
    this->Stop();
    // Fallback: join here if the worker is still joinable after Stop() has returned.
    if (worker_.joinable()) {
      worker_.join();
    }
  }
 };
 }  // namespace xgboost::collective
--- a/src/tree/gpu_hist/histogram.cu
+++ b/src/tree/gpu_hist/histogram.cu
@ -318,7 +318,6 @@ void BuildGradientHistogram(CUDAContext const* ctx, EllpackDeviceAccessor const&
    grid_size = std::min(common::DivRoundUp(grid_size, num_groups), static_cast<std::uint32_t>(
                                        common::DivRoundUp(items_per_group, kMinItemsPerBlock)));
 #endif
    dh::LaunchKernel {dim3(grid_size, num_groups), static_cast<uint32_t>(kBlockThreads), smem_size,
                     ctx->Stream()} (kernel, matrix, feature_groups, d_ridx, histogram.data(),
                                     gpair.data(), rounding);
--- a/src/tree/hist/evaluate_splits.h
+++ b/src/tree/hist/evaluate_splits.h
@ -730,6 +730,9 @@ class HistMultiEvaluator {
    std::size_t n_nodes = p_tree->Size();
    gain_.resize(n_nodes);
    // Re-calculate weight without learning rate.
    CalcWeight(*param_, left_sum, left_weight);
    CalcWeight(*param_, right_sum, right_weight);
    gain_[left_child] = CalcGainGivenWeight(*param_, left_sum, left_weight);
    gain_[right_child] = CalcGainGivenWeight(*param_, right_sum, right_weight);
--- a/src/tree/multi_target_tree_model.cc
+++ b/src/tree/multi_target_tree_model.cc
@ -195,8 +195,9 @@ void MultiTargetTree::Expand(bst_node_t nidx, bst_feature_t split_idx, float spl
  split_index_.resize(n);
  split_index_[nidx] = split_idx;
-  split_conds_.resize(n);
+  split_conds_.resize(n, std::numeric_limits<float>::quiet_NaN());
  split_conds_[nidx] = split_cond;
  default_left_.resize(n);
  default_left_[nidx] = static_cast<std::uint8_t>(default_left);
--- a/src/tree/updater_quantile_hist.cc
+++ b/src/tree/updater_quantile_hist.cc
@ -149,6 +149,9 @@ class MultiTargetHistBuilder {
  }
  void InitData(DMatrix *p_fmat, RegTree const *p_tree) {
    if (collective::IsDistributed()) {
      LOG(FATAL) << "Distributed training for vector-leaf is not yet supported.";
    }
    monitor_->Start(__func__);
    p_last_fmat_ = p_fmat;
--- a/tests/README.md
+++ b/tests/README.md
@ -4,15 +4,13 @@ facilities.
 # Directories
  * ci_build:  Test facilities for Jenkins CI and GitHub Actions.
  * cli: Basic test for command line executable `xgboost`.  Most of the other command line
-    specific tests are in Python test `test_cli.py`
+    specific tests are in Python test `test_cli.py`.
  * cpp: Tests for C++ core, using Google test framework.
  * python: Tests for Python package, demonstrations and CLI.  For how to set up the
    dependencies for tests, see conda files in `ci_build`.
  * python-gpu: Similar to python tests, but for GPU.
  * travis: CI facilities for Travis.
-  * distributed: Test for distributed system.
+  * test_distributed: Test for distributed systems including spark and dask.
  * benchmark: Legacy benchmark code.  There are a number of benchmark projects for
    XGBoost with much better configurations.
 # Others
  * pytest.ini: Describes the `pytest` marker for python tests, some markers are generated
--- a/tests/benchmark/benchmark_linear.py
+++ b/tests/benchmark/benchmark_linear.py
@ -1,69 +0,0 @@
 #pylint: skip-file
 import argparse
 import xgboost as xgb
 import numpy as np
 from sklearn.datasets import make_classification
 from sklearn.model_selection import train_test_split
 import time
 import ast
 rng = np.random.RandomState(1994)
 def run_benchmark(args):
    """Time DMatrix construction and ``gblinear`` training on a synthetic dataset.

    Cached ``dtest.dm``/``dtrain.dm`` files in the working directory are reused when
    their row and column counts match ``args``; otherwise a new classification
    dataset is generated, split, and saved under those names.

    Parameters
    ----------
    args :
        Parsed command-line namespace.  The attributes read here are ``rows``,
        ``columns``, ``test_size``, ``sparsity``, ``params``, ``updater`` and
        ``iterations``.
    """
    try:
        dtest = xgb.DMatrix('dtest.dm')
        dtrain = xgb.DMatrix('dtrain.dm')
        if not (dtest.num_col() == args.columns
                and dtrain.num_col() == args.columns):
            raise ValueError("Wrong cols")
        if not (dtest.num_row() == args.rows * args.test_size
                and dtrain.num_row() == args.rows * (1 - args.test_size)):
            raise ValueError("Wrong rows")
    except Exception:
        # Any failure to load or validate the cache means "regenerate the data".
        # Catching Exception instead of using a bare ``except`` lets
        # KeyboardInterrupt and SystemExit propagate.
        print("Generating dataset: {} rows * {} columns".format(args.rows, args.columns))
        print("{}/{} test/train split".format(args.test_size, 1.0 - args.test_size))
        tmp = time.time()
        X, y = make_classification(
            args.rows,
            n_features=args.columns,
            n_redundant=0,
            n_informative=args.columns,
            n_repeated=0,
            random_state=7,
        )
        # With sparsity <= 0 no entry can be replaced by NaN, so the element-wise
        # Python pass is skipped; X keeps the same values either way.
        if 0.0 < args.sparsity < 1.0:
            X = np.array([[np.nan if rng.uniform(0, 1) < args.sparsity else x for x in x_row]
                          for x_row in X])
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=args.test_size, random_state=7
        )
        print("Generate Time: %s seconds" % (str(time.time() - tmp)))
        tmp = time.time()
        print("DMatrix Start")
        dtrain = xgb.DMatrix(X_train, y_train)
        dtest = xgb.DMatrix(X_test, y_test, nthread=-1)
        print("DMatrix Time: %s seconds" % (str(time.time() - tmp)))
        # Cache both matrices so that the next run can skip generation.
        dtest.save_binary('dtest.dm')
        dtrain.save_binary('dtrain.dm')
    param = {'objective': 'binary:logistic', 'booster': 'gblinear'}
    # User-supplied parameters come first; the updater from the CLI always wins.
    if args.params != '':
        param.update(ast.literal_eval(args.params))
    param['updater'] = args.updater
    print("Training with '%s'" % param['updater'])
    tmp = time.time()
    xgb.train(param, dtrain, args.iterations, evals=[(dtrain, "train")],
              early_stopping_rounds=args.columns)
    print("Train Time: %s seconds" % (str(time.time() - tmp)))
 def _str_to_bool(text):
    """Interpret a command-line string as a boolean value.

    ``argparse`` with ``type=bool`` calls ``bool()`` on the raw string, so every
    non-empty value, ``"False"`` included, becomes ``True``.  Here the usual spellings
    of "false" (and the empty string) give ``False``; any other value gives ``True``.
    """
    return text.strip().lower() not in ('', '0', 'false', 'f', 'no', 'n', 'off')
 # Command-line interface of the benchmark script.
 # NOTE(review): run_benchmark in this file reads updater, sparsity, rows, iterations,
 # columns, test_size and params; lambda, tol, alpha and standardise look unused.
 parser = argparse.ArgumentParser()
 parser.add_argument('--updater', default='coord_descent')
 parser.add_argument('--sparsity', type=float, default=0.0)
 parser.add_argument('--lambda', type=float, default=1.0)
 parser.add_argument('--tol', type=float, default=1e-5)
 parser.add_argument('--alpha', type=float, default=1.0)
 parser.add_argument('--rows', type=int, default=1000000)
 parser.add_argument('--iterations', type=int, default=10000)
 parser.add_argument('--columns', type=int, default=50)
 parser.add_argument('--test_size', type=float, default=0.25)
 parser.add_argument('--standardise', type=_str_to_bool, default=False)
 parser.add_argument('--params', default='', help='Provide additional parameters as a Python dict string, e.g. --params \"{\'max_depth\':2}\"')
 args = parser.parse_args()
 run_benchmark(args)
--- a/tests/benchmark/benchmark_tree.py
+++ b/tests/benchmark/benchmark_tree.py
@ -1,86 +0,0 @@
 """Run benchmark on the tree booster."""
 import argparse
 import ast
 import time
 import numpy as np
 import xgboost as xgb
 RNG = np.random.RandomState(1994)
 def run_benchmark(args):
    """Time DMatrix construction and training of the tree booster.

    Cached binary matrices ``dtest.dm`` and ``dtrain.dm`` in the working
    directory are reused when their shape matches the request; otherwise a
    random dataset is generated, split, converted and written back to those
    two files.  The booster is then trained with ``args.tree_method`` and the
    elapsed times are printed.

    Parameters
    ----------
    args : argparse.Namespace
        Must provide ``rows``, ``columns``, ``sparsity``, ``test_size``,
        ``iterations``, ``tree_method`` and ``params`` (a Python dict literal
        given as a string, or ``''``).
    """
    # Truncate the row counts once so that the cache check below compares
    # against exactly the sizes the generator produces.
    train_rows = int(args.rows * (1.0 - args.test_size))
    test_rows = int(args.rows * args.test_size)
    try:
        dtest = xgb.DMatrix('dtest.dm')
        dtrain = xgb.DMatrix('dtrain.dm')
        if not (dtest.num_col() == args.columns
                and dtrain.num_col() == args.columns):
            raise ValueError("Wrong cols")
        if not (dtest.num_row() == test_rows
                and dtrain.num_row() == train_rows):
            raise ValueError("Wrong rows")
    except Exception:
        # Cache missing, unreadable or of the wrong shape: rebuild it.
        # (Ctrl-C / SystemExit are deliberately not caught here.)
        print("Generating dataset: {} rows * {} columns".format(args.rows, args.columns))
        print("{}/{} test/train split".format(args.test_size, 1.0 - args.test_size))
        tmp = time.time()
        X = RNG.rand(args.rows, args.columns)
        y = RNG.randint(0, 2, args.rows)
        if 0.0 < args.sparsity < 1.0:
            # Replace each entry with NaN with probability ``sparsity``.
            X = np.array([[np.nan if RNG.uniform(0, 1) < args.sparsity else x for x in x_row]
                          for x_row in X])
        # Leading rows form the training set, trailing rows the test set.
        X_train = X[:train_rows, :]
        X_test = X[-test_rows:, :]
        y_train = y[:train_rows]
        y_test = y[-test_rows:]
        print("Generate Time: %s seconds" % (str(time.time() - tmp)))
        del X, y
        tmp = time.time()
        print("DMatrix Start")
        dtrain = xgb.DMatrix(X_train, y_train, nthread=-1)
        dtest = xgb.DMatrix(X_test, y_test, nthread=-1)
        print("DMatrix Time: %s seconds" % (str(time.time() - tmp)))
        del X_train, y_train, X_test, y_test
        # Persist the matrices so that the next run can skip generation.
        dtest.save_binary('dtest.dm')
        dtrain.save_binary('dtrain.dm')
    param = {'objective': 'binary:logistic'}
    if args.params != '':
        # ``--params`` carries a dict literal; literal_eval avoids running code.
        param.update(ast.literal_eval(args.params))
    # The command-line tree method overrides any value passed via --params.
    param['tree_method'] = args.tree_method
    print("Training with '%s'" % param['tree_method'])
    tmp = time.time()
    xgb.train(param, dtrain, args.iterations, evals=[(dtest, "test")])
    print("Train Time: %s seconds" % (str(time.time() - tmp)))
 def main():
    """Command-line entry point of the tree-booster benchmark.

    Declares the supported options, parses the process arguments and passes
    the resulting namespace to ``run_benchmark``.
    """
    cli = argparse.ArgumentParser()
    # (flag, keyword arguments) pairs, registered in this order.
    option_table = (
        ('--tree_method', {'default': 'gpu_hist'}),
        ('--sparsity', {'type': float, 'default': 0.0}),
        ('--rows', {'type': int, 'default': 1000000}),
        ('--columns', {'type': int, 'default': 50}),
        ('--iterations', {'type': int, 'default': 500}),
        ('--test_size', {'type': float, 'default': 0.25}),
        ('--params', {'default': '',
                      'help': 'Provide additional parameters as a Python dict string, e.g. --params '
                              '\"{\'max_depth\':2}\"'}),
    )
    for flag, spec in option_table:
        cli.add_argument(flag, **spec)
    run_benchmark(cli.parse_args())
 if __name__ == '__main__':
    main()
--- a/tests/benchmark/generate_libsvm.py
+++ b/tests/benchmark/generate_libsvm.py
@ -1,87 +0,0 @@
 """Generate synthetic data in LIBSVM format."""
 import argparse
 import io
 import time
 import numpy as np
 from sklearn.datasets import make_classification
 from sklearn.model_selection import train_test_split
 RNG = np.random.RandomState(2019)
 def generate_data(args):
    """Build a synthetic classification set and write it as two LIBSVM files.

    Reads ``rows``, ``columns``, ``sparsity`` and ``test_size`` from ``args``,
    generates the samples with ``make_classification``, splits them without
    shuffling, and writes ``train.libsvm`` and ``test.libsvm`` into the
    working directory.  The duration of every stage is printed.
    """
    n_rows, n_cols = args.rows, args.columns
    print("Generating dataset: {} rows * {} columns".format(n_rows, n_cols))
    print("Sparsity {}".format(args.sparsity))
    print("{}/{} train/test split".format(1.0 - args.test_size, args.test_size))
    started = time.time()
    # 70% informative, 10% redundant, 10% repeated columns (integer division).
    informative = n_cols * 7 // 10
    redundant = n_cols // 10
    repeated = n_cols // 10
    print("n_informative: {}, n_redundant: {}, n_repeated: {}".format(informative, redundant,
                                                                      repeated))
    features, labels = make_classification(n_samples=n_rows, n_features=n_cols,
                                           n_informative=informative, n_redundant=redundant,
                                           n_repeated=repeated, shuffle=False, random_state=RNG)
    print("Generate Time: {} seconds".format(time.time() - started))
    started = time.time()
    parts = train_test_split(features, labels, test_size=args.test_size,
                             random_state=RNG, shuffle=False)
    x_train, x_test, y_train, y_test = parts
    print("Train/Test Split Time: {} seconds".format(time.time() - started))
    # Same write-and-report step for both output files, training set first.
    outputs = (
        ('train.libsvm', x_train, y_train, "Write Train Time: {} seconds"),
        ('test.libsvm', x_test, y_test, "Write Test Time: {} seconds"),
    )
    for path, x_part, y_part, report in outputs:
        started = time.time()
        write_file(path, x_part, y_part, args.sparsity)
        print(report.format(time.time() - started))
 def write_file(filename, x_data, y_data, sparsity):
    """Write one text line per (feature row, label) pair into ``filename``.

    The file is opened in text write mode, so existing content is replaced.
    Rows and labels are paired with ``zip``; ``sparsity`` is forwarded
    unchanged to ``write_line``.
    """
    with open(filename, 'w') as sink:
        for row, label in zip(x_data, y_data):
            write_line(sink, row, label, sparsity)
 def write_line(f, x, y, sparsity):
    """Emit one sample to ``f`` as ``<label> <index>:<value> ...`` plus newline.

    When ``sparsity`` lies strictly between 0 and 1, each feature is kept
    only if a fresh ``RNG.uniform(0, 1)`` draw exceeds ``sparsity``; for any
    other value every feature is written and the generator is not consulted.
    """
    drop_randomly = 0.0 < sparsity < 1.0
    with io.StringIO() as buf:
        buf.write(str(y))
        for idx, value in enumerate(x):
            # Short-circuit keeps RNG untouched unless random dropping is on.
            if drop_randomly and not RNG.uniform(0, 1) > sparsity:
                continue
            write_feature(buf, idx, value)
        buf.write('\n')
        # The whole line is assembled in memory and handed over in one write.
        f.write(buf.getvalue())
 def write_feature(line, index, feature):
    """Append `` <index>:<feature>`` (leading space included) to ``line``.

    ``line`` only needs a ``write`` method; ``index`` and ``feature`` are
    rendered with ``str``.
    """
    for piece in (' ', str(index), ':', str(feature)):
        line.write(piece)
 def main():
    """Command-line entry point of the LIBSVM data generator.

    Registers the numeric options, parses the process arguments and hands
    the resulting namespace to ``generate_data``.
    """
    cli = argparse.ArgumentParser()
    # (flag, converter, default) triples, registered in this order.
    numeric_options = (
        ('--rows', int, 1000000),
        ('--columns', int, 50),
        ('--sparsity', float, 0.0),
        ('--test_size', float, 0.01),
    )
    for flag, converter, fallback in numeric_options:
        cli.add_argument(flag, type=converter, default=fallback)
    generate_data(cli.parse_args())
 if __name__ == '__main__':
    main()
--- a/tests/buildkite/build-jvm-packages.sh
+++ b/tests/buildkite/build-jvm-packages.sh
@ -8,13 +8,18 @@ echo "--- Build XGBoost JVM packages scala 2.12"
 tests/ci_build/ci_build.sh jvm tests/ci_build/build_jvm_packages.sh \
  ${SPARK_VERSION}
 echo "--- Stash XGBoost4J JARs (Scala 2.12)"
 buildkite-agent artifact upload "jvm-packages/xgboost4j/target/*.jar"
 buildkite-agent artifact upload "jvm-packages/xgboost4j-spark/target/*.jar"
 buildkite-agent artifact upload "jvm-packages/xgboost4j-flink/target/*.jar"
 buildkite-agent artifact upload "jvm-packages/xgboost4j-example/target/*.jar"
 echo "--- Build XGBoost JVM packages scala 2.13"
 tests/ci_build/ci_build.sh jvm tests/ci_build/build_jvm_packages.sh \
  ${SPARK_VERSION} "" "" "true"
-echo "--- Stash XGBoost4J JARs"
+echo "--- Stash XGBoost4J JARs (Scala 2.13)"
 buildkite-agent artifact upload "jvm-packages/xgboost4j/target/*.jar"
 buildkite-agent artifact upload "jvm-packages/xgboost4j-spark/target/*.jar"
 buildkite-agent artifact upload "jvm-packages/xgboost4j-flink/target/*.jar"
--- a/tests/buildkite/build-rpkg-win64-gpu.ps1
+++ b/tests/buildkite/build-rpkg-win64-gpu.ps1
@ -1,21 +0,0 @@
 # Buildkite step: build the XGBoost R package with CUDA on Windows and, on
 # release branches, upload the resulting tarball(s) to S3.
 # Treat PowerShell errors as terminating so the step fails fast.
 $ErrorActionPreference = "Stop"
 # Dot-source the shared CI settings.
 # NOTE(review): presumably this is where $is_release_branch is defined - confirm.
 . tests/buildkite/conftest.ps1
 Write-Host "--- Build XGBoost R package with CUDA"
 nvcc --version
 # NOTE(review): $arch_flag is assigned here but not referenced again in this script.
 $arch_flag = "-DGPU_COMPUTE_VER=75"
 # The actual build is done by the bash script; the commit hash is its only argument.
 bash tests/ci_build/build_r_pkg_with_cuda_win64.sh $Env:BUILDKITE_COMMIT
 # Exit codes of native commands are checked by hand via $LASTEXITCODE.
 if ($LASTEXITCODE -ne 0) { throw "Last command failed" }
 if ( $is_release_branch -eq 1 ) {
  Write-Host "--- Upload R tarball"
  # Copy every matching tarball to the nightly-builds bucket, under the branch name.
  Get-ChildItem . -Filter xgboost_r_gpu_win64_*.tar.gz |
  Foreach-Object {
    & aws s3 cp $_ s3://xgboost-nightly-builds/$Env:BUILDKITE_BRANCH/ `
    --acl public-read --no-progress
    if ($LASTEXITCODE -ne 0) { throw "Last command failed" }
  }
 }
--- a/tests/buildkite/pipeline-win64.yml
+++ b/tests/buildkite/pipeline-win64.yml
@ -13,11 +13,6 @@ steps:
    key: build-win64-gpu
    agents:
      queue: windows-cpu
  - label: ":windows: Build XGBoost R package for Windows with CUDA"
    command: "tests/buildkite/build-rpkg-win64-gpu.ps1"
    key: build-rpkg-win64-gpu
    agents:
      queue: windows-cpu
  - wait
--- a/tests/ci_build/build_jvm_packages.sh
+++ b/tests/ci_build/build_jvm_packages.sh
@ -24,12 +24,13 @@ if [ "x$gpu_arch" != "x" ]; then
  export GPU_ARCH_FLAG=$gpu_arch
 fi
 mvn_profile_string=""
 if [ "x$use_scala213" != "x" ]; then
-  export mvn_profile_string="-Pdefault,scala-2.13"
+  cd ..
  python dev/change_scala_version.py --scala-version 2.13 --purge-artifacts
  cd jvm-packages
 fi
-mvn --no-transfer-progress package $mvn_profile_string -Dspark.version=${spark_version} $gpu_options
+mvn --no-transfer-progress package -Dspark.version=${spark_version} $gpu_options
 set +x
 set +e
--- a/tests/ci_build/build_r_pkg_with_cuda_win64.sh
+++ b/tests/ci_build/build_r_pkg_with_cuda_win64.sh
@ -1,36 +0,0 @@
 #!/bin/bash
 # Build a Windows R package tarball that ships a CUDA-enabled xgboost.dll
 # built with CMake, named xgboost_r_gpu_win64_<commit hash>.tar.gz.
 # Abort on the first failing command and echo every command that is run.
 set -e
 set -x
 # Exactly one argument (the commit hash) is required.
 if [ "$#" -ne 1 ]
 then
  echo "Build the R package tarball with CUDA code. Usage: $0 [commit hash]"
  exit 1
 fi
 commit_hash="$1"
 # Clear all positional args
 set --
 # NOTE(review): presumably activates the default conda environment - confirm.
 source activate
 # The pack task is expected to leave an xgboost/ directory (renamed right below) - TODO confirm.
 python tests/ci_build/test_r_package.py --task=pack
 mv xgboost/ xgboost_rpack/
 # Configure and build the library with CUDA and R support in a separate build tree.
 mkdir build
 cd build
 cmake .. -G"Visual Studio 17 2022" -A x64 -DUSE_CUDA=ON -DR_LIB=ON -DLIBR_HOME="c:\\Program Files\\R\\R-4.3.2" -DCMAKE_PREFIX_PATH="C:\\rtools43\\x86_64-w64-mingw32.static.posix\\bin"
 cmake --build . --config Release --parallel
 cd ..
 # This super wacky hack is found in cmake/RPackageInstall.cmake.in and
 # cmake/RPackageInstallTargetSetup.cmake. This hack lets us bypass the normal build process of R
 # and have R use xgboost.dll that we've already built.
 rm -v xgboost_rpack/configure
 rm -rfv xgboost_rpack/src
 mkdir -p xgboost_rpack/src
 cp -v lib/xgboost.dll xgboost_rpack/src/
 # Makefiles with an empty 'all' target, so there is nothing left to compile.
 echo 'all:' > xgboost_rpack/src/Makefile
 echo 'all:' > xgboost_rpack/src/Makefile.win
 # Restore the package directory name, then archive and compress it in two steps.
 mv xgboost_rpack/ xgboost/
 /c/Rtools43/usr/bin/tar -cvf xgboost_r_gpu_win64_${commit_hash}.tar xgboost/
 /c/Rtools43/usr/bin/gzip -9c xgboost_r_gpu_win64_${commit_hash}.tar > xgboost_r_gpu_win64_${commit_hash}.tar.gz
--- a/tests/ci_build/deploy_jvm_packages.sh
+++ b/tests/ci_build/deploy_jvm_packages.sh
@ -27,7 +27,10 @@ rm -rf ../build/
 # Deploy to S3 bucket xgboost-maven-repo
 mvn --no-transfer-progress package deploy -P default,gpu,release-to-s3 -Dspark.version=${spark_version} -DskipTests
 # Deploy scala 2.13 to S3 bucket xgboost-maven-repo
-mvn --no-transfer-progress package deploy -P release-to-s3,default,scala-2.13 -Dspark.version=${spark_version} -DskipTests
+cd ..
 python dev/change_scala_version.py --scala-version 2.13 --purge-artifacts
 cd jvm-packages/
 mvn --no-transfer-progress package deploy -P default,gpu,release-to-s3 -Dspark.version=${spark_version} -DskipTests
 set +x
--- a/tests/ci_build/lint_python.py
+++ b/tests/ci_build/lint_python.py
@ -18,14 +18,17 @@ class LintersPaths:
        "python-package/",
        # tests
        "tests/python/test_config.py",
        "tests/python/test_callback.py",
        "tests/python/test_data_iterator.py",
        "tests/python/test_dmatrix.py",
        "tests/python/test_dt.py",
        "tests/python/test_demos.py",
        "tests/python/test_eval_metrics.py",
        "tests/python/test_multi_target.py",
        "tests/python/test_predict.py",
        "tests/python/test_quantile_dmatrix.py",
        "tests/python/test_tree_regularization.py",
        "tests/python/test_training_continuation.py",
        "tests/python/test_shap.py",
        "tests/python/test_model_io.py",
        "tests/python/test_with_pandas.py",
@ -39,12 +42,15 @@ class LintersPaths:
        "demo/dask/",
        "demo/rmm_plugin",
        "demo/json-model/json_parser.py",
        "demo/guide-python/continuation.py",
        "demo/guide-python/cat_in_the_dat.py",
        "demo/guide-python/callbacks.py",
        "demo/guide-python/categorical.py",
        "demo/guide-python/cat_pipeline.py",
        "demo/guide-python/feature_weights.py",
        "demo/guide-python/sklearn_parallel.py",
        "demo/guide-python/sklearn_examples.py",
        "demo/guide-python/sklearn_evals_result.py",
        "demo/guide-python/spark_estimator_examples.py",
        "demo/guide-python/external_memory.py",
        "demo/guide-python/individual_trees.py",
@ -86,6 +92,7 @@ class LintersPaths:
        "tests/python/test_multi_target.py",
        "tests/python-gpu/test_gpu_data_iterator.py",
        "tests/python-gpu/load_pickle.py",
        "tests/python-gpu/test_gpu_training_continuation.py",
        "tests/python/test_model_io.py",
        "tests/test_distributed/test_with_spark/test_data.py",
        "tests/test_distributed/test_gpu_with_spark/test_data.py",
@ -93,6 +100,7 @@ class LintersPaths:
        # demo
        "demo/json-model/json_parser.py",
        "demo/guide-python/external_memory.py",
        "demo/guide-python/continuation.py",
        "demo/guide-python/callbacks.py",
        "demo/guide-python/cat_in_the_dat.py",
        "demo/guide-python/categorical.py",
--- a/tests/ci_build/test_jvm_cross.sh
+++ b/tests/ci_build/test_jvm_cross.sh
@ -20,10 +20,11 @@ if [ ! -z "$RUN_INTEGRATION_TEST" ]; then
  cd $jvm_packages_dir
 fi
-# including maven profiles for different scala versions: 2.12 is the default at the moment.
+for scala_binary_version in "2.12" "2.13"; do
-for _maven_profile_string in "" "-Pdefault,scala-2.13"; do
+  cd ..
-  scala_version=$(mvn help:evaluate $_maven_profile_string -Dexpression=scala.version -q -DforceStdout)
+  python dev/change_scala_version.py --scala-version ${scala_binary_version}
-  scala_binary_version=$(mvn help:evaluate $_maven_profile_string -Dexpression=scala.binary.version -q -DforceStdout)
+  cd jvm-packages
  scala_version=$(mvn help:evaluate -Dexpression=scala.version -q -DforceStdout)
  # Install XGBoost4J JAR into local Maven repository
  mvn --no-transfer-progress install:install-file -Dfile=./xgboost4j/target/xgboost4j_${scala_binary_version}-${xgboost4j_version}.jar -DgroupId=ml.dmlc -DartifactId=xgboost4j_${scala_binary_version} -Dversion=${xgboost4j_version} -Dpackaging=jar
--- a/tests/cpp/tree/test_quantile_hist.cc
+++ b/tests/cpp/tree/test_quantile_hist.cc
@ -253,6 +253,5 @@ void TestColumnSplit(bst_target_t n_targets) {
 TEST(QuantileHist, ColumnSplit) { TestColumnSplit(1); }
-TEST(QuantileHist, ColumnSplitMultiTarget) { TestColumnSplit(3); }
+TEST(QuantileHist, DISABLED_ColumnSplitMultiTarget) { TestColumnSplit(3); }
 }  // namespace xgboost::tree
--- a/tests/cpp/tree/test_tree_stat.cc
+++ b/tests/cpp/tree/test_tree_stat.cc
@ -1,18 +1,21 @@
 /**
- * Copyright 2020-2023 by XGBoost Contributors
+ * Copyright 2020-2024, XGBoost Contributors
 */
 #include <gtest/gtest.h>
-#include <xgboost/context.h>  // for Context
+#include <xgboost/context.h>       // for Context
-#include <xgboost/task.h>     // for ObjInfo
+#include <xgboost/task.h>          // for ObjInfo
-#include <xgboost/tree_model.h>
+#include <xgboost/tree_model.h>    // for RegTree
-#include <xgboost/tree_updater.h>
+#include <xgboost/tree_updater.h>  // for TreeUpdater
-#include <memory>                     // for unique_ptr
+#include <memory>  // for unique_ptr
 #include "../../../src/tree/param.h"  // for TrainParam
 #include "../helpers.h"
 namespace xgboost {
 /**
 * @brief Test the tree statistic (like sum Hessian) is correct.
 */
 class UpdaterTreeStatTest : public ::testing::Test {
 protected:
  std::shared_ptr<DMatrix> p_dmat_;
@ -28,13 +31,12 @@ class UpdaterTreeStatTest : public ::testing::Test {
    gpairs_.Data()->Copy(g);
  }
-  void RunTest(std::string updater) {
+  void RunTest(Context const* ctx, std::string updater) {
    tree::TrainParam param;
    ObjInfo task{ObjInfo::kRegression};
    param.Init(Args{});
-    Context ctx(updater == "grow_gpu_hist" ? MakeCUDACtx(0) : MakeCUDACtx(DeviceOrd::CPUOrdinal()));
+    auto up = std::unique_ptr<TreeUpdater>{TreeUpdater::Create(updater, ctx, &task)};
    auto up = std::unique_ptr<TreeUpdater>{TreeUpdater::Create(updater, &ctx, &task)};
    up->Configure(Args{});
    RegTree tree{1u, kCols};
    std::vector<HostDeviceVector<bst_node_t>> position(1);
@ -51,77 +53,136 @@ class UpdaterTreeStatTest : public ::testing::Test {
 };
 #if defined(XGBOOST_USE_CUDA) || defined(XGBOOST_USE_HIP)
-TEST_F(UpdaterTreeStatTest, GpuHist) { this->RunTest("grow_gpu_hist"); }
+TEST_F(UpdaterTreeStatTest, GpuHist) {
-#endif  // defined(XGBOOST_USE_CUDA) || defined(XGBOOST_USE_HIP)
+  auto ctx = MakeCUDACtx(0);
  this->RunTest(&ctx, "grow_gpu_hist");
 }
-TEST_F(UpdaterTreeStatTest, Hist) { this->RunTest("grow_quantile_histmaker"); }
+TEST_F(UpdaterTreeStatTest, GpuApprox) {
  auto ctx = MakeCUDACtx(0);
  this->RunTest(&ctx, "grow_gpu_approx");
 }
 #endif  // defined(XGBOOST_USE_CUDA)
-TEST_F(UpdaterTreeStatTest, Exact) { this->RunTest("grow_colmaker"); }
+TEST_F(UpdaterTreeStatTest, Hist) {
  Context ctx;
  this->RunTest(&ctx, "grow_quantile_histmaker");
 }
-TEST_F(UpdaterTreeStatTest, Approx) { this->RunTest("grow_histmaker"); }
+TEST_F(UpdaterTreeStatTest, Exact) {
  Context ctx;
  this->RunTest(&ctx, "grow_colmaker");
 }
-class UpdaterEtaTest : public ::testing::Test {
+TEST_F(UpdaterTreeStatTest, Approx) {
  Context ctx;
  this->RunTest(&ctx, "grow_histmaker");
 }
 /**
 * @brief Test changing learning rate doesn't change internal splits.
 */
 class TestSplitWithEta : public ::testing::Test {
 protected:
-  std::shared_ptr<DMatrix> p_dmat_;
+  void Run(Context const* ctx, bst_target_t n_targets, std::string name) {
-  linalg::Matrix<GradientPair> gpairs_;
+    auto Xy = RandomDataGenerator{512, 64, 0.2}.Targets(n_targets).GenerateDMatrix(true);
  size_t constexpr static kRows = 10;
  size_t constexpr static kCols = 10;
  size_t constexpr static kClasses = 10;
-  void SetUp() override {
+    auto gen_tree = [&](float eta) {
-    p_dmat_ = RandomDataGenerator(kRows, kCols, .5f).GenerateDMatrix(true, false, kClasses);
+      auto tree =
-    auto g = GenerateRandomGradients(kRows);
+          std::make_unique<RegTree>(n_targets, static_cast<bst_feature_t>(Xy->Info().num_col_));
-    gpairs_.Reshape(kRows, 1);
+      std::vector<RegTree*> trees{tree.get()};
-    gpairs_.Data()->Copy(g);
+      ObjInfo task{ObjInfo::kRegression};
-  }
+      std::unique_ptr<TreeUpdater> updater{TreeUpdater::Create(name, ctx, &task)};
      updater->Configure({});
-  void RunTest(std::string updater) {
+      auto grad = GenerateRandomGradients(ctx, Xy->Info().num_row_, n_targets);
-    ObjInfo task{ObjInfo::kClassification};
+      CHECK_EQ(grad.Shape(1), n_targets);
      tree::TrainParam param;
      param.Init(Args{{"learning_rate", std::to_string(eta)}});
      HostDeviceVector<bst_node_t> position;
-    Context ctx(updater == "grow_gpu_hist" ? MakeCUDACtx(0) : MakeCUDACtx(DeviceOrd::CPUOrdinal()));
+      updater->Update(&param, &grad, Xy.get(), common::Span{&position, 1}, trees);
-
+      CHECK_EQ(tree->NumTargets(), n_targets);
-    float eta = 0.4;
+      if (n_targets > 1) {
-    auto up_0 = std::unique_ptr<TreeUpdater>{TreeUpdater::Create(updater, &ctx, &task)};
+        CHECK(tree->IsMultiTarget());
    up_0->Configure(Args{});
    tree::TrainParam param0;
    param0.Init(Args{{"eta", std::to_string(eta)}});
    auto up_1 = std::unique_ptr<TreeUpdater>{TreeUpdater::Create(updater, &ctx, &task)};
    up_1->Configure(Args{{"eta", "1.0"}});
    tree::TrainParam param1;
    param1.Init(Args{{"eta", "1.0"}});
    for (size_t iter = 0; iter < 4; ++iter) {
      RegTree tree_0{1u, kCols};
      {
        std::vector<HostDeviceVector<bst_node_t>> position(1);
        up_0->Update(&param0, &gpairs_, p_dmat_.get(), position, {&tree_0});
      }
      return tree;
    };
-      RegTree tree_1{1u, kCols};
+    auto eta_ratio = 8.0f;
-      {
+    auto p_tree0 = gen_tree(0.1f);
-        std::vector<HostDeviceVector<bst_node_t>> position(1);
+    auto p_tree1 = gen_tree(0.1f * eta_ratio);
-        up_1->Update(&param1, &gpairs_, p_dmat_.get(), position, {&tree_1});
+    // Just to make sure we are not testing a stump.
-      }
+    CHECK_GE(p_tree0->NumExtraNodes(), 32);
-      tree_0.WalkTree([&](bst_node_t nidx) {
+
-        if (tree_0[nidx].IsLeaf()) {
+    bst_node_t n_nodes{0};
-          EXPECT_NEAR(tree_1[nidx].LeafValue() * eta, tree_0[nidx].LeafValue(), kRtEps);
+    p_tree0->WalkTree([&](bst_node_t nidx) {
      if (p_tree0->IsLeaf(nidx)) {
        CHECK(p_tree1->IsLeaf(nidx));
        if (p_tree0->IsMultiTarget()) {
          CHECK(p_tree1->IsMultiTarget());
          auto leaf_0 = p_tree0->GetMultiTargetTree()->LeafValue(nidx);
          auto leaf_1 = p_tree1->GetMultiTargetTree()->LeafValue(nidx);
          CHECK_EQ(leaf_0.Size(), leaf_1.Size());
          for (std::size_t i = 0; i < leaf_0.Size(); ++i) {
            CHECK_EQ(leaf_0(i) * eta_ratio, leaf_1(i));
          }
          CHECK(std::isnan(p_tree0->SplitCond(nidx)));
          CHECK(std::isnan(p_tree1->SplitCond(nidx)));
        } else {
          // NON-mt tree reuses split cond for leaf value.
          auto leaf_0 = p_tree0->SplitCond(nidx);
          auto leaf_1 = p_tree1->SplitCond(nidx);
          CHECK_EQ(leaf_0 * eta_ratio, leaf_1);
        }
-        return true;
+      } else {
-      });
+        CHECK(!p_tree1->IsLeaf(nidx));
-    }
+        CHECK_EQ(p_tree0->SplitCond(nidx), p_tree1->SplitCond(nidx));
      }
      n_nodes++;
      return true;
    });
    ASSERT_EQ(n_nodes, p_tree0->NumExtraNodes() + 1);
  }
 };
-TEST_F(UpdaterEtaTest, Hist) { this->RunTest("grow_quantile_histmaker"); }
+TEST_F(TestSplitWithEta, HistMulti) {
  Context ctx;
  bst_target_t n_targets{3};
  this->Run(&ctx, n_targets, "grow_quantile_histmaker");
 }
-TEST_F(UpdaterEtaTest, Exact) { this->RunTest("grow_colmaker"); }
+TEST_F(TestSplitWithEta, Hist) {
  Context ctx;
  bst_target_t n_targets{1};
  this->Run(&ctx, n_targets, "grow_quantile_histmaker");
 }
-TEST_F(UpdaterEtaTest, Approx) { this->RunTest("grow_histmaker"); }
+TEST_F(TestSplitWithEta, Approx) {
  Context ctx;
  bst_target_t n_targets{1};
  this->Run(&ctx, n_targets, "grow_histmaker");
 }
 TEST_F(TestSplitWithEta, Exact) {
  Context ctx;
  bst_target_t n_targets{1};
  this->Run(&ctx, n_targets, "grow_colmaker");
 }
 #if defined(XGBOOST_USE_CUDA) || defined(XGBOOST_USE_HIP)
-TEST_F(UpdaterEtaTest, GpuHist) { this->RunTest("grow_gpu_hist"); }
+TEST_F(TestSplitWithEta, GpuHist) {
-#endif  // defined(XGBOOST_USE_CUDA) || defined(XGBOOST_USE_HIP)
+  auto ctx = MakeCUDACtx(0);
  bst_target_t n_targets{1};
  this->Run(&ctx, n_targets, "grow_gpu_hist");
 }
 TEST_F(TestSplitWithEta, GpuApprox) {
  auto ctx = MakeCUDACtx(0);
  bst_target_t n_targets{1};
  this->Run(&ctx, n_targets, "grow_gpu_approx");
 }
 #endif  // defined(XGBOOST_USE_CUDA)
 class TestMinSplitLoss : public ::testing::Test {
  std::shared_ptr<DMatrix> dmat_;
--- a/tests/python-gpu/test_gpu_training_continuation.py
+++ b/tests/python-gpu/test_gpu_training_continuation.py
@ -1,54 +1,12 @@
 import json
 import numpy as np
 import pytest
-import xgboost as xgb
+from xgboost.testing.continuation import run_training_continuation_model_output
 rng = np.random.RandomState(1994)
 class TestGPUTrainingContinuation:
-    def test_training_continuation(self):
+    @pytest.mark.parametrize("tree_method", ["hist", "approx"])
-        kRows = 64
+    def test_model_output(self, tree_method: str) -> None:
-        kCols = 32
+        run_training_continuation_model_output("cuda", tree_method)
        X = np.random.randn(kRows, kCols)
        y = np.random.randn(kRows)
        dtrain = xgb.DMatrix(X, y)
        params = {
            "tree_method": "gpu_hist",
            "max_depth": "2",
            "gamma": "0.1",
            "alpha": "0.01",
        }
        bst_0 = xgb.train(params, dtrain, num_boost_round=64)
        dump_0 = bst_0.get_dump(dump_format="json")
        bst_1 = xgb.train(params, dtrain, num_boost_round=32)
        bst_1 = xgb.train(params, dtrain, num_boost_round=32, xgb_model=bst_1)
        dump_1 = bst_1.get_dump(dump_format="json")
        def recursive_compare(obj_0, obj_1):
            if isinstance(obj_0, float):
                assert np.isclose(obj_0, obj_1, atol=1e-6)
            elif isinstance(obj_0, str):
                assert obj_0 == obj_1
            elif isinstance(obj_0, int):
                assert obj_0 == obj_1
            elif isinstance(obj_0, dict):
                keys_0 = list(obj_0.keys())
                keys_1 = list(obj_1.keys())
                values_0 = list(obj_0.values())
                values_1 = list(obj_1.values())
                for i in range(len(obj_0.items())):
                    assert keys_0[i] == keys_1[i]
                    if list(obj_0.keys())[i] != "missing":
                        recursive_compare(values_0[i], values_1[i])
            else:
                for i in range(len(obj_0)):
                    recursive_compare(obj_0[i], obj_1[i])
        assert len(dump_0) == len(dump_1)
        for i in range(len(dump_0)):
            obj_0 = json.loads(dump_0[i])
            obj_1 = json.loads(dump_1[i])
            recursive_compare(obj_0, obj_1)
--- a/tests/python/test_callback.py
+++ b/tests/python/test_callback.py
@ -16,13 +16,14 @@ class TestCallbacks:
    @classmethod
    def setup_class(cls):
        from sklearn.datasets import load_breast_cancer
        X, y = load_breast_cancer(return_X_y=True)
        cls.X = X
        cls.y = y
-        split = int(X.shape[0]*0.8)
+        split = int(X.shape[0] * 0.8)
-        cls.X_train = X[: split, ...]
+        cls.X_train = X[:split, ...]
-        cls.y_train = y[: split, ...]
+        cls.y_train = y[:split, ...]
        cls.X_valid = X[split:, ...]
        cls.y_valid = y[split:, ...]
@ -31,31 +32,32 @@ class TestCallbacks:
        D_train: xgb.DMatrix,
        D_valid: xgb.DMatrix,
        rounds: int,
-        verbose_eval: Union[bool, int]
+        verbose_eval: Union[bool, int],
    ):
        def check_output(output: str) -> None:
            if int(verbose_eval) == 1:
                # Should print each iteration info
-                assert len(output.split('\n')) == rounds
+                assert len(output.split("\n")) == rounds
            elif int(verbose_eval) > rounds:
                # Should print first and latest iteration info
-                assert len(output.split('\n')) == 2
+                assert len(output.split("\n")) == 2
            else:
                # Should print info once per period, in addition to the first and latest
                # iteration
                num_periods = rounds // int(verbose_eval)
                # Extra information is required for latest iteration
                is_extra_info_required = num_periods * int(verbose_eval) < (rounds - 1)
-                assert len(output.split('\n')) == (
+                assert len(output.split("\n")) == (
                    1 + num_periods + int(is_extra_info_required)
                )
        evals_result: xgb.callback.TrainingCallback.EvalsLog = {}
-        params = {'objective': 'binary:logistic', 'eval_metric': 'error'}
+        params = {"objective": "binary:logistic", "eval_metric": "error"}
        with tm.captured_output() as (out, err):
            xgb.train(
-                params, D_train,
+                params,
-                evals=[(D_train, 'Train'), (D_valid, 'Valid')],
+                D_train,
                evals=[(D_train, "Train"), (D_valid, "Valid")],
                num_boost_round=rounds,
                evals_result=evals_result,
                verbose_eval=verbose_eval,
@ -73,14 +75,16 @@ class TestCallbacks:
        D_valid = xgb.DMatrix(self.X_valid, self.y_valid)
        evals_result = {}
        rounds = 10
-        xgb.train({'objective': 'binary:logistic',
+        xgb.train(
-                   'eval_metric': 'error'}, D_train,
+            {"objective": "binary:logistic", "eval_metric": "error"},
-                  evals=[(D_train, 'Train'), (D_valid, 'Valid')],
+            D_train,
-                  num_boost_round=rounds,
+            evals=[(D_train, "Train"), (D_valid, "Valid")],
-                  evals_result=evals_result,
+            num_boost_round=rounds,
-                  verbose_eval=True)
+            evals_result=evals_result,
-        assert len(evals_result['Train']['error']) == rounds
+            verbose_eval=True,
-        assert len(evals_result['Valid']['error']) == rounds
+        )
        assert len(evals_result["Train"]["error"]) == rounds
        assert len(evals_result["Valid"]["error"]) == rounds
        self.run_evaluation_monitor(D_train, D_valid, rounds, True)
        self.run_evaluation_monitor(D_train, D_valid, rounds, 2)
@ -93,72 +97,83 @@ class TestCallbacks:
        evals_result = {}
        rounds = 30
        early_stopping_rounds = 5
-        booster = xgb.train({'objective': 'binary:logistic',
+        booster = xgb.train(
-                             'eval_metric': 'error'}, D_train,
+            {"objective": "binary:logistic", "eval_metric": "error"},
-                            evals=[(D_train, 'Train'), (D_valid, 'Valid')],
+            D_train,
-                            num_boost_round=rounds,
+            evals=[(D_train, "Train"), (D_valid, "Valid")],
-                            evals_result=evals_result,
+            num_boost_round=rounds,
-                            verbose_eval=True,
+            evals_result=evals_result,
-                            early_stopping_rounds=early_stopping_rounds)
+            verbose_eval=True,
-        dump = booster.get_dump(dump_format='json')
+            early_stopping_rounds=early_stopping_rounds,
        )
        dump = booster.get_dump(dump_format="json")
        assert len(dump) - booster.best_iteration == early_stopping_rounds + 1
    def test_early_stopping_custom_eval(self):
        D_train = xgb.DMatrix(self.X_train, self.y_train)
        D_valid = xgb.DMatrix(self.X_valid, self.y_valid)
        early_stopping_rounds = 5
-        booster = xgb.train({'objective': 'binary:logistic',
+        booster = xgb.train(
-                             'eval_metric': 'error',
+            {
-                             'tree_method': 'hist'}, D_train,
+                "objective": "binary:logistic",
-                            evals=[(D_train, 'Train'), (D_valid, 'Valid')],
+                "eval_metric": "error",
-                            feval=tm.eval_error_metric,
+                "tree_method": "hist",
-                            num_boost_round=1000,
+            },
-                            early_stopping_rounds=early_stopping_rounds,
+            D_train,
-                            verbose_eval=False)
+            evals=[(D_train, "Train"), (D_valid, "Valid")],
-        dump = booster.get_dump(dump_format='json')
+            feval=tm.eval_error_metric,
            num_boost_round=1000,
            early_stopping_rounds=early_stopping_rounds,
            verbose_eval=False,
        )
        dump = booster.get_dump(dump_format="json")
        assert len(dump) - booster.best_iteration == early_stopping_rounds + 1
    def test_early_stopping_customize(self):
        D_train = xgb.DMatrix(self.X_train, self.y_train)
        D_valid = xgb.DMatrix(self.X_valid, self.y_valid)
        early_stopping_rounds = 5
-        early_stop = xgb.callback.EarlyStopping(rounds=early_stopping_rounds,
+        early_stop = xgb.callback.EarlyStopping(
-                                                metric_name='CustomErr',
+            rounds=early_stopping_rounds, metric_name="CustomErr", data_name="Train"
-                                                data_name='Train')
+        )
        # Specify which dataset and which metric should be used for early stopping.
        booster = xgb.train(
-            {'objective': 'binary:logistic',
+            {
-             'eval_metric': ['error', 'rmse'],
+                "objective": "binary:logistic",
-             'tree_method': 'hist'}, D_train,
+                "eval_metric": ["error", "rmse"],
-            evals=[(D_train, 'Train'), (D_valid, 'Valid')],
+                "tree_method": "hist",
            },
            D_train,
            evals=[(D_train, "Train"), (D_valid, "Valid")],
            feval=tm.eval_error_metric,
            num_boost_round=1000,
            callbacks=[early_stop],
-            verbose_eval=False)
+            verbose_eval=False,
-        dump = booster.get_dump(dump_format='json')
+        )
        dump = booster.get_dump(dump_format="json")
        assert len(dump) - booster.best_iteration == early_stopping_rounds + 1
-        assert len(early_stop.stopping_history['Train']['CustomErr']) == len(dump)
+        assert len(early_stop.stopping_history["Train"]["CustomErr"]) == len(dump)
        rounds = 100
        early_stop = xgb.callback.EarlyStopping(
            rounds=early_stopping_rounds,
-            metric_name='CustomErr',
+            metric_name="CustomErr",
-            data_name='Train',
+            data_name="Train",
            min_delta=100,
            save_best=True,
        )
        booster = xgb.train(
            {
-                'objective': 'binary:logistic',
+                "objective": "binary:logistic",
-                'eval_metric': ['error', 'rmse'],
+                "eval_metric": ["error", "rmse"],
-                'tree_method': 'hist'
+                "tree_method": "hist",
            },
            D_train,
-            evals=[(D_train, 'Train'), (D_valid, 'Valid')],
+            evals=[(D_train, "Train"), (D_valid, "Valid")],
            feval=tm.eval_error_metric,
            num_boost_round=rounds,
            callbacks=[early_stop],
-            verbose_eval=False
+            verbose_eval=False,
        )
        # No iteration can be made with min_delta == 100
        assert booster.best_iteration == 0
@ -166,18 +181,20 @@ class TestCallbacks:
    def test_early_stopping_skl(self):
        from sklearn.datasets import load_breast_cancer
        X, y = load_breast_cancer(return_X_y=True)
        early_stopping_rounds = 5
        cls = xgb.XGBClassifier(
-            early_stopping_rounds=early_stopping_rounds, eval_metric='error'
+            early_stopping_rounds=early_stopping_rounds, eval_metric="error"
        )
        cls.fit(X, y, eval_set=[(X, y)])
        booster = cls.get_booster()
-        dump = booster.get_dump(dump_format='json')
+        dump = booster.get_dump(dump_format="json")
        assert len(dump) - booster.best_iteration == early_stopping_rounds + 1
    def test_early_stopping_custom_eval_skl(self):
        from sklearn.datasets import load_breast_cancer
        X, y = load_breast_cancer(return_X_y=True)
        early_stopping_rounds = 5
        early_stop = xgb.callback.EarlyStopping(rounds=early_stopping_rounds)
@ -186,11 +203,12 @@ class TestCallbacks:
        )
        cls.fit(X, y, eval_set=[(X, y)])
        booster = cls.get_booster()
-        dump = booster.get_dump(dump_format='json')
+        dump = booster.get_dump(dump_format="json")
        assert len(dump) - booster.best_iteration == early_stopping_rounds + 1
    def test_early_stopping_save_best_model(self):
        from sklearn.datasets import load_breast_cancer
        X, y = load_breast_cancer(return_X_y=True)
        n_estimators = 100
        early_stopping_rounds = 5
@ -200,11 +218,11 @@ class TestCallbacks:
        cls = xgb.XGBClassifier(
            n_estimators=n_estimators,
            eval_metric=tm.eval_error_metric_skl,
-            callbacks=[early_stop]
+            callbacks=[early_stop],
        )
        cls.fit(X, y, eval_set=[(X, y)])
        booster = cls.get_booster()
-        dump = booster.get_dump(dump_format='json')
+        dump = booster.get_dump(dump_format="json")
        assert len(dump) == booster.best_iteration + 1
        early_stop = xgb.callback.EarlyStopping(
@ -220,8 +238,9 @@ class TestCallbacks:
            cls.fit(X, y, eval_set=[(X, y)])
        # No error
-        early_stop = xgb.callback.EarlyStopping(rounds=early_stopping_rounds,
+        early_stop = xgb.callback.EarlyStopping(
-                                                save_best=False)
+            rounds=early_stopping_rounds, save_best=False
        )
        xgb.XGBClassifier(
            booster="gblinear",
            n_estimators=10,
@ -231,14 +250,17 @@ class TestCallbacks:
    def test_early_stopping_continuation(self):
        from sklearn.datasets import load_breast_cancer
        X, y = load_breast_cancer(return_X_y=True)
-        cls = xgb.XGBClassifier(eval_metric=tm.eval_error_metric_skl)
+
        early_stopping_rounds = 5
        early_stop = xgb.callback.EarlyStopping(
            rounds=early_stopping_rounds, save_best=True
        )
-        with pytest.warns(UserWarning):
+        cls = xgb.XGBClassifier(
-            cls.fit(X, y, eval_set=[(X, y)], callbacks=[early_stop])
+            eval_metric=tm.eval_error_metric_skl, callbacks=[early_stop]
        )
        cls.fit(X, y, eval_set=[(X, y)])
        booster = cls.get_booster()
        assert booster.num_boosted_rounds() == booster.best_iteration + 1
@ -256,21 +278,10 @@ class TestCallbacks:
            )
            cls.fit(X, y, eval_set=[(X, y)])
            booster = cls.get_booster()
-            assert booster.num_boosted_rounds() == \
+            assert (
-                booster.best_iteration + early_stopping_rounds + 1
+                booster.num_boosted_rounds()
-
+                == booster.best_iteration + early_stopping_rounds + 1
-    def test_deprecated(self):
+            )
        from sklearn.datasets import load_breast_cancer
        X, y = load_breast_cancer(return_X_y=True)
        early_stopping_rounds = 5
        early_stop = xgb.callback.EarlyStopping(
            rounds=early_stopping_rounds, save_best=True
        )
        clf = xgb.XGBClassifier(
            eval_metric=tm.eval_error_metric_skl, callbacks=[early_stop]
        )
        with pytest.raises(ValueError, match=r".*set_params.*"):
            clf.fit(X, y, eval_set=[(X, y)], callbacks=[early_stop])
    def run_eta_decay(self, tree_method):
        """Test learning rate scheduler, used by both CPU and GPU tests."""
@ -343,7 +354,7 @@ class TestCallbacks:
            callbacks=[scheduler([0, 0, 0, 0])],
            evals_result=evals_result,
        )
-        eval_errors_2 = list(map(float, evals_result['eval']['error']))
+        eval_errors_2 = list(map(float, evals_result["eval"]["error"]))
        assert isinstance(bst, xgb.core.Booster)
        # validation error should not decrease, if eta/learning_rate = 0
        assert eval_errors_2[0] == eval_errors_2[-1]
@ -361,7 +372,7 @@ class TestCallbacks:
            callbacks=[scheduler(eta_decay)],
            evals_result=evals_result,
        )
-        eval_errors_3 = list(map(float, evals_result['eval']['error']))
+        eval_errors_3 = list(map(float, evals_result["eval"]["error"]))
        assert isinstance(bst, xgb.core.Booster)
--- a/tests/python/test_early_stopping.py
+++ b/tests/python/test_early_stopping.py
@ -15,23 +15,23 @@ class TestEarlyStopping:
        from sklearn.model_selection import train_test_split
        digits = load_digits(n_class=2)
-        X = digits['data']
+        X = digits["data"]
-        y = digits['target']
+        y = digits["target"]
        X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
-        clf1 = xgb.XGBClassifier(learning_rate=0.1)
+        clf1 = xgb.XGBClassifier(
-        clf1.fit(X_train, y_train, early_stopping_rounds=5, eval_metric="auc",
+            learning_rate=0.1, early_stopping_rounds=5, eval_metric="auc"
-                 eval_set=[(X_test, y_test)])
+        )
-        clf2 = xgb.XGBClassifier(learning_rate=0.1)
+        clf1.fit(X_train, y_train, eval_set=[(X_test, y_test)])
-        clf2.fit(X_train, y_train, early_stopping_rounds=4, eval_metric="auc",
+        clf2 = xgb.XGBClassifier(
-                 eval_set=[(X_test, y_test)])
+            learning_rate=0.1, early_stopping_rounds=4, eval_metric="auc"
        )
        clf2.fit(X_train, y_train, eval_set=[(X_test, y_test)])
        # should be the same
        assert clf1.best_score == clf2.best_score
        assert clf1.best_score != 1
        # check overfit
        clf3 = xgb.XGBClassifier(
-            learning_rate=0.1,
+            learning_rate=0.1, eval_metric="auc", early_stopping_rounds=10
            eval_metric="auc",
            early_stopping_rounds=10
        )
        clf3.fit(X_train, y_train, eval_set=[(X_test, y_test)])
        base_score = get_basescore(clf3)
@ -39,9 +39,9 @@ class TestEarlyStopping:
        clf3 = xgb.XGBClassifier(
            learning_rate=0.1,
-            base_score=.5,
+            base_score=0.5,
            eval_metric="auc",
-            early_stopping_rounds=10
+            early_stopping_rounds=10,
        )
        clf3.fit(X_train, y_train, eval_set=[(X_test, y_test)])
--- a/tests/python/test_eval_metrics.py
+++ b/tests/python/test_eval_metrics.py
@ -9,37 +9,41 @@ rng = np.random.RandomState(1337)
 class TestEvalMetrics:
-    xgb_params_01 = {'nthread': 1, 'eval_metric': 'error'}
+    xgb_params_01 = {"nthread": 1, "eval_metric": "error"}
-    xgb_params_02 = {'nthread': 1, 'eval_metric': ['error']}
+    xgb_params_02 = {"nthread": 1, "eval_metric": ["error"]}
-    xgb_params_03 = {'nthread': 1, 'eval_metric': ['rmse', 'error']}
+    xgb_params_03 = {"nthread": 1, "eval_metric": ["rmse", "error"]}
-    xgb_params_04 = {'nthread': 1, 'eval_metric': ['error', 'rmse']}
+    xgb_params_04 = {"nthread": 1, "eval_metric": ["error", "rmse"]}
    def evalerror_01(self, preds, dtrain):
        labels = dtrain.get_label()
-        return 'error', float(sum(labels != (preds > 0.0))) / len(labels)
+        return "error", float(sum(labels != (preds > 0.0))) / len(labels)
    def evalerror_02(self, preds, dtrain):
        labels = dtrain.get_label()
-        return [('error', float(sum(labels != (preds > 0.0))) / len(labels))]
+        return [("error", float(sum(labels != (preds > 0.0))) / len(labels))]
    @pytest.mark.skipif(**tm.no_sklearn())
    def evalerror_03(self, preds, dtrain):
        from sklearn.metrics import mean_squared_error
        labels = dtrain.get_label()
-        return [('rmse', mean_squared_error(labels, preds)),
+        return [
-                ('error', float(sum(labels != (preds > 0.0))) / len(labels))]
+            ("rmse", mean_squared_error(labels, preds)),
            ("error", float(sum(labels != (preds > 0.0))) / len(labels)),
        ]
    @pytest.mark.skipif(**tm.no_sklearn())
    def evalerror_04(self, preds, dtrain):
        from sklearn.metrics import mean_squared_error
        labels = dtrain.get_label()
-        return [('error', float(sum(labels != (preds > 0.0))) / len(labels)),
+        return [
-                ('rmse', mean_squared_error(labels, preds))]
+            ("error", float(sum(labels != (preds > 0.0))) / len(labels)),
            ("rmse", mean_squared_error(labels, preds)),
        ]
    @pytest.mark.skipif(**tm.no_sklearn())
    def test_eval_metrics(self):
@ -50,15 +54,15 @@ class TestEvalMetrics:
        from sklearn.datasets import load_digits
        digits = load_digits(n_class=2)
-        X = digits['data']
+        X = digits["data"]
-        y = digits['target']
+        y = digits["target"]
        Xt, Xv, yt, yv = train_test_split(X, y, test_size=0.2, random_state=0)
        dtrain = xgb.DMatrix(Xt, label=yt)
        dvalid = xgb.DMatrix(Xv, label=yv)
-        watchlist = [(dtrain, 'train'), (dvalid, 'val')]
+        watchlist = [(dtrain, "train"), (dvalid, "val")]
        gbdt_01 = xgb.train(self.xgb_params_01, dtrain, num_boost_round=10)
        gbdt_02 = xgb.train(self.xgb_params_02, dtrain, num_boost_round=10)
@ -66,26 +70,54 @@ class TestEvalMetrics:
        assert gbdt_01.predict(dvalid)[0] == gbdt_02.predict(dvalid)[0]
        assert gbdt_01.predict(dvalid)[0] == gbdt_03.predict(dvalid)[0]
-        gbdt_01 = xgb.train(self.xgb_params_01, dtrain, 10, watchlist,
+        gbdt_01 = xgb.train(
-                            early_stopping_rounds=2)
+            self.xgb_params_01, dtrain, 10, watchlist, early_stopping_rounds=2
-        gbdt_02 = xgb.train(self.xgb_params_02, dtrain, 10, watchlist,
+        )
-                            early_stopping_rounds=2)
+        gbdt_02 = xgb.train(
-        gbdt_03 = xgb.train(self.xgb_params_03, dtrain, 10, watchlist,
+            self.xgb_params_02, dtrain, 10, watchlist, early_stopping_rounds=2
-                            early_stopping_rounds=2)
+        )
-        gbdt_04 = xgb.train(self.xgb_params_04, dtrain, 10, watchlist,
+        gbdt_03 = xgb.train(
-                            early_stopping_rounds=2)
+            self.xgb_params_03, dtrain, 10, watchlist, early_stopping_rounds=2
        )
        gbdt_04 = xgb.train(
            self.xgb_params_04, dtrain, 10, watchlist, early_stopping_rounds=2
        )
        assert gbdt_01.predict(dvalid)[0] == gbdt_02.predict(dvalid)[0]
        assert gbdt_01.predict(dvalid)[0] == gbdt_03.predict(dvalid)[0]
        assert gbdt_03.predict(dvalid)[0] != gbdt_04.predict(dvalid)[0]
-        gbdt_01 = xgb.train(self.xgb_params_01, dtrain, 10, watchlist,
+        gbdt_01 = xgb.train(
-                            early_stopping_rounds=2, feval=self.evalerror_01)
+            self.xgb_params_01,
-        gbdt_02 = xgb.train(self.xgb_params_02, dtrain, 10, watchlist,
+            dtrain,
-                            early_stopping_rounds=2, feval=self.evalerror_02)
+            10,
-        gbdt_03 = xgb.train(self.xgb_params_03, dtrain, 10, watchlist,
+            watchlist,
-                            early_stopping_rounds=2, feval=self.evalerror_03)
+            early_stopping_rounds=2,
-        gbdt_04 = xgb.train(self.xgb_params_04, dtrain, 10, watchlist,
+            feval=self.evalerror_01,
-                            early_stopping_rounds=2, feval=self.evalerror_04)
+        )
        gbdt_02 = xgb.train(
            self.xgb_params_02,
            dtrain,
            10,
            watchlist,
            early_stopping_rounds=2,
            feval=self.evalerror_02,
        )
        gbdt_03 = xgb.train(
            self.xgb_params_03,
            dtrain,
            10,
            watchlist,
            early_stopping_rounds=2,
            feval=self.evalerror_03,
        )
        gbdt_04 = xgb.train(
            self.xgb_params_04,
            dtrain,
            10,
            watchlist,
            early_stopping_rounds=2,
            feval=self.evalerror_04,
        )
        assert gbdt_01.predict(dvalid)[0] == gbdt_02.predict(dvalid)[0]
        assert gbdt_01.predict(dvalid)[0] == gbdt_03.predict(dvalid)[0]
        assert gbdt_03.predict(dvalid)[0] != gbdt_04.predict(dvalid)[0]
@ -93,6 +125,7 @@ class TestEvalMetrics:
    @pytest.mark.skipif(**tm.no_sklearn())
    def test_gamma_deviance(self):
        from sklearn.metrics import mean_gamma_deviance
        rng = np.random.RandomState(1994)
        n_samples = 100
        n_features = 30
@ -101,8 +134,13 @@ class TestEvalMetrics:
        y = rng.randn(n_samples)
        y = y - y.min() * 100
-        reg = xgb.XGBRegressor(tree_method="hist", objective="reg:gamma", n_estimators=10)
+        reg = xgb.XGBRegressor(
-        reg.fit(X, y, eval_metric="gamma-deviance")
+            tree_method="hist",
            objective="reg:gamma",
            n_estimators=10,
            eval_metric="gamma-deviance",
        )
        reg.fit(X, y)
        booster = reg.get_booster()
        score = reg.predict(X)
@ -113,16 +151,26 @@ class TestEvalMetrics:
    @pytest.mark.skipif(**tm.no_sklearn())
    def test_gamma_lik(self) -> None:
        import scipy.stats as stats
        rng = np.random.default_rng(1994)
        n_samples = 32
        n_features = 10
-        X = rng.normal(0, 1, size=n_samples * n_features).reshape((n_samples, n_features))
+        X = rng.normal(0, 1, size=n_samples * n_features).reshape(
            (n_samples, n_features)
        )
        alpha, loc, beta = 5.0, 11.1, 22
-        y = stats.gamma.rvs(alpha, loc=loc, scale=beta, size=n_samples, random_state=rng)
+        y = stats.gamma.rvs(
-        reg = xgb.XGBRegressor(tree_method="hist", objective="reg:gamma", n_estimators=64)
+            alpha, loc=loc, scale=beta, size=n_samples, random_state=rng
-        reg.fit(X, y, eval_metric="gamma-nloglik", eval_set=[(X, y)])
+        )
        reg = xgb.XGBRegressor(
            tree_method="hist",
            objective="reg:gamma",
            n_estimators=64,
            eval_metric="gamma-nloglik",
        )
        reg.fit(X, y, eval_set=[(X, y)])
        score = reg.predict(X)
@ -134,7 +182,7 @@ class TestEvalMetrics:
        # XGBoost uses the canonical link function of gamma in evaluation function.
        # so \theta = - (1.0 / y)
        # dispersion is hardcoded as 1.0, so shape (a in scipy parameter) is also 1.0
-        beta = - (1.0 / (- (1.0 / y)))  # == y
+        beta = -(1.0 / (-(1.0 / y)))  # == y
        nloglik_stats = -stats.gamma.logpdf(score, a=1.0, scale=beta)
        np.testing.assert_allclose(nloglik, np.mean(nloglik_stats), rtol=1e-3)
@ -153,7 +201,7 @@ class TestEvalMetrics:
            n_features,
            n_informative=n_features,
            n_redundant=0,
-            random_state=rng
+            random_state=rng,
        )
        Xy = xgb.DMatrix(X, y)
        booster = xgb.train(
@ -197,7 +245,7 @@ class TestEvalMetrics:
            n_informative=n_features,
            n_redundant=0,
            n_classes=n_classes,
-            random_state=rng
+            random_state=rng,
        )
        if weighted:
            weights = rng.randn(n_samples)
@ -242,20 +290,25 @@ class TestEvalMetrics:
    def run_pr_auc_binary(self, tree_method):
        from sklearn.datasets import make_classification
        from sklearn.metrics import auc, precision_recall_curve
        X, y = make_classification(128, 4, n_classes=2, random_state=1994)
-        clf = xgb.XGBClassifier(tree_method=tree_method, n_estimators=1)
+        clf = xgb.XGBClassifier(
-        clf.fit(X, y, eval_metric="aucpr", eval_set=[(X, y)])
+            tree_method=tree_method, n_estimators=1, eval_metric="aucpr"
        )
        clf.fit(X, y, eval_set=[(X, y)])
        evals_result = clf.evals_result()["validation_0"]["aucpr"][-1]
        y_score = clf.predict_proba(X)[:, 1]  # get the positive column
        precision, recall, _ = precision_recall_curve(y, y_score)
        prauc = auc(recall, precision)
-        # Interpolation results are slightly different from sklearn, but overall should be
+        # Interpolation results are slightly different from sklearn, but overall should
-        # similar.
+        # be similar.
        np.testing.assert_allclose(prauc, evals_result, rtol=1e-2)
-        clf = xgb.XGBClassifier(tree_method=tree_method, n_estimators=10)
+        clf = xgb.XGBClassifier(
-        clf.fit(X, y, eval_metric="aucpr", eval_set=[(X, y)])
+            tree_method=tree_method, n_estimators=10, eval_metric="aucpr"
        )
        clf.fit(X, y, eval_set=[(X, y)])
        evals_result = clf.evals_result()["validation_0"]["aucpr"][-1]
        np.testing.assert_allclose(0.99, evals_result, rtol=1e-2)
@ -264,16 +317,21 @@ class TestEvalMetrics:
    def run_pr_auc_multi(self, tree_method):
        from sklearn.datasets import make_classification
        X, y = make_classification(
            64, 16, n_informative=8, n_classes=3, random_state=1994
        )
-        clf = xgb.XGBClassifier(tree_method=tree_method, n_estimators=1)
+        clf = xgb.XGBClassifier(
-        clf.fit(X, y, eval_metric="aucpr", eval_set=[(X, y)])
+            tree_method=tree_method, n_estimators=1, eval_metric="aucpr"
        )
        clf.fit(X, y, eval_set=[(X, y)])
        evals_result = clf.evals_result()["validation_0"]["aucpr"][-1]
-        # No available implementation for comparison, just check that XGBoost converges to
+        # No available implementation for comparison, just check that XGBoost converges
-        # 1.0
+        # to 1.0
-        clf = xgb.XGBClassifier(tree_method=tree_method, n_estimators=10)
+        clf = xgb.XGBClassifier(
-        clf.fit(X, y, eval_metric="aucpr", eval_set=[(X, y)])
+            tree_method=tree_method, n_estimators=10, eval_metric="aucpr"
        )
        clf.fit(X, y, eval_set=[(X, y)])
        evals_result = clf.evals_result()["validation_0"]["aucpr"][-1]
        np.testing.assert_allclose(1.0, evals_result, rtol=1e-2)
@ -282,9 +340,13 @@ class TestEvalMetrics:
    def run_pr_auc_ltr(self, tree_method):
        from sklearn.datasets import make_classification
        X, y = make_classification(128, 4, n_classes=2, random_state=1994)
        ltr = xgb.XGBRanker(
-            tree_method=tree_method, n_estimators=16, objective="rank:pairwise"
+            tree_method=tree_method,
            n_estimators=16,
            objective="rank:pairwise",
            eval_metric="aucpr",
        )
        groups = np.array([32, 32, 64])
        ltr.fit(
@ -293,7 +355,6 @@ class TestEvalMetrics:
            group=groups,
            eval_set=[(X, y)],
            eval_group=[groups],
            eval_metric="aucpr",
        )
        results = ltr.evals_result()["validation_0"]["aucpr"]
        assert results[-1] >= 0.99
--- a/tests/python/test_training_continuation.py
+++ b/tests/python/test_training_continuation.py
@ -6,6 +6,7 @@ import pytest
 import xgboost as xgb
 from xgboost import testing as tm
 from xgboost.testing.continuation import run_training_continuation_model_output
 rng = np.random.RandomState(1337)
@ -15,54 +16,51 @@ class TestTrainingContinuation:
    def generate_parameters(self):
        xgb_params_01_binary = {
-            'nthread': 1,
+            "nthread": 1,
        }
        xgb_params_02_binary = {
-            'nthread': 1,
+            "nthread": 1,
-            'num_parallel_tree': self.num_parallel_tree
+            "num_parallel_tree": self.num_parallel_tree,
        }
        xgb_params_03_binary = {
-            'nthread': 1,
+            "nthread": 1,
-            'num_class': 5,
+            "num_class": 5,
-            'num_parallel_tree': self.num_parallel_tree
+            "num_parallel_tree": self.num_parallel_tree,
        }
-        return [
+        return [xgb_params_01_binary, xgb_params_02_binary, xgb_params_03_binary]
            xgb_params_01_binary, xgb_params_02_binary, xgb_params_03_binary
        ]
-    def run_training_continuation(self, xgb_params_01, xgb_params_02,
+    def run_training_continuation(self, xgb_params_01, xgb_params_02, xgb_params_03):
                                  xgb_params_03):
        from sklearn.datasets import load_digits
        from sklearn.metrics import mean_squared_error
        digits_2class = load_digits(n_class=2)
        digits_5class = load_digits(n_class=5)
-        X_2class = digits_2class['data']
+        X_2class = digits_2class["data"]
-        y_2class = digits_2class['target']
+        y_2class = digits_2class["target"]
-        X_5class = digits_5class['data']
+        X_5class = digits_5class["data"]
-        y_5class = digits_5class['target']
+        y_5class = digits_5class["target"]
        dtrain_2class = xgb.DMatrix(X_2class, label=y_2class)
        dtrain_5class = xgb.DMatrix(X_5class, label=y_5class)
-        gbdt_01 = xgb.train(xgb_params_01, dtrain_2class,
+        gbdt_01 = xgb.train(xgb_params_01, dtrain_2class, num_boost_round=10)
                            num_boost_round=10)
        ntrees_01 = len(gbdt_01.get_dump())
        assert ntrees_01 == 10
-        gbdt_02 = xgb.train(xgb_params_01, dtrain_2class,
+        gbdt_02 = xgb.train(xgb_params_01, dtrain_2class, num_boost_round=0)
-                            num_boost_round=0)
+        gbdt_02.save_model("xgb_tc.json")
        gbdt_02.save_model('xgb_tc.json')
-        gbdt_02a = xgb.train(xgb_params_01, dtrain_2class,
+        gbdt_02a = xgb.train(
-                             num_boost_round=10, xgb_model=gbdt_02)
+            xgb_params_01, dtrain_2class, num_boost_round=10, xgb_model=gbdt_02
-        gbdt_02b = xgb.train(xgb_params_01, dtrain_2class,
+        )
-                             num_boost_round=10, xgb_model="xgb_tc.json")
+        gbdt_02b = xgb.train(
            xgb_params_01, dtrain_2class, num_boost_round=10, xgb_model="xgb_tc.json"
        )
        ntrees_02a = len(gbdt_02a.get_dump())
        ntrees_02b = len(gbdt_02b.get_dump())
        assert ntrees_02a == 10
@ -76,20 +74,21 @@ class TestTrainingContinuation:
        res2 = mean_squared_error(y_2class, gbdt_02b.predict(dtrain_2class))
        assert res1 == res2
-        gbdt_03 = xgb.train(xgb_params_01, dtrain_2class,
+        gbdt_03 = xgb.train(xgb_params_01, dtrain_2class, num_boost_round=3)
-                            num_boost_round=3)
+        gbdt_03.save_model("xgb_tc.json")
        gbdt_03.save_model('xgb_tc.json')
-        gbdt_03a = xgb.train(xgb_params_01, dtrain_2class,
+        gbdt_03a = xgb.train(
-                             num_boost_round=7, xgb_model=gbdt_03)
+            xgb_params_01, dtrain_2class, num_boost_round=7, xgb_model=gbdt_03
-        gbdt_03b = xgb.train(xgb_params_01, dtrain_2class,
+        )
-                             num_boost_round=7, xgb_model="xgb_tc.json")
+        gbdt_03b = xgb.train(
            xgb_params_01, dtrain_2class, num_boost_round=7, xgb_model="xgb_tc.json"
        )
        ntrees_03a = len(gbdt_03a.get_dump())
        ntrees_03b = len(gbdt_03b.get_dump())
        assert ntrees_03a == 10
        assert ntrees_03b == 10
-        os.remove('xgb_tc.json')
+        os.remove("xgb_tc.json")
        res1 = mean_squared_error(y_2class, gbdt_03a.predict(dtrain_2class))
        res2 = mean_squared_error(y_2class, gbdt_03b.predict(dtrain_2class))
@ -113,16 +112,14 @@ class TestTrainingContinuation:
            y_2class,
            gbdt_04.predict(
                dtrain_2class, iteration_range=(0, gbdt_04.num_boosted_rounds())
-            )
+            ),
        )
        assert res1 == res2
-        gbdt_05 = xgb.train(xgb_params_03, dtrain_5class,
+        gbdt_05 = xgb.train(xgb_params_03, dtrain_5class, num_boost_round=7)
-                            num_boost_round=7)
+        gbdt_05 = xgb.train(
-        gbdt_05 = xgb.train(xgb_params_03,
+            xgb_params_03, dtrain_5class, num_boost_round=3, xgb_model=gbdt_05
-                            dtrain_5class,
+        )
                            num_boost_round=3,
                            xgb_model=gbdt_05)
        res1 = gbdt_05.predict(dtrain_5class)
        res2 = gbdt_05.predict(
@ -149,8 +146,8 @@ class TestTrainingContinuation:
        from sklearn.datasets import load_breast_cancer
        X, y = load_breast_cancer(return_X_y=True)
-        clf = xgb.XGBClassifier(n_estimators=2)
+        clf = xgb.XGBClassifier(n_estimators=2, eval_metric="logloss")
-        clf.fit(X, y, eval_set=[(X, y)], eval_metric="logloss")
+        clf.fit(X, y, eval_set=[(X, y)])
        assert tm.non_increasing(clf.evals_result()["validation_0"]["logloss"])
        with tempfile.TemporaryDirectory() as tmpdir:
@ -160,5 +157,10 @@ class TestTrainingContinuation:
        clf = xgb.XGBClassifier(n_estimators=2)
        # change metric to error
-        clf.fit(X, y, eval_set=[(X, y)], eval_metric="error")
+        clf.set_params(eval_metric="error")
        clf.fit(X, y, eval_set=[(X, y)], xgb_model=loaded)
        assert tm.non_increasing(clf.evals_result()["validation_0"]["error"])
    @pytest.mark.parametrize("tree_method", ["hist", "approx", "exact"])
    def test_model_output(self, tree_method: str) -> None:
        run_training_continuation_model_output("cpu", tree_method)
--- a/tests/python/test_with_sklearn.py
+++ b/tests/python/test_with_sklearn.py
@ -30,8 +30,8 @@ def test_binary_classification():
    kf = KFold(n_splits=2, shuffle=True, random_state=rng)
    for cls in (xgb.XGBClassifier, xgb.XGBRFClassifier):
        for train_index, test_index in kf.split(X, y):
-            clf = cls(random_state=42)
+            clf = cls(random_state=42, eval_metric=['auc', 'logloss'])
-            xgb_model = clf.fit(X[train_index], y[train_index], eval_metric=['auc', 'logloss'])
+            xgb_model = clf.fit(X[train_index], y[train_index])
            preds = xgb_model.predict(X[test_index])
            labels = y[test_index]
            err = sum(1 for i in range(len(preds))
@ -101,10 +101,11 @@ def test_best_iteration():
    def train(booster: str, forest: Optional[int]) -> None:
        rounds = 4
        cls = xgb.XGBClassifier(
-            n_estimators=rounds, num_parallel_tree=forest, booster=booster
+            n_estimators=rounds,
-        ).fit(
+            num_parallel_tree=forest,
-            X, y, eval_set=[(X, y)], early_stopping_rounds=3
+            booster=booster,
-        )
+            early_stopping_rounds=3,
        ).fit(X, y, eval_set=[(X, y)])
        assert cls.best_iteration == rounds - 1
        # best_iteration is used by default, assert that under gblinear it's
@ -112,9 +113,9 @@ def test_best_iteration():
        cls.predict(X)
    num_parallel_tree = 4
-    train('gbtree', num_parallel_tree)
+    train("gbtree", num_parallel_tree)
-    train('dart', num_parallel_tree)
+    train("dart", num_parallel_tree)
-    train('gblinear', None)
+    train("gblinear", None)
 def test_ranking():
@ -258,6 +259,7 @@ def test_stacking_classification():
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)
    clf.fit(X_train, y_train).score(X_test, y_test)
@pytest.mark.skipif(**tm.no_pandas())
 def test_feature_importances_weight():
    from sklearn.datasets import load_digits
@ -474,7 +476,8 @@ def run_housing_rf_regression(tree_method):
    rfreg = xgb.XGBRFRegressor()
    with pytest.raises(NotImplementedError):
-        rfreg.fit(X, y, early_stopping_rounds=10)
+        rfreg.set_params(early_stopping_rounds=10)
        rfreg.fit(X, y)
 def test_rf_regression():
@ -574,7 +577,7 @@ def test_classification_with_custom_objective():
        return logregobj(y, p)
    cls.set_params(objective=wrapped)
-    cls.predict(X)              # no throw
+    cls.predict(X)  # no throw
    cls.fit(X, y)
    assert is_called[0]
@ -844,51 +847,65 @@ def run_validation_weights(model):
    y_train, y_test = y[:1600], y[1600:]
    # instantiate model
-    param_dist = {'objective': 'binary:logistic', 'n_estimators': 2,
+    param_dist = {
-                  'random_state': 123}
+        "objective": "binary:logistic",
        "n_estimators": 2,
        "random_state": 123,
    }
    clf = model(**param_dist)
    # train it using instance weights only in the training set
    weights_train = np.random.choice([1, 2], len(X_train))
-    clf.fit(X_train, y_train,
+    clf.set_params(eval_metric="logloss")
-            sample_weight=weights_train,
+    clf.fit(
-            eval_set=[(X_test, y_test)],
+        X_train,
-            eval_metric='logloss',
+        y_train,
-            verbose=False)
+        sample_weight=weights_train,
-
+        eval_set=[(X_test, y_test)],
        verbose=False,
    )
    # evaluate logloss metric on test set *without* using weights
    evals_result_without_weights = clf.evals_result()
-    logloss_without_weights = evals_result_without_weights[
+    logloss_without_weights = evals_result_without_weights["validation_0"]["logloss"]
        "validation_0"]["logloss"]
    # now use weights for the test set
    np.random.seed(0)
    weights_test = np.random.choice([1, 2], len(X_test))
-    clf.fit(X_train, y_train,
+    clf.set_params(eval_metric="logloss")
-            sample_weight=weights_train,
+    clf.fit(
-            eval_set=[(X_test, y_test)],
+        X_train,
-            sample_weight_eval_set=[weights_test],
+        y_train,
-            eval_metric='logloss',
+        sample_weight=weights_train,
-            verbose=False)
+        eval_set=[(X_test, y_test)],
        sample_weight_eval_set=[weights_test],
        verbose=False,
    )
    evals_result_with_weights = clf.evals_result()
    logloss_with_weights = evals_result_with_weights["validation_0"]["logloss"]
    # check that the logloss in the test set is actually different when using
    # weights than when not using them
-    assert all((logloss_with_weights[i] != logloss_without_weights[i]
+    assert all((logloss_with_weights[i] != logloss_without_weights[i] for i in [0, 1]))
                for i in [0, 1]))
    with pytest.raises(ValueError):
        # length of eval set and sample weight doesn't match.
-        clf.fit(X_train, y_train, sample_weight=weights_train,
+        clf.fit(
-                eval_set=[(X_train, y_train), (X_test, y_test)],
+            X_train,
-                sample_weight_eval_set=[weights_train])
+            y_train,
            sample_weight=weights_train,
            eval_set=[(X_train, y_train), (X_test, y_test)],
            sample_weight_eval_set=[weights_train],
        )
    with pytest.raises(ValueError):
        cls = xgb.XGBClassifier()
-        cls.fit(X_train, y_train, sample_weight=weights_train,
+        cls.fit(
-                eval_set=[(X_train, y_train), (X_test, y_test)],
+            X_train,
-                sample_weight_eval_set=[weights_train])
+            y_train,
            sample_weight=weights_train,
            eval_set=[(X_train, y_train), (X_test, y_test)],
            sample_weight_eval_set=[weights_train],
        )
 def test_validation_weights():
@ -960,8 +977,7 @@ def test_XGBClassifier_resume():
        # file name of stored xgb model
        model1.save_model(model1_path)
-        model2 = xgb.XGBClassifier(
+        model2 = xgb.XGBClassifier(learning_rate=0.3, random_state=0, n_estimators=8)
            learning_rate=0.3, random_state=0, n_estimators=8)
        model2.fit(X, Y, xgb_model=model1_path)
        pred2 = model2.predict(X)
@ -972,8 +988,7 @@ def test_XGBClassifier_resume():
        # file name of 'Booster' instance Xgb model
        model1.get_booster().save_model(model1_booster_path)
-        model2 = xgb.XGBClassifier(
+        model2 = xgb.XGBClassifier(learning_rate=0.3, random_state=0, n_estimators=8)
            learning_rate=0.3, random_state=0, n_estimators=8)
        model2.fit(X, Y, xgb_model=model1_booster_path)
        pred2 = model2.predict(X)
@ -1279,12 +1294,16 @@ def test_estimator_reg(estimator, check):
        ):
            estimator.fit(X, y)
        return
-    if os.environ["PYTEST_CURRENT_TEST"].find("check_estimators_overwrite_params") != -1:
+    if (
        os.environ["PYTEST_CURRENT_TEST"].find("check_estimators_overwrite_params")
        != -1
    ):
        # A hack to pass the scikit-learn parameter mutation tests.  XGBoost regressor
-        # returns actual internal default values for parameters in `get_params`, but those
+        # returns actual internal default values for parameters in `get_params`, but
-        # are set as `None` in sklearn interface to avoid duplication.  So we fit a dummy
+        # those are set as `None` in sklearn interface to avoid duplication.  So we fit
-        # model and obtain the default parameters here for the mutation tests.
+        # a dummy model and obtain the default parameters here for the mutation tests.
        from sklearn.datasets import make_regression
        X, y = make_regression(n_samples=2, n_features=1)
        estimator.set_params(**xgb.XGBRegressor().fit(X, y).get_params())
@ -1325,6 +1344,7 @@ def test_categorical():
 def test_evaluation_metric():
    from sklearn.datasets import load_diabetes, load_digits
    from sklearn.metrics import mean_absolute_error
    X, y = load_diabetes(return_X_y=True)
    n_estimators = 16
@ -1341,17 +1361,6 @@ def test_evaluation_metric():
    for line in lines:
        assert line.find("mean_absolute_error") != -1
    def metric(predt: np.ndarray, Xy: xgb.DMatrix):
        y = Xy.get_label()
        return "m", np.abs(predt - y).sum()
    with pytest.warns(UserWarning):
        reg = xgb.XGBRegressor(
            tree_method="hist",
            n_estimators=1,
        )
        reg.fit(X, y, eval_set=[(X, y)], eval_metric=metric)
    def merror(y_true: np.ndarray, predt: np.ndarray):
        n_samples = y_true.shape[0]
        assert n_samples == predt.size
--- a/tests/test_distributed/test_gpu_with_dask/test_gpu_with_dask.py
+++ b/tests/test_distributed/test_gpu_with_dask/test_gpu_with_dask.py
@ -363,12 +363,12 @@ class TestDistributedGPU:
            device="cuda",
            eval_metric="error",
            n_estimators=100,
            early_stopping_rounds=early_stopping_rounds,
        )
        cls.client = local_cuda_client
        cls.fit(
            X,
            y,
            early_stopping_rounds=early_stopping_rounds,
            eval_set=[(valid_X, valid_y)],
        )
        booster = cls.get_booster()
--- a/tests/test_distributed/test_with_dask/test_with_dask.py
+++ b/tests/test_distributed/test_with_dask/test_with_dask.py
@ -937,8 +937,10 @@ def run_empty_dmatrix_auc(client: "Client", device: str, n_workers: int) -> None
    valid_X = dd.from_array(valid_X_, chunksize=n_samples)
    valid_y = dd.from_array(valid_y_, chunksize=n_samples)
-    cls = xgb.dask.DaskXGBClassifier(device=device, n_estimators=2)
+    cls = xgb.dask.DaskXGBClassifier(
-    cls.fit(X, y, eval_metric=["auc", "aucpr"], eval_set=[(valid_X, valid_y)])
+        device=device, n_estimators=2, eval_metric=["auc", "aucpr"]
    )
    cls.fit(X, y, eval_set=[(valid_X, valid_y)])
    # multiclass
    X_, y_ = make_classification(
@ -966,8 +968,10 @@ def run_empty_dmatrix_auc(client: "Client", device: str, n_workers: int) -> None
    valid_X = dd.from_array(valid_X_, chunksize=n_samples)
    valid_y = dd.from_array(valid_y_, chunksize=n_samples)
-    cls = xgb.dask.DaskXGBClassifier(device=device, n_estimators=2)
+    cls = xgb.dask.DaskXGBClassifier(
-    cls.fit(X, y, eval_metric=["auc", "aucpr"], eval_set=[(valid_X, valid_y)])
+        device=device, n_estimators=2, eval_metric=["auc", "aucpr"]
    )
    cls.fit(X, y, eval_set=[(valid_X, valid_y)])
 def test_empty_dmatrix_auc() -> None:
@ -994,11 +998,11 @@ def run_auc(client: "Client", device: str) -> None:
    valid_X = dd.from_array(valid_X_, chunksize=10)
    valid_y = dd.from_array(valid_y_, chunksize=10)
-    cls = xgb.XGBClassifier(device=device, n_estimators=2)
+    cls = xgb.XGBClassifier(device=device, n_estimators=2, eval_metric="auc")
-    cls.fit(X_, y_, eval_metric="auc", eval_set=[(valid_X_, valid_y_)])
+    cls.fit(X_, y_, eval_set=[(valid_X_, valid_y_)])
-    dcls = xgb.dask.DaskXGBClassifier(device=device, n_estimators=2)
+    dcls = xgb.dask.DaskXGBClassifier(device=device, n_estimators=2, eval_metric="auc")
-    dcls.fit(X, y, eval_metric="auc", eval_set=[(valid_X, valid_y)])
+    dcls.fit(X, y, eval_set=[(valid_X, valid_y)])
    approx = dcls.evals_result()["validation_0"]["auc"]
    exact = cls.evals_result()["validation_0"]["auc"]
@ -1267,16 +1271,16 @@ def test_dask_ranking(client: "Client") -> None:
    qid_valid = qid_valid.astype(np.uint32)
    qid_test = qid_test.astype(np.uint32)
-    rank = xgb.dask.DaskXGBRanker(n_estimators=2500)
+    rank = xgb.dask.DaskXGBRanker(
        n_estimators=2500, eval_metric=["ndcg"], early_stopping_rounds=10
    )
    rank.fit(
        x_train,
        y_train,
        qid=qid_train,
        eval_set=[(x_test, y_test), (x_train, y_train)],
        eval_qid=[qid_test, qid_train],
        eval_metric=["ndcg"],
        verbose=True,
        early_stopping_rounds=10,
    )
    assert rank.n_features_in_ == 46
    assert rank.best_score > 0.98
@ -2150,13 +2154,15 @@ class TestDaskCallbacks:
        valid_X, valid_y = load_breast_cancer(return_X_y=True)
        valid_X, valid_y = da.from_array(valid_X), da.from_array(valid_y)
        cls = xgb.dask.DaskXGBClassifier(
-            objective="binary:logistic", tree_method="hist", n_estimators=1000
+            objective="binary:logistic",
            tree_method="hist",
            n_estimators=1000,
            early_stopping_rounds=early_stopping_rounds,
        )
        cls.client = client
        cls.fit(
            X,
            y,
            early_stopping_rounds=early_stopping_rounds,
            eval_set=[(valid_X, valid_y)],
        )
        booster = cls.get_booster()
@ -2165,15 +2171,17 @@ class TestDaskCallbacks:
        # Specify the metric
        cls = xgb.dask.DaskXGBClassifier(
-            objective="binary:logistic", tree_method="hist", n_estimators=1000
+            objective="binary:logistic",
            tree_method="hist",
            n_estimators=1000,
            early_stopping_rounds=early_stopping_rounds,
            eval_metric="error",
        )
        cls.client = client
        cls.fit(
            X,
            y,
            early_stopping_rounds=early_stopping_rounds,
            eval_set=[(valid_X, valid_y)],
            eval_metric="error",
        )
        assert tm.non_increasing(cls.evals_result()["validation_0"]["error"])
        booster = cls.get_booster()
@ -2215,12 +2223,12 @@ class TestDaskCallbacks:
            tree_method="hist",
            n_estimators=1000,
            eval_metric=tm.eval_error_metric_skl,
            early_stopping_rounds=early_stopping_rounds,
        )
        cls.client = client
        cls.fit(
            X,
            y,
            early_stopping_rounds=early_stopping_rounds,
            eval_set=[(valid_X, valid_y)],
        )
        booster = cls.get_booster()
@ -2234,21 +2242,22 @@ class TestDaskCallbacks:
        X, y = load_breast_cancer(return_X_y=True)
        X, y = da.from_array(X), da.from_array(y)
        cls = xgb.dask.DaskXGBClassifier(
            objective="binary:logistic", tree_method="hist", n_estimators=10
        )
        cls.client = client
        with tempfile.TemporaryDirectory() as tmpdir:
-            cls.fit(
+            cls = xgb.dask.DaskXGBClassifier(
-                X,
+                objective="binary:logistic",
-                y,
+                tree_method="hist",
                n_estimators=10,
                callbacks=[
                    xgb.callback.TrainingCheckPoint(
                        directory=Path(tmpdir), interval=1, name="model"
                    )
                ],
            )
            cls.client = client
            cls.fit(
                X,
                y,
            )
            for i in range(1, 10):
                assert os.path.exists(
                    os.path.join(
--- a/tests/test_distributed/test_with_spark/test_spark_local.py
+++ b/tests/test_distributed/test_with_spark/test_spark_local.py
@ -311,24 +311,20 @@ def clf_with_weight(
    y_val = np.array([0, 1])
    w_train = np.array([1.0, 2.0])
    w_val = np.array([1.0, 2.0])
-    cls2 = XGBClassifier()
+    cls2 = XGBClassifier(eval_metric="logloss", early_stopping_rounds=1)
    cls2.fit(
        X_train,
        y_train,
        eval_set=[(X_val, y_val)],
        early_stopping_rounds=1,
        eval_metric="logloss",
    )
-    cls3 = XGBClassifier()
+    cls3 = XGBClassifier(eval_metric="logloss", early_stopping_rounds=1)
    cls3.fit(
        X_train,
        y_train,
        sample_weight=w_train,
        eval_set=[(X_val, y_val)],
        sample_weight_eval_set=[w_val],
        early_stopping_rounds=1,
        eval_metric="logloss",
    )
    cls_df_train_with_eval_weight = spark.createDataFrame(