merge latest changes

2024-03-12 09:13:09 -07:00 · 2024-03-12 09:13:09 -07:00 · 968dbf25fb
commit 968dbf25fb
parent 44db1cef54 1450aebb74
174 changed files with 5276 additions and 2304 deletions
--- a/.clang-format
+++ b/.clang-format
@ -17,7 +17,7 @@ AllowShortEnumsOnASingleLine: true
 AllowShortBlocksOnASingleLine: Never
 AllowShortCaseLabelsOnASingleLine: false
 AllowShortFunctionsOnASingleLine: All
-AllowShortLambdasOnASingleLine: All
+AllowShortLambdasOnASingleLine: Inline
 AllowShortIfStatementsOnASingleLine: WithoutElse
 AllowShortLoopsOnASingleLine: true
 AlwaysBreakAfterDefinitionReturnType: None
--- a/.github/workflows/i386.yml
+++ b/.github/workflows/i386.yml
@ -5,6 +5,10 @@ on: [push, pull_request]
 permissions:
  contents: read # to fetch code (actions/checkout)

+concurrency:
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
+  cancel-in-progress: true
+
 jobs:
  build-32bit:
    name: Build 32-bit
--- a/.github/workflows/jvm_tests.yml
+++ b/.github/workflows/jvm_tests.yml
@ -5,6 +5,10 @@ on: [push, pull_request]
 permissions:
  contents: read # to fetch code (actions/checkout)

+concurrency:
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
+  cancel-in-progress: true
+
 jobs:
  test-with-jvm:
    name: Test JVM on OS ${{ matrix.os }}
@ -15,31 +19,36 @@ jobs:
        os: [windows-latest, ubuntu-latest, macos-11]

    steps:
-    - uses: actions/checkout@e2f20e631ae6d7dd3b768f56a5d2af784dd54791 # v2.5.0
+    - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
      with:
        submodules: 'true'

-    - uses: actions/setup-python@7f80679172b057fc5e90d70d197929d454754a5a # v4.3.0
+    - uses: mamba-org/setup-micromamba@422500192359a097648154e8db4e39bdb6c6eed7  # v1.8.1
      with:
-        python-version: '3.8'
-        architecture: 'x64'
-
-    - uses: actions/setup-java@d202f5dbf7256730fb690ec59f6381650114feb2 # v3.6.0
-      with:
-        java-version: 1.8
-
-    - name: Install Python packages
-      run: |
-        python -m pip install wheel setuptools
-        python -m pip install awscli
+        micromamba-version: '1.5.6-0'
+        environment-name: jvm_tests
+        create-args: >-
+          python=3.10
+          awscli
+        cache-downloads: true
+        cache-environment: true
+        init-shell: bash powershell

    - name: Cache Maven packages
-      uses: actions/cache@6998d139ddd3e68c71e9e398d8e40b71a2f39812 # v3.2.5
+      uses: actions/cache@13aacd865c20de90d75de3b17ebe84f7a17d57d2  # v4.0.0
      with:
        path: ~/.m2
        key: ${{ runner.os }}-m2-${{ hashFiles('./jvm-packages/pom.xml') }}
        restore-keys: ${{ runner.os }}-m2-${{ hashFiles('./jvm-packages/pom.xml') }}

+    - name: Build xgboost4j.dll
+      run: |
+        mkdir build
+        cd build
+        cmake .. -G"Visual Studio 17 2022" -A x64 -DJVM_BINDINGS=ON
+        cmake --build . --config Release
+      if: matrix.os == 'windows-latest'
+
    - name: Test XGBoost4J (Core)
      run: |
        cd jvm-packages
@ -47,7 +56,8 @@ jobs:

    - name: Extract branch name
      shell: bash
-      run: echo "##[set-output name=branch;]$(echo ${GITHUB_REF#refs/heads/})"
+      run: |
+        echo "branch=${GITHUB_REF#refs/heads/}" >> "$GITHUB_OUTPUT"
      id: extract_branch
      if: |
        (github.ref == 'refs/heads/master' || contains(github.ref, 'refs/heads/release_')) &&
@ -58,7 +68,7 @@ jobs:
        cd lib/
        Rename-Item -Path xgboost4j.dll -NewName xgboost4j_${{ github.sha }}.dll
        dir
-        python -m awscli s3 cp xgboost4j_${{ github.sha }}.dll s3://xgboost-nightly-builds/${{ steps.extract_branch.outputs.branch }}/libxgboost4j/ --acl public-read
+        python -m awscli s3 cp xgboost4j_${{ github.sha }}.dll s3://xgboost-nightly-builds/${{ steps.extract_branch.outputs.branch }}/libxgboost4j/ --acl public-read --region us-west-2
      if: |
        (github.ref == 'refs/heads/master' || contains(github.ref, 'refs/heads/release_')) &&
        matrix.os == 'windows-latest'
@ -67,11 +77,12 @@ jobs:
        AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY_IAM_S3_UPLOADER }}

    - name: Publish artifact libxgboost4j.dylib to S3
+      shell: bash -l {0}
      run: |
        cd lib/
        mv -v libxgboost4j.dylib libxgboost4j_${{ github.sha }}.dylib
        ls
-        python -m awscli s3 cp libxgboost4j_${{ github.sha }}.dylib s3://xgboost-nightly-builds/${{ steps.extract_branch.outputs.branch }}/libxgboost4j/ --acl public-read
+        python -m awscli s3 cp libxgboost4j_${{ github.sha }}.dylib s3://xgboost-nightly-builds/${{ steps.extract_branch.outputs.branch }}/libxgboost4j/ --acl public-read --region us-west-2
      if: |
        (github.ref == 'refs/heads/master' || contains(github.ref, 'refs/heads/release_')) &&
        matrix.os == 'macos-11'
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@ -9,6 +9,10 @@ on: [push, pull_request]
 permissions:
  contents: read # to fetch code (actions/checkout)

+concurrency:
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
+  cancel-in-progress: true
+
 # A workflow run is made up of one or more jobs that can run sequentially or in parallel
 jobs:
  gtest-cpu:
@ -174,7 +178,7 @@ jobs:
    - uses: actions/checkout@e2f20e631ae6d7dd3b768f56a5d2af784dd54791 # v2.5.0
      with:
        submodules: 'true'
-    - uses: actions/setup-python@7f80679172b057fc5e90d70d197929d454754a5a # v4.3.0
+    - uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5.0.0
      with:
        python-version: "3.8"
        architecture: 'x64'
--- a/.github/workflows/python_tests.yml
+++ b/.github/workflows/python_tests.yml
@ -9,6 +9,10 @@ defaults:
  run:
    shell: bash -l {0}

+concurrency:
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
+  cancel-in-progress: true
+
 jobs:
  python-mypy-lint:
    runs-on: ubuntu-latest
@ -310,7 +314,7 @@ jobs:
          submodules: 'true'

      - name: Set up Python 3.8
-        uses: actions/setup-python@v4
+        uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5.0.0
        with:
          python-version: 3.8

--- a/.github/workflows/python_wheels.yml
+++ b/.github/workflows/python_wheels.yml
@ -5,6 +5,10 @@ on: [push, pull_request]
 permissions:
  contents: read # to fetch code (actions/checkout)

+concurrency:
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
+  cancel-in-progress: true
+
 jobs:
  python-wheels:
    name: Build wheel for ${{ matrix.platform_id }}
@ -21,7 +25,7 @@ jobs:
      with:
        submodules: 'true'
    - name: Setup Python
-      uses: actions/setup-python@7f80679172b057fc5e90d70d197929d454754a5a # v4.3.0
+      uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5.0.0
      with:
        python-version: "3.8"
    - name: Build wheels
--- a/.github/workflows/r_nold.yml
+++ b/.github/workflows/r_nold.yml
@ -10,6 +10,10 @@ on:
 permissions:
  contents: read # to fetch code (actions/checkout)

+concurrency:
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
+  cancel-in-progress: true
+
 jobs:
  test-R-noLD:
    if: github.event.comment.body == '/gha run r-nold-test' && contains('OWNER,MEMBER,COLLABORATOR', github.event.comment.author_association)
--- a/.github/workflows/r_tests.yml
+++ b/.github/workflows/r_tests.yml
@ -8,6 +8,10 @@ env:
 permissions:
  contents: read # to fetch code (actions/checkout)

+concurrency:
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
+  cancel-in-progress: true
+
 jobs:
  lintr:
    runs-on: ${{ matrix.config.os }}
@ -46,7 +50,7 @@ jobs:
        MAKEFLAGS="-j$(nproc)" R CMD INSTALL R-package/
        Rscript tests/ci_build/lint_r.R $(pwd)

-  test-R-on-Windows:
+  test-Rpkg:
    runs-on: ${{ matrix.config.os }}
    name: Test R on OS ${{ matrix.config.os }}, R ${{ matrix.config.r }}, Compiler ${{ matrix.config.compiler }}, Build ${{ matrix.config.build }}
    strategy:
@ -54,11 +58,17 @@ jobs:
      matrix:
        config:
          - {os: windows-latest, r: 'release', compiler: 'mingw', build: 'autotools'}
+          - {os: ubuntu-latest, r: 'release', compiler: 'none', build: 'cmake'}
    env:
      R_REMOTES_NO_ERRORS_FROM_WARNINGS: true
      RSPM: ${{ matrix.config.rspm }}

    steps:
+    - name: Install system dependencies
+      run: |
+        sudo apt update
+        sudo apt install libcurl4-openssl-dev libssl-dev libssh2-1-dev libgit2-dev libglpk-dev libxml2-dev libharfbuzz-dev libfribidi-dev
+      if: matrix.config.os == 'ubuntu-latest'
    - uses: actions/checkout@e2f20e631ae6d7dd3b768f56a5d2af784dd54791 # v2.5.0
      with:
        submodules: 'true'
@ -74,7 +84,7 @@ jobs:
        key: ${{ runner.os }}-r-${{ matrix.config.r }}-6-${{ hashFiles('R-package/DESCRIPTION') }}
        restore-keys: ${{ runner.os }}-r-${{ matrix.config.r }}-6-${{ hashFiles('R-package/DESCRIPTION') }}

-    - uses: actions/setup-python@7f80679172b057fc5e90d70d197929d454754a5a # v4.3.0
+    - uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5.0.0
      with:
        python-version: "3.8"
        architecture: 'x64'
@ -89,6 +99,12 @@ jobs:
    - name: Test R
      run: |
        python tests/ci_build/test_r_package.py --compiler='${{ matrix.config.compiler }}' --build-tool="${{ matrix.config.build }}" --task=check
+      if: matrix.config.compiler != 'none'
+
+    - name: Test R
+      run: |
+        python tests/ci_build/test_r_package.py --build-tool="${{ matrix.config.build }}" --task=check
+      if: matrix.config.compiler == 'none'

  test-R-on-Debian:
    name: Test R package on Debian
--- a/R-package/CMakeLists.txt
+++ b/R-package/CMakeLists.txt
@ -26,7 +26,6 @@ endif()
 target_compile_definitions(
  xgboost-r PUBLIC
  -DXGBOOST_STRICT_R_MODE=1
-  -DXGBOOST_CUSTOMIZE_GLOBAL_PRNG=1
  -DDMLC_LOG_BEFORE_THROW=0
  -DDMLC_DISABLE_STDIN=1
  -DDMLC_LOG_CUSTOMIZE=1
--- a/R-package/DESCRIPTION
+++ b/R-package/DESCRIPTION
@ -56,7 +56,8 @@ Suggests:
    testthat,
    igraph (>= 1.0.1),
    float,
-    titanic
+    titanic,
+    RhpcBLASctl
 Depends:
    R (>= 4.3.0)
 Imports:
--- a/R-package/NAMESPACE
+++ b/R-package/NAMESPACE
@ -20,15 +20,9 @@ export("xgb.attr<-")
 export("xgb.attributes<-")
 export("xgb.config<-")
 export("xgb.parameters<-")
-export(cb.cv.predict)
-export(cb.early.stop)
-export(cb.evaluation.log)
-export(cb.gblinear.history)
-export(cb.print.evaluation)
-export(cb.reset.parameters)
-export(cb.save.model)
 export(getinfo)
 export(setinfo)
+export(xgb.Callback)
 export(xgb.DMatrix)
 export(xgb.DMatrix.hasinfo)
 export(xgb.DMatrix.save)
@ -39,6 +33,13 @@ export(xgb.QuantileDMatrix)
 export(xgb.QuantileDMatrix.from_iterator)
 export(xgb.attr)
 export(xgb.attributes)
+export(xgb.cb.cv.predict)
+export(xgb.cb.early.stop)
+export(xgb.cb.evaluation.log)
+export(xgb.cb.gblinear.history)
+export(xgb.cb.print.evaluation)
+export(xgb.cb.reset.parameters)
+export(xgb.cb.save.model)
 export(xgb.config)
 export(xgb.copy.Booster)
 export(xgb.create.features)
@ -72,14 +73,10 @@ export(xgb.slice.DMatrix)
 export(xgb.train)
 export(xgboost)
 import(methods)
+importClassesFrom(Matrix,CsparseMatrix)
 importClassesFrom(Matrix,dgCMatrix)
 importClassesFrom(Matrix,dgRMatrix)
-importClassesFrom(Matrix,dgeMatrix)
-importFrom(Matrix,colSums)
 importFrom(Matrix,sparse.model.matrix)
-importFrom(Matrix,sparseMatrix)
-importFrom(Matrix,sparseVector)
-importFrom(Matrix,t)
 importFrom(data.table,":=")
 importFrom(data.table,as.data.table)
 importFrom(data.table,data.table)
@ -101,6 +98,7 @@ importFrom(methods,new)
 importFrom(stats,coef)
 importFrom(stats,median)
 importFrom(stats,predict)
+importFrom(stats,sd)
 importFrom(stats,variable.names)
 importFrom(utils,head)
 importFrom(utils,object.size)
--- a/R-package/R/callbacks.R
+++ b/R-package/R/callbacks.R
--- a/R-package/R/utils.R
+++ b/R-package/R/utils.R
@ -142,7 +142,7 @@ check.custom.eval <- function(env = parent.frame()) {
  if (!is.null(env$feval) &&
      is.null(env$maximize) && (
        !is.null(env$early_stopping_rounds) ||
-        has.callbacks(env$callbacks, 'cb.early.stop')))
+        has.callbacks(env$callbacks, "early_stop")))
    stop("Please set 'maximize' to indicate whether the evaluation metric needs to be maximized or not")
 }

@ -193,20 +193,20 @@ xgb.iter.update <- function(bst, dtrain, iter, obj) {
 # Evaluate one iteration.
 # Returns a named vector of evaluation metrics
 # with the names in a 'datasetname-metricname' format.
-xgb.iter.eval <- function(bst, watchlist, iter, feval) {
+xgb.iter.eval <- function(bst, evals, iter, feval) {
  handle <- xgb.get.handle(bst)

-  if (length(watchlist) == 0)
+  if (length(evals) == 0)
    return(NULL)

-  evnames <- names(watchlist)
+  evnames <- names(evals)
  if (is.null(feval)) {
-    msg <- .Call(XGBoosterEvalOneIter_R, handle, as.integer(iter), watchlist, as.list(evnames))
+    msg <- .Call(XGBoosterEvalOneIter_R, handle, as.integer(iter), evals, as.list(evnames))
    mat <- matrix(strsplit(msg, '\\s+|:')[[1]][-1], nrow = 2)
    res <- structure(as.numeric(mat[2, ]), names = mat[1, ])
  } else {
-    res <- sapply(seq_along(watchlist), function(j) {
-      w <- watchlist[[j]]
+    res <- sapply(seq_along(evals), function(j) {
+      w <- evals[[j]]
      ## predict using all trees
      preds <- predict(bst, w, outputmargin = TRUE, iterationrange = "all")
      eval_res <- feval(preds, w)
--- a/R-package/R/xgb.Booster.R
+++ b/R-package/R/xgb.Booster.R
@ -77,26 +77,45 @@ xgb.get.handle <- function(object) {

 #' Predict method for XGBoost model
 #'
-#' Predicted values based on either xgboost model or model handle object.
+#' Predict values on data based on xgboost model.
 #'
 #' @param object Object of class `xgb.Booster`.
-#' @param newdata Takes `matrix`, `dgCMatrix`, `dgRMatrix`, `dsparseVector`,
+#' @param newdata Takes `data.frame`, `matrix`, `dgCMatrix`, `dgRMatrix`, `dsparseVector`,
 #'        local data file, or `xgb.DMatrix`.
-#'        For single-row predictions on sparse data, it is recommended to use the CSR format.
-#'        If passing a sparse vector, it will take it as a row vector.
-#' @param missing Only used when input is a dense matrix. Pick a float value that represents
-#'        missing values in data (e.g., 0 or some other extreme value).
+#'
+#'        For single-row predictions on sparse data, it's recommended to use CSR format. If passing
+#'        a sparse vector, it will take it as a row vector.
+#'
+#'        Note that, for repeated predictions on the same data, one might want to create a DMatrix to
+#'        pass here instead of passing R types like matrices or data frames, as predictions will be
+#'        faster on DMatrix.
+#'
+#'        If `newdata` is a `data.frame`, be aware that:\itemize{
+#'        \item Columns will be converted to numeric if they aren't already, which could potentially make
+#'              the operation slower than in an equivalent `matrix` object.
+#'        \item The order of the columns must match with that of the data from which the model was fitted
+#'              (i.e. columns will not be referenced by their names, just by their order in the data).
+#'        \item If the model was fitted to data with categorical columns, these columns must be of
+#'              `factor` type here, and must use the same encoding (i.e. have the same levels).
+#'        \item If `newdata` contains any `factor` columns, they will be converted to base-0
+#'              encoding (same as during DMatrix creation) - hence, one should not pass a `factor`
+#'              under a column which during training had a different type.
+#'        }
+#' @param missing Float value that represents missing values in data (e.g., 0 or some other extreme value).
+#'
+#'        This parameter is not used when `newdata` is an `xgb.DMatrix` - in such cases, it should be
+#'        passed as an argument to the DMatrix constructor instead.
 #' @param outputmargin Whether the prediction should be returned in the form of original untransformed
 #'        sum of predictions from boosting iterations' results. E.g., setting `outputmargin=TRUE` for
 #'        logistic regression would return log-odds instead of probabilities.
-#' @param predleaf Whether to predict pre-tree leaf indices.
+#' @param predleaf Whether to predict per-tree leaf indices.
 #' @param predcontrib Whether to return feature contributions to individual predictions (see Details).
 #' @param approxcontrib Whether to use a fast approximation for feature contributions (see Details).
 #' @param predinteraction Whether to return contributions of feature interactions to individual predictions (see Details).
 #' @param reshape Whether to reshape the vector of predictions to matrix form when there are several
 #'        prediction outputs per case. No effect if `predleaf`, `predcontrib`,
 #'        or `predinteraction` is `TRUE`.
-#' @param training Whether the predictions are used for training. For dart booster,
+#' @param training Whether the prediction result is used for training. For dart booster,
 #'        training predicting will perform dropout.
 #' @param iterationrange Sequence of rounds/iterations from the model to use for prediction, specified by passing
 #'        a two-dimensional vector with the start and end numbers in the sequence (same format as R's `seq` - i.e.
@ -111,6 +130,12 @@ xgb.get.handle <- function(object) {
 #'        If passing "all", will use all of the rounds regardless of whether the model had early stopping or not.
 #' @param strict_shape Default is `FALSE`. When set to `TRUE`, the output
 #'        type and shape of predictions are invariant to the model type.
+#' @param base_margin Base margin used for boosting from existing model.
+#'
+#'        Note that, if `newdata` is an `xgb.DMatrix` object, passing this argument will
+#'        raise an error, as it needs to be added to the DMatrix instead (e.g. by passing it as
+#'        an argument in its constructor, or by calling \link{setinfo.xgb.DMatrix}).
+#'
 #' @param validate_features When `TRUE`, validate that the Booster's and newdata's feature_names
 #'        match (only applicable when both `object` and `newdata` have feature names).
 #'
@ -287,16 +312,80 @@ xgb.get.handle <- function(object) {
 predict.xgb.Booster <- function(object, newdata, missing = NA, outputmargin = FALSE,
                                predleaf = FALSE, predcontrib = FALSE, approxcontrib = FALSE, predinteraction = FALSE,
                                reshape = FALSE, training = FALSE, iterationrange = NULL, strict_shape = FALSE,
-                                validate_features = FALSE, ...) {
+                                validate_features = FALSE, base_margin = NULL, ...) {
  if (validate_features) {
    newdata <- validate.features(object, newdata)
  }
-  if (!inherits(newdata, "xgb.DMatrix")) {
+  is_dmatrix <- inherits(newdata, "xgb.DMatrix")
+  if (is_dmatrix && !is.null(base_margin)) {
+    stop(
+      "'base_margin' is not supported when passing 'xgb.DMatrix' as input.",
+      " Should be passed as argument to 'xgb.DMatrix' constructor."
+    )
+  }
+
+  use_as_df <- FALSE
+  use_as_dense_matrix <- FALSE
+  use_as_csr_matrix <- FALSE
+  n_row <- NULL
+  if (!is_dmatrix) {
+
+    inplace_predict_supported <- !predcontrib && !predinteraction && !predleaf
+    if (inplace_predict_supported) {
+      booster_type <- xgb.booster_type(object)
+      if (booster_type == "gblinear" || (booster_type == "dart" && training)) {
+        inplace_predict_supported <- FALSE
+      }
+    }
+    if (inplace_predict_supported) {
+
+      if (is.matrix(newdata)) {
+        use_as_dense_matrix <- TRUE
+      } else if (is.data.frame(newdata)) {
+        # note: since here it turns it into a non-data-frame list,
+        # needs to keep track of the number of rows it had for later
+        n_row <- nrow(newdata)
+        newdata <- lapply(
+          newdata,
+          function(x) {
+            if (is.factor(x)) {
+              return(as.numeric(x) - 1)
+            } else {
+              return(as.numeric(x))
+            }
+          }
+        )
+        use_as_df <- TRUE
+      } else if (inherits(newdata, "dgRMatrix")) {
+        use_as_csr_matrix <- TRUE
+        csr_data <- list(newdata@p, newdata@j, newdata@x, ncol(newdata))
+      } else if (inherits(newdata, "dsparseVector")) {
+        use_as_csr_matrix <- TRUE
+        n_row <- 1L
+        i <- newdata@i - 1L
+        if (storage.mode(i) != "integer") {
+          storage.mode(i) <- "integer"
+        }
+        csr_data <- list(c(0L, length(i)), i, newdata@x, length(newdata))
+      }
+
+    }
+
+  } # if (!is_dmatrix)
+
+  if (!is_dmatrix && !use_as_dense_matrix && !use_as_csr_matrix && !use_as_df) {
    nthread <- xgb.nthread(object)
    newdata <- xgb.DMatrix(
      newdata,
-      missing = missing, nthread = NVL(nthread, -1)
+      missing = missing,
+      base_margin = base_margin,
+      nthread = NVL(nthread, -1)
    )
+    is_dmatrix <- TRUE
+  }
+
+  if (is.null(n_row)) {
+    n_row <- nrow(newdata)
  }


@ -354,18 +443,30 @@ predict.xgb.Booster <- function(object, newdata, missing = NA, outputmargin = FA
    args$type <- set_type(6)
  }

-  predts <- .Call(
-    XGBoosterPredictFromDMatrix_R,
-    xgb.get.handle(object),
-    newdata,
-    jsonlite::toJSON(args, auto_unbox = TRUE)
-  )
+  json_conf <- jsonlite::toJSON(args, auto_unbox = TRUE)
+  if (is_dmatrix) {
+    predts <- .Call(
+      XGBoosterPredictFromDMatrix_R, xgb.get.handle(object), newdata, json_conf
+    )
+  } else if (use_as_dense_matrix) {
+    predts <- .Call(
+      XGBoosterPredictFromDense_R, xgb.get.handle(object), newdata, missing, json_conf, base_margin
+    )
+  } else if (use_as_csr_matrix) {
+    predts <- .Call(
+      XGBoosterPredictFromCSR_R, xgb.get.handle(object), csr_data, missing, json_conf, base_margin
+    )
+  } else if (use_as_df) {
+    predts <- .Call(
+      XGBoosterPredictFromColumnar_R, xgb.get.handle(object), newdata, missing, json_conf, base_margin
+    )
+  }
+
  names(predts) <- c("shape", "results")
  shape <- predts$shape
  arr <- predts$results

  n_ret <- length(arr)
-  n_row <- nrow(newdata)
  if (n_row != shape[1]) {
    stop("Incorrect predict shape.")
  }
@ -970,6 +1071,10 @@ xgb.best_iteration <- function(bst) {
 #' coef(model)
 #' @export
 coef.xgb.Booster <- function(object, ...) {
+  return(.internal.coef.xgb.Booster(object, add_names = TRUE))
+}
+
+.internal.coef.xgb.Booster <- function(object, add_names = TRUE) {
  booster_type <- xgb.booster_type(object)
  if (booster_type != "gblinear") {
    stop("Coefficients are not defined for Booster type ", booster_type)
@ -988,21 +1093,27 @@ coef.xgb.Booster <- function(object, ...) {
  intercepts <- weights[seq(sep + 1, length(weights))]
  intercepts <- intercepts + as.numeric(base_score)

-  feature_names <- xgb.feature_names(object)
-  if (!NROW(feature_names)) {
-    # This mimics the default naming in R which names columns as "V1..N"
-    # when names are needed but not available
-    feature_names <- paste0("V", seq(1L, num_feature))
+  if (add_names) {
+    feature_names <- xgb.feature_names(object)
+    if (!NROW(feature_names)) {
+      # This mimics the default naming in R which names columns as "V1..N"
+      # when names are needed but not available
+      feature_names <- paste0("V", seq(1L, num_feature))
+    }
+    feature_names <- c("(Intercept)", feature_names)
  }
-  feature_names <- c("(Intercept)", feature_names)
  if (n_cols == 1L) {
    out <- c(intercepts, coefs)
-    names(out) <- feature_names
+    if (add_names) {
+      names(out) <- feature_names
+    }
  } else {
    coefs <- matrix(coefs, nrow = num_feature, byrow = TRUE)
    dim(intercepts) <- c(1L, n_cols)
    out <- rbind(intercepts, coefs)
-    row.names(out) <- feature_names
+    if (add_names) {
+      row.names(out) <- feature_names
+    }
    # TODO: if a class names attributes is added,
    # should use those names here.
  }
@ -1154,12 +1265,9 @@ print.xgb.Booster <- function(x, ...) {
    cat("  ", paste(attr_names, collapse = ", "), "\n")
  }

-  if (!is.null(R_attrs$callbacks) && length(R_attrs$callbacks) > 0) {
-    cat('callbacks:\n')
-    lapply(callback.calls(R_attrs$callbacks), function(x) {
-      cat('  ')
-      print(x)
-    })
+  additional_attr <- setdiff(names(R_attrs), .reserved_cb_names)
+  if (NROW(additional_attr)) {
+    cat("callbacks:\n  ", paste(additional_attr, collapse = ", "), "\n")
  }

  if (!is.null(R_attrs$evaluation_log)) {
--- a/R-package/R/xgb.DMatrix.R
+++ b/R-package/R/xgb.DMatrix.R
@ -28,10 +28,27 @@
 #' 'xgb.QuantileDMatrix'.
 #' \item Single-row CSR matrices, as class `dsparseVector` from package `Matrix`, which is interpreted
 #' as a single row (only when making predictions from a fitted model).
-#' \item Text files in SVMLight / LibSVM formats, passed as a path to the file. These are \bold{not}
-#' supported for xgb.QuantileDMatrix'.
-#' \item Binary files generated by \link{xgb.DMatrix.save},  passed as a path to the file. These are
-#' \bold{not} supported for xgb.QuantileDMatrix'.
+#' \item Text files in a supported format, passed as a `character` variable containing the URI path to
+#' the file, with an optional format specifier.
+#'
+#' These are \bold{not} supported for `xgb.QuantileDMatrix`. Supported formats are:\itemize{
+#'   \item XGBoost's own binary format for DMatrices, as produced by \link{xgb.DMatrix.save}.
+#'   \item SVMLight (a.k.a. LibSVM) format for CSR matrices. This format can be signaled by suffix
+#'         `?format=libsvm` at the end of the file path. It will be the default format if not
+#'         otherwise specified.
+#'   \item CSV files (comma-separated values). This format can be specified by adding suffix
+#'         `?format=csv` at the end of the file path. It will \bold{not} be auto-deduced from file extensions.
+#'   }
+#'
+#' Be aware that the format of the file will not be auto-deduced - for example, if a file is named 'file.csv',
+#' it will not look at the extension or file contents to determine that it is a comma-separated value.
+#' Instead, the format must be specified following the URI format, so the input to `data` should be passed
+#' like this: `"file.csv?format=csv"` (or `"file.csv?format=csv&label_column=0"` if the first column
+#' corresponds to the labels).
+#'
+#' For more information about passing text files as input, see the articles
+#' \href{https://xgboost.readthedocs.io/en/stable/tutorials/input_format.html}{Text Input Format of DMatrix} and
+#' \href{https://xgboost.readthedocs.io/en/stable/python/python_intro.html#python-data-interface}{Data Interface}.
 #' }
 #' @param label Label of the training data. For classification problems, should be passed encoded as
 #' integers with numeration starting at zero.
@ -81,6 +98,13 @@
 #' @param label_lower_bound Lower bound for survival training.
 #' @param label_upper_bound Upper bound for survival training.
 #' @param feature_weights Set feature weights for column sampling.
+#' @param data_split_mode When passing a URI (as R `character`) as input, this signals
+#' whether to split by row or column. Allowed values are `"row"` and `"col"`.
+#'
+#' In distributed mode, the file is split accordingly; otherwise this is only an indicator on
+#' how the file was split beforehand. Defaults to `"row"`.
+#'
+#' This is not used when `data` is not a URI.
 #' @return An 'xgb.DMatrix' object. If calling 'xgb.QuantileDMatrix', it will have additional
 #' subclass 'xgb.QuantileDMatrix'.
 #'
@ -117,7 +141,8 @@ xgb.DMatrix <- function(
  qid = NULL,
  label_lower_bound = NULL,
  label_upper_bound = NULL,
-  feature_weights = NULL
+  feature_weights = NULL,
+  data_split_mode = "row"
 ) {
  if (!is.null(group) && !is.null(qid)) {
    stop("Either one of 'group' or 'qid' should be NULL")
@ -131,7 +156,14 @@ xgb.DMatrix <- function(
      )
    }
    data <- path.expand(data)
-    handle <- .Call(XGDMatrixCreateFromFile_R, data, as.integer(silent))
+    if (data_split_mode == "row") {
+      data_split_mode <- 0L
+    } else if (data_split_mode == "col") {
+      data_split_mode <- 1L
+    } else {
+      stop("Passed invalid 'data_split_mode': ", data_split_mode)
+    }
+    handle <- .Call(XGDMatrixCreateFromURI_R, data, as.integer(silent), data_split_mode)
  } else if (is.matrix(data)) {
    handle <- .Call(
      XGDMatrixCreateFromMat_R, data, missing, nthread
--- a/R-package/R/xgb.DMatrix.save.R
+++ b/R-package/R/xgb.DMatrix.save.R
@ -6,6 +6,7 @@
 #' @param fname the name of the file to write.
 #'
 #' @examples
+#' \dontshow{RhpcBLASctl::omp_set_num_threads(1)}
 #' data(agaricus.train, package='xgboost')
 #' dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))
 #' fname <- file.path(tempdir(), "xgb.DMatrix.data")
--- a/R-package/R/xgb.config.R
+++ b/R-package/R/xgb.config.R
@ -4,7 +4,14 @@
 #' values of one or more global-scope parameters. Use \code{xgb.get.config} to fetch the current
 #' values of all global-scope parameters (listed in
 #' \url{https://xgboost.readthedocs.io/en/stable/parameter.html}).
+#' @details
+#' Note that serialization-related functions might use a globally-configured number of threads,
+#' which is managed by the system's OpenMP (OMP) configuration instead. Typically, XGBoost methods
+#' accept an `nthread` parameter, but some methods like `readRDS` might get executed before such
+#' parameter can be supplied.
 #'
+#' The number of OMP threads can in turn be configured for example through an environment variable
+#' `OMP_NUM_THREADS` (needs to be set before R is started), or through `RhpcBLASctl::omp_set_num_threads`.
 #' @rdname xgbConfig
 #' @title Set and get global configuration
 #' @name xgb.set.config, xgb.get.config
--- a/R-package/R/xgb.create.features.R
+++ b/R-package/R/xgb.create.features.R
@ -71,7 +71,6 @@
 #' new.dtest <- xgb.DMatrix(
 #'   data = new.features.test, label = agaricus.test$label, nthread = 2
 #' )
-#' watchlist <- list(train = new.dtrain)
 #' bst <- xgb.train(params = param, data = new.dtrain, nrounds = nrounds, nthread = 2)
 #'
 #' # Model accuracy with new features
--- a/R-package/R/xgb.cv.R
+++ b/R-package/R/xgb.cv.R
@ -27,7 +27,7 @@
 #'        that NA values should be considered as 'missing' by the algorithm.
 #'        Sometimes, 0 or other extreme value might be used to represent missing values.
 #' @param prediction A logical value indicating whether to return the test fold predictions
-#'        from each CV model. This parameter engages the \code{\link{cb.cv.predict}} callback.
+#'        from each CV model. This parameter engages the \code{\link{xgb.cb.cv.predict}} callback.
 #' @param showsd \code{boolean}, whether to show standard deviation of cross validation
 #' @param metrics, list of evaluation metrics to be used in cross validation,
 #'   when it is not specified, the evaluation metric is chosen according to objective function.
@ -57,17 +57,17 @@
 #' @param verbose \code{boolean}, print the statistics during the process
 #' @param print_every_n Print each n-th iteration evaluation messages when \code{verbose>0}.
 #'        Default is 1 which means all messages are printed. This parameter is passed to the
-#'        \code{\link{cb.print.evaluation}} callback.
+#'        \code{\link{xgb.cb.print.evaluation}} callback.
 #' @param early_stopping_rounds If \code{NULL}, the early stopping function is not triggered.
 #'        If set to an integer \code{k}, training with a validation set will stop if the performance
 #'        doesn't improve for \code{k} rounds.
-#'        Setting this parameter engages the \code{\link{cb.early.stop}} callback.
+#'        Setting this parameter engages the \code{\link{xgb.cb.early.stop}} callback.
 #' @param maximize If \code{feval} and \code{early_stopping_rounds} are set,
 #'        then this parameter must be set as well.
 #'        When it is \code{TRUE}, it means the larger the evaluation score the better.
-#'        This parameter is passed to the \code{\link{cb.early.stop}} callback.
+#'        This parameter is passed to the \code{\link{xgb.cb.early.stop}} callback.
 #' @param callbacks a list of callback functions to perform various task during boosting.
-#'        See \code{\link{callbacks}}. Some of the callbacks are automatically created depending on the
+#'        See \code{\link{xgb.Callback}}. Some of the callbacks are automatically created depending on the
 #'        parameters' values. User can provide either existing or their own callback methods in order
 #'        to customize the training process.
 #' @param ... other parameters to pass to \code{params}.
@ -90,25 +90,25 @@
 #' \itemize{
 #'   \item \code{call} a function call.
 #'   \item \code{params} parameters that were passed to the xgboost library. Note that it does not
-#'         capture parameters changed by the \code{\link{cb.reset.parameters}} callback.
-#'   \item \code{callbacks} callback functions that were either automatically assigned or
-#'         explicitly passed.
+#'         capture parameters changed by the \code{\link{xgb.cb.reset.parameters}} callback.
 #'   \item \code{evaluation_log} evaluation history stored as a \code{data.table} with the
 #'         first column corresponding to iteration number and the rest corresponding to the
 #'         CV-based evaluation means and standard deviations for the training and test CV-sets.
-#'         It is created by the \code{\link{cb.evaluation.log}} callback.
+#'         It is created by the \code{\link{xgb.cb.evaluation.log}} callback.
 #'   \item \code{niter} number of boosting iterations.
 #'   \item \code{nfeatures} number of features in training data.
 #'   \item \code{folds} the list of CV folds' indices - either those passed through the \code{folds}
 #'         parameter or randomly generated.
 #'   \item \code{best_iteration} iteration number with the best evaluation metric value
 #'         (only available with early stopping).
-#'   \item \code{pred} CV prediction values available when \code{prediction} is set.
-#'         It is either vector or matrix (see \code{\link{cb.cv.predict}}).
-#'   \item \code{models} a list of the CV folds' models. It is only available with the explicit
-#'         setting of the \code{cb.cv.predict(save_models = TRUE)} callback.
 #' }
 #'
+#' Plus other potential elements that are the result of callbacks, such as a list `cv_predict` with
+#' a sub-element `pred` when passing `prediction = TRUE`, which is added by the \link{xgb.cb.cv.predict}
+#' callback (note that one can also pass it manually under `callbacks` with different settings,
+#' such as also saving the models created during cross validation); or a list `early_stop` which
+#' will contain elements such as `best_iteration` when using the early stopping callback (\link{xgb.cb.early.stop}).
+#'
 #' @examples
 #' data(agaricus.train, package='xgboost')
 #' dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))
@ -160,32 +160,38 @@ xgb.cv <- function(params = list(), data, nrounds, nfold, label = NULL, missing
    folds <- generate.cv.folds(nfold, nrow(data), stratified, cv_label, params)
  }

+  # Callbacks
+  tmp <- .process.callbacks(callbacks, is_cv = TRUE)
+  callbacks <- tmp$callbacks
+  cb_names <- tmp$cb_names
+  rm(tmp)
+
+  # Early stopping callback
+  if (!is.null(early_stopping_rounds) && !("early_stop" %in% cb_names)) {
+    callbacks <- add.callback(
+      callbacks,
+      xgb.cb.early.stop(
+        early_stopping_rounds,
+        maximize = maximize,
+        verbose = verbose
+      ),
+      as_first_elt = TRUE
+    )
+  }
  # verbosity & evaluation printing callback:
  params <- c(params, list(silent = 1))
  print_every_n <- max(as.integer(print_every_n), 1L)
-  if (!has.callbacks(callbacks, 'cb.print.evaluation') && verbose) {
-    callbacks <- add.cb(callbacks, cb.print.evaluation(print_every_n, showsd = showsd))
+  if (verbose && !("print_evaluation" %in% cb_names)) {
+    callbacks <- add.callback(callbacks, xgb.cb.print.evaluation(print_every_n, showsd = showsd))
  }
  # evaluation log callback: always is on in CV
-  evaluation_log <- list()
-  if (!has.callbacks(callbacks, 'cb.evaluation.log')) {
-    callbacks <- add.cb(callbacks, cb.evaluation.log())
-  }
-  # Early stopping callback
-  stop_condition <- FALSE
-  if (!is.null(early_stopping_rounds) &&
-      !has.callbacks(callbacks, 'cb.early.stop')) {
-    callbacks <- add.cb(callbacks, cb.early.stop(early_stopping_rounds,
-                                                 maximize = maximize, verbose = verbose))
+  if (!("evaluation_log" %in% cb_names)) {
+    callbacks <- add.callback(callbacks, xgb.cb.evaluation.log())
  }
  # CV-predictions callback
-  if (prediction &&
-      !has.callbacks(callbacks, 'cb.cv.predict')) {
-    callbacks <- add.cb(callbacks, cb.cv.predict(save_models = FALSE))
+  if (prediction && !("cv_predict" %in% cb_names)) {
+    callbacks <- add.callback(callbacks, xgb.cb.cv.predict(save_models = FALSE))
  }
-  # Sort the callbacks into categories
-  cb <- categorize.callbacks(callbacks)
-

  # create the booster-folds
  # train_folds
@ -209,11 +215,8 @@ xgb.cv <- function(params = list(), data, nrounds, nfold, label = NULL, missing
      modelfile = NULL
    )
    bst <- bst$bst
-    list(dtrain = dtrain, bst = bst, watchlist = list(train = dtrain, test = dtest), index = folds[[k]])
+    list(dtrain = dtrain, bst = bst, evals = list(train = dtrain, test = dtest), index = folds[[k]])
  })
-  rm(dall)
-  # a "basket" to collect some results from callbacks
-  basket <- list()

  # extract parameters that can affect the relationship b/w #trees and #iterations
  num_class <- max(as.numeric(NVL(params[['num_class']], 1)), 1) # nolint
@ -222,10 +225,25 @@ xgb.cv <- function(params = list(), data, nrounds, nfold, label = NULL, missing
  begin_iteration <- 1
  end_iteration <- nrounds

+  .execute.cb.before.training(
+    callbacks,
+    bst_folds,
+    dall,
+    NULL,
+    begin_iteration,
+    end_iteration
+  )
+
  # synchronous CV boosting: run CV folds' models within each iteration
  for (iteration in begin_iteration:end_iteration) {

-    for (f in cb$pre_iter) f()
+    .execute.cb.before.iter(
+      callbacks,
+      bst_folds,
+      dall,
+      NULL,
+      iteration
+    )

    msg <- lapply(bst_folds, function(fd) {
      xgb.iter.update(
@ -236,33 +254,42 @@ xgb.cv <- function(params = list(), data, nrounds, nfold, label = NULL, missing
      )
      xgb.iter.eval(
        bst = fd$bst,
-        watchlist = fd$watchlist,
+        evals = fd$evals,
        iter = iteration - 1,
        feval = feval
      )
    })
    msg <- simplify2array(msg)
-    # Note: these variables might look unused here, but they are used in the callbacks
-    bst_evaluation <- rowMeans(msg) # nolint
-    bst_evaluation_err <- apply(msg, 1, sd) # nolint

-    for (f in cb$post_iter) f()
+    should_stop <- .execute.cb.after.iter(
+      callbacks,
+      bst_folds,
+      dall,
+      NULL,
+      iteration,
+      msg
+    )

-    if (stop_condition) break
+    if (should_stop) break
  }
-  for (f in cb$finalize) f(finalize = TRUE)
+  cb_outputs <- .execute.cb.after.training(
+    callbacks,
+    bst_folds,
+    dall,
+    NULL,
+    iteration,
+    msg
+  )

  # the CV result
  ret <- list(
    call = match.call(),
    params = params,
-    callbacks = callbacks,
-    evaluation_log = evaluation_log,
-    niter = end_iteration,
-    nfeatures = ncol(data),
+    niter = iteration,
+    nfeatures = ncol(dall),
    folds = folds
  )
-  ret <- c(ret, basket)
+  ret <- c(ret, cb_outputs)

  class(ret) <- 'xgb.cv.synchronous'
  return(invisible(ret))
@ -308,23 +335,16 @@ print.xgb.cv.synchronous <- function(x, verbose = FALSE, ...) {
                paste0('"', unlist(x$params), '"'),
                sep = ' = ', collapse = ', '), '\n', sep = '')
    }
-    if (!is.null(x$callbacks) && length(x$callbacks) > 0) {
-      cat('callbacks:\n')
-      lapply(callback.calls(x$callbacks), function(x) {
-        cat('  ')
-        print(x)
-      })
-    }

    for (n in c('niter', 'best_iteration')) {
-      if (is.null(x[[n]]))
+      if (is.null(x$early_stop[[n]]))
        next
-      cat(n, ': ', x[[n]], '\n', sep = '')
+      cat(n, ': ', x$early_stop[[n]], '\n', sep = '')
    }

-    if (!is.null(x$pred)) {
+    if (!is.null(x$cv_predict$pred)) {
      cat('pred:\n')
-      str(x$pred)
+      str(x$cv_predict$pred)
    }
  }

@ -332,9 +352,9 @@ print.xgb.cv.synchronous <- function(x, verbose = FALSE, ...) {
    cat('evaluation_log:\n')
  print(x$evaluation_log, row.names = FALSE, ...)

-  if (!is.null(x$best_iteration)) {
+  if (!is.null(x$early_stop$best_iteration)) {
    cat('Best iteration:\n')
-    print(x$evaluation_log[x$best_iteration], row.names = FALSE, ...)
+    print(x$evaluation_log[x$early_stop$best_iteration], row.names = FALSE, ...)
  }
  invisible(x)
 }
--- a/R-package/R/xgb.dump.R
+++ b/R-package/R/xgb.dump.R
@ -24,6 +24,7 @@
 #' as a \code{character} vector. Otherwise it will return \code{TRUE}.
 #'
 #' @examples
+#' \dontshow{RhpcBLASctl::omp_set_num_threads(1)}
 #' data(agaricus.train, package='xgboost')
 #' data(agaricus.test, package='xgboost')
 #' train <- agaricus.train
--- a/R-package/R/xgb.load.R
+++ b/R-package/R/xgb.load.R
@ -6,7 +6,7 @@
 #'
 #' @details
 #' The input file is expected to contain a model saved in an xgboost model format
-#' using either \code{\link{xgb.save}} or \code{\link{cb.save.model}} in R, or using some
+#' using either \code{\link{xgb.save}} or \code{\link{xgb.cb.save.model}} in R, or using some
 #' appropriate methods from other xgboost interfaces. E.g., a model trained in Python and
 #' saved from there in xgboost format, could be loaded from R.
 #'
@ -20,6 +20,7 @@
 #' \code{\link{xgb.save}}
 #'
 #' @examples
+#' \dontshow{RhpcBLASctl::omp_set_num_threads(1)}
 #' data(agaricus.train, package='xgboost')
 #' data(agaricus.test, package='xgboost')
 #'
--- a/R-package/R/xgb.save.R
+++ b/R-package/R/xgb.save.R
@ -35,6 +35,7 @@
 #' \code{\link{xgb.load}}
 #'
 #' @examples
+#' \dontshow{RhpcBLASctl::omp_set_num_threads(1)}
 #' data(agaricus.train, package='xgboost')
 #' data(agaricus.test, package='xgboost')
 #'
--- a/R-package/R/xgb.save.raw.R
+++ b/R-package/R/xgb.save.raw.R
@ -12,6 +12,7 @@
 #' }
 #'
 #' @examples
+#' \dontshow{RhpcBLASctl::omp_set_num_threads(1)}
 #' data(agaricus.train, package='xgboost')
 #' data(agaricus.test, package='xgboost')
 #'
--- a/R-package/R/xgb.train.R
+++ b/R-package/R/xgb.train.R
@ -114,13 +114,13 @@
 #' @param data training dataset. \code{xgb.train} accepts only an \code{xgb.DMatrix} as the input.
 #'        \code{xgboost}, in addition, also accepts \code{matrix}, \code{dgCMatrix}, or name of a local data file.
 #' @param nrounds max number of boosting iterations.
-#' @param watchlist named list of xgb.DMatrix datasets to use for evaluating model performance.
+#' @param evals Named list of `xgb.DMatrix` datasets to use for evaluating model performance.
 #'        Metrics specified in either \code{eval_metric} or \code{feval} will be computed for each
 #'        of these datasets during each boosting iteration, and stored in the end as a field named
 #'        \code{evaluation_log} in the resulting object. When either \code{verbose>=1} or
-#'        \code{\link{cb.print.evaluation}} callback is engaged, the performance results are continuously
+#'        \code{\link{xgb.cb.print.evaluation}} callback is engaged, the performance results are continuously
 #'        printed out during the training.
-#'        E.g., specifying \code{watchlist=list(validation1=mat1, validation2=mat2)} allows to track
+#'        E.g., specifying \code{evals=list(validation1=mat1, validation2=mat2)} allows to track
 #'        the performance of each round's model on mat1 and mat2.
 #' @param obj customized objective function. Returns gradient and second order
 #'        gradient with given prediction and dtrain.
@ -130,31 +130,32 @@
 #' @param verbose If 0, xgboost will stay silent. If 1, it will print information about performance.
 #'        If 2, some additional information will be printed out.
 #'        Note that setting \code{verbose > 0} automatically engages the
-#'        \code{cb.print.evaluation(period=1)} callback function.
+#'        \code{xgb.cb.print.evaluation(period=1)} callback function.
 #' @param print_every_n Print each n-th iteration evaluation messages when \code{verbose>0}.
 #'        Default is 1 which means all messages are printed. This parameter is passed to the
-#'        \code{\link{cb.print.evaluation}} callback.
+#'        \code{\link{xgb.cb.print.evaluation}} callback.
 #' @param early_stopping_rounds If \code{NULL}, the early stopping function is not triggered.
 #'        If set to an integer \code{k}, training with a validation set will stop if the performance
 #'        doesn't improve for \code{k} rounds.
-#'        Setting this parameter engages the \code{\link{cb.early.stop}} callback.
+#'        Setting this parameter engages the \code{\link{xgb.cb.early.stop}} callback.
 #' @param maximize If \code{feval} and \code{early_stopping_rounds} are set,
 #'        then this parameter must be set as well.
 #'        When it is \code{TRUE}, it means the larger the evaluation score the better.
-#'        This parameter is passed to the \code{\link{cb.early.stop}} callback.
+#'        This parameter is passed to the \code{\link{xgb.cb.early.stop}} callback.
 #' @param save_period when it is non-NULL, model is saved to disk after every \code{save_period} rounds,
-#'        0 means save at the end. The saving is handled by the \code{\link{cb.save.model}} callback.
+#'        0 means save at the end. The saving is handled by the \code{\link{xgb.cb.save.model}} callback.
 #' @param save_name the name or path for periodically saved model file.
 #' @param xgb_model a previously built model to continue the training from.
 #'        Could be either an object of class \code{xgb.Booster}, or its raw data, or the name of a
 #'        file with a previously saved model.
 #' @param callbacks a list of callback functions to perform various task during boosting.
-#'        See \code{\link{callbacks}}. Some of the callbacks are automatically created depending on the
+#'        See \code{\link{xgb.Callback}}. Some of the callbacks are automatically created depending on the
 #'        parameters' values. User can provide either existing or their own callback methods in order
 #'        to customize the training process.
 #'
-#'        Note that some callbacks might try to set an evaluation log - be aware that these evaluation logs
-#'        are kept as R attributes, and thus do not get saved when using non-R serializaters like
+#'        Note that some callbacks might try to leave attributes in the resulting model object,
+#'        such as an evaluation log (a `data.table` object) - be aware that these objects are kept
+#'        as R attributes, and thus do not get saved when using XGBoost's own serializers like
 #'        \link{xgb.save} (but are kept when using R serializers like \link{saveRDS}).
 #' @param ... other parameters to pass to \code{params}.
 #' @param label vector of response values. Should not be provided when data is
@ -170,7 +171,7 @@
 #' @details
 #' These are the training functions for \code{xgboost}.
 #'
-#' The \code{xgb.train} interface supports advanced features such as \code{watchlist},
+#' The \code{xgb.train} interface supports advanced features such as \code{evals},
 #' customized objective and evaluation metric functions, therefore it is more flexible
 #' than the \code{xgboost} interface.
 #'
@ -178,6 +179,11 @@
 #' Number of threads can also be manually specified via the \code{nthread}
 #' parameter.
 #'
+#' While in other interfaces the random seed defaults to zero, in R, if a parameter `seed`
+#' is not manually supplied, a random seed is drawn from R's own random number generator,
+#' whose state in turn is controllable through `set.seed`. If `seed` is passed, it is used
+#' instead of a value drawn from R's RNG.
+#'
 #' The evaluation metric is chosen automatically by XGBoost (according to the objective)
 #' when the \code{eval_metric} parameter is not provided.
 #' User may set one or several \code{eval_metric} parameters.
@ -201,18 +207,19 @@
 #'
 #' The following callbacks are automatically created when certain parameters are set:
 #' \itemize{
-#'   \item \code{cb.print.evaluation} is turned on when \code{verbose > 0};
+#'   \item \code{xgb.cb.print.evaluation} is turned on when \code{verbose > 0};
 #'         and the \code{print_every_n} parameter is passed to it.
-#'   \item \code{cb.evaluation.log} is on when \code{watchlist} is present.
-#'   \item \code{cb.early.stop}: when \code{early_stopping_rounds} is set.
-#'   \item \code{cb.save.model}: when \code{save_period > 0} is set.
+#'   \item \code{xgb.cb.evaluation.log} is on when \code{evals} is present.
+#'   \item \code{xgb.cb.early.stop}: when \code{early_stopping_rounds} is set.
+#'   \item \code{xgb.cb.save.model}: when \code{save_period > 0} is set.
 #' }
 #'
 #' Note that objects of type `xgb.Booster` as returned by this function behave a bit differently
 #' from typical R objects (it's an 'altrep' list class), and it makes a separation between
 #' internal booster attributes (restricted to jsonifyable data), accessed through \link{xgb.attr}
 #' and shared between interfaces through serialization functions like \link{xgb.save}; and
-#' R-specific attributes, accessed through \link{attributes} and \link{attr}, which are otherwise
+#' R-specific attributes (typically the result from a callback), accessed through \link{attributes}
+#' and \link{attr}, which are otherwise
 #' only used in the R interface, only kept when using R's serializers like \link{saveRDS}, and
 #' not anyhow used by functions like \link{predict.xgb.Booster}.
 #'
@ -224,7 +231,7 @@
 #' effect elsewhere.
 #'
 #' @seealso
-#' \code{\link{callbacks}},
+#' \code{\link{xgb.Callback}},
 #' \code{\link{predict.xgb.Booster}},
 #' \code{\link{xgb.cv}}
 #'
@ -247,12 +254,12 @@
 #' dtest <- with(
 #'   agaricus.test, xgb.DMatrix(data, label = label, nthread = nthread)
 #' )
-#' watchlist <- list(train = dtrain, eval = dtest)
+#' evals <- list(train = dtrain, eval = dtest)
 #'
 #' ## A simple xgb.train example:
 #' param <- list(max_depth = 2, eta = 1, nthread = nthread,
 #'               objective = "binary:logistic", eval_metric = "auc")
-#' bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0)
+#' bst <- xgb.train(param, dtrain, nrounds = 2, evals = evals, verbose = 0)
 #'
 #' ## An xgb.train example where custom objective and evaluation metric are
 #' ## used:
@ -273,15 +280,15 @@
 #' #  as 'objective' and 'eval_metric' parameters in the params list:
 #' param <- list(max_depth = 2, eta = 1, nthread = nthread,
 #'               objective = logregobj, eval_metric = evalerror)
-#' bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0)
+#' bst <- xgb.train(param, dtrain, nrounds = 2, evals = evals, verbose = 0)
 #'
 #' #  or through the ... arguments:
 #' param <- list(max_depth = 2, eta = 1, nthread = nthread)
-#' bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0,
+#' bst <- xgb.train(param, dtrain, nrounds = 2, evals = evals, verbose = 0,
 #'                  objective = logregobj, eval_metric = evalerror)
 #'
 #' #  or as dedicated 'obj' and 'feval' parameters of xgb.train:
-#' bst <- xgb.train(param, dtrain, nrounds = 2, watchlist,
+#' bst <- xgb.train(param, dtrain, nrounds = 2, evals = evals,
 #'                  obj = logregobj, feval = evalerror)
 #'
 #'
@ -289,11 +296,11 @@
 #' param <- list(max_depth = 2, eta = 1, nthread = nthread,
 #'               objective = "binary:logistic", eval_metric = "auc")
 #' my_etas <- list(eta = c(0.5, 0.1))
-#' bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0,
-#'                  callbacks = list(cb.reset.parameters(my_etas)))
+#' bst <- xgb.train(param, dtrain, nrounds = 2, evals = evals, verbose = 0,
+#'                  callbacks = list(xgb.cb.reset.parameters(my_etas)))
 #'
 #' ## Early stopping:
-#' bst <- xgb.train(param, dtrain, nrounds = 25, watchlist,
+#' bst <- xgb.train(param, dtrain, nrounds = 25, evals = evals,
 #'                  early_stopping_rounds = 3)
 #'
 #' ## An 'xgboost' interface example:
@ -304,7 +311,7 @@
 #'
 #' @rdname xgb.train
 #' @export
-xgb.train <- function(params = list(), data, nrounds, watchlist = list(),
+xgb.train <- function(params = list(), data, nrounds, evals = list(),
                      obj = NULL, feval = NULL, verbose = 1, print_every_n = 1L,
                      early_stopping_rounds = NULL, maximize = NULL,
                      save_period = NULL, save_name = "xgboost.model",
@ -317,68 +324,68 @@ xgb.train <- function(params = list(), data, nrounds, watchlist = list(),
  check.custom.obj()
  check.custom.eval()

-  # data & watchlist checks
+  # data & evals checks
  dtrain <- data
  if (!inherits(dtrain, "xgb.DMatrix"))
    stop("second argument dtrain must be xgb.DMatrix")
-  if (length(watchlist) > 0) {
-    if (typeof(watchlist) != "list" ||
-        !all(vapply(watchlist, inherits, logical(1), what = 'xgb.DMatrix')))
-      stop("watchlist must be a list of xgb.DMatrix elements")
-    evnames <- names(watchlist)
+  if (length(evals) > 0) {
+    if (typeof(evals) != "list" ||
+        !all(vapply(evals, inherits, logical(1), what = 'xgb.DMatrix')))
+      stop("'evals' must be a list of xgb.DMatrix elements")
+    evnames <- names(evals)
    if (is.null(evnames) || any(evnames == ""))
-      stop("each element of the watchlist must have a name tag")
+      stop("each element of 'evals' must have a name tag")
  }
  # Handle multiple evaluation metrics given as a list
  for (m in params$eval_metric) {
    params <- c(params, list(eval_metric = m))
  }

-  # evaluation printing callback
  params <- c(params)
-  print_every_n <- max(as.integer(print_every_n), 1L)
-  if (!has.callbacks(callbacks, 'cb.print.evaluation') &&
-      verbose) {
-    callbacks <- add.cb(callbacks, cb.print.evaluation(print_every_n))
-  }
-  # evaluation log callback:  it is automatically enabled when watchlist is provided
-  evaluation_log <- list()
-  if (!has.callbacks(callbacks, 'cb.evaluation.log') &&
-      length(watchlist) > 0) {
-    callbacks <- add.cb(callbacks, cb.evaluation.log())
-  }
-  # Model saving callback
-  if (!is.null(save_period) &&
-      !has.callbacks(callbacks, 'cb.save.model')) {
-    callbacks <- add.cb(callbacks, cb.save.model(save_period, save_name))
-  }
-  # Early stopping callback
-  stop_condition <- FALSE
-  if (!is.null(early_stopping_rounds) &&
-      !has.callbacks(callbacks, 'cb.early.stop')) {
-    callbacks <- add.cb(callbacks, cb.early.stop(early_stopping_rounds,
-                                                 maximize = maximize, verbose = verbose))
+  params['validate_parameters'] <- TRUE
+  if (!("seed" %in% names(params))) {
+    params[["seed"]] <- sample(.Machine$integer.max, size = 1)
  }

-  # Sort the callbacks into categories
-  cb <- categorize.callbacks(callbacks)
-  params['validate_parameters'] <- TRUE
-  if (!is.null(params[['seed']])) {
-    warning("xgb.train: `seed` is ignored in R package.  Use `set.seed()` instead.")
+  # callbacks
+  tmp <- .process.callbacks(callbacks, is_cv = FALSE)
+  callbacks <- tmp$callbacks
+  cb_names <- tmp$cb_names
+  rm(tmp)
+
+  # Early stopping callback (should always come first)
+  if (!is.null(early_stopping_rounds) && !("early_stop" %in% cb_names)) {
+    callbacks <- add.callback(
+      callbacks,
+      xgb.cb.early.stop(
+        early_stopping_rounds,
+        maximize = maximize,
+        verbose = verbose
+      ),
+      as_first_elt = TRUE
+    )
+  }
+  # evaluation printing callback
+  print_every_n <- max(as.integer(print_every_n), 1L)
+  if (verbose && !("print_evaluation" %in% cb_names)) {
+    callbacks <- add.callback(callbacks, xgb.cb.print.evaluation(print_every_n))
+  }
+  # evaluation log callback:  it is automatically enabled when 'evals' is provided
+  if (length(evals) && !("evaluation_log" %in% cb_names)) {
+    callbacks <- add.callback(callbacks, xgb.cb.evaluation.log())
+  }
+  # Model saving callback
+  if (!is.null(save_period) && !("save_model" %in% cb_names)) {
+    callbacks <- add.callback(callbacks, xgb.cb.save.model(save_period, save_name))
  }

  # The tree updating process would need slightly different handling
  is_update <- NVL(params[['process_type']], '.') == 'update'

-  past_evaluation_log <- NULL
-  if (inherits(xgb_model, "xgb.Booster")) {
-    past_evaluation_log <- attributes(xgb_model)$evaluation_log
-  }
-
  # Construct a booster (either a new one or load from xgb_model)
  bst <- xgb.Booster(
    params = params,
-    cachelist = append(watchlist, dtrain),
+    cachelist = append(evals, dtrain),
    modelfile = xgb_model
  )
  niter_init <- bst$niter
@ -389,11 +396,6 @@ xgb.train <- function(params = list(), data, nrounds, watchlist = list(),
    dtrain
  )

-  # extract parameters that can affect the relationship b/w #trees and #iterations
-  # Note: it might look like these aren't used, but they need to be defined in this
-  # environment for the callbacks for work correctly.
-  num_class <- max(as.numeric(NVL(params[['num_class']], 1)), 1) # nolint
-
  if (is_update && nrounds > niter_init)
    stop("nrounds cannot be larger than ", niter_init, " (nrounds of xgb_model)")

@ -401,57 +403,83 @@ xgb.train <- function(params = list(), data, nrounds, watchlist = list(),
  begin_iteration <- niter_skip + 1
  end_iteration <- niter_skip + nrounds

+  .execute.cb.before.training(
+    callbacks,
+    bst,
+    dtrain,
+    evals,
+    begin_iteration,
+    end_iteration
+  )
+
  # the main loop for boosting iterations
  for (iteration in begin_iteration:end_iteration) {

-    for (f in cb$pre_iter) f()
-
-    xgb.iter.update(
-        bst = bst,
-        dtrain = dtrain,
-        iter = iteration - 1,
-        obj = obj
+    .execute.cb.before.iter(
+      callbacks,
+      bst,
+      dtrain,
+      evals,
+      iteration
    )

-    if (length(watchlist) > 0) {
-      bst_evaluation <- xgb.iter.eval(  # nolint: object_usage_linter
+    xgb.iter.update(
+      bst = bst,
+      dtrain = dtrain,
+      iter = iteration - 1,
+      obj = obj
+    )
+
+    bst_evaluation <- NULL
+    if (length(evals) > 0) {
+      bst_evaluation <- xgb.iter.eval(
        bst = bst,
-        watchlist = watchlist,
+        evals = evals,
        iter = iteration - 1,
        feval = feval
      )
    }

-    for (f in cb$post_iter) f()
+    should_stop <- .execute.cb.after.iter(
+      callbacks,
+      bst,
+      dtrain,
+      evals,
+      iteration,
+      bst_evaluation
+    )

-    if (stop_condition) break
+    if (should_stop) break
  }
-  for (f in cb$finalize) f(finalize = TRUE)

-  # store the evaluation results
-  keep_evaluation_log <- FALSE
-  if (length(evaluation_log) > 0 && nrow(evaluation_log) > 0) {
-    keep_evaluation_log <- TRUE
-    # include the previous compatible history when available
-    if (inherits(xgb_model, 'xgb.Booster') &&
-        !is_update &&
-        !is.null(past_evaluation_log) &&
-        isTRUE(all.equal(colnames(evaluation_log),
-                         colnames(past_evaluation_log)))) {
-      evaluation_log <- rbindlist(list(past_evaluation_log, evaluation_log))
-    }
-  }
+  cb_outputs <- .execute.cb.after.training(
+    callbacks,
+    bst,
+    dtrain,
+    evals,
+    iteration,
+    bst_evaluation
+  )

  extra_attrs <- list(
    call = match.call(),
-    params = params,
-    callbacks = callbacks
+    params = params
  )
-  if (keep_evaluation_log) {
-    extra_attrs$evaluation_log <- evaluation_log
-  }
+
  curr_attrs <- attributes(bst)
-  attributes(bst) <- c(curr_attrs, extra_attrs)
+  if (NROW(curr_attrs)) {
+    curr_attrs <- curr_attrs[
+      setdiff(
+        names(curr_attrs),
+        c(names(extra_attrs), names(cb_outputs))
+      )
+    ]
+  }
+  curr_attrs <- c(extra_attrs, curr_attrs)
+  if (NROW(cb_outputs)) {
+    curr_attrs <- c(curr_attrs, cb_outputs)
+  }
+  attributes(bst) <- curr_attrs

  return(bst)
 }
--- a/R-package/R/xgboost.R
+++ b/R-package/R/xgboost.R
@ -18,9 +18,9 @@ xgboost <- function(data = NULL, label = NULL, missing = NA, weight = NULL,
    nthread = merged$nthread
  )

-  watchlist <- list(train = dtrain)
+  evals <- list(train = dtrain)

-  bst <- xgb.train(params, dtrain, nrounds, watchlist, verbose = verbose, print_every_n = print_every_n,
+  bst <- xgb.train(params, dtrain, nrounds, evals, verbose = verbose, print_every_n = print_every_n,
                   early_stopping_rounds = early_stopping_rounds, maximize = maximize,
                   save_period = save_period, save_name = save_name,
                   xgb_model = xgb_model, callbacks = callbacks, ...)
@ -82,12 +82,8 @@ NULL
 NULL

 # Various imports
-#' @importClassesFrom Matrix dgCMatrix dgeMatrix dgRMatrix
-#' @importFrom Matrix colSums
+#' @importClassesFrom Matrix dgCMatrix dgRMatrix CsparseMatrix
 #' @importFrom Matrix sparse.model.matrix
-#' @importFrom Matrix sparseVector
-#' @importFrom Matrix sparseMatrix
-#' @importFrom Matrix t
 #' @importFrom data.table data.table
 #' @importFrom data.table is.data.table
 #' @importFrom data.table as.data.table
@ -103,6 +99,7 @@ NULL
 #' @importFrom stats coef
 #' @importFrom stats predict
 #' @importFrom stats median
+#' @importFrom stats sd
 #' @importFrom stats variable.names
 #' @importFrom utils head
 #' @importFrom graphics barplot
--- a/R-package/demo/basic_walkthrough.R
+++ b/R-package/demo/basic_walkthrough.R
@ -55,6 +55,8 @@ print(paste("test-error=", err))
 # save model to binary local file
 xgb.save(bst, "xgboost.model")
 # load binary model to R
+# xgb.load() doesn't take an 'nthread' argument, but the number of threads can be set like this:
+RhpcBLASctl::omp_set_num_threads(1)
 bst2 <- xgb.load("xgboost.model")
 pred2 <- predict(bst2, test$data)
 # pred2 should be identical to pred
@ -72,17 +74,17 @@ print(paste("sum(abs(pred3-pred))=", sum(abs(pred3 - pred))))
 # to use advanced features, we need to put data in xgb.DMatrix
 dtrain <- xgb.DMatrix(data = train$data, label = train$label)
 dtest <- xgb.DMatrix(data = test$data, label = test$label)
-#---------------Using watchlist----------------
-# watchlist is a list of xgb.DMatrix, each of them is tagged with name
-watchlist <- list(train = dtrain, test = dtest)
-# to train with watchlist, use xgb.train, which contains more advanced features
-# watchlist allows us to monitor the evaluation result on all data in the list
-print("Train xgboost using xgb.train with watchlist")
-bst <- xgb.train(data = dtrain, max_depth = 2, eta = 1, nrounds = 2, watchlist = watchlist,
+#---------------Using an evaluation set----------------
+# 'evals' is a list of xgb.DMatrix objects, each of them tagged with a name
+evals <- list(train = dtrain, test = dtest)
+# to train with an evaluation set, use xgb.train, which contains more advanced features
+# the 'evals' argument allows us to monitor the evaluation results on all data in the list
+print("Train xgboost using xgb.train with evaluation data")
+bst <- xgb.train(data = dtrain, max_depth = 2, eta = 1, nrounds = 2, evals = evals,
                 nthread = 2, objective = "binary:logistic")
 # we can change evaluation metrics, or use multiple evaluation metrics
-print("train xgboost using xgb.train with watchlist, watch logloss and error")
-bst <- xgb.train(data = dtrain, max_depth = 2, eta = 1, nrounds = 2, watchlist = watchlist,
+print("train xgboost using xgb.train with evaluation data, watch logloss and error")
+bst <- xgb.train(data = dtrain, max_depth = 2, eta = 1, nrounds = 2, evals = evals,
                 eval_metric = "error", eval_metric = "logloss",
                 nthread = 2, objective = "binary:logistic")

@ -90,7 +92,7 @@ bst <- xgb.train(data = dtrain, max_depth = 2, eta = 1, nrounds = 2, watchlist =
 xgb.DMatrix.save(dtrain, "dtrain.buffer")
 # to load it in, simply call xgb.DMatrix
 dtrain2 <- xgb.DMatrix("dtrain.buffer")
-bst <- xgb.train(data = dtrain2, max_depth = 2, eta = 1, nrounds = 2, watchlist = watchlist,
+bst <- xgb.train(data = dtrain2, max_depth = 2, eta = 1, nrounds = 2, evals = evals,
                 nthread = 2, objective = "binary:logistic")
 # information can be extracted from xgb.DMatrix using getinfo
 label <- getinfo(dtest, "label")
--- a/R-package/demo/boost_from_prediction.R
+++ b/R-package/demo/boost_from_prediction.R
@ -5,14 +5,14 @@ data(agaricus.test, package = 'xgboost')
 dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
 dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label)

-watchlist <- list(eval = dtest, train = dtrain)
+evals <- list(eval = dtest, train = dtrain)
 ###
 # advanced: start from a initial base prediction
 #
 print('start running example to start from a initial prediction')
 # train xgboost for 1 round
 param <- list(max_depth = 2, eta = 1, nthread = 2, objective = 'binary:logistic')
-bst <- xgb.train(param, dtrain, 1, watchlist)
+bst <- xgb.train(param, dtrain, 1, evals)
 # Note: we need the margin value instead of transformed prediction in set_base_margin
 # do predict with output_margin=TRUE, will always give you margin values before logistic transformation
 ptrain <- predict(bst, dtrain, outputmargin = TRUE)
@ -23,4 +23,4 @@ setinfo(dtrain, "base_margin", ptrain)
 setinfo(dtest, "base_margin", ptest)

 print('this is result of boost from initial prediction')
-bst <- xgb.train(params = param, data = dtrain, nrounds = 1, watchlist = watchlist)
+bst <- xgb.train(params = param, data = dtrain, nrounds = 1, evals = evals)
--- a/R-package/demo/custom_objective.R
+++ b/R-package/demo/custom_objective.R
@ -8,7 +8,7 @@ dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label)
 # note: for customized objective function, we leave objective as default
 # note: what we are getting is margin value in prediction
 # you must know what you are doing
-watchlist <- list(eval = dtest, train = dtrain)
+evals <- list(eval = dtest, train = dtrain)
 num_round <- 2

 # user define objective function, given prediction, return gradient and second order gradient
@ -38,7 +38,7 @@ param <- list(max_depth = 2, eta = 1, nthread  =  2, verbosity = 0,
 print('start training with user customized objective')
 # training with customized objective, we can also do step by step training
 # simply look at xgboost.py's implementation of train
-bst <- xgb.train(param, dtrain, num_round, watchlist)
+bst <- xgb.train(param, dtrain, num_round, evals)

 #
 # there can be cases where you want additional information
@ -62,4 +62,4 @@ param <- list(max_depth = 2, eta = 1, nthread  =  2, verbosity = 0,
 print('start training with user customized objective, with additional attributes in DMatrix')
 # training with customized objective, we can also do step by step training
 # simply look at xgboost.py's implementation of train
-bst <- xgb.train(param, dtrain, num_round, watchlist)
+bst <- xgb.train(param, dtrain, num_round, evals)
--- a/R-package/demo/early_stopping.R
+++ b/R-package/demo/early_stopping.R
@ -8,7 +8,7 @@ dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label)
 # note: what we are getting is margin value in prediction
 # you must know what you are doing
 param <- list(max_depth = 2, eta = 1, nthread = 2, verbosity = 0)
-watchlist <- list(eval = dtest)
+evals <- list(eval = dtest)
 num_round <- 20
 # user define objective function, given prediction, return gradient and second order gradient
 # this is log likelihood loss
@ -32,7 +32,7 @@ evalerror <- function(preds, dtrain) {
 }
 print('start training with early Stopping setting')

-bst <- xgb.train(param, dtrain, num_round, watchlist,
+bst <- xgb.train(param, dtrain, num_round, evals,
                 objective = logregobj, eval_metric = evalerror, maximize = FALSE,
                 early_stopping_round = 3)
 bst <- xgb.cv(param, dtrain, num_round, nfold = 5,
--- a/R-package/demo/generalized_linear_model.R
+++ b/R-package/demo/generalized_linear_model.R
@ -25,9 +25,9 @@ param <- list(objective = "binary:logistic", booster = "gblinear",
 ##
 # the rest of settings are the same
 ##
-watchlist <- list(eval = dtest, train = dtrain)
+evals <- list(eval = dtest, train = dtrain)
 num_round <- 2
-bst <- xgb.train(param, dtrain, num_round, watchlist)
+bst <- xgb.train(param, dtrain, num_round, evals)
 ypred <- predict(bst, dtest)
 labels <- getinfo(dtest, 'label')
 cat('error of preds=', mean(as.numeric(ypred > 0.5) != labels), '\n')
--- a/R-package/demo/gpu_accelerated.R
+++ b/R-package/demo/gpu_accelerated.R
@ -23,7 +23,7 @@ y <- rbinom(N, 1, plogis(m))
 tr <- sample.int(N, N * 0.75)
 dtrain <- xgb.DMatrix(X[tr, ], label = y[tr])
 dtest <- xgb.DMatrix(X[-tr, ], label = y[-tr])
-wl <- list(train = dtrain, test = dtest)
+evals <- list(train = dtrain, test = dtest)

 # An example of running 'gpu_hist' algorithm
 # which is
@ -35,11 +35,11 @@ wl <- list(train = dtrain, test = dtest)
 param <- list(objective = 'reg:logistic', eval_metric = 'auc', subsample = 0.5, nthread = 4,
              max_bin = 64, tree_method = 'gpu_hist')
 pt <- proc.time()
-bst_gpu <- xgb.train(param, dtrain, watchlist = wl, nrounds = 50)
+bst_gpu <- xgb.train(param, dtrain, evals = evals, nrounds = 50)
 proc.time() - pt

 # Compare to the 'hist' algorithm:
 param$tree_method <- 'hist'
 pt <- proc.time()
-bst_hist <- xgb.train(param, dtrain, watchlist = wl, nrounds = 50)
+bst_hist <- xgb.train(param, dtrain, evals = evals, nrounds = 50)
 proc.time() - pt
--- a/R-package/demo/predict_first_ntree.R
+++ b/R-package/demo/predict_first_ntree.R
@ -6,11 +6,11 @@ dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
 dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label)

 param <- list(max_depth = 2, eta = 1, objective = 'binary:logistic')
-watchlist <- list(eval = dtest, train = dtrain)
+evals <- list(eval = dtest, train = dtrain)
 nrounds <- 2

 # training the model for two rounds
-bst <- xgb.train(param, dtrain, nrounds, nthread = 2, watchlist)
+bst <- xgb.train(param, dtrain, nrounds, nthread = 2, evals = evals)
 cat('start testing prediction from first n trees\n')
 labels <- getinfo(dtest, 'label')

--- a/R-package/demo/predict_leaf_indices.R
+++ b/R-package/demo/predict_leaf_indices.R
@ -43,7 +43,6 @@ colnames(new.features.test) <- colnames(new.features.train)
 # learning with new features
 new.dtrain <- xgb.DMatrix(data = new.features.train, label = agaricus.train$label)
 new.dtest <- xgb.DMatrix(data = new.features.test, label = agaricus.test$label)
-watchlist <- list(train = new.dtrain)
 bst <- xgb.train(params = param, data = new.dtrain, nrounds = nrounds, nthread = 2)

 # Model accuracy with new features
--- a/R-package/demo/tweedie_regression.R
+++ b/R-package/demo/tweedie_regression.R
@ -39,7 +39,7 @@ bst <- xgb.train(
  data = d_train,
  params = params,
  maximize = FALSE,
-  watchlist = list(train = d_train),
+  evals = list(train = d_train),
  nrounds = 20)

 var_imp <- xgb.importance(attr(x, 'Dimnames')[[2]], model = bst)
--- a/R-package/man/callbacks.Rd
+++ b/R-package/man/callbacks.Rd
@ -1,37 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/callbacks.R
-\name{callbacks}
-\alias{callbacks}
-\title{Callback closures for booster training.}
-\description{
-These are used to perform various service tasks either during boosting iterations or at the end.
-This approach helps to modularize many of such tasks without bloating the main training methods,
-and it offers .
-}
-\details{
-By default, a callback function is run after each boosting iteration.
-An R-attribute \code{is_pre_iteration} could be set for a callback to define a pre-iteration function.
-
-When a callback function has \code{finalize} parameter, its finalizer part will also be run after
-the boosting is completed.
-
-WARNING: side-effects!!! Be aware that these callback functions access and modify things in
-the environment from which they are called from, which is a fairly uncommon thing to do in R.
-
-To write a custom callback closure, make sure you first understand the main concepts about R environments.
-Check either R documentation on \code{\link[base]{environment}} or the
-\href{http://adv-r.had.co.nz/Environments.html}{Environments chapter} from the "Advanced R"
-book by Hadley Wickham. Further, the best option is to read the code of some of the existing callbacks -
-choose ones that do something similar to what you want to achieve. Also, you would need to get familiar
-with the objects available inside of the \code{xgb.train} and \code{xgb.cv} internal environments.
-}
-\seealso{
-\code{\link{cb.print.evaluation}},
-\code{\link{cb.evaluation.log}},
-\code{\link{cb.reset.parameters}},
-\code{\link{cb.early.stop}},
-\code{\link{cb.save.model}},
-\code{\link{cb.cv.predict}},
-\code{\link{xgb.train}},
-\code{\link{xgb.cv}}
-}
--- a/R-package/man/cb.early.stop.Rd
+++ b/R-package/man/cb.early.stop.Rd
@ -1,62 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/callbacks.R
-\name{cb.early.stop}
-\alias{cb.early.stop}
-\title{Callback closure to activate the early stopping.}
-\usage{
-cb.early.stop(
-  stopping_rounds,
-  maximize = FALSE,
-  metric_name = NULL,
-  verbose = TRUE
-)
-}
-\arguments{
-\item{stopping_rounds}{The number of rounds with no improvement in
-the evaluation metric in order to stop the training.}
-
-\item{maximize}{whether to maximize the evaluation metric}
-
-\item{metric_name}{the name of an evaluation column to use as a criteria for early
-stopping. If not set, the last column would be used.
-Let's say the test data in \code{watchlist} was labelled as \code{dtest},
-and one wants to use the AUC in test data for early stopping regardless of where
-it is in the \code{watchlist}, then one of the following would need to be set:
-\code{metric_name='dtest-auc'} or \code{metric_name='dtest_auc'}.
-All dash '-' characters in metric names are considered equivalent to '_'.}
-
-\item{verbose}{whether to print the early stopping information.}
-}
-\description{
-Callback closure to activate the early stopping.
-}
-\details{
-This callback function determines the condition for early stopping
-by setting the \code{stop_condition = TRUE} flag in its calling frame.
-
-The following additional fields are assigned to the model's R object:
-\itemize{
-\item \code{best_score} the evaluation score at the best iteration
-\item \code{best_iteration} at which boosting iteration the best score has occurred (1-based index)
-}
-The Same values are also stored as xgb-attributes:
-\itemize{
-\item \code{best_iteration} is stored as a 0-based iteration index (for interoperability of binary models)
-\item \code{best_msg} message string is also stored.
-}
-
-At least one data element is required in the evaluation watchlist for early stopping to work.
-
-Callback function expects the following values to be set in its calling frame:
-\code{stop_condition},
-\code{bst_evaluation},
-\code{rank},
-\code{bst} (or \code{bst_folds} and \code{basket}),
-\code{iteration},
-\code{begin_iteration},
-\code{end_iteration},
-}
-\seealso{
-\code{\link{callbacks}},
-\code{\link{xgb.attr}}
-}
--- a/R-package/man/cb.evaluation.log.Rd
+++ b/R-package/man/cb.evaluation.log.Rd
@ -1,31 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/callbacks.R
-\name{cb.evaluation.log}
-\alias{cb.evaluation.log}
-\title{Callback closure for logging the evaluation history}
-\usage{
-cb.evaluation.log()
-}
-\description{
-Callback closure for logging the evaluation history
-}
-\details{
-This callback function appends the current iteration evaluation results \code{bst_evaluation}
-available in the calling parent frame to the \code{evaluation_log} list in a calling frame.
-
-The finalizer callback (called with \code{finalize = TURE} in the end) converts
-the \code{evaluation_log} list into a final data.table.
-
-The iteration evaluation result \code{bst_evaluation} must be a named numeric vector.
-
-Note: in the column names of the final data.table, the dash '-' character is replaced with
-the underscore '_' in order to make the column names more like regular R identifiers.
-
-Callback function expects the following values to be set in its calling frame:
-\code{evaluation_log},
-\code{bst_evaluation},
-\code{iteration}.
-}
-\seealso{
-\code{\link{callbacks}}
-}
--- a/R-package/man/cb.print.evaluation.Rd
+++ b/R-package/man/cb.print.evaluation.Rd
@ -1,29 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/callbacks.R
-\name{cb.print.evaluation}
-\alias{cb.print.evaluation}
-\title{Callback closure for printing the result of evaluation}
-\usage{
-cb.print.evaluation(period = 1, showsd = TRUE)
-}
-\arguments{
-\item{period}{results would be printed every number of periods}
-
-\item{showsd}{whether standard deviations should be printed (when available)}
-}
-\description{
-Callback closure for printing the result of evaluation
-}
-\details{
-The callback function prints the result of evaluation at every \code{period} iterations.
-The initial and the last iteration's evaluations are always printed.
-
-Callback function expects the following values to be set in its calling frame:
-\code{bst_evaluation} (also \code{bst_evaluation_err} when available),
-\code{iteration},
-\code{begin_iteration},
-\code{end_iteration}.
-}
-\seealso{
-\code{\link{callbacks}}
-}
--- a/R-package/man/cb.save.model.Rd
+++ b/R-package/man/cb.save.model.Rd
@ -1,40 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/callbacks.R
-\name{cb.save.model}
-\alias{cb.save.model}
-\title{Callback closure for saving a model file.}
-\usage{
-cb.save.model(save_period = 0, save_name = "xgboost.ubj")
-}
-\arguments{
-\item{save_period}{save the model to disk after every
-\code{save_period} iterations; 0 means save the model at the end.}
-
-\item{save_name}{the name or path for the saved model file.
-
-\if{html}{\out{<div class="sourceCode">}}\preformatted{   Note that the format of the model being saved is determined by the file
-   extension specified here (see \link{xgb.save} for details about how it works).
-
-   It can contain a \code{\link[base]{sprintf}} formatting specifier
-   to include the integer iteration number in the file name.
-   E.g., with \code{save_name} = 'xgboost_\%04d.ubj',
-   the file saved at iteration 50 would be named "xgboost_0050.ubj".
-}\if{html}{\out{</div>}}}
-}
-\description{
-Callback closure for saving a model file.
-}
-\details{
-This callback function allows to save an xgb-model file, either periodically after each \code{save_period}'s or at the end.
-
-Callback function expects the following values to be set in its calling frame:
-\code{bst},
-\code{iteration},
-\code{begin_iteration},
-\code{end_iteration}.
-}
-\seealso{
-\link{xgb.save}
-
-\code{\link{callbacks}}
-}
--- a/R-package/man/predict.xgb.Booster.Rd
+++ b/R-package/man/predict.xgb.Booster.Rd
@ -18,25 +18,47 @@
  iterationrange = NULL,
  strict_shape = FALSE,
  validate_features = FALSE,
+  base_margin = NULL,
  ...
 )
 }
 \arguments{
 \item{object}{Object of class \code{xgb.Booster}.}

-\item{newdata}{Takes \code{matrix}, \code{dgCMatrix}, \code{dgRMatrix}, \code{dsparseVector},
+\item{newdata}{Takes \code{data.frame}, \code{matrix}, \code{dgCMatrix}, \code{dgRMatrix}, \code{dsparseVector},
 local data file, or \code{xgb.DMatrix}.
-For single-row predictions on sparse data, it is recommended to use the CSR format.
-If passing a sparse vector, it will take it as a row vector.}

-\item{missing}{Only used when input is a dense matrix. Pick a float value that represents
-missing values in data (e.g., 0 or some other extreme value).}
+\if{html}{\out{<div class="sourceCode">}}\preformatted{   For single-row predictions on sparse data, it's recommended to use CSR format. If passing
+   a sparse vector, it will take it as a row vector.
+
+   Note that, for repeated predictions on the same data, one might want to create a DMatrix to
+   pass here instead of passing R types like matrices or data frames, as predictions will be
+   faster on DMatrix.
+
+   If `newdata` is a `data.frame`, be aware that:\\itemize\{
+   \\item Columns will be converted to numeric if they aren't already, which could potentially make
+         the operation slower than in an equivalent `matrix` object.
+   \\item The order of the columns must match with that of the data from which the model was fitted
+         (i.e. columns will not be referenced by their names, just by their order in the data).
+   \\item If the model was fitted to data with categorical columns, these columns must be of
+         `factor` type here, and must use the same encoding (i.e. have the same levels).
+   \\item If `newdata` contains any `factor` columns, they will be converted to base-0
+         encoding (same as during DMatrix creation) - hence, one should not pass a `factor`
+         under a column which during training had a different type.
+   \}
+}\if{html}{\out{</div>}}}
+
+\item{missing}{Float value that represents missing values in data (e.g., 0 or some other extreme value).
+
+\if{html}{\out{<div class="sourceCode">}}\preformatted{   This parameter is not used when `newdata` is an `xgb.DMatrix` - in such cases, should pass
+   this as an argument to the DMatrix constructor instead.
+}\if{html}{\out{</div>}}}

 \item{outputmargin}{Whether the prediction should be returned in the form of original untransformed
 sum of predictions from boosting iterations' results. E.g., setting \code{outputmargin=TRUE} for
 logistic regression would return log-odds instead of probabilities.}

-\item{predleaf}{Whether to predict pre-tree leaf indices.}
+\item{predleaf}{Whether to predict per-tree leaf indices.}

 \item{predcontrib}{Whether to return feature contributions to individual predictions (see Details).}

@ -48,7 +70,7 @@ logistic regression would return log-odds instead of probabilities.}
 prediction outputs per case. No effect if \code{predleaf}, \code{predcontrib},
 or \code{predinteraction} is \code{TRUE}.}

-\item{training}{Whether the predictions are used for training. For dart booster,
+\item{training}{Whether the prediction result is used for training. For dart booster,
 training predicting will perform dropout.}

 \item{iterationrange}{Sequence of rounds/iterations from the model to use for prediction, specified by passing
@ -84,6 +106,13 @@ match (only applicable when both \code{object} and \code{newdata} have feature n
   recommended to disable it for performance-sensitive applications.
 }\if{html}{\out{</div>}}}

+\item{base_margin}{Base margin used for boosting from existing model.
+
+\if{html}{\out{<div class="sourceCode">}}\preformatted{   Note that, if `newdata` is an `xgb.DMatrix` object, this argument will
+   be ignored as it needs to be added to the DMatrix instead (e.g. by passing it as
+   an argument in its constructor, or by calling \link{setinfo.xgb.DMatrix}).
+}\if{html}{\out{</div>}}}
+
 \item{...}{Not used.}
 }
 \value{
@ -115,7 +144,7 @@ When \code{strict_shape = TRUE}, the output is always an array:
 }
 }
 \description{
-Predicted values based on either xgboost model or model handle object.
+Predict values on data based on xgboost model.
 }
 \details{
 Note that \code{iterationrange} would currently do nothing for predictions from "gblinear",
--- a/R-package/man/xgb.Callback.Rd
+++ b/R-package/man/xgb.Callback.Rd
@ -0,0 +1,248 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/callbacks.R
+\name{xgb.Callback}
+\alias{xgb.Callback}
+\title{XGBoost Callback Constructor}
+\usage{
+xgb.Callback(
+  cb_name = "custom_callback",
+  env = new.env(),
+  f_before_training = function(env, model, data, evals, begin_iteration, end_iteration)
+    NULL,
+  f_before_iter = function(env, model, data, evals, iteration) NULL,
+  f_after_iter = function(env, model, data, evals, iteration, iter_feval) NULL,
+  f_after_training = function(env, model, data, evals, iteration, final_feval,
+    prev_cb_res) NULL
+)
+}
+\arguments{
+\item{cb_name}{Name for the callback.
+
+If the callback produces some non-NULL result (from executing the function passed under
+\code{f_after_training}), that result will be added as an R attribute to the resulting booster
+(or as a named element in the result of CV), with the attribute name specified here.
+
+Names of callbacks must be unique - i.e. there cannot be two callbacks with the same name.}
+
+\item{env}{An environment object that will be passed to the different functions in the callback.
+Note that this environment will not be shared with other callbacks.}
+
+\item{f_before_training}{A function that will be executed before the training has started.
+
+If passing \code{NULL} for this or for the other function inputs, then no function will be executed.
+
+If passing a function, it will be called with parameters supplied as non-named arguments
+matching the function signatures that are shown in the default value for each function argument.}
+
+\item{f_before_iter}{A function that will be executed before each boosting round.
+
+This function can signal whether the training should be finalized or not, by outputting
+a value that evaluates to \code{TRUE} - i.e. if the output from the function provided here at
+a given round is \code{TRUE}, then training will be stopped before the current iteration happens.
+
+Return values of \code{NULL} will be interpreted as \code{FALSE}.}
+
+\item{f_after_iter}{A function that will be executed after each boosting round.
+
+This function can signal whether the training should be finalized or not, by outputting
+a value that evaluates to \code{TRUE} - i.e. if the output from the function provided here at
+a given round is \code{TRUE}, then training will be stopped at that round.
+
+Return values of \code{NULL} will be interpreted as \code{FALSE}.}
+
+\item{f_after_training}{A function that will be executed after training is finished.
+
+This function can optionally output something non-NULL, which will become part of the R
+attributes of the booster (assuming one passes \code{keep_extra_attributes=TRUE} to \link{xgb.train})
+under the name supplied for parameter \code{cb_name} in the case of \link{xgb.train}; or a part
+of the named elements in the result of \link{xgb.cv}.}
+}
+\value{
+An \code{xgb.Callback} object, which can be passed to \link{xgb.train} or \link{xgb.cv}.
+}
+\description{
+Constructor for defining the structure of callback functions that can be executed
+at different stages of model training (before / after training, before / after each boosting
+iteration).
+}
+\details{
+Arguments that will be passed to the supplied functions are as follows:\itemize{
+
+\item env The same environment that is passed under argument \code{env}.
+
+It may be modified by the functions in order to e.g. keep track of what happens
+across iterations or similar.
+
+This environment is only used by the functions supplied to the callback, and will
+not be kept after the model fitting function terminates (see parameter \code{f_after_training}).
+
+\item model The booster object when using \link{xgb.train}, or the folds when using
+\link{xgb.cv}.
+
+For \link{xgb.cv}, folds are a list with a structure as follows:\itemize{
+\item \code{dtrain}: The training data for the fold (as an \code{xgb.DMatrix} object).
+\item \code{bst}: The \code{xgb.Booster} object for the fold.
+\item \code{evals}: A list containing two DMatrices, with names \code{train} and \code{test}
+(\code{test} is the held-out data for the fold).
+\item \code{index}: The indices of the hold-out data for that fold (base-1 indexing),
+from which the \code{test} entry in \code{evals} was obtained.
+}
+
+This object should \bold{not} be in-place modified in ways that conflict with the
+training (e.g. resetting the parameters for a training update in a way that resets
+the number of rounds to zero in order to overwrite rounds).
+
+Note that any R attributes that are assigned to the booster during the callback functions,
+will not be kept thereafter as the booster object variable is not re-assigned during
+training. It is however possible to set C-level attributes of the booster through
+\link{xgb.attr} or \link{xgb.attributes}, which should remain available for the rest
+of the iterations and after the training is done.
+
+For keeping variables across iterations, it's recommended to use \code{env} instead.
+\item data The data to which the model is being fit, as an \code{xgb.DMatrix} object.
+
+Note that, for \link{xgb.cv}, this will be the full data, while data for the specific
+folds can be found in the \code{model} object.
+
+\item evals The evaluation data, as passed under argument \code{evals} to
+\link{xgb.train}.
+
+For \link{xgb.cv}, this will always be \code{NULL}.
+
+\item begin_iteration Index of the first boosting iteration that will be executed
+(base-1 indexing).
+
+This will typically be '1', but when using training continuation, depending on the
+parameters for updates, boosting rounds will be continued from where the previous
+model ended, in which case this will be larger than 1.
+
+\item end_iteration Index of the last boosting iteration that will be executed
+(base-1 indexing, inclusive of this end).
+
+It should match with argument \code{nrounds} passed to \link{xgb.train} or \link{xgb.cv}.
+
+Note that boosting might be interrupted before reaching this last iteration, for
+example by using the early stopping callback \link{xgb.cb.early.stop}.
+
+\item iteration Index of the iteration number that is being executed (first iteration
+will be the same as parameter \code{begin_iteration}, then next one will add +1, and so on).
+
+\item iter_feval Evaluation metrics for \code{evals} that were supplied, either
+determined by the objective, or by parameter \code{feval}.
+
+For \link{xgb.train}, this will be a named vector with one entry per element in
+\code{evals}, where the names are determined as 'evals name' + '-' + 'metric name' - for
+example, if \code{evals} contains an entry named "tr" and the metric is "rmse",
+this will be a one-element vector with name "tr-rmse".
+
+For \link{xgb.cv}, this will be a 2d matrix with dimensions \verb{[length(evals), nfolds]},
+where the row names will follow the same naming logic as the one-dimensional vector
+that is passed in \link{xgb.train}.
+
+Note that, internally, the built-in callbacks such as \link{xgb.cb.print.evaluation} summarize
+this table by calculating the row-wise means and standard deviations.
+
+\item final_feval The evaluation results after the last boosting round is executed
+(same format as \code{iter_feval}, and will be the exact same input as passed under
+\code{iter_feval} to the last round that is executed during model fitting).
+
+\item prev_cb_res Result from a previous run of a callback sharing the same name
+(as given by parameter \code{cb_name}) when conducting training continuation, if there
+was any in the booster R attributes.
+
+Sometimes, one might want to append the new results to the previous one, and this will
+be done automatically by the built-in callbacks such as \link{xgb.cb.evaluation.log},
+which will append the new rows to the previous table.
+
+If no such previous callback result is available (which it never will when fitting
+a model from start instead of updating an existing model), this will be \code{NULL}.
+
+For \link{xgb.cv}, which doesn't support training continuation, this will always be \code{NULL}.
+}
+
+The following names (\code{cb_name} values) are reserved for internal callbacks:\itemize{
+\item print_evaluation
+\item evaluation_log
+\item reset_parameters
+\item early_stop
+\item save_model
+\item cv_predict
+\item gblinear_history
+}
+
+The following names are reserved for other non-callback attributes:\itemize{
+\item names
+\item class
+\item call
+\item params
+\item niter
+\item nfeatures
+\item folds
+}
+
+When using the built-in early stopping callback (\link{xgb.cb.early.stop}), said callback
+will always be executed before the others, as it sets some booster C-level attributes
+that other callbacks might also use. Otherwise, the order of execution will match with
+the order in which the callbacks are passed to the model fitting function.
+}
+\examples{
+# Example constructing a custom callback that calculates
+# squared error on the training data (no separate test set),
+# and outputs the per-iteration results.
+ssq_callback <- xgb.Callback(
+  cb_name = "ssq",
+  f_before_training = function(env, model, data, evals,
+                               begin_iteration, end_iteration) {
+    # A vector to keep track of a number at each iteration
+    env$logs <- rep(NA_real_, end_iteration - begin_iteration + 1)
+  },
+  f_after_iter = function(env, model, data, evals, iteration, iter_feval) {
+    # This calculates the sum of squared errors on the training data.
+    # Note that this can be better done by passing an 'evals' entry,
+    # but this demonstrates a way in which callbacks can be structured.
+    pred <- predict(model, data)
+    err <- pred - getinfo(data, "label")
+    sq_err <- sum(err^2)
+    env$logs[iteration] <- sq_err
+    cat(
+      sprintf(
+        "Squared error at iteration \%d: \%.2f\n",
+        iteration, sq_err
+      )
+    )
+
+    # A return value of 'TRUE' here would signal to finalize the training
+    return(FALSE)
+  },
+  f_after_training = function(env, model, data, evals, iteration,
+                              final_feval, prev_cb_res) {
+    return(env$logs)
+  }
+)
+
+data(mtcars)
+y <- mtcars$mpg
+x <- as.matrix(mtcars[, -1])
+dm <- xgb.DMatrix(x, label = y, nthread = 1)
+model <- xgb.train(
+  data = dm,
+  params = list(objective = "reg:squarederror", nthread = 1),
+  nrounds = 5,
+  callbacks = list(ssq_callback),
+  keep_extra_attributes = TRUE
+)
+
+# Result from 'f_after_training' will be available as an attribute
+attributes(model)$ssq
+}
+\seealso{
+Built-in callbacks:\itemize{
+\item \link{xgb.cb.print.evaluation}
+\item \link{xgb.cb.evaluation.log}
+\item \link{xgb.cb.reset.parameters}
+\item \link{xgb.cb.early.stop}
+\item \link{xgb.cb.save.model}
+\item \link{xgb.cb.cv.predict}
+\item \link{xgb.cb.gblinear.history}
+}
+}
--- a/R-package/man/xgb.DMatrix.Rd
+++ b/R-package/man/xgb.DMatrix.Rd
@ -19,7 +19,8 @@ xgb.DMatrix(
  qid = NULL,
  label_lower_bound = NULL,
  label_upper_bound = NULL,
-  feature_weights = NULL
+  feature_weights = NULL,
+  data_split_mode = "row"
 )

 xgb.QuantileDMatrix(
@ -60,10 +61,27 @@ Other column types are not supported.
 'xgb.QuantileDMatrix'.
 \item Single-row CSR matrices, as class \code{dsparseVector} from package \code{Matrix}, which is interpreted
 as a single row (only when making predictions from a fitted model).
-\item Text files in SVMLight / LibSVM formats, passed as a path to the file. These are \bold{not}
-supported for xgb.QuantileDMatrix'.
-\item Binary files generated by \link{xgb.DMatrix.save},  passed as a path to the file. These are
-\bold{not} supported for xgb.QuantileDMatrix'.
+\item Text files in a supported format, passed as a \code{character} variable containing the URI path to
+the file, with an optional format specifier.
+
+These are \bold{not} supported for \code{xgb.QuantileDMatrix}. Supported formats are:\itemize{
+\item XGBoost's own binary format for DMatrices, as produced by \link{xgb.DMatrix.save}.
+\item SVMLight (a.k.a. LibSVM) format for CSR matrices. This format can be signaled by suffix
+\code{?format=libsvm} at the end of the file path. It will be the default format if not
+otherwise specified.
+\item CSV files (comma-separated values). This format can be specified by adding suffix
+\code{?format=csv} at the end of the file path. It will \bold{not} be auto-deduced from file extensions.
+}
+
+Be aware that the format of the file will not be auto-deduced - for example, if a file is named 'file.csv',
+it will not look at the extension or file contents to determine that it is a comma-separated value.
+Instead, the format must be specified following the URI format, so the input to \code{data} should be passed
+like this: \code{"file.csv?format=csv"} (or \code{"file.csv?format=csv&label_column=0"} if the first column
+corresponds to the labels).
+
+For more information about passing text files as input, see the articles
+\href{https://xgboost.readthedocs.io/en/stable/tutorials/input_format.html}{Text Input Format of DMatrix} and
+\href{https://xgboost.readthedocs.io/en/stable/python/python_intro.html#python-data-interface}{Data Interface}.
 }}

 \item{label}{Label of the training data. For classification problems, should be passed encoded as
@ -129,6 +147,14 @@ not be saved, so make sure that \code{factor} columns passed to \code{predict} h

 \item{feature_weights}{Set feature weights for column sampling.}

+\item{data_split_mode}{When passing a URI (as R \code{character}) as input, this signals
+whether to split by row or column. Allowed values are \code{"row"} and \code{"col"}.
+
+In distributed mode, the file is split accordingly; otherwise this is only an indicator on
+how the file was split beforehand. Defaults to \code{"row"}.
+
+This is not used when \code{data} is not a URI.}
+
 \item{ref}{The training dataset that provides quantile information, needed when creating
 validation/test dataset with \code{xgb.QuantileDMatrix}. Supplying the training DMatrix
 as a reference means that the same quantisation applied to the training data is
--- a/R-package/man/xgb.DMatrix.save.Rd
+++ b/R-package/man/xgb.DMatrix.save.Rd
@ -15,6 +15,7 @@ xgb.DMatrix.save(dmatrix, fname)
 Save xgb.DMatrix object to binary file
 }
 \examples{
+\dontshow{RhpcBLASctl::omp_set_num_threads(1)}
 data(agaricus.train, package='xgboost')
 dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))
 fname <- file.path(tempdir(), "xgb.DMatrix.data")
--- a/R-package/man/xgb.cb.cv.predict.Rd
+++ b/R-package/man/xgb.cb.cv.predict.Rd
@ -1,16 +1,27 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/callbacks.R
-\name{cb.cv.predict}
-\alias{cb.cv.predict}
-\title{Callback closure for returning cross-validation based predictions.}
+\name{xgb.cb.cv.predict}
+\alias{xgb.cb.cv.predict}
+\title{Callback for returning cross-validation based predictions.}
 \usage{
-cb.cv.predict(save_models = FALSE)
+xgb.cb.cv.predict(save_models = FALSE, outputmargin = FALSE)
 }
 \arguments{
-\item{save_models}{a flag for whether to save the folds' models.}
+\item{save_models}{A flag for whether to save the folds' models.}
+
+\item{outputmargin}{Whether to save margin predictions (same effect as passing this
+parameter to \link{predict.xgb.Booster}).}
 }
 \value{
-Predictions are returned inside of the \code{pred} element, which is either a vector or a matrix,
+An \code{xgb.Callback} object, which can be passed to \link{xgb.cv},
+but \bold{not} to \link{xgb.train}.
+}
+\description{
+This callback function saves predictions for all of the test folds,
+and also allows saving the folds' models.
+}
+\details{
+Predictions are saved inside of the \code{pred} element, which is either a vector or a matrix,
 depending on the number of prediction outputs per data row. The order of predictions corresponds
 to the order of rows in the original dataset. Note that when a custom \code{folds} list is
 provided in \code{xgb.cv}, the predictions would only be returned properly when this list is a
@ -19,23 +30,3 @@ meaningful when user-provided folds have overlapping indices as in, e.g., random
 When some of the indices in the training dataset are not included into user-provided \code{folds},
 their prediction value would be \code{NA}.
 }
-\description{
-Callback closure for returning cross-validation based predictions.
-}
-\details{
-This callback function saves predictions for all of the test folds,
-and also allows to save the folds' models.
-
-It is a "finalizer" callback and it uses early stopping information whenever it is available,
-thus it must be run after the early stopping callback if the early stopping is used.
-
-Callback function expects the following values to be set in its calling frame:
-\code{bst_folds},
-\code{basket},
-\code{data},
-\code{end_iteration},
-\code{params},
-}
-\seealso{
-\code{\link{callbacks}}
-}
--- a/R-package/man/xgb.cb.early.stop.Rd
+++ b/R-package/man/xgb.cb.early.stop.Rd
@ -0,0 +1,55 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/callbacks.R
+\name{xgb.cb.early.stop}
+\alias{xgb.cb.early.stop}
+\title{Callback to activate early stopping}
+\usage{
+xgb.cb.early.stop(
+  stopping_rounds,
+  maximize = FALSE,
+  metric_name = NULL,
+  verbose = TRUE,
+  keep_all_iter = TRUE
+)
+}
+\arguments{
+\item{stopping_rounds}{The number of rounds with no improvement in
+the evaluation metric in order to stop the training.}
+
+\item{maximize}{Whether to maximize the evaluation metric.}
+
+\item{metric_name}{The name of an evaluation column to use as a criterion for early
+stopping. If not set, the last column would be used.
+Let's say the test data in \code{evals} was labelled as \code{dtest},
+and one wants to use the AUC in test data for early stopping regardless of where
+it is in the \code{evals}, then one of the following would need to be set:
+\code{metric_name='dtest-auc'} or \code{metric_name='dtest_auc'}.
+All dash '-' characters in metric names are considered equivalent to '_'.}
+
+\item{verbose}{Whether to print the early stopping information.}
+
+\item{keep_all_iter}{Whether to keep all of the boosting rounds that were produced
+in the resulting object. If passing \code{FALSE}, will only keep the boosting rounds
+up to the detected best iteration, discarding the ones that come after.}
+}
+\value{
+An \code{xgb.Callback} object, which can be passed to \link{xgb.train} or \link{xgb.cv}.
+}
+\description{
+This callback function determines the condition for early stopping.
+
+The following attributes are assigned to the booster's object:
+\itemize{
+\item \code{best_score} the evaluation score at the best iteration
+\item \code{best_iteration} at which boosting iteration the best score has occurred
+(0-based index for interoperability of binary models)
+}
+
+The same values are also stored as R attributes as a result of the callback, plus an additional
+attribute \code{stopped_by_max_rounds} which indicates whether an early stopping by the \code{stopping_rounds}
+condition occurred. Note that the \code{best_iteration} that is stored under R attributes will follow
+base-1 indexing, so it will be larger by '1' than the C-level 'best_iteration' that is accessed
+through \link{xgb.attr} or \link{xgb.attributes}.
+
+At least one dataset is required in \code{evals} for early stopping to work.
+}
--- a/R-package/man/xgb.cb.evaluation.log.Rd
+++ b/R-package/man/xgb.cb.evaluation.log.Rd
@ -0,0 +1,24 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/callbacks.R
+\name{xgb.cb.evaluation.log}
+\alias{xgb.cb.evaluation.log}
+\title{Callback for logging the evaluation history}
+\usage{
+xgb.cb.evaluation.log()
+}
+\value{
+An \code{xgb.Callback} object, which can be passed to \link{xgb.train} or \link{xgb.cv}.
+}
+\description{
+Callback for logging the evaluation history
+}
+\details{
+This callback creates a table with per-iteration evaluation metrics (see parameters
+\code{evals} and \code{feval} in \link{xgb.train}).
+
+Note: in the column names of the final data.table, the dash '-' character is replaced with
+the underscore '_' in order to make the column names more like regular R identifiers.
+}
+\seealso{
+\link{xgb.cb.print.evaluation}
+}
--- a/R-package/man/xgb.cb.gblinear.history.Rd
+++ b/R-package/man/xgb.cb.gblinear.history.Rd
@ -1,37 +1,48 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/callbacks.R
-\name{cb.gblinear.history}
-\alias{cb.gblinear.history}
-\title{Callback closure for collecting the model coefficients history of a gblinear booster
-during its training.}
+\name{xgb.cb.gblinear.history}
+\alias{xgb.cb.gblinear.history}
+\title{Callback for collecting coefficients history of a gblinear booster}
 \usage{
-cb.gblinear.history(sparse = FALSE)
+xgb.cb.gblinear.history(sparse = FALSE)
 }
 \arguments{
-\item{sparse}{when set to FALSE/TRUE, a dense/sparse matrix is used to store the result.
+\item{sparse}{when set to \code{FALSE}/\code{TRUE}, a dense/sparse matrix is used to store the result.
 Sparse format is useful when one expects only a subset of coefficients to be non-zero,
 when using the "thrifty" feature selector with fairly small number of top features
 selected per iteration.}
 }
 \value{
-Results are stored in the \code{coefs} element of the closure.
-The \code{\link{xgb.gblinear.history}} convenience function provides an easy
-way to access it.
-With \code{xgb.train}, it is either a dense of a sparse matrix.
-While with \code{xgb.cv}, it is a list (an element per each fold) of such
-matrices.
+An \code{xgb.Callback} object, which can be passed to \link{xgb.train} or \link{xgb.cv}.
 }
 \description{
-Callback closure for collecting the model coefficients history of a gblinear booster
-during its training.
+Callback for collecting coefficients history of a gblinear booster
 }
 \details{
 To keep things fast and simple, gblinear booster does not internally store the history of linear
 model coefficients at each boosting iteration. This callback provides a workaround for storing
 the coefficients' path, by extracting them after each training iteration.

-Callback function expects the following values to be set in its calling frame:
-\code{bst} (or \code{bst_folds}).
+This callback will construct a matrix where rows are boosting iterations and columns are
+feature coefficients (same order as when calling \link{coef.xgb.Booster}, with the intercept
+corresponding to the first column).
+
+When there is more than one coefficient per feature (e.g. multi-class classification),
+the result will be reshaped into a vector where coefficients are arranged first by features and
+then by class (e.g. first 1 through N coefficients will be for the first class, then
+coefficients N+1 through 2N for the second class, and so on).
+
+If the result has only one coefficient per feature in the data, then the resulting matrix
+will have column names matching with the feature names, otherwise (when there's more than
+one coefficient per feature) the names will be composed as 'column name' + ':' + 'class index'
+(so e.g. column 'c1' for class '0' will be named 'c1:0').
+
+With \code{xgb.train}, the output is either a dense or a sparse matrix.
+With \code{xgb.cv}, it is a list (one element per each fold) of such
+matrices.
+
+The \link{xgb.gblinear.history} function provides an easy way to retrieve the
+outputs from this callback.
 }
 \examples{
 #### Binary classification:
@ -52,7 +63,7 @@ param <- list(booster = "gblinear", objective = "reg:logistic", eval_metric = "a
 # rate does not break the convergence, but allows us to illustrate the typical pattern of
 # "stochastic explosion" behaviour of this lock-free algorithm at early boosting iterations.
 bst <- xgb.train(param, dtrain, list(tr=dtrain), nrounds = 200, eta = 1.,
-                 callbacks = list(cb.gblinear.history()))
+                 callbacks = list(xgb.cb.gblinear.history()))
 # Extract the coefficients' path and plot them vs boosting iteration number:
 coef_path <- xgb.gblinear.history(bst)
 matplot(coef_path, type = 'l')
@ -61,7 +72,7 @@ matplot(coef_path, type = 'l')
 # Will try the classical componentwise boosting which selects a single best feature per round:
 bst <- xgb.train(param, dtrain, list(tr=dtrain), nrounds = 200, eta = 0.8,
                 updater = 'coord_descent', feature_selector = 'thrifty', top_k = 1,
-                 callbacks = list(cb.gblinear.history()))
+                 callbacks = list(xgb.cb.gblinear.history()))
 matplot(xgb.gblinear.history(bst), type = 'l')
 #  Componentwise boosting is known to have similar effect to Lasso regularization.
 # Try experimenting with various values of top_k, eta, nrounds,
@ -69,7 +80,7 @@ matplot(xgb.gblinear.history(bst), type = 'l')

 # For xgb.cv:
 bst <- xgb.cv(param, dtrain, nfold = 5, nrounds = 100, eta = 0.8,
-              callbacks = list(cb.gblinear.history()))
+              callbacks = list(xgb.cb.gblinear.history()))
 # coefficients in the CV fold #3
 matplot(xgb.gblinear.history(bst)[[3]], type = 'l')

@ -82,7 +93,7 @@ param <- list(booster = "gblinear", objective = "multi:softprob", num_class = 3,
 # For the default linear updater 'shotgun' it sometimes is helpful
 # to use smaller eta to reduce instability
 bst <- xgb.train(param, dtrain, list(tr=dtrain), nrounds = 50, eta = 0.5,
-                 callbacks = list(cb.gblinear.history()))
+                 callbacks = list(xgb.cb.gblinear.history()))
 # Will plot the coefficient paths separately for each class:
 matplot(xgb.gblinear.history(bst, class_index = 0), type = 'l')
 matplot(xgb.gblinear.history(bst, class_index = 1), type = 'l')
@ -90,11 +101,11 @@ matplot(xgb.gblinear.history(bst, class_index = 2), type = 'l')

 # CV:
 bst <- xgb.cv(param, dtrain, nfold = 5, nrounds = 70, eta = 0.5,
-              callbacks = list(cb.gblinear.history(FALSE)))
+              callbacks = list(xgb.cb.gblinear.history(FALSE)))
 # 1st fold of 1st class
 matplot(xgb.gblinear.history(bst, class_index = 0)[[1]], type = 'l')

 }
 \seealso{
-\code{\link{callbacks}}, \code{\link{xgb.gblinear.history}}.
+\link{xgb.gblinear.history}, \link{coef.xgb.Booster}.
 }
--- a/R-package/man/xgb.cb.print.evaluation.Rd
+++ b/R-package/man/xgb.cb.print.evaluation.Rd
@ -0,0 +1,25 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/callbacks.R
+\name{xgb.cb.print.evaluation}
+\alias{xgb.cb.print.evaluation}
+\title{Callback for printing the result of evaluation}
+\usage{
+xgb.cb.print.evaluation(period = 1, showsd = TRUE)
+}
+\arguments{
+\item{period}{results are printed once every \code{period} iterations}
+
+\item{showsd}{whether standard deviations should be printed (when available)}
+}
+\value{
+An \code{xgb.Callback} object, which can be passed to \link{xgb.train} or \link{xgb.cv}.
+}
+\description{
+The callback function prints the result of evaluation at every \code{period} iterations.
+The initial and the last iteration's evaluations are always printed.
+
+Does not leave any attribute in the booster (see \link{xgb.cb.evaluation.log} for that).
+}
+\seealso{
+\link{xgb.Callback}
+}
--- a/R-package/man/xgb.cb.reset.parameters.Rd
+++ b/R-package/man/xgb.cb.reset.parameters.Rd
@ -1,10 +1,10 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/callbacks.R
-\name{cb.reset.parameters}
-\alias{cb.reset.parameters}
-\title{Callback closure for resetting the booster's parameters at each iteration.}
+\name{xgb.cb.reset.parameters}
+\alias{xgb.cb.reset.parameters}
+\title{Callback for resetting the booster's parameters at each iteration.}
 \usage{
-cb.reset.parameters(new_params)
+xgb.cb.reset.parameters(new_params)
 }
 \arguments{
 \item{new_params}{a list where each element corresponds to a parameter that needs to be reset.
@ -14,23 +14,16 @@ or a function of two parameters \code{learning_rates(iteration, nrounds)}
 which returns a new parameter value by using the current iteration number
 and the total number of boosting rounds.}
 }
+\value{
+An \code{xgb.Callback} object, which can be passed to \link{xgb.train} or \link{xgb.cv}.
+}
 \description{
-Callback closure for resetting the booster's parameters at each iteration.
+Callback for resetting the booster's parameters at each iteration.
 }
 \details{
-This is a "pre-iteration" callback function used to reset booster's parameters
-at the beginning of each iteration.
-
 Note that when training is resumed from some previous model, and a function is used to
 reset a parameter value, the \code{nrounds} argument in this function would be the
 the number of boosting rounds in the current training.

-Callback function expects the following values to be set in its calling frame:
-\code{bst} or \code{bst_folds},
-\code{iteration},
-\code{begin_iteration},
-\code{end_iteration}.
-}
-\seealso{
-\code{\link{callbacks}}
+Does not leave any attribute in the booster.
 }
--- a/R-package/man/xgb.cb.save.model.Rd
+++ b/R-package/man/xgb.cb.save.model.Rd
@ -0,0 +1,28 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/callbacks.R
+\name{xgb.cb.save.model}
+\alias{xgb.cb.save.model}
+\title{Callback for saving a model file.}
+\usage{
+xgb.cb.save.model(save_period = 0, save_name = "xgboost.ubj")
+}
+\arguments{
+\item{save_period}{Save the model to disk after every
+\code{save_period} iterations; 0 means save the model at the end.}
+
+\item{save_name}{The name or path for the saved model file.
+It can contain a \code{\link[base]{sprintf}} formatting specifier
+to include the integer iteration number in the file name.
+E.g., with \code{save_name} = 'xgboost_\%04d.model',
+the file saved at iteration 50 would be named "xgboost_0050.model".}
+}
+\value{
+An \code{xgb.Callback} object, which can be passed to \link{xgb.train},
+but \bold{not} to \link{xgb.cv}.
+}
+\description{
+This callback function allows saving an xgb-model file, either periodically
+(every \code{save_period} iterations) or at the end.
+
+Does not leave any attribute in the booster.
+}
--- a/R-package/man/xgb.create.features.Rd
+++ b/R-package/man/xgb.create.features.Rd
@ -82,7 +82,6 @@ new.dtrain <- xgb.DMatrix(
 new.dtest <- xgb.DMatrix(
  data = new.features.test, label = agaricus.test$label, nthread = 2
 )
-watchlist <- list(train = new.dtrain)
 bst <- xgb.train(params = param, data = new.dtrain, nrounds = nrounds, nthread = 2)

 # Model accuracy with new features
--- a/R-package/man/xgb.cv.Rd
+++ b/R-package/man/xgb.cv.Rd
@ -59,7 +59,7 @@ that NA values should be considered as 'missing' by the algorithm.
 Sometimes, 0 or other extreme value might be used to represent missing values.}

 \item{prediction}{A logical value indicating whether to return the test fold predictions
-from each CV model. This parameter engages the \code{\link{cb.cv.predict}} callback.}
+from each CV model. This parameter engages the \code{\link{xgb.cb.cv.predict}} callback.}

 \item{showsd}{\code{boolean}, whether to show standard deviation of cross validation}

@ -98,20 +98,20 @@ the \code{nfold} and \code{stratified} parameters are ignored.}

 \item{print_every_n}{Print each n-th iteration evaluation messages when \code{verbose>0}.
 Default is 1 which means all messages are printed. This parameter is passed to the
-\code{\link{cb.print.evaluation}} callback.}
+\code{\link{xgb.cb.print.evaluation}} callback.}

 \item{early_stopping_rounds}{If \code{NULL}, the early stopping function is not triggered.
 If set to an integer \code{k}, training with a validation set will stop if the performance
 doesn't improve for \code{k} rounds.
-Setting this parameter engages the \code{\link{cb.early.stop}} callback.}
+Setting this parameter engages the \code{\link{xgb.cb.early.stop}} callback.}

 \item{maximize}{If \code{feval} and \code{early_stopping_rounds} are set,
 then this parameter must be set as well.
 When it is \code{TRUE}, it means the larger the evaluation score the better.
-This parameter is passed to the \code{\link{cb.early.stop}} callback.}
+This parameter is passed to the \code{\link{xgb.cb.early.stop}} callback.}

 \item{callbacks}{a list of callback functions to perform various task during boosting.
-See \code{\link{callbacks}}. Some of the callbacks are automatically created depending on the
+See \code{\link{xgb.Callback}}. Some of the callbacks are automatically created depending on the
 parameters' values. User can provide either existing or their own callback methods in order
 to customize the training process.}

@ -122,24 +122,24 @@ An object of class \code{xgb.cv.synchronous} with the following elements:
 \itemize{
 \item \code{call} a function call.
 \item \code{params} parameters that were passed to the xgboost library. Note that it does not
-capture parameters changed by the \code{\link{cb.reset.parameters}} callback.
-\item \code{callbacks} callback functions that were either automatically assigned or
-explicitly passed.
+capture parameters changed by the \code{\link{xgb.cb.reset.parameters}} callback.
 \item \code{evaluation_log} evaluation history stored as a \code{data.table} with the
 first column corresponding to iteration number and the rest corresponding to the
 CV-based evaluation means and standard deviations for the training and test CV-sets.
-It is created by the \code{\link{cb.evaluation.log}} callback.
+It is created by the \code{\link{xgb.cb.evaluation.log}} callback.
 \item \code{niter} number of boosting iterations.
 \item \code{nfeatures} number of features in training data.
 \item \code{folds} the list of CV folds' indices - either those passed through the \code{folds}
 parameter or randomly generated.
 \item \code{best_iteration} iteration number with the best evaluation metric value
 (only available with early stopping).
-\item \code{pred} CV prediction values available when \code{prediction} is set.
-It is either vector or matrix (see \code{\link{cb.cv.predict}}).
-\item \code{models} a list of the CV folds' models. It is only available with the explicit
-setting of the \code{cb.cv.predict(save_models = TRUE)} callback.
 }
+
+Plus other potential elements that are the result of callbacks, such as a list \code{cv_predict} with
+a sub-element \code{pred} when passing \code{prediction = TRUE}, which is added by the \link{xgb.cb.cv.predict}
+callback (note that one can also pass it manually under \code{callbacks} with different settings,
+such as saving also the models created during cross validation); or a list \code{early_stop} which
+will contain elements such as \code{best_iteration} when using the early stopping callback (\link{xgb.cb.early.stop}).
 }
 \description{
 The cross validation function of xgboost
--- a/R-package/man/xgb.dump.Rd
+++ b/R-package/man/xgb.dump.Rd
@ -44,6 +44,7 @@ as a \code{character} vector. Otherwise it will return \code{TRUE}.
 Dump an xgboost model in text format.
 }
 \examples{
+\dontshow{RhpcBLASctl::omp_set_num_threads(1)}
 data(agaricus.train, package='xgboost')
 data(agaricus.test, package='xgboost')
 train <- agaricus.train
--- a/R-package/man/xgb.gblinear.history.Rd
+++ b/R-package/man/xgb.gblinear.history.Rd
@ -8,7 +8,7 @@ xgb.gblinear.history(model, class_index = NULL)
 }
 \arguments{
 \item{model}{either an \code{xgb.Booster} or a result of \code{xgb.cv()}, trained
-using the \code{cb.gblinear.history()} callback, but \bold{not} a booster
+using the \link{xgb.cb.gblinear.history} callback, but \bold{not} a booster
 loaded from \link{xgb.load} or \link{xgb.load.raw}.}

 \item{class_index}{zero-based class index to extract the coefficients for only that
@ -16,23 +16,31 @@ specific class in a multinomial multiclass model. When it is NULL, all the
 coefficients are returned. Has no effect in non-multiclass models.}
 }
 \value{
-For an \code{xgb.train} result, a matrix (either dense or sparse) with the columns
-corresponding to iteration's coefficients (in the order as \code{xgb.dump()} would
-return) and the rows corresponding to boosting iterations.
+For an \link{xgb.train} result, a matrix (either dense or sparse) with the columns
+corresponding to iteration's coefficients and the rows corresponding to boosting iterations.

-For an \code{xgb.cv} result, a list of such matrices is returned with the elements
+For an \link{xgb.cv} result, a list of such matrices is returned with the elements
 corresponding to CV folds.
+
+When there is more than one coefficient per feature (e.g. multi-class classification)
+and \code{class_index} is not provided,
+the result will be reshaped into a vector where coefficients are arranged first by features and
+then by class (e.g. first 1 through N coefficients will be for the first class, then
+coefficients N+1 through 2N for the second class, and so on).
 }
 \description{
 A helper function to extract the matrix of linear coefficients' history
-from a gblinear model created while using the \code{cb.gblinear.history()}
-callback.
+from a gblinear model created while using the \link{xgb.cb.gblinear.history}
+callback (which must be added manually as by default it's not used).
 }
 \details{
 Note that this is an R-specific function that relies on R attributes that
 are not saved when using xgboost's own serialization functions like \link{xgb.load}
 or \link{xgb.load.raw}.

-In order for a serialized model to be accepted by tgis function, one must use R
+In order for a serialized model to be accepted by this function, one must use R
 serializers such as \link{saveRDS}.
 }
+\seealso{
+\link{xgb.cb.gblinear.history}, \link{coef.xgb.Booster}.
+}
--- a/R-package/man/xgb.load.Rd
+++ b/R-package/man/xgb.load.Rd
@ -17,7 +17,7 @@ Load xgboost model from the binary model file.
 }
 \details{
 The input file is expected to contain a model saved in an xgboost model format
-using either \code{\link{xgb.save}} or \code{\link{cb.save.model}} in R, or using some
+using either \code{\link{xgb.save}} or \code{\link{xgb.cb.save.model}} in R, or using some
 appropriate methods from other xgboost interfaces. E.g., a model trained in Python and
 saved from there in xgboost format, could be loaded from R.

@ -25,6 +25,7 @@ Note: a model saved as an R-object, has to be loaded using corresponding R-metho
 not \code{xgb.load}.
 }
 \examples{
+\dontshow{RhpcBLASctl::omp_set_num_threads(1)}
 data(agaricus.train, package='xgboost')
 data(agaricus.test, package='xgboost')

--- a/R-package/man/xgb.save.Rd
+++ b/R-package/man/xgb.save.Rd
@ -41,6 +41,7 @@ how to persist models in a future-proof way, i.e. to make the model accessible i
 releases of XGBoost.
 }
 \examples{
+\dontshow{RhpcBLASctl::omp_set_num_threads(1)}
 data(agaricus.train, package='xgboost')
 data(agaricus.test, package='xgboost')

--- a/R-package/man/xgb.save.raw.Rd
+++ b/R-package/man/xgb.save.raw.Rd
@ -21,6 +21,7 @@ xgb.save.raw(model, raw_format = "ubj")
 Save xgboost model from xgboost or xgb.train
 }
 \examples{
+\dontshow{RhpcBLASctl::omp_set_num_threads(1)}
 data(agaricus.train, package='xgboost')
 data(agaricus.test, package='xgboost')

--- a/R-package/man/xgb.train.Rd
+++ b/R-package/man/xgb.train.Rd
@ -9,7 +9,7 @@ xgb.train(
  params = list(),
  data,
  nrounds,
-  watchlist = list(),
+  evals = list(),
  obj = NULL,
  feval = NULL,
  verbose = 1,
@ -158,13 +158,13 @@ List is provided in detail section.}

 \item{nrounds}{max number of boosting iterations.}

-\item{watchlist}{named list of xgb.DMatrix datasets to use for evaluating model performance.
+\item{evals}{Named list of \code{xgb.DMatrix} datasets to use for evaluating model performance.
 Metrics specified in either \code{eval_metric} or \code{feval} will be computed for each
 of these datasets during each boosting iteration, and stored in the end as a field named
 \code{evaluation_log} in the resulting object. When either \code{verbose>=1} or
-\code{\link{cb.print.evaluation}} callback is engaged, the performance results are continuously
+\code{\link{xgb.cb.print.evaluation}} callback is engaged, the performance results are continuously
 printed out during the training.
-E.g., specifying \code{watchlist=list(validation1=mat1, validation2=mat2)} allows to track
+E.g., specifying \code{evals=list(validation1=mat1, validation2=mat2)} allows to track
 the performance of each round's model on mat1 and mat2.}

 \item{obj}{customized objective function. Returns gradient and second order
@ -177,24 +177,24 @@ prediction and dtrain.}
 \item{verbose}{If 0, xgboost will stay silent. If 1, it will print information about performance.
 If 2, some additional information will be printed out.
 Note that setting \code{verbose > 0} automatically engages the
-\code{cb.print.evaluation(period=1)} callback function.}
+\code{xgb.cb.print.evaluation(period=1)} callback function.}

 \item{print_every_n}{Print each n-th iteration evaluation messages when \code{verbose>0}.
 Default is 1 which means all messages are printed. This parameter is passed to the
-\code{\link{cb.print.evaluation}} callback.}
+\code{\link{xgb.cb.print.evaluation}} callback.}

 \item{early_stopping_rounds}{If \code{NULL}, the early stopping function is not triggered.
 If set to an integer \code{k}, training with a validation set will stop if the performance
 doesn't improve for \code{k} rounds.
-Setting this parameter engages the \code{\link{cb.early.stop}} callback.}
+Setting this parameter engages the \code{\link{xgb.cb.early.stop}} callback.}

 \item{maximize}{If \code{feval} and \code{early_stopping_rounds} are set,
 then this parameter must be set as well.
 When it is \code{TRUE}, it means the larger the evaluation score the better.
-This parameter is passed to the \code{\link{cb.early.stop}} callback.}
+This parameter is passed to the \code{\link{xgb.cb.early.stop}} callback.}

 \item{save_period}{when it is non-NULL, model is saved to disk after every \code{save_period} rounds,
-0 means save at the end. The saving is handled by the \code{\link{cb.save.model}} callback.}
+0 means save at the end. The saving is handled by the \code{\link{xgb.cb.save.model}} callback.}

 \item{save_name}{the name or path for periodically saved model file.}

@ -203,12 +203,13 @@ Could be either an object of class \code{xgb.Booster}, or its raw data, or the n
 file with a previously saved model.}

 \item{callbacks}{a list of callback functions to perform various task during boosting.
-See \code{\link{callbacks}}. Some of the callbacks are automatically created depending on the
+See \code{\link{xgb.Callback}}. Some of the callbacks are automatically created depending on the
 parameters' values. User can provide either existing or their own callback methods in order
 to customize the training process.

-\if{html}{\out{<div class="sourceCode">}}\preformatted{   Note that some callbacks might try to set an evaluation log - be aware that these evaluation logs
-   are kept as R attributes, and thus do not get saved when using non-R serializaters like
+\if{html}{\out{<div class="sourceCode">}}\preformatted{   Note that some callbacks might try to leave attributes in the resulting model object,
+   such as an evaluation log (a `data.table` object) - be aware that these objects are kept
+   as R attributes, and thus do not get saved when using XGBoost's own serializers like
   \link{xgb.save} (but are kept when using R serializers like \link{saveRDS}).
 }\if{html}{\out{</div>}}}

@ -233,7 +234,7 @@ The \code{xgboost} function is a simpler wrapper for \code{xgb.train}.
 \details{
 These are the training functions for \code{xgboost}.

-The \code{xgb.train} interface supports advanced features such as \code{watchlist},
+The \code{xgb.train} interface supports advanced features such as \code{evals},
 customized objective and evaluation metric functions, therefore it is more flexible
 than the \code{xgboost} interface.

@ -241,6 +242,11 @@ Parallelization is automatically enabled if \code{OpenMP} is present.
 Number of threads can also be manually specified via the \code{nthread}
 parameter.

+While in other interfaces the random seed defaults to zero, in R, if a parameter \code{seed}
+is not manually supplied, it will generate a random seed through R's own random number generator,
+whose seed in turn is controllable through \code{set.seed}. If \code{seed} is passed, it will override the
+RNG from R.
+
 The evaluation metric is chosen automatically by XGBoost (according to the objective)
 when the \code{eval_metric} parameter is not provided.
 User may set one or several \code{eval_metric} parameters.
@ -264,18 +270,19 @@ Different threshold (e.g., 0.) could be specified as "error@0."

 The following callbacks are automatically created when certain parameters are set:
 \itemize{
-\item \code{cb.print.evaluation} is turned on when \code{verbose > 0};
+\item \code{xgb.cb.print.evaluation} is turned on when \code{verbose > 0};
 and the \code{print_every_n} parameter is passed to it.
-\item \code{cb.evaluation.log} is on when \code{watchlist} is present.
-\item \code{cb.early.stop}: when \code{early_stopping_rounds} is set.
-\item \code{cb.save.model}: when \code{save_period > 0} is set.
+\item \code{xgb.cb.evaluation.log} is on when \code{evals} is present.
+\item \code{xgb.cb.early.stop}: when \code{early_stopping_rounds} is set.
+\item \code{xgb.cb.save.model}: when \code{save_period > 0} is set.
 }

 Note that objects of type \code{xgb.Booster} as returned by this function behave a bit differently
 from typical R objects (it's an 'altrep' list class), and it makes a separation between
 internal booster attributes (restricted to jsonifyable data), accessed through \link{xgb.attr}
 and shared between interfaces through serialization functions like \link{xgb.save}; and
-R-specific attributes, accessed through \link{attributes} and \link{attr}, which are otherwise
+R-specific attributes (typically the result from a callback), accessed through \link{attributes}
+and \link{attr}, which are otherwise
 only used in the R interface, only kept when using R's serializers like \link{saveRDS}, and
 not anyhow used by functions like \link{predict.xgb.Booster}.

@ -300,12 +307,12 @@ dtrain <- with(
 dtest <- with(
  agaricus.test, xgb.DMatrix(data, label = label, nthread = nthread)
 )
-watchlist <- list(train = dtrain, eval = dtest)
+evals <- list(train = dtrain, eval = dtest)

 ## A simple xgb.train example:
 param <- list(max_depth = 2, eta = 1, nthread = nthread,
              objective = "binary:logistic", eval_metric = "auc")
-bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0)
+bst <- xgb.train(param, dtrain, nrounds = 2, evals = evals, verbose = 0)

 ## An xgb.train example where custom objective and evaluation metric are
 ## used:
@ -326,15 +333,15 @@ evalerror <- function(preds, dtrain) {
 #  as 'objective' and 'eval_metric' parameters in the params list:
 param <- list(max_depth = 2, eta = 1, nthread = nthread,
              objective = logregobj, eval_metric = evalerror)
-bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0)
+bst <- xgb.train(param, dtrain, nrounds = 2, evals = evals, verbose = 0)

 #  or through the ... arguments:
 param <- list(max_depth = 2, eta = 1, nthread = nthread)
-bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0,
+bst <- xgb.train(param, dtrain, nrounds = 2, evals = evals, verbose = 0,
                 objective = logregobj, eval_metric = evalerror)

 #  or as dedicated 'obj' and 'feval' parameters of xgb.train:
-bst <- xgb.train(param, dtrain, nrounds = 2, watchlist,
+bst <- xgb.train(param, dtrain, nrounds = 2, evals = evals,
                 obj = logregobj, feval = evalerror)


@ -342,11 +349,11 @@ bst <- xgb.train(param, dtrain, nrounds = 2, watchlist,
 param <- list(max_depth = 2, eta = 1, nthread = nthread,
              objective = "binary:logistic", eval_metric = "auc")
 my_etas <- list(eta = c(0.5, 0.1))
-bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0,
-                 callbacks = list(cb.reset.parameters(my_etas)))
+bst <- xgb.train(param, dtrain, nrounds = 2, evals = evals, verbose = 0,
+                 callbacks = list(xgb.cb.reset.parameters(my_etas)))

 ## Early stopping:
-bst <- xgb.train(param, dtrain, nrounds = 25, watchlist,
+bst <- xgb.train(param, dtrain, nrounds = 25, evals = evals,
                 early_stopping_rounds = 3)

 ## An 'xgboost' interface example:
@ -361,7 +368,7 @@ Tianqi Chen and Carlos Guestrin, "XGBoost: A Scalable Tree Boosting System",
 22nd SIGKDD Conference on Knowledge Discovery and Data Mining, 2016, \url{https://arxiv.org/abs/1603.02754}
 }
 \seealso{
-\code{\link{callbacks}},
+\code{\link{xgb.Callback}},
 \code{\link{predict.xgb.Booster}},
 \code{\link{xgb.cv}}
 }
--- a/R-package/man/xgbConfig.Rd
+++ b/R-package/man/xgbConfig.Rd
@ -25,6 +25,15 @@ values of one or more global-scope parameters. Use \code{xgb.get.config} to fetc
 values of all global-scope parameters (listed in
 \url{https://xgboost.readthedocs.io/en/stable/parameter.html}).
 }
+\details{
+Note that serialization-related functions might use a globally-configured number of threads,
+which is managed by the system's OpenMP (OMP) configuration instead. Typically, XGBoost methods
+accept an \code{nthread} parameter, but some methods like \code{readRDS} might get executed before such
+parameter can be supplied.
+
+The number of OMP threads can in turn be configured for example through an environment variable
+\code{OMP_NUM_THREADS} (needs to be set before R is started), or through \code{RhpcBLASctl::omp_set_num_threads}.
+}
 \examples{
 # Set verbosity level to silent (0)
 xgb.set.config(verbosity = 0)
--- a/R-package/src/Makevars.in
+++ b/R-package/src/Makevars.in
@ -104,6 +104,7 @@ OBJECTS= \
    $(PKGROOT)/src/collective/broadcast.o \
    $(PKGROOT)/src/collective/comm.o \
    $(PKGROOT)/src/collective/coll.o \
+    $(PKGROOT)/src/collective/communicator-inl.o \
    $(PKGROOT)/src/collective/tracker.o \
    $(PKGROOT)/src/collective/communicator.o \
    $(PKGROOT)/src/collective/in_memory_communicator.o \
--- a/R-package/src/Makevars.win
+++ b/R-package/src/Makevars.win
@ -104,6 +104,7 @@ OBJECTS= \
    $(PKGROOT)/src/collective/broadcast.o \
    $(PKGROOT)/src/collective/comm.o \
    $(PKGROOT)/src/collective/coll.o \
+    $(PKGROOT)/src/collective/communicator-inl.o \
    $(PKGROOT)/src/collective/tracker.o \
    $(PKGROOT)/src/collective/communicator.o \
    $(PKGROOT)/src/collective/in_memory_communicator.o \
--- a/R-package/src/init.c
+++ b/R-package/src/init.c
@ -37,6 +37,9 @@ extern SEXP XGBoosterLoadJsonConfig_R(SEXP handle, SEXP value);
 extern SEXP XGBoosterSerializeToBuffer_R(SEXP handle);
 extern SEXP XGBoosterUnserializeFromBuffer_R(SEXP handle, SEXP raw);
 extern SEXP XGBoosterPredictFromDMatrix_R(SEXP, SEXP, SEXP);
+extern SEXP XGBoosterPredictFromDense_R(SEXP, SEXP, SEXP, SEXP, SEXP);
+extern SEXP XGBoosterPredictFromCSR_R(SEXP, SEXP, SEXP, SEXP, SEXP);
+extern SEXP XGBoosterPredictFromColumnar_R(SEXP, SEXP, SEXP, SEXP, SEXP);
 extern SEXP XGBoosterSaveModel_R(SEXP, SEXP);
 extern SEXP XGBoosterSetAttr_R(SEXP, SEXP, SEXP);
 extern SEXP XGBoosterSetParam_R(SEXP, SEXP, SEXP);
@ -46,7 +49,7 @@ extern SEXP XGSetArrayDimInplace_R(SEXP, SEXP);
 extern SEXP XGSetArrayDimNamesInplace_R(SEXP, SEXP);
 extern SEXP XGDMatrixCreateFromCSC_R(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP);
 extern SEXP XGDMatrixCreateFromCSR_R(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP);
-extern SEXP XGDMatrixCreateFromFile_R(SEXP, SEXP);
+extern SEXP XGDMatrixCreateFromURI_R(SEXP, SEXP, SEXP);
 extern SEXP XGDMatrixCreateFromMat_R(SEXP, SEXP, SEXP);
 extern SEXP XGDMatrixGetFloatInfo_R(SEXP, SEXP);
 extern SEXP XGDMatrixGetUIntInfo_R(SEXP, SEXP);
@ -73,6 +76,7 @@ extern SEXP XGBSetGlobalConfig_R(SEXP);
 extern SEXP XGBGetGlobalConfig_R(void);
 extern SEXP XGBoosterFeatureScore_R(SEXP, SEXP);
 extern SEXP XGBoosterSlice_R(SEXP, SEXP, SEXP, SEXP);
+extern SEXP XGBoosterSliceAndReplace_R(SEXP, SEXP, SEXP, SEXP);

 static const R_CallMethodDef CallEntries[] = {
  {"XGDuplicate_R",               (DL_FUNC) &XGDuplicate_R,               1},
@ -96,6 +100,9 @@ static const R_CallMethodDef CallEntries[] = {
  {"XGBoosterSerializeToBuffer_R",     (DL_FUNC) &XGBoosterSerializeToBuffer_R,     1},
  {"XGBoosterUnserializeFromBuffer_R", (DL_FUNC) &XGBoosterUnserializeFromBuffer_R, 2},
  {"XGBoosterPredictFromDMatrix_R", (DL_FUNC) &XGBoosterPredictFromDMatrix_R, 3},
+  {"XGBoosterPredictFromDense_R", (DL_FUNC) &XGBoosterPredictFromDense_R, 5},
+  {"XGBoosterPredictFromCSR_R",   (DL_FUNC) &XGBoosterPredictFromCSR_R,   5},
+  {"XGBoosterPredictFromColumnar_R", (DL_FUNC) &XGBoosterPredictFromColumnar_R, 5},
  {"XGBoosterSaveModel_R",        (DL_FUNC) &XGBoosterSaveModel_R,        2},
  {"XGBoosterSetAttr_R",          (DL_FUNC) &XGBoosterSetAttr_R,          3},
  {"XGBoosterSetParam_R",         (DL_FUNC) &XGBoosterSetParam_R,         3},
@ -105,7 +112,7 @@ static const R_CallMethodDef CallEntries[] = {
  {"XGSetArrayDimNamesInplace_R", (DL_FUNC) &XGSetArrayDimNamesInplace_R, 2},
  {"XGDMatrixCreateFromCSC_R",    (DL_FUNC) &XGDMatrixCreateFromCSC_R,    6},
  {"XGDMatrixCreateFromCSR_R",    (DL_FUNC) &XGDMatrixCreateFromCSR_R,    6},
-  {"XGDMatrixCreateFromFile_R",   (DL_FUNC) &XGDMatrixCreateFromFile_R,   2},
+  {"XGDMatrixCreateFromURI_R",    (DL_FUNC) &XGDMatrixCreateFromURI_R,    3},
  {"XGDMatrixCreateFromMat_R",    (DL_FUNC) &XGDMatrixCreateFromMat_R,    3},
  {"XGDMatrixGetFloatInfo_R",     (DL_FUNC) &XGDMatrixGetFloatInfo_R,     2},
  {"XGDMatrixGetUIntInfo_R",      (DL_FUNC) &XGDMatrixGetUIntInfo_R,      2},
@ -132,6 +139,7 @@ static const R_CallMethodDef CallEntries[] = {
  {"XGBGetGlobalConfig_R",        (DL_FUNC) &XGBGetGlobalConfig_R,        0},
  {"XGBoosterFeatureScore_R",     (DL_FUNC) &XGBoosterFeatureScore_R,     2},
  {"XGBoosterSlice_R",            (DL_FUNC) &XGBoosterSlice_R,            4},
+  {"XGBoosterSliceAndReplace_R",  (DL_FUNC) &XGBoosterSliceAndReplace_R,  4},
  {NULL, NULL, 0}
 };

--- a/R-package/src/xgboost_R.cc
+++ b/R-package/src/xgboost_R.cc
@ -13,6 +13,7 @@
 #include <cstdint>
 #include <cstdio>
 #include <cstring>
+#include <memory>
 #include <limits>
 #include <sstream>
 #include <string>
@ -207,25 +208,24 @@ SEXP SafeAllocInteger(size_t size, SEXP continuation_token) {
  return xgboost::Json::Dump(jinterface);
 }

-[[nodiscard]] std::string MakeJsonConfigForArray(SEXP missing, SEXP n_threads, SEXPTYPE arr_type) {
-  using namespace ::xgboost;  // NOLINT
-  Json jconfig{Object{}};
-
-  const SEXPTYPE missing_type = TYPEOF(missing);
-  if (Rf_isNull(missing) || (missing_type == REALSXP && ISNAN(Rf_asReal(missing))) ||
-      (missing_type == LGLSXP && Rf_asLogical(missing) == R_NaInt) ||
-      (missing_type == INTSXP && Rf_asInteger(missing) == R_NaInt)) {
+void AddMissingToJson(xgboost::Json *jconfig, SEXP missing, SEXPTYPE arr_type) {
+  if (Rf_isNull(missing) || ISNAN(Rf_asReal(missing))) {
    // missing is not specified
    if (arr_type == REALSXP) {
-      jconfig["missing"] = std::numeric_limits<double>::quiet_NaN();
+      (*jconfig)["missing"] = std::numeric_limits<double>::quiet_NaN();
    } else {
-      jconfig["missing"] = R_NaInt;
+      (*jconfig)["missing"] = R_NaInt;
    }
  } else {
    // missing specified
-    jconfig["missing"] = Rf_asReal(missing);
+    (*jconfig)["missing"] = Rf_asReal(missing);
  }
+}

+[[nodiscard]] std::string MakeJsonConfigForArray(SEXP missing, SEXP n_threads, SEXPTYPE arr_type) {
+  using namespace ::xgboost;  // NOLINT
+  Json jconfig{Object{}};
+  AddMissingToJson(&jconfig, missing, arr_type);
  jconfig["nthread"] = Rf_asInteger(n_threads);
  return Json::Dump(jconfig);
 }
@ -365,15 +365,22 @@ XGB_DLL SEXP XGBGetGlobalConfig_R() {
  return mkString(json_str);
 }

-XGB_DLL SEXP XGDMatrixCreateFromFile_R(SEXP fname, SEXP silent) {
-  SEXP ret = PROTECT(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue));
+XGB_DLL SEXP XGDMatrixCreateFromURI_R(SEXP uri, SEXP silent, SEXP data_split_mode) {
+  SEXP ret = Rf_protect(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue));
+  SEXP uri_char = Rf_protect(Rf_asChar(uri));
+  const char *uri_ptr = CHAR(uri_char);
  R_API_BEGIN();
+  xgboost::Json jconfig{xgboost::Object{}};
+  jconfig["uri"] = std::string(uri_ptr);
+  jconfig["silent"] = Rf_asLogical(silent);
+  jconfig["data_split_mode"] = Rf_asInteger(data_split_mode);
+  const std::string sconfig = xgboost::Json::Dump(jconfig);
  DMatrixHandle handle;
-  CHECK_CALL(XGDMatrixCreateFromFile(CHAR(asChar(fname)), asInteger(silent), &handle));
+  CHECK_CALL(XGDMatrixCreateFromURI(sconfig.c_str(), &handle));
  R_SetExternalPtrAddr(ret, handle);
  R_RegisterCFinalizerEx(ret, _DMatrixFinalizer, TRUE);
  R_API_END();
-  UNPROTECT(1);
+  Rf_unprotect(2);
  return ret;
 }

@ -404,7 +411,7 @@ XGB_DLL SEXP XGDMatrixCreateFromDF_R(SEXP df, SEXP missing, SEXP n_threads) {
  DMatrixHandle handle;
  std::int32_t rc{0};
  {
-    std::string sinterface = MakeArrayInterfaceFromRDataFrame(df);
+    const std::string sinterface = MakeArrayInterfaceFromRDataFrame(df);
    xgboost::Json jconfig{xgboost::Object{}};
    jconfig["missing"] = asReal(missing);
    jconfig["nthread"] = asInteger(n_threads);
@ -456,7 +463,7 @@ XGB_DLL SEXP XGDMatrixCreateFromCSC_R(SEXP indptr, SEXP indices, SEXP data, SEXP
    Json jconfig{Object{}};
    // Construct configuration
    jconfig["nthread"] = Integer{threads};
-    jconfig["missing"] = xgboost::Number{asReal(missing)};
+    AddMissingToJson(&jconfig, missing, TYPEOF(data));
    std::string config;
    Json::Dump(jconfig, &config);
    res_code = XGDMatrixCreateFromCSC(sindptr.c_str(), sindices.c_str(), sdata.c_str(), nrow,
@ -491,7 +498,7 @@ XGB_DLL SEXP XGDMatrixCreateFromCSR_R(SEXP indptr, SEXP indices, SEXP data, SEXP
    Json jconfig{Object{}};
    // Construct configuration
    jconfig["nthread"] = Integer{threads};
-    jconfig["missing"] = xgboost::Number{asReal(missing)};
+    AddMissingToJson(&jconfig, missing, TYPEOF(data));
    std::string config;
    Json::Dump(jconfig, &config);
    res_code = XGDMatrixCreateFromCSR(sindptr.c_str(), sindices.c_str(), sdata.c_str(), ncol,
@ -1240,7 +1247,60 @@ XGB_DLL SEXP XGBoosterEvalOneIter_R(SEXP handle, SEXP iter, SEXP dmats, SEXP evn
  return mkString(ret);
 }

-XGB_DLL SEXP XGBoosterPredictFromDMatrix_R(SEXP handle, SEXP dmat, SEXP json_config)  {
+namespace {
+
+struct ProxyDmatrixError : public std::exception {};
+
+// Owns a proxy DMatrix handle; the constructor throws ProxyDmatrixError if creation fails.
+struct ProxyDmatrixWrapper {
+  DMatrixHandle proxy_dmat_handle{nullptr};
+
+  ProxyDmatrixWrapper() {
+    if (XGProxyDMatrixCreate(&this->proxy_dmat_handle) != 0) {
+      throw ProxyDmatrixError();
+    }
+  }
+  // Non-copyable: a copy would make two destructors free the same handle.
+  ProxyDmatrixWrapper(ProxyDmatrixWrapper const &) = delete;
+  ProxyDmatrixWrapper &operator=(ProxyDmatrixWrapper const &) = delete;
+
+  ~ProxyDmatrixWrapper() {
+    if (this->proxy_dmat_handle) {
+      XGDMatrixFree(this->proxy_dmat_handle);
+      this->proxy_dmat_handle = nullptr;
+    }
+  }
+  DMatrixHandle get_handle() { return this->proxy_dmat_handle; }
+};
+
+// Builds a proxy DMatrix whose "base_margin" field is set from the R object 'base_margin'.
+// Returns an empty unique_ptr when 'base_margin' is R NULL. A ProxyDmatrixError thrown while
+// creating or filling the proxy is reported as an R error carrying XGBGetLastError().
+std::unique_ptr<ProxyDmatrixWrapper> GetProxyDMatrixWithBaseMargin(SEXP base_margin) {
+  if (Rf_isNull(base_margin)) {
+    return std::unique_ptr<ProxyDmatrixWrapper>(nullptr);
+  }
+
+  // Objects without a 'dim' attribute go through the vector interface, others through the
+  // matrix interface.
+  SEXP base_margin_dim = Rf_getAttrib(base_margin, R_DimSymbol);
+  int res_code;
+  try {
+    const std::string array_str = Rf_isNull(base_margin_dim)?
+      MakeArrayInterfaceFromRVector(base_margin) : MakeArrayInterfaceFromRMat(base_margin);
+    std::unique_ptr<ProxyDmatrixWrapper> proxy_dmat(new ProxyDmatrixWrapper());
+    res_code = XGDMatrixSetInfoFromInterface(proxy_dmat->get_handle(),
+                                             "base_margin",
+                                             array_str.c_str());
+    if (res_code != 0) {
+      throw ProxyDmatrixError();
+    }
+    return proxy_dmat;
+  } catch(ProxyDmatrixError &err) {
+    // NOTE(review): Rf_error does not return here, so control leaves the handler without
+    // completing the catch clause -- confirm this is acceptable for the R error mechanism.
+    Rf_error("%s", XGBGetLastError());
+  }
+}
+
+enum class PredictionInputType {DMatrix, DenseMatrix, CSRMatrix, DataFrame};
+
+SEXP XGBoosterPredictGeneric(SEXP handle, SEXP input_data, SEXP json_config,
+                                    PredictionInputType input_type, SEXP missing,
+                                    SEXP base_margin) {
  SEXP r_out_shape;
  SEXP r_out_result;
  SEXP r_out = PROTECT(allocVector(VECSXP, 2));
@ -1252,9 +1312,79 @@ XGB_DLL SEXP XGBoosterPredictFromDMatrix_R(SEXP handle, SEXP dmat, SEXP json_con
  bst_ulong out_dim;
  bst_ulong const *out_shape;
  float const *out_result;
-  CHECK_CALL(XGBoosterPredictFromDMatrix(R_ExternalPtrAddr(handle),
-                                         R_ExternalPtrAddr(dmat), c_json_config,
-                                         &out_shape, &out_dim, &out_result));
+
+  int res_code;
+  {
+    switch (input_type) {
+      case PredictionInputType::DMatrix: {
+        res_code = XGBoosterPredictFromDMatrix(R_ExternalPtrAddr(handle),
+                                               R_ExternalPtrAddr(input_data), c_json_config,
+                                               &out_shape, &out_dim, &out_result);
+        break;
+      }
+
+      case PredictionInputType::CSRMatrix: {
+        std::unique_ptr<ProxyDmatrixWrapper> proxy_dmat = GetProxyDMatrixWithBaseMargin(
+          base_margin);
+        DMatrixHandle proxy_dmat_handle = proxy_dmat.get()? proxy_dmat->get_handle() : nullptr;
+
+        SEXP indptr = VECTOR_ELT(input_data, 0);
+        SEXP indices = VECTOR_ELT(input_data, 1);
+        SEXP data = VECTOR_ELT(input_data, 2);
+        const int ncol_csr = Rf_asInteger(VECTOR_ELT(input_data, 3));
+        const SEXPTYPE type_data = TYPEOF(data);
+        CHECK_EQ(type_data, REALSXP);
+        std::string sindptr, sindices, sdata;
+        CreateFromSparse(indptr, indices, data, &sindptr, &sindices, &sdata);
+
+        xgboost::StringView json_str(c_json_config);
+        xgboost::Json new_json = xgboost::Json::Load(json_str);
+        AddMissingToJson(&new_json, missing, type_data);
+        const std::string new_c_json = xgboost::Json::Dump(new_json);
+
+        res_code = XGBoosterPredictFromCSR(
+          R_ExternalPtrAddr(handle), sindptr.c_str(), sindices.c_str(), sdata.c_str(),
+          ncol_csr, new_c_json.c_str(), proxy_dmat_handle, &out_shape, &out_dim, &out_result);
+        break;
+      }
+
+      case PredictionInputType::DenseMatrix: {
+        std::unique_ptr<ProxyDmatrixWrapper> proxy_dmat = GetProxyDMatrixWithBaseMargin(
+          base_margin);
+        DMatrixHandle proxy_dmat_handle = proxy_dmat.get()? proxy_dmat->get_handle() : nullptr;
+        const std::string array_str = MakeArrayInterfaceFromRMat(input_data);
+
+        xgboost::StringView json_str(c_json_config);
+        xgboost::Json new_json = xgboost::Json::Load(json_str);
+        AddMissingToJson(&new_json, missing, TYPEOF(input_data));
+        const std::string new_c_json = xgboost::Json::Dump(new_json);
+
+        res_code = XGBoosterPredictFromDense(
+          R_ExternalPtrAddr(handle), array_str.c_str(), new_c_json.c_str(),
+          proxy_dmat_handle, &out_shape, &out_dim, &out_result);
+        break;
+      }
+
+      case PredictionInputType::DataFrame: {
+        std::unique_ptr<ProxyDmatrixWrapper> proxy_dmat = GetProxyDMatrixWithBaseMargin(
+          base_margin);
+        DMatrixHandle proxy_dmat_handle = proxy_dmat.get()? proxy_dmat->get_handle() : nullptr;
+
+        const std::string df_str = MakeArrayInterfaceFromRDataFrame(input_data);
+
+        xgboost::StringView json_str(c_json_config);
+        xgboost::Json new_json = xgboost::Json::Load(json_str);
+        AddMissingToJson(&new_json, missing, REALSXP);
+        const std::string new_c_json = xgboost::Json::Dump(new_json);
+
+        res_code = XGBoosterPredictFromColumnar(
+          R_ExternalPtrAddr(handle), df_str.c_str(), new_c_json.c_str(),
+          proxy_dmat_handle, &out_shape, &out_dim, &out_result);
+        break;
+      }
+    }
+  }
+  CHECK_CALL(res_code);

  r_out_shape = PROTECT(allocVector(INTSXP, out_dim));
  size_t len = 1;
@ -1275,6 +1405,31 @@ XGB_DLL SEXP XGBoosterPredictFromDMatrix_R(SEXP handle, SEXP dmat, SEXP json_con
  return r_out;
 }

+}  // namespace
+
+// Predicts from an existing DMatrix by forwarding to XGBoosterPredictGeneric; 'missing' and
+// 'base_margin' are passed as R NULL.
+XGB_DLL SEXP XGBoosterPredictFromDMatrix_R(SEXP handle, SEXP dmat, SEXP json_config)  {
+  return XGBoosterPredictGeneric(handle, dmat, json_config,
+                                 PredictionInputType::DMatrix, R_NilValue, R_NilValue);
+}
+
+// Predicts from an R matrix by forwarding to XGBoosterPredictGeneric with
+// PredictionInputType::DenseMatrix. This entry point receives 'missing' before 'json_config',
+// whereas the generic helper takes 'json_config' first.
+XGB_DLL SEXP XGBoosterPredictFromDense_R(SEXP handle, SEXP R_mat, SEXP missing,
+                                         SEXP json_config, SEXP base_margin) {
+  return XGBoosterPredictGeneric(handle, R_mat, json_config,
+                                 PredictionInputType::DenseMatrix, missing, base_margin);
+}
+
+// Predicts from CSR data passed as an R list by forwarding to XGBoosterPredictGeneric with
+// PredictionInputType::CSRMatrix. This entry point receives 'missing' before 'json_config',
+// whereas the generic helper takes 'json_config' first.
+XGB_DLL SEXP XGBoosterPredictFromCSR_R(SEXP handle, SEXP lst, SEXP missing,
+                                       SEXP json_config, SEXP base_margin) {
+  return XGBoosterPredictGeneric(handle, lst, json_config,
+                                 PredictionInputType::CSRMatrix, missing, base_margin);
+}
+
+// Predicts from an R data.frame by forwarding to XGBoosterPredictGeneric with
+// PredictionInputType::DataFrame. This entry point receives 'missing' before 'json_config',
+// whereas the generic helper takes 'json_config' first.
+XGB_DLL SEXP XGBoosterPredictFromColumnar_R(SEXP handle, SEXP R_df, SEXP missing,
+                                            SEXP json_config, SEXP base_margin) {
+  return XGBoosterPredictGeneric(handle, R_df, json_config,
+                                 PredictionInputType::DataFrame, missing, base_margin);
+}
+
 XGB_DLL SEXP XGBoosterLoadModel_R(SEXP handle, SEXP fname) {
  R_API_BEGIN();
  CHECK_CALL(XGBoosterLoadModel(R_ExternalPtrAddr(handle), CHAR(asChar(fname))));
@ -1519,3 +1674,18 @@ XGB_DLL SEXP XGBoosterSlice_R(SEXP handle, SEXP begin_layer, SEXP end_layer, SEX
  Rf_unprotect(1);
  return out;
 }
+
+// Slices the booster behind 'handle' with XGBoosterSlice and makes the same R external pointer
+// refer to the sliced booster; the previous booster is then freed. Returns R NULL.
+XGB_DLL SEXP XGBoosterSliceAndReplace_R(SEXP handle, SEXP begin_layer, SEXP end_layer, SEXP step) {
+  R_API_BEGIN();
+  BoosterHandle old_handle = R_ExternalPtrAddr(handle);
+  BoosterHandle new_handle = nullptr;
+  CHECK_CALL(XGBoosterSlice(old_handle,
+                            Rf_asInteger(begin_layer),
+                            Rf_asInteger(end_layer),
+                            Rf_asInteger(step),
+                            &new_handle));
+  // Point the R external pointer at the new booster before freeing the old one.
+  R_SetExternalPtrAddr(handle, new_handle);
+  CHECK_CALL(XGBoosterFree(old_handle));
+  R_API_END();
+  return R_NilValue;
+}
--- a/R-package/src/xgboost_R.h
+++ b/R-package/src/xgboost_R.h
@ -53,12 +53,13 @@ XGB_DLL SEXP XGBSetGlobalConfig_R(SEXP json_str);
 XGB_DLL SEXP XGBGetGlobalConfig_R();

 /*!
- * \brief load a data matrix
- * \param fname name of the content
+ * \brief load a data matrix from URI
+ * \param uri URI to the source file to read data from
 * \param silent whether print messages
+ * \param data_split_mode Data split mode (0=rows, 1=columns)
 * \return a loaded data matrix
 */
-XGB_DLL SEXP XGDMatrixCreateFromFile_R(SEXP fname, SEXP silent);
+XGB_DLL SEXP XGDMatrixCreateFromURI_R(SEXP uri, SEXP silent, SEXP data_split_mode);

 /*!
 * \brief create matrix content from dense matrix
@ -370,6 +371,50 @@ XGB_DLL SEXP XGBoosterEvalOneIter_R(SEXP handle, SEXP iter, SEXP dmats, SEXP evn
 * \return A list containing 2 vectors, first one for shape while second one for prediction result.
 */
 XGB_DLL SEXP XGBoosterPredictFromDMatrix_R(SEXP handle, SEXP dmat, SEXP json_config);
+
+/*!
+ * \brief Run prediction on R dense matrix
+ * \param handle handle
+ * \param R_mat R matrix
+ * \param missing missing value
+ * \param json_config See `XGBoosterPredictFromDense` in xgboost c_api.h. Doesn't include 'missing'
+ * \param base_margin base margin for the prediction
+ *
+ * \return A list containing 2 vectors, first one for shape while second one for prediction result.
+ */
+XGB_DLL SEXP XGBoosterPredictFromDense_R(SEXP handle, SEXP R_mat, SEXP missing,
+                                         SEXP json_config, SEXP base_margin);
+
+/*!
+ * \brief Run prediction on R CSR matrix
+ * \param handle handle
+ * \param lst An R list, containing, in this order:
+ *              (a) 'p' array (a.k.a. indptr)
+ *              (b) 'j' array (a.k.a. indices)
+ *              (c) 'x' array (a.k.a. data / values)
+ *              (d) number of columns
+ * \param missing missing value
+ * \param json_config See `XGBoosterPredictFromCSR` in xgboost c_api.h. Doesn't include 'missing'
+ * \param base_margin base margin for the prediction
+ *
+ * \return A list containing 2 vectors, first one for shape while second one for prediction result.
+ */
+XGB_DLL SEXP XGBoosterPredictFromCSR_R(SEXP handle, SEXP lst, SEXP missing,
+                                       SEXP json_config, SEXP base_margin);
+
+/*!
+ * \brief Run prediction on R data.frame
+ * \param handle handle
+ * \param R_df R data.frame
+ * \param missing missing value
+ * \param json_config See `XGBoosterPredictFromColumnar` in xgboost c_api.h. Doesn't include 'missing'
+ * \param base_margin base margin for the prediction
+ *
+ * \return A list containing 2 vectors, first one for shape while second one for prediction result.
+ */
+XGB_DLL SEXP XGBoosterPredictFromColumnar_R(SEXP handle, SEXP R_df, SEXP missing,
+                                            SEXP json_config, SEXP base_margin);
+
 /*!
 * \brief load model from existing file
 * \param handle handle
@ -490,4 +535,14 @@ XGB_DLL SEXP XGBoosterFeatureScore_R(SEXP handle, SEXP json_config);
 */
 XGB_DLL SEXP XGBoosterSlice_R(SEXP handle, SEXP begin_layer, SEXP end_layer, SEXP step);

+/*!
+ * \brief Slice a fitted booster model (by rounds), and replace its handle with the result
+ * \param handle handle to the fitted booster
+ * \param begin_layer start of the slice
+ * \param end_layer end of the slice; end_layer=0 is equivalent to end_layer=num_boost_round
+ * \param step step size of the slice
+ * \return NULL
+ */
+XGB_DLL SEXP XGBoosterSliceAndReplace_R(SEXP handle, SEXP begin_layer, SEXP end_layer, SEXP step);
+
 #endif  // XGBOOST_WRAPPER_R_H_ // NOLINT(*)
--- a/R-package/src/xgboost_custom.cc
+++ b/R-package/src/xgboost_custom.cc
@ -41,16 +41,6 @@ double LogGamma(double v) {
  return lgammafn(v);
 }
 #endif  // !defined(XGBOOST_USE_CUDA)
-// customize random engine.
-void CustomGlobalRandomEngine::seed(CustomGlobalRandomEngine::result_type val) {
-  // ignore the seed
-}

-// use R's PRNG to replacd
-CustomGlobalRandomEngine::result_type
-CustomGlobalRandomEngine::operator()() {
-  return static_cast<result_type>(
-      std::floor(unif_rand() * CustomGlobalRandomEngine::max()));
-}
 }  // namespace common
 }  // namespace xgboost
--- a/R-package/tests/helper_scripts/install_deps.R
+++ b/R-package/tests/helper_scripts/install_deps.R
@ -20,6 +20,7 @@ pkgs <- c(
  "igraph",
  "float",
  "titanic",
+  "RhpcBLASctl",
  ## imports
  "Matrix",
  "methods",
--- a/R-package/tests/testthat.R
+++ b/R-package/tests/testthat.R
@ -1,4 +1,7 @@
 library(testthat)
 library(xgboost)
+library(Matrix)
+# Cap the global OpenMP thread count before the tests run, so that the cap applies to them.
+RhpcBLASctl::omp_set_num_threads(1)
 
 test_check("xgboost", reporter = ProgressReporter)
--- a/R-package/tests/testthat/test_basic.R
+++ b/R-package/tests/testthat/test_basic.R
@ -20,7 +20,7 @@ test_that("train and predict binary classification", {
      data = xgb.DMatrix(train$data, label = train$label), max_depth = 2,
      eta = 1, nthread = n_threads, nrounds = nrounds,
      objective = "binary:logistic", eval_metric = "error",
-      watchlist = list(train = xgb.DMatrix(train$data, label = train$label))
+      evals = list(train = xgb.DMatrix(train$data, label = train$label))
    ),
    "train-error"
  )
@ -139,8 +139,8 @@ test_that("dart prediction works", {
  pred_by_train_1 <- predict(booster_by_train, newdata = dtrain, iterationrange = c(1, nrounds))
  pred_by_train_2 <- predict(booster_by_train, newdata = dtrain, training = TRUE)

-  expect_true(all(matrix(pred_by_train_0, byrow = TRUE) == matrix(pred_by_xgboost_0, byrow = TRUE)))
-  expect_true(all(matrix(pred_by_train_1, byrow = TRUE) == matrix(pred_by_xgboost_1, byrow = TRUE)))
+  expect_equal(pred_by_train_0, pred_by_xgboost_0, tolerance = 1e-6)
+  expect_equal(pred_by_train_1, pred_by_xgboost_1, tolerance = 1e-6)
  expect_true(all(matrix(pred_by_train_2, byrow = TRUE) == matrix(pred_by_xgboost_2, byrow = TRUE)))
 })

@ -152,7 +152,7 @@ test_that("train and predict softprob", {
      data = xgb.DMatrix(as.matrix(iris[, -5]), label = lb),
      max_depth = 3, eta = 0.5, nthread = n_threads, nrounds = 5,
      objective = "multi:softprob", num_class = 3, eval_metric = "merror",
-      watchlist = list(train = xgb.DMatrix(as.matrix(iris[, -5]), label = lb))
+      evals = list(train = xgb.DMatrix(as.matrix(iris[, -5]), label = lb))
    ),
    "train-merror"
  )
@ -203,7 +203,7 @@ test_that("train and predict softmax", {
      data = xgb.DMatrix(as.matrix(iris[, -5]), label = lb),
      max_depth = 3, eta = 0.5, nthread = n_threads, nrounds = 5,
      objective = "multi:softmax", num_class = 3, eval_metric = "merror",
-      watchlist = list(train = xgb.DMatrix(as.matrix(iris[, -5]), label = lb))
+      evals = list(train = xgb.DMatrix(as.matrix(iris[, -5]), label = lb))
    ),
    "train-merror"
  )
@ -226,7 +226,7 @@ test_that("train and predict RF", {
    nthread = n_threads,
    nrounds = 1, objective = "binary:logistic", eval_metric = "error",
    num_parallel_tree = 20, subsample = 0.6, colsample_bytree = 0.1,
-    watchlist = list(train = xgb.DMatrix(train$data, label = lb))
+    evals = list(train = xgb.DMatrix(train$data, label = lb))
  )
  expect_equal(xgb.get.num.boosted.rounds(bst), 1)

@ -250,7 +250,7 @@ test_that("train and predict RF with softprob", {
    objective = "multi:softprob", eval_metric = "merror",
    num_class = 3, verbose = 0,
    num_parallel_tree = 4, subsample = 0.5, colsample_bytree = 0.5,
-    watchlist = list(train = xgb.DMatrix(as.matrix(iris[, -5]), label = lb))
+    evals = list(train = xgb.DMatrix(as.matrix(iris[, -5]), label = lb))
  )
  expect_equal(xgb.get.num.boosted.rounds(bst), 15)
  # predict for all iterations:
@ -271,7 +271,7 @@ test_that("use of multiple eval metrics works", {
      data = xgb.DMatrix(train$data, label = train$label), max_depth = 2,
      eta = 1, nthread = n_threads, nrounds = 2, objective = "binary:logistic",
      eval_metric = "error", eval_metric = "auc", eval_metric = "logloss",
-      watchlist = list(train = xgb.DMatrix(train$data, label = train$label))
+      evals = list(train = xgb.DMatrix(train$data, label = train$label))
    ),
    "train-error.*train-auc.*train-logloss"
  )
@ -283,7 +283,7 @@ test_that("use of multiple eval metrics works", {
      data = xgb.DMatrix(train$data, label = train$label), max_depth = 2,
      eta = 1, nthread = n_threads, nrounds = 2, objective = "binary:logistic",
      eval_metric = list("error", "auc", "logloss"),
-      watchlist = list(train = xgb.DMatrix(train$data, label = train$label))
+      evals = list(train = xgb.DMatrix(train$data, label = train$label))
    ),
    "train-error.*train-auc.*train-logloss"
  )
@ -295,19 +295,19 @@ test_that("use of multiple eval metrics works", {

 test_that("training continuation works", {
  dtrain <- xgb.DMatrix(train$data, label = train$label, nthread = n_threads)
-  watchlist <- list(train = dtrain)
+  evals <- list(train = dtrain)
  param <- list(
    objective = "binary:logistic", max_depth = 2, eta = 1, nthread = n_threads
  )

  # for the reference, use 4 iterations at once:
  set.seed(11)
-  bst <- xgb.train(param, dtrain, nrounds = 4, watchlist, verbose = 0)
+  bst <- xgb.train(param, dtrain, nrounds = 4, evals = evals, verbose = 0)
  # first two iterations:
  set.seed(11)
-  bst1 <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0)
+  bst1 <- xgb.train(param, dtrain, nrounds = 2, evals = evals, verbose = 0)
  # continue for two more:
-  bst2 <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0, xgb_model = bst1)
+  bst2 <- xgb.train(param, dtrain, nrounds = 2, evals = evals, verbose = 0, xgb_model = bst1)
  if (!windows_flag && !solaris_flag) {
    expect_equal(xgb.save.raw(bst), xgb.save.raw(bst2))
  }
@ -315,7 +315,7 @@ test_that("training continuation works", {
  expect_equal(dim(attributes(bst2)$evaluation_log), c(4, 2))
  expect_equal(attributes(bst2)$evaluation_log, attributes(bst)$evaluation_log)
  # test continuing from raw model data
-  bst2 <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0, xgb_model = xgb.save.raw(bst1))
+  bst2 <- xgb.train(param, dtrain, nrounds = 2, evals = evals, verbose = 0, xgb_model = xgb.save.raw(bst1))
  if (!windows_flag && !solaris_flag) {
    expect_equal(xgb.save.raw(bst), xgb.save.raw(bst2))
  }
@ -323,7 +323,7 @@ test_that("training continuation works", {
  # test continuing from a model in file
  fname <- file.path(tempdir(), "xgboost.json")
  xgb.save(bst1, fname)
-  bst2 <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0, xgb_model = fname)
+  bst2 <- xgb.train(param, dtrain, nrounds = 2, evals = evals, verbose = 0, xgb_model = fname)
  if (!windows_flag && !solaris_flag) {
    expect_equal(xgb.save.raw(bst), xgb.save.raw(bst2))
  }
@ -348,7 +348,6 @@ test_that("xgb.cv works", {
  expect_false(is.null(cv$folds) && is.list(cv$folds))
  expect_length(cv$folds, 5)
  expect_false(is.null(cv$params) && is.list(cv$params))
-  expect_false(is.null(cv$callbacks))
  expect_false(is.null(cv$call))
 })

@ -418,7 +417,7 @@ test_that("max_delta_step works", {
  dtrain <- xgb.DMatrix(
    agaricus.train$data, label = agaricus.train$label, nthread = n_threads
  )
-  watchlist <- list(train = dtrain)
+  evals <- list(train = dtrain)
  param <- list(
    objective = "binary:logistic", eval_metric = "logloss", max_depth = 2,
    nthread = n_threads,
@ -426,9 +425,9 @@ test_that("max_delta_step works", {
  )
  nrounds <- 5
  # model with no restriction on max_delta_step
-  bst1 <- xgb.train(param, dtrain, nrounds, watchlist, verbose = 1)
+  bst1 <- xgb.train(param, dtrain, nrounds, evals = evals, verbose = 1)
  # model with restricted max_delta_step
-  bst2 <- xgb.train(param, dtrain, nrounds, watchlist, verbose = 1, max_delta_step = 1)
+  bst2 <- xgb.train(param, dtrain, nrounds, evals = evals, verbose = 1, max_delta_step = 1)
  # the no-restriction model is expected to have consistently lower loss during the initial iterations
  expect_true(all(attributes(bst1)$evaluation_log$train_logloss < attributes(bst2)$evaluation_log$train_logloss))
  expect_lt(mean(attributes(bst1)$evaluation_log$train_logloss) / mean(attributes(bst2)$evaluation_log$train_logloss), 0.8)
@ -445,7 +444,7 @@ test_that("colsample_bytree works", {
  colnames(test_x) <- paste0("Feature_", sprintf("%03d", 1:100))
  dtrain <- xgb.DMatrix(train_x, label = train_y, nthread = n_threads)
  dtest <- xgb.DMatrix(test_x, label = test_y, nthread = n_threads)
-  watchlist <- list(train = dtrain, eval = dtest)
+  evals <- list(train = dtrain, eval = dtest)
  ## Use colsample_bytree = 0.01, so that roughly one out of 100 features is chosen for
  ## each tree
  param <- list(
@ -454,7 +453,7 @@ test_that("colsample_bytree works", {
    eval_metric = "auc"
  )
  set.seed(2)
-  bst <- xgb.train(param, dtrain, nrounds = 100, watchlist, verbose = 0)
+  bst <- xgb.train(param, dtrain, nrounds = 100, evals = evals, verbose = 0)
  xgb.importance(model = bst)
  # If colsample_bytree works properly, a variety of features should be used
  # in the 100 trees
@ -651,6 +650,51 @@ test_that("Can use ranking objectives with either 'qid' or 'group'", {
  expect_equal(pred_qid, pred_gr)
 })

+test_that("Can predict on data.frame objects", {
+  # Predictions obtained from a data.frame must equal those obtained from
+  # the same data converted to a numeric matrix and wrapped in an xgb.DMatrix.
+  data("mtcars")
+  y <- mtcars$mpg
+  x_df <- mtcars[, -1]
+  x_mat <- as.matrix(x_df)
+  dm <- xgb.DMatrix(x_mat, label = y, nthread = n_threads)
+  model <- xgb.train(
+    params = list(
+      tree_method = "hist",
+      objective = "reg:squarederror",
+      nthread = n_threads
+    ),
+    data = dm,
+    nrounds = 5
+  )
+
+  # The prediction DMatrix is built with nthread = n_threads as well, so its
+  # construction is governed by the same thread setting as the training one.
+  pred_mat <- predict(model, xgb.DMatrix(x_mat, nthread = n_threads), nthread = n_threads)
+  pred_df <- predict(model, x_df, nthread = n_threads)
+  expect_equal(pred_mat, pred_df)
+})
+
+test_that("'base_margin' gives the same result in DMatrix as in inplace_predict", {
+  # A base_margin stored on an xgb.DMatrix must give the same predictions as
+  # the same base_margin passed to predict() together with a plain matrix.
+  data("mtcars")
+  y <- mtcars$mpg
+  x <- as.matrix(mtcars[, -1])
+  dm <- xgb.DMatrix(x, label = y, nthread = n_threads)
+  model <- xgb.train(
+    params = list(
+      tree_method = "hist",
+      objective = "reg:squarederror",
+      nthread = n_threads
+    ),
+    data = dm,
+    nrounds = 5
+  )
+
+  # Fixed seed so the random margin is reproducible between runs.
+  set.seed(123)
+  base_margin <- rnorm(nrow(x))
+  # nthread is passed here too, matching the training DMatrix above.
+  dm_w_base <- xgb.DMatrix(data = x, base_margin = base_margin, nthread = n_threads)
+  pred_from_dm <- predict(model, dm_w_base)
+  pred_from_mat <- predict(model, x, base_margin = base_margin)
+
+  expect_equal(pred_from_dm, pred_from_mat)
+})
+
 test_that("Coefficients from gblinear have the expected shape and names", {
  # Single-column coefficients
  data(mtcars)
@ -778,3 +822,66 @@ test_that("DMatrix field are set to booster when training", {
  expect_equal(getinfo(model_feature_types, "feature_type"), c("q", "c", "q"))
  expect_equal(getinfo(model_both, "feature_type"), c("q", "c", "q"))
 })
+
+test_that("Seed in params override PRNG from R", {
+  # Fits the same small model on agaricus.train, varying only the R-level
+  # PRNG seed and the 'seed' entry in params.
+  fit_model <- function(r_seed, xgb_seed) {
+    set.seed(r_seed)
+    xgb.train(
+      data = xgb.DMatrix(
+        agaricus.train$data,
+        label = agaricus.train$label, nthread = 1L
+      ),
+      params = list(
+        objective = "binary:logistic",
+        max_depth = 3L,
+        subsample = 0.1,
+        colsample_bytree = 0.1,
+        seed = xgb_seed
+      ),
+      nrounds = 3L
+    )
+  }
+  # Serializes a booster to its raw JSON representation for comparison.
+  as_json <- function(booster) xgb.save.raw(booster, raw_format = "json")
+
+  model1 <- fit_model(123, 111L)
+  model2 <- fit_model(456, 111L)
+
+  # Same params seed, different R seeds: serialized models must be equal.
+  expect_equal(as_json(model1), as_json(model2))
+
+  model3 <- fit_model(123, 222L)
+
+  # Same R seed, different params seed: serialized models must differ.
+  models_match <- isTRUE(all.equal(as_json(model1), as_json(model3)))
+  expect_false(models_match)
+})
--- a/R-package/tests/testthat/test_callbacks.R
+++ b/R-package/tests/testthat/test_callbacks.R
@ -19,7 +19,7 @@ ltrain <- add.noise(train$label, 0.2)
 ltest <- add.noise(test$label, 0.2)
 dtrain <- xgb.DMatrix(train$data, label = ltrain, nthread = n_threads)
 dtest <- xgb.DMatrix(test$data, label = ltest, nthread = n_threads)
-watchlist <- list(train = dtrain, test = dtest)
+evals <- list(train = dtrain, test = dtest)


 err <- function(label, pr) sum((pr > 0.5) != label) / length(label)
@ -28,79 +28,125 @@ param <- list(objective = "binary:logistic", eval_metric = "error",
              max_depth = 2, nthread = n_threads)


-test_that("cb.print.evaluation works as expected", {
+test_that("xgb.cb.print.evaluation works as expected for xgb.train", {
+  logs1 <- capture.output({
+    model <- xgb.train(
+      data = dtrain,
+      params = list(
+        objective = "binary:logistic",
+        eval_metric = "auc",
+        max_depth = 2,
+        nthread = n_threads
+      ),
+      nrounds = 10,
+      evals = list(train = dtrain, test = dtest),
+      callbacks = list(xgb.cb.print.evaluation(period = 1))
+    )
+  })
+  expect_equal(length(logs1), 10)
+  expect_true(all(grepl("^\\[\\d{1,2}\\]\ttrain-auc:0\\.\\d+\ttest-auc:0\\.\\d+\\s*$", logs1)))
+  lapply(seq(1, 10), function(x) expect_true(grepl(paste0("^\\[", x), logs1[x])))

-  bst_evaluation <- c('train-auc' = 0.9, 'test-auc' = 0.8)
-  bst_evaluation_err <- NULL
-  begin_iteration <- 1
-  end_iteration <- 7
-
-  f0 <- cb.print.evaluation(period = 0)
-  f1 <- cb.print.evaluation(period = 1)
-  f5 <- cb.print.evaluation(period = 5)
-
-  expect_false(is.null(attr(f1, 'call')))
-  expect_equal(attr(f1, 'name'), 'cb.print.evaluation')
-
-  iteration <- 1
-  expect_silent(f0())
-  expect_output(f1(), "\\[1\\]\ttrain-auc:0.900000\ttest-auc:0.800000")
-  expect_output(f5(), "\\[1\\]\ttrain-auc:0.900000\ttest-auc:0.800000")
-  expect_null(f1())
-
-  iteration <- 2
-  expect_output(f1(), "\\[2\\]\ttrain-auc:0.900000\ttest-auc:0.800000")
-  expect_silent(f5())
-
-  iteration <- 7
-  expect_output(f1(), "\\[7\\]\ttrain-auc:0.900000\ttest-auc:0.800000")
-  expect_output(f5(), "\\[7\\]\ttrain-auc:0.900000\ttest-auc:0.800000")
-
-  bst_evaluation_err  <- c('train-auc' = 0.1, 'test-auc' = 0.2)
-  expect_output(f1(), "\\[7\\]\ttrain-auc:0.900000±0.100000\ttest-auc:0.800000±0.200000")
+  logs2 <- capture.output({
+    model <- xgb.train(
+      data = dtrain,
+      params = list(
+        objective = "binary:logistic",
+        eval_metric = "auc",
+        max_depth = 2,
+        nthread = n_threads
+      ),
+      nrounds = 10,
+      evals = list(train = dtrain, test = dtest),
+      callbacks = list(xgb.cb.print.evaluation(period = 2))
+    )
+  })
+  expect_equal(length(logs2), 6)
+  expect_true(all(grepl("^\\[\\d{1,2}\\]\ttrain-auc:0\\.\\d+\ttest-auc:0\\.\\d+\\s*$", logs2)))
+  seq_matches <- c(seq(1, 10, 2), 10)
+  lapply(seq_along(seq_matches), function(x) expect_true(grepl(paste0("^\\[", seq_matches[x]), logs2[x])))
 })

-test_that("cb.evaluation.log works as expected", {
+test_that("xgb.cb.print.evaluation works as expected for xgb.cv", {
+  logs1 <- capture.output({
+    model <- xgb.cv(
+      data = dtrain,
+      params = list(
+        objective = "binary:logistic",
+        eval_metric = "auc",
+        max_depth = 2,
+        nthread = n_threads
+      ),
+      nrounds = 10,
+      nfold = 3,
+      callbacks = list(xgb.cb.print.evaluation(period = 1, showsd = TRUE))
+    )
+  })
+  expect_equal(length(logs1), 10)
+  expect_true(all(grepl("^\\[\\d{1,2}\\]\ttrain-auc:0\\.\\d+±0\\.\\d+\ttest-auc:0\\.\\d+±0\\.\\d+\\s*$", logs1)))
+  lapply(seq(1, 10), function(x) expect_true(grepl(paste0("^\\[", x), logs1[x])))

-  bst_evaluation <- c('train-auc' = 0.9, 'test-auc' = 0.8)
-  bst_evaluation_err <- NULL
+  logs2 <- capture.output({
+    model <- xgb.cv(
+      data = dtrain,
+      params = list(
+        objective = "binary:logistic",
+        eval_metric = "auc",
+        max_depth = 2,
+        nthread = n_threads
+      ),
+      nrounds = 10,
+      nfold = 3,
+      callbacks = list(xgb.cb.print.evaluation(period = 2, showsd = TRUE))
+    )
+  })
+  expect_equal(length(logs2), 6)
+  expect_true(all(grepl("^\\[\\d{1,2}\\]\ttrain-auc:0\\.\\d+±0\\.\\d+\ttest-auc:0\\.\\d+±0\\.\\d+\\s*$", logs2)))
+  seq_matches <- c(seq(1, 10, 2), 10)
+  lapply(seq_along(seq_matches), function(x) expect_true(grepl(paste0("^\\[", seq_matches[x]), logs2[x])))
+})

-  evaluation_log <- list()
-  f <- cb.evaluation.log()
+test_that("xgb.cb.evaluation.log works as expected for xgb.train", {
+  model <- xgb.train(
+    data = dtrain,
+    params = list(
+      objective = "binary:logistic",
+      eval_metric = "auc",
+      max_depth = 2,
+      nthread = n_threads
+    ),
+    nrounds = 10,
+    verbose = FALSE,
+    evals = list(train = dtrain, test = dtest),
+    callbacks = list(xgb.cb.evaluation.log())
+  )
+  logs <- attributes(model)$evaluation_log

-  expect_false(is.null(attr(f, 'call')))
-  expect_equal(attr(f, 'name'), 'cb.evaluation.log')
+  expect_equal(nrow(logs), 10)
+  expect_equal(colnames(logs), c("iter", "train_auc", "test_auc"))
+})

-  iteration <- 1
-  expect_silent(f())
-  expect_equal(evaluation_log,
-               list(c(iter = 1, bst_evaluation)))
-  iteration <- 2
-  expect_silent(f())
-  expect_equal(evaluation_log,
-               list(c(iter = 1, bst_evaluation), c(iter = 2, bst_evaluation)))
-  expect_silent(f(finalize = TRUE))
-  expect_equal(evaluation_log,
-               data.table::data.table(iter = 1:2, train_auc = c(0.9, 0.9), test_auc = c(0.8, 0.8)))
+test_that("xgb.cb.evaluation.log works as expected for xgb.cv", {
+  model <- xgb.cv(
+    data = dtrain,
+    params = list(
+      objective = "binary:logistic",
+      eval_metric = "auc",
+      max_depth = 2,
+      nthread = n_threads
+    ),
+    nrounds = 10,
+    verbose = FALSE,
+    nfold = 3,
+    callbacks = list(xgb.cb.evaluation.log())
+  )
+  logs <- model$evaluation_log

-  bst_evaluation_err  <- c('train-auc' = 0.1, 'test-auc' = 0.2)
-  evaluation_log <- list()
-  f <- cb.evaluation.log()
-
-  iteration <- 1
-  expect_silent(f())
-  expect_equal(evaluation_log,
-               list(c(iter = 1, c(bst_evaluation, bst_evaluation_err))))
-  iteration <- 2
-  expect_silent(f())
-  expect_equal(evaluation_log,
-               list(c(iter = 1, c(bst_evaluation, bst_evaluation_err)),
-                    c(iter = 2, c(bst_evaluation, bst_evaluation_err))))
-  expect_silent(f(finalize = TRUE))
-  expect_equal(evaluation_log,
-               data.table::data.table(iter = 1:2,
-                          train_auc_mean = c(0.9, 0.9), train_auc_std = c(0.1, 0.1),
-                          test_auc_mean = c(0.8, 0.8), test_auc_std = c(0.2, 0.2)))
+  expect_equal(nrow(logs), 10)
+  expect_equal(
+    colnames(logs),
+    c("iter", "train_auc_mean", "train_auc_std", "test_auc_mean", "test_auc_std")
+  )
 })


@ -109,26 +155,26 @@ param <- list(objective = "binary:logistic", eval_metric = "error",

 test_that("can store evaluation_log without printing", {
  expect_silent(
-    bst <- xgb.train(param, dtrain, nrounds = 10, watchlist, eta = 1, verbose = 0)
+    bst <- xgb.train(param, dtrain, nrounds = 10, evals = evals, eta = 1, verbose = 0)
  )
  expect_false(is.null(attributes(bst)$evaluation_log))
  expect_false(is.null(attributes(bst)$evaluation_log$train_error))
  expect_lt(attributes(bst)$evaluation_log[, min(train_error)], 0.2)
 })

-test_that("cb.reset.parameters works as expected", {
+test_that("xgb.cb.reset.parameters works as expected", {

  # fixed eta
  set.seed(111)
-  bst0 <- xgb.train(param, dtrain, nrounds = 2, watchlist, eta = 0.9, verbose = 0)
+  bst0 <- xgb.train(param, dtrain, nrounds = 2, evals = evals, eta = 0.9, verbose = 0)
  expect_false(is.null(attributes(bst0)$evaluation_log))
  expect_false(is.null(attributes(bst0)$evaluation_log$train_error))

  # same eta but re-set as a vector parameter in the callback
  set.seed(111)
  my_par <- list(eta = c(0.9, 0.9))
-  bst1 <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0,
-                    callbacks = list(cb.reset.parameters(my_par)))
+  bst1 <- xgb.train(param, dtrain, nrounds = 2, evals = evals, verbose = 0,
+                    callbacks = list(xgb.cb.reset.parameters(my_par)))
  expect_false(is.null(attributes(bst1)$evaluation_log$train_error))
  expect_equal(attributes(bst0)$evaluation_log$train_error,
               attributes(bst1)$evaluation_log$train_error)
@ -136,8 +182,8 @@ test_that("cb.reset.parameters works as expected", {
  # same eta but re-set via a function in the callback
  set.seed(111)
  my_par <- list(eta = function(itr, itr_end) 0.9)
-  bst2 <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0,
-                    callbacks = list(cb.reset.parameters(my_par)))
+  bst2 <- xgb.train(param, dtrain, nrounds = 2, evals = evals, verbose = 0,
+                    callbacks = list(xgb.cb.reset.parameters(my_par)))
  expect_false(is.null(attributes(bst2)$evaluation_log$train_error))
  expect_equal(attributes(bst0)$evaluation_log$train_error,
               attributes(bst2)$evaluation_log$train_error)
@ -145,39 +191,39 @@ test_that("cb.reset.parameters works as expected", {
  # different eta re-set as a vector parameter in the callback
  set.seed(111)
  my_par <- list(eta = c(0.6, 0.5))
-  bst3 <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0,
-                    callbacks = list(cb.reset.parameters(my_par)))
+  bst3 <- xgb.train(param, dtrain, nrounds = 2, evals = evals, verbose = 0,
+                    callbacks = list(xgb.cb.reset.parameters(my_par)))
  expect_false(is.null(attributes(bst3)$evaluation_log$train_error))
  expect_false(all(attributes(bst0)$evaluation_log$train_error == attributes(bst3)$evaluation_log$train_error))

  # resetting multiple parameters at the same time runs with no error
  my_par <- list(eta = c(1., 0.5), gamma = c(1, 2), max_depth = c(4, 8))
  expect_error(
-    bst4 <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0,
-                      callbacks = list(cb.reset.parameters(my_par)))
+    bst4 <- xgb.train(param, dtrain, nrounds = 2, evals = evals, verbose = 0,
+                      callbacks = list(xgb.cb.reset.parameters(my_par)))
  , NA) # NA = no error
  # CV works as well
  expect_error(
    bst4 <- xgb.cv(param, dtrain, nfold = 2, nrounds = 2, verbose = 0,
-                   callbacks = list(cb.reset.parameters(my_par)))
+                   callbacks = list(xgb.cb.reset.parameters(my_par)))
  , NA) # NA = no error

  # expect no learning with 0 learning rate
  my_par <- list(eta = c(0., 0.))
-  bstX <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0,
-                    callbacks = list(cb.reset.parameters(my_par)))
+  bstX <- xgb.train(param, dtrain, nrounds = 2, evals = evals, verbose = 0,
+                    callbacks = list(xgb.cb.reset.parameters(my_par)))
  expect_false(is.null(attributes(bstX)$evaluation_log$train_error))
  er <- unique(attributes(bstX)$evaluation_log$train_error)
  expect_length(er, 1)
  expect_gt(er, 0.4)
 })

-test_that("cb.save.model works as expected", {
+test_that("xgb.cb.save.model works as expected", {
  files <- c('xgboost_01.json', 'xgboost_02.json', 'xgboost.json')
  files <- unname(sapply(files, function(f) file.path(tempdir(), f)))
  for (f in files) if (file.exists(f)) file.remove(f)

-  bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, eta = 1, verbose = 0,
+  bst <- xgb.train(param, dtrain, nrounds = 2, evals = evals, eta = 1, verbose = 0,
                   save_period = 1, save_name = file.path(tempdir(), "xgboost_%02d.json"))
  expect_true(file.exists(files[1]))
  expect_true(file.exists(files[2]))
@ -193,7 +239,7 @@ test_that("cb.save.model works as expected", {
  expect_equal(xgb.save.raw(bst), xgb.save.raw(b2))

  # save_period = 0 saves the last iteration's model
-  bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, eta = 1, verbose = 0,
+  bst <- xgb.train(param, dtrain, nrounds = 2, evals = evals, eta = 1, verbose = 0,
                   save_period = 0, save_name = file.path(tempdir(), 'xgboost.json'))
  expect_true(file.exists(files[3]))
  b2 <- xgb.load(files[3])
@ -206,7 +252,7 @@ test_that("cb.save.model works as expected", {
 test_that("early stopping xgb.train works", {
  set.seed(11)
  expect_output(
-    bst <- xgb.train(param, dtrain, nrounds = 20, watchlist, eta = 0.3,
+    bst <- xgb.train(param, dtrain, nrounds = 20, evals = evals, eta = 0.3,
                     early_stopping_rounds = 3, maximize = FALSE)
  , "Stopping. Best iteration")
  expect_false(is.null(xgb.attr(bst, "best_iteration")))
@ -220,7 +266,7 @@ test_that("early stopping xgb.train works", {

  set.seed(11)
  expect_silent(
-    bst0 <- xgb.train(param, dtrain, nrounds = 20, watchlist, eta = 0.3,
+    bst0 <- xgb.train(param, dtrain, nrounds = 20, evals = evals, eta = 0.3,
                      early_stopping_rounds = 3, maximize = FALSE, verbose = 0)
  )
  expect_equal(attributes(bst)$evaluation_log, attributes(bst0)$evaluation_log)
@ -236,10 +282,10 @@ test_that("early stopping xgb.train works", {
 test_that("early stopping using a specific metric works", {
  set.seed(11)
  expect_output(
-    bst <- xgb.train(param[-2], dtrain, nrounds = 20, watchlist, eta = 0.6,
+    bst <- xgb.train(param[-2], dtrain, nrounds = 20, evals = evals, eta = 0.6,
                     eval_metric = "logloss", eval_metric = "auc",
-                     callbacks = list(cb.early.stop(stopping_rounds = 3, maximize = FALSE,
-                                                    metric_name = 'test_logloss')))
+                     callbacks = list(xgb.cb.early.stop(stopping_rounds = 3, maximize = FALSE,
+                                                        metric_name = 'test_logloss')))
  , "Stopping. Best iteration")
  expect_false(is.null(xgb.attr(bst, "best_iteration")))
  expect_lt(xgb.attr(bst, "best_iteration"), 19)
@ -269,7 +315,7 @@ test_that("early stopping works with titanic", {
    nrounds = 100,
    early_stopping_rounds = 3,
    nthread = n_threads,
-    watchlist = list(train = xgb.DMatrix(dtx, label = dty))
+    evals = list(train = xgb.DMatrix(dtx, label = dty))
  )

  expect_true(TRUE)  # should not crash
@ -281,10 +327,10 @@ test_that("early stopping xgb.cv works", {
    cv <- xgb.cv(param, dtrain, nfold = 5, eta = 0.3, nrounds = 20,
                 early_stopping_rounds = 3, maximize = FALSE)
  , "Stopping. Best iteration")
-  expect_false(is.null(cv$best_iteration))
-  expect_lt(cv$best_iteration, 19)
+  expect_false(is.null(cv$early_stop$best_iteration))
+  expect_lt(cv$early_stop$best_iteration, 19)
  # the best error is min error:
-  expect_true(cv$evaluation_log[, test_error_mean[cv$best_iteration] == min(test_error_mean)])
+  expect_true(cv$evaluation_log[, test_error_mean[cv$early_stop$best_iteration] == min(test_error_mean)])
 })

 test_that("prediction in xgb.cv works", {
@ -292,19 +338,19 @@ test_that("prediction in xgb.cv works", {
  nrounds <- 4
  cv <- xgb.cv(param, dtrain, nfold = 5, eta = 0.5, nrounds = nrounds, prediction = TRUE, verbose = 0)
  expect_false(is.null(cv$evaluation_log))
-  expect_false(is.null(cv$pred))
-  expect_length(cv$pred, nrow(train$data))
-  err_pred <- mean(sapply(cv$folds, function(f) mean(err(ltrain[f], cv$pred[f]))))
+  expect_false(is.null(cv$cv_predict$pred))
+  expect_length(cv$cv_predict$pred, nrow(train$data))
+  err_pred <- mean(sapply(cv$folds, function(f) mean(err(ltrain[f], cv$cv_predict$pred[f]))))
  err_log <- cv$evaluation_log[nrounds, test_error_mean]
  expect_equal(err_pred, err_log, tolerance = 1e-6)

  # save CV models
  set.seed(11)
  cvx <- xgb.cv(param, dtrain, nfold = 5, eta = 0.5, nrounds = nrounds, prediction = TRUE, verbose = 0,
-                callbacks = list(cb.cv.predict(save_models = TRUE)))
+                callbacks = list(xgb.cb.cv.predict(save_models = TRUE)))
  expect_equal(cv$evaluation_log, cvx$evaluation_log)
-  expect_length(cvx$models, 5)
-  expect_true(all(sapply(cvx$models, class) == 'xgb.Booster'))
+  expect_length(cvx$cv_predict$models, 5)
+  expect_true(all(sapply(cvx$cv_predict$models, class) == 'xgb.Booster'))
 })

 test_that("prediction in xgb.cv works for gblinear too", {
@ -312,8 +358,8 @@ test_that("prediction in xgb.cv works for gblinear too", {
  p <- list(booster = 'gblinear', objective = "reg:logistic", nthread = n_threads)
  cv <- xgb.cv(p, dtrain, nfold = 5, eta = 0.5, nrounds = 2, prediction = TRUE, verbose = 0)
  expect_false(is.null(cv$evaluation_log))
-  expect_false(is.null(cv$pred))
-  expect_length(cv$pred, nrow(train$data))
+  expect_false(is.null(cv$cv_predict$pred))
+  expect_length(cv$cv_predict$pred, nrow(train$data))
 })

 test_that("prediction in early-stopping xgb.cv works", {
@ -324,14 +370,14 @@ test_that("prediction in early-stopping xgb.cv works", {
                 prediction = TRUE, base_score = 0.5)
  , "Stopping. Best iteration")

-  expect_false(is.null(cv$best_iteration))
-  expect_lt(cv$best_iteration, 19)
+  expect_false(is.null(cv$early_stop$best_iteration))
+  expect_lt(cv$early_stop$best_iteration, 19)
  expect_false(is.null(cv$evaluation_log))
-  expect_false(is.null(cv$pred))
-  expect_length(cv$pred, nrow(train$data))
+  expect_false(is.null(cv$cv_predict$pred))
+  expect_length(cv$cv_predict$pred, nrow(train$data))

-  err_pred <- mean(sapply(cv$folds, function(f) mean(err(ltrain[f], cv$pred[f]))))
-  err_log <- cv$evaluation_log[cv$best_iteration, test_error_mean]
+  err_pred <- mean(sapply(cv$folds, function(f) mean(err(ltrain[f], cv$cv_predict$pred[f]))))
+  err_log <- cv$evaluation_log[cv$early_stop$best_iteration, test_error_mean]
  expect_equal(err_pred, err_log, tolerance = 1e-6)
  err_log_last <- cv$evaluation_log[cv$niter, test_error_mean]
  expect_gt(abs(err_pred - err_log_last), 1e-4)
@ -346,9 +392,9 @@ test_that("prediction in xgb.cv for softprob works", {
                 subsample = 0.8, gamma = 2, verbose = 0,
                 prediction = TRUE, objective = "multi:softprob", num_class = 3)
  , NA)
-  expect_false(is.null(cv$pred))
-  expect_equal(dim(cv$pred), c(nrow(iris), 3))
-  expect_lt(diff(range(rowSums(cv$pred))), 1e-6)
+  expect_false(is.null(cv$cv_predict$pred))
+  expect_equal(dim(cv$cv_predict$pred), c(nrow(iris), 3))
+  expect_lt(diff(range(rowSums(cv$cv_predict$pred))), 1e-6)
 })

 test_that("prediction in xgb.cv works for multi-quantile", {
@ -368,7 +414,7 @@ test_that("prediction in xgb.cv works for multi-quantile", {
    prediction = TRUE,
    verbose = 0
  )
-  expect_equal(dim(cv$pred), c(nrow(x), 5))
+  expect_equal(dim(cv$cv_predict$pred), c(nrow(x), 5))
 })

 test_that("prediction in xgb.cv works for multi-output", {
@ -389,5 +435,46 @@ test_that("prediction in xgb.cv works for multi-output", {
    prediction = TRUE,
    verbose = 0
  )
-  expect_equal(dim(cv$pred), c(nrow(x), 2))
+  expect_equal(dim(cv$cv_predict$pred), c(nrow(x), 2))
+})
+
+test_that("prediction in xgb.cv works for multi-quantile", {
+  # NOTE(review): a test with this same name already appears just above in
+  # this file; this looks like a merge duplicate -- confirm both are wanted.
+  # Five quantile levels are requested, and five prediction columns expected.
+  data(mtcars)
+  x <- as.matrix(mtcars[, -1])
+  y <- mtcars$mpg
+  quantile_params <- list(
+    objective = "reg:quantileerror",
+    quantile_alpha = c(0.1, 0.2, 0.5, 0.8, 0.9),
+    nthread = 1
+  )
+  dm <- xgb.DMatrix(x, label = y, nthread = 1)
+  cv <- xgb.cv(
+    data = dm,
+    params = quantile_params,
+    nrounds = 5,
+    nfold = 3,
+    prediction = TRUE,
+    verbose = 0
+  )
+  expected_dim <- c(nrow(x), 5)
+  expect_equal(dim(cv$cv_predict$pred), expected_dim)
+})
+
+test_that("prediction in xgb.cv works for multi-output", {
+  # NOTE(review): a test with this same name already appears just above in
+  # this file; this looks like a merge duplicate -- confirm both are wanted.
+  # Two label columns (y and -y) are supplied, and two prediction columns
+  # are expected.
+  data(mtcars)
+  x <- as.matrix(mtcars[, -1])
+  y <- mtcars$mpg
+  two_targets <- cbind(y, -y)
+  multi_output_params <- list(
+    tree_method = "hist",
+    multi_strategy = "multi_output_tree",
+    objective = "reg:squarederror",
+    nthread = n_threads
+  )
+  dm <- xgb.DMatrix(x, label = two_targets, nthread = 1)
+  cv <- xgb.cv(
+    data = dm,
+    params = multi_output_params,
+    nrounds = 5,
+    nfold = 3,
+    prediction = TRUE,
+    verbose = 0
+  )
+  expected_dim <- c(nrow(x), 2)
+  expect_equal(dim(cv$cv_predict$pred), expected_dim)
 })
--- a/R-package/tests/testthat/test_custom_objective.R
+++ b/R-package/tests/testthat/test_custom_objective.R
@ -12,7 +12,7 @@ dtrain <- xgb.DMatrix(
 dtest <- xgb.DMatrix(
  agaricus.test$data, label = agaricus.test$label, nthread = n_threads
 )
-watchlist <- list(eval = dtest, train = dtrain)
+evals <- list(eval = dtest, train = dtrain)

 logregobj <- function(preds, dtrain) {
  labels <- getinfo(dtrain, "label")
@ -33,7 +33,7 @@ param <- list(max_depth = 2, eta = 1, nthread = n_threads,
 num_round <- 2

 test_that("custom objective works", {
-  bst <- xgb.train(param, dtrain, num_round, watchlist)
+  bst <- xgb.train(param, dtrain, num_round, evals)
  expect_equal(class(bst), "xgb.Booster")
  expect_false(is.null(attributes(bst)$evaluation_log))
  expect_false(is.null(attributes(bst)$evaluation_log$eval_error))
@ -48,7 +48,7 @@ test_that("custom objective in CV works", {
 })

 test_that("custom objective with early stop works", {
-  bst <- xgb.train(param, dtrain, 10, watchlist)
+  bst <- xgb.train(param, dtrain, 10, evals)
  expect_equal(class(bst), "xgb.Booster")
  train_log <- attributes(bst)$evaluation_log$train_error
  expect_true(all(diff(train_log) <= 0))
@ -66,7 +66,7 @@ test_that("custom objective using DMatrix attr works", {
    return(list(grad = grad, hess = hess))
  }
  param$objective <- logregobjattr
-  bst <- xgb.train(param, dtrain, num_round, watchlist)
+  bst <- xgb.train(param, dtrain, num_round, evals)
  expect_equal(class(bst), "xgb.Booster")
 })

--- a/R-package/tests/testthat/test_dmatrix.R
+++ b/R-package/tests/testthat/test_dmatrix.R
@ -41,13 +41,13 @@ test_that("xgb.DMatrix: basic construction", {

  params <- list(tree_method = "hist", nthread = n_threads)
  bst_fd <- xgb.train(
-    params, nrounds = 8, fd, watchlist = list(train = fd)
+    params, nrounds = 8, fd, evals = list(train = fd)
  )
  bst_dgr <- xgb.train(
-    params, nrounds = 8, fdgr, watchlist = list(train = fdgr)
+    params, nrounds = 8, fdgr, evals = list(train = fdgr)
  )
  bst_dgc <- xgb.train(
-    params, nrounds = 8, fdgc, watchlist = list(train = fdgc)
+    params, nrounds = 8, fdgc, evals = list(train = fdgc)
  )

  raw_fd <- xgb.save.raw(bst_fd, raw_format = "ubj")
@ -302,6 +302,37 @@ test_that("xgb.DMatrix: Inf as missing", {
  file.remove(fname_nan)
 })

+test_that("xgb.DMatrix: missing in CSR", {
+  # A dense matrix containing an NA and its CSR (RsparseMatrix) conversion
+  # must produce byte-identical saved DMatrix files when NA is the missing
+  # value for both.
+  x_dense <- matrix(as.numeric(1:10), nrow = 5)
+  x_dense[2, 1] <- NA_real_
+
+  x_csr <- as(x_dense, "RsparseMatrix")
+
+  # Save under tempdir() rather than the working directory so the test
+  # never leaves files behind next to the test sources.
+  fname_dense <- file.path(tempdir(), "dense.dmatrix")
+  fname_csr <- file.path(tempdir(), "csr.dmatrix")
+
+  m_dense <- xgb.DMatrix(x_dense, nthread = n_threads, missing = NA_real_)
+  xgb.DMatrix.save(m_dense, fname_dense)
+
+  m_csr <- xgb.DMatrix(x_csr, nthread = n_threads, missing = NA)
+  xgb.DMatrix.save(m_csr, fname_csr)
+
+  expect_equal(file.size(fname_dense), file.size(fname_csr))
+
+  # Given a file name, readBin() opens and closes the file itself, so no
+  # connection is left open if something errors part-way through.
+  bytes <- file.size(fname_dense)
+  densedmatrix <- readBin(fname_dense, "raw", n = bytes)
+  csrmatrix <- readBin(fname_csr, "raw", n = bytes)
+
+  expect_equal(length(densedmatrix), length(csrmatrix))
+  expect_equal(densedmatrix, csrmatrix)
+
+  file.remove(fname_dense)
+  file.remove(fname_csr)
+})
+
 test_that("xgb.DMatrix: error on three-dimensional array", {
  set.seed(123)
  x <- matrix(rnorm(500), nrow = 50)
@ -692,3 +723,20 @@ test_that("xgb.DMatrix: quantile cuts look correct", {
    }
  )
 })
+
+test_that("xgb.DMatrix: can read CSV", {
+  # Writes a two-row CSV, loads it through a URI that marks column 0 as the
+  # label, and checks both the labels and the remaining feature columns.
+  txt <- paste(
+    "1,2,3",
+    "-1,3,2",
+    sep = "\n"
+  )
+  fname <- file.path(tempdir(), "data.csv")
+  # eos = NULL writes the text only; the default eos = "" would append an
+  # ASCII nul byte to the end of the CSV file.
+  writeChar(txt, fname, eos = NULL)
+  uri <- paste0(fname, "?format=csv&label_column=0")
+  dm <- xgb.DMatrix(uri, silent = TRUE)
+  expect_equal(getinfo(dm, "label"), c(1, -1))
+  expect_equal(
+    as.matrix(xgb.get.DMatrix.data(dm)),
+    matrix(c(2, 3, 3, 2), nrow = 2, byrow = TRUE)
+  )
+  # Remove the temporary CSV; unlink() does not raise if removal fails.
+  unlink(fname)
+})
--- a/R-package/tests/testthat/test_feature_weights.R
+++ b/R-package/tests/testthat/test_feature_weights.R
@ -25,7 +25,7 @@ test_that("training with feature weights works", {
    expect_lt(importance[1, Frequency], importance[9, Frequency])
  }

-  for (tm in c("hist", "approx", "exact")) {
+  for (tm in c("hist", "approx")) {
    test(tm)
  }
 })
--- a/R-package/tests/testthat/test_glm.R
+++ b/R-package/tests/testthat/test_glm.R
@ -14,37 +14,37 @@ test_that("gblinear works", {

  param <- list(objective = "binary:logistic", eval_metric = "error", booster = "gblinear",
                nthread = n_threads, eta = 0.8, alpha = 0.0001, lambda = 0.0001)
-  watchlist <- list(eval = dtest, train = dtrain)
+  evals <- list(eval = dtest, train = dtrain)

  n <- 5         # iterations
  ERR_UL <- 0.005 # upper limit for the test set error
  VERB <- 0      # chatterbox switch

  param$updater <- 'shotgun'
-  bst <- xgb.train(param, dtrain, n, watchlist, verbose = VERB, feature_selector = 'shuffle')
+  bst <- xgb.train(param, dtrain, n, evals, verbose = VERB, feature_selector = 'shuffle')
  ypred <- predict(bst, dtest)
  expect_equal(length(getinfo(dtest, 'label')), 1611)
  expect_lt(attributes(bst)$evaluation_log$eval_error[n], ERR_UL)

-  bst <- xgb.train(param, dtrain, n, watchlist, verbose = VERB, feature_selector = 'cyclic',
-                   callbacks = list(cb.gblinear.history()))
+  bst <- xgb.train(param, dtrain, n, evals, verbose = VERB, feature_selector = 'cyclic',
+                   callbacks = list(xgb.cb.gblinear.history()))
  expect_lt(attributes(bst)$evaluation_log$eval_error[n], ERR_UL)
  h <- xgb.gblinear.history(bst)
  expect_equal(dim(h), c(n, ncol(dtrain) + 1))
  expect_is(h, "matrix")

  param$updater <- 'coord_descent'
-  bst <- xgb.train(param, dtrain, n, watchlist, verbose = VERB, feature_selector = 'cyclic')
+  bst <- xgb.train(param, dtrain, n, evals, verbose = VERB, feature_selector = 'cyclic')
  expect_lt(attributes(bst)$evaluation_log$eval_error[n], ERR_UL)

-  bst <- xgb.train(param, dtrain, n, watchlist, verbose = VERB, feature_selector = 'shuffle')
+  bst <- xgb.train(param, dtrain, n, evals, verbose = VERB, feature_selector = 'shuffle')
  expect_lt(attributes(bst)$evaluation_log$eval_error[n], ERR_UL)

-  bst <- xgb.train(param, dtrain, 2, watchlist, verbose = VERB, feature_selector = 'greedy')
+  bst <- xgb.train(param, dtrain, 2, evals, verbose = VERB, feature_selector = 'greedy')
  expect_lt(attributes(bst)$evaluation_log$eval_error[2], ERR_UL)

-  bst <- xgb.train(param, dtrain, n, watchlist, verbose = VERB, feature_selector = 'thrifty',
-                   top_k = 50, callbacks = list(cb.gblinear.history(sparse = TRUE)))
+  bst <- xgb.train(param, dtrain, n, evals, verbose = VERB, feature_selector = 'thrifty',
+                   top_k = 50, callbacks = list(xgb.cb.gblinear.history(sparse = TRUE)))
  expect_lt(attributes(bst)$evaluation_log$eval_error[n], ERR_UL)
  h <- xgb.gblinear.history(bst)
  expect_equal(dim(h), c(n, ncol(dtrain) + 1))
--- a/R-package/tests/testthat/test_ranking.R
+++ b/R-package/tests/testthat/test_ranking.R
@ -15,7 +15,7 @@ test_that('Test ranking with unweighted data', {

  params <- list(eta = 1, tree_method = 'exact', objective = 'rank:pairwise', max_depth = 1,
                 eval_metric = 'auc', eval_metric = 'aucpr', nthread = n_threads)
-  bst <- xgb.train(params, dtrain, nrounds = 10, watchlist = list(train = dtrain))
+  bst <- xgb.train(params, dtrain, nrounds = 10, evals = list(train = dtrain))
  # Check if the metric is monotone increasing
  expect_true(all(diff(attributes(bst)$evaluation_log$train_auc) >= 0))
  expect_true(all(diff(attributes(bst)$evaluation_log$train_aucpr) >= 0))
@ -39,7 +39,7 @@ test_that('Test ranking with weighted data', {
    eta = 1, tree_method = "exact", objective = "rank:pairwise", max_depth = 1,
    eval_metric = "auc", eval_metric = "aucpr", nthread = n_threads
  )
-  bst <- xgb.train(params, dtrain, nrounds = 10, watchlist = list(train = dtrain))
+  bst <- xgb.train(params, dtrain, nrounds = 10, evals = list(train = dtrain))
  # Check if the metric is monotone increasing
  expect_true(all(diff(attributes(bst)$evaluation_log$train_auc) >= 0))
  expect_true(all(diff(attributes(bst)$evaluation_log$train_aucpr) >= 0))
--- a/R-package/tests/testthat/test_update.R
+++ b/R-package/tests/testthat/test_update.R
@ -17,7 +17,7 @@ dtest <- xgb.DMatrix(
 win32_flag <- .Platform$OS.type == "windows" && .Machine$sizeof.pointer != 8

 test_that("updating the model works", {
-  watchlist <- list(train = dtrain, test = dtest)
+  evals <- list(train = dtrain, test = dtest)

  # no-subsampling
  p1 <- list(
@ -25,19 +25,19 @@ test_that("updating the model works", {
    updater = "grow_colmaker,prune"
  )
  set.seed(11)
-  bst1 <- xgb.train(p1, dtrain, nrounds = 10, watchlist, verbose = 0)
+  bst1 <- xgb.train(p1, dtrain, nrounds = 10, evals = evals, verbose = 0)
  tr1 <- xgb.model.dt.tree(model = bst1)

  # with subsampling
  p2 <- modifyList(p1, list(subsample = 0.1))
  set.seed(11)
-  bst2 <- xgb.train(p2, dtrain, nrounds = 10, watchlist, verbose = 0)
+  bst2 <- xgb.train(p2, dtrain, nrounds = 10, evals = evals, verbose = 0)
  tr2 <- xgb.model.dt.tree(model = bst2)

  # the same no-subsampling boosting with an extra 'refresh' updater:
  p1r <- modifyList(p1, list(updater = 'grow_colmaker,prune,refresh', refresh_leaf = FALSE))
  set.seed(11)
-  bst1r <- xgb.train(p1r, dtrain, nrounds = 10, watchlist, verbose = 0)
+  bst1r <- xgb.train(p1r, dtrain, nrounds = 10, evals = evals, verbose = 0)
  tr1r <- xgb.model.dt.tree(model = bst1r)
  # all should be the same when no subsampling
  expect_equal(attributes(bst1)$evaluation_log, attributes(bst1r)$evaluation_log)
@ -53,7 +53,7 @@ test_that("updating the model works", {
  # the same boosting with subsampling with an extra 'refresh' updater:
  p2r <- modifyList(p2, list(updater = 'grow_colmaker,prune,refresh', refresh_leaf = FALSE))
  set.seed(11)
-  bst2r <- xgb.train(p2r, dtrain, nrounds = 10, watchlist, verbose = 0)
+  bst2r <- xgb.train(p2r, dtrain, nrounds = 10, evals = evals, verbose = 0)
  tr2r <- xgb.model.dt.tree(model = bst2r)
  # should be the same evaluation but different gains and larger cover
  expect_equal(attributes(bst2)$evaluation_log, attributes(bst2r)$evaluation_log)
@ -66,7 +66,7 @@ test_that("updating the model works", {
  # process type 'update' for no-subsampling model, refreshing the tree stats AND leaves from training data:
  set.seed(123)
  p1u <- modifyList(p1, list(process_type = 'update', updater = 'refresh', refresh_leaf = TRUE))
-  bst1u <- xgb.train(p1u, dtrain, nrounds = 10, watchlist, verbose = 0, xgb_model = bst1)
+  bst1u <- xgb.train(p1u, dtrain, nrounds = 10, evals = evals, verbose = 0, xgb_model = bst1)
  tr1u <- xgb.model.dt.tree(model = bst1u)
  # all should be the same when no subsampling
  expect_equal(attributes(bst1)$evaluation_log, attributes(bst1u)$evaluation_log)
@ -79,7 +79,7 @@ test_that("updating the model works", {

  # same thing but with a serialized model
  set.seed(123)
-  bst1u <- xgb.train(p1u, dtrain, nrounds = 10, watchlist, verbose = 0, xgb_model = xgb.save.raw(bst1))
+  bst1u <- xgb.train(p1u, dtrain, nrounds = 10, evals = evals, verbose = 0, xgb_model = xgb.save.raw(bst1))
  tr1u <- xgb.model.dt.tree(model = bst1u)
  # all should be the same when no subsampling
  expect_equal(attributes(bst1)$evaluation_log, attributes(bst1u)$evaluation_log)
@ -87,7 +87,7 @@ test_that("updating the model works", {

  # process type 'update' for model with subsampling, refreshing only the tree stats from training data:
  p2u <- modifyList(p2, list(process_type = 'update', updater = 'refresh', refresh_leaf = FALSE))
-  bst2u <- xgb.train(p2u, dtrain, nrounds = 10, watchlist, verbose = 0, xgb_model = bst2)
+  bst2u <- xgb.train(p2u, dtrain, nrounds = 10, evals = evals, verbose = 0, xgb_model = bst2)
  tr2u <- xgb.model.dt.tree(model = bst2u)
  # should be the same evaluation but different gains and larger cover
  expect_equal(attributes(bst2)$evaluation_log, attributes(bst2u)$evaluation_log)
@ -102,7 +102,7 @@ test_that("updating the model works", {

  # process type 'update' for no-subsampling model, refreshing only the tree stats from TEST data:
  p1ut <- modifyList(p1, list(process_type = 'update', updater = 'refresh', refresh_leaf = FALSE))
-  bst1ut <- xgb.train(p1ut, dtest, nrounds = 10, watchlist, verbose = 0, xgb_model = bst1)
+  bst1ut <- xgb.train(p1ut, dtest, nrounds = 10, evals = evals, verbose = 0, xgb_model = bst1)
  tr1ut <- xgb.model.dt.tree(model = bst1ut)
  # should be the same evaluations but different gains and smaller cover (test data is smaller)
  expect_equal(attributes(bst1)$evaluation_log, attributes(bst1ut)$evaluation_log)
@ -115,18 +115,18 @@ test_that("updating works for multiclass & multitree", {
  dtr <- xgb.DMatrix(
    as.matrix(iris[, -5]), label = as.numeric(iris$Species) - 1, nthread = n_threads
  )
-  watchlist <- list(train = dtr)
+  evals <- list(train = dtr)
  p0 <- list(max_depth = 2, eta = 0.5, nthread = n_threads, subsample = 0.6,
             objective = "multi:softprob", num_class = 3, num_parallel_tree = 2,
             base_score = 0)
  set.seed(121)
-  bst0 <- xgb.train(p0, dtr, 5, watchlist, verbose = 0)
+  bst0 <- xgb.train(p0, dtr, 5, evals = evals, verbose = 0)
  tr0 <- xgb.model.dt.tree(model = bst0)

  # run update process for an original model with subsampling
  p0u <- modifyList(p0, list(process_type = 'update', updater = 'refresh', refresh_leaf = FALSE))
  bst0u <- xgb.train(p0u, dtr, nrounds = xgb.get.num.boosted.rounds(bst0),
-                     watchlist, xgb_model = bst0, verbose = 0)
+                     evals = evals, xgb_model = bst0, verbose = 0)
  tr0u <- xgb.model.dt.tree(model = bst0u)

  # should be the same evaluation but different gains and larger cover
--- a/R-package/vignettes/xgboostPresentation.Rmd
+++ b/R-package/vignettes/xgboostPresentation.Rmd
@ -341,10 +341,10 @@ One way to measure progress in learning of a model is to provide to **XGBoost**

 > in some way it is similar to what we have done above with the average error. The main difference is that below it was after building the model, and now it is during the construction that we measure errors.

-For the purpose of this example, we use `watchlist` parameter. It is a list of `xgb.DMatrix`, each of them tagged with a name.
+For the purpose of this example, we use the `evals` parameter. It is a list of `xgb.DMatrix` objects, each of them tagged with a name.

-```{r watchlist, message=F, warning=F}
-watchlist <- list(train = dtrain, test = dtest)
+```{r evals, message=F, warning=F}
+evals <- list(train = dtrain, test = dtest)

 bst <- xgb.train(
    data = dtrain
@ -355,7 +355,7 @@ bst <- xgb.train(
        , objective = "binary:logistic"
    )
    , nrounds = 2
-    , watchlist = watchlist
+    , evals = evals
 )
 ```

@ -367,7 +367,7 @@ If with your own dataset you have not such results, you should think about how y

 For a better understanding of the learning progression, you may want to have some specific metric or even use multiple evaluation metrics.

-```{r watchlist2, message=F, warning=F}
+```{r evals2, message=F, warning=F}
 bst <- xgb.train(
    data = dtrain
    , max_depth = 2
@ -379,7 +379,7 @@ bst <- xgb.train(
        , eval_metric = "logloss"
    )
    , nrounds = 2
-    , watchlist = watchlist
+    , evals = evals
 )
 ```

@ -401,7 +401,7 @@ bst <- xgb.train(
        , eval_metric = "logloss"
    )
    , nrounds = 2
-    , watchlist = watchlist
+    , evals = evals
 )
 ```

@ -430,7 +430,7 @@ bst <- xgb.train(
        , objective = "binary:logistic"
    )
    , nrounds = 2
-    , watchlist = watchlist
+    , evals = evals
 )
 ```

@ -496,6 +496,9 @@ An interesting test to see how identical our saved model is to the original one

 ```{r loadModel, message=F, warning=F}
 # load binary model to R
+# Note that the number of threads for 'xgb.load' is taken from the global config,
+# which can be modified like this:
+RhpcBLASctl::omp_set_num_threads(1)
 bst2 <- xgb.load(fname)
 xgb.parameters(bst2) <- list(nthread = 2)
 pred2 <- predict(bst2, test$data)
--- a/doc/R-package/index.rst
+++ b/doc/R-package/index.rst
@ -34,4 +34,5 @@ Other topics
 .. toctree::
  :maxdepth: 2
  :titlesonly:
+
  Handling of indexable elements <index_base>
--- a/doc/parameter.rst
+++ b/doc/parameter.rst
@ -118,7 +118,7 @@ Parameters for Tree Booster
  - All ``colsample_by*`` parameters have a range of (0, 1], the default value of 1, and specify the fraction of columns to be subsampled.
  - ``colsample_bytree`` is the subsample ratio of columns when constructing each tree. Subsampling occurs once for every tree constructed.
  - ``colsample_bylevel`` is the subsample ratio of columns for each level. Subsampling occurs once for every new depth level reached in a tree. Columns are subsampled from the set of columns chosen for the current tree.
-  - ``colsample_bynode`` is the subsample ratio of columns for each node (split). Subsampling occurs once every time a new split is evaluated. Columns are subsampled from the set of columns chosen for the current level.
+  - ``colsample_bynode`` is the subsample ratio of columns for each node (split). Subsampling occurs once every time a new split is evaluated. Columns are subsampled from the set of columns chosen for the current level. This is not supported by the exact tree method.
  - ``colsample_by*`` parameters work cumulatively. For instance,
    the combination ``{'colsample_bytree':0.5, 'colsample_bylevel':0.5,
    'colsample_bynode':0.5}`` with 64 features will leave 8 features to choose from at
@ -450,7 +450,7 @@ Specify the learning task and the corresponding learning objective. The objectiv

 * ``seed`` [default=0]

-  - Random number seed.  This parameter is ignored in R package, use `set.seed()` instead.
+  - Random number seed.  In the R package, if not specified, a random seed is drawn through R's own RNG engine instead of defaulting to seed 'zero'.

 * ``seed_per_iteration`` [default= ``false``]

@ -489,7 +489,7 @@ Parameters for learning to rank (``rank:ndcg``, ``rank:map``, ``rank:pairwise``)

 These are parameters specific to learning to rank task. See :doc:`Learning to Rank </tutorials/learning_to_rank>` for an in-depth explanation.

-* ``lambdarank_pair_method`` [default = ``mean``]
+* ``lambdarank_pair_method`` [default = ``topk``]

  How to construct pairs for pair-wise learning.

@ -500,7 +500,13 @@ These are parameters specific to learning to rank task. See :doc:`Learning to Ra

  It specifies the number of pairs sampled for each document when pair method is ``mean``, or the truncation level for queries when the pair method is ``topk``. For example, to train with ``ndcg@6``, set ``lambdarank_num_pair_per_sample`` to :math:`6` and ``lambdarank_pair_method`` to ``topk``.

-* ``lambdarank_unbiased`` [default = ``false``]
+* ``lambdarank_normalization`` [default = ``true``]
+
+  .. versionadded:: 2.1.0
+
+  Whether to normalize the leaf value by lambda gradient. This can sometimes stagnate the training progress.
+
+* ``lambdarank_unbiased`` [default = ``false``]

  Specify whether do we need to debias input click data.

--- a/doc/python/callbacks.rst
+++ b/doc/python/callbacks.rst
@ -36,7 +36,7 @@ inside iteration loop.  You can also pass this callback function directly into X
    # Specify which dataset and which metric should be used for early stopping.
    early_stop = xgb.callback.EarlyStopping(rounds=early_stopping_rounds,
                                            metric_name='CustomErr',
-                                            data_name='Train')
+                                            data_name='Valid')

    booster = xgb.train(
        {'objective': 'binary:logistic',
--- a/doc/python/python_intro.rst
+++ b/doc/python/python_intro.rst
@ -63,7 +63,7 @@ The input data is stored in a :py:class:`DMatrix <xgboost.DMatrix>` object. For

  .. code-block:: python

-    dtrain = xgb.DMatrix('train.svm.txt')
+    dtrain = xgb.DMatrix('train.svm.txt?format=libsvm')
    dtrain.save_binary('train.buffer')

 * Missing values can be replaced by a default value in the :py:class:`DMatrix <xgboost.DMatrix>` constructor:
@ -86,7 +86,7 @@ to number of groups.

  .. code-block:: python

-    dtrain = xgb.DMatrix('train.svm.txt')
+    dtrain = xgb.DMatrix('train.svm.txt?format=libsvm')
    dtest = xgb.DMatrix('test.svm.buffer')

  The parser in XGBoost has limited functionality. When using Python interface, it's
@ -176,7 +176,6 @@ Support Matrix
 +-------------------------+-----------+-------------------+-----------+-----------+--------------------+-------------+
 | pyarrow.Table           | NPA       | NPA               | NPA       | NPA       | NPA                | NPA         |
 +-------------------------+-----------+-------------------+-----------+-----------+--------------------+-------------+
-+-------------------------+-----------+-------------------+-----------+-----------+--------------------+-------------+
 | _\_array\_\_            | NPA       | F                 | NPA       | NPA       | H                  |             |
 +-------------------------+-----------+-------------------+-----------+-----------+--------------------+-------------+
 | Others                  | SciCSR    | F                 |           | F         | F                  |             |
@ -240,7 +239,7 @@ A saved model can be loaded as follows:
 .. code-block:: python

  bst = xgb.Booster({'nthread': 4})  # init model
-  bst.load_model('model.bin')  # load data
+  bst.load_model('model.bin')  # load model data

 Methods including `update` and `boost` from `xgboost.Booster` are designed for
 internal usage only.  The wrapper function `xgboost.train` does some
--- a/doc/python/sklearn_estimator.rst
+++ b/doc/python/sklearn_estimator.rst
@ -62,7 +62,7 @@ stack of trees:
 .. code-block:: python

    early_stop = xgb.callback.EarlyStopping(
-        rounds=2, metric_name='logloss', data_name='Validation_0', save_best=True
+        rounds=2, metric_name='logloss', data_name='validation_0', save_best=True
    )
    clf = xgb.XGBClassifier(tree_method="hist", callbacks=[early_stop])
    clf.fit(X_train, y_train, eval_set=[(X_test, y_test)])
--- a/doc/requirements.txt
+++ b/doc/requirements.txt
@ -7,7 +7,9 @@ sh
 matplotlib
 graphviz
 numpy
+scipy
 myst-parser
+ray[train]
 xgboost_ray
 sphinx-gallery
 pyspark
--- a/doc/tutorials/learning_to_rank.rst
+++ b/doc/tutorials/learning_to_rank.rst
@ -146,7 +146,8 @@ The consideration of effective pairs also applies to the choice of pair method (

 When using the mean strategy for generating pairs, where the target metric (like ``NDCG``) is computed over the whole query list, users can specify how many pairs should be generated per each document, by setting the ``lambdarank_num_pair_per_sample``. XGBoost will randomly sample ``lambdarank_num_pair_per_sample`` pairs for each element in the query group (:math:`|pairs| = |query| \times num\_pairsample`). Often, setting it to 1 can produce reasonable results. In cases where performance is inadequate due to insufficient number of effective pairs being generated, set ``lambdarank_num_pair_per_sample`` to a higher value. As more document pairs are generated, more effective pairs will be generated as well.

-On the other hand, if you are prioritizing the top :math:`k` documents, the ``lambdarank_num_pair_per_sample`` should be set slightly higher than :math:`k` (with a few more documents) to obtain a good training result.
+On the other hand, if you are prioritizing the top :math:`k` documents, the ``lambdarank_num_pair_per_sample`` should be set slightly higher than :math:`k` (with a few more documents) to obtain a good training result. Lastly, XGBoost employs additional regularization for learning to rank objectives, which can be disabled by setting the ``lambdarank_normalization`` to ``False``.
+

 **Summary** If you have large amount of training data:

--- a/doc/tutorials/spark_estimator.rst
+++ b/doc/tutorials/spark_estimator.rst
@ -28,7 +28,7 @@ We can create a ``SparkXGBRegressor`` estimator like:
 .. code-block:: python

  from xgboost.spark import SparkXGBRegressor
-  spark_reg_estimator = SparkXGBRegressor(
+  xgb_regressor = SparkXGBRegressor(
    features_col="features",
    label_col="label",
    num_workers=2,
@ -61,7 +61,7 @@ type or spark array type.

 .. code-block:: python

-  transformed_test_spark_dataframe = xgb_regressor.predict(test_spark_dataframe)
+  transformed_test_spark_dataframe = xgb_regressor_model.transform(test_spark_dataframe)


 The above snippet code returns a ``transformed_test_spark_dataframe`` that contains the input
--- a/doc/xgboost_doc.yml
+++ b/doc/xgboost_doc.yml
@ -1,15 +1,23 @@
 name: xgboost_docs
 dependencies:
-  - python
+  - python=3.10
  - pip
  - pygraphviz
  - sphinx
+  - sphinx-gallery
  - recommonmark
  - mock
  - sh
  - matplotlib
+  - numpy
+  - scipy
+  - scikit-learn
+  - myst-parser
+  - pyspark
  - pip:
    - breathe
    - sphinx_rtd_theme
    - pydot-ng
    - graphviz
+    - ray[train]
+    - xgboost_ray
--- a/include/xgboost/base.h
+++ b/include/xgboost/base.h
@ -37,7 +37,7 @@
 * \brief Whether to customize global PRNG.
 */
 #ifndef XGBOOST_CUSTOMIZE_GLOBAL_PRNG
-#define XGBOOST_CUSTOMIZE_GLOBAL_PRNG XGBOOST_STRICT_R_MODE
+#define XGBOOST_CUSTOMIZE_GLOBAL_PRNG 0
 #endif  // XGBOOST_CUSTOMIZE_GLOBAL_PRNG

 /*!
--- a/include/xgboost/collective/result.h
+++ b/include/xgboost/collective/result.h
@ -1,8 +1,10 @@
 /**
- *  Copyright 2023, XGBoost Contributors
+ *  Copyright 2023-2024, XGBoost Contributors
 */
 #pragma once

+#include <xgboost/logging.h>
+
 #include <memory>   // for unique_ptr
 #include <sstream>  // for stringstream
 #include <stack>    // for stack
@ -160,10 +162,16 @@ struct Result {

 // We don't have monad, a simple helper would do.
 template <typename Fn>
-Result operator<<(Result&& r, Fn&& fn) {
+[[nodiscard]] Result operator<<(Result&& r, Fn&& fn) {
  if (!r.OK()) {
    return std::forward<Result>(r);
  }
  return fn();
 }
+
+inline void SafeColl(Result const& rc) {
+  if (!rc.OK()) {
+    LOG(FATAL) << rc.Report();
+  }
+}
 }  // namespace xgboost::collective
--- a/include/xgboost/json.h
+++ b/include/xgboost/json.h
@ -1,5 +1,5 @@
 /**
- * Copyright 2019-2023 by XGBoost Contributors
+ * Copyright 2019-2024, XGBoost Contributors
 */
 #ifndef XGBOOST_JSON_H_
 #define XGBOOST_JSON_H_
@ -42,7 +42,8 @@ class Value {
    kBoolean,
    kNull,
    // typed array for ubjson
-    kNumberArray,
+    kF32Array,
+    kF64Array,
    kU8Array,
    kI32Array,
    kI64Array
@ -173,7 +174,11 @@ class JsonTypedArray : public Value {
 /**
 * @brief Typed UBJSON array for 32-bit floating point.
 */
-using F32Array = JsonTypedArray<float, Value::ValueKind::kNumberArray>;
+using F32Array = JsonTypedArray<float, Value::ValueKind::kF32Array>;
+/**
+ * @brief Typed UBJSON array for 64-bit floating point.
+ */
+using F64Array = JsonTypedArray<double, Value::ValueKind::kF64Array>;
 /**
 * @brief Typed UBJSON array for uint8_t.
 */
@ -457,9 +462,9 @@ class Json {
  Json& operator[](int ind)                 const { return (*ptr_)[ind]; }

  /*! \brief Return the reference to stored Json value. */
-  Value const& GetValue() const & { return *ptr_; }
-  Value const& GetValue() &&      { return *ptr_; }
-  Value&       GetValue() &       { return *ptr_; }
+  [[nodiscard]] Value const& GetValue() const& { return *ptr_; }
+  Value const& GetValue() && { return *ptr_; }
+  Value& GetValue() & { return *ptr_; }

  bool operator==(Json const& rhs) const {
    return *ptr_ == *(rhs.ptr_);
@ -472,7 +477,7 @@ class Json {
    return os;
  }

-  IntrusivePtr<Value> const& Ptr() const { return ptr_; }
+  [[nodiscard]] IntrusivePtr<Value> const& Ptr() const { return ptr_; }

 private:
  IntrusivePtr<Value> ptr_{new JsonNull};
--- a/include/xgboost/json_io.h
+++ b/include/xgboost/json_io.h
@ -142,6 +142,7 @@ class JsonWriter {

  virtual void Visit(JsonArray  const* arr);
  virtual void Visit(F32Array  const* arr);
+  virtual void Visit(F64Array const*) { LOG(FATAL) << "Only UBJSON format can handle f64 array."; }
  virtual void Visit(U8Array  const* arr);
  virtual void Visit(I32Array  const* arr);
  virtual void Visit(I64Array  const* arr);
@ -244,7 +245,8 @@ class UBJReader : public JsonReader {
 */
 class UBJWriter : public JsonWriter {
  void Visit(JsonArray const* arr) override;
-  void Visit(F32Array  const* arr) override;
+  void Visit(F32Array const* arr) override;
+  void Visit(F64Array const* arr) override;
  void Visit(U8Array  const* arr) override;
  void Visit(I32Array  const* arr) override;
  void Visit(I64Array  const* arr) override;
--- a/include/xgboost/linalg.h
+++ b/include/xgboost/linalg.h
@ -295,6 +295,9 @@ class TensorView {
  using ShapeT = std::size_t[kDim];
  using StrideT = ShapeT;

+  using element_type = T;                  // NOLINT
+  using value_type = std::remove_cv_t<T>;  // NOLINT
+
 private:
  StrideT stride_{1};
  ShapeT shape_{0};
@ -314,7 +317,7 @@ class TensorView {
  }

  template <size_t old_dim, size_t new_dim, int32_t D, typename I>
-  LINALG_HD size_t MakeSliceDim(size_t new_shape[D], size_t new_stride[D],
+  LINALG_HD size_t MakeSliceDim(std::size_t new_shape[D], std::size_t new_stride[D],
                                detail::RangeTag<I> &&range) const {
    static_assert(new_dim < D);
    static_assert(old_dim < kDim);
@ -528,9 +531,10 @@ class TensorView {
  LINALG_HD auto Stride(size_t i) const { return stride_[i]; }

  /**
-   * \brief Number of items in the tensor.
+   * @brief Number of items in the tensor.
   */
  [[nodiscard]] LINALG_HD std::size_t Size() const { return size_; }
+  [[nodiscard]] bool Empty() const { return Size() == 0; }
  /**
   * \brief Whether this is a contiguous array, both C and F contiguous returns true.
   */
@ -865,7 +869,9 @@ class Tensor {
  auto HostView() { return this->View(DeviceOrd::CPU()); }
  auto HostView() const { return this->View(DeviceOrd::CPU()); }

-  [[nodiscard]] size_t Size() const { return data_.Size(); }
+  [[nodiscard]] std::size_t Size() const { return data_.Size(); }
+  [[nodiscard]] bool Empty() const { return Size() == 0; }
+
  auto Shape() const { return common::Span<size_t const, kDim>{shape_}; }
  auto Shape(size_t i) const { return shape_[i]; }

--- a/include/xgboost/span.h
+++ b/include/xgboost/span.h
@ -738,10 +738,10 @@ class IterSpan {
    return {data() + _offset, _count == dynamic_extent ? size() - _offset : _count};
  }
  [[nodiscard]] XGBOOST_DEVICE constexpr iterator begin() const noexcept {  // NOLINT
-    return {this, 0};
+    return it_;
  }
  [[nodiscard]] XGBOOST_DEVICE constexpr iterator end() const noexcept {  // NOLINT
-    return {this, size()};
+    return it_ + size();
  }
 };
 }  // namespace common
--- a/include/xgboost/tree_model.h
+++ b/include/xgboost/tree_model.h
@ -1,5 +1,5 @@
 /**
- * Copyright 2014-2023 by Contributors
+ * Copyright 2014-2024, XGBoost Contributors
 * \file tree_model.h
 * \brief model structure for tree
 * \author Tianqi Chen
@ -688,6 +688,9 @@ class RegTree : public Model {
    }
    return (*this)[nidx].DefaultLeft();
  }
+  [[nodiscard]] bst_node_t DefaultChild(bst_node_t nidx) const {
+    return this->DefaultLeft(nidx) ? this->LeftChild(nidx) : this->RightChild(nidx);
+  }
  [[nodiscard]] bool IsRoot(bst_node_t nidx) const {
    if (IsMultiTarget()) {
      return nidx == kRoot;
--- a/jvm-packages/create_jni.py
+++ b/jvm-packages/create_jni.py
@ -83,44 +83,59 @@ def native_build(args):
    with cd(".."):
        build_dir = 'build-gpu' if cli_args.use_cuda == 'ON' or cli_args.use_hip == 'ON' else 'build'
        maybe_makedirs(build_dir)
+
+        if sys.platform == "linux":
+            maybe_parallel_build = " -- -j $(nproc)"
+        else:
+            maybe_parallel_build = ""
+
+        if cli_args.log_capi_invocation == "ON":
+            CONFIG["LOG_CAPI_INVOCATION"] = "ON"
+
+        if cli_args.use_cuda == "ON":
+            CONFIG["USE_CUDA"] = "ON"
+            CONFIG["USE_NCCL"] = "ON"
+            CONFIG["USE_DLOPEN_NCCL"] = "OFF"
+        elif cli_args.use_hip== 'ON':
+            CONFIG['USE_HIP'] = 'ON'
+            CONFIG['USE_RCCL'] = 'ON'
+            CONFIG["USE_DLOPEN_RCCL"] = "OFF"
+
+        args = ["-D{0}:BOOL={1}".format(k, v) for k, v in CONFIG.items()]
+
+        # if the environment sets RABIT_MOCK
+        if os.getenv("RABIT_MOCK", None) is not None:
+            args.append("-DRABIT_MOCK:BOOL=ON")
+
+        # if the environment sets GPU_ARCH_FLAG
+        gpu_arch_flag = os.getenv("GPU_ARCH_FLAG", None)
+        if gpu_arch_flag is not None:
+            args.append("%s" % gpu_arch_flag)
+
        with cd(build_dir):
-            if sys.platform == "win32":
-                # Force x64 build on Windows.
-                maybe_generator = " -A x64"
-            else:
-                maybe_generator = ""
-            if sys.platform == "linux":
-                maybe_parallel_build = " -- -j $(nproc)"
-            else:
-                maybe_parallel_build = ""
-
-            if cli_args.log_capi_invocation == "ON":
-                CONFIG["LOG_CAPI_INVOCATION"] = "ON"
-
-            if cli_args.use_cuda == "ON":
-                CONFIG["USE_CUDA"] = "ON"
-                CONFIG["USE_NCCL"] = "ON"
-                CONFIG["USE_DLOPEN_NCCL"] = "OFF"
-            elif cli_args.use_hip== 'ON':
-                CONFIG['USE_HIP'] = 'ON'
-                CONFIG['USE_RCCL'] = 'ON'
-                CONFIG["USE_DLOPEN_RCCL"] = "OFF"
-
-            args = ["-D{0}:BOOL={1}".format(k, v) for k, v in CONFIG.items()]
-
-            # if enviorment set rabit_mock
-            if os.getenv("RABIT_MOCK", None) is not None:
-                args.append("-DRABIT_MOCK:BOOL=ON")
-
-            # if enviorment set GPU_ARCH_FLAG
-            gpu_arch_flag = os.getenv("GPU_ARCH_FLAG", None)
-            if gpu_arch_flag is not None:
-                args.append("%s" % gpu_arch_flag)
-
            lib_dir = os.path.join(os.pardir, "lib")
            if os.path.exists(lib_dir):
                shutil.rmtree(lib_dir)
-            run("cmake .. " + " ".join(args) + maybe_generator)
+
+            # Same trick as Python build, just test all possible generators.
+            if sys.platform == "win32":
+                supported_generators = (
+                    "",  # empty, decided by cmake
+                    '-G"Visual Studio 17 2022" -A x64',
+                    '-G"Visual Studio 16 2019" -A x64',
+                    '-G"Visual Studio 15 2017" -A x64',
+                )
+                for generator in supported_generators:
+                    try:
+                        run("cmake .. " + " ".join(args + [generator]))
+                        break
+                    except subprocess.CalledProcessError as e:
+                        print(f"Failed to build with generator: {generator}", e)
+                        with cd(os.path.pardir):
+                            shutil.rmtree(build_dir)
+                            maybe_makedirs(build_dir)
+            else:
+                run("cmake .. " + " ".join(args))
            run("cmake --build . --config Release" + maybe_parallel_build)

        with cd("demo/CLI/regression"):
--- a/plugin/CMakeLists.txt
+++ b/plugin/CMakeLists.txt
@ -1,10 +1,7 @@
 if(PLUGIN_SYCL)
  set(CMAKE_CXX_COMPILER "icpx")
-  add_library(plugin_sycl OBJECT
-    ${xgboost_SOURCE_DIR}/plugin/sycl/objective/regression_obj.cc
-    ${xgboost_SOURCE_DIR}/plugin/sycl/objective/multiclass_obj.cc
-    ${xgboost_SOURCE_DIR}/plugin/sycl/device_manager.cc
-    ${xgboost_SOURCE_DIR}/plugin/sycl/predictor/predictor.cc)
+  file(GLOB_RECURSE SYCL_SOURCES "sycl/*.cc")
+  add_library(plugin_sycl OBJECT ${SYCL_SOURCES})
  target_include_directories(plugin_sycl
    PRIVATE
    ${xgboost_SOURCE_DIR}/include
--- a/plugin/sycl/common/partition_builder.h
+++ b/plugin/sycl/common/partition_builder.h
@ -21,6 +21,9 @@
 #pragma GCC diagnostic pop

 #include "../data.h"
+#include "row_set.h"
+#include "../data/gradient_index.h"
+#include "../tree/expand_entry.h"

 #include <CL/sycl.hpp>

@ -28,6 +31,87 @@ namespace xgboost {
 namespace sycl {
 namespace common {

+// split row indexes (rid_span) to 2 parts (both stored in rid_buf) depending
+// on comparison of indexes values (idx_span) and split point (split_cond)
+// Handle dense columns
+template <bool default_left, typename BinIdxType>
+inline ::sycl::event PartitionDenseKernel(
+                                 ::sycl::queue* qu,
+                                 const GHistIndexMatrix& gmat,
+                                 const RowSetCollection::Elem& rid_span,
+                                 const size_t fid,
+                                 const int32_t split_cond,
+                                 xgboost::common::Span<size_t>* rid_buf,
+                                 size_t* parts_size,
+                                 ::sycl::event event) {
+  const size_t row_stride = gmat.row_stride;
+  const BinIdxType* gradient_index = gmat.index.data<BinIdxType>();
+  const size_t* rid = rid_span.begin;
+  const size_t range_size = rid_span.Size();
+  const size_t offset = gmat.cut.Ptrs()[fid];
+
+  size_t* p_rid_buf = rid_buf->data();
+
+  return qu->submit([&](::sycl::handler& cgh) {
+    cgh.depends_on(event);
+    cgh.parallel_for<>(::sycl::range<1>(range_size), [=](::sycl::item<1> nid) {
+      const size_t id = rid[nid.get_id(0)];
+      const int32_t value = static_cast<int32_t>(gradient_index[id * row_stride + fid] + offset);
+      const bool is_left = value <= split_cond;
+      if (is_left) {
+        AtomicRef<size_t> n_left(parts_size[0]);
+        p_rid_buf[n_left.fetch_add(1)] = id;
+      } else {
+        AtomicRef<size_t> n_right(parts_size[1]);
+        p_rid_buf[range_size - n_right.fetch_add(1) - 1] = id;
+      }
+    });
+  });
+}
+
+// split row indexes (rid_span) to 2 parts (both stored in rid_buf) depending
+// on comparison of indexes values (idx_span) and split point (split_cond)
+// Handle sparse columns
+template <bool default_left, typename BinIdxType>
+inline ::sycl::event PartitionSparseKernel(::sycl::queue* qu,
+                                  const GHistIndexMatrix& gmat,
+                                  const RowSetCollection::Elem& rid_span,
+                                  const size_t fid,
+                                  const int32_t split_cond,
+                                  xgboost::common::Span<size_t>* rid_buf,
+                                  size_t* parts_size,
+                                  ::sycl::event event) {
+  const size_t row_stride = gmat.row_stride;
+  const BinIdxType* gradient_index = gmat.index.data<BinIdxType>();
+  const size_t* rid = rid_span.begin;
+  const size_t range_size = rid_span.Size();
+  const uint32_t* cut_ptrs = gmat.cut_device.Ptrs().DataConst();
+
+  size_t* p_rid_buf = rid_buf->data();
+  return qu->submit([&](::sycl::handler& cgh) {
+    cgh.depends_on(event);
+    cgh.parallel_for<>(::sycl::range<1>(range_size), [=](::sycl::item<1> nid) {
+      const size_t id = rid[nid.get_id(0)];
+
+      const BinIdxType* gr_index_local = gradient_index + row_stride * id;
+      const int32_t fid_local = std::lower_bound(gr_index_local,
+                                                 gr_index_local + row_stride,
+                                                 cut_ptrs[fid]) - gr_index_local;
+      const bool is_left = (fid_local >= row_stride ||
+                            gr_index_local[fid_local] >= cut_ptrs[fid + 1]) ?
+                              default_left :
+                              gr_index_local[fid_local] <= split_cond;
+      if (is_left) {
+        AtomicRef<size_t> n_left(parts_size[0]);
+        p_rid_buf[n_left.fetch_add(1)] = id;
+      } else {
+        AtomicRef<size_t> n_right(parts_size[1]);
+        p_rid_buf[range_size - n_right.fetch_add(1) - 1] = id;
+      }
+    });
+  });
+}
+
 // The builder is required for samples partition to left and rights children for set of nodes
 class PartitionBuilder {
 public:
@ -53,7 +137,6 @@ class PartitionBuilder {
    return result_rows_[2 * nid];
  }

-
  size_t GetNRightElems(int nid) const {
    return result_rows_[2 * nid + 1];
  }
@ -72,19 +155,97 @@ class PartitionBuilder {
    return { data_.Data() + nodes_offsets_[nid], nodes_offsets_[nid + 1] - nodes_offsets_[nid] };
  }

+  /*!
+   * \brief Submit the partition kernel for a single node.
+   *
+   * Picks PartitionDenseKernel or PartitionSparseKernel from gmat.IsDense()
+   * and instantiates it with the node's default direction as a compile-time
+   * flag. All remaining arguments are forwarded unchanged.
+   *
+   * \return the event returned by the selected kernel launcher
+   */
+  template <typename BinIdxType>
+  ::sycl::event Partition(const int32_t split_cond,
+                          const GHistIndexMatrix& gmat,
+                          const RowSetCollection::Elem& rid_span,
+                          const xgboost::RegTree::Node& node,
+                          xgboost::common::Span<size_t>* rid_buf,
+                          size_t* parts_size,
+                          ::sycl::event event) {
+    const bst_uint split_feature = node.SplitIndex();
+    const bool missing_goes_left = node.DefaultLeft();
+    const bool dense_layout = gmat.IsDense();
+
+    if (dense_layout && missing_goes_left) {
+      return PartitionDenseKernel<true, BinIdxType>(
+          qu_, gmat, rid_span, split_feature, split_cond, rid_buf, parts_size, event);
+    }
+    if (dense_layout) {
+      return PartitionDenseKernel<false, BinIdxType>(
+          qu_, gmat, rid_span, split_feature, split_cond, rid_buf, parts_size, event);
+    }
+    if (missing_goes_left) {
+      return PartitionSparseKernel<true, BinIdxType>(
+          qu_, gmat, rid_span, split_feature, split_cond, rid_buf, parts_size, event);
+    }
+    return PartitionSparseKernel<false, BinIdxType>(
+        qu_, gmat, rid_span, split_feature, split_cond, rid_buf, parts_size, event);
+  }
+
+  /*!
+   * \brief Entry point for Partition: split the rows of every node in the set.
+   *
+   * Zero-fills the per-node {left, right} counters, submits one partition
+   * kernel for each node that owns at least one row (templated on the bin
+   * index width reported by gmat.index), and finally schedules a copy of the
+   * counters into result_rows_ that depends on all node events.
+   *
+   * \param gmat               histogram index matrix read by the kernels
+   * \param nodes              nodes to split; the first n_nodes_ entries are
+   *                           read. Taken by const reference because the vector
+   *                           is only read, so no per-call copy is needed.
+   * \param row_set_collection row ranges, looked up by node id
+   * \param split_conditions   split condition per node, indexed by the node's
+   *                           position in \p nodes
+   * \param p_tree             tree providing the RegTree::Node of each node id
+   * \param general_event      in: event the fill and kernels are chained after;
+   *                           out: event of the final counter copy. Nothing is
+   *                           waited on inside this function.
+   */
+  void Partition(const GHistIndexMatrix& gmat,
+                 const std::vector<tree::ExpandEntry>& nodes,
+                 const RowSetCollection& row_set_collection,
+                 const std::vector<int32_t>& split_conditions,
+                 RegTree* p_tree,
+                 ::sycl::event* general_event) {
+    nodes_events_.resize(n_nodes_);
+
+    // Two counters per node: [2 * i] for the left part, [2 * i + 1] for the right.
+    parts_size_.ResizeAndFill(qu_, 2 * n_nodes_, 0, general_event);
+
+    for (size_t node_in_set = 0; node_in_set < n_nodes_; node_in_set++) {
+      const int32_t nid = nodes[node_in_set].nid;
+      ::sycl::event& node_event = nodes_events_[node_in_set];
+      const auto& rid_span = row_set_collection[nid];
+      if (rid_span.Size() > 0) {
+        const RegTree::Node& node = (*p_tree)[nid];
+        xgboost::common::Span<size_t> rid_buf = GetData(node_in_set);
+        size_t* part_size = parts_size_.Data() + 2 * node_in_set;
+        int32_t split_condition = split_conditions[node_in_set];
+        // The kernel must be instantiated with the element type the index
+        // buffer was filled with.
+        switch (gmat.index.GetBinTypeSize()) {
+          case common::BinTypeSize::kUint8BinsTypeSize:
+            node_event = Partition<uint8_t>(split_condition, gmat, rid_span, node,
+                                            &rid_buf, part_size, *general_event);
+            break;
+          case common::BinTypeSize::kUint16BinsTypeSize:
+            node_event = Partition<uint16_t>(split_condition, gmat, rid_span, node,
+                                            &rid_buf, part_size, *general_event);
+            break;
+          case common::BinTypeSize::kUint32BinsTypeSize:
+            node_event = Partition<uint32_t>(split_condition, gmat, rid_span, node,
+                                            &rid_buf, part_size, *general_event);
+            break;
+          default:
+            CHECK(false);  // no default behavior
+        }
+      } else {
+        // Nothing to partition: keep a default event so the copy below can
+        // still depend on the whole nodes_events_ vector.
+        node_event = ::sycl::event();
+      }
+    }
+
+    // Copy the counters into result_rows_ once every node kernel is done.
+    *general_event = qu_->memcpy(result_rows_.data(),
+                                 parts_size_.DataConst(),
+                                 sizeof(size_t) * 2 * n_nodes_,
+                                 nodes_events_);
+  }
+
  void MergeToArray(size_t nid,
                    size_t* data_result,
-                    ::sycl::event event) {
+                    ::sycl::event* event) {
+    // Copies the left + right row ids stored for slot `nid` (starting at
+    // data_ + nodes_offsets_[nid]) into data_result. The copy is submitted to
+    // the queue with a dependency on *event; the event returned by memcpy is
+    // not stored, and nothing is submitted when the slot holds no rows.
    size_t n_nodes_total = GetNLeftElems(nid) + GetNRightElems(nid);
    if (n_nodes_total > 0) {
      const size_t* data = data_.Data() + nodes_offsets_[nid];
-      qu_->memcpy(data_result, data, sizeof(size_t) * n_nodes_total, event);
+      qu_->memcpy(data_result, data, sizeof(size_t) * n_nodes_total, *event);
    }
  }

 protected:
  std::vector<size_t> nodes_offsets_;
  std::vector<size_t> result_rows_;
+  std::vector<::sycl::event> nodes_events_;
  size_t n_nodes_;

  USMVector<size_t, MemoryType::on_device> parts_size_;
--- a/plugin/sycl/common/row_set.h
+++ b/plugin/sycl/common/row_set.h
@ -0,0 +1,123 @@
+/*!
+ * Copyright 2017-2023 XGBoost contributors
+ */
+#ifndef PLUGIN_SYCL_COMMON_ROW_SET_H_
+#define PLUGIN_SYCL_COMMON_ROW_SET_H_
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wtautological-constant-compare"
+#pragma GCC diagnostic ignored "-W#pragma-messages"
+#include <xgboost/data.h>
+#pragma GCC diagnostic pop
+#include <algorithm>
+#include <vector>
+#include <utility>
+
+#include "../data.h"
+
+#include <CL/sycl.hpp>
+
+namespace xgboost {
+namespace sycl {
+namespace common {
+
+
+/*! \brief Per-node ranges of row indices that all point into one USM
+ *         (device) index buffer */
+class RowSetCollection {
+ public:
+  /*! \brief Range [begin, end) of row indices that belongs to one node of a
+   *  decision tree. */
+  struct Elem {
+    const size_t* begin{nullptr};
+    const size_t* end{nullptr};
+    bst_node_t node_id{-1};  // node owning this range; -1 marks an unset entry
+    Elem() = default;
+    Elem(const size_t* begin, const size_t* end, bst_node_t node_id = -1)
+        : begin(begin), end(end), node_id(node_id) {}
+
+    /*! \brief number of row indices in the range */
+    inline size_t Size() const { return end - begin; }
+  };
+
+  /*! \brief number of node slots currently stored */
+  inline size_t Size() const { return elem_of_each_node_.size(); }
+
+  /*! \brief read-only access to the range of node_id; the range must be set */
+  inline const Elem& operator[](unsigned node_id) const {
+    const Elem& e = elem_of_each_node_[node_id];
+    CHECK(e.begin != nullptr)
+        << "access element that is not in the set";
+    return e;
+  }
+
+  /*! \brief mutable access to the range of node_id; no check is performed */
+  inline Elem& operator[](unsigned node_id) { return elem_of_each_node_[node_id]; }
+
+  /*! \brief drop all node ranges; the index buffer itself is left untouched */
+  inline void Clear() { elem_of_each_node_.clear(); }
+
+  /*! \brief create the range of node 0, which spans the whole index buffer */
+  inline void Init() {
+    CHECK_EQ(elem_of_each_node_.size(), 0U);
+
+    const size_t* first = row_indices_.Begin();
+    const size_t* last = row_indices_.End();
+    elem_of_each_node_.emplace_back(first, last, 0);
+  }
+
+  /*! \brief underlying buffer of row indices */
+  auto& Data() { return row_indices_; }
+
+  /*!
+   * \brief Replace the range of node_id by two child ranges.
+   *
+   * The first n_left indices of the parent range go to left_node_id and the
+   * remaining n_right to right_node_id; afterwards the parent slot is unset.
+   */
+  inline void AddSplit(unsigned node_id,
+                       unsigned left_node_id,
+                       unsigned right_node_id,
+                       size_t n_left,
+                       size_t n_right) {
+    const Elem e = elem_of_each_node_[node_id];
+    CHECK(e.begin != nullptr);
+    // e.begin is const; rebuild the same position from the mutable buffer start.
+    size_t* const buffer_start = row_indices_.Begin();
+    size_t* begin = buffer_start + (e.begin - buffer_start);
+
+    CHECK_EQ(n_left + n_right, e.Size());
+    CHECK_LE(begin + n_left, e.end);
+    CHECK_EQ(begin + n_left + n_right, e.end);
+
+    // Grow the table so that both children have a slot.
+    for (const unsigned child_id : {left_node_id, right_node_id}) {
+      if (child_id >= elem_of_each_node_.size()) {
+        elem_of_each_node_.resize(child_id + 1, Elem(nullptr, nullptr, -1));
+      }
+    }
+
+    size_t* const split_point = begin + n_left;
+    elem_of_each_node_[left_node_id] = Elem(begin, split_point, left_node_id);
+    elem_of_each_node_[right_node_id] = Elem(split_point, e.end, right_node_id);
+    elem_of_each_node_[node_id] = Elem(nullptr, nullptr, -1);
+  }
+
+ private:
+  // row indices shared by all node ranges
+  USMVector<size_t, MemoryType::on_device> row_indices_;
+  // node_id -> range of that node
+  std::vector<Elem> elem_of_each_node_;
+};
+
+}  // namespace common
+}  // namespace sycl
+}  // namespace xgboost
+
+
+#endif  // PLUGIN_SYCL_COMMON_ROW_SET_H_
--- a/plugin/sycl/data.h
+++ b/plugin/sycl/data.h
@ -26,8 +26,13 @@

 namespace xgboost {
 namespace sycl {
-enum class MemoryType { shared, on_device};
+/*! \brief Shorthand for ::sycl::atomic_ref<T> with relaxed memory order,
+ *         device scope and the ext_intel_global_device_space address space */
+template <typename T>
+using AtomicRef = ::sycl::atomic_ref<T,
+                                    ::sycl::memory_order::relaxed,
+                                    ::sycl::memory_scope::device,
+                                    ::sycl::access::address_space::ext_intel_global_device_space>;

+enum class MemoryType { shared, on_device};

 template <typename T>
 class USMDeleter {
@ -166,20 +171,20 @@ class USMVector {
    }
  }

+  // Sets the size to size_new and memsets the first size_new * sizeof(T)
+  // bytes to `v`. The memset is submitted with a dependency on *event, and
+  // *event is then overwritten with the memset's own event.
+  // queue::memset works on bytes, so `v` is a byte value, not a value of T.
-  ::sycl::event ResizeAndFill(::sycl::queue* qu, size_t size_new, int v) {
+  void ResizeAndFill(::sycl::queue* qu, size_t size_new, int v, ::sycl::event* event) {
    if (size_new <= size_) {
      size_ = size_new;
-      return qu->memset(data_.get(), v, size_new * sizeof(T));
+      *event = qu->memset(data_.get(), v, size_new * sizeof(T), *event);
    } else if (size_new <= capacity_) {
      size_ = size_new;
-      return qu->memset(data_.get(), v, size_new * sizeof(T));
+      *event = qu->memset(data_.get(), v, size_new * sizeof(T), *event);
    } else {
+      // Growing past capacity: fresh storage is allocated and fully
+      // overwritten; size_old and data_old are not used further in this branch.
      size_t size_old = size_;
      auto data_old = data_;
      size_ = size_new;
      capacity_ = size_new;
      data_ = allocate_memory_(qu, size_);
-      return qu->memset(data_.get(), v, size_new * sizeof(T));
+      *event = qu->memset(data_.get(), v, size_new * sizeof(T), *event);
    }
  }

@ -206,11 +211,16 @@ class USMVector {
 struct DeviceMatrix {
  DMatrix* p_mat;  // Pointer to the original matrix on the host
  ::sycl::queue qu_;
-  USMVector<size_t> row_ptr;
+  USMVector<size_t, MemoryType::on_device> row_ptr;
  USMVector<Entry, MemoryType::on_device> data;
  size_t total_offset;

-  DeviceMatrix(::sycl::queue qu, DMatrix* dmat) : p_mat(dmat), qu_(qu) {
+  DeviceMatrix() = default;
+
+  /*!
+   * \brief Copy the rows of a host DMatrix into the device buffers.
+   *
+   * Resizes row_ptr to num_row + 1 and data to num_nonzero, then for every
+   * non-empty SparsePage batch copies its row offsets and its entries through
+   * the queue. The closing offset rows[num_row] = data_offset is written by a
+   * single-task kernel. The queue is waited on after every batch and once
+   * more before total_offset is set.
+   *
+   * \param qu   queue used for the copies and kernels; also stored in qu_
+   * \param dmat host matrix; stored in p_mat
+   */
+  void Init(::sycl::queue qu, DMatrix* dmat) {
+    qu_ = qu;
+    p_mat = dmat;
+
    size_t num_row = 0;
    size_t num_nonzero = 0;
    for (auto &batch : dmat->GetBatches<SparsePage>()) {
@ -221,27 +231,41 @@ struct DeviceMatrix {
    }

    row_ptr.Resize(&qu_, num_row + 1);
+    size_t* rows = row_ptr.Data();
    data.Resize(&qu_, num_nonzero);

    size_t data_offset = 0;
+    ::sycl::event event;
    for (auto &batch : dmat->GetBatches<SparsePage>()) {
      const auto& data_vec = batch.data.HostVector();
      const auto& offset_vec = batch.offset.HostVector();
      size_t batch_size = batch.Size();
      if (batch_size > 0) {
-        std::copy(offset_vec.data(), offset_vec.data() + batch_size,
-                  row_ptr.Data() + batch.base_rowid);
-        if (batch.base_rowid > 0) {
-          for (size_t i = 0; i < batch_size; i++)
-            row_ptr[i + batch.base_rowid] += batch.base_rowid;
+        const auto base_rowid = batch.base_rowid;
+        event = qu.memcpy(row_ptr.Data() + base_rowid, offset_vec.data(),
+                          sizeof(size_t) * batch_size, event);
+        if (base_rowid > 0) {
+          qu.submit([&](::sycl::handler& cgh) {
+            cgh.depends_on(event);
+            cgh.parallel_for<>(::sycl::range<1>(batch_size), [=](::sycl::id<1> pid) {
+              // The offsets of this batch were copied to rows + base_rowid, so
+              // the adjustment has to target the same shifted slots (as the
+              // removed host loop did with row_ptr[i + batch.base_rowid]).
+              // NOTE(review): the shift amount base_rowid is kept from the
+              // removed host loop - confirm it should not be data_offset.
+              const size_t row_id = pid[0] + base_rowid;
+              rows[row_id] += base_rowid;
+            });
+          });
        }
-        qu.memcpy(data.Data() + data_offset,
-                  data_vec.data(),
-                  offset_vec[batch_size] * sizeof(Entry)).wait();
+        event = qu.memcpy(data.Data() + data_offset, data_vec.data(),
+                          sizeof(Entry) * offset_vec[batch_size], event);
        data_offset += offset_vec[batch_size];
+        qu.wait();
      }
    }
-    row_ptr[num_row] = data_offset;
+    qu.submit([&](::sycl::handler& cgh) {
+      cgh.depends_on(event);
+      cgh.single_task<>([=] {
+        rows[num_row] = data_offset;
+      });
+    });
+    qu.wait();
    total_offset = data_offset;
  }

--- a/plugin/sycl/data/gradient_index.cc
+++ b/plugin/sycl/data/gradient_index.cc
@ -0,0 +1,177 @@
+/*!
+ * Copyright 2017-2024 by Contributors
+ * \file gradient_index.cc
+ */
+#include <vector>
+#include <limits>
+#include <algorithm>
+
+#include "gradient_index.h"
+
+#include <CL/sycl.hpp>
+
+namespace xgboost {
+namespace sycl {
+namespace common {
+
+/*!
+ * \brief Find the global bin index of one matrix entry.
+ *
+ * Runs std::upper_bound over the cut values of feature e.index, i.e. the
+ * range [cut_ptrs[e.index], cut_ptrs[e.index + 1]). When the value lies
+ * beyond all cuts of the feature, the result is moved back by one.
+ */
+uint32_t SearchBin(const bst_float* cut_values, const uint32_t* cut_ptrs, Entry const& e) {
+  const uint32_t first_cut = cut_ptrs[e.index];
+  const uint32_t last_cut = cut_ptrs[e.index + 1];
+  const bst_float* hit = std::upper_bound(cut_values + first_cut,
+                                          cut_values + last_cut,
+                                          e.fvalue);
+  const uint32_t bin = hit - cut_values;
+  return bin == last_cut ? bin - 1 : bin;
+}
+
+/*!
+ * \brief Bottom-up merge sort of [begin, end) in ascending order.
+ *
+ * Uses only plain loops and no allocation; \p buf is scratch space that must
+ * offer at least (end - begin) elements. Sorted runs of doubling length are
+ * merged into buf and then copied back into the input range.
+ */
+template <typename BinIdxType>
+void mergeSort(BinIdxType* begin, BinIdxType* end, BinIdxType* buf) {
+  const size_t n_elems = end - begin;
+  for (size_t run = 1; run < n_elems; run *= 2) {
+    // Merge neighbouring runs [lo, mid) and [mid, hi); a trailing run that
+    // has no partner is left in place.
+    for (size_t lo = 0; lo + run < n_elems; lo += 2 * run) {
+      const size_t mid = lo + run;
+      const size_t hi = (mid + run < n_elems) ? mid + run : n_elems;
+      size_t left = lo;
+      size_t right = mid;
+      for (size_t out = lo; out < hi; ++out) {
+        const bool take_left =
+            left < mid && (right == hi || begin[left] < begin[right]);
+        if (take_left) {
+          buf[out] = begin[left++];
+        } else {
+          buf[out] = begin[right++];
+        }
+      }
+      for (size_t k = lo; k < hi; ++k) {
+        begin[k] = buf[k];
+      }
+    }
+  }
+}
+
+/*!
+ * \brief Fill the bin index buffer and the per-bin hit counts on the device.
+ *
+ * One work-item handles one row: every entry of the row is mapped to its bin
+ * with SearchBin, stored in index_data and counted in hit_count_buff through
+ * an atomic increment. Returns immediately when nbins is 0.
+ *
+ * \param qu         queue the kernel and the final copy are submitted to
+ * \param index_data destination, row_stride elements per row
+ * \param dmat       device copy of the matrix (entries and row offsets)
+ * \param nbins      total number of bins; also the padding value written in
+ *                   the sparse case
+ * \param row_stride number of index slots reserved per row
+ * \param offsets    when non-null, offsets[j] is subtracted from the bin of
+ *                   the j-th entry of a row; when null the sparse path is
+ *                   taken (sort each row, pad the tail)
+ */
+template <typename BinIdxType>
+void GHistIndexMatrix::SetIndexData(::sycl::queue qu,
+                                    BinIdxType* index_data,
+                                    const DeviceMatrix &dmat,
+                                    size_t nbins,
+                                    size_t row_stride,
+                                    uint32_t* offsets) {
+  if (nbins == 0) return;
+  const xgboost::Entry *data_ptr = dmat.data.DataConst();
+  const bst_row_t *offset_vec = dmat.row_ptr.DataConst();
+  const size_t num_rows = dmat.row_ptr.Size() - 1;
+  const bst_float* cut_values = cut_device.Values().DataConst();
+  const uint32_t* cut_ptrs = cut_device.Ptrs().DataConst();
+  size_t* hit_count_ptr = hit_count_buff.Data();
+
+  // Sparse case only
+  if (!offsets) {
+    // sort_buff has type uint8_t
+    sort_buff.Resize(&qu, num_rows * row_stride * sizeof(BinIdxType));
+  }
+  // Scratch space for mergeSort; only used inside the kernel when offsets is null.
+  BinIdxType* sort_data = reinterpret_cast<BinIdxType*>(sort_buff.Data());
+
+  auto event = qu.submit([&](::sycl::handler& cgh) {
+    cgh.parallel_for<>(::sycl::range<1>(num_rows), [=](::sycl::item<1> pid) {
+      const size_t i = pid.get_id(0);
+      const size_t ibegin = offset_vec[i];
+      const size_t iend = offset_vec[i + 1];
+      const size_t size = iend - ibegin;
+      const size_t start = i * row_stride;
+      for (bst_uint j = 0; j < size; ++j) {
+        uint32_t idx = SearchBin(cut_values, cut_ptrs, data_ptr[ibegin + j]);
+        // NOTE(review): offsets is indexed by the position j inside the row,
+        // which presumably equals the feature id for dense input - confirm.
+        index_data[start + j] = offsets ? idx - offsets[j] : idx;
+        // Rows are processed concurrently, hence the atomic increment.
+        AtomicRef<size_t> hit_count_ref(hit_count_ptr[idx]);
+        hit_count_ref.fetch_add(1);
+      }
+      if (!offsets) {
+        // Sparse case only
+        mergeSort<BinIdxType>(index_data + start, index_data + start + size, sort_data + start);
+        // Mark the unused tail of the row with the value nbins.
+        for (bst_uint j = size; j < row_stride; ++j) {
+          index_data[start + j] = nbins;
+        }
+      }
+    });
+  });
+  // Copy the device-side counters into the host vector once the kernel is done.
+  qu.memcpy(hit_count.data(), hit_count_ptr, nbins * sizeof(size_t), event);
+  qu.wait();
+}
+
+/*!
+ * \brief Choose the element width of the index and resize its byte buffer.
+ *
+ * Dense input uses uint8_t or uint16_t elements when max_num_bins - 1 fits
+ * into that type. Every other case, including all non-dense input, uses
+ * uint32_t.
+ *
+ * \param n_index number of index elements to reserve
+ * \param isDense whether the narrower element types may be used
+ */
+void GHistIndexMatrix::ResizeIndex(size_t n_index, bool isDense) {
+  const auto largest_bin = max_num_bins - 1;
+
+  BinTypeSize chosen_type = BinTypeSize::kUint32BinsTypeSize;
+  size_t elem_bytes = sizeof(uint32_t);
+  if (isDense) {
+    if (largest_bin <= static_cast<int>(std::numeric_limits<uint8_t>::max())) {
+      chosen_type = BinTypeSize::kUint8BinsTypeSize;
+      elem_bytes = sizeof(uint8_t);
+    } else if (largest_bin <= static_cast<int>(std::numeric_limits<uint16_t>::max())) {
+      chosen_type = BinTypeSize::kUint16BinsTypeSize;
+      elem_bytes = sizeof(uint16_t);
+    }
+  }
+
+  index.SetBinTypeSize(chosen_type);
+  index.Resize(elem_bytes * n_index);
+}
+
+/*!
+ * \brief Build the histogram index for a matrix that was copied to the device.
+ *
+ * Steps: sketch the cuts on the host matrix and copy them to the device, size
+ * the hit counters, take the longest row as row_stride, size the index buffer
+ * and finally fill it with SetIndexData using the matching element type.
+ *
+ * \param qu            queue used for all device work
+ * \param ctx           context forwarded to SketchOnDMatrix
+ * \param p_fmat_device device copy of the matrix (also gives the host matrix)
+ * \param max_bins      forwarded to SketchOnDMatrix, stored as max_num_bins
+ */
+void GHistIndexMatrix::Init(::sycl::queue qu,
+                            Context const * ctx,
+                            const DeviceMatrix& p_fmat_device,
+                            int max_bins) {
+  DMatrix* const host_mat = p_fmat_device.p_mat;
+  nfeatures = host_mat->Info().num_col_;
+
+  cut = xgboost::common::SketchOnDMatrix(ctx, host_mat, max_bins);
+  cut_device.Init(qu, cut);
+
+  max_num_bins = max_bins;
+  const uint32_t nbins = cut.Ptrs().back();
+  this->nbins = nbins;
+  hit_count.resize(nbins, 0);
+  hit_count_buff.Resize(&qu, nbins, 0);
+
+  this->p_fmat = host_mat;
+  const bool isDense = host_mat->IsDense();
+  this->isDense_ = isDense;
+
+  index.setQueue(qu);
+
+  // row_stride is the length of the longest row over all batches.
+  row_stride = 0;
+  for (const auto& batch : host_mat->GetBatches<SparsePage>()) {
+    const auto& row_offset = batch.offset.ConstHostVector();
+    for (size_t i = 1; i < row_offset.size(); ++i) {
+      const size_t row_len = static_cast<size_t>(row_offset[i] - row_offset[i - 1]);
+      row_stride = std::max(row_stride, row_len);
+    }
+  }
+
+  const size_t n_offsets = cut_device.Ptrs().Size() - 1;
+  const size_t n_rows = p_fmat_device.row_ptr.Size() - 1;
+  const size_t n_index = n_rows * row_stride;
+  ResizeIndex(n_index, isDense);
+
+  CHECK_GT(cut_device.Values().Size(), 0U);
+
+  uint32_t* offsets = nullptr;
+  if (!isDense) {
+    /* For a sparse DMatrix the index keeps the global bin id, which already
+       identifies the feature. So offsets stays nullptr and the index is not
+       reduced. */
+    SetIndexData(qu, index.data<uint32_t>(), p_fmat_device, nbins, row_stride, offsets);
+    return;
+  }
+
+  // Dense input: the per-feature offsets are the first n_offsets cut pointers.
+  index.ResizeOffset(n_offsets);
+  offsets = index.Offset();
+  qu.memcpy(offsets, cut_device.Ptrs().DataConst(),
+            sizeof(uint32_t) * n_offsets).wait_and_throw();
+
+  BinTypeSize curent_bin_size = index.GetBinTypeSize();
+  switch (curent_bin_size) {
+    case BinTypeSize::kUint8BinsTypeSize:
+      SetIndexData(qu, index.data<uint8_t>(), p_fmat_device, nbins, row_stride, offsets);
+      break;
+    case BinTypeSize::kUint16BinsTypeSize:
+      SetIndexData(qu, index.data<uint16_t>(), p_fmat_device, nbins, row_stride, offsets);
+      break;
+    default:
+      CHECK_EQ(curent_bin_size, BinTypeSize::kUint32BinsTypeSize);
+      SetIndexData(qu, index.data<uint32_t>(), p_fmat_device, nbins, row_stride, offsets);
+  }
+}
+
+}  // namespace common
+}  // namespace sycl
+}  // namespace xgboost
--- a/plugin/sycl/data/gradient_index.h
+++ b/plugin/sycl/data/gradient_index.h
@ -0,0 +1,216 @@
+/*!
+ * Copyright 2017-2024 by Contributors
+ * \file gradient_index.h
+ */
+#ifndef PLUGIN_SYCL_DATA_GRADIENT_INDEX_H_
+#define PLUGIN_SYCL_DATA_GRADIENT_INDEX_H_
+
+#include <vector>
+
+#include "../data.h"
+#include "../../src/common/hist_util.h"
+
+#include <CL/sycl.hpp>
+
+namespace xgboost {
+namespace sycl {
+namespace common {
+
+/*!
+ * \brief SYCL counterpart of xgboost::common::HistogramCuts. The cut values,
+ *        cut pointers and minimal values are held in USM buffers so that
+ *        device kernels can read them.
+ */
+class HistogramCuts {
+ protected:
+  using BinIdx = uint32_t;
+
+ public:
+  HistogramCuts() {}
+
+  /*!
+   * \brief Create empty cuts bound to \p qu.
+   *
+   * The queue is stored in qu_. Previously the argument was silently dropped
+   * and qu_ stayed default-constructed until Init() was called.
+   */
+  explicit HistogramCuts(::sycl::queue qu) : qu_(qu) {}
+
+  ~HistogramCuts() {
+  }
+
+  /*!
+   * \brief Copy the host cuts into the USM buffers.
+   * \param qu   queue to use; replaces the stored queue
+   * \param cuts host-side cuts whose three vectors are copied
+   */
+  void Init(::sycl::queue qu, xgboost::common::HistogramCuts const& cuts) {
+    qu_ = qu;
+    cut_values_.Init(&qu_, cuts.cut_values_.HostVector());
+    cut_ptrs_.Init(&qu_, cuts.cut_ptrs_.HostVector());
+    min_vals_.Init(&qu_, cuts.min_vals_.HostVector());
+  }
+
+  // Getters for USM buffers to pass pointers into device kernels
+  const USMVector<uint32_t>& Ptrs()      const { return cut_ptrs_;   }
+  const USMVector<float>&    Values()    const { return cut_values_; }
+  const USMVector<float>&    MinValues() const { return min_vals_;   }
+
+ private:
+  USMVector<bst_float> cut_values_;
+  USMVector<uint32_t> cut_ptrs_;
+  USMVector<float> min_vals_;
+  ::sycl::queue qu_;
+};
+
+using BinTypeSize = ::xgboost::common::BinTypeSize;
+
+/*!
+ * \brief Bin indices and optional per-feature offsets, both held in USM
+ *        buffers so that device kernels can read them
+ */
+struct Index {
+  Index() {
+    // Installs the element reader that matches the default element width.
+    SetBinTypeSize(binTypeSize_);
+  }
+  // Neither copyable nor movable.
+  Index(const Index& i) = delete;
+  Index& operator=(Index i) = delete;
+  Index(Index&& i) = delete;
+  Index& operator=(Index&& i) = delete;
+
+  /*!
+   * \brief Value of element i, plus offset_[i % p_] when offsets are present.
+   * NOTE(review): data_ and offset_ are MemoryType::on_device buffers -
+   * confirm that the call sites of this operator may dereference them.
+   */
+  uint32_t operator[](size_t i) const {
+    const uint32_t stored = func_(data_.DataConst(), i);
+    return offset_.Empty() ? stored : stored + offset_[i%p_];
+  }
+
+  /*! \brief Remember the element width and select the matching reader */
+  void SetBinTypeSize(BinTypeSize binTypeSize) {
+    binTypeSize_ = binTypeSize;
+    switch (binTypeSize) {
+      case BinTypeSize::kUint8BinsTypeSize:
+        func_ = &GetValueFromUint8;
+        break;
+      case BinTypeSize::kUint16BinsTypeSize:
+        func_ = &GetValueFromUint16;
+        break;
+      case BinTypeSize::kUint32BinsTypeSize:
+        func_ = &GetValueFromUint32;
+        break;
+      default:
+        // Reached only for an unsupported width, so this check always fails here.
+        CHECK(binTypeSize == BinTypeSize::kUint8BinsTypeSize  ||
+              binTypeSize == BinTypeSize::kUint16BinsTypeSize ||
+              binTypeSize == BinTypeSize::kUint32BinsTypeSize);
+    }
+  }
+  BinTypeSize GetBinTypeSize() const { return binTypeSize_; }
+
+  /*! \brief Byte buffer viewed as an array of T (mutable and const flavours) */
+  template<typename T>
+  T* data() { return reinterpret_cast<T*>(data_.Data()); }
+
+  template<typename T>
+  const T* data() const { return reinterpret_cast<const T*>(data_.DataConst()); }
+
+  /*! \brief Raw pointer to the offsets buffer (mutable and const flavours) */
+  uint32_t* Offset() { return offset_.Data(); }
+
+  const uint32_t* Offset() const { return offset_.DataConst(); }
+
+  /*! \brief Number of elements: byte size divided by the element width */
+  size_t Size() const { return data_.Size() / (binTypeSize_); }
+
+  /*! \brief Resize the index buffer; the size is given in bytes */
+  void Resize(const size_t nBytesData) { data_.Resize(&qu_, nBytesData); }
+
+  /*! \brief Resize the offsets buffer and store nDisps as the modulus p_ */
+  void ResizeOffset(const size_t nDisps) {
+    offset_.Resize(&qu_, nDisps);
+    p_ = nDisps;
+  }
+
+  uint8_t* begin() const { return data_.Begin(); }
+
+  uint8_t* end() const { return data_.End(); }
+
+  /*! \brief Queue used by the following Resize / ResizeOffset calls */
+  void setQueue(::sycl::queue qu) { qu_ = qu; }
+
+ private:
+  // Readers for element i of the byte buffer, one per supported width.
+  static uint32_t GetValueFromUint8(const uint8_t* t, size_t i) {
+    return t[i];
+  }
+  static uint32_t GetValueFromUint16(const uint8_t* t, size_t i) {
+    const uint16_t* typed = reinterpret_cast<const uint16_t*>(t);
+    return typed[i];
+  }
+  static uint32_t GetValueFromUint32(const uint8_t* t, size_t i) {
+    const uint32_t* typed = reinterpret_cast<const uint32_t*>(t);
+    return typed[i];
+  }
+
+  using Func = uint32_t (*)(const uint8_t*, size_t);
+
+  USMVector<uint8_t, MemoryType::on_device> data_;
+  // size of this field is equal to number of features
+  USMVector<uint32_t, MemoryType::on_device> offset_;
+  BinTypeSize binTypeSize_ {BinTypeSize::kUint8BinsTypeSize};
+  size_t p_ {1};
+  Func func_;
+
+  ::sycl::queue qu_;
+};
+
+/*!
+ * \brief Preprocessed global index matrix stored in USM buffers
+ *
+ *  Transform floating values to integer index in histogram. The index holds
+ *  row_stride entries per row.
+ */
+struct GHistIndexMatrix {
+  /*! \brief The index data */
+  Index index;
+  /*! \brief hit count of each index */
+  std::vector<size_t> hit_count;
+  /*! \brief buffers for calculations */
+  USMVector<size_t, MemoryType::on_device> hit_count_buff;
+  USMVector<uint8_t, MemoryType::on_device> sort_buff;
+  /*! \brief The corresponding cuts */
+  xgboost::common::HistogramCuts cut;
+  HistogramCuts cut_device;
+  // The members below are assigned by Init(). The default initialisers keep
+  // reads that happen before Init() well defined.
+  DMatrix* p_fmat{nullptr};
+  size_t max_num_bins{0};
+  size_t nbins{0};
+  size_t nfeatures{0};
+  size_t row_stride{0};
+
+  // Create a global histogram matrix based on a given DMatrix device wrapper
+  void Init(::sycl::queue qu, Context const * ctx,
+            const sycl::DeviceMatrix& p_fmat_device, int max_num_bins);
+
+  template <typename BinIdxType>
+  void SetIndexData(::sycl::queue qu, BinIdxType* index_data,
+                    const sycl::DeviceMatrix &dmat_device,
+                    size_t nbins, size_t row_stride, uint32_t* offsets);
+
+  void ResizeIndex(size_t n_index, bool isDense);
+
+  /*!
+   * \brief Add the hit counts of each feature's bins to counts[fid].
+   *
+   * The sums are accumulated with +=, so the caller has to zero \p counts
+   * first. Does nothing when no cut pointers have been set yet.
+   */
+  inline void GetFeatureCounts(size_t* counts) const {
+    const auto n_ptrs = cut_device.Ptrs().Size();
+    // Without this guard `Size() - 1` would wrap around for empty cuts.
+    if (n_ptrs == 0) return;
+    auto nfeature = n_ptrs - 1;
+    for (unsigned fid = 0; fid < nfeature; ++fid) {
+      auto ibegin = cut_device.Ptrs()[fid];
+      auto iend = cut_device.Ptrs()[fid + 1];
+      for (auto i = ibegin; i < iend; ++i) {
+        *(counts + fid) += hit_count[i];
+      }
+    }
+  }
+  /*! \brief Density flag recorded by Init(); false before Init() is called */
+  inline bool IsDense() const {
+    return isDense_;
+  }
+
+ private:
+  bool isDense_{false};
+};
+
+}  // namespace common
+}  // namespace sycl
+}  // namespace xgboost
+#endif  // PLUGIN_SYCL_DATA_GRADIENT_INDEX_H_
--- a/plugin/sycl/predictor/predictor.cc
+++ b/plugin/sycl/predictor/predictor.cc
@ -280,7 +280,8 @@ class Predictor : public xgboost::Predictor {
                    uint32_t tree_end = 0) const override {
    ::sycl::queue qu = device_manager.GetQueue(ctx_->Device());
    // TODO(razdoburdin): remove temporary workaround after cache fix
-    sycl::DeviceMatrix device_matrix(qu, dmat);
+    sycl::DeviceMatrix device_matrix;
+    device_matrix.Init(qu, dmat);

    auto* out_preds = &predts->predictions;
    if (tree_end == 0) {
--- a/Show More
+++ b/Show More