merge changes from upstream

Hui Liu 2024-04-22 14:22:16 -07:00
commit 45dc134151
378 changed files with 9121 additions and 3812 deletions

View File

@ -17,7 +17,7 @@ AllowShortEnumsOnASingleLine: true
AllowShortBlocksOnASingleLine: Never
AllowShortCaseLabelsOnASingleLine: false
AllowShortFunctionsOnASingleLine: All
AllowShortLambdasOnASingleLine: All
AllowShortLambdasOnASingleLine: Inline
AllowShortIfStatementsOnASingleLine: WithoutElse
AllowShortLoopsOnASingleLine: true
AlwaysBreakAfterDefinitionReturnType: None

View File

@ -8,7 +8,7 @@ updates:
- package-ecosystem: "maven"
directory: "/jvm-packages"
schedule:
interval: "daily"
interval: "monthly"
- package-ecosystem: "maven"
directory: "/jvm-packages/xgboost4j"
schedule:
@ -16,11 +16,11 @@ updates:
- package-ecosystem: "maven"
directory: "/jvm-packages/xgboost4j-gpu"
schedule:
interval: "daily"
interval: "monthly"
- package-ecosystem: "maven"
directory: "/jvm-packages/xgboost4j-example"
schedule:
interval: "daily"
interval: "monthly"
- package-ecosystem: "maven"
directory: "/jvm-packages/xgboost4j-spark"
schedule:
@ -28,4 +28,4 @@ updates:
- package-ecosystem: "maven"
directory: "/jvm-packages/xgboost4j-spark-gpu"
schedule:
interval: "daily"
interval: "monthly"

View File

@ -5,6 +5,10 @@ on: [push, pull_request]
permissions:
contents: read # to fetch code (actions/checkout)
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true
jobs:
build-32bit:
name: Build 32-bit

View File

@ -5,6 +5,10 @@ on: [push, pull_request]
permissions:
contents: read # to fetch code (actions/checkout)
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true
jobs:
test-with-jvm:
name: Test JVM on OS ${{ matrix.os }}
@ -15,31 +19,36 @@ jobs:
os: [windows-latest, ubuntu-latest, macos-11]
steps:
- uses: actions/checkout@e2f20e631ae6d7dd3b768f56a5d2af784dd54791 # v2.5.0
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
with:
submodules: 'true'
- uses: actions/setup-python@7f80679172b057fc5e90d70d197929d454754a5a # v4.3.0
- uses: mamba-org/setup-micromamba@422500192359a097648154e8db4e39bdb6c6eed7 # v1.8.1
with:
python-version: '3.8'
architecture: 'x64'
- uses: actions/setup-java@d202f5dbf7256730fb690ec59f6381650114feb2 # v3.6.0
with:
java-version: 1.8
- name: Install Python packages
run: |
python -m pip install wheel setuptools
python -m pip install awscli
micromamba-version: '1.5.6-0'
environment-name: jvm_tests
create-args: >-
python=3.10
awscli
cache-downloads: true
cache-environment: true
init-shell: bash powershell
- name: Cache Maven packages
uses: actions/cache@6998d139ddd3e68c71e9e398d8e40b71a2f39812 # v3.2.5
uses: actions/cache@13aacd865c20de90d75de3b17ebe84f7a17d57d2 # v4.0.0
with:
path: ~/.m2
key: ${{ runner.os }}-m2-${{ hashFiles('./jvm-packages/pom.xml') }}
restore-keys: ${{ runner.os }}-m2-${{ hashFiles('./jvm-packages/pom.xml') }}
- name: Build xgboost4j.dll
run: |
mkdir build
cd build
cmake .. -G"Visual Studio 17 2022" -A x64 -DJVM_BINDINGS=ON
cmake --build . --config Release
if: matrix.os == 'windows-latest'
- name: Test XGBoost4J (Core)
run: |
cd jvm-packages
@ -47,7 +56,8 @@ jobs:
- name: Extract branch name
shell: bash
run: echo "##[set-output name=branch;]$(echo ${GITHUB_REF#refs/heads/})"
run: |
echo "branch=${GITHUB_REF#refs/heads/}" >> "$GITHUB_OUTPUT"
id: extract_branch
if: |
(github.ref == 'refs/heads/master' || contains(github.ref, 'refs/heads/release_')) &&
@ -58,7 +68,7 @@ jobs:
cd lib/
Rename-Item -Path xgboost4j.dll -NewName xgboost4j_${{ github.sha }}.dll
dir
python -m awscli s3 cp xgboost4j_${{ github.sha }}.dll s3://xgboost-nightly-builds/${{ steps.extract_branch.outputs.branch }}/libxgboost4j/ --acl public-read
python -m awscli s3 cp xgboost4j_${{ github.sha }}.dll s3://xgboost-nightly-builds/${{ steps.extract_branch.outputs.branch }}/libxgboost4j/ --acl public-read --region us-west-2
if: |
(github.ref == 'refs/heads/master' || contains(github.ref, 'refs/heads/release_')) &&
matrix.os == 'windows-latest'
@ -67,11 +77,12 @@ jobs:
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY_IAM_S3_UPLOADER }}
- name: Publish artifact libxgboost4j.dylib to S3
shell: bash -l {0}
run: |
cd lib/
mv -v libxgboost4j.dylib libxgboost4j_${{ github.sha }}.dylib
ls
python -m awscli s3 cp libxgboost4j_${{ github.sha }}.dylib s3://xgboost-nightly-builds/${{ steps.extract_branch.outputs.branch }}/libxgboost4j/ --acl public-read
python -m awscli s3 cp libxgboost4j_${{ github.sha }}.dylib s3://xgboost-nightly-builds/${{ steps.extract_branch.outputs.branch }}/libxgboost4j/ --acl public-read --region us-west-2
if: |
(github.ref == 'refs/heads/master' || contains(github.ref, 'refs/heads/release_')) &&
matrix.os == 'macos-11'

View File

@ -9,6 +9,10 @@ on: [push, pull_request]
permissions:
contents: read # to fetch code (actions/checkout)
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true
# A workflow run is made up of one or more jobs that can run sequentially or in parallel
jobs:
gtest-cpu:
@ -174,7 +178,7 @@ jobs:
- uses: actions/checkout@e2f20e631ae6d7dd3b768f56a5d2af784dd54791 # v2.5.0
with:
submodules: 'true'
- uses: actions/setup-python@7f80679172b057fc5e90d70d197929d454754a5a # v4.3.0
- uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5.0.0
with:
python-version: "3.8"
architecture: 'x64'

View File

@ -9,6 +9,10 @@ defaults:
run:
shell: bash -l {0}
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true
jobs:
python-mypy-lint:
runs-on: ubuntu-latest
@ -310,7 +314,7 @@ jobs:
submodules: 'true'
- name: Set up Python 3.8
uses: actions/setup-python@v4
uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5.0.0
with:
python-version: 3.8

View File

@ -5,6 +5,10 @@ on: [push, pull_request]
permissions:
contents: read # to fetch code (actions/checkout)
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true
jobs:
python-wheels:
name: Build wheel for ${{ matrix.platform_id }}
@ -17,11 +21,11 @@ jobs:
- os: macos-latest
platform_id: macosx_arm64
steps:
- uses: actions/checkout@e2f20e631ae6d7dd3b768f56a5d2af784dd54791 # v2.5.0
- uses: actions/checkout@a12a3943b4bdde767164f792f33f40b04645d846 # v3.0.0
with:
submodules: 'true'
- name: Setup Python
uses: actions/setup-python@7f80679172b057fc5e90d70d197929d454754a5a # v4.3.0
uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5.0.0
with:
python-version: "3.8"
- name: Build wheels

View File

@ -10,6 +10,10 @@ on:
permissions:
contents: read # to fetch code (actions/checkout)
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true
jobs:
test-R-noLD:
if: github.event.comment.body == '/gha run r-nold-test' && contains('OWNER,MEMBER,COLLABORATOR', github.event.comment.author_association)

View File

@ -8,6 +8,10 @@ env:
permissions:
contents: read # to fetch code (actions/checkout)
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true
jobs:
lintr:
runs-on: ${{ matrix.config.os }}
@ -46,7 +50,7 @@ jobs:
MAKEFLAGS="-j$(nproc)" R CMD INSTALL R-package/
Rscript tests/ci_build/lint_r.R $(pwd)
test-R-on-Windows:
test-Rpkg:
runs-on: ${{ matrix.config.os }}
name: Test R on OS ${{ matrix.config.os }}, R ${{ matrix.config.r }}, Compiler ${{ matrix.config.compiler }}, Build ${{ matrix.config.build }}
strategy:
@ -54,11 +58,17 @@ jobs:
matrix:
config:
- {os: windows-latest, r: 'release', compiler: 'mingw', build: 'autotools'}
- {os: ubuntu-latest, r: 'release', compiler: 'none', build: 'cmake'}
env:
R_REMOTES_NO_ERRORS_FROM_WARNINGS: true
RSPM: ${{ matrix.config.rspm }}
steps:
- name: Install system dependencies
run: |
sudo apt update
sudo apt install libcurl4-openssl-dev libssl-dev libssh2-1-dev libgit2-dev libglpk-dev libxml2-dev libharfbuzz-dev libfribidi-dev
if: matrix.config.os == 'ubuntu-latest'
- uses: actions/checkout@e2f20e631ae6d7dd3b768f56a5d2af784dd54791 # v2.5.0
with:
submodules: 'true'
@ -74,7 +84,7 @@ jobs:
key: ${{ runner.os }}-r-${{ matrix.config.r }}-6-${{ hashFiles('R-package/DESCRIPTION') }}
restore-keys: ${{ runner.os }}-r-${{ matrix.config.r }}-6-${{ hashFiles('R-package/DESCRIPTION') }}
- uses: actions/setup-python@7f80679172b057fc5e90d70d197929d454754a5a # v4.3.0
- uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5.0.0
with:
python-version: "3.8"
architecture: 'x64'
@ -89,12 +99,18 @@ jobs:
- name: Test R
run: |
python tests/ci_build/test_r_package.py --compiler='${{ matrix.config.compiler }}' --build-tool="${{ matrix.config.build }}" --task=check
if: matrix.config.compiler != 'none'
- name: Test R
run: |
python tests/ci_build/test_r_package.py --build-tool="${{ matrix.config.build }}" --task=check
if: matrix.config.compiler == 'none'
test-R-on-Debian:
name: Test R package on Debian
runs-on: ubuntu-latest
container:
image: rhub/debian-gcc-devel
image: rhub/debian-gcc-release
steps:
- name: Install system dependencies
@ -114,12 +130,12 @@ jobs:
- name: Install dependencies
shell: bash -l {0}
run: |
/tmp/R-devel/bin/Rscript -e "source('./R-package/tests/helper_scripts/install_deps.R')"
Rscript -e "source('./R-package/tests/helper_scripts/install_deps.R')"
- name: Test R
shell: bash -l {0}
run: |
python3 tests/ci_build/test_r_package.py --r=/tmp/R-devel/bin/R --build-tool=autotools --task=check
python3 tests/ci_build/test_r_package.py --r=/usr/bin/R --build-tool=autotools --task=check
- uses: dorny/paths-filter@v2
id: changes
@ -131,4 +147,4 @@ jobs:
- name: Run document check
if: steps.changes.outputs.r_package == 'true'
run: |
python3 tests/ci_build/test_r_package.py --r=/tmp/R-devel/bin/R --task=doc
python3 tests/ci_build/test_r_package.py --r=/usr/bin/R --task=doc

View File

@ -22,12 +22,12 @@ jobs:
steps:
- name: "Checkout code"
uses: actions/checkout@a12a3943b4bdde767164f792f33f40b04645d846 # tag=v3.0.0
uses: actions/checkout@a12a3943b4bdde767164f792f33f40b04645d846 # v3.0.0
with:
persist-credentials: false
- name: "Run analysis"
uses: ossf/scorecard-action@08b4669551908b1024bb425080c797723083c031 # tag=v2.2.0
uses: ossf/scorecard-action@0864cf19026789058feabb7e87baa5f140aac736 # v2.3.1
with:
results_file: results.sarif
results_format: sarif
@ -41,7 +41,7 @@ jobs:
# Upload the results as artifacts (optional). Commenting out will disable uploads of run results in SARIF
# format to the repository Actions tab.
- name: "Upload artifact"
uses: actions/upload-artifact@0b7f8abb1508181956e8e162db84b466c27e18ce # tag=v3.1.2
uses: actions/upload-artifact@5d5d22a31266ced268874388b861e4b58bb5c2f3 # v4.3.1
with:
name: SARIF file
path: results.sarif
@ -49,6 +49,6 @@ jobs:
# Upload the results to GitHub's code scanning dashboard.
- name: "Upload to code-scanning"
uses: github/codeql-action/upload-sarif@7b6664fa89524ee6e3c3e9749402d5afd69b3cd8 # tag=v2.14.1
uses: github/codeql-action/upload-sarif@83a02f7883b12e0e4e1a146174f5e2292a01e601 # v2.16.4
with:
sarif_file: results.sarif

View File

@ -3,7 +3,7 @@ name: update-rapids
on:
workflow_dispatch:
schedule:
- cron: "0 20 * * *" # Run once daily
- cron: "0 20 * * 1" # Run once weekly
permissions:
pull-requests: write
@ -32,7 +32,7 @@ jobs:
run: |
bash tests/buildkite/update-rapids.sh
- name: Create Pull Request
uses: peter-evans/create-pull-request@v5
uses: peter-evans/create-pull-request@v6
if: github.ref == 'refs/heads/master'
with:
add-paths: |

View File

@ -2101,7 +2101,7 @@ This release marks a major milestone for the XGBoost project.
## v0.90 (2019.05.18)
### XGBoost Python package drops Python 2.x (#4379, #4381)
Python 2.x is reaching its end-of-life at the end of this year. [Many scientific Python packages are now moving to drop Python 2.x](https://python3statement.org/).
Python 2.x is reaching its end-of-life at the end of this year. [Many scientific Python packages are now moving to drop Python 2.x](https://python3statement.github.io/).
### XGBoost4J-Spark now requires Spark 2.4.x (#4377)
* Spark 2.3 is reaching its end-of-life soon. See discussion at #4389.

View File

@ -26,7 +26,6 @@ endif()
target_compile_definitions(
xgboost-r PUBLIC
-DXGBOOST_STRICT_R_MODE=1
-DXGBOOST_CUSTOMIZE_GLOBAL_PRNG=1
-DDMLC_LOG_BEFORE_THROW=0
-DDMLC_DISABLE_STDIN=1
-DDMLC_LOG_CUSTOMIZE=1

View File

@ -56,7 +56,8 @@ Suggests:
testthat,
igraph (>= 1.0.1),
float,
titanic
titanic,
RhpcBLASctl
Depends:
R (>= 4.3.0)
Imports:

View File

@ -20,15 +20,9 @@ export("xgb.attr<-")
export("xgb.attributes<-")
export("xgb.config<-")
export("xgb.parameters<-")
export(cb.cv.predict)
export(cb.early.stop)
export(cb.evaluation.log)
export(cb.gblinear.history)
export(cb.print.evaluation)
export(cb.reset.parameters)
export(cb.save.model)
export(getinfo)
export(setinfo)
export(xgb.Callback)
export(xgb.DMatrix)
export(xgb.DMatrix.hasinfo)
export(xgb.DMatrix.save)
@ -39,6 +33,13 @@ export(xgb.QuantileDMatrix)
export(xgb.QuantileDMatrix.from_iterator)
export(xgb.attr)
export(xgb.attributes)
export(xgb.cb.cv.predict)
export(xgb.cb.early.stop)
export(xgb.cb.evaluation.log)
export(xgb.cb.gblinear.history)
export(xgb.cb.print.evaluation)
export(xgb.cb.reset.parameters)
export(xgb.cb.save.model)
export(xgb.config)
export(xgb.copy.Booster)
export(xgb.create.features)
@ -72,14 +73,10 @@ export(xgb.slice.DMatrix)
export(xgb.train)
export(xgboost)
import(methods)
importClassesFrom(Matrix,CsparseMatrix)
importClassesFrom(Matrix,dgCMatrix)
importClassesFrom(Matrix,dgRMatrix)
importClassesFrom(Matrix,dgeMatrix)
importFrom(Matrix,colSums)
importFrom(Matrix,sparse.model.matrix)
importFrom(Matrix,sparseMatrix)
importFrom(Matrix,sparseVector)
importFrom(Matrix,t)
importFrom(data.table,":=")
importFrom(data.table,as.data.table)
importFrom(data.table,data.table)
@ -101,6 +98,7 @@ importFrom(methods,new)
importFrom(stats,coef)
importFrom(stats,median)
importFrom(stats,predict)
importFrom(stats,sd)
importFrom(stats,variable.names)
importFrom(utils,head)
importFrom(utils,object.size)

File diff suppressed because it is too large

View File

@ -26,6 +26,11 @@ NVL <- function(x, val) {
'multi:softprob', 'rank:pairwise', 'rank:ndcg', 'rank:map'))
}
.RANKING_OBJECTIVES <- function() {
return(c('rank:pairwise', 'rank:ndcg', 'rank:map'))
}
#
# Low-level functions for boosting --------------------------------------------
@ -142,7 +147,7 @@ check.custom.eval <- function(env = parent.frame()) {
if (!is.null(env$feval) &&
is.null(env$maximize) && (
!is.null(env$early_stopping_rounds) ||
has.callbacks(env$callbacks, 'cb.early.stop')))
has.callbacks(env$callbacks, "early_stop")))
stop("Please set 'maximize' to indicate whether the evaluation metric needs to be maximized or not")
}
@ -193,20 +198,20 @@ xgb.iter.update <- function(bst, dtrain, iter, obj) {
# Evaluate one iteration.
# Returns a named vector of evaluation metrics
# with the names in a 'datasetname-metricname' format.
xgb.iter.eval <- function(bst, watchlist, iter, feval) {
xgb.iter.eval <- function(bst, evals, iter, feval) {
handle <- xgb.get.handle(bst)
if (length(watchlist) == 0)
if (length(evals) == 0)
return(NULL)
evnames <- names(watchlist)
evnames <- names(evals)
if (is.null(feval)) {
msg <- .Call(XGBoosterEvalOneIter_R, handle, as.integer(iter), watchlist, as.list(evnames))
msg <- .Call(XGBoosterEvalOneIter_R, handle, as.integer(iter), evals, as.list(evnames))
mat <- matrix(strsplit(msg, '\\s+|:')[[1]][-1], nrow = 2)
res <- structure(as.numeric(mat[2, ]), names = mat[1, ])
} else {
res <- sapply(seq_along(watchlist), function(j) {
w <- watchlist[[j]]
res <- sapply(seq_along(evals), function(j) {
w <- evals[[j]]
## predict using all trees
preds <- predict(bst, w, outputmargin = TRUE, iterationrange = "all")
eval_res <- feval(preds, w)
@ -235,33 +240,43 @@ convert.labels <- function(labels, objective_name) {
}
# Generates random (stratified if needed) CV folds
generate.cv.folds <- function(nfold, nrows, stratified, label, params) {
generate.cv.folds <- function(nfold, nrows, stratified, label, group, params) {
if (NROW(group)) {
if (stratified) {
warning(
paste0(
"Stratified splitting is not supported when using 'group' attribute.",
" Will use unstratified splitting."
)
)
}
return(generate.group.folds(nfold, group))
}
objective <- params$objective
if (!is.character(objective)) {
warning("Will use unstratified splitting (custom objective used)")
stratified <- FALSE
}
# cannot stratify if label is NULL
if (stratified && is.null(label)) {
warning("Will use unstratified splitting (no 'labels' available)")
stratified <- FALSE
}
# cannot do it for rank
objective <- params$objective
if (is.character(objective) && strtrim(objective, 5) == 'rank:') {
stop("\n\tAutomatic generation of CV-folds is not implemented for ranking!\n",
stop("\n\tAutomatic generation of CV-folds is not implemented for ranking without 'group' field!\n",
"\tConsider providing pre-computed CV-folds through the 'folds=' parameter.\n")
}
# shuffle
rnd_idx <- sample.int(nrows)
if (stratified &&
length(label) == length(rnd_idx)) {
if (stratified && length(label) == length(rnd_idx)) {
y <- label[rnd_idx]
# WARNING: some heuristic logic is employed to identify classification setting!
# - For classification, need to convert y labels to factor before making the folds,
# and then do stratification by factor levels.
# - For regression, leave y numeric and do stratification by quantiles.
if (is.character(objective)) {
y <- convert.labels(y, params$objective)
} else {
# If no 'objective' given in params, it means that user either wants to
# use the default 'reg:squarederror' objective or has provided a custom
# obj function. Here, assume classification setting when y has 5 or less
# unique values:
if (length(unique(y)) <= 5) {
y <- factor(y)
}
y <- convert.labels(y, objective)
}
folds <- xgb.createFolds(y = y, k = nfold)
} else {
@ -277,6 +292,29 @@ generate.cv.folds <- function(nfold, nrows, stratified, label, params) {
return(folds)
}
generate.group.folds <- function(nfold, group) {
ngroups <- length(group) - 1
if (ngroups < nfold) {
stop("DMatrix has fewer groups than folds.")
}
seq_groups <- seq_len(ngroups)
indices <- lapply(seq_groups, function(gr) seq(group[gr] + 1, group[gr + 1]))
assignments <- base::split(seq_groups, as.integer(seq_groups %% nfold))
assignments <- unname(assignments)
out <- vector("list", nfold)
randomized_groups <- sample(ngroups)
for (idx in seq_len(nfold)) {
groups_idx_test <- randomized_groups[assignments[[idx]]]
groups_test <- indices[groups_idx_test]
idx_test <- unlist(groups_test)
attributes(idx_test)$group_test <- lengths(groups_test)
attributes(idx_test)$group_train <- lengths(indices[-groups_idx_test])
out[[idx]] <- idx_test
}
return(out)
}
# Creates CV folds stratified by the values of y.
# It was borrowed from caret::createFolds and simplified
# by always returning an unnamed list of fold indices.
@ -454,7 +492,8 @@ depr_par_lut <- matrix(c(
'plot.height', 'plot_height',
'plot.width', 'plot_width',
'n_first_tree', 'trees',
'dummy', 'DUMMY'
'dummy', 'DUMMY',
'watchlist', 'evals'
), ncol = 2, byrow = TRUE)
colnames(depr_par_lut) <- c('old', 'new')

View File

@ -77,26 +77,45 @@ xgb.get.handle <- function(object) {
#' Predict method for XGBoost model
#'
#' Predicted values based on either xgboost model or model handle object.
#' Predict values on data based on xgboost model.
#'
#' @param object Object of class `xgb.Booster`.
#' @param newdata Takes `matrix`, `dgCMatrix`, `dgRMatrix`, `dsparseVector`,
#' @param newdata Takes `data.frame`, `matrix`, `dgCMatrix`, `dgRMatrix`, `dsparseVector`,
#' local data file, or `xgb.DMatrix`.
#' For single-row predictions on sparse data, it is recommended to use the CSR format.
#' If passing a sparse vector, it will take it as a row vector.
#' @param missing Only used when input is a dense matrix. Pick a float value that represents
#' missing values in data (e.g., 0 or some other extreme value).
#'
#' For single-row predictions on sparse data, it's recommended to use CSR format. If passing
#' a sparse vector, it will take it as a row vector.
#'
#' Note that, for repeated predictions on the same data, one might want to create a DMatrix to
#' pass here instead of passing R types like matrices or data frames, as predictions will be
#' faster on DMatrix.
#'
#' If `newdata` is a `data.frame`, be aware that:\itemize{
#' \item Columns will be converted to numeric if they aren't already, which could potentially make
#' the operation slower than in an equivalent `matrix` object.
#' \item The order of the columns must match with that of the data from which the model was fitted
#' (i.e. columns will not be referenced by their names, just by their order in the data).
#' \item If the model was fitted to data with categorical columns, these columns must be of
#' `factor` type here, and must use the same encoding (i.e. have the same levels).
#' \item If `newdata` contains any `factor` columns, they will be converted to base-0
#' encoding (same as during DMatrix creation) - hence, one should not pass a `factor`
#' under a column which during training had a different type.
#' }
#' @param missing Float value that represents missing values in data (e.g., 0 or some other extreme value).
#'
#' This parameter is not used when `newdata` is an `xgb.DMatrix` - in such cases, should pass
#' this as an argument to the DMatrix constructor instead.
#' @param outputmargin Whether the prediction should be returned in the form of original untransformed
#' sum of predictions from boosting iterations' results. E.g., setting `outputmargin=TRUE` for
#' logistic regression would return log-odds instead of probabilities.
#' @param predleaf Whether to predict pre-tree leaf indices.
#' @param predleaf Whether to predict per-tree leaf indices.
#' @param predcontrib Whether to return feature contributions to individual predictions (see Details).
#' @param approxcontrib Whether to use a fast approximation for feature contributions (see Details).
#' @param predinteraction Whether to return contributions of feature interactions to individual predictions (see Details).
#' @param reshape Whether to reshape the vector of predictions to matrix form when there are several
#' prediction outputs per case. No effect if `predleaf`, `predcontrib`,
#' or `predinteraction` is `TRUE`.
#' @param training Whether the predictions are used for training. For dart booster,
#' @param training Whether the prediction result is used for training. For dart booster,
#' predicting in training mode will perform dropout.
#' @param iterationrange Sequence of rounds/iterations from the model to use for prediction, specified by passing
#' a two-dimensional vector with the start and end numbers in the sequence (same format as R's `seq` - i.e.
@ -111,6 +130,12 @@ xgb.get.handle <- function(object) {
#' If passing "all", will use all of the rounds regardless of whether the model had early stopping or not.
#' @param strict_shape Default is `FALSE`. When set to `TRUE`, the output
#' type and shape of predictions are invariant to the model type.
#' @param base_margin Base margin used for boosting from existing model.
#'
#' Note that, if `newdata` is an `xgb.DMatrix` object, this argument will
#' be ignored as it needs to be added to the DMatrix instead (e.g. by passing it as
#' an argument in its constructor, or by calling \link{setinfo.xgb.DMatrix}).
#'
#' @param validate_features When `TRUE`, validate that the Booster's and newdata's feature_names
#' match (only applicable when both `object` and `newdata` have feature names).
#'
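A minimal R sketch (not part of the diff) of the prediction behaviour documented above; the data, parameter values, and the use of `base_margin` with a dense matrix are illustrative assumptions based on this changeset, not a definitive usage recipe.

    library(xgboost)
    # Toy data: two numeric features and a binary label.
    x <- matrix(rnorm(200), ncol = 2)
    y <- as.numeric(x[, 1] + rnorm(100) > 0)
    dtrain <- xgb.DMatrix(x, label = y, nthread = 1)
    bst <- xgb.train(
      params = list(objective = "binary:logistic", nthread = 1),
      data = dtrain, nrounds = 5
    )
    # Dense matrices and data.frames are predicted on directly; column order
    # must match the training data (column names are not consulted).
    p_mat <- predict(bst, x)
    p_df  <- predict(bst, as.data.frame(x))
    # Per the new docs, 'base_margin' is accepted here only for non-DMatrix input;
    # for an xgb.DMatrix it has to be set on the DMatrix itself.
    p_off <- predict(bst, x, outputmargin = TRUE, base_margin = rep(0.5, nrow(x)))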
@ -287,16 +312,80 @@ xgb.get.handle <- function(object) {
predict.xgb.Booster <- function(object, newdata, missing = NA, outputmargin = FALSE,
predleaf = FALSE, predcontrib = FALSE, approxcontrib = FALSE, predinteraction = FALSE,
reshape = FALSE, training = FALSE, iterationrange = NULL, strict_shape = FALSE,
validate_features = FALSE, ...) {
validate_features = FALSE, base_margin = NULL, ...) {
if (validate_features) {
newdata <- validate.features(object, newdata)
}
if (!inherits(newdata, "xgb.DMatrix")) {
is_dmatrix <- inherits(newdata, "xgb.DMatrix")
if (is_dmatrix && !is.null(base_margin)) {
stop(
"'base_margin' is not supported when passing 'xgb.DMatrix' as input.",
" Should be passed as argument to 'xgb.DMatrix' constructor."
)
}
use_as_df <- FALSE
use_as_dense_matrix <- FALSE
use_as_csr_matrix <- FALSE
n_row <- NULL
if (!is_dmatrix) {
inplace_predict_supported <- !predcontrib && !predinteraction && !predleaf
if (inplace_predict_supported) {
booster_type <- xgb.booster_type(object)
if (booster_type == "gblinear" || (booster_type == "dart" && training)) {
inplace_predict_supported <- FALSE
}
}
if (inplace_predict_supported) {
if (is.matrix(newdata)) {
use_as_dense_matrix <- TRUE
} else if (is.data.frame(newdata)) {
# note: since here it turns it into a non-data-frame list,
# needs to keep track of the number of rows it had for later
n_row <- nrow(newdata)
newdata <- lapply(
newdata,
function(x) {
if (is.factor(x)) {
return(as.numeric(x) - 1)
} else {
return(as.numeric(x))
}
}
)
use_as_df <- TRUE
} else if (inherits(newdata, "dgRMatrix")) {
use_as_csr_matrix <- TRUE
csr_data <- list(newdata@p, newdata@j, newdata@x, ncol(newdata))
} else if (inherits(newdata, "dsparseVector")) {
use_as_csr_matrix <- TRUE
n_row <- 1L
i <- newdata@i - 1L
if (storage.mode(i) != "integer") {
storage.mode(i) <- "integer"
}
csr_data <- list(c(0L, length(i)), i, newdata@x, length(newdata))
}
}
} # if (!is_dmatrix)
if (!is_dmatrix && !use_as_dense_matrix && !use_as_csr_matrix && !use_as_df) {
nthread <- xgb.nthread(object)
newdata <- xgb.DMatrix(
newdata,
missing = missing, nthread = NVL(nthread, -1)
missing = missing,
base_margin = base_margin,
nthread = NVL(nthread, -1)
)
is_dmatrix <- TRUE
}
if (is.null(n_row)) {
n_row <- nrow(newdata)
}
@ -354,18 +443,30 @@ predict.xgb.Booster <- function(object, newdata, missing = NA, outputmargin = FA
args$type <- set_type(6)
}
predts <- .Call(
XGBoosterPredictFromDMatrix_R,
xgb.get.handle(object),
newdata,
jsonlite::toJSON(args, auto_unbox = TRUE)
)
json_conf <- jsonlite::toJSON(args, auto_unbox = TRUE)
if (is_dmatrix) {
predts <- .Call(
XGBoosterPredictFromDMatrix_R, xgb.get.handle(object), newdata, json_conf
)
} else if (use_as_dense_matrix) {
predts <- .Call(
XGBoosterPredictFromDense_R, xgb.get.handle(object), newdata, missing, json_conf, base_margin
)
} else if (use_as_csr_matrix) {
predts <- .Call(
XGBoosterPredictFromCSR_R, xgb.get.handle(object), csr_data, missing, json_conf, base_margin
)
} else if (use_as_df) {
predts <- .Call(
XGBoosterPredictFromColumnar_R, xgb.get.handle(object), newdata, missing, json_conf, base_margin
)
}
names(predts) <- c("shape", "results")
shape <- predts$shape
arr <- predts$results
n_ret <- length(arr)
n_row <- nrow(newdata)
if (n_row != shape[1]) {
stop("Incorrect predict shape.")
}
@ -970,6 +1071,10 @@ xgb.best_iteration <- function(bst) {
#' coef(model)
#' @export
coef.xgb.Booster <- function(object, ...) {
return(.internal.coef.xgb.Booster(object, add_names = TRUE))
}
.internal.coef.xgb.Booster <- function(object, add_names = TRUE) {
booster_type <- xgb.booster_type(object)
if (booster_type != "gblinear") {
stop("Coefficients are not defined for Booster type ", booster_type)
@ -988,21 +1093,27 @@ coef.xgb.Booster <- function(object, ...) {
intercepts <- weights[seq(sep + 1, length(weights))]
intercepts <- intercepts + as.numeric(base_score)
feature_names <- xgb.feature_names(object)
if (!NROW(feature_names)) {
# This mimics the default naming in R which names columns as "V1..N"
# when names are needed but not available
feature_names <- paste0("V", seq(1L, num_feature))
if (add_names) {
feature_names <- xgb.feature_names(object)
if (!NROW(feature_names)) {
# This mimics the default naming in R which names columns as "V1..N"
# when names are needed but not available
feature_names <- paste0("V", seq(1L, num_feature))
}
feature_names <- c("(Intercept)", feature_names)
}
feature_names <- c("(Intercept)", feature_names)
if (n_cols == 1L) {
out <- c(intercepts, coefs)
names(out) <- feature_names
if (add_names) {
names(out) <- feature_names
}
} else {
coefs <- matrix(coefs, nrow = num_feature, byrow = TRUE)
dim(intercepts) <- c(1L, n_cols)
out <- rbind(intercepts, coefs)
row.names(out) <- feature_names
if (add_names) {
row.names(out) <- feature_names
}
# TODO: if a class names attributes is added,
# should use those names here.
}
@ -1154,12 +1265,9 @@ print.xgb.Booster <- function(x, ...) {
cat(" ", paste(attr_names, collapse = ", "), "\n")
}
if (!is.null(R_attrs$callbacks) && length(R_attrs$callbacks) > 0) {
cat('callbacks:\n')
lapply(callback.calls(R_attrs$callbacks), function(x) {
cat(' ')
print(x)
})
additional_attr <- setdiff(names(R_attrs), .reserved_cb_names)
if (NROW(additional_attr)) {
cat("callbacks:\n ", paste(additional_attr, collapse = ", "), "\n")
}
if (!is.null(R_attrs$evaluation_log)) {

View File

@ -28,10 +28,27 @@
#' 'xgb.QuantileDMatrix'.
#' \item Single-row CSR matrices, as class `dsparseVector` from package `Matrix`, which is interpreted
#' as a single row (only when making predictions from a fitted model).
#' \item Text files in SVMLight / LibSVM formats, passed as a path to the file. These are \bold{not}
#' supported for 'xgb.QuantileDMatrix'.
#' \item Binary files generated by \link{xgb.DMatrix.save}, passed as a path to the file. These are
#' \bold{not} supported for 'xgb.QuantileDMatrix'.
#' \item Text files in a supported format, passed as a `character` variable containing the URI path to
#' the file, with an optional format specifier.
#'
#' These are \bold{not} supported for `xgb.QuantileDMatrix`. Supported formats are:\itemize{
#' \item XGBoost's own binary format for DMatrices, as produced by \link{xgb.DMatrix.save}.
#' \item SVMLight (a.k.a. LibSVM) format for CSR matrices. This format can be signaled by suffix
#' `?format=libsvm` at the end of the file path. It will be the default format if not
#' otherwise specified.
#' \item CSV files (comma-separated values). This format can be specified by adding suffix
#' `?format=csv` at the end of the file path. It will \bold{not} be auto-deduced from file extensions.
#' }
#'
#' Be aware that the format of the file will not be auto-deduced - for example, if a file is named 'file.csv',
#' it will not look at the extension or file contents to determine that it is a comma-separated values file.
#' Instead, the format must be specified following the URI format, so the input to `data` should be passed
#' like this: `"file.csv?format=csv"` (or `"file.csv?format=csv&label_column=0"` if the first column
#' corresponds to the labels).
#'
#' For more information about passing text files as input, see the articles
#' \href{https://xgboost.readthedocs.io/en/stable/tutorials/input_format.html}{Text Input Format of DMatrix} and
#' \href{https://xgboost.readthedocs.io/en/stable/python/python_intro.html#python-data-interface}{Data Interface}.
#' }
#' @param label Label of the training data. For classification problems, should be passed encoded as
#' integers with numeration starting at zero.
@ -81,6 +98,13 @@
#' @param label_lower_bound Lower bound for survival training.
#' @param label_upper_bound Upper bound for survival training.
#' @param feature_weights Set feature weights for column sampling.
#' @param data_split_mode When passing a URI (as R `character`) as input, this signals
#' whether to split by row or column. Allowed values are `"row"` and `"col"`.
#'
#' In distributed mode, the file is split accordingly; otherwise this is only an indicator of
#' how the file was split beforehand. Defaults to `"row"`.
#'
#' This is not used when `data` is not a URI.
#' @return An 'xgb.DMatrix' object. If calling 'xgb.QuantileDMatrix', it will have additional
#' subclass 'xgb.QuantileDMatrix'.
#'
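To make the URI handling above concrete, a small R sketch (illustrative only, not from this changeset); the file name, contents, and use of `label_column` are assumptions.

    library(xgboost)
    # Write a tiny headerless CSV: first column is the label, the rest are features.
    tmp <- file.path(tempdir(), "example.csv")
    write.table(cbind(c(0, 1, 0), matrix(rnorm(6), nrow = 3)),
                tmp, sep = ",", row.names = FALSE, col.names = FALSE)
    # The format is never auto-deduced, so it must be spelled out in the URI.
    dm <- xgb.DMatrix(paste0(tmp, "?format=csv&label_column=0"),
                      data_split_mode = "row")
    dim(dm)  # 3 rows x 2 feature columns; labels come from column 0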
@ -117,7 +141,8 @@ xgb.DMatrix <- function(
qid = NULL,
label_lower_bound = NULL,
label_upper_bound = NULL,
feature_weights = NULL
feature_weights = NULL,
data_split_mode = "row"
) {
if (!is.null(group) && !is.null(qid)) {
stop("Either one of 'group' or 'qid' should be NULL")
@ -131,7 +156,14 @@ xgb.DMatrix <- function(
)
}
data <- path.expand(data)
handle <- .Call(XGDMatrixCreateFromFile_R, data, as.integer(silent))
if (data_split_mode == "row") {
data_split_mode <- 0L
} else if (data_split_mode == "col") {
data_split_mode <- 1L
} else {
stop("Passed invalid 'data_split_mode': ", data_split_mode)
}
handle <- .Call(XGDMatrixCreateFromURI_R, data, as.integer(silent), data_split_mode)
} else if (is.matrix(data)) {
handle <- .Call(
XGDMatrixCreateFromMat_R, data, missing, nthread
@ -1227,8 +1259,11 @@ xgb.get.DMatrix.data <- function(dmat) {
#' Get a new DMatrix containing the specified rows of
#' original xgb.DMatrix object
#'
#' @param object Object of class "xgb.DMatrix"
#' @param idxset a integer vector of indices of rows needed
#' @param object Object of class "xgb.DMatrix".
#' @param idxset An integer vector of indices of rows needed (base-1 indexing).
#' @param allow_groups Whether to allow slicing an `xgb.DMatrix` with `group` (or
#' equivalently `qid`) field. Note that in such case, the result will not have
#' the groups anymore - they need to be set manually through `setinfo`.
#' @param colset currently not used (columns subsetting is not available)
#'
#' @examples
@ -1243,11 +1278,11 @@ xgb.get.DMatrix.data <- function(dmat) {
#'
#' @rdname xgb.slice.DMatrix
#' @export
xgb.slice.DMatrix <- function(object, idxset) {
xgb.slice.DMatrix <- function(object, idxset, allow_groups = FALSE) {
if (!inherits(object, "xgb.DMatrix")) {
stop("object must be xgb.DMatrix")
}
ret <- .Call(XGDMatrixSliceDMatrix_R, object, idxset)
ret <- .Call(XGDMatrixSliceDMatrix_R, object, idxset, allow_groups)
attr_list <- attributes(object)
nr <- nrow(object)
@ -1264,7 +1299,15 @@ xgb.slice.DMatrix <- function(object, idxset) {
}
}
}
return(structure(ret, class = "xgb.DMatrix"))
out <- structure(ret, class = "xgb.DMatrix")
parent_fields <- as.list(attributes(object)$fields)
if (NROW(parent_fields)) {
child_fields <- parent_fields[!(names(parent_fields) %in% c("group", "qid"))]
child_fields <- as.environment(child_fields)
attributes(out)$fields <- child_fields
}
return(out)
}
#' @rdname xgb.slice.DMatrix
@ -1308,11 +1351,11 @@ print.xgb.DMatrix <- function(x, verbose = FALSE, ...) {
}
cat(class_print, ' dim:', nrow(x), 'x', ncol(x), ' info: ')
infos <- character(0)
if (xgb.DMatrix.hasinfo(x, 'label')) infos <- 'label'
if (xgb.DMatrix.hasinfo(x, 'weight')) infos <- c(infos, 'weight')
if (xgb.DMatrix.hasinfo(x, 'base_margin')) infos <- c(infos, 'base_margin')
if (length(infos) == 0) infos <- 'NA'
infos <- names(attributes(x)$fields)
infos <- infos[infos != "feature_name"]
if (!NROW(infos)) infos <- "NA"
infos <- infos[order(infos)]
infos <- paste(infos, collapse = ", ")
cat(infos)
cnames <- colnames(x)
cat(' colnames:')

View File

@ -6,6 +6,7 @@
#' @param fname the name of the file to write.
#'
#' @examples
#' \dontshow{RhpcBLASctl::omp_set_num_threads(1)}
#' data(agaricus.train, package='xgboost')
#' dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))
#' fname <- file.path(tempdir(), "xgb.DMatrix.data")

View File

@ -4,7 +4,14 @@
#' values of one or more global-scope parameters. Use \code{xgb.get.config} to fetch the current
#' values of all global-scope parameters (listed in
#' \url{https://xgboost.readthedocs.io/en/stable/parameter.html}).
#' @details
#' Note that serialization-related functions might use a globally-configured number of threads,
#' which is managed by the system's OpenMP (OMP) configuration instead. Typically, XGBoost methods
#' accept an `nthreads` parameter, but some methods like `readRDS` might get executed before such
#' a parameter can be supplied.
#'
#' The number of OMP threads can in turn be configured for example through an environment variable
#' `OMP_NUM_THREADS` (needs to be set before R is started), or through `RhpcBLASctl::omp_set_num_threads`.
#' @rdname xgbConfig
#' @title Set and get global configuration
#' @name xgb.set.config, xgb.get.config
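A brief R sketch of the thread-control note above (illustrative, not from the diff); the model file path is a hypothetical placeholder.

    library(xgboost)
    # Cap OpenMP threads before deserialization, since readRDS() itself cannot
    # take an 'nthread' argument. Alternatively, set OMP_NUM_THREADS before R starts.
    RhpcBLASctl::omp_set_num_threads(1)
    bst <- readRDS("xgb_model.rds")  # hypothetical path to a booster saved with saveRDS()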

View File

@ -71,7 +71,6 @@
#' new.dtest <- xgb.DMatrix(
#' data = new.features.test, label = agaricus.test$label, nthread = 2
#' )
#' watchlist <- list(train = new.dtrain)
#' bst <- xgb.train(params = param, data = new.dtrain, nrounds = nrounds, nthread = 2)
#'
#' # Model accuracy with new features

View File

@ -1,6 +1,6 @@
#' Cross Validation
#'
#' The cross validation function of xgboost
#' The cross validation function of xgboost.
#'
#' @param params the list of parameters. The complete list of parameters is
#' available in the \href{http://xgboost.readthedocs.io/en/latest/parameter.html}{online documentation}. Below
@ -19,15 +19,19 @@
#'
#' See \code{\link{xgb.train}} for further details.
#' See also demo/ for walkthrough example in R.
#' @param data takes an \code{xgb.DMatrix}, \code{matrix}, or \code{dgCMatrix} as the input.
#'
#' Note that, while `params` accepts a `seed` entry and will use such parameter for model training if
#' supplied, this seed is not used for creation of train-test splits, which instead rely on R's own RNG
#' system - thus, for reproducible results, one needs to call the `set.seed` function beforehand.
#' @param data An `xgb.DMatrix` object, with corresponding fields like `label` or bounds as required
#' for model training by the objective.
#'
#' Note that only the basic `xgb.DMatrix` class is supported - variants such as `xgb.QuantileDMatrix`
#' or `xgb.ExternalDMatrix` are not supported here.
#' @param nrounds the max number of iterations
#' @param nfold the original dataset is randomly partitioned into \code{nfold} equal size subsamples.
#' @param label vector of response values. Should be provided only when data is an R-matrix.
#' @param missing is only used when input is a dense matrix. By default is set to NA, which means
#' that NA values should be considered as 'missing' by the algorithm.
#' Sometimes, 0 or other extreme value might be used to represent missing values.
#' @param prediction A logical value indicating whether to return the test fold predictions
#' from each CV model. This parameter engages the \code{\link{cb.cv.predict}} callback.
#' from each CV model. This parameter engages the \code{\link{xgb.cb.cv.predict}} callback.
#' @param showsd \code{boolean}, whether to show standard deviation of cross validation
#' @param metrics, list of evaluation metrics to be used in cross validation,
#' when it is not specified, the evaluation metric is chosen according to objective function.
@ -47,27 +51,44 @@
#' @param feval customized evaluation function. Returns
#' \code{list(metric='metric-name', value='metric-value')} with given
#' prediction and dtrain.
#' @param stratified a \code{boolean} indicating whether sampling of folds should be stratified
#' by the values of outcome labels.
#' @param stratified A \code{boolean} indicating whether sampling of folds should be stratified
#' by the values of outcome labels. For real-valued labels in regression objectives,
#' stratification will be done by discretizing the labels into up to 5 buckets beforehand.
#'
#' If passing "auto", will be set to `TRUE` if the objective in `params` is a classification
#' objective (from XGBoost's built-in objectives, doesn't apply to custom ones), and to
#' `FALSE` otherwise.
#'
#' This parameter is ignored when `data` has a `group` field - in such case, the splitting
#' will be based on whole groups (note that this might make the folds have different sizes).
#'
#' Value `TRUE` here is \bold{not} supported for custom objectives.
#' @param folds \code{list} provides a possibility to use a list of pre-defined CV folds
#' (each element must be a vector of test fold's indices). When folds are supplied,
#' the \code{nfold} and \code{stratified} parameters are ignored.
#'
#' If `data` has a `group` field and the objective requires this field, each fold (list element)
#' must additionally have two attributes (retrievable through \link{attributes}) named `group_test`
#' and `group_train`, which should hold the `group` to assign through \link{setinfo.xgb.DMatrix} to
#' the resulting DMatrices.
#' @param train_folds \code{list} specifying which indices to use for training. If \code{NULL}
#' (the default) all indices not specified in \code{folds} will be used for training.
#'
#' This is not supported when `data` has `group` field.
#' @param verbose \code{boolean}, print the statistics during the process
#' @param print_every_n Print each n-th iteration evaluation messages when \code{verbose>0}.
#' Default is 1 which means all messages are printed. This parameter is passed to the
#' \code{\link{cb.print.evaluation}} callback.
#' \code{\link{xgb.cb.print.evaluation}} callback.
#' @param early_stopping_rounds If \code{NULL}, the early stopping function is not triggered.
#' If set to an integer \code{k}, training with a validation set will stop if the performance
#' doesn't improve for \code{k} rounds.
#' Setting this parameter engages the \code{\link{cb.early.stop}} callback.
#' Setting this parameter engages the \code{\link{xgb.cb.early.stop}} callback.
#' @param maximize If \code{feval} and \code{early_stopping_rounds} are set,
#' then this parameter must be set as well.
#' When it is \code{TRUE}, it means the larger the evaluation score the better.
#' This parameter is passed to the \code{\link{cb.early.stop}} callback.
#' This parameter is passed to the \code{\link{xgb.cb.early.stop}} callback.
#' @param callbacks a list of callback functions to perform various task during boosting.
#' See \code{\link{callbacks}}. Some of the callbacks are automatically created depending on the
#' See \code{\link{xgb.Callback}}. Some of the callbacks are automatically created depending on the
#' parameters' values. User can provide either existing or their own callback methods in order
#' to customize the training process.
#' @param ... other parameters to pass to \code{params}.
@ -90,25 +111,25 @@
#' \itemize{
#' \item \code{call} a function call.
#' \item \code{params} parameters that were passed to the xgboost library. Note that it does not
#' capture parameters changed by the \code{\link{cb.reset.parameters}} callback.
#' \item \code{callbacks} callback functions that were either automatically assigned or
#' explicitly passed.
#' capture parameters changed by the \code{\link{xgb.cb.reset.parameters}} callback.
#' \item \code{evaluation_log} evaluation history stored as a \code{data.table} with the
#' first column corresponding to iteration number and the rest corresponding to the
#' CV-based evaluation means and standard deviations for the training and test CV-sets.
#' It is created by the \code{\link{cb.evaluation.log}} callback.
#' It is created by the \code{\link{xgb.cb.evaluation.log}} callback.
#' \item \code{niter} number of boosting iterations.
#' \item \code{nfeatures} number of features in training data.
#' \item \code{folds} the list of CV folds' indices - either those passed through the \code{folds}
#' parameter or randomly generated.
#' \item \code{best_iteration} iteration number with the best evaluation metric value
#' (only available with early stopping).
#' \item \code{pred} CV prediction values available when \code{prediction} is set.
#' It is either vector or matrix (see \code{\link{cb.cv.predict}}).
#' \item \code{models} a list of the CV folds' models. It is only available with the explicit
#' setting of the \code{cb.cv.predict(save_models = TRUE)} callback.
#' }
#'
#' Plus other potential elements that are the result of callbacks, such as a list `cv_predict` with
#' a sub-element `pred` when passing `prediction = TRUE`, which is added by the \link{xgb.cb.cv.predict}
#' callback (note that one can also pass it manually under `callbacks` with different settings,
#' such as saving also the models created during cross validation); or a list `early_stop` which
#' will contain elements such as `best_iteration` when using the early stopping callback (\link{xgb.cb.early.stop}).
#'
#' @examples
#' data(agaricus.train, package='xgboost')
#' dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))
@ -118,13 +139,14 @@
#' print(cv, verbose=TRUE)
#'
#' @export
xgb.cv <- function(params = list(), data, nrounds, nfold, label = NULL, missing = NA,
xgb.cv <- function(params = list(), data, nrounds, nfold,
prediction = FALSE, showsd = TRUE, metrics = list(),
obj = NULL, feval = NULL, stratified = TRUE, folds = NULL, train_folds = NULL,
obj = NULL, feval = NULL, stratified = "auto", folds = NULL, train_folds = NULL,
verbose = TRUE, print_every_n = 1L,
early_stopping_rounds = NULL, maximize = NULL, callbacks = list(), ...) {
check.deprecation(...)
stopifnot(inherits(data, "xgb.DMatrix"))
if (inherits(data, "xgb.DMatrix") && .Call(XGCheckNullPtr_R, data)) {
stop("'data' is an invalid 'xgb.DMatrix' object. Must be constructed again.")
}
@ -137,16 +159,22 @@ xgb.cv <- function(params = list(), data, nrounds, nfold, label = NULL, missing
check.custom.obj()
check.custom.eval()
# Check the labels
if ((inherits(data, 'xgb.DMatrix') && !xgb.DMatrix.hasinfo(data, 'label')) ||
(!inherits(data, 'xgb.DMatrix') && is.null(label))) {
stop("Labels must be provided for CV either through xgb.DMatrix, or through 'label=' when 'data' is matrix")
} else if (inherits(data, 'xgb.DMatrix')) {
if (!is.null(label))
warning("xgb.cv: label will be ignored, since data is of type xgb.DMatrix")
cv_label <- getinfo(data, 'label')
} else {
cv_label <- label
if (stratified == "auto") {
if (is.character(params$objective)) {
stratified <- (
(params$objective %in% .CLASSIFICATION_OBJECTIVES())
&& !(params$objective %in% .RANKING_OBJECTIVES())
)
} else {
stratified <- FALSE
}
}
# Check the labels and groups
cv_label <- getinfo(data, "label")
cv_group <- getinfo(data, "group")
if (!is.null(train_folds) && NROW(cv_group)) {
stop("'train_folds' is not supported for DMatrix object with 'group' field.")
}
# CV folds
@ -157,63 +185,64 @@ xgb.cv <- function(params = list(), data, nrounds, nfold, label = NULL, missing
} else {
if (nfold <= 1)
stop("'nfold' must be > 1")
folds <- generate.cv.folds(nfold, nrow(data), stratified, cv_label, params)
folds <- generate.cv.folds(nfold, nrow(data), stratified, cv_label, cv_group, params)
}
# Callbacks
tmp <- .process.callbacks(callbacks, is_cv = TRUE)
callbacks <- tmp$callbacks
cb_names <- tmp$cb_names
rm(tmp)
# Early stopping callback
if (!is.null(early_stopping_rounds) && !("early_stop" %in% cb_names)) {
callbacks <- add.callback(
callbacks,
xgb.cb.early.stop(
early_stopping_rounds,
maximize = maximize,
verbose = verbose
),
as_first_elt = TRUE
)
}
# verbosity & evaluation printing callback:
params <- c(params, list(silent = 1))
print_every_n <- max(as.integer(print_every_n), 1L)
if (!has.callbacks(callbacks, 'cb.print.evaluation') && verbose) {
callbacks <- add.cb(callbacks, cb.print.evaluation(print_every_n, showsd = showsd))
if (verbose && !("print_evaluation" %in% cb_names)) {
callbacks <- add.callback(callbacks, xgb.cb.print.evaluation(print_every_n, showsd = showsd))
}
# evaluation log callback: always is on in CV
evaluation_log <- list()
if (!has.callbacks(callbacks, 'cb.evaluation.log')) {
callbacks <- add.cb(callbacks, cb.evaluation.log())
}
# Early stopping callback
stop_condition <- FALSE
if (!is.null(early_stopping_rounds) &&
!has.callbacks(callbacks, 'cb.early.stop')) {
callbacks <- add.cb(callbacks, cb.early.stop(early_stopping_rounds,
maximize = maximize, verbose = verbose))
if (!("evaluation_log" %in% cb_names)) {
callbacks <- add.callback(callbacks, xgb.cb.evaluation.log())
}
# CV-predictions callback
if (prediction &&
!has.callbacks(callbacks, 'cb.cv.predict')) {
callbacks <- add.cb(callbacks, cb.cv.predict(save_models = FALSE))
if (prediction && !("cv_predict" %in% cb_names)) {
callbacks <- add.callback(callbacks, xgb.cb.cv.predict(save_models = FALSE))
}
# Sort the callbacks into categories
cb <- categorize.callbacks(callbacks)
# create the booster-folds
# train_folds
dall <- xgb.get.DMatrix(
data = data,
label = label,
missing = missing,
weight = NULL,
nthread = params$nthread
)
dall <- data
bst_folds <- lapply(seq_along(folds), function(k) {
dtest <- xgb.slice.DMatrix(dall, folds[[k]])
dtest <- xgb.slice.DMatrix(dall, folds[[k]], allow_groups = TRUE)
# code originally contributed by @RolandASc on stackoverflow
if (is.null(train_folds))
dtrain <- xgb.slice.DMatrix(dall, unlist(folds[-k]))
dtrain <- xgb.slice.DMatrix(dall, unlist(folds[-k]), allow_groups = TRUE)
else
dtrain <- xgb.slice.DMatrix(dall, train_folds[[k]])
dtrain <- xgb.slice.DMatrix(dall, train_folds[[k]], allow_groups = TRUE)
if (!is.null(attributes(folds[[k]])$group_test)) {
setinfo(dtest, "group", attributes(folds[[k]])$group_test)
setinfo(dtrain, "group", attributes(folds[[k]])$group_train)
}
bst <- xgb.Booster(
params = params,
cachelist = list(dtrain, dtest),
modelfile = NULL
)
bst <- bst$bst
list(dtrain = dtrain, bst = bst, watchlist = list(train = dtrain, test = dtest), index = folds[[k]])
list(dtrain = dtrain, bst = bst, evals = list(train = dtrain, test = dtest), index = folds[[k]])
})
rm(dall)
# a "basket" to collect some results from callbacks
basket <- list()
# extract parameters that can affect the relationship b/w #trees and #iterations
num_class <- max(as.numeric(NVL(params[['num_class']], 1)), 1) # nolint
@ -222,10 +251,25 @@ xgb.cv <- function(params = list(), data, nrounds, nfold, label = NULL, missing
begin_iteration <- 1
end_iteration <- nrounds
.execute.cb.before.training(
callbacks,
bst_folds,
dall,
NULL,
begin_iteration,
end_iteration
)
# synchronous CV boosting: run CV folds' models within each iteration
for (iteration in begin_iteration:end_iteration) {
for (f in cb$pre_iter) f()
.execute.cb.before.iter(
callbacks,
bst_folds,
dall,
NULL,
iteration
)
msg <- lapply(bst_folds, function(fd) {
xgb.iter.update(
@ -236,33 +280,42 @@ xgb.cv <- function(params = list(), data, nrounds, nfold, label = NULL, missing
)
xgb.iter.eval(
bst = fd$bst,
watchlist = fd$watchlist,
evals = fd$evals,
iter = iteration - 1,
feval = feval
)
})
msg <- simplify2array(msg)
# Note: these variables might look unused here, but they are used in the callbacks
bst_evaluation <- rowMeans(msg) # nolint
bst_evaluation_err <- apply(msg, 1, sd) # nolint
for (f in cb$post_iter) f()
should_stop <- .execute.cb.after.iter(
callbacks,
bst_folds,
dall,
NULL,
iteration,
msg
)
if (stop_condition) break
if (should_stop) break
}
for (f in cb$finalize) f(finalize = TRUE)
cb_outputs <- .execute.cb.after.training(
callbacks,
bst_folds,
dall,
NULL,
iteration,
msg
)
# the CV result
ret <- list(
call = match.call(),
params = params,
callbacks = callbacks,
evaluation_log = evaluation_log,
niter = end_iteration,
nfeatures = ncol(data),
niter = iteration,
nfeatures = ncol(dall),
folds = folds
)
ret <- c(ret, basket)
ret <- c(ret, cb_outputs)
class(ret) <- 'xgb.cv.synchronous'
return(invisible(ret))
@ -285,8 +338,8 @@ xgb.cv <- function(params = list(), data, nrounds, nfold, label = NULL, missing
#' @examples
#' data(agaricus.train, package='xgboost')
#' train <- agaricus.train
#' cv <- xgb.cv(data = train$data, label = train$label, nfold = 5, max_depth = 2,
#' eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic")
#' cv <- xgb.cv(data = xgb.DMatrix(train$data, label = train$label), nfold = 5, max_depth = 2,
#' eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic")
#' print(cv)
#' print(cv, verbose=TRUE)
#'
@ -308,23 +361,16 @@ print.xgb.cv.synchronous <- function(x, verbose = FALSE, ...) {
paste0('"', unlist(x$params), '"'),
sep = ' = ', collapse = ', '), '\n', sep = '')
}
if (!is.null(x$callbacks) && length(x$callbacks) > 0) {
cat('callbacks:\n')
lapply(callback.calls(x$callbacks), function(x) {
cat(' ')
print(x)
})
}
for (n in c('niter', 'best_iteration')) {
if (is.null(x[[n]]))
if (is.null(x$early_stop[[n]]))
next
cat(n, ': ', x[[n]], '\n', sep = '')
cat(n, ': ', x$early_stop[[n]], '\n', sep = '')
}
if (!is.null(x$pred)) {
if (!is.null(x$cv_predict$pred)) {
cat('pred:\n')
str(x$pred)
str(x$cv_predict$pred)
}
}
@ -332,9 +378,9 @@ print.xgb.cv.synchronous <- function(x, verbose = FALSE, ...) {
cat('evaluation_log:\n')
print(x$evaluation_log, row.names = FALSE, ...)
if (!is.null(x$best_iteration)) {
if (!is.null(x$early_stop$best_iteration)) {
cat('Best iteration:\n')
print(x$evaluation_log[x$best_iteration], row.names = FALSE, ...)
print(x$evaluation_log[x$early_stop$best_iteration], row.names = FALSE, ...)
}
invisible(x)
}
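A short R sketch tying together the xgb.cv() notes above (illustrative only, not part of the diff): DMatrix-only input, stratified = "auto", and R-side control of the fold RNG. Parameter values are assumptions.

    library(xgboost)
    data(agaricus.train, package = "xgboost")
    dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 1))
    set.seed(123)  # fold splits use R's RNG, not the 'seed' entry in params
    cv <- xgb.cv(
      params = list(objective = "binary:logistic", max_depth = 2, eta = 1, nthread = 1),
      data = dtrain, nrounds = 3, nfold = 5,
      stratified = "auto",  # resolves to TRUE for this built-in classification objective
      prediction = TRUE     # engages xgb.cb.cv.predict(), adding a 'cv_predict' element
    )
    str(cv$cv_predict$pred)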

View File

@ -24,6 +24,7 @@
#' as a \code{character} vector. Otherwise it will return \code{TRUE}.
#'
#' @examples
#' \dontshow{RhpcBLASctl::omp_set_num_threads(1)}
#' data(agaricus.train, package='xgboost')
#' data(agaricus.test, package='xgboost')
#' train <- agaricus.train

View File

@ -6,7 +6,7 @@
#'
#' @details
#' The input file is expected to contain a model saved in an xgboost model format
#' using either \code{\link{xgb.save}} or \code{\link{cb.save.model}} in R, or using some
#' using either \code{\link{xgb.save}} or \code{\link{xgb.cb.save.model}} in R, or using some
#' appropriate methods from other xgboost interfaces. E.g., a model trained in Python and
#' saved from there in xgboost format, could be loaded from R.
#'
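A small R sketch of the save/load round trip mentioned above (not part of the diff); data, parameters, and the file name are assumptions.

    library(xgboost)
    data(agaricus.train, package = "xgboost")
    dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 1))
    bst <- xgb.train(
      params = list(objective = "binary:logistic", nthread = 1),
      data = dtrain, nrounds = 2
    )
    # Save in XGBoost's own format (readable from other language bindings too),
    # then load it back.
    fname <- file.path(tempdir(), "model.json")
    xgb.save(bst, fname)
    bst2 <- xgb.load(fname)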
@ -20,6 +20,7 @@
#' \code{\link{xgb.save}}
#'
#' @examples
#' \dontshow{RhpcBLASctl::omp_set_num_threads(1)}
#' data(agaricus.train, package='xgboost')
#' data(agaricus.test, package='xgboost')
#'

View File

@ -35,6 +35,7 @@
#' \code{\link{xgb.load}}
#'
#' @examples
#' \dontshow{RhpcBLASctl::omp_set_num_threads(1)}
#' data(agaricus.train, package='xgboost')
#' data(agaricus.test, package='xgboost')
#'

View File

@ -12,6 +12,7 @@
#' }
#'
#' @examples
#' \dontshow{RhpcBLASctl::omp_set_num_threads(1)}
#' data(agaricus.train, package='xgboost')
#' data(agaricus.test, package='xgboost')
#'

View File

@ -114,13 +114,13 @@
#' @param data training dataset. \code{xgb.train} accepts only an \code{xgb.DMatrix} as the input.
#' \code{xgboost}, in addition, also accepts \code{matrix}, \code{dgCMatrix}, or name of a local data file.
#' @param nrounds max number of boosting iterations.
#' @param watchlist named list of xgb.DMatrix datasets to use for evaluating model performance.
#' @param evals Named list of `xgb.DMatrix` datasets to use for evaluating model performance.
#' Metrics specified in either \code{eval_metric} or \code{feval} will be computed for each
#' of these datasets during each boosting iteration, and stored in the end as a field named
#' \code{evaluation_log} in the resulting object. When either \code{verbose>=1} or
#' \code{\link{cb.print.evaluation}} callback is engaged, the performance results are continuously
#' \code{\link{xgb.cb.print.evaluation}} callback is engaged, the performance results are continuously
#' printed out during the training.
#' E.g., specifying \code{watchlist=list(validation1=mat1, validation2=mat2)} allows to track
#' E.g., specifying \code{evals=list(validation1=mat1, validation2=mat2)} allows to track
#' the performance of each round's model on mat1 and mat2.
#' @param obj customized objective function. Returns gradient and second order
#' gradient with given prediction and dtrain.
@ -130,31 +130,32 @@
#' @param verbose If 0, xgboost will stay silent. If 1, it will print information about performance.
#' If 2, some additional information will be printed out.
#' Note that setting \code{verbose > 0} automatically engages the
#' \code{cb.print.evaluation(period=1)} callback function.
#' \code{xgb.cb.print.evaluation(period=1)} callback function.
#' @param print_every_n Print each n-th iteration evaluation messages when \code{verbose>0}.
#' Default is 1 which means all messages are printed. This parameter is passed to the
#' \code{\link{cb.print.evaluation}} callback.
#' \code{\link{xgb.cb.print.evaluation}} callback.
#' @param early_stopping_rounds If \code{NULL}, the early stopping function is not triggered.
#' If set to an integer \code{k}, training with a validation set will stop if the performance
#' doesn't improve for \code{k} rounds.
#' Setting this parameter engages the \code{\link{cb.early.stop}} callback.
#' Setting this parameter engages the \code{\link{xgb.cb.early.stop}} callback.
#' @param maximize If \code{feval} and \code{early_stopping_rounds} are set,
#' then this parameter must be set as well.
#' When it is \code{TRUE}, it means the larger the evaluation score the better.
#' This parameter is passed to the \code{\link{cb.early.stop}} callback.
#' This parameter is passed to the \code{\link{xgb.cb.early.stop}} callback.
#' @param save_period when it is non-NULL, model is saved to disk after every \code{save_period} rounds,
#' 0 means save at the end. The saving is handled by the \code{\link{cb.save.model}} callback.
#' 0 means save at the end. The saving is handled by the \code{\link{xgb.cb.save.model}} callback.
#' @param save_name the name or path for periodically saved model file.
#' @param xgb_model a previously built model to continue the training from.
#' Could be either an object of class \code{xgb.Booster}, or its raw data, or the name of a
#' file with a previously saved model.
#' @param callbacks a list of callback functions to perform various tasks during boosting.
#' See \code{\link{callbacks}}. Some of the callbacks are automatically created depending on the
#' See \code{\link{xgb.Callback}}. Some of the callbacks are automatically created depending on the
#' parameters' values. User can provide either existing or their own callback methods in order
#' to customize the training process.
#'
#' Note that some callbacks might try to set an evaluation log - be aware that these evaluation logs
#' are kept as R attributes, and thus do not get saved when using non-R serializaters like
#' Note that some callbacks might try to leave attributes in the resulting model object,
#' such as an evaluation log (a `data.table` object) - be aware that these objects are kept
#' as R attributes, and thus do not get saved when using XGBoost's own serializers like
#' \link{xgb.save} (but are kept when using R serializers like \link{saveRDS}).
#' @param ... other parameters to pass to \code{params}.
#' @param label vector of response values. Should not be provided when data is
@ -170,7 +171,7 @@
#' @details
#' These are the training functions for \code{xgboost}.
#'
#' The \code{xgb.train} interface supports advanced features such as \code{watchlist},
#' The \code{xgb.train} interface supports advanced features such as \code{evals},
#' customized objective and evaluation metric functions, therefore it is more flexible
#' than the \code{xgboost} interface.
#'
@ -178,6 +179,11 @@
#' Number of threads can also be manually specified via the \code{nthread}
#' parameter.
#'
#' While in other interfaces the random seed defaults to zero, in R, if a parameter `seed`
#' is not manually supplied, a random seed will be generated through R's own random number
#' generator, whose state is in turn controllable through `set.seed`. If `seed` is passed,
#' it will override the seed drawn from R's RNG.
#'
#' The evaluation metric is chosen automatically by XGBoost (according to the objective)
#' when the \code{eval_metric} parameter is not provided.
#' User may set one or several \code{eval_metric} parameters.
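A rough sketch of the seeding behaviour described above (using the bundled agaricus data; parameter values are illustrative, and `subsample` is only added so that the seed actually matters):

library(xgboost)
data(agaricus.train, package = "xgboost")
dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label, nthread = 1)
params <- list(objective = "binary:logistic", subsample = 0.5, nthread = 1)

set.seed(123)
bst1 <- xgb.train(params, dtrain, nrounds = 5, verbose = 0)
set.seed(123)
bst2 <- xgb.train(params, dtrain, nrounds = 5, verbose = 0)
# same R seed -> same subsampling -> identical models
all.equal(predict(bst1, dtrain), predict(bst2, dtrain))

# supplying 'seed' in the parameters bypasses R's RNG entirely
bst3 <- xgb.train(c(params, list(seed = 42)), dtrain, nrounds = 5, verbose = 0)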
@ -201,18 +207,19 @@
#'
#' The following callbacks are automatically created when certain parameters are set:
#' \itemize{
#' \item \code{cb.print.evaluation} is turned on when \code{verbose > 0};
#' \item \code{xgb.cb.print.evaluation} is turned on when \code{verbose > 0};
#' and the \code{print_every_n} parameter is passed to it.
#' \item \code{cb.evaluation.log} is on when \code{watchlist} is present.
#' \item \code{cb.early.stop}: when \code{early_stopping_rounds} is set.
#' \item \code{cb.save.model}: when \code{save_period > 0} is set.
#' \item \code{xgb.cb.evaluation.log} is on when \code{evals} is present.
#' \item \code{xgb.cb.early.stop}: when \code{early_stopping_rounds} is set.
#' \item \code{xgb.cb.save.model}: when \code{save_period > 0} is set.
#' }
#'
#' Note that objects of type `xgb.Booster` as returned by this function behave a bit differently
#' from typical R objects (it's an 'altrep' list class), and it makes a separation between
#' internal booster attributes (restricted to jsonifyable data), accessed through \link{xgb.attr}
#' and shared between interfaces through serialization functions like \link{xgb.save}; and
#' R-specific attributes, accessed through \link{attributes} and \link{attr}, which are otherwise
#' R-specific attributes (typically the result from a callback), accessed through \link{attributes}
#' and \link{attr}, which are otherwise
#' only used in the R interface, only kept when using R's serializers like \link{saveRDS}, and
#' not anyhow used by functions like \link{predict.xgb.Booster}.
#'
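A small sketch of the two attribute systems mentioned above (the agaricus data and the attribute name "my_note" are purely illustrative; depending on the version, callback outputs may also require passing `keep_extra_attributes = TRUE`):

library(xgboost)
data(agaricus.train, package = "xgboost")
dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label, nthread = 1)
bst <- xgb.train(params = list(objective = "binary:logistic", nthread = 1),
                 data = dtrain, nrounds = 2,
                 evals = list(train = dtrain), verbose = 0)

# R-level attribute written by the evaluation-log callback:
# kept by saveRDS(), but not by xgb.save()
attributes(bst)$evaluation_log

# booster-level (C-level) attribute: shared across interfaces and kept by xgb.save()
xgb.attr(bst, "my_note") <- "fit on agaricus"
xgb.attr(bst, "my_note")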
@ -224,7 +231,7 @@
#' effect elsewhere.
#'
#' @seealso
#' \code{\link{callbacks}},
#' \code{\link{xgb.Callback}},
#' \code{\link{predict.xgb.Booster}},
#' \code{\link{xgb.cv}}
#'
@ -247,12 +254,12 @@
#' dtest <- with(
#' agaricus.test, xgb.DMatrix(data, label = label, nthread = nthread)
#' )
#' watchlist <- list(train = dtrain, eval = dtest)
#' evals <- list(train = dtrain, eval = dtest)
#'
#' ## A simple xgb.train example:
#' param <- list(max_depth = 2, eta = 1, nthread = nthread,
#' objective = "binary:logistic", eval_metric = "auc")
#' bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0)
#' bst <- xgb.train(param, dtrain, nrounds = 2, evals = evals, verbose = 0)
#'
#' ## An xgb.train example where custom objective and evaluation metric are
#' ## used:
@ -273,15 +280,15 @@
#' # as 'objective' and 'eval_metric' parameters in the params list:
#' param <- list(max_depth = 2, eta = 1, nthread = nthread,
#' objective = logregobj, eval_metric = evalerror)
#' bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0)
#' bst <- xgb.train(param, dtrain, nrounds = 2, evals = evals, verbose = 0)
#'
#' # or through the ... arguments:
#' param <- list(max_depth = 2, eta = 1, nthread = nthread)
#' bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0,
#' bst <- xgb.train(param, dtrain, nrounds = 2, evals = evals, verbose = 0,
#' objective = logregobj, eval_metric = evalerror)
#'
#' # or as dedicated 'obj' and 'feval' parameters of xgb.train:
#' bst <- xgb.train(param, dtrain, nrounds = 2, watchlist,
#' bst <- xgb.train(param, dtrain, nrounds = 2, evals = evals,
#' obj = logregobj, feval = evalerror)
#'
#'
@ -289,11 +296,11 @@
#' param <- list(max_depth = 2, eta = 1, nthread = nthread,
#' objective = "binary:logistic", eval_metric = "auc")
#' my_etas <- list(eta = c(0.5, 0.1))
#' bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0,
#' callbacks = list(cb.reset.parameters(my_etas)))
#' bst <- xgb.train(param, dtrain, nrounds = 2, evals = evals, verbose = 0,
#' callbacks = list(xgb.cb.reset.parameters(my_etas)))
#'
#' ## Early stopping:
#' bst <- xgb.train(param, dtrain, nrounds = 25, watchlist,
#' bst <- xgb.train(param, dtrain, nrounds = 25, evals = evals,
#' early_stopping_rounds = 3)
#'
#' ## An 'xgboost' interface example:
@ -304,7 +311,7 @@
#'
#' @rdname xgb.train
#' @export
xgb.train <- function(params = list(), data, nrounds, watchlist = list(),
xgb.train <- function(params = list(), data, nrounds, evals = list(),
obj = NULL, feval = NULL, verbose = 1, print_every_n = 1L,
early_stopping_rounds = NULL, maximize = NULL,
save_period = NULL, save_name = "xgboost.model",
@ -317,68 +324,68 @@ xgb.train <- function(params = list(), data, nrounds, watchlist = list(),
check.custom.obj()
check.custom.eval()
# data & watchlist checks
# data & evals checks
dtrain <- data
if (!inherits(dtrain, "xgb.DMatrix"))
stop("second argument dtrain must be xgb.DMatrix")
if (length(watchlist) > 0) {
if (typeof(watchlist) != "list" ||
!all(vapply(watchlist, inherits, logical(1), what = 'xgb.DMatrix')))
stop("watchlist must be a list of xgb.DMatrix elements")
evnames <- names(watchlist)
if (length(evals) > 0) {
if (typeof(evals) != "list" ||
!all(vapply(evals, inherits, logical(1), what = 'xgb.DMatrix')))
stop("'evals' must be a list of xgb.DMatrix elements")
evnames <- names(evals)
if (is.null(evnames) || any(evnames == ""))
stop("each element of the watchlist must have a name tag")
stop("each element of 'evals' must have a name tag")
}
# Handle multiple evaluation metrics given as a list
for (m in params$eval_metric) {
params <- c(params, list(eval_metric = m))
}
# evaluation printing callback
params <- c(params)
print_every_n <- max(as.integer(print_every_n), 1L)
if (!has.callbacks(callbacks, 'cb.print.evaluation') &&
verbose) {
callbacks <- add.cb(callbacks, cb.print.evaluation(print_every_n))
}
# evaluation log callback: it is automatically enabled when watchlist is provided
evaluation_log <- list()
if (!has.callbacks(callbacks, 'cb.evaluation.log') &&
length(watchlist) > 0) {
callbacks <- add.cb(callbacks, cb.evaluation.log())
}
# Model saving callback
if (!is.null(save_period) &&
!has.callbacks(callbacks, 'cb.save.model')) {
callbacks <- add.cb(callbacks, cb.save.model(save_period, save_name))
}
# Early stopping callback
stop_condition <- FALSE
if (!is.null(early_stopping_rounds) &&
!has.callbacks(callbacks, 'cb.early.stop')) {
callbacks <- add.cb(callbacks, cb.early.stop(early_stopping_rounds,
maximize = maximize, verbose = verbose))
params['validate_parameters'] <- TRUE
if (!("seed" %in% names(params))) {
params[["seed"]] <- sample(.Machine$integer.max, size = 1)
}
# Sort the callbacks into categories
cb <- categorize.callbacks(callbacks)
params['validate_parameters'] <- TRUE
if (!is.null(params[['seed']])) {
warning("xgb.train: `seed` is ignored in R package. Use `set.seed()` instead.")
# callbacks
tmp <- .process.callbacks(callbacks, is_cv = FALSE)
callbacks <- tmp$callbacks
cb_names <- tmp$cb_names
rm(tmp)
# Early stopping callback (should always come first)
if (!is.null(early_stopping_rounds) && !("early_stop" %in% cb_names)) {
callbacks <- add.callback(
callbacks,
xgb.cb.early.stop(
early_stopping_rounds,
maximize = maximize,
verbose = verbose
),
as_first_elt = TRUE
)
}
# evaluation printing callback
print_every_n <- max(as.integer(print_every_n), 1L)
if (verbose && !("print_evaluation" %in% cb_names)) {
callbacks <- add.callback(callbacks, xgb.cb.print.evaluation(print_every_n))
}
# evaluation log callback: it is automatically enabled when 'evals' is provided
if (length(evals) && !("evaluation_log" %in% cb_names)) {
callbacks <- add.callback(callbacks, xgb.cb.evaluation.log())
}
# Model saving callback
if (!is.null(save_period) && !("save_model" %in% cb_names)) {
callbacks <- add.callback(callbacks, xgb.cb.save.model(save_period, save_name))
}
# The tree updating process would need slightly different handling
is_update <- NVL(params[['process_type']], '.') == 'update'
past_evaluation_log <- NULL
if (inherits(xgb_model, "xgb.Booster")) {
past_evaluation_log <- attributes(xgb_model)$evaluation_log
}
# Construct a booster (either a new one or load from xgb_model)
bst <- xgb.Booster(
params = params,
cachelist = append(watchlist, dtrain),
cachelist = append(evals, dtrain),
modelfile = xgb_model
)
niter_init <- bst$niter
@ -389,11 +396,6 @@ xgb.train <- function(params = list(), data, nrounds, watchlist = list(),
dtrain
)
# extract parameters that can affect the relationship b/w #trees and #iterations
# Note: it might look like these aren't used, but they need to be defined in this
# environment for the callbacks for work correctly.
num_class <- max(as.numeric(NVL(params[['num_class']], 1)), 1) # nolint
if (is_update && nrounds > niter_init)
stop("nrounds cannot be larger than ", niter_init, " (nrounds of xgb_model)")
@ -401,57 +403,83 @@ xgb.train <- function(params = list(), data, nrounds, watchlist = list(),
begin_iteration <- niter_skip + 1
end_iteration <- niter_skip + nrounds
.execute.cb.before.training(
callbacks,
bst,
dtrain,
evals,
begin_iteration,
end_iteration
)
# the main loop for boosting iterations
for (iteration in begin_iteration:end_iteration) {
for (f in cb$pre_iter) f()
xgb.iter.update(
bst = bst,
dtrain = dtrain,
iter = iteration - 1,
obj = obj
.execute.cb.before.iter(
callbacks,
bst,
dtrain,
evals,
iteration
)
if (length(watchlist) > 0) {
bst_evaluation <- xgb.iter.eval( # nolint: object_usage_linter
xgb.iter.update(
bst = bst,
dtrain = dtrain,
iter = iteration - 1,
obj = obj
)
bst_evaluation <- NULL
if (length(evals) > 0) {
bst_evaluation <- xgb.iter.eval(
bst = bst,
watchlist = watchlist,
evals = evals,
iter = iteration - 1,
feval = feval
)
}
for (f in cb$post_iter) f()
should_stop <- .execute.cb.after.iter(
callbacks,
bst,
dtrain,
evals,
iteration,
bst_evaluation
)
if (stop_condition) break
if (should_stop) break
}
for (f in cb$finalize) f(finalize = TRUE)
# store the evaluation results
keep_evaluation_log <- FALSE
if (length(evaluation_log) > 0 && nrow(evaluation_log) > 0) {
keep_evaluation_log <- TRUE
# include the previous compatible history when available
if (inherits(xgb_model, 'xgb.Booster') &&
!is_update &&
!is.null(past_evaluation_log) &&
isTRUE(all.equal(colnames(evaluation_log),
colnames(past_evaluation_log)))) {
evaluation_log <- rbindlist(list(past_evaluation_log, evaluation_log))
}
}
cb_outputs <- .execute.cb.after.training(
callbacks,
bst,
dtrain,
evals,
iteration,
bst_evaluation
)
extra_attrs <- list(
call = match.call(),
params = params,
callbacks = callbacks
params = params
)
if (keep_evaluation_log) {
extra_attrs$evaluation_log <- evaluation_log
}
curr_attrs <- attributes(bst)
attributes(bst) <- c(curr_attrs, extra_attrs)
if (NROW(curr_attrs)) {
curr_attrs <- curr_attrs[
setdiff(
names(curr_attrs),
c(names(extra_attrs), names(cb_outputs))
)
]
}
curr_attrs <- c(extra_attrs, curr_attrs)
if (NROW(cb_outputs)) {
curr_attrs <- c(curr_attrs, cb_outputs)
}
attributes(bst) <- curr_attrs
return(bst)
}

View File

@ -18,9 +18,9 @@ xgboost <- function(data = NULL, label = NULL, missing = NA, weight = NULL,
nthread = merged$nthread
)
watchlist <- list(train = dtrain)
evals <- list(train = dtrain)
bst <- xgb.train(params, dtrain, nrounds, watchlist, verbose = verbose, print_every_n = print_every_n,
bst <- xgb.train(params, dtrain, nrounds, evals, verbose = verbose, print_every_n = print_every_n,
early_stopping_rounds = early_stopping_rounds, maximize = maximize,
save_period = save_period, save_name = save_name,
xgb_model = xgb_model, callbacks = callbacks, ...)
@ -82,12 +82,8 @@ NULL
NULL
# Various imports
#' @importClassesFrom Matrix dgCMatrix dgeMatrix dgRMatrix
#' @importFrom Matrix colSums
#' @importClassesFrom Matrix dgCMatrix dgRMatrix CsparseMatrix
#' @importFrom Matrix sparse.model.matrix
#' @importFrom Matrix sparseVector
#' @importFrom Matrix sparseMatrix
#' @importFrom Matrix t
#' @importFrom data.table data.table
#' @importFrom data.table is.data.table
#' @importFrom data.table as.data.table
@ -103,6 +99,7 @@ NULL
#' @importFrom stats coef
#' @importFrom stats predict
#' @importFrom stats median
#' @importFrom stats sd
#' @importFrom stats variable.names
#' @importFrom utils head
#' @importFrom graphics barplot

View File

@ -55,6 +55,8 @@ print(paste("test-error=", err))
# save model to binary local file
xgb.save(bst, "xgboost.model")
# load binary model to R
# Function doesn't take 'nthreads', but can be set like this:
RhpcBLASctl::omp_set_num_threads(1)
bst2 <- xgb.load("xgboost.model")
pred2 <- predict(bst2, test$data)
# pred2 should be identical to pred
@ -72,17 +74,17 @@ print(paste("sum(abs(pred3-pred))=", sum(abs(pred3 - pred))))
# to use advanced features, we need to put data in xgb.DMatrix
dtrain <- xgb.DMatrix(data = train$data, label = train$label)
dtest <- xgb.DMatrix(data = test$data, label = test$label)
#---------------Using watchlist----------------
# watchlist is a list of xgb.DMatrix, each of them is tagged with name
watchlist <- list(train = dtrain, test = dtest)
# to train with watchlist, use xgb.train, which contains more advanced features
# watchlist allows us to monitor the evaluation result on all data in the list
print("Train xgboost using xgb.train with watchlist")
bst <- xgb.train(data = dtrain, max_depth = 2, eta = 1, nrounds = 2, watchlist = watchlist,
#---------------Using an evaluation set----------------
# 'evals' is a list of xgb.DMatrix, each of them is tagged with name
evals <- list(train = dtrain, test = dtest)
# to train with an evaluation set, use xgb.train, which contains more advanced features
# 'evals' argument allows us to monitor the evaluation result on all data in the list
print("Train xgboost using xgb.train with evaluation data")
bst <- xgb.train(data = dtrain, max_depth = 2, eta = 1, nrounds = 2, evals = evals,
nthread = 2, objective = "binary:logistic")
# we can change evaluation metrics, or use multiple evaluation metrics
print("train xgboost using xgb.train with watchlist, watch logloss and error")
bst <- xgb.train(data = dtrain, max_depth = 2, eta = 1, nrounds = 2, watchlist = watchlist,
print("train xgboost using xgb.train with evaluation data, watch logloss and error")
bst <- xgb.train(data = dtrain, max_depth = 2, eta = 1, nrounds = 2, evals = evals,
eval_metric = "error", eval_metric = "logloss",
nthread = 2, objective = "binary:logistic")
@ -90,7 +92,7 @@ bst <- xgb.train(data = dtrain, max_depth = 2, eta = 1, nrounds = 2, watchlist =
xgb.DMatrix.save(dtrain, "dtrain.buffer")
# to load it in, simply call xgb.DMatrix
dtrain2 <- xgb.DMatrix("dtrain.buffer")
bst <- xgb.train(data = dtrain2, max_depth = 2, eta = 1, nrounds = 2, watchlist = watchlist,
bst <- xgb.train(data = dtrain2, max_depth = 2, eta = 1, nrounds = 2, evals = evals,
nthread = 2, objective = "binary:logistic")
# information can be extracted from xgb.DMatrix using getinfo
label <- getinfo(dtest, "label")

View File

@ -5,14 +5,14 @@ data(agaricus.test, package = 'xgboost')
dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label)
watchlist <- list(eval = dtest, train = dtrain)
evals <- list(eval = dtest, train = dtrain)
###
# advanced: start from an initial base prediction
#
print('start running example to start from an initial prediction')
# train xgboost for 1 round
param <- list(max_depth = 2, eta = 1, nthread = 2, objective = 'binary:logistic')
bst <- xgb.train(param, dtrain, 1, watchlist)
bst <- xgb.train(param, dtrain, 1, evals)
# Note: we need the margin value instead of transformed prediction in set_base_margin
# do predict with output_margin=TRUE, will always give you margin values before logistic transformation
ptrain <- predict(bst, dtrain, outputmargin = TRUE)
@ -23,4 +23,4 @@ setinfo(dtrain, "base_margin", ptrain)
setinfo(dtest, "base_margin", ptest)
print('this is result of boost from initial prediction')
bst <- xgb.train(params = param, data = dtrain, nrounds = 1, watchlist = watchlist)
bst <- xgb.train(params = param, data = dtrain, nrounds = 1, evals = evals)

View File

@ -8,7 +8,7 @@ dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label)
# note: for customized objective function, we leave objective as default
# note: what we are getting is margin value in prediction
# you must know what you are doing
watchlist <- list(eval = dtest, train = dtrain)
evals <- list(eval = dtest, train = dtrain)
num_round <- 2
# user define objective function, given prediction, return gradient and second order gradient
@ -38,7 +38,7 @@ param <- list(max_depth = 2, eta = 1, nthread = 2, verbosity = 0,
print('start training with user customized objective')
# training with customized objective, we can also do step by step training
# simply look at xgboost.py's implementation of train
bst <- xgb.train(param, dtrain, num_round, watchlist)
bst <- xgb.train(param, dtrain, num_round, evals)
#
# there can be cases where you want additional information
@ -62,4 +62,4 @@ param <- list(max_depth = 2, eta = 1, nthread = 2, verbosity = 0,
print('start training with user customized objective, with additional attributes in DMatrix')
# training with customized objective, we can also do step by step training
# simply look at xgboost.py's implementation of train
bst <- xgb.train(param, dtrain, num_round, watchlist)
bst <- xgb.train(param, dtrain, num_round, evals)

View File

@ -8,7 +8,7 @@ dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label)
# note: what we are getting is margin value in prediction
# you must know what you are doing
param <- list(max_depth = 2, eta = 1, nthread = 2, verbosity = 0)
watchlist <- list(eval = dtest)
evals <- list(eval = dtest)
num_round <- 20
# user define objective function, given prediction, return gradient and second order gradient
# this is log likelihood loss
@ -32,7 +32,7 @@ evalerror <- function(preds, dtrain) {
}
print('start training with early Stopping setting')
bst <- xgb.train(param, dtrain, num_round, watchlist,
bst <- xgb.train(param, dtrain, num_round, evals,
objective = logregobj, eval_metric = evalerror, maximize = FALSE,
early_stopping_round = 3)
bst <- xgb.cv(param, dtrain, num_round, nfold = 5,

View File

@ -25,9 +25,9 @@ param <- list(objective = "binary:logistic", booster = "gblinear",
##
# the rest of settings are the same
##
watchlist <- list(eval = dtest, train = dtrain)
evals <- list(eval = dtest, train = dtrain)
num_round <- 2
bst <- xgb.train(param, dtrain, num_round, watchlist)
bst <- xgb.train(param, dtrain, num_round, evals)
ypred <- predict(bst, dtest)
labels <- getinfo(dtest, 'label')
cat('error of preds=', mean(as.numeric(ypred > 0.5) != labels), '\n')

View File

@ -23,7 +23,7 @@ y <- rbinom(N, 1, plogis(m))
tr <- sample.int(N, N * 0.75)
dtrain <- xgb.DMatrix(X[tr, ], label = y[tr])
dtest <- xgb.DMatrix(X[-tr, ], label = y[-tr])
wl <- list(train = dtrain, test = dtest)
evals <- list(train = dtrain, test = dtest)
# An example of running 'gpu_hist' algorithm
# which is
@ -35,11 +35,11 @@ wl <- list(train = dtrain, test = dtest)
param <- list(objective = 'reg:logistic', eval_metric = 'auc', subsample = 0.5, nthread = 4,
max_bin = 64, tree_method = 'gpu_hist')
pt <- proc.time()
bst_gpu <- xgb.train(param, dtrain, watchlist = wl, nrounds = 50)
bst_gpu <- xgb.train(param, dtrain, evals = evals, nrounds = 50)
proc.time() - pt
# Compare to the 'hist' algorithm:
param$tree_method <- 'hist'
pt <- proc.time()
bst_hist <- xgb.train(param, dtrain, watchlist = wl, nrounds = 50)
bst_hist <- xgb.train(param, dtrain, evals = evals, nrounds = 50)
proc.time() - pt

View File

@ -6,11 +6,11 @@ dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label)
param <- list(max_depth = 2, eta = 1, objective = 'binary:logistic')
watchlist <- list(eval = dtest, train = dtrain)
evals <- list(eval = dtest, train = dtrain)
nrounds <- 2
# training the model for two rounds
bst <- xgb.train(param, dtrain, nrounds, nthread = 2, watchlist)
bst <- xgb.train(param, dtrain, nrounds, nthread = 2, evals = evals)
cat('start testing prediction from first n trees\n')
labels <- getinfo(dtest, 'label')

View File

@ -43,7 +43,6 @@ colnames(new.features.test) <- colnames(new.features.train)
# learning with new features
new.dtrain <- xgb.DMatrix(data = new.features.train, label = agaricus.train$label)
new.dtest <- xgb.DMatrix(data = new.features.test, label = agaricus.test$label)
watchlist <- list(train = new.dtrain)
bst <- xgb.train(params = param, data = new.dtrain, nrounds = nrounds, nthread = 2)
# Model accuracy with new features

View File

@ -39,7 +39,7 @@ bst <- xgb.train(
data = d_train,
params = params,
maximize = FALSE,
watchlist = list(train = d_train),
evals = list(train = d_train),
nrounds = 20)
var_imp <- xgb.importance(attr(x, 'Dimnames')[[2]], model = bst)

View File

@ -1,37 +0,0 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/callbacks.R
\name{callbacks}
\alias{callbacks}
\title{Callback closures for booster training.}
\description{
These are used to perform various service tasks either during boosting iterations or at the end.
This approach helps to modularize many of such tasks without bloating the main training methods,
and it offers .
}
\details{
By default, a callback function is run after each boosting iteration.
An R-attribute \code{is_pre_iteration} could be set for a callback to define a pre-iteration function.
When a callback function has \code{finalize} parameter, its finalizer part will also be run after
the boosting is completed.
WARNING: side-effects!!! Be aware that these callback functions access and modify things in
the environment from which they are called from, which is a fairly uncommon thing to do in R.
To write a custom callback closure, make sure you first understand the main concepts about R environments.
Check either R documentation on \code{\link[base]{environment}} or the
\href{http://adv-r.had.co.nz/Environments.html}{Environments chapter} from the "Advanced R"
book by Hadley Wickham. Further, the best option is to read the code of some of the existing callbacks -
choose ones that do something similar to what you want to achieve. Also, you would need to get familiar
with the objects available inside of the \code{xgb.train} and \code{xgb.cv} internal environments.
}
\seealso{
\code{\link{cb.print.evaluation}},
\code{\link{cb.evaluation.log}},
\code{\link{cb.reset.parameters}},
\code{\link{cb.early.stop}},
\code{\link{cb.save.model}},
\code{\link{cb.cv.predict}},
\code{\link{xgb.train}},
\code{\link{xgb.cv}}
}

View File

@ -1,62 +0,0 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/callbacks.R
\name{cb.early.stop}
\alias{cb.early.stop}
\title{Callback closure to activate the early stopping.}
\usage{
cb.early.stop(
stopping_rounds,
maximize = FALSE,
metric_name = NULL,
verbose = TRUE
)
}
\arguments{
\item{stopping_rounds}{The number of rounds with no improvement in
the evaluation metric in order to stop the training.}
\item{maximize}{whether to maximize the evaluation metric}
\item{metric_name}{the name of an evaluation column to use as a criteria for early
stopping. If not set, the last column would be used.
Let's say the test data in \code{watchlist} was labelled as \code{dtest},
and one wants to use the AUC in test data for early stopping regardless of where
it is in the \code{watchlist}, then one of the following would need to be set:
\code{metric_name='dtest-auc'} or \code{metric_name='dtest_auc'}.
All dash '-' characters in metric names are considered equivalent to '_'.}
\item{verbose}{whether to print the early stopping information.}
}
\description{
Callback closure to activate the early stopping.
}
\details{
This callback function determines the condition for early stopping
by setting the \code{stop_condition = TRUE} flag in its calling frame.
The following additional fields are assigned to the model's R object:
\itemize{
\item \code{best_score} the evaluation score at the best iteration
\item \code{best_iteration} at which boosting iteration the best score has occurred (1-based index)
}
The Same values are also stored as xgb-attributes:
\itemize{
\item \code{best_iteration} is stored as a 0-based iteration index (for interoperability of binary models)
\item \code{best_msg} message string is also stored.
}
At least one data element is required in the evaluation watchlist for early stopping to work.
Callback function expects the following values to be set in its calling frame:
\code{stop_condition},
\code{bst_evaluation},
\code{rank},
\code{bst} (or \code{bst_folds} and \code{basket}),
\code{iteration},
\code{begin_iteration},
\code{end_iteration},
}
\seealso{
\code{\link{callbacks}},
\code{\link{xgb.attr}}
}

View File

@ -1,31 +0,0 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/callbacks.R
\name{cb.evaluation.log}
\alias{cb.evaluation.log}
\title{Callback closure for logging the evaluation history}
\usage{
cb.evaluation.log()
}
\description{
Callback closure for logging the evaluation history
}
\details{
This callback function appends the current iteration evaluation results \code{bst_evaluation}
available in the calling parent frame to the \code{evaluation_log} list in a calling frame.
The finalizer callback (called with \code{finalize = TURE} in the end) converts
the \code{evaluation_log} list into a final data.table.
The iteration evaluation result \code{bst_evaluation} must be a named numeric vector.
Note: in the column names of the final data.table, the dash '-' character is replaced with
the underscore '_' in order to make the column names more like regular R identifiers.
Callback function expects the following values to be set in its calling frame:
\code{evaluation_log},
\code{bst_evaluation},
\code{iteration}.
}
\seealso{
\code{\link{callbacks}}
}

View File

@ -1,29 +0,0 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/callbacks.R
\name{cb.print.evaluation}
\alias{cb.print.evaluation}
\title{Callback closure for printing the result of evaluation}
\usage{
cb.print.evaluation(period = 1, showsd = TRUE)
}
\arguments{
\item{period}{results would be printed every number of periods}
\item{showsd}{whether standard deviations should be printed (when available)}
}
\description{
Callback closure for printing the result of evaluation
}
\details{
The callback function prints the result of evaluation at every \code{period} iterations.
The initial and the last iteration's evaluations are always printed.
Callback function expects the following values to be set in its calling frame:
\code{bst_evaluation} (also \code{bst_evaluation_err} when available),
\code{iteration},
\code{begin_iteration},
\code{end_iteration}.
}
\seealso{
\code{\link{callbacks}}
}

View File

@ -1,40 +0,0 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/callbacks.R
\name{cb.save.model}
\alias{cb.save.model}
\title{Callback closure for saving a model file.}
\usage{
cb.save.model(save_period = 0, save_name = "xgboost.ubj")
}
\arguments{
\item{save_period}{save the model to disk after every
\code{save_period} iterations; 0 means save the model at the end.}
\item{save_name}{the name or path for the saved model file.
\if{html}{\out{<div class="sourceCode">}}\preformatted{ Note that the format of the model being saved is determined by the file
extension specified here (see \link{xgb.save} for details about how it works).
It can contain a \code{\link[base]{sprintf}} formatting specifier
to include the integer iteration number in the file name.
E.g., with \code{save_name} = 'xgboost_\%04d.ubj',
the file saved at iteration 50 would be named "xgboost_0050.ubj".
}\if{html}{\out{</div>}}}
}
\description{
Callback closure for saving a model file.
}
\details{
This callback function allows to save an xgb-model file, either periodically after each \code{save_period}'s or at the end.
Callback function expects the following values to be set in its calling frame:
\code{bst},
\code{iteration},
\code{begin_iteration},
\code{end_iteration}.
}
\seealso{
\link{xgb.save}
\code{\link{callbacks}}
}

View File

@ -18,25 +18,47 @@
iterationrange = NULL,
strict_shape = FALSE,
validate_features = FALSE,
base_margin = NULL,
...
)
}
\arguments{
\item{object}{Object of class \code{xgb.Booster}.}
\item{newdata}{Takes \code{matrix}, \code{dgCMatrix}, \code{dgRMatrix}, \code{dsparseVector},
\item{newdata}{Takes \code{data.frame}, \code{matrix}, \code{dgCMatrix}, \code{dgRMatrix}, \code{dsparseVector},
local data file, or \code{xgb.DMatrix}.
For single-row predictions on sparse data, it is recommended to use the CSR format.
If passing a sparse vector, it will take it as a row vector.}
\item{missing}{Only used when input is a dense matrix. Pick a float value that represents
missing values in data (e.g., 0 or some other extreme value).}
\if{html}{\out{<div class="sourceCode">}}\preformatted{ For single-row predictions on sparse data, it's recommended to use CSR format. If passing
a sparse vector, it will take it as a row vector.
Note that, for repeated predictions on the same data, one might want to create a DMatrix to
pass here instead of passing R types like matrices or data frames, as predictions will be
faster on DMatrix.
If `newdata` is a `data.frame`, be aware that:\\itemize\{
\\item Columns will be converted to numeric if they aren't already, which could potentially make
the operation slower than in an equivalent `matrix` object.
\\item The order of the columns must match with that of the data from which the model was fitted
(i.e. columns will not be referenced by their names, just by their order in the data).
\\item If the model was fitted to data with categorical columns, these columns must be of
`factor` type here, and must use the same encoding (i.e. have the same levels).
\\item If `newdata` contains any `factor` columns, they will be converted to base-0
encoding (same as during DMatrix creation) - hence, one should not pass a `factor`
under a column which during training had a different type.
\}
}\if{html}{\out{</div>}}}
\item{missing}{Float value that represents missing values in data (e.g., 0 or some other extreme value).
\if{html}{\out{<div class="sourceCode">}}\preformatted{ This parameter is not used when `newdata` is an `xgb.DMatrix` - in such cases, it should be passed
as an argument to the DMatrix constructor instead.
}\if{html}{\out{</div>}}}
\item{outputmargin}{Whether the prediction should be returned in the form of original untransformed
sum of predictions from boosting iterations' results. E.g., setting \code{outputmargin=TRUE} for
logistic regression would return log-odds instead of probabilities.}
\item{predleaf}{Whether to predict pre-tree leaf indices.}
\item{predleaf}{Whether to predict per-tree leaf indices.}
\item{predcontrib}{Whether to return feature contributions to individual predictions (see Details).}
@ -48,7 +70,7 @@ logistic regression would return log-odds instead of probabilities.}
prediction outputs per case. No effect if \code{predleaf}, \code{predcontrib},
or \code{predinteraction} is \code{TRUE}.}
\item{training}{Whether the predictions are used for training. For dart booster,
\item{training}{Whether the prediction result is used for training. For dart booster,
training predicting will perform dropout.}
\item{iterationrange}{Sequence of rounds/iterations from the model to use for prediction, specified by passing
@ -84,6 +106,13 @@ match (only applicable when both \code{object} and \code{newdata} have feature n
recommended to disable it for performance-sensitive applications.
}\if{html}{\out{</div>}}}
\item{base_margin}{Base margin used for boosting from existing model.
\if{html}{\out{<div class="sourceCode">}}\preformatted{ Note that, if `newdata` is an `xgb.DMatrix` object, this argument will
be ignored as it needs to be added to the DMatrix instead (e.g. by passing it as
an argument in its constructor, or by calling \link{setinfo.xgb.DMatrix}).
}\if{html}{\out{</div>}}}
\item{...}{Not used.}
}
\value{
@ -115,7 +144,7 @@ When \code{strict_shape = TRUE}, the output is always an array:
}
}
\description{
Predicted values based on either xgboost model or model handle object.
Predict values on data based on xgboost model.
}
\details{
Note that \code{iterationrange} would currently do nothing for predictions from "gblinear",

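A short sketch of the note under the `newdata` argument above about repeated predictions (bundled agaricus data; parameter values are illustrative):

library(xgboost)
data(agaricus.train, package = "xgboost")
data(agaricus.test, package = "xgboost")
dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label, nthread = 1)
bst <- xgb.train(params = list(objective = "binary:logistic", nthread = 1),
                 data = dtrain, nrounds = 2, verbose = 0)

# one-off prediction straight from the sparse matrix
p1 <- predict(bst, agaricus.test$data)

# for repeated predictions on the same data, build the DMatrix once and reuse it
dtest <- xgb.DMatrix(agaricus.test$data, nthread = 1)
p2 <- predict(bst, dtest)
all.equal(p1, p2)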
View File

@ -23,8 +23,8 @@ including the best iteration (when available).
\examples{
data(agaricus.train, package='xgboost')
train <- agaricus.train
cv <- xgb.cv(data = train$data, label = train$label, nfold = 5, max_depth = 2,
eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic")
cv <- xgb.cv(data = xgb.DMatrix(train$data, label = train$label), nfold = 5, max_depth = 2,
eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic")
print(cv)
print(cv, verbose=TRUE)

View File

@ -0,0 +1,248 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/callbacks.R
\name{xgb.Callback}
\alias{xgb.Callback}
\title{XGBoost Callback Constructor}
\usage{
xgb.Callback(
cb_name = "custom_callback",
env = new.env(),
f_before_training = function(env, model, data, evals, begin_iteration, end_iteration)
NULL,
f_before_iter = function(env, model, data, evals, iteration) NULL,
f_after_iter = function(env, model, data, evals, iteration, iter_feval) NULL,
f_after_training = function(env, model, data, evals, iteration, final_feval,
prev_cb_res) NULL
)
}
\arguments{
\item{cb_name}{Name for the callback.
If the callback produces some non-NULL result (from executing the function passed under
\code{f_after_training}), that result will be added as an R attribute to the resulting booster
(or as a named element in the result of CV), with the attribute name specified here.
Names of callbacks must be unique - i.e. there cannot be two callbacks with the same name.}
\item{env}{An environment object that will be passed to the different functions in the callback.
Note that this environment will not be shared with other callbacks.}
\item{f_before_training}{A function that will be executed before the training has started.
If passing \code{NULL} for this or for the other function inputs, then no function will be executed.
If passing a function, it will be called with parameters supplied as non-named arguments
matching the function signatures that are shown in the default value for each function argument.}
\item{f_before_iter}{A function that will be executed before each boosting round.
This function can signal whether the training should be finalized or not, by outputting
a value that evaluates to \code{TRUE} - i.e. if the output from the function provided here at
a given round is \code{TRUE}, then training will be stopped before the current iteration happens.
Return values of \code{NULL} will be interpreted as \code{FALSE}.}
\item{f_after_iter}{A function that will be executed after each boosting round.
This function can signal whether the training should be finalized or not, by outputting
a value that evaluates to \code{TRUE} - i.e. if the output from the function provided here at
a given round is \code{TRUE}, then training will be stopped at that round.
Return values of \code{NULL} will be interpreted as \code{FALSE}.}
\item{f_after_training}{A function that will be executed after training is finished.
This function can optionally output something non-NULL, which will become part of the R
attributes of the booster (assuming one passes \code{keep_extra_attributes=TRUE} to \link{xgb.train})
under the name supplied for parameter \code{cb_name} in the case of \link{xgb.train}; or a part
of the named elements in the result of \link{xgb.cv}.}
}
\value{
An \code{xgb.Callback} object, which can be passed to \link{xgb.train} or \link{xgb.cv}.
}
\description{
Constructor for defining the structure of callback functions that can be executed
at different stages of model training (before / after training, before / after each boosting
iteration).
}
\details{
Arguments that will be passed to the supplied functions are as follows:\itemize{
\item env The same environment that is passed under argument \code{env}.
It may be modified by the functions in order to e.g. keep tracking of what happens
across iterations or similar.
This environment is only used by the functions supplied to the callback, and will
not be kept after the model fitting function terminates (see parameter \code{f_after_training}).
\item model The booster object when using \link{xgb.train}, or the folds when using
\link{xgb.cv}.
For \link{xgb.cv}, folds are a list with a structure as follows:\itemize{
\item \code{dtrain}: The training data for the fold (as an \code{xgb.DMatrix} object).
\item \code{bst}: The \code{xgb.Booster} object for the fold.
\item \code{evals}: A list containing two DMatrices, with names \code{train} and \code{test}
(\code{test} is the held-out data for the fold).
\item \code{index}: The indices of the hold-out data for that fold (base-1 indexing),
from which the \code{test} entry in \code{evals} was obtained.
}
This object should \bold{not} be in-place modified in ways that conflict with the
training (e.g. resetting the parameters for a training update in a way that resets
the number of rounds to zero in order to overwrite rounds).
Note that any R attributes that are assigned to the booster during the callback functions,
will not be kept thereafter as the booster object variable is not re-assigned during
training. It is however possible to set C-level attributes of the booster through
\link{xgb.attr} or \link{xgb.attributes}, which should remain available for the rest
of the iterations and after the training is done.
For keeping variables across iterations, it's recommended to use \code{env} instead.
\item data The data to which the model is being fit, as an \code{xgb.DMatrix} object.
Note that, for \link{xgb.cv}, this will be the full data, while data for the specific
folds can be found in the \code{model} object.
\item evals The evaluation data, as passed under argument \code{evals} to
\link{xgb.train}.
For \link{xgb.cv}, this will always be \code{NULL}.
\item begin_iteration Index of the first boosting iteration that will be executed
(base-1 indexing).
This will typically be '1', but when using training continuation, depending on the
parameters for updates, boosting rounds will be continued from where the previous
model ended, in which case this will be larger than 1.
\item end_iteration Index of the last boosting iteration that will be executed
(base-1 indexing, inclusive of this end).
It should match with argument \code{nrounds} passed to \link{xgb.train} or \link{xgb.cv}.
Note that boosting might be interrupted before reaching this last iteration, for
example by using the early stopping callback \link{xgb.cb.early.stop}.
\item iteration Index of the iteration number that is being executed (first iteration
will be the same as parameter \code{begin_iteration}, then next one will add +1, and so on).
\item iter_feval Evaluation metrics for \code{evals} that were supplied, either
determined by the objective, or by parameter \code{feval}.
For \link{xgb.train}, this will be a named vector with one entry per element in
\code{evals}, where the names are determined as 'evals name' + '-' + 'metric name' - for
example, if \code{evals} contains an entry named "tr" and the metric is "rmse",
this will be a one-element vector with name "tr-rmse".
For \link{xgb.cv}, this will be a 2d matrix with dimensions \verb{[length(evals), nfolds]},
where the row names will follow the same naming logic as the one-dimensional vector
that is passed in \link{xgb.train}.
Note that, internally, the built-in callbacks such as \link{xgb.cb.print.evaluation} summarize
this table by calculating the row-wise means and standard deviations.
\item final_feval The evaluation results after the last boosting round is executed
(same format as \code{iter_feval}, and will be the exact same input as passed under
\code{iter_feval} to the last round that is executed during model fitting).
\item prev_cb_res Result from a previous run of a callback sharing the same name
(as given by parameter \code{cb_name}) when conducting training continuation, if there
was any in the booster R attributes.
Sometimes, one might want to append the new results to the previous one, and this will
be done automatically by the built-in callbacks such as \link{xgb.cb.evaluation.log},
which will append the new rows to the previous table.
If no such previous callback result is available (which it never will when fitting
a model from start instead of updating an existing model), this will be \code{NULL}.
For \link{xgb.cv}, which doesn't support training continuation, this will always be \code{NULL}.
}
The following names (\code{cb_name} values) are reserved for internal callbacks:\itemize{
\item print_evaluation
\item evaluation_log
\item reset_parameters
\item early_stop
\item save_model
\item cv_predict
\item gblinear_history
}
The following names are reserved for other non-callback attributes:\itemize{
\item names
\item class
\item call
\item params
\item niter
\item nfeatures
\item folds
}
When using the built-in early stopping callback (\link{xgb.cb.early.stop}), said callback
will always be executed before the others, as it sets some booster C-level attributes
that other callbacks might also use. Otherwise, the order of execution will match with
the order in which the callbacks are passed to the model fitting function.
}
\examples{
# Example constructing a custom callback that calculates
# squared error on the training data (no separate test set),
# and outputs the per-iteration results.
ssq_callback <- xgb.Callback(
cb_name = "ssq",
f_before_training = function(env, model, data, evals,
begin_iteration, end_iteration) {
# A vector to keep track of a number at each iteration
env$logs <- rep(NA_real_, end_iteration - begin_iteration + 1)
},
f_after_iter = function(env, model, data, evals, iteration, iter_feval) {
# This calculates the sum of squared errors on the training data.
# Note that this can be better done by passing an 'evals' entry,
# but this demonstrates a way in which callbacks can be structured.
pred <- predict(model, data)
err <- pred - getinfo(data, "label")
sq_err <- sum(err^2)
env$logs[iteration] <- sq_err
cat(
sprintf(
"Squared error at iteration \%d: \%.2f\n",
iteration, sq_err
)
)
# A return value of 'TRUE' here would signal to finalize the training
return(FALSE)
},
f_after_training = function(env, model, data, evals, iteration,
final_feval, prev_cb_res) {
return(env$logs)
}
)
data(mtcars)
y <- mtcars$mpg
x <- as.matrix(mtcars[, -1])
dm <- xgb.DMatrix(x, label = y, nthread = 1)
model <- xgb.train(
data = dm,
params = list(objective = "reg:squarederror", nthread = 1),
nrounds = 5,
callbacks = list(ssq_callback),
keep_extra_attributes = TRUE
)
# Result from 'f_after_iter' will be available as an attribute
attributes(model)$ssq
}
\seealso{
Built-in callbacks:\itemize{
\item \link{xgb.cb.print.evaluation}
\item \link{xgb.cb.evaluation.log}
\item \link{xgb.cb.reset.parameters}
\item \link{xgb.cb.early.stop}
\item \link{xgb.cb.save.model}
\item \link{xgb.cb.cv.predict}
\item \link{xgb.cb.gblinear.history}
}
}
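A hedged sketch of the same constructor used with xgb.cv, where 'model' is the list of folds described above (the callback name "fold_count" and the data are illustrative):

library(xgboost)
data(agaricus.train, package = "xgboost")
dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label, nthread = 1)

# 'model' here is the list of folds described above; each fold carries
# $bst, $dtrain, $evals and $index
count_folds_cb <- xgb.Callback(
  cb_name = "fold_count",
  f_after_training = function(env, model, data, evals, iteration,
                              final_feval, prev_cb_res) {
    length(model)
  }
)

res <- xgb.cv(
  params = list(objective = "binary:logistic", nthread = 1),
  data = dtrain, nrounds = 3, nfold = 4, verbose = 0,
  callbacks = list(count_folds_cb)
)
res$fold_count  # non-NULL callback output becomes a named element of the CV result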

View File

@ -19,7 +19,8 @@ xgb.DMatrix(
qid = NULL,
label_lower_bound = NULL,
label_upper_bound = NULL,
feature_weights = NULL
feature_weights = NULL,
data_split_mode = "row"
)
xgb.QuantileDMatrix(
@ -60,10 +61,27 @@ Other column types are not supported.
'xgb.QuantileDMatrix'.
\item Single-row CSR matrices, as class \code{dsparseVector} from package \code{Matrix}, which is interpreted
as a single row (only when making predictions from a fitted model).
\item Text files in SVMLight / LibSVM formats, passed as a path to the file. These are \bold{not}
supported for xgb.QuantileDMatrix'.
\item Binary files generated by \link{xgb.DMatrix.save}, passed as a path to the file. These are
\bold{not} supported for xgb.QuantileDMatrix'.
\item Text files in a supported format, passed as a \code{character} variable containing the URI path to
the file, with an optional format specifier.
These are \bold{not} supported for \code{xgb.QuantileDMatrix}. Supported formats are:\itemize{
\item XGBoost's own binary format for DMatrices, as produced by \link{xgb.DMatrix.save}.
\item SVMLight (a.k.a. LibSVM) format for CSR matrices. This format can be signaled by suffix
\code{?format=libsvm} at the end of the file path. It will be the default format if not
otherwise specified.
\item CSV files (comma-separated values). This format can be specified by adding suffix
\code{?format=csv} at the end of the file path. It will \bold{not} be auto-deduced from file extensions.
}
Be aware that the format of the file will not be auto-deduced - for example, if a file is named 'file.csv',
it will not look at the extension or file contents to determine that it contains comma-separated values.
Instead, the format must be specified following the URI format, so the input to \code{data} should be passed
like this: \code{"file.csv?format=csv"} (or \code{"file.csv?format=csv&label_column=0"} if the first column
corresponds to the labels).
For more information about passing text files as input, see the articles
\href{https://xgboost.readthedocs.io/en/stable/tutorials/input_format.html}{Text Input Format of DMatrix} and
\href{https://xgboost.readthedocs.io/en/stable/python/python_intro.html#python-data-interface}{Data Interface}.
}}
\item{label}{Label of the training data. For classification problems, should be passed encoded as
@ -129,6 +147,14 @@ not be saved, so make sure that \code{factor} columns passed to \code{predict} h
\item{feature_weights}{Set feature weights for column sampling.}
\item{data_split_mode}{When passing a URI (as R \code{character}) as input, this signals
whether to split by row or column. Allowed values are \code{"row"} and \code{"col"}.
In distributed mode, the file is split accordingly; otherwise this is only an indicator on
how the file was split beforehand. Defaults to \code{"row"}.
This is not used when \code{data} is not a URI.}
\item{ref}{The training dataset that provides quantile information, needed when creating
validation/test dataset with \code{xgb.QuantileDMatrix}. Supplying the training DMatrix
as a reference means that the same quantisation applied to the training data is

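A small sketch of the URI format described above (the file and its column layout are made up for illustration):

library(xgboost)
# write a tiny CSV whose first column holds the label
tmp <- file.path(tempdir(), "demo.csv")
write.table(cbind(c(0, 1, 0, 1), rnorm(4), rnorm(4)),
            tmp, sep = ",", row.names = FALSE, col.names = FALSE)

# the format (and the label column) must be stated in the URI -
# it is not guessed from the '.csv' extension
dm <- xgb.DMatrix(paste0(tmp, "?format=csv&label_column=0"))
dim(dm)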
View File

@ -15,6 +15,7 @@ xgb.DMatrix.save(dmatrix, fname)
Save xgb.DMatrix object to binary file
}
\examples{
\dontshow{RhpcBLASctl::omp_set_num_threads(1)}
data(agaricus.train, package='xgboost')
dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))
fname <- file.path(tempdir(), "xgb.DMatrix.data")

View File

@ -1,16 +1,27 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/callbacks.R
\name{cb.cv.predict}
\alias{cb.cv.predict}
\title{Callback closure for returning cross-validation based predictions.}
\name{xgb.cb.cv.predict}
\alias{xgb.cb.cv.predict}
\title{Callback for returning cross-validation based predictions.}
\usage{
cb.cv.predict(save_models = FALSE)
xgb.cb.cv.predict(save_models = FALSE, outputmargin = FALSE)
}
\arguments{
\item{save_models}{a flag for whether to save the folds' models.}
\item{save_models}{A flag for whether to save the folds' models.}
\item{outputmargin}{Whether to save margin predictions (same effect as passing this
parameter to \link{predict.xgb.Booster}).}
}
\value{
Predictions are returned inside of the \code{pred} element, which is either a vector or a matrix,
An \code{xgb.Callback} object, which can be passed to \link{xgb.cv},
but \bold{not} to \link{xgb.train}.
}
\description{
This callback function saves predictions for all of the test folds,
and also allows to save the folds' models.
}
\details{
Predictions are saved inside of the \code{pred} element, which is either a vector or a matrix,
depending on the number of prediction outputs per data row. The order of predictions corresponds
to the order of rows in the original dataset. Note that when a custom \code{folds} list is
provided in \code{xgb.cv}, the predictions would only be returned properly when this list is a
@ -19,23 +30,3 @@ meaningful when user-provided folds have overlapping indices as in, e.g., random
When some of the indices in the training dataset are not included into user-provided \code{folds},
their prediction value would be \code{NA}.
}
\description{
Callback closure for returning cross-validation based predictions.
}
\details{
This callback function saves predictions for all of the test folds,
and also allows to save the folds' models.
It is a "finalizer" callback and it uses early stopping information whenever it is available,
thus it must be run after the early stopping callback if the early stopping is used.
Callback function expects the following values to be set in its calling frame:
\code{bst_folds},
\code{basket},
\code{data},
\code{end_iteration},
\code{params},
}
\seealso{
\code{\link{callbacks}}
}

View File

@ -0,0 +1,55 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/callbacks.R
\name{xgb.cb.early.stop}
\alias{xgb.cb.early.stop}
\title{Callback to activate early stopping}
\usage{
xgb.cb.early.stop(
stopping_rounds,
maximize = FALSE,
metric_name = NULL,
verbose = TRUE,
keep_all_iter = TRUE
)
}
\arguments{
\item{stopping_rounds}{The number of rounds with no improvement in
the evaluation metric in order to stop the training.}
\item{maximize}{Whether to maximize the evaluation metric.}
\item{metric_name}{The name of an evaluation column to use as a criteria for early
stopping. If not set, the last column would be used.
Let's say the test data in \code{evals} was labelled as \code{dtest},
and one wants to use the AUC in test data for early stopping regardless of where
it is in the \code{evals}, then one of the following would need to be set:
\code{metric_name='dtest-auc'} or \code{metric_name='dtest_auc'}.
All dash '-' characters in metric names are considered equivalent to '_'.}
\item{verbose}{Whether to print the early stopping information.}
\item{keep_all_iter}{Whether to keep all of the boosting rounds that were produced
in the resulting object. If passing \code{FALSE}, will only keep the boosting rounds
up to the detected best iteration, discarding the ones that come after.}
}
\value{
An \code{xgb.Callback} object, which can be passed to \link{xgb.train} or \link{xgb.cv}.
}
\description{
This callback function determines the condition for early stopping.
The following attributes are assigned to the booster's object:
\itemize{
\item \code{best_score} the evaluation score at the best iteration
\item \code{best_iteration} at which boosting iteration the best score has occurred
(0-based index for interoperability of binary models)
}
The same values are also stored as R attributes as a result of the callback, plus an additional
attribute \code{stopped_by_max_rounds} which indicates whether an early stopping by the \code{stopping_rounds}
condition occurred. Note that the \code{best_iteration} that is stored under R attributes will follow
base-1 indexing, so it will be larger by '1' than the C-level 'best_iteration' that is accessed
through \link{xgb.attr} or \link{xgb.attributes}.
At least one dataset is required in \code{evals} for early stopping to work.
}
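A minimal sketch of the metric_name usage described above (agaricus data; the 'dtest' name and parameter values are illustrative):

library(xgboost)
data(agaricus.train, package = "xgboost")
data(agaricus.test, package = "xgboost")
dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label, nthread = 1)
dtest  <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label, nthread = 1)

bst <- xgb.train(
  params = list(objective = "binary:logistic", eval_metric = "auc", nthread = 1),
  data = dtrain, nrounds = 50,
  evals = list(dtrain = dtrain, dtest = dtest),
  verbose = 0,
  callbacks = list(xgb.cb.early.stop(stopping_rounds = 3, maximize = TRUE,
                                     metric_name = "dtest-auc"))
)
attributes(bst)$best_iteration  # base-1 index, as described above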

View File

@ -0,0 +1,24 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/callbacks.R
\name{xgb.cb.evaluation.log}
\alias{xgb.cb.evaluation.log}
\title{Callback for logging the evaluation history}
\usage{
xgb.cb.evaluation.log()
}
\value{
An \code{xgb.Callback} object, which can be passed to \link{xgb.train} or \link{xgb.cv}.
}
\description{
Callback for logging the evaluation history
}
\details{
This callback creates a table with per-iteration evaluation metrics (see parameters
\code{evals} and \code{feval} in \link{xgb.train}).
Note: in the column names of the final data.table, the dash '-' character is replaced with
the underscore '_' in order to make the column names more like regular R identifiers.
}
\seealso{
\link{xgb.cb.print.evaluation}
}

View File

@ -1,37 +1,48 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/callbacks.R
\name{cb.gblinear.history}
\alias{cb.gblinear.history}
\title{Callback closure for collecting the model coefficients history of a gblinear booster
during its training.}
\name{xgb.cb.gblinear.history}
\alias{xgb.cb.gblinear.history}
\title{Callback for collecting coefficients history of a gblinear booster}
\usage{
cb.gblinear.history(sparse = FALSE)
xgb.cb.gblinear.history(sparse = FALSE)
}
\arguments{
\item{sparse}{when set to FALSE/TRUE, a dense/sparse matrix is used to store the result.
\item{sparse}{when set to \code{FALSE}/\code{TRUE}, a dense/sparse matrix is used to store the result.
Sparse format is useful when one expects only a subset of coefficients to be non-zero,
when using the "thrifty" feature selector with a fairly small number of top features
selected per iteration.}
}
\value{
Results are stored in the \code{coefs} element of the closure.
The \code{\link{xgb.gblinear.history}} convenience function provides an easy
way to access it.
With \code{xgb.train}, it is either a dense of a sparse matrix.
While with \code{xgb.cv}, it is a list (an element per each fold) of such
matrices.
An \code{xgb.Callback} object, which can be passed to \link{xgb.train} or \link{xgb.cv}.
}
\description{
Callback closure for collecting the model coefficients history of a gblinear booster
during its training.
Callback for collecting coefficients history of a gblinear booster
}
\details{
To keep things fast and simple, gblinear booster does not internally store the history of linear
model coefficients at each boosting iteration. This callback provides a workaround for storing
the coefficients' path, by extracting them after each training iteration.
Callback function expects the following values to be set in its calling frame:
\code{bst} (or \code{bst_folds}).
This callback will construct a matrix where rows are boosting iterations and columns are
feature coefficients (same order as when calling \link{coef.xgb.Booster}, with the intercept
corresponding to the first column).
When there is more than one coefficient per feature (e.g. multi-class classification),
the result will be reshaped into a vector where coefficients are arranged first by features and
then by class (e.g. first 1 through N coefficients will be for the first class, then
coefficients N+1 through 2N for the second class, and so on).
If the result has only one coefficient per feature in the data, then the resulting matrix
will have column names matching with the feature names, otherwise (when there's more than
one coefficient per feature) the names will be composed as 'column name' + ':' + 'class index'
(so e.g. column 'c1' for class '0' will be named 'c1:0').
With \code{xgb.train}, the output is either a dense or a sparse matrix.
With \code{xgb.cv}, it is a list (one element per fold) of such
matrices.
The \link{xgb.gblinear.history} function provides an easy way to retrieve the
outputs from this callback.
}
\examples{
#### Binary classification:
@ -52,7 +63,7 @@ param <- list(booster = "gblinear", objective = "reg:logistic", eval_metric = "a
# rate does not break the convergence, but allows us to illustrate the typical pattern of
# "stochastic explosion" behaviour of this lock-free algorithm at early boosting iterations.
bst <- xgb.train(param, dtrain, list(tr=dtrain), nrounds = 200, eta = 1.,
callbacks = list(cb.gblinear.history()))
callbacks = list(xgb.cb.gblinear.history()))
# Extract the coefficients' path and plot them vs boosting iteration number:
coef_path <- xgb.gblinear.history(bst)
matplot(coef_path, type = 'l')
@ -61,7 +72,7 @@ matplot(coef_path, type = 'l')
# Will try the classical componentwise boosting which selects a single best feature per round:
bst <- xgb.train(param, dtrain, list(tr=dtrain), nrounds = 200, eta = 0.8,
updater = 'coord_descent', feature_selector = 'thrifty', top_k = 1,
callbacks = list(cb.gblinear.history()))
callbacks = list(xgb.cb.gblinear.history()))
matplot(xgb.gblinear.history(bst), type = 'l')
# Componentwise boosting is known to have similar effect to Lasso regularization.
# Try experimenting with various values of top_k, eta, nrounds,
@ -69,7 +80,7 @@ matplot(xgb.gblinear.history(bst), type = 'l')
# For xgb.cv:
bst <- xgb.cv(param, dtrain, nfold = 5, nrounds = 100, eta = 0.8,
callbacks = list(cb.gblinear.history()))
callbacks = list(xgb.cb.gblinear.history()))
# coefficients in the CV fold #3
matplot(xgb.gblinear.history(bst)[[3]], type = 'l')
@ -82,7 +93,7 @@ param <- list(booster = "gblinear", objective = "multi:softprob", num_class = 3,
# For the default linear updater 'shotgun' it sometimes is helpful
# to use smaller eta to reduce instability
bst <- xgb.train(param, dtrain, list(tr=dtrain), nrounds = 50, eta = 0.5,
callbacks = list(cb.gblinear.history()))
callbacks = list(xgb.cb.gblinear.history()))
# Will plot the coefficient paths separately for each class:
matplot(xgb.gblinear.history(bst, class_index = 0), type = 'l')
matplot(xgb.gblinear.history(bst, class_index = 1), type = 'l')
@ -90,11 +101,11 @@ matplot(xgb.gblinear.history(bst, class_index = 2), type = 'l')
# CV:
bst <- xgb.cv(param, dtrain, nfold = 5, nrounds = 70, eta = 0.5,
callbacks = list(cb.gblinear.history(FALSE)))
callbacks = list(xgb.cb.gblinear.history(FALSE)))
# 1st fold of 1st class
matplot(xgb.gblinear.history(bst, class_index = 0)[[1]], type = 'l')
}
\seealso{
\code{\link{callbacks}}, \code{\link{xgb.gblinear.history}}.
\link{xgb.gblinear.history}, \link{coef.xgb.Booster}.
}

View File

@ -0,0 +1,25 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/callbacks.R
\name{xgb.cb.print.evaluation}
\alias{xgb.cb.print.evaluation}
\title{Callback for printing the result of evaluation}
\usage{
xgb.cb.print.evaluation(period = 1, showsd = TRUE)
}
\arguments{
\item{period}{results are printed every \code{period} iterations}
\item{showsd}{whether standard deviations should be printed (when available)}
}
\value{
An \code{xgb.Callback} object, which can be passed to \link{xgb.train} or \link{xgb.cv}.
}
\description{
The callback function prints the result of evaluation at every \code{period} iterations.
The initial and the last iteration's evaluations are always printed.
Does not leave any attribute in the booster (see \link{xgb.cb.evaluation.log} for that).
}
\seealso{
\link{xgb.Callback}
}
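A minimal sketch, assuming the agaricus data from the package; with \code{period = 5}, only every 5th iteration (plus the first and last) is printed:

library(xgboost)
data(agaricus.train, package = "xgboost")
dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
bst <- xgb.train(
  params = list(objective = "binary:logistic", nthread = 1),
  data = dtrain, nrounds = 20, evals = list(train = dtrain),
  callbacks = list(xgb.cb.print.evaluation(period = 5))
)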

View File

@ -1,10 +1,10 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/callbacks.R
\name{cb.reset.parameters}
\alias{cb.reset.parameters}
\title{Callback closure for resetting the booster's parameters at each iteration.}
\name{xgb.cb.reset.parameters}
\alias{xgb.cb.reset.parameters}
\title{Callback for resetting the booster's parameters at each iteration.}
\usage{
cb.reset.parameters(new_params)
xgb.cb.reset.parameters(new_params)
}
\arguments{
\item{new_params}{a list where each element corresponds to a parameter that needs to be reset.
@ -14,23 +14,16 @@ or a function of two parameters \code{learning_rates(iteration, nrounds)}
which returns a new parameter value by using the current iteration number
and the total number of boosting rounds.}
}
\value{
An \code{xgb.Callback} object, which can be passed to \link{xgb.train} or \link{xgb.cv}.
}
\description{
Callback closure for resetting the booster's parameters at each iteration.
Callback for resetting the booster's parameters at each iteration.
}
\details{
This is a "pre-iteration" callback function used to reset booster's parameters
at the beginning of each iteration.
Note that when training is resumed from some previous model, and a function is used to
reset a parameter value, the \code{nrounds} argument in this function would be the
number of boosting rounds in the current training.
Callback function expects the following values to be set in its calling frame:
\code{bst} or \code{bst_folds},
\code{iteration},
\code{begin_iteration},
\code{end_iteration}.
}
\seealso{
\code{\link{callbacks}}
Does not leave any attribute in the booster.
}
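A minimal sketch, assuming the agaricus data from the package, showing both accepted forms of \code{new_params} (a vector with one value per round, or a function of \code{iteration} and \code{nrounds}):

library(xgboost)
data(agaricus.train, package = "xgboost")
dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
# vector form: one eta value per boosting round
my_etas <- list(eta = c(0.5, 0.3, 0.1))
# function form: decay eta with the iteration number
# my_etas <- list(eta = function(iteration, nrounds) 0.5 * 0.9^(iteration - 1))
bst <- xgb.train(
  params = list(objective = "binary:logistic", nthread = 1),
  data = dtrain, nrounds = 3, verbose = 0,
  callbacks = list(xgb.cb.reset.parameters(my_etas))
)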

View File

@ -0,0 +1,28 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/callbacks.R
\name{xgb.cb.save.model}
\alias{xgb.cb.save.model}
\title{Callback for saving a model file.}
\usage{
xgb.cb.save.model(save_period = 0, save_name = "xgboost.ubj")
}
\arguments{
\item{save_period}{Save the model to disk after every
\code{save_period} iterations; 0 means save the model at the end.}
\item{save_name}{The name or path for the saved model file.
It can contain a \code{\link[base]{sprintf}} formatting specifier
to include the integer iteration number in the file name.
E.g., with \code{save_name} = 'xgboost_\%04d.model',
the file saved at iteration 50 would be named "xgboost_0050.model".}
}
\value{
An \code{xgb.Callback} object, which can be passed to \link{xgb.train},
but \bold{not} to \link{xgb.cv}.
}
\description{
This callback function allows saving an XGBoost model file, either periodically
after every \code{save_period} iterations or at the end.
Does not leave any attribute in the booster.
}
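A minimal sketch, assuming the agaricus data from the package; the \%04d specifier puts the iteration number into each file name, and the callback only works with \link{xgb.train}:

library(xgboost)
data(agaricus.train, package = "xgboost")
dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
bst <- xgb.train(
  params = list(objective = "binary:logistic", nthread = 1),
  data = dtrain, nrounds = 20, verbose = 0,
  callbacks = list(xgb.cb.save.model(
    save_period = 10,
    save_name = file.path(tempdir(), "xgboost_%04d.ubj")
  ))
)
list.files(tempdir(), pattern = "^xgboost_")  # one file per save point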

View File

@ -82,7 +82,6 @@ new.dtrain <- xgb.DMatrix(
new.dtest <- xgb.DMatrix(
data = new.features.test, label = agaricus.test$label, nthread = 2
)
watchlist <- list(train = new.dtrain)
bst <- xgb.train(params = param, data = new.dtrain, nrounds = nrounds, nthread = 2)
# Model accuracy with new features

View File

@ -9,14 +9,12 @@ xgb.cv(
data,
nrounds,
nfold,
label = NULL,
missing = NA,
prediction = FALSE,
showsd = TRUE,
metrics = list(),
obj = NULL,
feval = NULL,
stratified = TRUE,
stratified = "auto",
folds = NULL,
train_folds = NULL,
verbose = TRUE,
@ -44,22 +42,25 @@ is a shorter summary:
}
See \code{\link{xgb.train}} for further details.
See also demo/ for walkthrough example in R.}
See also demo/ for walkthrough example in R.
\item{data}{takes an \code{xgb.DMatrix}, \code{matrix}, or \code{dgCMatrix} as the input.}
Note that, while \code{params} accepts a \code{seed} entry and will use it for model training if
supplied, this seed is not used for the creation of train-test splits, which instead rely on R's own RNG
system - thus, for reproducible results, one needs to call the \code{set.seed} function beforehand.}
\item{data}{An \code{xgb.DMatrix} object, with corresponding fields like \code{label} or bounds as required
for model training by the objective.
\if{html}{\out{<div class="sourceCode">}}\preformatted{ Note that only the basic `xgb.DMatrix` class is supported - variants such as `xgb.QuantileDMatrix`
or `xgb.ExternalDMatrix` are not supported here.
}\if{html}{\out{</div>}}}
\item{nrounds}{the max number of iterations}
\item{nfold}{the original dataset is randomly partitioned into \code{nfold} equal size subsamples.}
\item{label}{vector of response values. Should be provided only when data is an R-matrix.}
\item{missing}{is only used when input is a dense matrix. By default is set to NA, which means
that NA values should be considered as 'missing' by the algorithm.
Sometimes, 0 or other extreme value might be used to represent missing values.}
\item{prediction}{A logical value indicating whether to return the test fold predictions
from each CV model. This parameter engages the \code{\link{cb.cv.predict}} callback.}
from each CV model. This parameter engages the \code{\link{xgb.cb.cv.predict}} callback.}
\item{showsd}{\code{boolean}, whether to show standard deviation of cross validation}
@ -84,34 +85,54 @@ gradient with given prediction and dtrain.}
\code{list(metric='metric-name', value='metric-value')} with given
prediction and dtrain.}
\item{stratified}{a \code{boolean} indicating whether sampling of folds should be stratified
by the values of outcome labels.}
\item{stratified}{A \code{boolean} indicating whether sampling of folds should be stratified
by the values of outcome labels. For real-valued labels in regression objectives,
stratification will be done by discretizing the labels into up to 5 buckets beforehand.
\if{html}{\out{<div class="sourceCode">}}\preformatted{ If passing "auto", will be set to `TRUE` if the objective in `params` is a classification
objective (from XGBoost's built-in objectives, doesn't apply to custom ones), and to
`FALSE` otherwise.
This parameter is ignored when `data` has a `group` field - in such case, the splitting
will be based on whole groups (note that this might make the folds have different sizes).
Value `TRUE` here is \\bold\{not\} supported for custom objectives.
}\if{html}{\out{</div>}}}
\item{folds}{\code{list} provides a possibility to use a list of pre-defined CV folds
(each element must be a vector of test fold's indices). When folds are supplied,
the \code{nfold} and \code{stratified} parameters are ignored.}
the \code{nfold} and \code{stratified} parameters are ignored.
\if{html}{\out{<div class="sourceCode">}}\preformatted{ If `data` has a `group` field and the objective requires this field, each fold (list element)
must additionally have two attributes (retrievable through \link{attributes}) named `group_test`
and `group_train`, which should hold the `group` to assign through \link{setinfo.xgb.DMatrix} to
the resulting DMatrices.
}\if{html}{\out{</div>}}}
\item{train_folds}{\code{list} specifying which indices to use for training. If \code{NULL}
(the default) all indices not specified in \code{folds} will be used for training.}
(the default) all indices not specified in \code{folds} will be used for training.
\if{html}{\out{<div class="sourceCode">}}\preformatted{ This is not supported when `data` has `group` field.
}\if{html}{\out{</div>}}}
\item{verbose}{\code{boolean}, print the statistics during the process}
\item{print_every_n}{Print each n-th iteration evaluation messages when \code{verbose>0}.
Default is 1 which means all messages are printed. This parameter is passed to the
\code{\link{cb.print.evaluation}} callback.}
\code{\link{xgb.cb.print.evaluation}} callback.}
\item{early_stopping_rounds}{If \code{NULL}, the early stopping function is not triggered.
If set to an integer \code{k}, training with a validation set will stop if the performance
doesn't improve for \code{k} rounds.
Setting this parameter engages the \code{\link{cb.early.stop}} callback.}
Setting this parameter engages the \code{\link{xgb.cb.early.stop}} callback.}
\item{maximize}{If \code{feval} and \code{early_stopping_rounds} are set,
then this parameter must be set as well.
When it is \code{TRUE}, it means the larger the evaluation score the better.
This parameter is passed to the \code{\link{cb.early.stop}} callback.}
This parameter is passed to the \code{\link{xgb.cb.early.stop}} callback.}
\item{callbacks}{a list of callback functions to perform various tasks during boosting.
See \code{\link{callbacks}}. Some of the callbacks are automatically created depending on the
See \code{\link{xgb.Callback}}. Some of the callbacks are automatically created depending on the
parameters' values. User can provide either existing or their own callback methods in order
to customize the training process.}
@ -122,27 +143,27 @@ An object of class \code{xgb.cv.synchronous} with the following elements:
\itemize{
\item \code{call} a function call.
\item \code{params} parameters that were passed to the xgboost library. Note that it does not
capture parameters changed by the \code{\link{cb.reset.parameters}} callback.
\item \code{callbacks} callback functions that were either automatically assigned or
explicitly passed.
capture parameters changed by the \code{\link{xgb.cb.reset.parameters}} callback.
\item \code{evaluation_log} evaluation history stored as a \code{data.table} with the
first column corresponding to iteration number and the rest corresponding to the
CV-based evaluation means and standard deviations for the training and test CV-sets.
It is created by the \code{\link{cb.evaluation.log}} callback.
It is created by the \code{\link{xgb.cb.evaluation.log}} callback.
\item \code{niter} number of boosting iterations.
\item \code{nfeatures} number of features in training data.
\item \code{folds} the list of CV folds' indices - either those passed through the \code{folds}
parameter or randomly generated.
\item \code{best_iteration} iteration number with the best evaluation metric value
(only available with early stopping).
\item \code{pred} CV prediction values available when \code{prediction} is set.
It is either vector or matrix (see \code{\link{cb.cv.predict}}).
\item \code{models} a list of the CV folds' models. It is only available with the explicit
setting of the \code{cb.cv.predict(save_models = TRUE)} callback.
}
Plus other potential elements that are the result of callbacks, such as a list \code{cv_predict} with
a sub-element \code{pred} when passing \code{prediction = TRUE}, which is added by the \link{xgb.cb.cv.predict}
callback (note that one can also pass it manually under \code{callbacks} with different settings,
such as also saving the models created during cross validation); or a list \code{early_stop} which
will contain elements such as \code{best_iteration} when using the early stopping callback (\link{xgb.cb.early.stop}).
}
\description{
The cross validation function of xgboost
The cross validation function of xgboost.
}
\details{
The original sample is randomly partitioned into \code{nfold} equal size subsamples.
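A minimal sketch of the updated interface, assuming the agaricus data from the package; note the explicit \code{set.seed()} call, since fold assignment uses R's RNG rather than the \code{seed} parameter:

library(xgboost)
data(agaricus.train, package = "xgboost")
dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
set.seed(123)  # makes the train/test splits reproducible
cv <- xgb.cv(
  params = list(objective = "binary:logistic", nthread = 1),
  data = dtrain, nrounds = 5, nfold = 5,
  stratified = "auto", prediction = TRUE, verbose = FALSE
)
head(cv$evaluation_log)
str(cv$cv_predict$pred)  # added by the xgb.cb.cv.predict callback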

View File

@ -44,6 +44,7 @@ as a \code{character} vector. Otherwise it will return \code{TRUE}.
Dump an xgboost model in text format.
}
\examples{
\dontshow{RhpcBLASctl::omp_set_num_threads(1)}
data(agaricus.train, package='xgboost')
data(agaricus.test, package='xgboost')
train <- agaricus.train

View File

@ -8,7 +8,7 @@ xgb.gblinear.history(model, class_index = NULL)
}
\arguments{
\item{model}{either an \code{xgb.Booster} or a result of \code{xgb.cv()}, trained
using the \code{cb.gblinear.history()} callback, but \bold{not} a booster
using the \link{xgb.cb.gblinear.history} callback, but \bold{not} a booster
loaded from \link{xgb.load} or \link{xgb.load.raw}.}
\item{class_index}{zero-based class index to extract the coefficients for only that
@ -16,23 +16,31 @@ specific class in a multinomial multiclass model. When it is NULL, all the
coefficients are returned. Has no effect in non-multiclass models.}
}
\value{
For an \code{xgb.train} result, a matrix (either dense or sparse) with the columns
corresponding to iteration's coefficients (in the order as \code{xgb.dump()} would
return) and the rows corresponding to boosting iterations.
For an \link{xgb.train} result, a matrix (either dense or sparse) with the columns
corresponding to iteration's coefficients and the rows corresponding to boosting iterations.
For an \code{xgb.cv} result, a list of such matrices is returned with the elements
For an \link{xgb.cv} result, a list of such matrices is returned with the elements
corresponding to CV folds.
When there is more than one coefficient per feature (e.g. multi-class classification)
and \code{class_index} is not provided,
the result will be reshaped into a vector where coefficients are arranged first by features and
then by class (e.g. first 1 through N coefficients will be for the first class, then
coefficients N+1 through 2N for the second class, and so on).
}
\description{
A helper function to extract the matrix of linear coefficients' history
from a gblinear model created while using the \code{cb.gblinear.history()}
callback.
from a gblinear model created while using the \link{xgb.cb.gblinear.history}
callback (which must be added manually as by default it's not used).
}
\details{
Note that this is an R-specific function that relies on R attributes that
are not kept by XGBoost's own serialization functions, so they will be missing from
a model loaded through \link{xgb.load} or \link{xgb.load.raw}.
In order for a serialized model to be accepted by tgis function, one must use R
In order for a serialized model to be accepted by this function, one must use R
serializers such as \link{saveRDS}.
}
\seealso{
\link{xgb.cb.gblinear.history}, \link{coef.xgb.Booster}.
}
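A minimal sketch of the serialization caveat above, assuming the agaricus data from the package: the coefficient history is stored as an R attribute, so it survives \code{saveRDS()}/\code{readRDS()} but would be lost through \link{xgb.save} or \link{xgb.save.raw}:

library(xgboost)
data(agaricus.train, package = "xgboost")
dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
bst <- xgb.train(
  params = list(booster = "gblinear", objective = "binary:logistic", nthread = 1),
  data = dtrain, nrounds = 10, verbose = 0,
  callbacks = list(xgb.cb.gblinear.history())
)
fname <- file.path(tempdir(), "bst.rds")
saveRDS(bst, fname)
coef_path <- xgb.gblinear.history(readRDS(fname))  # works: R serializer keeps the attribute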

View File

@ -17,7 +17,7 @@ Load xgboost model from the binary model file.
}
\details{
The input file is expected to contain a model saved in an xgboost model format
using either \code{\link{xgb.save}} or \code{\link{cb.save.model}} in R, or using some
using either \code{\link{xgb.save}} or \code{\link{xgb.cb.save.model}} in R, or using some
appropriate methods from other xgboost interfaces. E.g., a model trained in Python and
saved from there in xgboost format, could be loaded from R.
@ -25,6 +25,7 @@ Note: a model saved as an R-object, has to be loaded using corresponding R-metho
not \code{xgb.load}.
}
\examples{
\dontshow{RhpcBLASctl::omp_set_num_threads(1)}
data(agaricus.train, package='xgboost')
data(agaricus.test, package='xgboost')

View File

@ -41,6 +41,7 @@ how to persist models in a future-proof way, i.e. to make the model accessible i
releases of XGBoost.
}
\examples{
\dontshow{RhpcBLASctl::omp_set_num_threads(1)}
data(agaricus.train, package='xgboost')
data(agaricus.test, package='xgboost')

View File

@ -21,6 +21,7 @@ xgb.save.raw(model, raw_format = "ubj")
Save xgboost model from xgboost or xgb.train
}
\examples{
\dontshow{RhpcBLASctl::omp_set_num_threads(1)}
data(agaricus.train, package='xgboost')
data(agaricus.test, package='xgboost')

View File

@ -6,14 +6,18 @@
\title{Get a new DMatrix containing the specified rows of
original xgb.DMatrix object}
\usage{
xgb.slice.DMatrix(object, idxset)
xgb.slice.DMatrix(object, idxset, allow_groups = FALSE)
\method{[}{xgb.DMatrix}(object, idxset, colset = NULL)
}
\arguments{
\item{object}{Object of class "xgb.DMatrix"}
\item{object}{Object of class "xgb.DMatrix".}
\item{idxset}{a integer vector of indices of rows needed}
\item{idxset}{An integer vector of indices of rows needed (base-1 indexing).}
\item{allow_groups}{Whether to allow slicing an \code{xgb.DMatrix} with \code{group} (or
equivalently \code{qid}) field. Note that in such case, the result will not have
the groups anymore - they need to be set manually through \code{setinfo}.}
\item{colset}{currently not used (columns subsetting is not available)}
}
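A minimal sketch of slicing a DMatrix that carries a \code{group} field, assuming two hypothetical query groups over the agaricus data; the sliced result loses the groups, so they must be re-assigned through \code{setinfo()}:

library(xgboost)
data(agaricus.train, package = "xgboost")
dm <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
setinfo(dm, "group", c(3000, 3513))  # hypothetical query groups (sizes sum to nrow)
dsub <- xgb.slice.DMatrix(dm, 1:3000, allow_groups = TRUE)
setinfo(dsub, "group", 3000)         # groups must be set again on the sliced DMatrix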

View File

@ -9,7 +9,7 @@ xgb.train(
params = list(),
data,
nrounds,
watchlist = list(),
evals = list(),
obj = NULL,
feval = NULL,
verbose = 1,
@ -158,13 +158,13 @@ List is provided in detail section.}
\item{nrounds}{max number of boosting iterations.}
\item{watchlist}{named list of xgb.DMatrix datasets to use for evaluating model performance.
\item{evals}{Named list of \code{xgb.DMatrix} datasets to use for evaluating model performance.
Metrics specified in either \code{eval_metric} or \code{feval} will be computed for each
of these datasets during each boosting iteration, and stored in the end as a field named
\code{evaluation_log} in the resulting object. When either \code{verbose>=1} or
\code{\link{cb.print.evaluation}} callback is engaged, the performance results are continuously
\code{\link{xgb.cb.print.evaluation}} callback is engaged, the performance results are continuously
printed out during the training.
E.g., specifying \code{watchlist=list(validation1=mat1, validation2=mat2)} allows to track
E.g., specifying \code{evals=list(validation1=mat1, validation2=mat2)} allows to track
the performance of each round's model on mat1 and mat2.}
\item{obj}{customized objective function. Returns gradient and second order
@ -177,24 +177,24 @@ prediction and dtrain.}
\item{verbose}{If 0, xgboost will stay silent. If 1, it will print information about performance.
If 2, some additional information will be printed out.
Note that setting \code{verbose > 0} automatically engages the
\code{cb.print.evaluation(period=1)} callback function.}
\code{xgb.cb.print.evaluation(period=1)} callback function.}
\item{print_every_n}{Print each n-th iteration evaluation messages when \code{verbose>0}.
Default is 1 which means all messages are printed. This parameter is passed to the
\code{\link{cb.print.evaluation}} callback.}
\code{\link{xgb.cb.print.evaluation}} callback.}
\item{early_stopping_rounds}{If \code{NULL}, the early stopping function is not triggered.
If set to an integer \code{k}, training with a validation set will stop if the performance
doesn't improve for \code{k} rounds.
Setting this parameter engages the \code{\link{cb.early.stop}} callback.}
Setting this parameter engages the \code{\link{xgb.cb.early.stop}} callback.}
\item{maximize}{If \code{feval} and \code{early_stopping_rounds} are set,
then this parameter must be set as well.
When it is \code{TRUE}, it means the larger the evaluation score the better.
This parameter is passed to the \code{\link{cb.early.stop}} callback.}
This parameter is passed to the \code{\link{xgb.cb.early.stop}} callback.}
\item{save_period}{when it is non-NULL, model is saved to disk after every \code{save_period} rounds,
0 means save at the end. The saving is handled by the \code{\link{cb.save.model}} callback.}
0 means save at the end. The saving is handled by the \code{\link{xgb.cb.save.model}} callback.}
\item{save_name}{the name or path for periodically saved model file.}
@ -203,12 +203,13 @@ Could be either an object of class \code{xgb.Booster}, or its raw data, or the n
file with a previously saved model.}
\item{callbacks}{a list of callback functions to perform various tasks during boosting.
See \code{\link{callbacks}}. Some of the callbacks are automatically created depending on the
See \code{\link{xgb.Callback}}. Some of the callbacks are automatically created depending on the
parameters' values. User can provide either existing or their own callback methods in order
to customize the training process.
\if{html}{\out{<div class="sourceCode">}}\preformatted{ Note that some callbacks might try to set an evaluation log - be aware that these evaluation logs
are kept as R attributes, and thus do not get saved when using non-R serializaters like
\if{html}{\out{<div class="sourceCode">}}\preformatted{ Note that some callbacks might try to leave attributes in the resulting model object,
such as an evaluation log (a `data.table` object) - be aware that these objects are kept
as R attributes, and thus do not get saved when using XGBoost's own serializers like
\link{xgb.save} (but are kept when using R serializers like \link{saveRDS}).
}\if{html}{\out{</div>}}}
@ -233,7 +234,7 @@ The \code{xgboost} function is a simpler wrapper for \code{xgb.train}.
\details{
These are the training functions for \code{xgboost}.
The \code{xgb.train} interface supports advanced features such as \code{watchlist},
The \code{xgb.train} interface supports advanced features such as \code{evals},
customized objective and evaluation metric functions, and is therefore more flexible
than the \code{xgboost} interface.
@ -241,6 +242,11 @@ Parallelization is automatically enabled if \code{OpenMP} is present.
Number of threads can also be manually specified via the \code{nthread}
parameter.
While in other interfaces the random seed defaults to zero, in R, if a parameter \code{seed}
is not manually supplied, it will generate a random seed through R's own random number generator,
whose seed in turn is controllable through \code{set.seed}. If \code{seed} is passed, it will override the
RNG from R.
The evaluation metric is chosen automatically by XGBoost (according to the objective)
when the \code{eval_metric} parameter is not provided.
User may set one or several \code{eval_metric} parameters.
@ -264,18 +270,19 @@ Different threshold (e.g., 0.) could be specified as "error@0."
The following callbacks are automatically created when certain parameters are set:
\itemize{
\item \code{cb.print.evaluation} is turned on when \code{verbose > 0};
\item \code{xgb.cb.print.evaluation} is turned on when \code{verbose > 0};
and the \code{print_every_n} parameter is passed to it.
\item \code{cb.evaluation.log} is on when \code{watchlist} is present.
\item \code{cb.early.stop}: when \code{early_stopping_rounds} is set.
\item \code{cb.save.model}: when \code{save_period > 0} is set.
\item \code{xgb.cb.evaluation.log} is on when \code{evals} is present.
\item \code{xgb.cb.early.stop}: when \code{early_stopping_rounds} is set.
\item \code{xgb.cb.save.model}: when \code{save_period > 0} is set.
}
Note that objects of type \code{xgb.Booster} as returned by this function behave a bit differently
from typical R objects (it's an 'altrep' list class), and it makes a separation between
internal booster attributes (restricted to jsonifyable data), accessed through \link{xgb.attr}
and shared between interfaces through serialization functions like \link{xgb.save}; and
R-specific attributes, accessed through \link{attributes} and \link{attr}, which are otherwise
R-specific attributes (typically the result from a callback), accessed through \link{attributes}
and \link{attr}, which are otherwise
only used in the R interface, only kept when using R's serializers like \link{saveRDS}, and
not used in any way by functions like \link{predict.xgb.Booster}.
@ -300,12 +307,12 @@ dtrain <- with(
dtest <- with(
agaricus.test, xgb.DMatrix(data, label = label, nthread = nthread)
)
watchlist <- list(train = dtrain, eval = dtest)
evals <- list(train = dtrain, eval = dtest)
## A simple xgb.train example:
param <- list(max_depth = 2, eta = 1, nthread = nthread,
objective = "binary:logistic", eval_metric = "auc")
bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0)
bst <- xgb.train(param, dtrain, nrounds = 2, evals = evals, verbose = 0)
## An xgb.train example where custom objective and evaluation metric are
## used:
@ -326,15 +333,15 @@ evalerror <- function(preds, dtrain) {
# as 'objective' and 'eval_metric' parameters in the params list:
param <- list(max_depth = 2, eta = 1, nthread = nthread,
objective = logregobj, eval_metric = evalerror)
bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0)
bst <- xgb.train(param, dtrain, nrounds = 2, evals = evals, verbose = 0)
# or through the ... arguments:
param <- list(max_depth = 2, eta = 1, nthread = nthread)
bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0,
bst <- xgb.train(param, dtrain, nrounds = 2, evals = evals, verbose = 0,
objective = logregobj, eval_metric = evalerror)
# or as dedicated 'obj' and 'feval' parameters of xgb.train:
bst <- xgb.train(param, dtrain, nrounds = 2, watchlist,
bst <- xgb.train(param, dtrain, nrounds = 2, evals = evals,
obj = logregobj, feval = evalerror)
@ -342,11 +349,11 @@ bst <- xgb.train(param, dtrain, nrounds = 2, watchlist,
param <- list(max_depth = 2, eta = 1, nthread = nthread,
objective = "binary:logistic", eval_metric = "auc")
my_etas <- list(eta = c(0.5, 0.1))
bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0,
callbacks = list(cb.reset.parameters(my_etas)))
bst <- xgb.train(param, dtrain, nrounds = 2, evals = evals, verbose = 0,
callbacks = list(xgb.cb.reset.parameters(my_etas)))
## Early stopping:
bst <- xgb.train(param, dtrain, nrounds = 25, watchlist,
bst <- xgb.train(param, dtrain, nrounds = 25, evals = evals,
early_stopping_rounds = 3)
## An 'xgboost' interface example:
@ -361,7 +368,7 @@ Tianqi Chen and Carlos Guestrin, "XGBoost: A Scalable Tree Boosting System",
22nd SIGKDD Conference on Knowledge Discovery and Data Mining, 2016, \url{https://arxiv.org/abs/1603.02754}
}
\seealso{
\code{\link{callbacks}},
\code{\link{xgb.Callback}},
\code{\link{predict.xgb.Booster}},
\code{\link{xgb.cv}}
}
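A minimal sketch of the two attribute systems described above, assuming the agaricus data from the package:

library(xgboost)
data(agaricus.train, package = "xgboost")
dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
bst <- xgb.train(
  params = list(objective = "binary:logistic", nthread = 1),
  data = dtrain, nrounds = 5, evals = list(train = dtrain), verbose = 0
)
xgb.attributes(bst)             # C-level booster attributes, kept by xgb.save()
attributes(bst)$evaluation_log  # R-level attribute from a callback, kept only by saveRDS()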

View File

@ -25,6 +25,15 @@ values of one or more global-scope parameters. Use \code{xgb.get.config} to fetc
values of all global-scope parameters (listed in
\url{https://xgboost.readthedocs.io/en/stable/parameter.html}).
}
\details{
Note that serialization-related functions might use a globally-configured number of threads,
which is managed by the system's OpenMP (OMP) configuration instead. Typically, XGBoost methods
accept an \code{nthreads} parameter, but some methods like \code{readRDS} might get executed before such
a parameter can be supplied.
The number of OMP threads can in turn be configured for example through an environment variable
\code{OMP_NUM_THREADS} (needs to be set before R is started), or through \code{RhpcBLASctl::omp_set_num_threads}.
}
\examples{
# Set verbosity level to silent (0)
xgb.set.config(verbosity = 0)
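A minimal sketch of the threading note above, assuming the agaricus data from the package; since \code{readRDS()} takes no \code{nthreads} argument, the OMP thread count is capped globally beforehand:

library(xgboost)
data(agaricus.train, package = "xgboost")
dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
bst <- xgb.train(
  params = list(objective = "binary:logistic", nthread = 1),
  data = dtrain, nrounds = 2, verbose = 0
)
fname <- file.path(tempdir(), "bst.rds")
saveRDS(bst, fname)
RhpcBLASctl::omp_set_num_threads(1)  # cap OMP threads before deserialization
bst2 <- readRDS(fname)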

View File

@ -99,11 +99,14 @@ OBJECTS= \
$(PKGROOT)/src/context.o \
$(PKGROOT)/src/logging.o \
$(PKGROOT)/src/global_config.o \
$(PKGROOT)/src/collective/result.o \
$(PKGROOT)/src/collective/allgather.o \
$(PKGROOT)/src/collective/allreduce.o \
$(PKGROOT)/src/collective/broadcast.o \
$(PKGROOT)/src/collective/comm.o \
$(PKGROOT)/src/collective/comm_group.o \
$(PKGROOT)/src/collective/coll.o \
$(PKGROOT)/src/collective/communicator-inl.o \
$(PKGROOT)/src/collective/tracker.o \
$(PKGROOT)/src/collective/communicator.o \
$(PKGROOT)/src/collective/in_memory_communicator.o \

View File

@ -99,11 +99,14 @@ OBJECTS= \
$(PKGROOT)/src/context.o \
$(PKGROOT)/src/logging.o \
$(PKGROOT)/src/global_config.o \
$(PKGROOT)/src/collective/result.o \
$(PKGROOT)/src/collective/allgather.o \
$(PKGROOT)/src/collective/allreduce.o \
$(PKGROOT)/src/collective/broadcast.o \
$(PKGROOT)/src/collective/comm.o \
$(PKGROOT)/src/collective/comm_group.o \
$(PKGROOT)/src/collective/coll.o \
$(PKGROOT)/src/collective/communicator-inl.o \
$(PKGROOT)/src/collective/tracker.o \
$(PKGROOT)/src/collective/communicator.o \
$(PKGROOT)/src/collective/in_memory_communicator.o \

View File

@ -37,6 +37,9 @@ extern SEXP XGBoosterLoadJsonConfig_R(SEXP handle, SEXP value);
extern SEXP XGBoosterSerializeToBuffer_R(SEXP handle);
extern SEXP XGBoosterUnserializeFromBuffer_R(SEXP handle, SEXP raw);
extern SEXP XGBoosterPredictFromDMatrix_R(SEXP, SEXP, SEXP);
extern SEXP XGBoosterPredictFromDense_R(SEXP, SEXP, SEXP, SEXP, SEXP);
extern SEXP XGBoosterPredictFromCSR_R(SEXP, SEXP, SEXP, SEXP, SEXP);
extern SEXP XGBoosterPredictFromColumnar_R(SEXP, SEXP, SEXP, SEXP, SEXP);
extern SEXP XGBoosterSaveModel_R(SEXP, SEXP);
extern SEXP XGBoosterSetAttr_R(SEXP, SEXP, SEXP);
extern SEXP XGBoosterSetParam_R(SEXP, SEXP, SEXP);
@ -46,7 +49,7 @@ extern SEXP XGSetArrayDimInplace_R(SEXP, SEXP);
extern SEXP XGSetArrayDimNamesInplace_R(SEXP, SEXP);
extern SEXP XGDMatrixCreateFromCSC_R(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP);
extern SEXP XGDMatrixCreateFromCSR_R(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP);
extern SEXP XGDMatrixCreateFromFile_R(SEXP, SEXP);
extern SEXP XGDMatrixCreateFromURI_R(SEXP, SEXP, SEXP);
extern SEXP XGDMatrixCreateFromMat_R(SEXP, SEXP, SEXP);
extern SEXP XGDMatrixGetFloatInfo_R(SEXP, SEXP);
extern SEXP XGDMatrixGetUIntInfo_R(SEXP, SEXP);
@ -68,11 +71,12 @@ extern SEXP XGDMatrixGetDataAsCSR_R(SEXP);
extern SEXP XGDMatrixSaveBinary_R(SEXP, SEXP, SEXP);
extern SEXP XGDMatrixSetInfo_R(SEXP, SEXP, SEXP);
extern SEXP XGDMatrixSetStrFeatureInfo_R(SEXP, SEXP, SEXP);
extern SEXP XGDMatrixSliceDMatrix_R(SEXP, SEXP);
extern SEXP XGDMatrixSliceDMatrix_R(SEXP, SEXP, SEXP);
extern SEXP XGBSetGlobalConfig_R(SEXP);
extern SEXP XGBGetGlobalConfig_R(void);
extern SEXP XGBoosterFeatureScore_R(SEXP, SEXP);
extern SEXP XGBoosterSlice_R(SEXP, SEXP, SEXP, SEXP);
extern SEXP XGBoosterSliceAndReplace_R(SEXP, SEXP, SEXP, SEXP);
static const R_CallMethodDef CallEntries[] = {
{"XGDuplicate_R", (DL_FUNC) &XGDuplicate_R, 1},
@ -96,6 +100,9 @@ static const R_CallMethodDef CallEntries[] = {
{"XGBoosterSerializeToBuffer_R", (DL_FUNC) &XGBoosterSerializeToBuffer_R, 1},
{"XGBoosterUnserializeFromBuffer_R", (DL_FUNC) &XGBoosterUnserializeFromBuffer_R, 2},
{"XGBoosterPredictFromDMatrix_R", (DL_FUNC) &XGBoosterPredictFromDMatrix_R, 3},
{"XGBoosterPredictFromDense_R", (DL_FUNC) &XGBoosterPredictFromDense_R, 5},
{"XGBoosterPredictFromCSR_R", (DL_FUNC) &XGBoosterPredictFromCSR_R, 5},
{"XGBoosterPredictFromColumnar_R", (DL_FUNC) &XGBoosterPredictFromColumnar_R, 5},
{"XGBoosterSaveModel_R", (DL_FUNC) &XGBoosterSaveModel_R, 2},
{"XGBoosterSetAttr_R", (DL_FUNC) &XGBoosterSetAttr_R, 3},
{"XGBoosterSetParam_R", (DL_FUNC) &XGBoosterSetParam_R, 3},
@ -105,7 +112,7 @@ static const R_CallMethodDef CallEntries[] = {
{"XGSetArrayDimNamesInplace_R", (DL_FUNC) &XGSetArrayDimNamesInplace_R, 2},
{"XGDMatrixCreateFromCSC_R", (DL_FUNC) &XGDMatrixCreateFromCSC_R, 6},
{"XGDMatrixCreateFromCSR_R", (DL_FUNC) &XGDMatrixCreateFromCSR_R, 6},
{"XGDMatrixCreateFromFile_R", (DL_FUNC) &XGDMatrixCreateFromFile_R, 2},
{"XGDMatrixCreateFromURI_R", (DL_FUNC) &XGDMatrixCreateFromURI_R, 3},
{"XGDMatrixCreateFromMat_R", (DL_FUNC) &XGDMatrixCreateFromMat_R, 3},
{"XGDMatrixGetFloatInfo_R", (DL_FUNC) &XGDMatrixGetFloatInfo_R, 2},
{"XGDMatrixGetUIntInfo_R", (DL_FUNC) &XGDMatrixGetUIntInfo_R, 2},
@ -127,11 +134,12 @@ static const R_CallMethodDef CallEntries[] = {
{"XGDMatrixSaveBinary_R", (DL_FUNC) &XGDMatrixSaveBinary_R, 3},
{"XGDMatrixSetInfo_R", (DL_FUNC) &XGDMatrixSetInfo_R, 3},
{"XGDMatrixSetStrFeatureInfo_R", (DL_FUNC) &XGDMatrixSetStrFeatureInfo_R, 3},
{"XGDMatrixSliceDMatrix_R", (DL_FUNC) &XGDMatrixSliceDMatrix_R, 2},
{"XGDMatrixSliceDMatrix_R", (DL_FUNC) &XGDMatrixSliceDMatrix_R, 3},
{"XGBSetGlobalConfig_R", (DL_FUNC) &XGBSetGlobalConfig_R, 1},
{"XGBGetGlobalConfig_R", (DL_FUNC) &XGBGetGlobalConfig_R, 0},
{"XGBoosterFeatureScore_R", (DL_FUNC) &XGBoosterFeatureScore_R, 2},
{"XGBoosterSlice_R", (DL_FUNC) &XGBoosterSlice_R, 4},
{"XGBoosterSliceAndReplace_R", (DL_FUNC) &XGBoosterSliceAndReplace_R, 4},
{NULL, NULL, 0}
};

View File

@ -13,6 +13,7 @@
#include <cstdint>
#include <cstdio>
#include <cstring>
#include <memory>
#include <limits>
#include <sstream>
#include <string>
@ -207,25 +208,24 @@ SEXP SafeAllocInteger(size_t size, SEXP continuation_token) {
return xgboost::Json::Dump(jinterface);
}
[[nodiscard]] std::string MakeJsonConfigForArray(SEXP missing, SEXP n_threads, SEXPTYPE arr_type) {
using namespace ::xgboost; // NOLINT
Json jconfig{Object{}};
const SEXPTYPE missing_type = TYPEOF(missing);
if (Rf_isNull(missing) || (missing_type == REALSXP && ISNAN(Rf_asReal(missing))) ||
(missing_type == LGLSXP && Rf_asLogical(missing) == R_NaInt) ||
(missing_type == INTSXP && Rf_asInteger(missing) == R_NaInt)) {
void AddMissingToJson(xgboost::Json *jconfig, SEXP missing, SEXPTYPE arr_type) {
if (Rf_isNull(missing) || ISNAN(Rf_asReal(missing))) {
// missing is not specified
if (arr_type == REALSXP) {
jconfig["missing"] = std::numeric_limits<double>::quiet_NaN();
(*jconfig)["missing"] = std::numeric_limits<double>::quiet_NaN();
} else {
jconfig["missing"] = R_NaInt;
(*jconfig)["missing"] = R_NaInt;
}
} else {
// missing specified
jconfig["missing"] = Rf_asReal(missing);
(*jconfig)["missing"] = Rf_asReal(missing);
}
}
[[nodiscard]] std::string MakeJsonConfigForArray(SEXP missing, SEXP n_threads, SEXPTYPE arr_type) {
using namespace ::xgboost; // NOLINT
Json jconfig{Object{}};
AddMissingToJson(&jconfig, missing, arr_type);
jconfig["nthread"] = Rf_asInteger(n_threads);
return Json::Dump(jconfig);
}
@ -365,15 +365,22 @@ XGB_DLL SEXP XGBGetGlobalConfig_R() {
return mkString(json_str);
}
XGB_DLL SEXP XGDMatrixCreateFromFile_R(SEXP fname, SEXP silent) {
SEXP ret = PROTECT(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue));
XGB_DLL SEXP XGDMatrixCreateFromURI_R(SEXP uri, SEXP silent, SEXP data_split_mode) {
SEXP ret = Rf_protect(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue));
SEXP uri_char = Rf_protect(Rf_asChar(uri));
const char *uri_ptr = CHAR(uri_char);
R_API_BEGIN();
xgboost::Json jconfig{xgboost::Object{}};
jconfig["uri"] = std::string(uri_ptr);
jconfig["silent"] = Rf_asLogical(silent);
jconfig["data_split_mode"] = Rf_asInteger(data_split_mode);
const std::string sconfig = xgboost::Json::Dump(jconfig);
DMatrixHandle handle;
CHECK_CALL(XGDMatrixCreateFromFile(CHAR(asChar(fname)), asInteger(silent), &handle));
CHECK_CALL(XGDMatrixCreateFromURI(sconfig.c_str(), &handle));
R_SetExternalPtrAddr(ret, handle);
R_RegisterCFinalizerEx(ret, _DMatrixFinalizer, TRUE);
R_API_END();
UNPROTECT(1);
Rf_unprotect(2);
return ret;
}
@ -404,7 +411,7 @@ XGB_DLL SEXP XGDMatrixCreateFromDF_R(SEXP df, SEXP missing, SEXP n_threads) {
DMatrixHandle handle;
std::int32_t rc{0};
{
std::string sinterface = MakeArrayInterfaceFromRDataFrame(df);
const std::string sinterface = MakeArrayInterfaceFromRDataFrame(df);
xgboost::Json jconfig{xgboost::Object{}};
jconfig["missing"] = asReal(missing);
jconfig["nthread"] = asInteger(n_threads);
@ -456,7 +463,7 @@ XGB_DLL SEXP XGDMatrixCreateFromCSC_R(SEXP indptr, SEXP indices, SEXP data, SEXP
Json jconfig{Object{}};
// Construct configuration
jconfig["nthread"] = Integer{threads};
jconfig["missing"] = xgboost::Number{asReal(missing)};
AddMissingToJson(&jconfig, missing, TYPEOF(data));
std::string config;
Json::Dump(jconfig, &config);
res_code = XGDMatrixCreateFromCSC(sindptr.c_str(), sindices.c_str(), sdata.c_str(), nrow,
@ -491,7 +498,7 @@ XGB_DLL SEXP XGDMatrixCreateFromCSR_R(SEXP indptr, SEXP indices, SEXP data, SEXP
Json jconfig{Object{}};
// Construct configuration
jconfig["nthread"] = Integer{threads};
jconfig["missing"] = xgboost::Number{asReal(missing)};
AddMissingToJson(&jconfig, missing, TYPEOF(data));
std::string config;
Json::Dump(jconfig, &config);
res_code = XGDMatrixCreateFromCSR(sindptr.c_str(), sindices.c_str(), sdata.c_str(), ncol,
@ -505,7 +512,7 @@ XGB_DLL SEXP XGDMatrixCreateFromCSR_R(SEXP indptr, SEXP indices, SEXP data, SEXP
return ret;
}
XGB_DLL SEXP XGDMatrixSliceDMatrix_R(SEXP handle, SEXP idxset) {
XGB_DLL SEXP XGDMatrixSliceDMatrix_R(SEXP handle, SEXP idxset, SEXP allow_groups) {
SEXP ret = PROTECT(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue));
R_API_BEGIN();
R_xlen_t len = Rf_xlength(idxset);
@ -524,7 +531,7 @@ XGB_DLL SEXP XGDMatrixSliceDMatrix_R(SEXP handle, SEXP idxset) {
res_code = XGDMatrixSliceDMatrixEx(R_ExternalPtrAddr(handle),
BeginPtr(idxvec), len,
&res,
0);
Rf_asLogical(allow_groups));
}
CHECK_CALL(res_code);
R_SetExternalPtrAddr(ret, res);
@ -1240,7 +1247,60 @@ XGB_DLL SEXP XGBoosterEvalOneIter_R(SEXP handle, SEXP iter, SEXP dmats, SEXP evn
return mkString(ret);
}
XGB_DLL SEXP XGBoosterPredictFromDMatrix_R(SEXP handle, SEXP dmat, SEXP json_config) {
namespace {
struct ProxyDmatrixError : public std::exception {};
struct ProxyDmatrixWrapper {
DMatrixHandle proxy_dmat_handle;
ProxyDmatrixWrapper() {
int res_code = XGProxyDMatrixCreate(&this->proxy_dmat_handle);
if (res_code != 0) {
throw ProxyDmatrixError();
}
}
~ProxyDmatrixWrapper() {
if (this->proxy_dmat_handle) {
XGDMatrixFree(this->proxy_dmat_handle);
this->proxy_dmat_handle = nullptr;
}
}
DMatrixHandle get_handle() {
return this->proxy_dmat_handle;
}
};
std::unique_ptr<ProxyDmatrixWrapper> GetProxyDMatrixWithBaseMargin(SEXP base_margin) {
if (Rf_isNull(base_margin)) {
return std::unique_ptr<ProxyDmatrixWrapper>(nullptr);
}
SEXP base_margin_dim = Rf_getAttrib(base_margin, R_DimSymbol);
int res_code;
try {
const std::string array_str = Rf_isNull(base_margin_dim)?
MakeArrayInterfaceFromRVector(base_margin) : MakeArrayInterfaceFromRMat(base_margin);
std::unique_ptr<ProxyDmatrixWrapper> proxy_dmat(new ProxyDmatrixWrapper());
res_code = XGDMatrixSetInfoFromInterface(proxy_dmat->get_handle(),
"base_margin",
array_str.c_str());
if (res_code != 0) {
throw ProxyDmatrixError();
}
return proxy_dmat;
} catch(ProxyDmatrixError &err) {
Rf_error("%s", XGBGetLastError());
}
}
enum class PredictionInputType {DMatrix, DenseMatrix, CSRMatrix, DataFrame};
SEXP XGBoosterPredictGeneric(SEXP handle, SEXP input_data, SEXP json_config,
PredictionInputType input_type, SEXP missing,
SEXP base_margin) {
SEXP r_out_shape;
SEXP r_out_result;
SEXP r_out = PROTECT(allocVector(VECSXP, 2));
@ -1252,9 +1312,79 @@ XGB_DLL SEXP XGBoosterPredictFromDMatrix_R(SEXP handle, SEXP dmat, SEXP json_con
bst_ulong out_dim;
bst_ulong const *out_shape;
float const *out_result;
CHECK_CALL(XGBoosterPredictFromDMatrix(R_ExternalPtrAddr(handle),
R_ExternalPtrAddr(dmat), c_json_config,
&out_shape, &out_dim, &out_result));
int res_code;
{
switch (input_type) {
case PredictionInputType::DMatrix: {
res_code = XGBoosterPredictFromDMatrix(R_ExternalPtrAddr(handle),
R_ExternalPtrAddr(input_data), c_json_config,
&out_shape, &out_dim, &out_result);
break;
}
case PredictionInputType::CSRMatrix: {
std::unique_ptr<ProxyDmatrixWrapper> proxy_dmat = GetProxyDMatrixWithBaseMargin(
base_margin);
DMatrixHandle proxy_dmat_handle = proxy_dmat.get()? proxy_dmat->get_handle() : nullptr;
SEXP indptr = VECTOR_ELT(input_data, 0);
SEXP indices = VECTOR_ELT(input_data, 1);
SEXP data = VECTOR_ELT(input_data, 2);
const int ncol_csr = Rf_asInteger(VECTOR_ELT(input_data, 3));
const SEXPTYPE type_data = TYPEOF(data);
CHECK_EQ(type_data, REALSXP);
std::string sindptr, sindices, sdata;
CreateFromSparse(indptr, indices, data, &sindptr, &sindices, &sdata);
xgboost::StringView json_str(c_json_config);
xgboost::Json new_json = xgboost::Json::Load(json_str);
AddMissingToJson(&new_json, missing, type_data);
const std::string new_c_json = xgboost::Json::Dump(new_json);
res_code = XGBoosterPredictFromCSR(
R_ExternalPtrAddr(handle), sindptr.c_str(), sindices.c_str(), sdata.c_str(),
ncol_csr, new_c_json.c_str(), proxy_dmat_handle, &out_shape, &out_dim, &out_result);
break;
}
case PredictionInputType::DenseMatrix: {
std::unique_ptr<ProxyDmatrixWrapper> proxy_dmat = GetProxyDMatrixWithBaseMargin(
base_margin);
DMatrixHandle proxy_dmat_handle = proxy_dmat.get()? proxy_dmat->get_handle() : nullptr;
const std::string array_str = MakeArrayInterfaceFromRMat(input_data);
xgboost::StringView json_str(c_json_config);
xgboost::Json new_json = xgboost::Json::Load(json_str);
AddMissingToJson(&new_json, missing, TYPEOF(input_data));
const std::string new_c_json = xgboost::Json::Dump(new_json);
res_code = XGBoosterPredictFromDense(
R_ExternalPtrAddr(handle), array_str.c_str(), new_c_json.c_str(),
proxy_dmat_handle, &out_shape, &out_dim, &out_result);
break;
}
case PredictionInputType::DataFrame: {
std::unique_ptr<ProxyDmatrixWrapper> proxy_dmat = GetProxyDMatrixWithBaseMargin(
base_margin);
DMatrixHandle proxy_dmat_handle = proxy_dmat.get()? proxy_dmat->get_handle() : nullptr;
const std::string df_str = MakeArrayInterfaceFromRDataFrame(input_data);
xgboost::StringView json_str(c_json_config);
xgboost::Json new_json = xgboost::Json::Load(json_str);
AddMissingToJson(&new_json, missing, REALSXP);
const std::string new_c_json = xgboost::Json::Dump(new_json);
res_code = XGBoosterPredictFromColumnar(
R_ExternalPtrAddr(handle), df_str.c_str(), new_c_json.c_str(),
proxy_dmat_handle, &out_shape, &out_dim, &out_result);
break;
}
}
}
CHECK_CALL(res_code);
r_out_shape = PROTECT(allocVector(INTSXP, out_dim));
size_t len = 1;
@ -1275,6 +1405,31 @@ XGB_DLL SEXP XGBoosterPredictFromDMatrix_R(SEXP handle, SEXP dmat, SEXP json_con
return r_out;
}
} // namespace
XGB_DLL SEXP XGBoosterPredictFromDMatrix_R(SEXP handle, SEXP dmat, SEXP json_config) {
return XGBoosterPredictGeneric(handle, dmat, json_config,
PredictionInputType::DMatrix, R_NilValue, R_NilValue);
}
XGB_DLL SEXP XGBoosterPredictFromDense_R(SEXP handle, SEXP R_mat, SEXP missing,
SEXP json_config, SEXP base_margin) {
return XGBoosterPredictGeneric(handle, R_mat, json_config,
PredictionInputType::DenseMatrix, missing, base_margin);
}
XGB_DLL SEXP XGBoosterPredictFromCSR_R(SEXP handle, SEXP lst, SEXP missing,
SEXP json_config, SEXP base_margin) {
return XGBoosterPredictGeneric(handle, lst, json_config,
PredictionInputType::CSRMatrix, missing, base_margin);
}
XGB_DLL SEXP XGBoosterPredictFromColumnar_R(SEXP handle, SEXP R_df, SEXP missing,
SEXP json_config, SEXP base_margin) {
return XGBoosterPredictGeneric(handle, R_df, json_config,
PredictionInputType::DataFrame, missing, base_margin);
}
XGB_DLL SEXP XGBoosterLoadModel_R(SEXP handle, SEXP fname) {
R_API_BEGIN();
CHECK_CALL(XGBoosterLoadModel(R_ExternalPtrAddr(handle), CHAR(asChar(fname))));
@ -1519,3 +1674,18 @@ XGB_DLL SEXP XGBoosterSlice_R(SEXP handle, SEXP begin_layer, SEXP end_layer, SEX
Rf_unprotect(1);
return out;
}
XGB_DLL SEXP XGBoosterSliceAndReplace_R(SEXP handle, SEXP begin_layer, SEXP end_layer, SEXP step) {
R_API_BEGIN();
BoosterHandle old_handle = R_ExternalPtrAddr(handle);
BoosterHandle new_handle = nullptr;
CHECK_CALL(XGBoosterSlice(old_handle,
Rf_asInteger(begin_layer),
Rf_asInteger(end_layer),
Rf_asInteger(step),
&new_handle));
R_SetExternalPtrAddr(handle, new_handle);
CHECK_CALL(XGBoosterFree(old_handle));
R_API_END();
return R_NilValue;
}

View File

@ -53,12 +53,13 @@ XGB_DLL SEXP XGBSetGlobalConfig_R(SEXP json_str);
XGB_DLL SEXP XGBGetGlobalConfig_R();
/*!
* \brief load a data matrix
* \param fname name of the content
* \brief load a data matrix from URI
* \param uri URI to the source file to read data from
* \param silent whether print messages
* \param data_split_mode Data split mode (0=rows, 1=columns)
* \return a loaded data matrix
*/
XGB_DLL SEXP XGDMatrixCreateFromFile_R(SEXP fname, SEXP silent);
XGB_DLL SEXP XGDMatrixCreateFromURI_R(SEXP uri, SEXP silent, SEXP data_split_mode);
/*!
* \brief create matrix content from dense matrix
@ -111,9 +112,10 @@ XGB_DLL SEXP XGDMatrixCreateFromCSR_R(SEXP indptr, SEXP indices, SEXP data, SEXP
* \brief create a new dmatrix from sliced content of existing matrix
* \param handle instance of data matrix to be sliced
* \param idxset index set
* \param allow_groups Whether to allow slicing the DMatrix if it has a 'group' field
* \return a sliced new matrix
*/
XGB_DLL SEXP XGDMatrixSliceDMatrix_R(SEXP handle, SEXP idxset);
XGB_DLL SEXP XGDMatrixSliceDMatrix_R(SEXP handle, SEXP idxset, SEXP allow_groups);
/*!
* \brief load a data matrix into binary file
@ -370,6 +372,50 @@ XGB_DLL SEXP XGBoosterEvalOneIter_R(SEXP handle, SEXP iter, SEXP dmats, SEXP evn
* \return A list containing 2 vectors, first one for shape while second one for prediction result.
*/
XGB_DLL SEXP XGBoosterPredictFromDMatrix_R(SEXP handle, SEXP dmat, SEXP json_config);
/*!
* \brief Run prediction on R dense matrix
* \param handle handle
* \param R_mat R matrix
* \param missing missing value
* \param json_config See `XGBoosterPredictFromDense` in xgboost c_api.h. Doesn't include 'missing'
* \param base_margin base margin for the prediction
*
* \return A list containing 2 vectors, first one for shape while second one for prediction result.
*/
XGB_DLL SEXP XGBoosterPredictFromDense_R(SEXP handle, SEXP R_mat, SEXP missing,
SEXP json_config, SEXP base_margin);
/*!
* \brief Run prediction on R CSR matrix
* \param handle handle
* \param lst An R list, containing, in this order:
* (a) 'p' array (a.k.a. indptr)
* (b) 'j' array (a.k.a. indices)
* (c) 'x' array (a.k.a. data / values)
* (d) number of columns
* \param missing missing value
* \param json_config See `XGBoosterPredictFromCSR` in xgboost c_api.h. Doesn't include 'missing'
* \param base_margin base margin for the prediction
*
* \return A list containing 2 vectors, first one for shape while second one for prediction result.
*/
XGB_DLL SEXP XGBoosterPredictFromCSR_R(SEXP handle, SEXP lst, SEXP missing,
SEXP json_config, SEXP base_margin);
/*!
* \brief Run prediction on R data.frame
* \param handle handle
* \param R_df R data.frame
* \param missing missing value
* \param json_config See `XGBoosterPredictFromDense` in xgboost c_api.h. Doesn't include 'missing'
* \param base_margin base margin for the prediction
*
* \return A list containing 2 vectors, first one for shape while second one for prediction result.
*/
XGB_DLL SEXP XGBoosterPredictFromColumnar_R(SEXP handle, SEXP R_df, SEXP missing,
SEXP json_config, SEXP base_margin);
/*!
* \brief load model from existing file
* \param handle handle
@ -490,4 +536,14 @@ XGB_DLL SEXP XGBoosterFeatureScore_R(SEXP handle, SEXP json_config);
*/
XGB_DLL SEXP XGBoosterSlice_R(SEXP handle, SEXP begin_layer, SEXP end_layer, SEXP step);
/*!
* \brief Slice a fitted booster model (by rounds), and replace its handle with the result
* \param handle handle to the fitted booster
* \param begin_layer start of the slice
* \param end_layer end of the slice; end_layer=0 is equivalent to end_layer=num_boost_round
* \param step step size of the slice
* \return NULL
*/
XGB_DLL SEXP XGBoosterSliceAndReplace_R(SEXP handle, SEXP begin_layer, SEXP end_layer, SEXP step);
#endif // XGBOOST_WRAPPER_R_H_ // NOLINT(*)

View File

@ -41,16 +41,6 @@ double LogGamma(double v) {
return lgammafn(v);
}
#endif // !defined(XGBOOST_USE_CUDA)
// customize random engine.
void CustomGlobalRandomEngine::seed(CustomGlobalRandomEngine::result_type val) {
// ignore the seed
}
// use R's PRNG to replacd
CustomGlobalRandomEngine::result_type
CustomGlobalRandomEngine::operator()() {
return static_cast<result_type>(
std::floor(unif_rand() * CustomGlobalRandomEngine::max()));
}
} // namespace common
} // namespace xgboost

View File

@ -20,6 +20,7 @@ pkgs <- c(
"igraph",
"float",
"titanic",
"RhpcBLASctl",
## imports
"Matrix",
"methods",

View File

@ -1,4 +1,6 @@
library(testthat)
library(xgboost)
library(Matrix)
test_check("xgboost", reporter = ProgressReporter)
RhpcBLASctl::omp_set_num_threads(1)

View File

@ -20,7 +20,7 @@ test_that("train and predict binary classification", {
data = xgb.DMatrix(train$data, label = train$label), max_depth = 2,
eta = 1, nthread = n_threads, nrounds = nrounds,
objective = "binary:logistic", eval_metric = "error",
watchlist = list(train = xgb.DMatrix(train$data, label = train$label))
evals = list(train = xgb.DMatrix(train$data, label = train$label))
),
"train-error"
)
@ -139,8 +139,8 @@ test_that("dart prediction works", {
pred_by_train_1 <- predict(booster_by_train, newdata = dtrain, iterationrange = c(1, nrounds))
pred_by_train_2 <- predict(booster_by_train, newdata = dtrain, training = TRUE)
expect_true(all(matrix(pred_by_train_0, byrow = TRUE) == matrix(pred_by_xgboost_0, byrow = TRUE)))
expect_true(all(matrix(pred_by_train_1, byrow = TRUE) == matrix(pred_by_xgboost_1, byrow = TRUE)))
expect_equal(pred_by_train_0, pred_by_xgboost_0, tolerance = 1e-6)
expect_equal(pred_by_train_1, pred_by_xgboost_1, tolerance = 1e-6)
expect_true(all(matrix(pred_by_train_2, byrow = TRUE) == matrix(pred_by_xgboost_2, byrow = TRUE)))
})
@ -152,7 +152,7 @@ test_that("train and predict softprob", {
data = xgb.DMatrix(as.matrix(iris[, -5]), label = lb),
max_depth = 3, eta = 0.5, nthread = n_threads, nrounds = 5,
objective = "multi:softprob", num_class = 3, eval_metric = "merror",
watchlist = list(train = xgb.DMatrix(as.matrix(iris[, -5]), label = lb))
evals = list(train = xgb.DMatrix(as.matrix(iris[, -5]), label = lb))
),
"train-merror"
)
@ -203,7 +203,7 @@ test_that("train and predict softmax", {
data = xgb.DMatrix(as.matrix(iris[, -5]), label = lb),
max_depth = 3, eta = 0.5, nthread = n_threads, nrounds = 5,
objective = "multi:softmax", num_class = 3, eval_metric = "merror",
watchlist = list(train = xgb.DMatrix(as.matrix(iris[, -5]), label = lb))
evals = list(train = xgb.DMatrix(as.matrix(iris[, -5]), label = lb))
),
"train-merror"
)
@ -226,7 +226,7 @@ test_that("train and predict RF", {
nthread = n_threads,
nrounds = 1, objective = "binary:logistic", eval_metric = "error",
num_parallel_tree = 20, subsample = 0.6, colsample_bytree = 0.1,
watchlist = list(train = xgb.DMatrix(train$data, label = lb))
evals = list(train = xgb.DMatrix(train$data, label = lb))
)
expect_equal(xgb.get.num.boosted.rounds(bst), 1)
@ -250,7 +250,7 @@ test_that("train and predict RF with softprob", {
objective = "multi:softprob", eval_metric = "merror",
num_class = 3, verbose = 0,
num_parallel_tree = 4, subsample = 0.5, colsample_bytree = 0.5,
watchlist = list(train = xgb.DMatrix(as.matrix(iris[, -5]), label = lb))
evals = list(train = xgb.DMatrix(as.matrix(iris[, -5]), label = lb))
)
expect_equal(xgb.get.num.boosted.rounds(bst), 15)
# predict for all iterations:
@ -271,7 +271,7 @@ test_that("use of multiple eval metrics works", {
data = xgb.DMatrix(train$data, label = train$label), max_depth = 2,
eta = 1, nthread = n_threads, nrounds = 2, objective = "binary:logistic",
eval_metric = "error", eval_metric = "auc", eval_metric = "logloss",
watchlist = list(train = xgb.DMatrix(train$data, label = train$label))
evals = list(train = xgb.DMatrix(train$data, label = train$label))
),
"train-error.*train-auc.*train-logloss"
)
@ -283,7 +283,7 @@ test_that("use of multiple eval metrics works", {
data = xgb.DMatrix(train$data, label = train$label), max_depth = 2,
eta = 1, nthread = n_threads, nrounds = 2, objective = "binary:logistic",
eval_metric = list("error", "auc", "logloss"),
watchlist = list(train = xgb.DMatrix(train$data, label = train$label))
evals = list(train = xgb.DMatrix(train$data, label = train$label))
),
"train-error.*train-auc.*train-logloss"
)
@ -295,19 +295,19 @@ test_that("use of multiple eval metrics works", {
test_that("training continuation works", {
dtrain <- xgb.DMatrix(train$data, label = train$label, nthread = n_threads)
watchlist <- list(train = dtrain)
evals <- list(train = dtrain)
param <- list(
objective = "binary:logistic", max_depth = 2, eta = 1, nthread = n_threads
)
# for the reference, use 4 iterations at once:
set.seed(11)
bst <- xgb.train(param, dtrain, nrounds = 4, watchlist, verbose = 0)
bst <- xgb.train(param, dtrain, nrounds = 4, evals = evals, verbose = 0)
# first two iterations:
set.seed(11)
bst1 <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0)
bst1 <- xgb.train(param, dtrain, nrounds = 2, evals = evals, verbose = 0)
# continue for two more:
bst2 <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0, xgb_model = bst1)
bst2 <- xgb.train(param, dtrain, nrounds = 2, evals = evals, verbose = 0, xgb_model = bst1)
if (!windows_flag && !solaris_flag) {
expect_equal(xgb.save.raw(bst), xgb.save.raw(bst2))
}
@ -315,7 +315,7 @@ test_that("training continuation works", {
expect_equal(dim(attributes(bst2)$evaluation_log), c(4, 2))
expect_equal(attributes(bst2)$evaluation_log, attributes(bst)$evaluation_log)
# test continuing from raw model data
bst2 <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0, xgb_model = xgb.save.raw(bst1))
bst2 <- xgb.train(param, dtrain, nrounds = 2, evals = evals, verbose = 0, xgb_model = xgb.save.raw(bst1))
if (!windows_flag && !solaris_flag) {
expect_equal(xgb.save.raw(bst), xgb.save.raw(bst2))
}
@ -323,7 +323,7 @@ test_that("training continuation works", {
# test continuing from a model in file
fname <- file.path(tempdir(), "xgboost.json")
xgb.save(bst1, fname)
bst2 <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0, xgb_model = fname)
bst2 <- xgb.train(param, dtrain, nrounds = 2, evals = evals, verbose = 0, xgb_model = fname)
if (!windows_flag && !solaris_flag) {
expect_equal(xgb.save.raw(bst), xgb.save.raw(bst2))
}
@ -334,7 +334,7 @@ test_that("xgb.cv works", {
set.seed(11)
expect_output(
cv <- xgb.cv(
data = train$data, label = train$label, max_depth = 2, nfold = 5,
data = xgb.DMatrix(train$data, label = train$label), max_depth = 2, nfold = 5,
eta = 1., nthread = n_threads, nrounds = 2, objective = "binary:logistic",
eval_metric = "error", verbose = TRUE
),
@ -348,7 +348,6 @@ test_that("xgb.cv works", {
expect_false(is.null(cv$folds) && is.list(cv$folds))
expect_length(cv$folds, 5)
expect_false(is.null(cv$params) && is.list(cv$params))
expect_false(is.null(cv$callbacks))
expect_false(is.null(cv$call))
})
@ -358,13 +357,13 @@ test_that("xgb.cv works with stratified folds", {
cv <- xgb.cv(
data = dtrain, max_depth = 2, nfold = 5,
eta = 1., nthread = n_threads, nrounds = 2, objective = "binary:logistic",
verbose = TRUE, stratified = FALSE
verbose = FALSE, stratified = FALSE
)
set.seed(314159)
cv2 <- xgb.cv(
data = dtrain, max_depth = 2, nfold = 5,
eta = 1., nthread = n_threads, nrounds = 2, objective = "binary:logistic",
verbose = TRUE, stratified = TRUE
verbose = FALSE, stratified = TRUE
)
# Stratified folds should result in different evaluation logs
expect_true(all(cv$evaluation_log[, test_logloss_mean] != cv2$evaluation_log[, test_logloss_mean]))
@ -418,7 +417,7 @@ test_that("max_delta_step works", {
dtrain <- xgb.DMatrix(
agaricus.train$data, label = agaricus.train$label, nthread = n_threads
)
watchlist <- list(train = dtrain)
evals <- list(train = dtrain)
param <- list(
objective = "binary:logistic", eval_metric = "logloss", max_depth = 2,
nthread = n_threads,
@ -426,9 +425,9 @@ test_that("max_delta_step works", {
)
nrounds <- 5
# model with no restriction on max_delta_step
bst1 <- xgb.train(param, dtrain, nrounds, watchlist, verbose = 1)
bst1 <- xgb.train(param, dtrain, nrounds, evals = evals, verbose = 1)
# model with restricted max_delta_step
bst2 <- xgb.train(param, dtrain, nrounds, watchlist, verbose = 1, max_delta_step = 1)
bst2 <- xgb.train(param, dtrain, nrounds, evals = evals, verbose = 1, max_delta_step = 1)
# the no-restriction model is expected to have consistently lower loss during the initial iterations
expect_true(all(attributes(bst1)$evaluation_log$train_logloss < attributes(bst2)$evaluation_log$train_logloss))
expect_lt(mean(attributes(bst1)$evaluation_log$train_logloss) / mean(attributes(bst2)$evaluation_log$train_logloss), 0.8)
@ -445,7 +444,7 @@ test_that("colsample_bytree works", {
colnames(test_x) <- paste0("Feature_", sprintf("%03d", 1:100))
dtrain <- xgb.DMatrix(train_x, label = train_y, nthread = n_threads)
dtest <- xgb.DMatrix(test_x, label = test_y, nthread = n_threads)
watchlist <- list(train = dtrain, eval = dtest)
evals <- list(train = dtrain, eval = dtest)
## Use colsample_bytree = 0.01, so that roughly one out of 100 features is chosen for
## each tree
param <- list(
@ -454,7 +453,7 @@ test_that("colsample_bytree works", {
eval_metric = "auc"
)
set.seed(2)
bst <- xgb.train(param, dtrain, nrounds = 100, watchlist, verbose = 0)
bst <- xgb.train(param, dtrain, nrounds = 100, evals = evals, verbose = 0)
xgb.importance(model = bst)
# If colsample_bytree works properly, a variety of features should be used
# in the 100 trees
@ -651,6 +650,51 @@ test_that("Can use ranking objectives with either 'qid' or 'group'", {
expect_equal(pred_qid, pred_gr)
})
test_that("Can predict on data.frame objects", {
data("mtcars")
y <- mtcars$mpg
x_df <- mtcars[, -1]
x_mat <- as.matrix(x_df)
dm <- xgb.DMatrix(x_mat, label = y, nthread = n_threads)
model <- xgb.train(
params = list(
tree_method = "hist",
objective = "reg:squarederror",
nthread = n_threads
),
data = dm,
nrounds = 5
)
pred_mat <- predict(model, xgb.DMatrix(x_mat), nthread = n_threads)
pred_df <- predict(model, x_df, nthread = n_threads)
expect_equal(pred_mat, pred_df)
})
test_that("'base_margin' gives the same result in DMatrix as in inplace_predict", {
data("mtcars")
y <- mtcars$mpg
x <- as.matrix(mtcars[, -1])
dm <- xgb.DMatrix(x, label = y, nthread = n_threads)
model <- xgb.train(
params = list(
tree_method = "hist",
objective = "reg:squarederror",
nthread = n_threads
),
data = dm,
nrounds = 5
)
set.seed(123)
base_margin <- rnorm(nrow(x))
dm_w_base <- xgb.DMatrix(data = x, base_margin = base_margin)
pred_from_dm <- predict(model, dm_w_base)
pred_from_mat <- predict(model, x, base_margin = base_margin)
expect_equal(pred_from_dm, pred_from_mat)
})
test_that("Coefficients from gblinear have the expected shape and names", {
# Single-column coefficients
data(mtcars)
@ -778,3 +822,120 @@ test_that("DMatrix field are set to booster when training", {
expect_equal(getinfo(model_feature_types, "feature_type"), c("q", "c", "q"))
expect_equal(getinfo(model_both, "feature_type"), c("q", "c", "q"))
})
test_that("Seed in params override PRNG from R", {
set.seed(123)
model1 <- xgb.train(
data = xgb.DMatrix(
agaricus.train$data,
label = agaricus.train$label, nthread = 1L
),
params = list(
objective = "binary:logistic",
max_depth = 3L,
subsample = 0.1,
colsample_bytree = 0.1,
seed = 111L
),
nrounds = 3L
)
set.seed(456)
model2 <- xgb.train(
data = xgb.DMatrix(
agaricus.train$data,
label = agaricus.train$label, nthread = 1L
),
params = list(
objective = "binary:logistic",
max_depth = 3L,
subsample = 0.1,
colsample_bytree = 0.1,
seed = 111L
),
nrounds = 3L
)
expect_equal(
xgb.save.raw(model1, raw_format = "json"),
xgb.save.raw(model2, raw_format = "json")
)
set.seed(123)
model3 <- xgb.train(
data = xgb.DMatrix(
agaricus.train$data,
label = agaricus.train$label, nthread = 1L
),
params = list(
objective = "binary:logistic",
max_depth = 3L,
subsample = 0.1,
colsample_bytree = 0.1,
seed = 222L
),
nrounds = 3L
)
expect_false(
isTRUE(
all.equal(
xgb.save.raw(model1, raw_format = "json"),
xgb.save.raw(model3, raw_format = "json")
)
)
)
})
test_that("xgb.cv works for AFT", {
X <- matrix(c(1, -1, -1, 1, 0, 1, 1, 0), nrow = 4, byrow = TRUE) # 4x2 matrix
dtrain <- xgb.DMatrix(X, nthread = n_threads)
params <- list(objective = 'survival:aft', learning_rate = 0.2, max_depth = 2L)
# data must have bounds
expect_error(
xgb.cv(
params = params,
data = dtrain,
nround = 5L,
nfold = 4L,
nthread = n_threads
)
)
setinfo(dtrain, 'label_lower_bound', c(2, 3, 0, 4))
setinfo(dtrain, 'label_upper_bound', c(2, Inf, 4, 5))
# automatic stratified splitting is turned off
expect_warning(
xgb.cv(
params = params, data = dtrain, nround = 5L, nfold = 4L,
nthread = n_threads, stratified = TRUE, verbose = FALSE
)
)
# this works without any issue
expect_no_warning(
xgb.cv(params = params, data = dtrain, nround = 5L, nfold = 4L, verbose = FALSE)
)
})
test_that("xgb.cv works for ranking", {
data(iris)
x <- iris[, -(4:5)]
y <- as.integer(iris$Petal.Width)
group <- rep(50, 3)
dm <- xgb.DMatrix(x, label = y, group = group)
res <- xgb.cv(
data = dm,
params = list(
objective = "rank:pairwise",
max_depth = 3
),
nrounds = 3,
nfold = 2,
verbose = FALSE,
stratified = FALSE
)
expect_equal(length(res$folds), 2L)
})

View File

@ -19,7 +19,7 @@ ltrain <- add.noise(train$label, 0.2)
ltest <- add.noise(test$label, 0.2)
dtrain <- xgb.DMatrix(train$data, label = ltrain, nthread = n_threads)
dtest <- xgb.DMatrix(test$data, label = ltest, nthread = n_threads)
watchlist <- list(train = dtrain, test = dtest)
evals <- list(train = dtrain, test = dtest)
err <- function(label, pr) sum((pr > 0.5) != label) / length(label)
@ -28,79 +28,125 @@ param <- list(objective = "binary:logistic", eval_metric = "error",
max_depth = 2, nthread = n_threads)
test_that("cb.print.evaluation works as expected", {
test_that("xgb.cb.print.evaluation works as expected for xgb.train", {
logs1 <- capture.output({
model <- xgb.train(
data = dtrain,
params = list(
objective = "binary:logistic",
eval_metric = "auc",
max_depth = 2,
nthread = n_threads
),
nrounds = 10,
evals = list(train = dtrain, test = dtest),
callbacks = list(xgb.cb.print.evaluation(period = 1))
)
})
expect_equal(length(logs1), 10)
expect_true(all(grepl("^\\[\\d{1,2}\\]\ttrain-auc:0\\.\\d+\ttest-auc:0\\.\\d+\\s*$", logs1)))
lapply(seq(1, 10), function(x) expect_true(grepl(paste0("^\\[", x), logs1[x])))
bst_evaluation <- c('train-auc' = 0.9, 'test-auc' = 0.8)
bst_evaluation_err <- NULL
begin_iteration <- 1
end_iteration <- 7
f0 <- cb.print.evaluation(period = 0)
f1 <- cb.print.evaluation(period = 1)
f5 <- cb.print.evaluation(period = 5)
expect_false(is.null(attr(f1, 'call')))
expect_equal(attr(f1, 'name'), 'cb.print.evaluation')
iteration <- 1
expect_silent(f0())
expect_output(f1(), "\\[1\\]\ttrain-auc:0.900000\ttest-auc:0.800000")
expect_output(f5(), "\\[1\\]\ttrain-auc:0.900000\ttest-auc:0.800000")
expect_null(f1())
iteration <- 2
expect_output(f1(), "\\[2\\]\ttrain-auc:0.900000\ttest-auc:0.800000")
expect_silent(f5())
iteration <- 7
expect_output(f1(), "\\[7\\]\ttrain-auc:0.900000\ttest-auc:0.800000")
expect_output(f5(), "\\[7\\]\ttrain-auc:0.900000\ttest-auc:0.800000")
bst_evaluation_err <- c('train-auc' = 0.1, 'test-auc' = 0.2)
expect_output(f1(), "\\[7\\]\ttrain-auc:0.900000±0.100000\ttest-auc:0.800000±0.200000")
logs2 <- capture.output({
model <- xgb.train(
data = dtrain,
params = list(
objective = "binary:logistic",
eval_metric = "auc",
max_depth = 2,
nthread = n_threads
),
nrounds = 10,
evals = list(train = dtrain, test = dtest),
callbacks = list(xgb.cb.print.evaluation(period = 2))
)
})
expect_equal(length(logs2), 6)
expect_true(all(grepl("^\\[\\d{1,2}\\]\ttrain-auc:0\\.\\d+\ttest-auc:0\\.\\d+\\s*$", logs2)))
seq_matches <- c(seq(1, 10, 2), 10)
lapply(seq_along(seq_matches), function(x) expect_true(grepl(paste0("^\\[", seq_matches[x]), logs2[x])))
})
test_that("cb.evaluation.log works as expected", {
test_that("xgb.cb.print.evaluation works as expected for xgb.cv", {
logs1 <- capture.output({
model <- xgb.cv(
data = dtrain,
params = list(
objective = "binary:logistic",
eval_metric = "auc",
max_depth = 2,
nthread = n_threads
),
nrounds = 10,
nfold = 3,
callbacks = list(xgb.cb.print.evaluation(period = 1, showsd = TRUE))
)
})
expect_equal(length(logs1), 10)
expect_true(all(grepl("^\\[\\d{1,2}\\]\ttrain-auc:0\\.\\d+±0\\.\\d+\ttest-auc:0\\.\\d+±0\\.\\d+\\s*$", logs1)))
lapply(seq(1, 10), function(x) expect_true(grepl(paste0("^\\[", x), logs1[x])))
bst_evaluation <- c('train-auc' = 0.9, 'test-auc' = 0.8)
bst_evaluation_err <- NULL
logs2 <- capture.output({
model <- xgb.cv(
data = dtrain,
params = list(
objective = "binary:logistic",
eval_metric = "auc",
max_depth = 2,
nthread = n_threads
),
nrounds = 10,
nfold = 3,
callbacks = list(xgb.cb.print.evaluation(period = 2, showsd = TRUE))
)
})
expect_equal(length(logs2), 6)
expect_true(all(grepl("^\\[\\d{1,2}\\]\ttrain-auc:0\\.\\d+±0\\.\\d+\ttest-auc:0\\.\\d+±0\\.\\d+\\s*$", logs2)))
seq_matches <- c(seq(1, 10, 2), 10)
lapply(seq_along(seq_matches), function(x) expect_true(grepl(paste0("^\\[", seq_matches[x]), logs2[x])))
})
evaluation_log <- list()
f <- cb.evaluation.log()
test_that("xgb.cb.evaluation.log works as expected for xgb.train", {
model <- xgb.train(
data = dtrain,
params = list(
objective = "binary:logistic",
eval_metric = "auc",
max_depth = 2,
nthread = n_threads
),
nrounds = 10,
verbose = FALSE,
evals = list(train = dtrain, test = dtest),
callbacks = list(xgb.cb.evaluation.log())
)
logs <- attributes(model)$evaluation_log
expect_false(is.null(attr(f, 'call')))
expect_equal(attr(f, 'name'), 'cb.evaluation.log')
expect_equal(nrow(logs), 10)
expect_equal(colnames(logs), c("iter", "train_auc", "test_auc"))
})
iteration <- 1
expect_silent(f())
expect_equal(evaluation_log,
list(c(iter = 1, bst_evaluation)))
iteration <- 2
expect_silent(f())
expect_equal(evaluation_log,
list(c(iter = 1, bst_evaluation), c(iter = 2, bst_evaluation)))
expect_silent(f(finalize = TRUE))
expect_equal(evaluation_log,
data.table::data.table(iter = 1:2, train_auc = c(0.9, 0.9), test_auc = c(0.8, 0.8)))
test_that("xgb.cb.evaluation.log works as expected for xgb.cv", {
model <- xgb.cv(
data = dtrain,
params = list(
objective = "binary:logistic",
eval_metric = "auc",
max_depth = 2,
nthread = n_threads
),
nrounds = 10,
verbose = FALSE,
nfold = 3,
callbacks = list(xgb.cb.evaluation.log())
)
logs <- model$evaluation_log
bst_evaluation_err <- c('train-auc' = 0.1, 'test-auc' = 0.2)
evaluation_log <- list()
f <- cb.evaluation.log()
iteration <- 1
expect_silent(f())
expect_equal(evaluation_log,
list(c(iter = 1, c(bst_evaluation, bst_evaluation_err))))
iteration <- 2
expect_silent(f())
expect_equal(evaluation_log,
list(c(iter = 1, c(bst_evaluation, bst_evaluation_err)),
c(iter = 2, c(bst_evaluation, bst_evaluation_err))))
expect_silent(f(finalize = TRUE))
expect_equal(evaluation_log,
data.table::data.table(iter = 1:2,
train_auc_mean = c(0.9, 0.9), train_auc_std = c(0.1, 0.1),
test_auc_mean = c(0.8, 0.8), test_auc_std = c(0.2, 0.2)))
expect_equal(nrow(logs), 10)
expect_equal(
colnames(logs),
c("iter", "train_auc_mean", "train_auc_std", "test_auc_mean", "test_auc_std")
)
})
@ -109,26 +155,26 @@ param <- list(objective = "binary:logistic", eval_metric = "error",
test_that("can store evaluation_log without printing", {
expect_silent(
bst <- xgb.train(param, dtrain, nrounds = 10, watchlist, eta = 1, verbose = 0)
bst <- xgb.train(param, dtrain, nrounds = 10, evals = evals, eta = 1, verbose = 0)
)
expect_false(is.null(attributes(bst)$evaluation_log))
expect_false(is.null(attributes(bst)$evaluation_log$train_error))
expect_lt(attributes(bst)$evaluation_log[, min(train_error)], 0.2)
})
test_that("cb.reset.parameters works as expected", {
test_that("xgb.cb.reset.parameters works as expected", {
# fixed eta
set.seed(111)
bst0 <- xgb.train(param, dtrain, nrounds = 2, watchlist, eta = 0.9, verbose = 0)
bst0 <- xgb.train(param, dtrain, nrounds = 2, evals = evals, eta = 0.9, verbose = 0)
expect_false(is.null(attributes(bst0)$evaluation_log))
expect_false(is.null(attributes(bst0)$evaluation_log$train_error))
# same eta but re-set as a vector parameter in the callback
set.seed(111)
my_par <- list(eta = c(0.9, 0.9))
bst1 <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0,
callbacks = list(cb.reset.parameters(my_par)))
bst1 <- xgb.train(param, dtrain, nrounds = 2, evals = evals, verbose = 0,
callbacks = list(xgb.cb.reset.parameters(my_par)))
expect_false(is.null(attributes(bst1)$evaluation_log$train_error))
expect_equal(attributes(bst0)$evaluation_log$train_error,
attributes(bst1)$evaluation_log$train_error)
@ -136,8 +182,8 @@ test_that("cb.reset.parameters works as expected", {
# same eta but re-set via a function in the callback
set.seed(111)
my_par <- list(eta = function(itr, itr_end) 0.9)
bst2 <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0,
callbacks = list(cb.reset.parameters(my_par)))
bst2 <- xgb.train(param, dtrain, nrounds = 2, evals = evals, verbose = 0,
callbacks = list(xgb.cb.reset.parameters(my_par)))
expect_false(is.null(attributes(bst2)$evaluation_log$train_error))
expect_equal(attributes(bst0)$evaluation_log$train_error,
attributes(bst2)$evaluation_log$train_error)
@ -145,39 +191,39 @@ test_that("cb.reset.parameters works as expected", {
# different eta re-set as a vector parameter in the callback
set.seed(111)
my_par <- list(eta = c(0.6, 0.5))
bst3 <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0,
callbacks = list(cb.reset.parameters(my_par)))
bst3 <- xgb.train(param, dtrain, nrounds = 2, evals = evals, verbose = 0,
callbacks = list(xgb.cb.reset.parameters(my_par)))
expect_false(is.null(attributes(bst3)$evaluation_log$train_error))
expect_false(all(attributes(bst0)$evaluation_log$train_error == attributes(bst3)$evaluation_log$train_error))
# resetting multiple parameters at the same time runs with no error
my_par <- list(eta = c(1., 0.5), gamma = c(1, 2), max_depth = c(4, 8))
expect_error(
bst4 <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0,
callbacks = list(cb.reset.parameters(my_par)))
bst4 <- xgb.train(param, dtrain, nrounds = 2, evals = evals, verbose = 0,
callbacks = list(xgb.cb.reset.parameters(my_par)))
, NA) # NA = no error
# CV works as well
expect_error(
bst4 <- xgb.cv(param, dtrain, nfold = 2, nrounds = 2, verbose = 0,
callbacks = list(cb.reset.parameters(my_par)))
callbacks = list(xgb.cb.reset.parameters(my_par)))
, NA) # NA = no error
# expect no learning with 0 learning rate
my_par <- list(eta = c(0., 0.))
bstX <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0,
callbacks = list(cb.reset.parameters(my_par)))
bstX <- xgb.train(param, dtrain, nrounds = 2, evals = evals, verbose = 0,
callbacks = list(xgb.cb.reset.parameters(my_par)))
expect_false(is.null(attributes(bstX)$evaluation_log$train_error))
er <- unique(attributes(bstX)$evaluation_log$train_error)
expect_length(er, 1)
expect_gt(er, 0.4)
})
test_that("cb.save.model works as expected", {
test_that("xgb.cb.save.model works as expected", {
files <- c('xgboost_01.json', 'xgboost_02.json', 'xgboost.json')
files <- unname(sapply(files, function(f) file.path(tempdir(), f)))
for (f in files) if (file.exists(f)) file.remove(f)
bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, eta = 1, verbose = 0,
bst <- xgb.train(param, dtrain, nrounds = 2, evals = evals, eta = 1, verbose = 0,
save_period = 1, save_name = file.path(tempdir(), "xgboost_%02d.json"))
expect_true(file.exists(files[1]))
expect_true(file.exists(files[2]))
@ -193,7 +239,7 @@ test_that("cb.save.model works as expected", {
expect_equal(xgb.save.raw(bst), xgb.save.raw(b2))
# save_period = 0 saves the last iteration's model
bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, eta = 1, verbose = 0,
bst <- xgb.train(param, dtrain, nrounds = 2, evals = evals, eta = 1, verbose = 0,
save_period = 0, save_name = file.path(tempdir(), 'xgboost.json'))
expect_true(file.exists(files[3]))
b2 <- xgb.load(files[3])
@ -206,7 +252,7 @@ test_that("cb.save.model works as expected", {
test_that("early stopping xgb.train works", {
set.seed(11)
expect_output(
bst <- xgb.train(param, dtrain, nrounds = 20, watchlist, eta = 0.3,
bst <- xgb.train(param, dtrain, nrounds = 20, evals = evals, eta = 0.3,
early_stopping_rounds = 3, maximize = FALSE)
, "Stopping. Best iteration")
expect_false(is.null(xgb.attr(bst, "best_iteration")))
@ -220,7 +266,7 @@ test_that("early stopping xgb.train works", {
set.seed(11)
expect_silent(
bst0 <- xgb.train(param, dtrain, nrounds = 20, watchlist, eta = 0.3,
bst0 <- xgb.train(param, dtrain, nrounds = 20, evals = evals, eta = 0.3,
early_stopping_rounds = 3, maximize = FALSE, verbose = 0)
)
expect_equal(attributes(bst)$evaluation_log, attributes(bst0)$evaluation_log)
@ -236,10 +282,10 @@ test_that("early stopping xgb.train works", {
test_that("early stopping using a specific metric works", {
set.seed(11)
expect_output(
bst <- xgb.train(param[-2], dtrain, nrounds = 20, watchlist, eta = 0.6,
bst <- xgb.train(param[-2], dtrain, nrounds = 20, evals = evals, eta = 0.6,
eval_metric = "logloss", eval_metric = "auc",
callbacks = list(cb.early.stop(stopping_rounds = 3, maximize = FALSE,
metric_name = 'test_logloss')))
callbacks = list(xgb.cb.early.stop(stopping_rounds = 3, maximize = FALSE,
metric_name = 'test_logloss')))
, "Stopping. Best iteration")
expect_false(is.null(xgb.attr(bst, "best_iteration")))
expect_lt(xgb.attr(bst, "best_iteration"), 19)
@ -269,7 +315,7 @@ test_that("early stopping works with titanic", {
nrounds = 100,
early_stopping_rounds = 3,
nthread = n_threads,
watchlist = list(train = xgb.DMatrix(dtx, label = dty))
evals = list(train = xgb.DMatrix(dtx, label = dty))
)
expect_true(TRUE) # should not crash
@ -281,10 +327,10 @@ test_that("early stopping xgb.cv works", {
cv <- xgb.cv(param, dtrain, nfold = 5, eta = 0.3, nrounds = 20,
early_stopping_rounds = 3, maximize = FALSE)
, "Stopping. Best iteration")
expect_false(is.null(cv$best_iteration))
expect_lt(cv$best_iteration, 19)
expect_false(is.null(cv$early_stop$best_iteration))
expect_lt(cv$early_stop$best_iteration, 19)
# the best error is min error:
expect_true(cv$evaluation_log[, test_error_mean[cv$best_iteration] == min(test_error_mean)])
expect_true(cv$evaluation_log[, test_error_mean[cv$early_stop$best_iteration] == min(test_error_mean)])
})
test_that("prediction in xgb.cv works", {
@ -292,19 +338,19 @@ test_that("prediction in xgb.cv works", {
nrounds <- 4
cv <- xgb.cv(param, dtrain, nfold = 5, eta = 0.5, nrounds = nrounds, prediction = TRUE, verbose = 0)
expect_false(is.null(cv$evaluation_log))
expect_false(is.null(cv$pred))
expect_length(cv$pred, nrow(train$data))
err_pred <- mean(sapply(cv$folds, function(f) mean(err(ltrain[f], cv$pred[f]))))
expect_false(is.null(cv$cv_predict$pred))
expect_length(cv$cv_predict$pred, nrow(train$data))
err_pred <- mean(sapply(cv$folds, function(f) mean(err(ltrain[f], cv$cv_predict$pred[f]))))
err_log <- cv$evaluation_log[nrounds, test_error_mean]
expect_equal(err_pred, err_log, tolerance = 1e-6)
# save CV models
set.seed(11)
cvx <- xgb.cv(param, dtrain, nfold = 5, eta = 0.5, nrounds = nrounds, prediction = TRUE, verbose = 0,
callbacks = list(cb.cv.predict(save_models = TRUE)))
callbacks = list(xgb.cb.cv.predict(save_models = TRUE)))
expect_equal(cv$evaluation_log, cvx$evaluation_log)
expect_length(cvx$models, 5)
expect_true(all(sapply(cvx$models, class) == 'xgb.Booster'))
expect_length(cvx$cv_predict$models, 5)
expect_true(all(sapply(cvx$cv_predict$models, class) == 'xgb.Booster'))
})
test_that("prediction in xgb.cv works for gblinear too", {
@ -312,8 +358,8 @@ test_that("prediction in xgb.cv works for gblinear too", {
p <- list(booster = 'gblinear', objective = "reg:logistic", nthread = n_threads)
cv <- xgb.cv(p, dtrain, nfold = 5, eta = 0.5, nrounds = 2, prediction = TRUE, verbose = 0)
expect_false(is.null(cv$evaluation_log))
expect_false(is.null(cv$pred))
expect_length(cv$pred, nrow(train$data))
expect_false(is.null(cv$cv_predict$pred))
expect_length(cv$cv_predict$pred, nrow(train$data))
})
test_that("prediction in early-stopping xgb.cv works", {
@ -321,17 +367,17 @@ test_that("prediction in early-stopping xgb.cv works", {
expect_output(
cv <- xgb.cv(param, dtrain, nfold = 5, eta = 0.1, nrounds = 20,
early_stopping_rounds = 5, maximize = FALSE, stratified = FALSE,
prediction = TRUE, base_score = 0.5)
prediction = TRUE, base_score = 0.5, verbose = TRUE)
, "Stopping. Best iteration")
expect_false(is.null(cv$best_iteration))
expect_lt(cv$best_iteration, 19)
expect_false(is.null(cv$early_stop$best_iteration))
expect_lt(cv$early_stop$best_iteration, 19)
expect_false(is.null(cv$evaluation_log))
expect_false(is.null(cv$pred))
expect_length(cv$pred, nrow(train$data))
expect_false(is.null(cv$cv_predict$pred))
expect_length(cv$cv_predict$pred, nrow(train$data))
err_pred <- mean(sapply(cv$folds, function(f) mean(err(ltrain[f], cv$pred[f]))))
err_log <- cv$evaluation_log[cv$best_iteration, test_error_mean]
err_pred <- mean(sapply(cv$folds, function(f) mean(err(ltrain[f], cv$cv_predict$pred[f]))))
err_log <- cv$evaluation_log[cv$early_stop$best_iteration, test_error_mean]
expect_equal(err_pred, err_log, tolerance = 1e-6)
err_log_last <- cv$evaluation_log[cv$niter, test_error_mean]
expect_gt(abs(err_pred - err_log_last), 1e-4)
@ -341,14 +387,14 @@ test_that("prediction in xgb.cv for softprob works", {
lb <- as.numeric(iris$Species) - 1
set.seed(11)
expect_warning(
cv <- xgb.cv(data = as.matrix(iris[, -5]), label = lb, nfold = 4,
cv <- xgb.cv(data = xgb.DMatrix(as.matrix(iris[, -5]), label = lb), nfold = 4,
eta = 0.5, nrounds = 5, max_depth = 3, nthread = n_threads,
subsample = 0.8, gamma = 2, verbose = 0,
prediction = TRUE, objective = "multi:softprob", num_class = 3)
, NA)
expect_false(is.null(cv$pred))
expect_equal(dim(cv$pred), c(nrow(iris), 3))
expect_lt(diff(range(rowSums(cv$pred))), 1e-6)
expect_false(is.null(cv$cv_predict$pred))
expect_equal(dim(cv$cv_predict$pred), c(nrow(iris), 3))
expect_lt(diff(range(rowSums(cv$cv_predict$pred))), 1e-6)
})
test_that("prediction in xgb.cv works for multi-quantile", {
@ -368,7 +414,7 @@ test_that("prediction in xgb.cv works for multi-quantile", {
prediction = TRUE,
verbose = 0
)
expect_equal(dim(cv$pred), c(nrow(x), 5))
expect_equal(dim(cv$cv_predict$pred), c(nrow(x), 5))
})
test_that("prediction in xgb.cv works for multi-output", {
@ -389,5 +435,46 @@ test_that("prediction in xgb.cv works for multi-output", {
prediction = TRUE,
verbose = 0
)
expect_equal(dim(cv$pred), c(nrow(x), 2))
expect_equal(dim(cv$cv_predict$pred), c(nrow(x), 2))
})
test_that("prediction in xgb.cv works for multi-quantile", {
data(mtcars)
y <- mtcars$mpg
x <- as.matrix(mtcars[, -1])
dm <- xgb.DMatrix(x, label = y, nthread = 1)
cv <- xgb.cv(
data = dm,
params = list(
objective = "reg:quantileerror",
quantile_alpha = c(0.1, 0.2, 0.5, 0.8, 0.9),
nthread = 1
),
nrounds = 5,
nfold = 3,
prediction = TRUE,
verbose = 0
)
expect_equal(dim(cv$cv_predict$pred), c(nrow(x), 5))
})
test_that("prediction in xgb.cv works for multi-output", {
data(mtcars)
y <- mtcars$mpg
x <- as.matrix(mtcars[, -1])
dm <- xgb.DMatrix(x, label = cbind(y, -y), nthread = 1)
cv <- xgb.cv(
data = dm,
params = list(
tree_method = "hist",
multi_strategy = "multi_output_tree",
objective = "reg:squarederror",
nthread = n_threads
),
nrounds = 5,
nfold = 3,
prediction = TRUE,
verbose = 0
)
expect_equal(dim(cv$cv_predict$pred), c(nrow(x), 2))
})

View File

@ -12,7 +12,7 @@ dtrain <- xgb.DMatrix(
dtest <- xgb.DMatrix(
agaricus.test$data, label = agaricus.test$label, nthread = n_threads
)
watchlist <- list(eval = dtest, train = dtrain)
evals <- list(eval = dtest, train = dtrain)
logregobj <- function(preds, dtrain) {
labels <- getinfo(dtrain, "label")
@ -33,7 +33,7 @@ param <- list(max_depth = 2, eta = 1, nthread = n_threads,
num_round <- 2
test_that("custom objective works", {
bst <- xgb.train(param, dtrain, num_round, watchlist)
bst <- xgb.train(param, dtrain, num_round, evals)
expect_equal(class(bst), "xgb.Booster")
expect_false(is.null(attributes(bst)$evaluation_log))
expect_false(is.null(attributes(bst)$evaluation_log$eval_error))
@ -48,7 +48,7 @@ test_that("custom objective in CV works", {
})
test_that("custom objective with early stop works", {
bst <- xgb.train(param, dtrain, 10, watchlist)
bst <- xgb.train(param, dtrain, 10, evals)
expect_equal(class(bst), "xgb.Booster")
train_log <- attributes(bst)$evaluation_log$train_error
expect_true(all(diff(train_log) <= 0))
@ -66,7 +66,7 @@ test_that("custom objective using DMatrix attr works", {
return(list(grad = grad, hess = hess))
}
param$objective <- logregobjattr
bst <- xgb.train(param, dtrain, num_round, watchlist)
bst <- xgb.train(param, dtrain, num_round, evals)
expect_equal(class(bst), "xgb.Booster")
})

View File

@ -41,13 +41,13 @@ test_that("xgb.DMatrix: basic construction", {
params <- list(tree_method = "hist", nthread = n_threads)
bst_fd <- xgb.train(
params, nrounds = 8, fd, watchlist = list(train = fd)
params, nrounds = 8, fd, evals = list(train = fd)
)
bst_dgr <- xgb.train(
params, nrounds = 8, fdgr, watchlist = list(train = fdgr)
params, nrounds = 8, fdgr, evals = list(train = fdgr)
)
bst_dgc <- xgb.train(
params, nrounds = 8, fdgc, watchlist = list(train = fdgc)
params, nrounds = 8, fdgc, evals = list(train = fdgc)
)
raw_fd <- xgb.save.raw(bst_fd, raw_format = "ubj")
@ -243,7 +243,7 @@ test_that("xgb.DMatrix: print", {
txt <- capture.output({
print(dtrain)
})
expect_equal(txt, "xgb.DMatrix dim: 6513 x 126 info: label weight base_margin colnames: yes")
expect_equal(txt, "xgb.DMatrix dim: 6513 x 126 info: base_margin, label, weight colnames: yes")
# DMatrix with just features
dtrain <- xgb.DMatrix(
@ -302,6 +302,37 @@ test_that("xgb.DMatrix: Inf as missing", {
file.remove(fname_nan)
})
test_that("xgb.DMatrix: missing in CSR", {
x_dense <- matrix(as.numeric(1:10), nrow = 5)
x_dense[2, 1] <- NA_real_
x_csr <- as(x_dense, "RsparseMatrix")
m_dense <- xgb.DMatrix(x_dense, nthread = n_threads, missing = NA_real_)
xgb.DMatrix.save(m_dense, "dense.dmatrix")
m_csr <- xgb.DMatrix(x_csr, nthread = n_threads, missing = NA)
xgb.DMatrix.save(m_csr, "csr.dmatrix")
denseconn <- file("dense.dmatrix", "rb")
csrconn <- file("csr.dmatrix", "rb")
expect_equal(file.size("dense.dmatrix"), file.size("csr.dmatrix"))
bytes <- file.size("dense.dmatrix")
densedmatrix <- readBin(denseconn, "raw", n = bytes)
csrmatrix <- readBin(csrconn, "raw", n = bytes)
expect_equal(length(densedmatrix), length(csrmatrix))
expect_equal(densedmatrix, csrmatrix)
close(denseconn)
close(csrconn)
file.remove("dense.dmatrix")
file.remove("csr.dmatrix")
})
test_that("xgb.DMatrix: error on three-dimensional array", {
set.seed(123)
x <- matrix(rnorm(500), nrow = 50)
@ -692,3 +723,58 @@ test_that("xgb.DMatrix: quantile cuts look correct", {
}
)
})
test_that("xgb.DMatrix: slicing keeps field indicators", {
data(mtcars)
x <- as.matrix(mtcars[, -1])
y <- mtcars[, 1]
dm <- xgb.DMatrix(
data = x,
label_lower_bound = -y,
label_upper_bound = y,
nthread = 1
)
idx_take <- seq(1, 5)
dm_slice <- xgb.slice.DMatrix(dm, idx_take)
expect_true(xgb.DMatrix.hasinfo(dm_slice, "label_lower_bound"))
expect_true(xgb.DMatrix.hasinfo(dm_slice, "label_upper_bound"))
expect_false(xgb.DMatrix.hasinfo(dm_slice, "label"))
expect_equal(getinfo(dm_slice, "label_lower_bound"), -y[idx_take], tolerance = 1e-6)
expect_equal(getinfo(dm_slice, "label_upper_bound"), y[idx_take], tolerance = 1e-6)
})
test_that("xgb.DMatrix: can slice with groups", {
data(iris)
x <- as.matrix(iris[, -5])
set.seed(123)
y <- sample(3, size = nrow(x), replace = TRUE)
group <- c(50, 50, 50)
dm <- xgb.DMatrix(x, label = y, group = group, nthread = 1)
idx_take <- seq(1, 50)
dm_slice <- xgb.slice.DMatrix(dm, idx_take, allow_groups = TRUE)
expect_true(xgb.DMatrix.hasinfo(dm_slice, "label"))
expect_false(xgb.DMatrix.hasinfo(dm_slice, "group"))
expect_false(xgb.DMatrix.hasinfo(dm_slice, "qid"))
expect_null(getinfo(dm_slice, "group"))
expect_equal(getinfo(dm_slice, "label"), y[idx_take], tolerance = 1e-6)
})
test_that("xgb.DMatrix: can read CSV", {
txt <- paste(
"1,2,3",
"-1,3,2",
sep = "\n"
)
fname <- file.path(tempdir(), "data.csv")
writeChar(txt, fname)
uri <- paste0(fname, "?format=csv&label_column=0")
dm <- xgb.DMatrix(uri, silent = TRUE)
expect_equal(getinfo(dm, "label"), c(1, -1))
expect_equal(
as.matrix(xgb.get.DMatrix.data(dm)),
matrix(c(2, 3, 3, 2), nrow = 2, byrow = TRUE)
)
})

View File

@ -25,7 +25,7 @@ test_that("training with feature weights works", {
expect_lt(importance[1, Frequency], importance[9, Frequency])
}
for (tm in c("hist", "approx", "exact")) {
for (tm in c("hist", "approx")) {
test(tm)
}
})

View File

@ -14,37 +14,37 @@ test_that("gblinear works", {
param <- list(objective = "binary:logistic", eval_metric = "error", booster = "gblinear",
nthread = n_threads, eta = 0.8, alpha = 0.0001, lambda = 0.0001)
watchlist <- list(eval = dtest, train = dtrain)
evals <- list(eval = dtest, train = dtrain)
n <- 5 # iterations
ERR_UL <- 0.005 # upper limit for the test set error
VERB <- 0 # chatterbox switch
param$updater <- 'shotgun'
bst <- xgb.train(param, dtrain, n, watchlist, verbose = VERB, feature_selector = 'shuffle')
bst <- xgb.train(param, dtrain, n, evals, verbose = VERB, feature_selector = 'shuffle')
ypred <- predict(bst, dtest)
expect_equal(length(getinfo(dtest, 'label')), 1611)
expect_lt(attributes(bst)$evaluation_log$eval_error[n], ERR_UL)
bst <- xgb.train(param, dtrain, n, watchlist, verbose = VERB, feature_selector = 'cyclic',
callbacks = list(cb.gblinear.history()))
bst <- xgb.train(param, dtrain, n, evals, verbose = VERB, feature_selector = 'cyclic',
callbacks = list(xgb.cb.gblinear.history()))
expect_lt(attributes(bst)$evaluation_log$eval_error[n], ERR_UL)
h <- xgb.gblinear.history(bst)
expect_equal(dim(h), c(n, ncol(dtrain) + 1))
expect_is(h, "matrix")
param$updater <- 'coord_descent'
bst <- xgb.train(param, dtrain, n, watchlist, verbose = VERB, feature_selector = 'cyclic')
bst <- xgb.train(param, dtrain, n, evals, verbose = VERB, feature_selector = 'cyclic')
expect_lt(attributes(bst)$evaluation_log$eval_error[n], ERR_UL)
bst <- xgb.train(param, dtrain, n, watchlist, verbose = VERB, feature_selector = 'shuffle')
bst <- xgb.train(param, dtrain, n, evals, verbose = VERB, feature_selector = 'shuffle')
expect_lt(attributes(bst)$evaluation_log$eval_error[n], ERR_UL)
bst <- xgb.train(param, dtrain, 2, watchlist, verbose = VERB, feature_selector = 'greedy')
bst <- xgb.train(param, dtrain, 2, evals, verbose = VERB, feature_selector = 'greedy')
expect_lt(attributes(bst)$evaluation_log$eval_error[2], ERR_UL)
bst <- xgb.train(param, dtrain, n, watchlist, verbose = VERB, feature_selector = 'thrifty',
top_k = 50, callbacks = list(cb.gblinear.history(sparse = TRUE)))
bst <- xgb.train(param, dtrain, n, evals, verbose = VERB, feature_selector = 'thrifty',
top_k = 50, callbacks = list(xgb.cb.gblinear.history(sparse = TRUE)))
expect_lt(attributes(bst)$evaluation_log$eval_error[n], ERR_UL)
h <- xgb.gblinear.history(bst)
expect_equal(dim(h), c(n, ncol(dtrain) + 1))

View File

@ -15,7 +15,7 @@ test_that('Test ranking with unweighted data', {
params <- list(eta = 1, tree_method = 'exact', objective = 'rank:pairwise', max_depth = 1,
eval_metric = 'auc', eval_metric = 'aucpr', nthread = n_threads)
bst <- xgb.train(params, dtrain, nrounds = 10, watchlist = list(train = dtrain))
bst <- xgb.train(params, dtrain, nrounds = 10, evals = list(train = dtrain))
# Check if the metric is monotone increasing
expect_true(all(diff(attributes(bst)$evaluation_log$train_auc) >= 0))
expect_true(all(diff(attributes(bst)$evaluation_log$train_aucpr) >= 0))
@ -39,7 +39,7 @@ test_that('Test ranking with weighted data', {
eta = 1, tree_method = "exact", objective = "rank:pairwise", max_depth = 1,
eval_metric = "auc", eval_metric = "aucpr", nthread = n_threads
)
bst <- xgb.train(params, dtrain, nrounds = 10, watchlist = list(train = dtrain))
bst <- xgb.train(params, dtrain, nrounds = 10, evals = list(train = dtrain))
# Check if the metric is monotone increasing
expect_true(all(diff(attributes(bst)$evaluation_log$train_auc) >= 0))
expect_true(all(diff(attributes(bst)$evaluation_log$train_aucpr) >= 0))

View File

@ -17,7 +17,7 @@ dtest <- xgb.DMatrix(
win32_flag <- .Platform$OS.type == "windows" && .Machine$sizeof.pointer != 8
test_that("updating the model works", {
watchlist <- list(train = dtrain, test = dtest)
evals <- list(train = dtrain, test = dtest)
# no-subsampling
p1 <- list(
@ -25,19 +25,19 @@ test_that("updating the model works", {
updater = "grow_colmaker,prune"
)
set.seed(11)
bst1 <- xgb.train(p1, dtrain, nrounds = 10, watchlist, verbose = 0)
bst1 <- xgb.train(p1, dtrain, nrounds = 10, evals = evals, verbose = 0)
tr1 <- xgb.model.dt.tree(model = bst1)
# with subsampling
p2 <- modifyList(p1, list(subsample = 0.1))
set.seed(11)
bst2 <- xgb.train(p2, dtrain, nrounds = 10, watchlist, verbose = 0)
bst2 <- xgb.train(p2, dtrain, nrounds = 10, evals = evals, verbose = 0)
tr2 <- xgb.model.dt.tree(model = bst2)
# the same no-subsampling boosting with an extra 'refresh' updater:
p1r <- modifyList(p1, list(updater = 'grow_colmaker,prune,refresh', refresh_leaf = FALSE))
set.seed(11)
bst1r <- xgb.train(p1r, dtrain, nrounds = 10, watchlist, verbose = 0)
bst1r <- xgb.train(p1r, dtrain, nrounds = 10, evals = evals, verbose = 0)
tr1r <- xgb.model.dt.tree(model = bst1r)
# all should be the same when no subsampling
expect_equal(attributes(bst1)$evaluation_log, attributes(bst1r)$evaluation_log)
@ -53,7 +53,7 @@ test_that("updating the model works", {
# the same boosting with subsampling with an extra 'refresh' updater:
p2r <- modifyList(p2, list(updater = 'grow_colmaker,prune,refresh', refresh_leaf = FALSE))
set.seed(11)
bst2r <- xgb.train(p2r, dtrain, nrounds = 10, watchlist, verbose = 0)
bst2r <- xgb.train(p2r, dtrain, nrounds = 10, evals = evals, verbose = 0)
tr2r <- xgb.model.dt.tree(model = bst2r)
# should be the same evaluation but different gains and larger cover
expect_equal(attributes(bst2)$evaluation_log, attributes(bst2r)$evaluation_log)
@ -66,7 +66,7 @@ test_that("updating the model works", {
# process type 'update' for no-subsampling model, refreshing the tree stats AND leaves from training data:
set.seed(123)
p1u <- modifyList(p1, list(process_type = 'update', updater = 'refresh', refresh_leaf = TRUE))
bst1u <- xgb.train(p1u, dtrain, nrounds = 10, watchlist, verbose = 0, xgb_model = bst1)
bst1u <- xgb.train(p1u, dtrain, nrounds = 10, evals = evals, verbose = 0, xgb_model = bst1)
tr1u <- xgb.model.dt.tree(model = bst1u)
# all should be the same when no subsampling
expect_equal(attributes(bst1)$evaluation_log, attributes(bst1u)$evaluation_log)
@ -79,7 +79,7 @@ test_that("updating the model works", {
# same thing but with a serialized model
set.seed(123)
bst1u <- xgb.train(p1u, dtrain, nrounds = 10, watchlist, verbose = 0, xgb_model = xgb.save.raw(bst1))
bst1u <- xgb.train(p1u, dtrain, nrounds = 10, evals = evals, verbose = 0, xgb_model = xgb.save.raw(bst1))
tr1u <- xgb.model.dt.tree(model = bst1u)
# all should be the same when no subsampling
expect_equal(attributes(bst1)$evaluation_log, attributes(bst1u)$evaluation_log)
@ -87,7 +87,7 @@ test_that("updating the model works", {
# process type 'update' for model with subsampling, refreshing only the tree stats from training data:
p2u <- modifyList(p2, list(process_type = 'update', updater = 'refresh', refresh_leaf = FALSE))
bst2u <- xgb.train(p2u, dtrain, nrounds = 10, watchlist, verbose = 0, xgb_model = bst2)
bst2u <- xgb.train(p2u, dtrain, nrounds = 10, evals = evals, verbose = 0, xgb_model = bst2)
tr2u <- xgb.model.dt.tree(model = bst2u)
# should be the same evaluation but different gains and larger cover
expect_equal(attributes(bst2)$evaluation_log, attributes(bst2u)$evaluation_log)
@ -102,7 +102,7 @@ test_that("updating the model works", {
# process type 'update' for no-subsampling model, refreshing only the tree stats from TEST data:
p1ut <- modifyList(p1, list(process_type = 'update', updater = 'refresh', refresh_leaf = FALSE))
bst1ut <- xgb.train(p1ut, dtest, nrounds = 10, watchlist, verbose = 0, xgb_model = bst1)
bst1ut <- xgb.train(p1ut, dtest, nrounds = 10, evals = evals, verbose = 0, xgb_model = bst1)
tr1ut <- xgb.model.dt.tree(model = bst1ut)
# should be the same evaluations but different gains and smaller cover (test data is smaller)
expect_equal(attributes(bst1)$evaluation_log, attributes(bst1ut)$evaluation_log)
@ -115,18 +115,18 @@ test_that("updating works for multiclass & multitree", {
dtr <- xgb.DMatrix(
as.matrix(iris[, -5]), label = as.numeric(iris$Species) - 1, nthread = n_threads
)
watchlist <- list(train = dtr)
evals <- list(train = dtr)
p0 <- list(max_depth = 2, eta = 0.5, nthread = n_threads, subsample = 0.6,
objective = "multi:softprob", num_class = 3, num_parallel_tree = 2,
base_score = 0)
set.seed(121)
bst0 <- xgb.train(p0, dtr, 5, watchlist, verbose = 0)
bst0 <- xgb.train(p0, dtr, 5, evals = evals, verbose = 0)
tr0 <- xgb.model.dt.tree(model = bst0)
# run update process for an original model with subsampling
p0u <- modifyList(p0, list(process_type = 'update', updater = 'refresh', refresh_leaf = FALSE))
bst0u <- xgb.train(p0u, dtr, nrounds = xgb.get.num.boosted.rounds(bst0),
watchlist, xgb_model = bst0, verbose = 0)
evals = evals, xgb_model = bst0, verbose = 0)
tr0u <- xgb.model.dt.tree(model = bst0u)
# should be the same evaluation but different gains and larger cover

View File

@ -341,10 +341,10 @@ One way to measure progress in learning of a model is to provide to **XGBoost**
> In some ways this is similar to what we have done above with the average error. The main difference is that earlier we measured the error after building the model, whereas now we measure it during construction.
For the purpose of this example, we use `watchlist` parameter. It is a list of `xgb.DMatrix`, each of them tagged with a name.
For the purpose of this example, we use the `evals` parameter. It is a list of `xgb.DMatrix` objects, each of them tagged with a name.
```{r watchlist, message=F, warning=F}
watchlist <- list(train = dtrain, test = dtest)
```{r evals, message=F, warning=F}
evals <- list(train = dtrain, test = dtest)
bst <- xgb.train(
data = dtrain
@ -355,7 +355,7 @@ bst <- xgb.train(
, objective = "binary:logistic"
)
, nrounds = 2
, watchlist = watchlist
, evals = evals
)
```
@ -367,7 +367,7 @@ If with your own dataset you have not such results, you should think about how y
For a better understanding of the learning progression, you may want to have some specific metric or even use multiple evaluation metrics.
```{r watchlist2, message=F, warning=F}
```{r evals2, message=F, warning=F}
bst <- xgb.train(
data = dtrain
, max_depth = 2
@ -379,7 +379,7 @@ bst <- xgb.train(
, eval_metric = "logloss"
)
, nrounds = 2
, watchlist = watchlist
, evals = evals
)
```
@ -401,7 +401,7 @@ bst <- xgb.train(
, eval_metric = "logloss"
)
, nrounds = 2
, watchlist = watchlist
, evals = evals
)
```
@ -430,7 +430,7 @@ bst <- xgb.train(
, objective = "binary:logistic"
)
, nrounds = 2
, watchlist = watchlist
, evals = evals
)
```
@ -496,6 +496,9 @@ An interesting test to see how identical our saved model is to the original one
```{r loadModel, message=F, warning=F}
# load binary model to R
# Note that the number of threads for 'xgb.load' is taken from global config,
# can be modified like this:
RhpcBLASctl::omp_set_num_threads(1)
bst2 <- xgb.load(fname)
xgb.parameters(bst2) <- list(nthread = 2)
pred2 <- predict(bst2, test$data)

View File

@ -1,6 +1,5 @@
# Automatically set source group based on folder
function(auto_source_group SOURCES)
foreach(FILE ${SOURCES})
get_filename_component(PARENT_DIR "${FILE}" PATH)

View File

@ -40,7 +40,7 @@ def main(client):
# you can pass output directly into `predict` too.
prediction = dxgb.predict(client, bst, dtrain)
print("Evaluation history:", history)
return prediction
print("Error:", da.sqrt((prediction - y) ** 2).mean().compute())
if __name__ == "__main__":

View File

@ -34,4 +34,5 @@ Other topics
.. toctree::
:maxdepth: 2
:titlesonly:
Handling of indexable elements <index_base>

View File

@ -144,6 +144,14 @@ which provides higher flexibility. For example:
ctest --verbose
If you need to debug errors on Windows using the Visual Studio debugger, you can append the gtest flags in `test_main.cc`:
.. code-block::
::testing::GTEST_FLAG(filter) = "Suite.Test";
::testing::GTEST_FLAG(repeat) = 10;
***********************************************
Sanitizers: Detect memory errors and data races
***********************************************

View File

@ -28,7 +28,7 @@ Contents
Python Package <python/index>
R Package <R-package/index>
JVM Package <jvm/index>
Ruby Package <https://github.com/ankane/xgb>
Ruby Package <https://github.com/ankane/xgboost-ruby>
Swift Package <https://github.com/kongzii/SwiftXGBoost>
Julia Package <julia>
C Package <c>

View File

@ -118,7 +118,7 @@ Parameters for Tree Booster
- All ``colsample_by*`` parameters have a range of (0, 1], the default value of 1, and specify the fraction of columns to be subsampled.
- ``colsample_bytree`` is the subsample ratio of columns when constructing each tree. Subsampling occurs once for every tree constructed.
- ``colsample_bylevel`` is the subsample ratio of columns for each level. Subsampling occurs once for every new depth level reached in a tree. Columns are subsampled from the set of columns chosen for the current tree.
- ``colsample_bynode`` is the subsample ratio of columns for each node (split). Subsampling occurs once every time a new split is evaluated. Columns are subsampled from the set of columns chosen for the current level.
- ``colsample_bynode`` is the subsample ratio of columns for each node (split). Subsampling occurs once every time a new split is evaluated. Columns are subsampled from the set of columns chosen for the current level. This is not supported by the exact tree method.
- ``colsample_by*`` parameters work cumulatively. For instance,
the combination ``{'colsample_bytree':0.5, 'colsample_bylevel':0.5,
'colsample_bynode':0.5}`` with 64 features will leave 8 features to choose from at
@ -450,7 +450,7 @@ Specify the learning task and the corresponding learning objective. The objectiv
* ``seed`` [default=0]
- Random number seed. This parameter is ignored in R package, use `set.seed()` instead.
- Random number seed. In the R package, if it is not specified, the seed is drawn from R's own RNG engine instead of defaulting to zero.
* ``seed_per_iteration`` [default= ``false``]
@ -489,7 +489,7 @@ Parameters for learning to rank (``rank:ndcg``, ``rank:map``, ``rank:pairwise``)
These are parameters specific to learning to rank task. See :doc:`Learning to Rank </tutorials/learning_to_rank>` for an in-depth explanation.
* ``lambdarank_pair_method`` [default = ``mean``]
* ``lambdarank_pair_method`` [default = ``topk``]
How to construct pairs for pair-wise learning.
@ -500,7 +500,13 @@ These are parameters specific to learning to rank task. See :doc:`Learning to Ra
It specifies the number of pairs sampled for each document when pair method is ``mean``, or the truncation level for queries when the pair method is ``topk``. For example, to train with ``ndcg@6``, set ``lambdarank_num_pair_per_sample`` to :math:`6` and ``lambdarank_pair_method`` to ``topk``.
* ``lambdarank_unbiased`` [default = ``false``]
* ``lambdarank_normalization`` [default = ``true``]
.. versionadded:: 2.1.0
Whether to normalize the leaf value by the lambda gradient. This normalization can sometimes stall training progress.
* ``lambdarank_unbiased`` [default = ``false``]
Specify whether the input click data needs to be debiased.
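To show how these ranking parameters fit together, here is a minimal sketch of passing them to ``xgb.train`` (the data is synthetic and purely illustrative):

.. code-block:: python

    import numpy as np
    import xgboost as xgb

    rng = np.random.default_rng(0)
    X = rng.standard_normal((120, 10))
    y = rng.integers(0, 4, size=120)             # graded relevance labels
    qid = np.sort(rng.integers(0, 3, size=120))  # query ids must be sorted

    dtrain = xgb.DMatrix(X, label=y, qid=qid)
    params = {
        "objective": "rank:ndcg",
        "lambdarank_pair_method": "topk",
        "lambdarank_num_pair_per_sample": 8,
        "lambdarank_normalization": True,
        "lambdarank_unbiased": False,
    }
    booster = xgb.train(params, dtrain, num_boost_round=10)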

View File

@ -36,7 +36,7 @@ inside iteration loop. You can also pass this callback function directly into X
# Specify which dataset and which metric should be used for early stopping.
early_stop = xgb.callback.EarlyStopping(rounds=early_stopping_rounds,
metric_name='CustomErr',
data_name='Train')
data_name='Valid')
booster = xgb.train(
{'objective': 'binary:logistic',

View File

@ -63,7 +63,7 @@ The input data is stored in a :py:class:`DMatrix <xgboost.DMatrix>` object. For
.. code-block:: python
dtrain = xgb.DMatrix('train.svm.txt')
dtrain = xgb.DMatrix('train.svm.txt?format=libsvm')
dtrain.save_binary('train.buffer')
* Missing values can be replaced by a default value in the :py:class:`DMatrix <xgboost.DMatrix>` constructor:
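A minimal sketch of that option (the sentinel value ``999.0`` here is just an illustrative assumption):

.. code-block:: python

    import numpy as np
    import xgboost as xgb

    data = np.array([[1.0, 999.0], [2.0, 3.0]])
    # treat 999.0 as the marker for missing entries
    dtrain = xgb.DMatrix(data, missing=999.0)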
@ -86,7 +86,7 @@ to number of groups.
.. code-block:: python
dtrain = xgb.DMatrix('train.svm.txt')
dtrain = xgb.DMatrix('train.svm.txt?format=libsvm')
dtest = xgb.DMatrix('test.svm.buffer')
The parser in XGBoost has limited functionality. When using the Python interface, it's
@ -176,7 +176,6 @@ Support Matrix
+-------------------------+-----------+-------------------+-----------+-----------+--------------------+-------------+
| pyarrow.Table | NPA | NPA | NPA | NPA | NPA | NPA |
+-------------------------+-----------+-------------------+-----------+-----------+--------------------+-------------+
+-------------------------+-----------+-------------------+-----------+-----------+--------------------+-------------+
| _\_array\_\_ | NPA | F | NPA | NPA | H | |
+-------------------------+-----------+-------------------+-----------+-----------+--------------------+-------------+
| Others | SciCSR | F | | F | F | |
@ -240,7 +239,7 @@ A saved model can be loaded as follows:
.. code-block:: python
bst = xgb.Booster({'nthread': 4}) # init model
bst.load_model('model.bin') # load data
bst.load_model('model.bin') # load model data
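For context, a self-contained sketch of the save/load round trip (the toy data and file name are illustrative):

.. code-block:: python

    import numpy as np
    import xgboost as xgb

    X = np.random.rand(100, 4)
    y = np.random.randint(2, size=100)
    dtrain = xgb.DMatrix(X, label=y)

    bst = xgb.train({"objective": "binary:logistic"}, dtrain, num_boost_round=5)
    bst.save_model("model.json")  # JSON (or UBJSON) is a recommended format

    loaded = xgb.Booster()
    loaded.load_model("model.json")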
Methods including `update` and `boost` from `xgboost.Booster` are designed for
internal usage only. The wrapper function `xgboost.train` does some

View File

@ -62,7 +62,7 @@ stack of trees:
.. code-block:: python
early_stop = xgb.callback.EarlyStopping(
rounds=2, metric_name='logloss', data_name='Validation_0', save_best=True
rounds=2, metric_name='logloss', data_name='validation_0', save_best=True
)
clf = xgb.XGBClassifier(tree_method="hist", callbacks=[early_stop])
clf.fit(X_train, y_train, eval_set=[(X_test, y_test)])
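As a hedged follow-up to the snippet above (reusing its ``clf`` and ``eval_set``): the ``data_name`` matches the automatic naming of ``eval_set`` entries (``validation_0``, ``validation_1``, ...), which can be verified on the fitted estimator:

.. code-block:: python

    # keys of the evaluation history mirror the eval_set names
    print(clf.evals_result().keys())  # e.g. dict_keys(['validation_0'])
    print(clf.best_iteration)         # best round found by early stopping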

View File

@ -7,7 +7,9 @@ sh
matplotlib
graphviz
numpy
scipy
myst-parser
ray[train]
xgboost_ray
sphinx-gallery
pyspark

View File

@ -48,11 +48,11 @@ Notice that the samples are sorted based on their query index in a non-decreasin
import xgboost as xgb
# Make a synthetic ranking dataset for demonstration
seed = 1994
seed = 1994
X, y = make_classification(random_state=seed)
rng = np.random.default_rng(seed)
n_query_groups = 3
qid = rng.integers(0, 3, size=X.shape[0])
qid = rng.integers(0, n_query_groups, size=X.shape[0])
# Sort the inputs based on query index
sorted_idx = np.argsort(qid)
@ -65,14 +65,14 @@ The simplest way to train a ranking model is by using the scikit-learn estimator
.. code-block:: python
ranker = xgb.XGBRanker(tree_method="hist", lambdarank_num_pair_per_sample=8, objective="rank:ndcg", lambdarank_pair_method="topk")
ranker.fit(X, y, qid=qid)
ranker.fit(X, y, qid=qid[sorted_idx])
Please note that, as of this writing, there's no learning-to-rank interface in scikit-learn. As a result, the :py:class:`xgboost.XGBRanker` class does not fully conform to the scikit-learn estimator guideline and cannot be used directly with some of its utility functions. For instance, the ``auc_score`` and ``ndcg_score`` in scikit-learn don't consider query group information or the pairwise loss. Most of the metrics are implemented as part of XGBoost, but to use scikit-learn utilities like :py:func:`sklearn.model_selection.cross_validation`, we need to make some adjustments in order to pass ``qid`` as an additional parameter for :py:meth:`xgboost.XGBRanker.score`. Given a data frame ``X`` (either pandas or cuDF), add the column ``qid`` as follows:
.. code-block:: python
df = pd.DataFrame(X, columns=[str(i) for i in range(X.shape[1])])
df["qid"] = qid
df["qid"] = qid[sorted_idx]
ranker.fit(df, y) # No need to pass qid as a separate argument
from sklearn.model_selection import StratifiedGroupKFold, cross_val_score
@ -146,7 +146,8 @@ The consideration of effective pairs also applies to the choice of pair method (
When using the mean strategy for generating pairs, where the target metric (like ``NDCG``) is computed over the whole query list, users can specify how many pairs should be generated per document by setting ``lambdarank_num_pair_per_sample``. XGBoost will randomly sample ``lambdarank_num_pair_per_sample`` pairs for each element in the query group (:math:`|pairs| = |query| \times num\_pairsample`). Setting it to 1 often produces reasonable results. In cases where performance is inadequate because too few effective pairs are generated, set ``lambdarank_num_pair_per_sample`` to a higher value. As more document pairs are generated, more effective pairs will be generated as well.
On the other hand, if you are prioritizing the top :math:`k` documents, the ``lambdarank_num_pair_per_sample`` should be set slightly higher than :math:`k` (with a few more documents) to obtain a good training result.
On the other hand, if you are prioritizing the top :math:`k` documents, ``lambdarank_num_pair_per_sample`` should be set slightly higher than :math:`k` (with a few more documents) to obtain a good training result. Lastly, XGBoost applies additional regularization for learning-to-rank objectives, which can be disabled by setting ``lambdarank_normalization`` to ``False``.
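A small sketch of the mean strategy described above, assuming the synthetic ``X``, ``y`` and sorted ``qid`` from the earlier snippets in this tutorial:

.. code-block:: python

    ranker = xgb.XGBRanker(
        tree_method="hist",
        objective="rank:ndcg",
        lambdarank_pair_method="mean",
        lambdarank_num_pair_per_sample=1,  # often a reasonable starting point
    )
    ranker.fit(X, y, qid=qid)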
**Summary** If you have large amount of training data:

View File

@ -28,7 +28,7 @@ We can create a ``SparkXGBRegressor`` estimator like:
.. code-block:: python
from xgboost.spark import SparkXGBRegressor
spark_reg_estimator = SparkXGBRegressor(
xgb_regressor = SparkXGBRegressor(
features_col="features",
label_col="label",
num_workers=2,
@ -61,7 +61,7 @@ type or spark array type.
.. code-block:: python
transformed_test_spark_dataframe = xgb_regressor.predict(test_spark_dataframe)
transformed_test_spark_dataframe = xgb_regressor_model.transform(test_spark_dataframe)
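For orientation, a hedged sketch of the step that produces ``xgb_regressor_model`` from the estimator defined earlier (the DataFrame name ``train_spark_dataframe`` is an assumption, not from this document):

.. code-block:: python

    # fit on a Spark DataFrame that has "features" and "label" columns
    xgb_regressor_model = xgb_regressor.fit(train_spark_dataframe)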
The above code snippet returns a ``transformed_test_spark_dataframe`` that contains the input

View File

@ -1,15 +1,23 @@
name: xgboost_docs
dependencies:
- python
- python=3.10
- pip
- pygraphviz
- sphinx
- sphinx-gallery
- recommonmark
- mock
- sh
- matplotlib
- numpy
- scipy
- scikit-learn
- myst-parser
- pyspark
- pip:
- breathe
- sphinx_rtd_theme
- pydot-ng
- graphviz
- ray[train]
- xgboost_ray

View File

@ -1,20 +1,18 @@
/**
* Copyright 2015-2023 by XGBoost Contributors
* Copyright 2015-2024, XGBoost Contributors
* \file base.h
* \brief Defines configuration macros and basic types for xgboost.
*/
#ifndef XGBOOST_BASE_H_
#define XGBOOST_BASE_H_
#include <dmlc/base.h>
#include <dmlc/omp.h>
#include <dmlc/omp.h> // for omp_uint, omp_ulong
#include <cmath>
#include <cstdint>
#include <iostream>
#include <string>
#include <utility>
#include <vector>
#include <cstdint> // for int32_t, uint64_t, int16_t
#include <ostream> // for ostream
#include <string> // for string
#include <utility> // for pair
#include <vector> // for vector
/*!
* \brief string flag for R library, to leave hooks when needed.
@ -37,7 +35,7 @@
* \brief Whether to customize global PRNG.
*/
#ifndef XGBOOST_CUSTOMIZE_GLOBAL_PRNG
#define XGBOOST_CUSTOMIZE_GLOBAL_PRNG XGBOOST_STRICT_R_MODE
#define XGBOOST_CUSTOMIZE_GLOBAL_PRNG 0
#endif // XGBOOST_CUSTOMIZE_GLOBAL_PRNG
/*!
@ -86,34 +84,31 @@
#endif // !defined(XGBOOST_MM_PREFETCH_PRESENT) && !defined()
/*! \brief namespace of xgboost*/
namespace xgboost {
/*! \brief unsigned integer type used for feature index. */
using bst_uint = uint32_t; // NOLINT
using bst_uint = std::uint32_t; // NOLINT
/*! \brief unsigned long integers */
using bst_ulong = uint64_t; // NOLINT
using bst_ulong = std::uint64_t; // NOLINT
/*! \brief float type, used for storing statistics */
using bst_float = float; // NOLINT
/*! \brief Categorical value type. */
using bst_cat_t = int32_t; // NOLINT
using bst_cat_t = std::int32_t; // NOLINT
/*! \brief Type for data column (feature) index. */
using bst_feature_t = uint32_t; // NOLINT
/*! \brief Type for histogram bin index. */
using bst_bin_t = int32_t; // NOLINT
/*! \brief Type for data row index.
*
* Be careful `std::size_t' is implementation-defined. Meaning that the binary
* representation of DMatrix might not be portable across platform. Booster model should
* be portable as parameters are floating points.
using bst_feature_t = std::uint32_t; // NOLINT
/**
* @brief Type for histogram bin index. We sometimes use -1 to indicate invalid bin.
*/
using bst_row_t = std::size_t; // NOLINT
using bst_bin_t = std::int32_t; // NOLINT
/**
* @brief Type for data row index (sample).
*/
using bst_idx_t = std::uint64_t; // NOLINT
/*! \brief Type for tree node index. */
using bst_node_t = std::int32_t; // NOLINT
/*! \brief Type for ranking group index. */
using bst_group_t = std::uint32_t; // NOLINT
/**
* \brief Type for indexing into output targets.
* @brief Type for indexing into output targets.
*/
using bst_target_t = std::uint32_t; // NOLINT
/**
@ -306,8 +301,7 @@ class GradientPairInt64 {
XGBOOST_DEVICE bool operator==(const GradientPairInt64 &rhs) const {
return grad_ == rhs.grad_ && hess_ == rhs.hess_;
}
friend std::ostream &operator<<(std::ostream &os,
const GradientPairInt64 &g) {
friend std::ostream &operator<<(std::ostream &os, const GradientPairInt64 &g) {
os << g.GetQuantisedGrad() << "/" << g.GetQuantisedHess();
return os;
}
@ -323,7 +317,7 @@ using omp_ulong = dmlc::omp_ulong; // NOLINT
/*! \brief define unsigned int for openmp loop */
using bst_omp_uint = dmlc::omp_uint; // NOLINT
/*! \brief Type used for representing version number in binary form.*/
using XGBoostVersionT = int32_t;
using XGBoostVersionT = std::int32_t;
} // namespace xgboost
#endif // XGBOOST_BASE_H_

View File

@ -1,5 +1,5 @@
/**
* Copyright 2015~2023 by XGBoost Contributors
* Copyright 2015-2024, XGBoost Contributors
* \file c_api.h
* \author Tianqi Chen
* \brief C API of XGBoost, used for interfacing to other languages.
@ -639,21 +639,14 @@ XGB_DLL int XGDMatrixSetInfoFromInterface(DMatrixHandle handle,
* \param len length of array
* \return 0 when success, -1 when failure happens
*/
XGB_DLL int XGDMatrixSetFloatInfo(DMatrixHandle handle,
const char *field,
const float *array,
XGB_DLL int XGDMatrixSetFloatInfo(DMatrixHandle handle, const char *field, const float *array,
bst_ulong len);
/*!
* \brief set uint32 vector to a content in info
* \param handle a instance of data matrix
* \param field field name
* \param array pointer to unsigned int vector
* \param len length of array
* \return 0 when success, -1 when failure happens
/**
* @deprecated since 2.1.0
*
* Use @ref XGDMatrixSetInfoFromInterface instead.
*/
XGB_DLL int XGDMatrixSetUIntInfo(DMatrixHandle handle,
const char *field,
const unsigned *array,
XGB_DLL int XGDMatrixSetUIntInfo(DMatrixHandle handle, const char *field, const unsigned *array,
bst_ulong len);
/*!
@ -725,42 +718,13 @@ XGB_DLL int XGDMatrixGetStrFeatureInfo(DMatrixHandle handle, const char *field,
bst_ulong *size,
const char ***out_features);
/*!
* \brief Set meta info from dense matrix. Valid field names are:
/**
* @deprecated since 2.1.0
*
* - label
* - weight
* - base_margin
* - group
* - label_lower_bound
* - label_upper_bound
* - feature_weights
*
* \param handle An instance of data matrix
* \param field Field name
* \param data Pointer to consecutive memory storing data.
* \param size Size of the data, this is relative to size of type. (Meaning NOT number
* of bytes.)
* \param type Indicator of data type. This is defined in xgboost::DataType enum class.
* - float = 1
* - double = 2
* - uint32_t = 3
* - uint64_t = 4
* \return 0 when success, -1 when failure happens
* Use @ref XGDMatrixSetInfoFromInterface instead.
*/
XGB_DLL int XGDMatrixSetDenseInfo(DMatrixHandle handle, const char *field,
void const *data, bst_ulong size, int type);
/*!
* \brief (deprecated) Use XGDMatrixSetUIntInfo instead. Set group of the training matrix
* \param handle a instance of data matrix
* \param group pointer to group size
* \param len length of array
* \return 0 when success, -1 when failure happens
*/
XGB_DLL int XGDMatrixSetGroup(DMatrixHandle handle,
const unsigned *group,
bst_ulong len);
XGB_DLL int XGDMatrixSetDenseInfo(DMatrixHandle handle, const char *field, void const *data,
bst_ulong size, int type);
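Since the typed setters above are now deprecated in favor of XGDMatrixSetInfoFromInterface, a minimal sketch of the replacement path may help. It assumes the function's third parameter is a JSON array-interface string in the NumPy __array_interface__ style; the exact JSON fields and the helper name SetLabels are illustrative, not taken from this diff.

// Sketch only (not part of the header): set the "label" field through the array-interface
// path that replaces the deprecated typed setters. The JSON layout and the third parameter
// of XGDMatrixSetInfoFromInterface are assumptions.
#include <cstdint>  // std::uintptr_t
#include <cstdio>   // std::snprintf

static int SetLabels(DMatrixHandle dmat, float const *labels, bst_ulong n) {
  char buf[256];
  std::snprintf(buf, sizeof(buf),
                "{\"data\": [%llu, true], \"shape\": [%llu], \"typestr\": \"<f4\", \"version\": 3}",
                static_cast<unsigned long long>(reinterpret_cast<std::uintptr_t>(labels)),
                static_cast<unsigned long long>(n));
  return XGDMatrixSetInfoFromInterface(dmat, "label", buf);
}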
/*!
* \brief get float info vector from matrix.
@@ -1591,7 +1555,7 @@ XGB_DLL int XGTrackerCreate(char const *config, TrackerHandle *handle);
/**
* @brief Get the arguments needed for running workers. This should be called after
* XGTrackerRun() and XGTrackerWait()
* XGTrackerRun().
*
* @param handle The handle to the tracker.
* @param args The arguments returned as a JSON document.
@@ -1601,16 +1565,19 @@ XGB_DLL int XGTrackerCreate(char const *config, TrackerHandle *handle);
XGB_DLL int XGTrackerWorkerArgs(TrackerHandle handle, char const **args);
/**
* @brief Run the tracker.
* @brief Start the tracker. The tracker runs in the background and this function returns
* once the tracker is started.
*
* @param handle The handle to the tracker.
* @param config Unused at the moment, preserved for the future.
*
* @return 0 for success, -1 for failure.
*/
XGB_DLL int XGTrackerRun(TrackerHandle handle);
XGB_DLL int XGTrackerRun(TrackerHandle handle, char const *config);
/**
* @brief Wait for the tracker to finish, should be called after XGTrackerRun().
* @brief Wait for the tracker to finish, should be called after XGTrackerRun(). This
* function will block until the tracker task is finished or timeout is reached.
*
* @param handle The handle to the tracker.
* @param config JSON encoded configuration. No argument is required yet, preserved for
@@ -1618,11 +1585,12 @@ XGB_DLL int XGTrackerRun(TrackerHandle handle);
*
* @return 0 for success, -1 for failure.
*/
XGB_DLL int XGTrackerWait(TrackerHandle handle, char const *config);
XGB_DLL int XGTrackerWaitFor(TrackerHandle handle, char const *config);
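To make the renamed entry points easier to follow, here is a hedged sketch of the tracker lifecycle implied by the declarations above: create, run (returns once the tracker has started), fetch the worker arguments, wait, then free. The name XGTrackerFree and the config keys passed to XGTrackerCreate are assumptions; neither is shown in this hunk.

// Lifecycle sketch only (not part of the header); config keys and XGTrackerFree are assumed.
static void RunTrackerExample() {
  TrackerHandle tracker = nullptr;
  if (XGTrackerCreate("{\"n_workers\": 2}", &tracker) != 0) return;
  if (XGTrackerRun(tracker, "{}") != 0) return;  // returns once the tracker is started
  char const *args = nullptr;
  XGTrackerWorkerArgs(tracker, &args);           // JSON arguments handed to the workers
  /* launch workers using `args` ... */
  XGTrackerWaitFor(tracker, "{}");               // blocks until the tracker finishes or times out
  XGTrackerFree(tracker);                        // assumed free routine, documented below
}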
/**
* @brief Free a tracker instance. XGTrackerWait() is called internally. If the tracker
* cannot close properly, manual interruption is required.
 * @brief Free a tracker instance. This should be called after XGTrackerWaitFor(). If the
 *        tracker is not properly waited for, this function will shut down all connections with
 *        the tracker, potentially leading to undefined behavior.
*
* @param handle The handle to the tracker.
*

View File

@@ -1,13 +1,13 @@
/**
* Copyright 2023, XGBoost Contributors
* Copyright 2023-2024, XGBoost Contributors
*/
#pragma once
#include <memory> // for unique_ptr
#include <sstream> // for stringstream
#include <stack> // for stack
#include <string> // for string
#include <utility> // for move
#include <cstdint> // for int32_t
#include <memory> // for unique_ptr
#include <string> // for string
#include <system_error> // for error_code
#include <utility> // for move
namespace xgboost::collective {
namespace detail {
@@ -46,48 +46,19 @@ struct ResultImpl {
return cur_eq;
}
[[nodiscard]] std::string Report() {
std::stringstream ss;
ss << "\n- " << this->message;
if (this->errc != std::error_code{}) {
ss << " system error:" << this->errc.message();
}
[[nodiscard]] std::string Report() const;
[[nodiscard]] std::error_code Code() const;
auto ptr = prev.get();
while (ptr) {
ss << "\n- ";
ss << ptr->message;
if (ptr->errc != std::error_code{}) {
ss << " " << ptr->errc.message();
}
ptr = ptr->prev.get();
}
return ss.str();
}
[[nodiscard]] auto Code() const {
// Find the root error.
std::stack<ResultImpl const*> stack;
auto ptr = this;
while (ptr) {
stack.push(ptr);
if (ptr->prev) {
ptr = ptr->prev.get();
} else {
break;
}
}
while (!stack.empty()) {
auto frame = stack.top();
stack.pop();
if (frame->errc != std::error_code{}) {
return frame->errc;
}
}
return std::error_code{};
}
void Concat(std::unique_ptr<ResultImpl> rhs);
};
#if (!defined(__GNUC__) && !defined(__clang__)) || defined(__MINGW32__)
#define __builtin_FILE() nullptr
#define __builtin_LINE() (-1)
std::string MakeMsg(std::string&& msg, char const*, std::int32_t);
#else
std::string MakeMsg(std::string&& msg, char const* file, std::int32_t line);
#endif
} // namespace detail
/**
@@ -129,8 +100,21 @@ struct Result {
}
return *impl_ == *that.impl_;
}
friend Result operator+(Result&& lhs, Result&& rhs);
};
[[nodiscard]] inline Result operator+(Result&& lhs, Result&& rhs) {
if (lhs.OK()) {
return std::forward<Result>(rhs);
}
if (rhs.OK()) {
return std::forward<Result>(lhs);
}
lhs.impl_->Concat(std::move(rhs.impl_));
return std::forward<Result>(lhs);
}
/**
* @brief Return success.
*/
@@ -138,32 +122,43 @@ struct Result {
/**
* @brief Return failure.
*/
[[nodiscard]] inline auto Fail(std::string msg) { return Result{std::move(msg)}; }
[[nodiscard]] inline auto Fail(std::string msg, char const* file = __builtin_FILE(),
std::int32_t line = __builtin_LINE()) {
return Result{detail::MakeMsg(std::move(msg), file, line)};
}
/**
* @brief Return failure with `errno`.
*/
[[nodiscard]] inline auto Fail(std::string msg, std::error_code errc) {
return Result{std::move(msg), std::move(errc)};
[[nodiscard]] inline auto Fail(std::string msg, std::error_code errc,
char const* file = __builtin_FILE(),
std::int32_t line = __builtin_LINE()) {
return Result{detail::MakeMsg(std::move(msg), file, line), std::move(errc)};
}
/**
* @brief Return failure with a previous error.
*/
[[nodiscard]] inline auto Fail(std::string msg, Result&& prev) {
return Result{std::move(msg), std::forward<Result>(prev)};
[[nodiscard]] inline auto Fail(std::string msg, Result&& prev, char const* file = __builtin_FILE(),
std::int32_t line = __builtin_LINE()) {
return Result{detail::MakeMsg(std::move(msg), file, line), std::forward<Result>(prev)};
}
/**
* @brief Return failure with a previous error and a new `errno`.
*/
[[nodiscard]] inline auto Fail(std::string msg, std::error_code errc, Result&& prev) {
return Result{std::move(msg), std::move(errc), std::forward<Result>(prev)};
[[nodiscard]] inline auto Fail(std::string msg, std::error_code errc, Result&& prev,
char const* file = __builtin_FILE(),
std::int32_t line = __builtin_LINE()) {
return Result{detail::MakeMsg(std::move(msg), file, line), std::move(errc),
std::forward<Result>(prev)};
}
// We don't have monad, a simple helper would do.
template <typename Fn>
Result operator<<(Result&& r, Fn&& fn) {
[[nodiscard]] std::enable_if_t<std::is_invocable_v<Fn>, Result> operator<<(Result&& r, Fn&& fn) {
if (!r.OK()) {
return std::forward<Result>(r);
}
return fn();
}
void SafeColl(Result const& rc);
} // namespace xgboost::collective
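A minimal usage sketch may clarify how the pieces above compose: Fail() now records the call site through __builtin_FILE()/__builtin_LINE(), operator+ merges two failures, and operator<< chains steps that each return a Result. The function names below and the exact behavior of SafeColl on failure are assumptions for illustration.

// Minimal sketch, assuming this result header is included; DoSetup/Caller are illustrative.
#include <cerrno>        // errno
#include <system_error>  // std::error_code, std::system_category

xgboost::collective::Result DoSetup(bool ok) {
  using xgboost::collective::Fail;
  using xgboost::collective::Success;
  return Success() << [&] {
    // First step: attach errno to the message on failure.
    return ok ? Success()
              : Fail("Failed to open socket.", std::error_code{errno, std::system_category()});
  } << [&] {
    // Runs only when the previous step succeeded.
    return Success();
  };
}

void Caller() {
  auto rc = DoSetup(false);
  xgboost::collective::SafeColl(rc);  // assumed to surface rc.Report() when rc is not OK
}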

View File

@@ -1,5 +1,5 @@
/**
* Copyright (c) 2022-2023, XGBoost Contributors
* Copyright (c) 2022-2024, XGBoost Contributors
*/
#pragma once
@@ -12,7 +12,6 @@
#include <cstddef> // std::size_t
#include <cstdint> // std::int32_t, std::uint16_t
#include <cstring> // memset
#include <limits> // std::numeric_limits
#include <string> // std::string
#include <system_error> // std::error_code, std::system_category
#include <utility> // std::swap
@@ -125,6 +124,21 @@ inline std::int32_t CloseSocket(SocketT fd) {
#endif
}
inline std::int32_t ShutdownSocket(SocketT fd) {
#if defined(_WIN32)
auto rc = shutdown(fd, SD_BOTH);
if (rc != 0 && LastError() == WSANOTINITIALISED) {
return 0;
}
#else
auto rc = shutdown(fd, SHUT_RDWR);
if (rc != 0 && LastError() == ENOTCONN) {
return 0;
}
#endif
return rc;
}
inline bool ErrorWouldBlock(std::int32_t errsv) noexcept(true) {
#ifdef _WIN32
return errsv == WSAEWOULDBLOCK;
@@ -436,41 +450,62 @@ class TCPSocket {
* \brief Accept new connection, returns a new TCP socket for the new connection.
*/
TCPSocket Accept() {
HandleT newfd = accept(Handle(), nullptr, nullptr);
SockAddress addr;
TCPSocket newsock;
auto rc = this->Accept(&newsock, &addr);
SafeColl(rc);
return newsock;
}
[[nodiscard]] Result Accept(TCPSocket *out, SockAddress *addr) {
#if defined(_WIN32)
auto interrupt = WSAEINTR;
#else
auto interrupt = EINTR;
#endif
if (newfd == InvalidSocket() && system::LastError() != interrupt) {
system::ThrowAtError("accept");
if (this->Domain() == SockDomain::kV4) {
struct sockaddr_in caddr;
socklen_t caddr_len = sizeof(caddr);
HandleT newfd = accept(Handle(), reinterpret_cast<sockaddr *>(&caddr), &caddr_len);
if (newfd == InvalidSocket() && system::LastError() != interrupt) {
return system::FailWithCode("Failed to accept.");
}
*addr = SockAddress{SockAddrV4{caddr}};
*out = TCPSocket{newfd};
} else {
struct sockaddr_in6 caddr;
socklen_t caddr_len = sizeof(caddr);
HandleT newfd = accept(Handle(), reinterpret_cast<sockaddr *>(&caddr), &caddr_len);
if (newfd == InvalidSocket() && system::LastError() != interrupt) {
return system::FailWithCode("Failed to accept.");
}
*addr = SockAddress{SockAddrV6{caddr}};
*out = TCPSocket{newfd};
}
TCPSocket newsock{newfd};
return newsock;
}
[[nodiscard]] Result Accept(TCPSocket *out, SockAddrV4 *addr) {
struct sockaddr_in caddr;
socklen_t caddr_len = sizeof(caddr);
HandleT newfd = accept(Handle(), reinterpret_cast<sockaddr *>(&caddr), &caddr_len);
if (newfd == InvalidSocket()) {
return system::FailWithCode("Failed to accept.");
    // On MacOS, the accepted socket is automatically set to async if the parent socket is async.
    // We make sure all sockets are blocking by default.
//
// On Windows, a closed socket is returned during shutdown. We guard against it when
// setting non-blocking.
if (!out->IsClosed()) {
return out->NonBlocking(false);
}
*addr = SockAddrV4{caddr};
*out = TCPSocket{newfd};
return Success();
}
~TCPSocket() {
if (!IsClosed()) {
Close();
auto rc = this->Close();
if (!rc.OK()) {
LOG(WARNING) << rc.Report();
}
}
}
TCPSocket(TCPSocket const &that) = delete;
TCPSocket(TCPSocket &&that) noexcept(true) { std::swap(this->handle_, that.handle_); }
TCPSocket &operator=(TCPSocket const &that) = delete;
TCPSocket &operator=(TCPSocket &&that) {
TCPSocket &operator=(TCPSocket &&that) noexcept(true) {
std::swap(this->handle_, that.handle_);
return *this;
}
@@ -479,36 +514,49 @@ class TCPSocket {
*/
[[nodiscard]] HandleT const &Handle() const { return handle_; }
/**
* \brief Listen to incoming requests. Should be called after bind.
* @brief Listen to incoming requests. Should be called after bind.
*/
void Listen(std::int32_t backlog = 16) { xgboost_CHECK_SYS_CALL(listen(handle_, backlog), 0); }
[[nodiscard]] Result Listen(std::int32_t backlog = 16) {
if (listen(handle_, backlog) != 0) {
return system::FailWithCode("Failed to listen.");
}
return Success();
}
/**
* \brief Bind socket to INADDR_ANY, return the port selected by the OS.
* @brief Bind socket to INADDR_ANY, return the port selected by the OS.
*/
[[nodiscard]] in_port_t BindHost() {
[[nodiscard]] Result BindHost(std::int32_t* p_out) {
// Use int32 instead of in_port_t for consistency. We take port as parameter from
// users using other languages, the port is usually stored and passed around as int.
if (Domain() == SockDomain::kV6) {
auto addr = SockAddrV6::InaddrAny();
auto handle = reinterpret_cast<sockaddr const *>(&addr.Handle());
xgboost_CHECK_SYS_CALL(
bind(handle_, handle, sizeof(std::remove_reference_t<decltype(addr.Handle())>)), 0);
if (bind(handle_, handle, sizeof(std::remove_reference_t<decltype(addr.Handle())>)) != 0) {
return system::FailWithCode("bind failed.");
}
sockaddr_in6 res_addr;
socklen_t addrlen = sizeof(res_addr);
xgboost_CHECK_SYS_CALL(
getsockname(handle_, reinterpret_cast<sockaddr *>(&res_addr), &addrlen), 0);
return ntohs(res_addr.sin6_port);
if (getsockname(handle_, reinterpret_cast<sockaddr *>(&res_addr), &addrlen) != 0) {
return system::FailWithCode("getsockname failed.");
}
*p_out = ntohs(res_addr.sin6_port);
} else {
auto addr = SockAddrV4::InaddrAny();
auto handle = reinterpret_cast<sockaddr const *>(&addr.Handle());
xgboost_CHECK_SYS_CALL(
bind(handle_, handle, sizeof(std::remove_reference_t<decltype(addr.Handle())>)), 0);
if (bind(handle_, handle, sizeof(std::remove_reference_t<decltype(addr.Handle())>)) != 0) {
return system::FailWithCode("bind failed.");
}
sockaddr_in res_addr;
socklen_t addrlen = sizeof(res_addr);
xgboost_CHECK_SYS_CALL(
getsockname(handle_, reinterpret_cast<sockaddr *>(&res_addr), &addrlen), 0);
return ntohs(res_addr.sin_port);
if (getsockname(handle_, reinterpret_cast<sockaddr *>(&res_addr), &addrlen) != 0) {
return system::FailWithCode("getsockname failed.");
}
*p_out = ntohs(res_addr.sin_port);
}
return Success();
}
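As a hedged illustration of how these Result-returning calls chain together, the sketch below binds, listens, and accepts on an IPv4 socket. TCPSocket::Create is only described further down in this header, so its exact signature is an assumption.

// Sketch only; TCPSocket::Create's signature is assumed, failures surface via Result::Report().
xgboost::collective::Result RunListener(xgboost::collective::TCPSocket *p_conn) {
  using namespace xgboost::collective;
  auto listener = TCPSocket::Create(SockDomain::kV4);  // assumed factory
  std::int32_t port{0};
  SockAddress peer;
  return listener.BindHost(&port)
      << [&] { return listener.Listen(/*backlog=*/16); }
      << [&] { return listener.Accept(p_conn, &peer); };
}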
[[nodiscard]] auto Port() const {
@@ -621,26 +669,49 @@ class TCPSocket {
*/
std::size_t Send(StringView str);
/**
* \brief Receive string, format is matched with the Python socket wrapper in RABIT.
* @brief Receive string, format is matched with the Python socket wrapper in RABIT.
*/
std::size_t Recv(std::string *p_str);
[[nodiscard]] Result Recv(std::string *p_str);
/**
* \brief Close the socket, called automatically in destructor if the socket is not closed.
* @brief Close the socket, called automatically in destructor if the socket is not closed.
*/
void Close() {
[[nodiscard]] Result Close() {
if (InvalidSocket() != handle_) {
#if defined(_WIN32)
auto rc = system::CloseSocket(handle_);
#if defined(_WIN32)
// it's possible that we close TCP sockets after finalizing WSA due to detached thread.
if (rc != 0 && system::LastError() != WSANOTINITIALISED) {
system::ThrowAtError("close", rc);
return system::FailWithCode("Failed to close the socket.");
}
#else
xgboost_CHECK_SYS_CALL(system::CloseSocket(handle_), 0);
if (rc != 0) {
return system::FailWithCode("Failed to close the socket.");
}
#endif
handle_ = InvalidSocket();
}
return Success();
}
/**
* @brief Call shutdown on the socket.
*/
[[nodiscard]] Result Shutdown() {
if (this->IsClosed()) {
return Success();
}
auto rc = system::ShutdownSocket(this->Handle());
#if defined(_WIN32)
// Windows cannot shutdown a socket if it's not connected.
if (rc == -1 && system::LastError() == WSAENOTCONN) {
return Success();
}
#endif
if (rc != 0) {
return system::FailWithCode("Failed to shutdown socket.");
}
return Success();
}
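A graceful teardown presumably calls Shutdown() before Close(), so the peer observes end-of-stream before the descriptor goes away; a tiny sketch (the helper name is illustrative):

// Sketch only: shut the connection down, then close it, surfacing either failure.
inline xgboost::collective::Result TearDown(xgboost::collective::TCPSocket *sock) {
  return sock->Shutdown() << [&] { return sock->Close(); };
}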
/**
* \brief Create a TCP socket on specified domain.
*/

View File

@@ -19,7 +19,6 @@
#include <algorithm>
#include <limits>
#include <memory>
#include <numeric>
#include <string>
#include <utility>
#include <vector>
@@ -137,14 +136,6 @@ class MetaInfo {
* \param fo The output stream.
*/
void SaveBinary(dmlc::Stream* fo) const;
/*!
* \brief Set information in the meta info.
* \param key The key of the information.
* \param dptr The data pointer of the source array.
* \param dtype The type of the source data.
* \param num Number of elements in the source array.
*/
void SetInfo(Context const& ctx, const char* key, const void* dptr, DataType dtype, size_t num);
/*!
* \brief Set information in the meta info with array interface.
* \param key The key of the information.
@@ -315,7 +306,7 @@ struct BatchParam {
struct HostSparsePageView {
using Inst = common::Span<Entry const>;
common::Span<bst_row_t const> offset;
common::Span<bst_idx_t const> offset;
common::Span<Entry const> data;
Inst operator[](size_t i) const {
@@ -333,7 +324,7 @@ struct HostSparsePageView {
class SparsePage {
public:
// Offset for each row.
HostDeviceVector<bst_row_t> offset;
HostDeviceVector<bst_idx_t> offset;
/*! \brief the data of the segments */
HostDeviceVector<Entry> data;
@@ -517,10 +508,6 @@ class DMatrix {
DMatrix() = default;
/*! \brief meta information of the dataset */
virtual MetaInfo& Info() = 0;
virtual void SetInfo(const char* key, const void* dptr, DataType dtype, size_t num) {
auto const& ctx = *this->Ctx();
this->Info().SetInfo(ctx, key, dptr, dtype, num);
}
virtual void SetInfo(const char* key, std::string const& interface_str) {
auto const& ctx = *this->Ctx();
this->Info().SetInfo(ctx, key, StringView{interface_str});
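With the typed SetInfo overload removed, meta info now flows through the array-interface string path kept above. A brief hedged sketch follows; the JSON fields mirror the NumPy __array_interface__ convention and are assumptions rather than something shown in this diff.

// Sketch only: set sample weights on a DMatrix via the array-interface string overload.
#include <cstddef>  // std::size_t
#include <cstdint>  // std::uintptr_t
#include <sstream>  // std::ostringstream

void SetWeights(xgboost::DMatrix *dmat, float const *weights, std::size_t n) {
  std::ostringstream oss;
  oss << "{\"data\": [" << reinterpret_cast<std::uintptr_t>(weights)
      << ", true], \"shape\": [" << n << "], \"typestr\": \"<f4\", \"version\": 3}";
  dmat->SetInfo("weight", oss.str());
}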

Some files were not shown because too many files have changed in this diff.