enable ROCm on latest XGBoost

commit 15421e40d9

.github/workflows/main.yml (vendored): 20 changes
@@ -141,18 +141,14 @@ jobs:
architecture: 'x64'
- name: Install Python packages
run: |
python -m pip install wheel setuptools cpplint pylint
python -m pip install wheel setuptools cmakelint cpplint pylint
- name: Run lint
run: |
python3 dmlc-core/scripts/lint.py xgboost cpp R-package/src
python3 tests/ci_build/lint_cpp.py xgboost cpp R-package/src

python3 dmlc-core/scripts/lint.py --exclude_path \
python-package/xgboost/dmlc-core \
python-package/xgboost/include \
python-package/xgboost/lib \
python-package/xgboost/rabit \
python-package/xgboost/src \
--pylint-rc python-package/.pylintrc \
xgboost \
cpp \
include src python-package
python3 tests/ci_build/lint_cpp.py xgboost cpp include src python-package \
--exclude_path python-package/xgboost/dmlc-core python-package/xgboost/include \
python-package/xgboost/lib python-package/xgboost/rabit \
python-package/xgboost/src

sh ./tests/ci_build/lint_cmake.sh || true
.github/workflows/python_tests.yml (vendored): 2 changes

@@ -190,7 +190,7 @@ jobs:
run: |
mkdir build_msvc
cd build_msvc
cmake .. -G"Visual Studio 17 2022" -DCMAKE_CONFIGURATION_TYPES="Release" -A x64 -DGOOGLE_TEST=ON -DUSE_DMLC_GTEST=ON -DBUILD_DEPRECATED_CLI=ON
cmake .. -G"Visual Studio 17 2022" -DCMAKE_CONFIGURATION_TYPES="Release" -A x64 -DBUILD_DEPRECATED_CLI=ON
cmake --build . --config Release --parallel $(nproc)

- name: Install Python package
CMakeLists.txt: 264 changes

@@ -8,9 +8,9 @@ cmake_policy(SET CMP0076 NEW)
|
||||
set(CMAKE_POLICY_DEFAULT_CMP0063 NEW)
|
||||
cmake_policy(SET CMP0063 NEW)
|
||||
|
||||
if ((${CMAKE_VERSION} VERSION_GREATER 3.13) OR (${CMAKE_VERSION} VERSION_EQUAL 3.13))
|
||||
if((${CMAKE_VERSION} VERSION_GREATER 3.13) OR (${CMAKE_VERSION} VERSION_EQUAL 3.13))
|
||||
cmake_policy(SET CMP0077 NEW)
|
||||
endif ((${CMAKE_VERSION} VERSION_GREATER 3.13) OR (${CMAKE_VERSION} VERSION_EQUAL 3.13))
|
||||
endif()
|
||||
|
||||
message(STATUS "CMake version ${CMAKE_VERSION}")
|
||||
|
||||
@ -90,108 +90,99 @@ option(PLUGIN_UPDATER_ONEAPI "DPC++ updater" OFF)
|
||||
option(ADD_PKGCONFIG "Add xgboost.pc into system." ON)
|
||||
|
||||
#-- Checks for building XGBoost
|
||||
if (USE_DEBUG_OUTPUT AND (NOT (CMAKE_BUILD_TYPE MATCHES Debug)))
|
||||
if(USE_DEBUG_OUTPUT AND (NOT (CMAKE_BUILD_TYPE MATCHES Debug)))
|
||||
message(SEND_ERROR "Do not enable `USE_DEBUG_OUTPUT' with release build.")
|
||||
endif (USE_DEBUG_OUTPUT AND (NOT (CMAKE_BUILD_TYPE MATCHES Debug)))
|
||||
|
||||
if (USE_NCCL AND NOT (USE_CUDA))
|
||||
endif()
|
||||
if(USE_NCCL AND NOT (USE_CUDA))
|
||||
message(SEND_ERROR "`USE_NCCL` must be enabled with `USE_CUDA` flag.")
|
||||
endif (USE_NCCL AND NOT (USE_CUDA))
|
||||
if (USE_DEVICE_DEBUG AND NOT (USE_CUDA))
|
||||
endif()
|
||||
if(USE_DEVICE_DEBUG AND NOT (USE_CUDA))
|
||||
message(SEND_ERROR "`USE_DEVICE_DEBUG` must be enabled with `USE_CUDA` flag.")
|
||||
endif (USE_DEVICE_DEBUG AND NOT (USE_CUDA))
|
||||
if (BUILD_WITH_SHARED_NCCL AND (NOT USE_NCCL))
|
||||
endif()
|
||||
if(BUILD_WITH_SHARED_NCCL AND (NOT USE_NCCL))
|
||||
message(SEND_ERROR "Build XGBoost with -DUSE_NCCL=ON to enable BUILD_WITH_SHARED_NCCL.")
|
||||
endif (BUILD_WITH_SHARED_NCCL AND (NOT USE_NCCL))
|
||||
|
||||
if (USE_RCCL AND NOT (USE_HIP))
|
||||
message(SEND_ERROR "`USE_RCCL` must be enabled with `USE_HIP` flag.")
|
||||
endif (USE_RCCL AND NOT (USE_HIP))
|
||||
if (USE_DEVICE_DEBUG AND NOT (USE_HIP))
|
||||
message(SEND_ERROR "`USE_DEVICE_DEBUG` must be enabled with `USE_HIP` flag.")
|
||||
endif (USE_DEVICE_DEBUG AND NOT (USE_HIP))
|
||||
if (BUILD_WITH_SHARED_RCCL AND (NOT USE_RCCL))
|
||||
endif()
|
||||
if(USE_RCCL AND NOT (USE_HIP))
|
||||
message(SEND_ERROR "`USE_RCCL` must be enabled with `USE_HIP` flag.")
|
||||
endif()
|
||||
if(BUILD_WITH_SHARED_RCCL AND (NOT USE_RCCL))
|
||||
message(SEND_ERROR "Build XGBoost with -DUSE_RCCL=ON to enable BUILD_WITH_SHARED_RCCL.")
|
||||
endif (BUILD_WITH_SHARED_RCCL AND (NOT USE_RCCL))
|
||||
|
||||
if (JVM_BINDINGS AND R_LIB)
|
||||
endif()
|
||||
if(JVM_BINDINGS AND R_LIB)
|
||||
message(SEND_ERROR "`R_LIB' is not compatible with `JVM_BINDINGS' as they both have customized configurations.")
|
||||
endif (JVM_BINDINGS AND R_LIB)
|
||||
if (R_LIB AND GOOGLE_TEST)
|
||||
message(WARNING "Some C++ unittests will fail with `R_LIB` enabled,
|
||||
as R package redirects some functions to R runtime implementation.")
|
||||
endif (R_LIB AND GOOGLE_TEST)
|
||||
if (PLUGIN_RMM AND NOT (USE_CUDA))
|
||||
endif()
|
||||
if(R_LIB AND GOOGLE_TEST)
|
||||
message(
|
||||
WARNING
|
||||
"Some C++ tests will fail with `R_LIB` enabled, as R package redirects some functions to R runtime implementation."
|
||||
)
|
||||
endif()
|
||||
if(PLUGIN_RMM AND NOT (USE_CUDA))
|
||||
message(SEND_ERROR "`PLUGIN_RMM` must be enabled with `USE_CUDA` flag.")
|
||||
endif (PLUGIN_RMM AND NOT (USE_CUDA))
|
||||
|
||||
if (PLUGIN_RMM AND NOT (USE_HIP))
|
||||
message(SEND_ERROR "`PLUGIN_RMM` must be enabled with `USE_HIP` flag.")
|
||||
endif (PLUGIN_RMM AND NOT (USE_HIP))
|
||||
|
||||
if (PLUGIN_RMM AND NOT ((CMAKE_CXX_COMPILER_ID STREQUAL "Clang") OR (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")))
|
||||
endif()
|
||||
if(PLUGIN_RMM AND NOT ((CMAKE_CXX_COMPILER_ID STREQUAL "Clang") OR (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")))
|
||||
message(SEND_ERROR "`PLUGIN_RMM` must be used with GCC or Clang compiler.")
|
||||
endif (PLUGIN_RMM AND NOT ((CMAKE_CXX_COMPILER_ID STREQUAL "Clang") OR (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")))
|
||||
if (PLUGIN_RMM AND NOT (CMAKE_SYSTEM_NAME STREQUAL "Linux"))
|
||||
endif()
|
||||
if(PLUGIN_RMM AND NOT (CMAKE_SYSTEM_NAME STREQUAL "Linux"))
|
||||
message(SEND_ERROR "`PLUGIN_RMM` must be used with Linux.")
|
||||
endif (PLUGIN_RMM AND NOT (CMAKE_SYSTEM_NAME STREQUAL "Linux"))
|
||||
if (ENABLE_ALL_WARNINGS)
|
||||
if ((NOT CMAKE_CXX_COMPILER_ID MATCHES "Clang") AND (NOT CMAKE_CXX_COMPILER_ID STREQUAL "GNU"))
|
||||
endif()
|
||||
if(ENABLE_ALL_WARNINGS)
|
||||
if((NOT CMAKE_CXX_COMPILER_ID MATCHES "Clang") AND (NOT CMAKE_CXX_COMPILER_ID STREQUAL "GNU"))
|
||||
message(SEND_ERROR "ENABLE_ALL_WARNINGS is only available for Clang and GCC.")
|
||||
endif ((NOT CMAKE_CXX_COMPILER_ID MATCHES "Clang") AND (NOT CMAKE_CXX_COMPILER_ID STREQUAL "GNU"))
|
||||
endif (ENABLE_ALL_WARNINGS)
|
||||
if (BUILD_STATIC_LIB AND (R_LIB OR JVM_BINDINGS))
|
||||
endif()
|
||||
endif()
|
||||
if(BUILD_STATIC_LIB AND (R_LIB OR JVM_BINDINGS))
|
||||
message(SEND_ERROR "Cannot build a static library libxgboost.a when R or JVM packages are enabled.")
|
||||
endif (BUILD_STATIC_LIB AND (R_LIB OR JVM_BINDINGS))
|
||||
if (PLUGIN_FEDERATED)
|
||||
if (CMAKE_CROSSCOMPILING)
|
||||
endif()
|
||||
if(PLUGIN_FEDERATED)
|
||||
if(CMAKE_CROSSCOMPILING)
|
||||
message(SEND_ERROR "Cannot cross compile with federated learning support")
|
||||
endif ()
|
||||
if (BUILD_STATIC_LIB)
|
||||
endif()
|
||||
if(BUILD_STATIC_LIB)
|
||||
message(SEND_ERROR "Cannot build static lib with federated learning support")
|
||||
endif ()
|
||||
if (R_LIB OR JVM_BINDINGS)
|
||||
endif()
|
||||
if(R_LIB OR JVM_BINDINGS)
|
||||
message(SEND_ERROR "Cannot enable federated learning support when R or JVM packages are enabled.")
|
||||
endif ()
|
||||
if (WIN32)
|
||||
endif()
|
||||
if(WIN32)
|
||||
message(SEND_ERROR "Federated learning not supported for Windows platform")
|
||||
endif ()
|
||||
endif ()
|
||||
endif()
|
||||
endif()
|
||||
|
||||
#-- Removed options
|
||||
if (USE_AVX)
|
||||
if(USE_AVX)
|
||||
message(SEND_ERROR "The option `USE_AVX` is deprecated as experimental AVX features have been removed from XGBoost.")
|
||||
endif (USE_AVX)
|
||||
if (PLUGIN_LZ4)
|
||||
endif()
|
||||
if(PLUGIN_LZ4)
|
||||
message(SEND_ERROR "The option `PLUGIN_LZ4` is removed from XGBoost.")
|
||||
endif (PLUGIN_LZ4)
|
||||
if (RABIT_BUILD_MPI)
|
||||
endif()
|
||||
if(RABIT_BUILD_MPI)
|
||||
message(SEND_ERROR "The option `RABIT_BUILD_MPI` has been removed from XGBoost.")
|
||||
endif (RABIT_BUILD_MPI)
|
||||
if (USE_S3)
|
||||
endif()
|
||||
if(USE_S3)
|
||||
message(SEND_ERROR "The option `USE_S3` has been removed from XGBoost")
|
||||
endif (USE_S3)
|
||||
if (USE_AZURE)
|
||||
endif()
|
||||
if(USE_AZURE)
|
||||
message(SEND_ERROR "The option `USE_AZURE` has been removed from XGBoost")
|
||||
endif (USE_AZURE)
|
||||
if (USE_HDFS)
|
||||
endif()
|
||||
if(USE_HDFS)
|
||||
message(SEND_ERROR "The option `USE_HDFS` has been removed from XGBoost")
|
||||
endif (USE_HDFS)
|
||||
endif()
|
||||
|
||||
#-- Sanitizer
|
||||
if (USE_SANITIZER)
|
||||
if(USE_SANITIZER)
|
||||
include(cmake/Sanitizer.cmake)
|
||||
enable_sanitizers("${ENABLED_SANITIZERS}")
|
||||
endif (USE_SANITIZER)
|
||||
endif()
|
||||
|
||||
if (USE_CUDA)
|
||||
if(USE_CUDA)
|
||||
set(USE_OPENMP ON CACHE BOOL "CUDA requires OpenMP" FORCE)
|
||||
# `export CXX=' is ignored by CMake CUDA.
|
||||
set(CMAKE_CUDA_HOST_COMPILER ${CMAKE_CXX_COMPILER})
|
||||
message(STATUS "Configured CUDA host compiler: ${CMAKE_CUDA_HOST_COMPILER}")
|
||||
|
||||
enable_language(CUDA)
|
||||
if (${CMAKE_CUDA_COMPILER_VERSION} VERSION_LESS 11.0)
|
||||
if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_LESS 11.0)
|
||||
message(FATAL_ERROR "CUDA version must be at least 11.0!")
|
||||
endif()
|
||||
set(GEN_CODE "")
|
||||
@ -199,7 +190,7 @@ if (USE_CUDA)
|
||||
add_subdirectory(${PROJECT_SOURCE_DIR}/gputreeshap)
|
||||
|
||||
find_package(CUDAToolkit REQUIRED)
|
||||
endif (USE_CUDA)
|
||||
endif()
|
||||
|
||||
if (USE_HIP)
|
||||
set(USE_OPENMP ON CACHE BOOL "HIP requires OpenMP" FORCE)
|
||||
@ -218,7 +209,7 @@ if (USE_HIP)
|
||||
add_subdirectory(${PROJECT_SOURCE_DIR}/rocgputreeshap)
|
||||
endif (USE_HIP)
|
||||
|
||||
if (FORCE_COLORED_OUTPUT AND (CMAKE_GENERATOR STREQUAL "Ninja") AND
|
||||
if(FORCE_COLORED_OUTPUT AND (CMAKE_GENERATOR STREQUAL "Ninja") AND
|
||||
((CMAKE_CXX_COMPILER_ID STREQUAL "GNU") OR
|
||||
(CMAKE_CXX_COMPILER_ID STREQUAL "Clang")))
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fdiagnostics-color=always")
|
||||
@ -226,10 +217,10 @@ endif()
|
||||
|
||||
find_package(Threads REQUIRED)
|
||||
|
||||
if (USE_OPENMP)
|
||||
if (APPLE)
|
||||
if(USE_OPENMP)
|
||||
if(APPLE)
|
||||
find_package(OpenMP)
|
||||
if (NOT OpenMP_FOUND)
|
||||
if(NOT OpenMP_FOUND)
|
||||
# Try again with extra path info; required for libomp 15+ from Homebrew
|
||||
execute_process(COMMAND brew --prefix libomp
|
||||
OUTPUT_VARIABLE HOMEBREW_LIBOMP_PREFIX
|
||||
@ -242,20 +233,20 @@ if (USE_OPENMP)
|
||||
set(OpenMP_CXX_LIB_NAMES omp)
|
||||
set(OpenMP_omp_LIBRARY ${HOMEBREW_LIBOMP_PREFIX}/lib/libomp.dylib)
|
||||
find_package(OpenMP REQUIRED)
|
||||
endif ()
|
||||
else ()
|
||||
endif()
|
||||
else()
|
||||
find_package(OpenMP REQUIRED)
|
||||
endif ()
|
||||
endif (USE_OPENMP)
|
||||
endif()
|
||||
endif()
|
||||
#Add for IBM i
|
||||
if (${CMAKE_SYSTEM_NAME} MATCHES "OS400")
|
||||
if(${CMAKE_SYSTEM_NAME} MATCHES "OS400")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread")
|
||||
set(CMAKE_CXX_ARCHIVE_CREATE "<CMAKE_AR> -X64 qc <TARGET> <OBJECTS>")
|
||||
endif()
|
||||
|
||||
if (USE_NCCL)
|
||||
if(USE_NCCL)
|
||||
find_package(Nccl REQUIRED)
|
||||
endif (USE_NCCL)
|
||||
endif()
|
||||
|
||||
if (USE_RCCL)
|
||||
find_package(rccl REQUIRED)
|
||||
@ -263,17 +254,19 @@ endif (USE_RCCL)
|
||||
|
||||
# dmlc-core
|
||||
msvc_use_static_runtime()
|
||||
if (FORCE_SHARED_CRT)
|
||||
if(FORCE_SHARED_CRT)
|
||||
set(DMLC_FORCE_SHARED_CRT ON)
|
||||
endif ()
|
||||
endif()
|
||||
add_subdirectory(${xgboost_SOURCE_DIR}/dmlc-core)
|
||||
|
||||
if (MSVC)
|
||||
if (TARGET dmlc_unit_tests)
|
||||
target_compile_options(dmlc_unit_tests PRIVATE
|
||||
-D_CRT_SECURE_NO_WARNINGS -D_CRT_SECURE_NO_DEPRECATE)
|
||||
endif (TARGET dmlc_unit_tests)
|
||||
endif (MSVC)
|
||||
if(MSVC)
|
||||
if(TARGET dmlc_unit_tests)
|
||||
target_compile_options(
|
||||
dmlc_unit_tests PRIVATE
|
||||
-D_CRT_SECURE_NO_WARNINGS -D_CRT_SECURE_NO_DEPRECATE
|
||||
)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
# rabit
|
||||
add_subdirectory(rabit)
|
||||
@ -282,20 +275,25 @@ add_subdirectory(rabit)
|
||||
add_subdirectory(${xgboost_SOURCE_DIR}/src)
|
||||
target_link_libraries(objxgboost PUBLIC dmlc)
|
||||
|
||||
# Link -lstdc++fs for GCC 8.x
|
||||
if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS "9.0")
|
||||
target_link_libraries(objxgboost PUBLIC stdc++fs)
|
||||
endif()
|
||||
|
||||
# Exports some R specific definitions and objects
|
||||
if (R_LIB)
|
||||
if(R_LIB)
|
||||
add_subdirectory(${xgboost_SOURCE_DIR}/R-package)
|
||||
endif (R_LIB)
|
||||
endif()
|
||||
|
||||
# This creates its own shared library `xgboost4j'.
|
||||
if (JVM_BINDINGS)
|
||||
if(JVM_BINDINGS)
|
||||
add_subdirectory(${xgboost_SOURCE_DIR}/jvm-packages)
|
||||
endif (JVM_BINDINGS)
|
||||
endif()
|
||||
|
||||
# Plugin
|
||||
add_subdirectory(${xgboost_SOURCE_DIR}/plugin)
|
||||
|
||||
if (PLUGIN_RMM)
|
||||
if(PLUGIN_RMM)
|
||||
find_package(rmm REQUIRED)
|
||||
|
||||
# Patch the rmm targets so they reference the static cudart
|
||||
@ -306,14 +304,14 @@ if (PLUGIN_RMM)
|
||||
list(APPEND rmm_link_libs CUDA::cudart_static)
|
||||
set_target_properties(rmm::rmm PROPERTIES INTERFACE_LINK_LIBRARIES "${rmm_link_libs}")
|
||||
get_target_property(rmm_link_libs rmm::rmm INTERFACE_LINK_LIBRARIES)
|
||||
endif (PLUGIN_RMM)
|
||||
endif()
|
||||
|
||||
#-- library
|
||||
if (BUILD_STATIC_LIB)
|
||||
if(BUILD_STATIC_LIB)
|
||||
add_library(xgboost STATIC)
|
||||
else (BUILD_STATIC_LIB)
|
||||
else()
|
||||
add_library(xgboost SHARED)
|
||||
endif (BUILD_STATIC_LIB)
|
||||
endif()
|
||||
target_link_libraries(xgboost PRIVATE objxgboost)
|
||||
target_include_directories(xgboost
|
||||
INTERFACE
|
||||
@ -322,7 +320,7 @@ target_include_directories(xgboost
|
||||
#-- End shared library
|
||||
|
||||
#-- CLI for xgboost
|
||||
if (BUILD_DEPRECATED_CLI)
|
||||
if(BUILD_DEPRECATED_CLI)
|
||||
add_executable(runxgboost ${xgboost_SOURCE_DIR}/src/cli_main.cc)
|
||||
target_link_libraries(runxgboost PRIVATE objxgboost)
|
||||
target_include_directories(runxgboost
|
||||
@ -336,12 +334,12 @@ if (BUILD_DEPRECATED_CLI)
|
||||
xgboost_target_link_libraries(runxgboost)
|
||||
xgboost_target_defs(runxgboost)
|
||||
|
||||
if (KEEP_BUILD_ARTIFACTS_IN_BINARY_DIR)
|
||||
if(KEEP_BUILD_ARTIFACTS_IN_BINARY_DIR)
|
||||
set_output_directory(runxgboost ${xgboost_BINARY_DIR})
|
||||
else ()
|
||||
else()
|
||||
set_output_directory(runxgboost ${xgboost_SOURCE_DIR})
|
||||
endif (KEEP_BUILD_ARTIFACTS_IN_BINARY_DIR)
|
||||
endif (BUILD_DEPRECATED_CLI)
|
||||
endif()
|
||||
endif()
|
||||
#-- End CLI for xgboost
|
||||
|
||||
# Common setup for all targets
|
||||
@ -351,41 +349,41 @@ foreach(target xgboost objxgboost dmlc)
|
||||
xgboost_target_defs(${target})
|
||||
endforeach()
|
||||
|
||||
if (JVM_BINDINGS)
|
||||
if(JVM_BINDINGS)
|
||||
xgboost_target_properties(xgboost4j)
|
||||
xgboost_target_link_libraries(xgboost4j)
|
||||
xgboost_target_defs(xgboost4j)
|
||||
endif (JVM_BINDINGS)
|
||||
endif()
|
||||
|
||||
if (KEEP_BUILD_ARTIFACTS_IN_BINARY_DIR)
|
||||
if(KEEP_BUILD_ARTIFACTS_IN_BINARY_DIR)
|
||||
set_output_directory(xgboost ${xgboost_BINARY_DIR}/lib)
|
||||
else ()
|
||||
else()
|
||||
set_output_directory(xgboost ${xgboost_SOURCE_DIR}/lib)
|
||||
endif ()
|
||||
endif()
|
||||
|
||||
# Ensure these two targets do not build simultaneously, as they produce outputs with conflicting names
|
||||
if (BUILD_DEPRECATED_CLI)
|
||||
if(BUILD_DEPRECATED_CLI)
|
||||
add_dependencies(xgboost runxgboost)
|
||||
endif (BUILD_DEPRECATED_CLI)
|
||||
endif()
|
||||
|
||||
#-- Installing XGBoost
|
||||
if (R_LIB)
|
||||
if(R_LIB)
|
||||
include(cmake/RPackageInstallTargetSetup.cmake)
|
||||
set_target_properties(xgboost PROPERTIES PREFIX "")
|
||||
if (APPLE)
|
||||
if(APPLE)
|
||||
set_target_properties(xgboost PROPERTIES SUFFIX ".so")
|
||||
endif (APPLE)
|
||||
endif()
|
||||
setup_rpackage_install_target(xgboost "${CMAKE_CURRENT_BINARY_DIR}/R-package-install")
|
||||
set(CMAKE_INSTALL_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/dummy_inst")
|
||||
endif (R_LIB)
|
||||
if (MINGW)
|
||||
endif()
|
||||
if(MINGW)
|
||||
set_target_properties(xgboost PROPERTIES PREFIX "")
|
||||
endif (MINGW)
|
||||
endif()
|
||||
|
||||
if (BUILD_C_DOC)
|
||||
if(BUILD_C_DOC)
|
||||
include(cmake/Doc.cmake)
|
||||
run_doxygen()
|
||||
endif (BUILD_C_DOC)
|
||||
endif()
|
||||
|
||||
include(CPack)
|
||||
|
||||
@ -401,19 +399,19 @@ install(DIRECTORY ${xgboost_SOURCE_DIR}/include/xgboost
|
||||
# > in any export set.
|
||||
#
|
||||
# https://github.com/dmlc/xgboost/issues/6085
|
||||
if (BUILD_STATIC_LIB)
|
||||
if (BUILD_DEPRECATED_CLI)
|
||||
if(BUILD_STATIC_LIB)
|
||||
if(BUILD_DEPRECATED_CLI)
|
||||
set(INSTALL_TARGETS xgboost runxgboost objxgboost dmlc)
|
||||
else()
|
||||
set(INSTALL_TARGETS xgboost objxgboost dmlc)
|
||||
endif (BUILD_DEPRECATED_CLI)
|
||||
else (BUILD_STATIC_LIB)
|
||||
if (BUILD_DEPRECATED_CLI)
|
||||
endif()
|
||||
else()
|
||||
if(BUILD_DEPRECATED_CLI)
|
||||
set(INSTALL_TARGETS xgboost runxgboost)
|
||||
else(BUILD_DEPRECATED_CLI)
|
||||
else()
|
||||
set(INSTALL_TARGETS xgboost)
|
||||
endif (BUILD_DEPRECATED_CLI)
|
||||
endif (BUILD_STATIC_LIB)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
install(TARGETS ${INSTALL_TARGETS}
|
||||
EXPORT XGBoostTargets
|
||||
@ -442,7 +440,7 @@ install(
|
||||
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/xgboost)
|
||||
|
||||
#-- Test
|
||||
if (GOOGLE_TEST)
|
||||
if(GOOGLE_TEST)
|
||||
enable_testing()
|
||||
# Unittests.
|
||||
add_executable(testxgboost)
|
||||
@ -462,7 +460,7 @@ if (GOOGLE_TEST)
|
||||
${xgboost_SOURCE_DIR}/tests/cli/machine.conf.in
|
||||
${xgboost_BINARY_DIR}/tests/cli/machine.conf
|
||||
@ONLY)
|
||||
if (BUILD_DEPRECATED_CLI)
|
||||
if(BUILD_DEPRECATED_CLI)
|
||||
add_test(
|
||||
NAME TestXGBoostCLI
|
||||
COMMAND runxgboost ${xgboost_BINARY_DIR}/tests/cli/machine.conf
|
||||
@ -470,8 +468,8 @@ if (GOOGLE_TEST)
|
||||
set_tests_properties(TestXGBoostCLI
|
||||
PROPERTIES
|
||||
PASS_REGULAR_EXPRESSION ".*test-rmse:0.087.*")
|
||||
endif (BUILD_DEPRECATED_CLI)
|
||||
endif (GOOGLE_TEST)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
# For MSVC: Call msvc_use_static_runtime() once again to completely
|
||||
# replace /MD with /MT. See https://github.com/dmlc/xgboost/issues/4462
|
||||
@ -479,10 +477,10 @@ endif (GOOGLE_TEST)
|
||||
msvc_use_static_runtime()
|
||||
|
||||
# Add xgboost.pc
|
||||
if (ADD_PKGCONFIG)
|
||||
if(ADD_PKGCONFIG)
|
||||
configure_file(${xgboost_SOURCE_DIR}/cmake/xgboost.pc.in ${xgboost_BINARY_DIR}/xgboost.pc @ONLY)
|
||||
|
||||
install(
|
||||
FILES ${xgboost_BINARY_DIR}/xgboost.pc
|
||||
DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig)
|
||||
endif (ADD_PKGCONFIG)
|
||||
endif()
|
||||
|
||||
NEWS.md: 201 changes

@@ -3,6 +3,207 @@ XGBoost Change Log
|
||||
|
||||
This file records the changes in xgboost library in reverse chronological order.
|
||||
|
||||
## 2.0.0 (2023 Aug 16)
|
||||
|
||||
We are excited to announce the release of XGBoost 2.0. This note will begin by covering some overall changes and then highlight specific updates to the package.
|
||||
|
||||
### Initial work on multi-target trees with vector-leaf outputs

We have been working on vector-leaf tree models for multi-target regression, multi-label classification, and multi-class classification in version 2.0. Previously, XGBoost would build a separate model for each target. However, with this new feature that's still being developed, XGBoost can build one tree for all targets. The feature has multiple benefits and trade-offs compared to the existing approach. It can help prevent overfitting, produce smaller models, and build trees that consider the correlation between targets. In addition, users can combine vector leaf and scalar leaf trees during a training session using a callback. Please note that the feature is still a work in progress, and many parts are not yet available. See #9043 for the current status. Related PRs: (#8538, #8697, #8902, #8884, #8895, #8898, #8612, #8652, #8698, #8908, #8928, #8968, #8616, #8922, #8890, #8872, #8889, #9509) Please note that only the `hist` (default) tree method on CPU can be used for building vector leaf trees at the moment.
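For illustration, a minimal sketch of training a vector-leaf model on a synthetic multi-target problem might look like the following; the `multi_strategy` argument is an assumption based on the 2.0 Python API and may change while the feature matures.

```python
import numpy as np
import xgboost as xgb

# Synthetic multi-target regression data: 3 targets per row.
rng = np.random.default_rng(0)
X = rng.normal(size=(256, 10))
y = rng.normal(size=(256, 3))

# Assumption: `multi_strategy="multi_output_tree"` asks for one tree covering all
# targets (vector leaf); the default builds a separate tree per target.
reg = xgb.XGBRegressor(
    tree_method="hist",                   # only `hist` on CPU supports vector leaf for now
    multi_strategy="multi_output_tree",
    n_estimators=10,
)
reg.fit(X, y)
print(reg.predict(X[:2]).shape)           # (2, 3): one prediction per target
```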
|
||||
### New `device` parameter

A new `device` parameter replaces the existing `gpu_id`, `gpu_hist`, `gpu_predictor`, `cpu_predictor`, `gpu_coord_descent`, and the PySpark-specific parameter `use_gpu`. From now on, users need only the `device` parameter to select which device to run on, along with the ordinal of the device. For more information, please see our documentation page (https://xgboost.readthedocs.io/en/stable/parameter.html#general-parameters). For example, with `device="cuda", tree_method="hist"`, XGBoost will run the `hist` tree method on GPU. (#9363, #8528, #8604, #9354, #9274, #9243, #8896, #9129, #9362, #9402, #9385, #9398, #9390, #9386, #9412, #9507, #9536). The old behavior of `gpu_hist` is preserved but deprecated. In addition, the `predictor` parameter is removed.
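A small usage sketch of the new parameter, following the `device="cuda", tree_method="hist"` combination above (assuming a CUDA-capable GPU is available):

```python
import numpy as np
import xgboost as xgb

X = np.random.rand(512, 8)
y = np.random.randint(2, size=512)
dtrain = xgb.DMatrix(X, label=y)

# Replaces gpu_id/gpu_hist/gpu_predictor: pick the device (and ordinal) directly.
params = {
    "device": "cuda",            # "cuda:1" selects a specific ordinal; "cpu" stays on CPU
    "tree_method": "hist",
    "objective": "binary:logistic",
}
booster = xgb.train(params, dtrain, num_boost_round=10)
```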
|
||||
|
||||
### `hist` is now the default tree method

Starting from 2.0, the `hist` tree method will be the default. In previous versions, XGBoost chose `approx` or `exact` depending on the input data and training environment. The new default can help XGBoost train models more efficiently and consistently. (#9320, #9353)
|
||||
### GPU-based approx tree method

There's initial support for using the `approx` tree method on GPU. The performance of `approx` is not yet well optimized, but it is feature complete except for the JVM packages. It can be accessed through the parameter combination `device="cuda", tree_method="approx"`. (#9414, #9399, #9478). Please note that the Scala-based Spark interface is not yet supported.
|
||||
### Optimize and bound the size of the histogram on CPU to control memory footprint

XGBoost has a new parameter `max_cached_hist_node` for users to limit the CPU cache size for histograms. It can help prevent XGBoost from caching histograms too aggressively. Without the cache, performance is likely to decrease. However, the size of the cache grows exponentially with the depth of the tree. The limit can be crucial when growing deep trees. In most cases, users need not configure this parameter as it does not affect the model's accuracy. (#9455, #9441, #9440, #9427, #9400).
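For example, a user growing deep trees could cap the cache with something along these lines; the value shown is an arbitrary illustration, not a recommendation:

```python
import xgboost as xgb

params = {
    "tree_method": "hist",
    "max_depth": 12,
    "max_cached_hist_node": 1024,   # cap how many nodes keep a cached histogram
}
# booster = xgb.train(params, dtrain)  # dtrain: any DMatrix built elsewhere
```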
|
||||
Along with the cache limit, XGBoost also reduces the memory usage of the `hist` and `approx` tree methods on distributed systems by cutting the size of the cache by half. (#9433)
|
||||
### Improved external memory support

There is some exciting development around external memory support in XGBoost. It's still an experimental feature, but the performance has been significantly improved with the default `hist` tree method. We replaced the old file IO logic with a memory map. In addition to performance, we have reduced CPU memory usage and added extensive documentation. Beginning with 2.0.0, we encourage users to try it with the `hist` tree method when the memory saving from `QuantileDMatrix` is not sufficient. (#9361, #9317, #9282, #9315, #8457)
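A compact sketch of the external-memory flow through `xgboost.DataIter`; the batch loader and cache location below are illustrative assumptions:

```python
import os
import numpy as np
import xgboost as xgb

def load_batch(i):
    # Illustrative stand-in for reading one batch from disk.
    rng = np.random.default_rng(i)
    return rng.normal(size=(1024, 16)), rng.integers(2, size=1024)

class BatchIter(xgb.DataIter):
    def __init__(self, n_batches):
        self._n = n_batches
        self._it = 0
        # Batches get cached on disk under this prefix instead of being held in memory.
        super().__init__(cache_prefix=os.path.join(".", "cache"))

    def next(self, input_data):
        if self._it == self._n:
            return 0                      # no more batches
        X, y = load_batch(self._it)
        input_data(data=X, label=y)
        self._it += 1
        return 1                          # keep iterating

    def reset(self):
        self._it = 0

dtrain = xgb.DMatrix(BatchIter(n_batches=4))   # external-memory DMatrix
booster = xgb.train({"tree_method": "hist"}, dtrain, num_boost_round=10)
```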
|
||||
### Learning to rank

We created a brand-new implementation for the learning-to-rank task. With the latest version, XGBoost gained a set of new features for the ranking task, including:
|
||||
- A new parameter `lambdarank_pair_method` for choosing the pair construction strategy.
|
||||
- A new parameter `lambdarank_num_pair_per_sample` for controlling the number of samples for each group.
|
||||
- An experimental implementation of unbiased learning-to-rank, which can be accessed using the `lambdarank_unbiased` parameter.
|
||||
- Support for custom gain function with `NDCG` using the `ndcg_exp_gain` parameter.
|
||||
- Deterministic GPU computation for all objectives and metrics.
|
||||
- `NDCG` is now the default objective function.
|
||||
- Improved performance of metrics using caches.
|
||||
- Support scikit-learn utilities for `XGBRanker`.
|
||||
- Extensive documentation on how learning-to-rank works with XGBoost.
|
||||
|
||||
For more information, please see the [tutorial](https://xgboost.readthedocs.io/en/latest/tutorials/learning_to_rank.html). Related PRs: (#8771, #8692, #8783, #8789, #8790, #8859, #8887, #8893, #8906, #8931, #9075, #9015, #9381, #9336, #8822, #9222, #8984, #8785, #8786, #8768)
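The new ranking parameters listed above can be exercised roughly as follows; the data and parameter values are synthetic illustrations:

```python
import numpy as np
import xgboost as xgb

rng = np.random.default_rng(0)
X = rng.normal(size=(300, 5))
y = rng.integers(0, 4, size=300)        # graded relevance labels
qid = np.repeat(np.arange(30), 10)      # 30 queries with 10 documents each

ranker = xgb.XGBRanker(
    objective="rank:ndcg",                 # NDCG is now the default objective
    lambdarank_pair_method="topk",         # pair construction strategy ("mean" or "topk")
    lambdarank_num_pair_per_sample=8,
    lambdarank_unbiased=False,             # True enables the experimental unbiased LTR
    ndcg_exp_gain=True,                    # exponential gain in NDCG
    n_estimators=20,
)
ranker.fit(X, y, qid=qid)                  # scikit-learn style utilities for XGBRanker
scores = ranker.predict(X)
```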
|
||||
### Automatically estimated intercept
|
||||
|
||||
In the previous version, `base_score` was a constant that could be set as a training parameter. In the new version, XGBoost can automatically estimate this parameter based on input labels for optimal accuracy. (#8539, #8498, #8272, #8793, #8607)
|
||||
|
||||
### Quantile regression

The XGBoost algorithm now supports quantile regression, which involves minimizing the quantile loss (also called "pinball loss"). Furthermore, XGBoost allows for training with multiple target quantiles simultaneously with one tree per quantile. (#8775, #8761, #8760, #8758, #8750)
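A short sketch of multi-quantile training; the `reg:quantileerror` objective and `quantile_alpha` parameter are assumptions drawn from the 2.0 Python API rather than from this note:

```python
import numpy as np
import xgboost as xgb

rng = np.random.default_rng(0)
X = rng.normal(size=(500, 4))
y = X[:, 0] + rng.normal(scale=0.3, size=500)
dtrain = xgb.DMatrix(X, label=y)

params = {
    "objective": "reg:quantileerror",    # pinball loss (assumed objective name)
    "quantile_alpha": [0.1, 0.5, 0.9],   # one tree per target quantile
    "tree_method": "hist",
    "learning_rate": 0.3,
}
booster = xgb.train(params, dtrain, num_boost_round=50)
pred = booster.inplace_predict(X)        # expected shape: (500, 3), one column per quantile
print(pred.shape)
```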
|
||||
### L1 and Quantile regression now support learning rate

Both objectives use adaptive trees due to the lack of proper Hessian values. In the new version, XGBoost can scale the leaf value with the learning rate accordingly. (#8866)
|
||||
### Export cut value
|
||||
|
||||
Using the Python or the C package, users can export the quantile values (not to be confused with quantile regression) used for the `hist` tree method. (#9356)
|
||||
|
||||
### Column-based split and federated learning
We made progress on column-based split for federated learning. In 2.0, both `approx`, `hist`, and `hist` with vector leaf can work with column-based data split, along with support for vertical federated learning. Work on GPU support is still on-going, stay tuned. (#8576, #8468, #8442, #8847, #8811, #8985, #8623, #8568, #8828, #8932, #9081, #9102, #9103, #9124, #9120, #9367, #9370, #9343, #9171, #9346, #9270, #9244, #8494, #8434, #8742, #8804, #8710, #8676, #9020, #9002, #9058, #9037, #9018, #9295, #9006, #9300, #8765, #9365, #9060)
|
||||
|
||||
### PySpark
|
||||
After the initial introduction of the PySpark interface, it has gained some new features and optimizations in 2.0.
|
||||
|
||||
- GPU-based prediction. (#9292, #9542)
|
||||
- Optimization for data initialization by avoiding the stack operation. (#9088)
|
||||
- Support predict feature contribution. (#8633)
|
||||
- Python typing support. (#9156, #9172, #9079, #8375)
|
||||
- `use_gpu` is deprecated. The `device` parameter is preferred.
|
||||
- Update eval_metric validation to support list of strings (#8826)
|
||||
- Improved logs for training (#9449)
|
||||
- Maintenance, including refactoring and document updates (#8324, #8465, #8605, #9202, #9460, #9302, #8385, #8630, #8525, #8496)
|
||||
- Fix for GPU setup. (#9495)
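A rough sketch of the PySpark estimator with the new parameter style; the toy DataFrame and the `device="cuda"` setting are illustrative assumptions (GPU training requires GPUs on the executors):

```python
from pyspark.ml.linalg import Vectors
from pyspark.sql import SparkSession
from xgboost.spark import SparkXGBClassifier

spark = SparkSession.builder.master("local[2]").getOrCreate()

# Tiny toy DataFrame with a vector feature column and a binary label.
train_df = spark.createDataFrame(
    [(Vectors.dense(1.0, 2.0), 0), (Vectors.dense(3.0, 4.0), 1)] * 16,
    ["features", "label"],
)

# `use_gpu` is deprecated; select the device directly instead.
clf = SparkXGBClassifier(
    features_col="features",
    label_col="label",
    device="cuda",          # drop this (or use "cpu") for CPU-only training
    num_workers=1,
)
model = clf.fit(train_df)
model.transform(train_df).show(4)
```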
|
||||
### Other General New Features
|
||||
Here's a list of new features that don't have their own section and yet are general to all language bindings.
|
||||
|
||||
- Use array interface for CSC matrix. This helps XGBoost to use a consistent number of threads and align the interface of the CSC matrix with other interfaces. In addition, memory usage is likely to decrease with CSC input thanks to on-the-fly type conversion. (#8672)
|
||||
- CUDA compute 90 is now part of the default build. (#9397)
|
||||
### Other General Optimization
|
||||
These optimizations are general to all language bindings. For language-specific optimization, please visit the corresponding sections.
|
||||
|
||||
- Performance for input with `array_interface` on CPU (like `numpy`) is significantly improved. (#9090)
|
||||
- Some optimization with CUDA for data initialization. (#9199, #9209, #9144)
|
||||
- Use the latest thrust policy to prevent synchronizing GPU devices. (#9212)
|
||||
- XGBoost now uses a per-thread CUDA stream, which prevents synchronization with other streams. (#9416, #9396, #9413)
|
||||
|
||||
### Notable breaking change
|
||||
|
||||
Other than the aforementioned change with the `device` parameter, here's a list of breaking changes affecting all packages.
|
||||
|
||||
- Users must specify the format for text input (#9077). However, we suggest using third-party data structures such as `numpy.ndarray` instead of relying on text inputs. See https://github.com/dmlc/xgboost/issues/9472 for more info.
|
||||
|
||||
### Notable bug fixes
|
||||
|
||||
Some noteworthy bug fixes that are not related to specific language bindings are listed in this section.
|
||||
|
||||
- Some language environments use a different thread to perform garbage collection, which breaks the thread-local cache used in XGBoost. XGBoost 2.0 implements a new thread-safe cache using a lightweight lock to replace the thread-local cache. (#8851)
- Fix model IO by clearing the prediction cache. (#8904)
|
||||
- `inf` is checked during data construction. (#8911)
|
||||
- Preserve order of saved updaters configuration. Usually, this is not an issue unless the `updater` parameter is used instead of the `tree_method` parameter (#9355)
|
||||
- Fix GPU memory allocation issue with categorical splits. (#9529)
|
||||
- Handle escape sequence like `\t\n` in feature names for JSON model dump. (#9474)
|
||||
- Normalize file path for model IO and text input. This handles short paths on Windows and paths that contain `~` on Unix (#9463). In addition, all path inputs are required to be encoded in UTF-8 (#9448, #9443)
|
||||
- Fix integer overflow on H100. (#9380)
|
||||
- Fix weighted sketching on GPU with categorical features. (#9341)
|
||||
- Fix metric serialization. The bug might cause some of the metrics to be dropped during evaluation. (#9405)
|
||||
- Fixes compilation errors on MSVC x86 targets (#8823)
|
||||
- Pick up the dmlc-core fix for the CSV parser. (#8897)
|
||||
|
||||
|
||||
### Documentation
|
||||
Aside from documents for new features, we have many smaller updates to improve user experience, from troubleshooting guides to typo fixes.
|
||||
|
||||
- Explain CPU/GPU interop. (#8450)
|
||||
- Guide to troubleshoot NCCL errors. (#8943, #9206)
|
||||
- Add a note for rabit port selection. (#8879)
|
||||
- How to build the docs using conda (#9276)
|
||||
- Explain how to obtain reproducible results on distributed systems. (#8903)
|
||||
|
||||
* Fixes and small updates to document and demonstration scripts. (#8626, #8436, #8995, #8907, #8923, #8926, #9358, #9232, #9201, #9469, #9462, #9458, #8543, #8597, #8401, #8784, #9213, #9098, #9008, #9223, #9333, #9434, #9435, #9415, #8773, #8752, #9291, #9549)
|
||||
|
||||
### Python package
|
||||
* New Features and Improvements
|
||||
- Support primitive types of pyarrow-backed pandas dataframe. (#8653)
|
||||
- Warning messages emitted by XGBoost are now emitted using Python warnings. (#9387)
|
||||
- User can now format the value printed near the bars on the `plot_importance` plot (#8540)
|
||||
- XGBoost has improved half-type support (float16) with pandas, cupy, and cuDF. With GPU input, the handling is through CUDA `__half` type, and no data copy is made. (#8487, #9207, #8481)
|
||||
- Support `Series` and Python primitive types in `inplace_predict` and `QuantileDMatrix` (#8547, #8542)
|
||||
- Support all pandas' nullable integer types. (#8480)
|
||||
- Custom metric with the scikit-learn interface now supports `sample_weight`. (#8706)
|
||||
- Enable installation of the Python package with system lib in a virtual environment (#9349)
- Raise if expected workers are not alive in `xgboost.dask.train` (#9421)
|
||||
|
||||
* Optimization
|
||||
- Cache transformed data in `QuantileDMatrix` for efficiency. (#8666, #9445)
|
||||
- Take datatable as row-major input. (#8472)
|
||||
- Remove unnecessary conversions between data structures (#8546)
|
||||
|
||||
* Adopt modern Python packaging conventions (PEP 517, PEP 518, PEP 621)
|
||||
- XGBoost adopted the modern Python packaging conventions. The old setup script `setup.py` is now replaced with the new configuration file `pyproject.toml`. Along with this, XGBoost now supports Python 3.11. (#9021, #9112, #9114, #9115) Consult the latest documentation for the updated instructions to build and install XGBoost.
|
||||
|
||||
* Fixes
|
||||
- `DataIter` now accepts only keyword arguments. (#9431)
|
||||
- Fix empty DMatrix with categorical features. (#8739)
|
||||
- Convert ``DaskXGBClassifier.classes_`` to an array (#8452)
|
||||
- Define `best_iteration` only if early stopping is used to be consistent with documented behavior. (#9403)
|
||||
- Make feature validation immutable. (#9388)
|
||||
|
||||
* Breaking changes
|
||||
- Discussed in the new `device` parameter section, the `predictor` parameter is now removed. (#9129)
|
||||
- Remove support for single-string feature info. Feature type and names should be a sequence of strings (#9401)
|
||||
- Remove parameters in the `save_model` call for the scikit-learn interface. (#8963)
|
||||
- Remove the `ntree_limit` in the python package. This has been deprecated in previous versions. (#8345)
|
||||
|
||||
* Maintenance including formatting and refactoring along with type hints.
|
||||
- More consistent use of `black` and `isort` for code formatting (#8420, #8748, #8867)
|
||||
- Improved type support. Most of the type changes happen in the PySpark module; here, we list the remaining changes. (#8444, #8617, #9197, #9005)
|
||||
- Set `enable_categorical` to True in predict. (#8592)
|
||||
- Some refactoring and updates for tests (#8395, #8372, #8557, #8379, #8702, #9459, #9316, #8446, #8695, #8409, #8993, #9480)
|
||||
|
||||
* Documentation
|
||||
- Add introduction and notes for the sklearn interface. (#8948)
|
||||
- Demo for using dask for hyper-parameter optimization. (#8891)
|
||||
- Document all supported Python input types. (#8643)
|
||||
- Other documentation updates (#8944, #9304)
|
||||
|
||||
### R package
|
||||
- Use the new data consumption interface for CSR and CSC. This provides better control for the number of threads and improves performance. (#8455, #8673)
|
||||
- Accept multiple evaluation metrics during training. (#8657)
|
||||
- Fix integer inputs with `NA`. (#9522)
|
||||
- Some refactoring for the R package (#8545, #8430, #8614, #8624, #8613, #9457, #8689, #8563, #9461, #8647, #8564, #8565, #8736, #8610, #8609, #8599, #8704, #9456, #9450, #9476, #9477, #9481). Special thanks to @jameslamb.
|
||||
- Document updates (#8886, #9323, #9437, #8998)
|
||||
|
||||
### JVM packages
|
||||
Following are changes specific to various JVM-based packages.
|
||||
|
||||
- Stop using Rabit in prediction (#9054)
|
||||
- Set feature_names and feature_types in jvm-packages. This is to prepare support for categorical features (#9364)
|
||||
- Scala 2.13 support. (#9099)
|
||||
- Change training stage from `ResultStage` to `ShuffleMapStage` (#9423)
|
||||
- Automatically set the max/min direction for the best score during early stopping. (#9404)
|
||||
* Revised support for `flink` (#9046)
|
||||
|
||||
* Breaking changes
|
||||
- Scala-based tracker is removed. (#9078, #9045)
|
||||
- Change `DeviceQuantileDmatrix` into `QuantileDMatrix` (#8461)
|
||||
|
||||
* Maintenance (#9253, #9166, #9395, #9389, #9224, #9233, #9351, #9479)
|
||||
|
||||
* CI bot PRs
|
||||
We employed GitHub's Dependabot to help us keep the dependencies up-to-date for JVM packages. With help from the bot, we have cleared up all the dependencies that were lagging behind (#8501, #8507).
|
||||
Here's a list of dependency update PRs including those made by dependent bots (#8456, #8560, #8571, #8561, #8562, #8600, #8594, #8524, #8509, #8548, #8549, #8533, #8521, #8534, #8532, #8516, #8503, #8531, #8530, #8518, #8512, #8515, #8517, #8506, #8504, #8502, #8629, #8815, #8813, #8814, #8877, #8876, #8875, #8874, #8873, #9049, #9070, #9073, #9039, #9083, #8917, #8952, #8980, #8973, #8962, #9252, #9208, #9131, #9136, #9219, #9160, #9158, #9163, #9184, #9192, #9265, #9268, #8882, #8837, #8662, #8661, #8390, #9056, #8508, #8925, #8920, #9149, #9230, #9097, #8648, #9203, #8593).
|
||||
|
||||
### Maintenance
|
||||
Maintenance work includes refactoring, fixing small issues that don't affect end users. (#9256, #8627, #8756, #8735, #8966, #8864, #8747, #8892, #9057, #8921, #8949, #8941, #8942, #9108, #9125, #9155, #9153, #9176, #9447, #9444, #9436, #9438, #9430, #9200, #9210, #9055, #9014, #9004, #8999, #9154, #9148, #9283, #9246, #8888, #8900, #8871, #8861, #8858, #8791, #8807, #8751, #8703, #8696, #8693, #8677, #8686, #8665, #8660, #8386, #8371, #8410, #8578, #8574, #8483, #8443, #8454, #8733)
|
||||
|
||||
### CI
|
||||
- Build pip wheel with RMM support (#9383)
|
||||
- Other CI updates including updating dependencies and work on the CI infrastructure. (#9464, #9428, #8767, #9394, #9278, #9214, #9234, #9205, #9034, #9104, #8878, #9294, #8625, #8806, #8741, #8707, #8381, #8382, #8388, #8402, #8397, #8445, #8602, #8628, #8583, #8460, #9544)
|
||||
|
||||
## 1.7.6 (2023 Jun 16)
|
||||
|
||||
This is a patch release for bug fixes. The CRAN package for the R binding is kept at 1.7.5.
|
||||
|
||||
@ -4,3 +4,5 @@
|
||||
^.*\.Rproj$
|
||||
^\.Rproj\.user$
|
||||
README.md
|
||||
^doc$
|
||||
^Meta$
|
||||
|
||||
@ -70,7 +70,7 @@ cb.print.evaluation <- function(period = 1, showsd = TRUE) {
|
||||
i == env$begin_iteration ||
|
||||
i == env$end_iteration) {
|
||||
stdev <- if (showsd) env$bst_evaluation_err else NULL
|
||||
msg <- format.eval.string(i, env$bst_evaluation, stdev)
|
||||
msg <- .format_eval_string(i, env$bst_evaluation, stdev)
|
||||
cat(msg, '\n')
|
||||
}
|
||||
}
|
||||
@ -380,7 +380,9 @@ cb.early.stop <- function(stopping_rounds, maximize = FALSE,
|
||||
if ((maximize && score > best_score) ||
|
||||
(!maximize && score < best_score)) {
|
||||
|
||||
best_msg <<- format.eval.string(i, env$bst_evaluation, env$bst_evaluation_err)
|
||||
best_msg <<- .format_eval_string(
|
||||
i, env$bst_evaluation, env$bst_evaluation_err
|
||||
)
|
||||
best_score <<- score
|
||||
best_iteration <<- i
|
||||
best_ntreelimit <<- best_iteration * env$num_parallel_tree
|
||||
@ -555,14 +557,18 @@ cb.cv.predict <- function(save_models = FALSE) {
|
||||
#'
|
||||
#' @examples
|
||||
#' #### Binary classification:
|
||||
#' #
|
||||
#'
|
||||
#' ## Keep the number of threads to 1 for examples
|
||||
#' nthread <- 1
|
||||
#' data.table::setDTthreads(nthread)
|
||||
#'
|
||||
#' # In the iris dataset, it is hard to linearly separate Versicolor class from the rest
|
||||
#' # without considering the 2nd order interactions:
|
||||
#' x <- model.matrix(Species ~ .^2, iris)[,-1]
|
||||
#' colnames(x)
|
||||
#' dtrain <- xgb.DMatrix(scale(x), label = 1*(iris$Species == "versicolor"), nthread = 2)
|
||||
#' dtrain <- xgb.DMatrix(scale(x), label = 1*(iris$Species == "versicolor"), nthread = nthread)
|
||||
#' param <- list(booster = "gblinear", objective = "reg:logistic", eval_metric = "auc",
|
||||
#' lambda = 0.0003, alpha = 0.0003, nthread = 2)
|
||||
#' lambda = 0.0003, alpha = 0.0003, nthread = nthread)
|
||||
#' # For 'shotgun', which is a default linear updater, using high eta values may result in
|
||||
#' # unstable behaviour in some datasets. With this simple dataset, however, the high learning
|
||||
#' # rate does not break the convergence, but allows us to illustrate the typical pattern of
|
||||
@ -592,9 +598,9 @@ cb.cv.predict <- function(save_models = FALSE) {
|
||||
#'
|
||||
#' #### Multiclass classification:
|
||||
#' #
|
||||
#' dtrain <- xgb.DMatrix(scale(x), label = as.numeric(iris$Species) - 1, nthread = 1)
|
||||
#' dtrain <- xgb.DMatrix(scale(x), label = as.numeric(iris$Species) - 1, nthread = nthread)
|
||||
#' param <- list(booster = "gblinear", objective = "multi:softprob", num_class = 3,
|
||||
#' lambda = 0.0003, alpha = 0.0003, nthread = 1)
|
||||
#' lambda = 0.0003, alpha = 0.0003, nthread = nthread)
|
||||
#' # For the default linear updater 'shotgun' it sometimes is helpful
|
||||
#' # to use smaller eta to reduce instability
|
||||
#' bst <- xgb.train(param, dtrain, list(tr=dtrain), nrounds = 50, eta = 0.5,
|
||||
@ -754,7 +760,7 @@ xgb.gblinear.history <- function(model, class_index = NULL) {
|
||||
#
|
||||
|
||||
# Format the evaluation metric string
|
||||
format.eval.string <- function(iter, eval_res, eval_err = NULL) {
|
||||
.format_eval_string <- function(iter, eval_res, eval_err = NULL) {
|
||||
if (length(eval_res) == 0)
|
||||
stop('no evaluation results')
|
||||
enames <- names(eval_res)
|
||||
|
||||
@ -21,13 +21,13 @@ xgb.Booster.handle <- function(params, cachelist, modelfile, handle) {
|
||||
## A memory buffer
|
||||
bst <- xgb.unserialize(modelfile, handle)
|
||||
xgb.parameters(bst) <- params
|
||||
return (bst)
|
||||
return(bst)
|
||||
} else if (inherits(modelfile, "xgb.Booster")) {
|
||||
## A booster object
|
||||
bst <- xgb.Booster.complete(modelfile, saveraw = TRUE)
|
||||
bst <- xgb.unserialize(bst$raw)
|
||||
xgb.parameters(bst) <- params
|
||||
return (bst)
|
||||
return(bst)
|
||||
} else {
|
||||
stop("modelfile must be either character filename, or raw booster dump, or xgb.Booster object")
|
||||
}
|
||||
@ -267,11 +267,16 @@ xgb.Booster.complete <- function(object, saveraw = TRUE) {
|
||||
#'
|
||||
#' data(agaricus.train, package='xgboost')
|
||||
#' data(agaricus.test, package='xgboost')
|
||||
#'
|
||||
#' ## Keep the number of threads to 2 for examples
|
||||
#' nthread <- 2
|
||||
#' data.table::setDTthreads(nthread)
|
||||
#'
|
||||
#' train <- agaricus.train
|
||||
#' test <- agaricus.test
|
||||
#'
|
||||
#' bst <- xgboost(data = train$data, label = train$label, max_depth = 2,
|
||||
#' eta = 0.5, nthread = 2, nrounds = 5, objective = "binary:logistic")
|
||||
#' eta = 0.5, nthread = nthread, nrounds = 5, objective = "binary:logistic")
|
||||
#' # use all trees by default
|
||||
#' pred <- predict(bst, test$data)
|
||||
#' # use only the 1st tree
|
||||
@ -337,8 +342,14 @@ predict.xgb.Booster <- function(object, newdata, missing = NA, outputmargin = FA
|
||||
reshape = FALSE, training = FALSE, iterationrange = NULL, strict_shape = FALSE, ...) {
|
||||
object <- xgb.Booster.complete(object, saveraw = FALSE)
|
||||
|
||||
if (!inherits(newdata, "xgb.DMatrix"))
|
||||
newdata <- xgb.DMatrix(newdata, missing = missing, nthread = NVL(object$params[["nthread"]], -1))
|
||||
if (!inherits(newdata, "xgb.DMatrix")) {
|
||||
config <- jsonlite::fromJSON(xgb.config(object))
|
||||
nthread <- strtoi(config$learner$generic_param$nthread)
|
||||
newdata <- xgb.DMatrix(
|
||||
newdata,
|
||||
missing = missing, nthread = NVL(nthread, -1)
|
||||
)
|
||||
}
|
||||
if (!is.null(object[["feature_names"]]) &&
|
||||
!is.null(colnames(newdata)) &&
|
||||
!identical(object[["feature_names"]], colnames(newdata)))
|
||||
@ -371,7 +382,7 @@ predict.xgb.Booster <- function(object, newdata, missing = NA, outputmargin = FA
|
||||
cval[0] <- val
|
||||
return(cval)
|
||||
}
|
||||
return (val)
|
||||
return(val)
|
||||
}
|
||||
|
||||
## We set strict_shape to TRUE then drop the dimensions conditionally
|
||||
@ -628,10 +639,15 @@ xgb.attributes <- function(object) {
|
||||
#'
|
||||
#' @examples
|
||||
#' data(agaricus.train, package='xgboost')
|
||||
#' ## Keep the number of threads to 1 for examples
|
||||
#' nthread <- 1
|
||||
#' data.table::setDTthreads(nthread)
|
||||
#' train <- agaricus.train
|
||||
#'
|
||||
#' bst <- xgboost(data = train$data, label = train$label, max_depth = 2,
|
||||
#' eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic")
|
||||
#' bst <- xgboost(
|
||||
#' data = train$data, label = train$label, max_depth = 2,
|
||||
#' eta = 1, nthread = nthread, nrounds = 2, objective = "binary:logistic"
|
||||
#' )
|
||||
#' config <- xgb.config(bst)
|
||||
#'
|
||||
#' @rdname xgb.config
|
||||
|
||||
@ -18,7 +18,12 @@
|
||||
#'
|
||||
#' @examples
|
||||
#' data(agaricus.train, package='xgboost')
|
||||
#' dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))
|
||||
#' ## Keep the number of threads to 1 for examples
|
||||
#' nthread <- 1
|
||||
#' data.table::setDTthreads(nthread)
|
||||
#' dtrain <- with(
|
||||
#' agaricus.train, xgb.DMatrix(data, label = label, nthread = nthread)
|
||||
#' )
|
||||
#' xgb.DMatrix.save(dtrain, 'xgb.DMatrix.data')
|
||||
#' dtrain <- xgb.DMatrix('xgb.DMatrix.data')
|
||||
#' if (file.exists('xgb.DMatrix.data')) file.remove('xgb.DMatrix.data')
|
||||
@ -112,7 +117,7 @@ xgb.get.DMatrix <- function(data, label, missing, weight, nthread) {
|
||||
stop("xgboost: invalid input data")
|
||||
}
|
||||
}
|
||||
return (dtrain)
|
||||
return(dtrain)
|
||||
}
|
||||
|
||||
|
||||
|
||||
@ -22,14 +22,23 @@
|
||||
#' @examples
|
||||
#' data(agaricus.train, package='xgboost')
|
||||
#' data(agaricus.test, package='xgboost')
|
||||
#'
|
||||
#' ## Keep the number of threads to 1 for examples
|
||||
#' nthread <- 1
|
||||
#' data.table::setDTthreads(nthread)
|
||||
#'
|
||||
#' train <- agaricus.train
|
||||
#' test <- agaricus.test
|
||||
#' bst <- xgboost(data = train$data, label = train$label, max_depth = 2,
|
||||
#' eta = 1, nthread = 2, nrounds = 2,objective = "binary:logistic")
|
||||
#' bst <- xgboost(
|
||||
#' data = train$data, label = train$label, max_depth = 2, eta = 1,
|
||||
#' nthread = nthread,
|
||||
#' nrounds = 2,
|
||||
#' objective = "binary:logistic"
|
||||
#' )
|
||||
#'
|
||||
#' xgb.save(bst, 'xgb.model')
|
||||
#' bst <- xgb.load('xgb.model')
|
||||
#' if (file.exists('xgb.model')) file.remove('xgb.model')
|
||||
#' pred <- predict(bst, test$data)
|
||||
#' @export
|
||||
xgb.load <- function(modelfile) {
|
||||
if (is.null(modelfile))
|
||||
|
||||
@ -18,6 +18,6 @@ xgb.load.raw <- function(buffer, as_booster = FALSE) {
|
||||
booster <- xgb.Booster.complete(booster, saveraw = TRUE)
|
||||
return(booster)
|
||||
} else {
|
||||
return (handle)
|
||||
return(handle)
|
||||
}
|
||||
}
|
||||
|
||||
@ -46,9 +46,12 @@
|
||||
#' # Basic use:
|
||||
#'
|
||||
#' data(agaricus.train, package='xgboost')
|
||||
#' ## Keep the number of threads to 1 for examples
|
||||
#' nthread <- 1
|
||||
#' data.table::setDTthreads(nthread)
|
||||
#'
|
||||
#' bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label, max_depth = 2,
|
||||
#' eta = 1, nthread = 2, nrounds = 2,objective = "binary:logistic")
|
||||
#' eta = 1, nthread = nthread, nrounds = 2,objective = "binary:logistic")
|
||||
#'
|
||||
#' (dt <- xgb.model.dt.tree(colnames(agaricus.train$data), bst))
|
||||
#'
|
||||
|
||||
@ -45,10 +45,13 @@
|
||||
#' @examples
|
||||
#'
|
||||
#' data(agaricus.train, package='xgboost')
|
||||
#' ## Keep the number of threads to 2 for examples
|
||||
#' nthread <- 2
|
||||
#' data.table::setDTthreads(nthread)
|
||||
#'
|
||||
#' # Change max_depth to a higher number to get a more significant result
|
||||
#' ## Change max_depth to a higher number to get a more significant result
|
||||
#' bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label, max_depth = 6,
|
||||
#' eta = 0.1, nthread = 2, nrounds = 50, objective = "binary:logistic",
|
||||
#' eta = 0.1, nthread = nthread, nrounds = 50, objective = "binary:logistic",
|
||||
#' subsample = 0.5, min_child_weight = 2)
|
||||
#'
|
||||
#' xgb.plot.deepness(bst)
|
||||
|
||||
@ -45,9 +45,14 @@
|
||||
#'
|
||||
#' @examples
|
||||
#' data(agaricus.train)
|
||||
#' ## Keep the number of threads to 2 for examples
|
||||
#' nthread <- 2
|
||||
#' data.table::setDTthreads(nthread)
|
||||
#'
|
||||
#' bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label, max_depth = 3,
|
||||
#' eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic")
|
||||
#' bst <- xgboost(
|
||||
#' data = agaricus.train$data, label = agaricus.train$label, max_depth = 3,
|
||||
#' eta = 1, nthread = nthread, nrounds = 2, objective = "binary:logistic"
|
||||
#' )
|
||||
#'
|
||||
#' importance_matrix <- xgb.importance(colnames(agaricus.train$data), model = bst)
|
||||
#'
|
||||
|
||||
@ -43,10 +43,15 @@
|
||||
#' @examples
|
||||
#'
|
||||
#' data(agaricus.train, package='xgboost')
|
||||
#' ## Keep the number of threads to 2 for examples
|
||||
#' nthread <- 2
|
||||
#' data.table::setDTthreads(nthread)
|
||||
#'
|
||||
#' bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label, max_depth = 15,
|
||||
#' eta = 1, nthread = 2, nrounds = 30, objective = "binary:logistic",
|
||||
#' min_child_weight = 50, verbose = 0)
|
||||
#' bst <- xgboost(
|
||||
#' data = agaricus.train$data, label = agaricus.train$label, max_depth = 15,
|
||||
#' eta = 1, nthread = nthread, nrounds = 30, objective = "binary:logistic",
|
||||
#' min_child_weight = 50, verbose = 0
|
||||
#' )
|
||||
#'
|
||||
#' p <- xgb.plot.multi.trees(model = bst, features_keep = 3)
|
||||
#' print(p)
|
||||
|
||||
@ -74,9 +74,14 @@
|
||||
#' data(agaricus.train, package='xgboost')
|
||||
#' data(agaricus.test, package='xgboost')
|
||||
#'
|
||||
#' bst <- xgboost(agaricus.train$data, agaricus.train$label, nrounds = 50,
|
||||
#' ## Keep the number of threads to 1 for examples
|
||||
#' nthread <- 1
|
||||
#' data.table::setDTthreads(nthread)
|
||||
#' nrounds <- 20
|
||||
#'
|
||||
#' bst <- xgboost(agaricus.train$data, agaricus.train$label, nrounds = nrounds,
|
||||
#' eta = 0.1, max_depth = 3, subsample = .5,
|
||||
#' method = "hist", objective = "binary:logistic", nthread = 2, verbose = 0)
|
||||
#' method = "hist", objective = "binary:logistic", nthread = nthread, verbose = 0)
|
||||
#'
|
||||
#' xgb.plot.shap(agaricus.test$data, model = bst, features = "odor=none")
|
||||
#' contr <- predict(bst, agaricus.test$data, predcontrib = TRUE)
|
||||
@ -85,12 +90,11 @@
|
||||
#'
|
||||
#' # multiclass example - plots for each class separately:
|
||||
#' nclass <- 3
|
||||
#' nrounds <- 20
|
||||
#' x <- as.matrix(iris[, -5])
|
||||
#' set.seed(123)
|
||||
#' is.na(x[sample(nrow(x) * 4, 30)]) <- TRUE # introduce some missing values
|
||||
#' mbst <- xgboost(data = x, label = as.numeric(iris$Species) - 1, nrounds = nrounds,
|
||||
#' max_depth = 2, eta = 0.3, subsample = .5, nthread = 2,
|
||||
#' max_depth = 2, eta = 0.3, subsample = .5, nthread = nthread,
|
||||
#' objective = "multi:softprob", num_class = nclass, verbose = 0)
|
||||
#' trees0 <- seq(from=0, by=nclass, length.out=nrounds)
|
||||
#' col <- rgb(0, 0, 1, 0.5)
|
||||
|
||||
@ -25,14 +25,22 @@
|
||||
#' @examples
|
||||
#' data(agaricus.train, package='xgboost')
|
||||
#' data(agaricus.test, package='xgboost')
|
||||
#'
|
||||
#' ## Keep the number of threads to 1 for examples
|
||||
#' nthread <- 1
|
||||
#' data.table::setDTthreads(nthread)
|
||||
#'
|
||||
#' train <- agaricus.train
|
||||
#' test <- agaricus.test
|
||||
#' bst <- xgboost(data = train$data, label = train$label, max_depth = 2,
|
||||
#' eta = 1, nthread = 2, nrounds = 2,objective = "binary:logistic")
|
||||
#' bst <- xgboost(
|
||||
#' data = train$data, label = train$label, max_depth = 2, eta = 1,
|
||||
#' nthread = nthread,
|
||||
#' nrounds = 2,
|
||||
#' objective = "binary:logistic"
|
||||
#' )
|
||||
#' xgb.save(bst, 'xgb.model')
|
||||
#' bst <- xgb.load('xgb.model')
|
||||
#' if (file.exists('xgb.model')) file.remove('xgb.model')
|
||||
#' pred <- predict(bst, test$data)
|
||||
#' @export
|
||||
xgb.save <- function(model, fname) {
|
||||
if (typeof(fname) != "character")
|
||||
|
||||
@ -16,13 +16,18 @@
|
||||
#' @examples
|
||||
#' data(agaricus.train, package='xgboost')
|
||||
#' data(agaricus.test, package='xgboost')
|
||||
#'
|
||||
#' ## Keep the number of threads to 2 for examples
|
||||
#' nthread <- 2
|
||||
#' data.table::setDTthreads(nthread)
|
||||
#'
|
||||
#' train <- agaricus.train
|
||||
#' test <- agaricus.test
|
||||
#' bst <- xgboost(data = train$data, label = train$label, max_depth = 2,
|
||||
#' eta = 1, nthread = 2, nrounds = 2,objective = "binary:logistic")
|
||||
#' eta = 1, nthread = nthread, nrounds = 2,objective = "binary:logistic")
|
||||
#'
|
||||
#' raw <- xgb.save.raw(bst)
|
||||
#' bst <- xgb.load.raw(raw)
|
||||
#' pred <- predict(bst, test$data)
|
||||
#'
|
||||
#' @export
|
||||
xgb.save.raw <- function(model, raw_format = "deprecated") {
|
||||
|
||||
@ -168,7 +168,8 @@
|
||||
#' than the \code{xgboost} interface.
|
||||
#'
|
||||
#' Parallelization is automatically enabled if \code{OpenMP} is present.
|
||||
#' Number of threads can also be manually specified via \code{nthread} parameter.
|
||||
#' Number of threads can also be manually specified via the \code{nthread}
|
||||
#' parameter.
|
||||
#'
|
||||
#' The evaluation metric is chosen automatically by XGBoost (according to the objective)
|
||||
#' when the \code{eval_metric} parameter is not provided.
|
||||
@ -237,17 +238,25 @@
|
||||
#' data(agaricus.train, package='xgboost')
|
||||
#' data(agaricus.test, package='xgboost')
|
||||
#'
|
||||
#' dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))
|
||||
#' dtest <- with(agaricus.test, xgb.DMatrix(data, label = label, nthread = 2))
|
||||
#' ## Keep the number of threads to 1 for examples
|
||||
#' nthread <- 1
|
||||
#' data.table::setDTthreads(nthread)
|
||||
#'
|
||||
#' dtrain <- with(
|
||||
#' agaricus.train, xgb.DMatrix(data, label = label, nthread = nthread)
|
||||
#' )
|
||||
#' dtest <- with(
|
||||
#' agaricus.test, xgb.DMatrix(data, label = label, nthread = nthread)
|
||||
#' )
|
||||
#' watchlist <- list(train = dtrain, eval = dtest)
|
||||
#'
|
||||
#' ## A simple xgb.train example:
|
||||
#' param <- list(max_depth = 2, eta = 1, verbose = 0, nthread = 2,
|
||||
#' param <- list(max_depth = 2, eta = 1, verbose = 0, nthread = nthread,
|
||||
#' objective = "binary:logistic", eval_metric = "auc")
|
||||
#' bst <- xgb.train(param, dtrain, nrounds = 2, watchlist)
|
||||
#'
|
||||
#'
|
||||
#' ## An xgb.train example where custom objective and evaluation metric are used:
|
||||
#' ## An xgb.train example where custom objective and evaluation metric are
|
||||
#' ## used:
|
||||
#' logregobj <- function(preds, dtrain) {
|
||||
#' labels <- getinfo(dtrain, "label")
|
||||
#' preds <- 1/(1 + exp(-preds))
|
||||
@ -263,12 +272,12 @@
|
||||
#'
|
||||
#' # These functions could be used by passing them either:
|
||||
#' # as 'objective' and 'eval_metric' parameters in the params list:
|
||||
#' param <- list(max_depth = 2, eta = 1, verbose = 0, nthread = 2,
|
||||
#' param <- list(max_depth = 2, eta = 1, verbose = 0, nthread = nthread,
|
||||
#' objective = logregobj, eval_metric = evalerror)
|
||||
#' bst <- xgb.train(param, dtrain, nrounds = 2, watchlist)
|
||||
#'
|
||||
#' # or through the ... arguments:
|
||||
#' param <- list(max_depth = 2, eta = 1, verbose = 0, nthread = 2)
|
||||
#' param <- list(max_depth = 2, eta = 1, verbose = 0, nthread = nthread)
|
||||
#' bst <- xgb.train(param, dtrain, nrounds = 2, watchlist,
|
||||
#' objective = logregobj, eval_metric = evalerror)
|
||||
#'
|
||||
@ -278,7 +287,7 @@
|
||||
#'
|
||||
#'
|
||||
#' ## An xgb.train example of using variable learning rates at each iteration:
|
||||
#' param <- list(max_depth = 2, eta = 1, verbose = 0, nthread = 2,
|
||||
#' param <- list(max_depth = 2, eta = 1, verbose = 0, nthread = nthread,
|
||||
#' objective = "binary:logistic", eval_metric = "auc")
|
||||
#' my_etas <- list(eta = c(0.5, 0.1))
|
||||
#' bst <- xgb.train(param, dtrain, nrounds = 2, watchlist,
|
||||
@ -290,7 +299,7 @@
|
||||
#'
|
||||
#' ## An 'xgboost' interface example:
|
||||
#' bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label,
|
||||
#' max_depth = 2, eta = 1, nthread = 2, nrounds = 2,
|
||||
#' max_depth = 2, eta = 1, nthread = nthread, nrounds = 2,
|
||||
#' objective = "binary:logistic")
|
||||
#' pred <- predict(bst, agaricus.test$data)
|
||||
#'
|
||||
|
||||
@ -37,5 +37,5 @@ xgb.unserialize <- function(buffer, handle = NULL) {
}
})
class(handle) <- "xgb.Booster.handle"
return (handle)
return(handle)
}

@ -24,7 +24,7 @@ xgboost <- function(data = NULL, label = NULL, missing = NA, weight = NULL,
early_stopping_rounds = early_stopping_rounds, maximize = maximize,
save_period = save_period, save_name = save_name,
xgb_model = xgb_model, callbacks = callbacks, ...)
return (bst)
return(bst)
}

#' Training part from Mushroom Data Set
@ -25,7 +25,7 @@ xgb.cv(param, dtrain, nrounds, nfold = 5,
# you can also do cross validation with customized loss function
# See custom_objective.R
##
print ('running cross validation, with customized loss function')
print('running cross validation, with customized loss function')

logregobj <- function(preds, dtrain) {
labels <- getinfo(dtrain, "label")

@ -35,7 +35,7 @@ evalerror <- function(preds, dtrain) {

param <- list(max_depth = 2, eta = 1, nthread = 2, verbosity = 0,
objective = logregobj, eval_metric = evalerror)
print ('start training with user customized objective')
print('start training with user customized objective')
# training with customized objective, we can also do step by step training
# simply look at xgboost.py's implementation of train
bst <- xgb.train(param, dtrain, num_round, watchlist)

@ -59,7 +59,7 @@ logregobjattr <- function(preds, dtrain) {
}
param <- list(max_depth = 2, eta = 1, nthread = 2, verbosity = 0,
objective = logregobjattr, eval_metric = evalerror)
print ('start training with user customized objective, with additional attributes in DMatrix')
print('start training with user customized objective, with additional attributes in DMatrix')
# training with customized objective, we can also do step by step training
# simply look at xgboost.py's implementation of train
bst <- xgb.train(param, dtrain, num_round, watchlist)

@ -30,7 +30,7 @@ evalerror <- function(preds, dtrain) {
err <- as.numeric(sum(labels != (preds > 0))) / length(labels)
return(list(metric = "error", value = err))
}
print ('start training with early Stopping setting')
print('start training with early Stopping setting')

bst <- xgb.train(param, dtrain, num_round, watchlist,
objective = logregobj, eval_metric = evalerror, maximize = FALSE,
@ -35,14 +35,18 @@ Callback function expects the following values to be set in its calling frame:
}
\examples{
#### Binary classification:
#

## Keep the number of threads to 1 for examples
nthread <- 1
data.table::setDTthreads(nthread)

# In the iris dataset, it is hard to linearly separate Versicolor class from the rest
# without considering the 2nd order interactions:
x <- model.matrix(Species ~ .^2, iris)[,-1]
colnames(x)
dtrain <- xgb.DMatrix(scale(x), label = 1*(iris$Species == "versicolor"), nthread = 2)
dtrain <- xgb.DMatrix(scale(x), label = 1*(iris$Species == "versicolor"), nthread = nthread)
param <- list(booster = "gblinear", objective = "reg:logistic", eval_metric = "auc",
lambda = 0.0003, alpha = 0.0003, nthread = 2)
lambda = 0.0003, alpha = 0.0003, nthread = nthread)
# For 'shotgun', which is a default linear updater, using high eta values may result in
# unstable behaviour in some datasets. With this simple dataset, however, the high learning
# rate does not break the convergence, but allows us to illustrate the typical pattern of

@ -72,9 +76,9 @@ matplot(xgb.gblinear.history(bst)[[3]], type = 'l')

#### Multiclass classification:
#
dtrain <- xgb.DMatrix(scale(x), label = as.numeric(iris$Species) - 1, nthread = 1)
dtrain <- xgb.DMatrix(scale(x), label = as.numeric(iris$Species) - 1, nthread = nthread)
param <- list(booster = "gblinear", objective = "multi:softprob", num_class = 3,
lambda = 0.0003, alpha = 0.0003, nthread = 1)
lambda = 0.0003, alpha = 0.0003, nthread = nthread)
# For the default linear updater 'shotgun' it sometimes is helpful
# to use smaller eta to reduce instability
bst <- xgb.train(param, dtrain, list(tr=dtrain), nrounds = 50, eta = 0.5,
@ -132,11 +132,16 @@ Note also that converting a matrix to \code{\link{xgb.DMatrix}} uses multiple th

data(agaricus.train, package='xgboost')
data(agaricus.test, package='xgboost')

## Keep the number of threads to 2 for examples
nthread <- 2
data.table::setDTthreads(nthread)

train <- agaricus.train
test <- agaricus.test

bst <- xgboost(data = train$data, label = train$label, max_depth = 2,
eta = 0.5, nthread = 2, nrounds = 5, objective = "binary:logistic")
eta = 0.5, nthread = nthread, nrounds = 5, objective = "binary:logistic")
# use all trees by default
pred <- predict(bst, test$data)
# use only the 1st tree

@ -38,7 +38,12 @@ Supported input file formats are either a LIBSVM text file or a binary file that
}
\examples{
data(agaricus.train, package='xgboost')
dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))
## Keep the number of threads to 1 for examples
nthread <- 1
data.table::setDTthreads(nthread)
dtrain <- with(
agaricus.train, xgb.DMatrix(data, label = label, nthread = nthread)
)
xgb.DMatrix.save(dtrain, 'xgb.DMatrix.data')
dtrain <- xgb.DMatrix('xgb.DMatrix.data')
if (file.exists('xgb.DMatrix.data')) file.remove('xgb.DMatrix.data')
@ -19,10 +19,15 @@ Accessors for model parameters as JSON string.
|
||||
}
|
||||
\examples{
|
||||
data(agaricus.train, package='xgboost')
|
||||
## Keep the number of threads to 1 for examples
|
||||
nthread <- 1
|
||||
data.table::setDTthreads(nthread)
|
||||
train <- agaricus.train
|
||||
|
||||
bst <- xgboost(data = train$data, label = train$label, max_depth = 2,
|
||||
eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic")
|
||||
bst <- xgboost(
|
||||
data = train$data, label = train$label, max_depth = 2,
|
||||
eta = 1, nthread = nthread, nrounds = 2, objective = "binary:logistic"
|
||||
)
|
||||
config <- xgb.config(bst)
|
||||
|
||||
}
|
||||
|
||||
@ -27,14 +27,23 @@ not \code{xgb.load}.
|
||||
\examples{
|
||||
data(agaricus.train, package='xgboost')
|
||||
data(agaricus.test, package='xgboost')
|
||||
|
||||
## Keep the number of threads to 1 for examples
|
||||
nthread <- 1
|
||||
data.table::setDTthreads(nthread)
|
||||
|
||||
train <- agaricus.train
|
||||
test <- agaricus.test
|
||||
bst <- xgboost(data = train$data, label = train$label, max_depth = 2,
|
||||
eta = 1, nthread = 2, nrounds = 2,objective = "binary:logistic")
|
||||
bst <- xgboost(
|
||||
data = train$data, label = train$label, max_depth = 2, eta = 1,
|
||||
nthread = nthread,
|
||||
nrounds = 2,
|
||||
objective = "binary:logistic"
|
||||
)
|
||||
|
||||
xgb.save(bst, 'xgb.model')
|
||||
bst <- xgb.load('xgb.model')
|
||||
if (file.exists('xgb.model')) file.remove('xgb.model')
|
||||
pred <- predict(bst, test$data)
|
||||
}
|
||||
\seealso{
|
||||
\code{\link{xgb.save}}, \code{\link{xgb.Booster.complete}}.
|
||||
|
||||
@ -66,9 +66,12 @@ Parse a boosted tree model text dump into a \code{data.table} structure.
|
||||
# Basic use:
|
||||
|
||||
data(agaricus.train, package='xgboost')
|
||||
## Keep the number of threads to 1 for examples
|
||||
nthread <- 1
|
||||
data.table::setDTthreads(nthread)
|
||||
|
||||
bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label, max_depth = 2,
|
||||
eta = 1, nthread = 2, nrounds = 2,objective = "binary:logistic")
|
||||
eta = 1, nthread = nthread, nrounds = 2,objective = "binary:logistic")
|
||||
|
||||
(dt <- xgb.model.dt.tree(colnames(agaricus.train$data), bst))
|
||||
|
||||
|
||||
@ -61,10 +61,13 @@ This function was inspired by the blog post
|
||||
\examples{
|
||||
|
||||
data(agaricus.train, package='xgboost')
|
||||
## Keep the number of threads to 2 for examples
|
||||
nthread <- 2
|
||||
data.table::setDTthreads(nthread)
|
||||
|
||||
# Change max_depth to a higher number to get a more significant result
|
||||
## Change max_depth to a higher number to get a more significant result
|
||||
bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label, max_depth = 6,
|
||||
eta = 0.1, nthread = 2, nrounds = 50, objective = "binary:logistic",
|
||||
eta = 0.1, nthread = nthread, nrounds = 50, objective = "binary:logistic",
|
||||
subsample = 0.5, min_child_weight = 2)
|
||||
|
||||
xgb.plot.deepness(bst)
|
||||
|
||||
@ -77,9 +77,14 @@ with bar colors corresponding to different clusters that have somewhat similar i
|
||||
}
|
||||
\examples{
|
||||
data(agaricus.train)
|
||||
## Keep the number of threads to 2 for examples
|
||||
nthread <- 2
|
||||
data.table::setDTthreads(nthread)
|
||||
|
||||
bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label, max_depth = 3,
|
||||
eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic")
|
||||
bst <- xgboost(
|
||||
data = agaricus.train$data, label = agaricus.train$label, max_depth = 3,
|
||||
eta = 1, nthread = nthread, nrounds = 2, objective = "binary:logistic"
|
||||
)
|
||||
|
||||
importance_matrix <- xgb.importance(colnames(agaricus.train$data), model = bst)
|
||||
|
||||
|
||||
@ -63,10 +63,15 @@ This function is inspired by this blog post:
|
||||
\examples{
|
||||
|
||||
data(agaricus.train, package='xgboost')
|
||||
## Keep the number of threads to 2 for examples
|
||||
nthread <- 2
|
||||
data.table::setDTthreads(nthread)
|
||||
|
||||
bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label, max_depth = 15,
|
||||
eta = 1, nthread = 2, nrounds = 30, objective = "binary:logistic",
|
||||
min_child_weight = 50, verbose = 0)
|
||||
bst <- xgboost(
|
||||
data = agaricus.train$data, label = agaricus.train$label, max_depth = 15,
|
||||
eta = 1, nthread = nthread, nrounds = 30, objective = "binary:logistic",
|
||||
min_child_weight = 50, verbose = 0
|
||||
)
|
||||
|
||||
p <- xgb.plot.multi.trees(model = bst, features_keep = 3)
|
||||
print(p)
|
||||
|
||||
@ -124,9 +124,14 @@ a meaningful thing to do.
|
||||
data(agaricus.train, package='xgboost')
|
||||
data(agaricus.test, package='xgboost')
|
||||
|
||||
bst <- xgboost(agaricus.train$data, agaricus.train$label, nrounds = 50,
|
||||
## Keep the number of threads to 1 for examples
|
||||
nthread <- 1
|
||||
data.table::setDTthreads(nthread)
|
||||
nrounds <- 20
|
||||
|
||||
bst <- xgboost(agaricus.train$data, agaricus.train$label, nrounds = nrounds,
|
||||
eta = 0.1, max_depth = 3, subsample = .5,
|
||||
method = "hist", objective = "binary:logistic", nthread = 2, verbose = 0)
|
||||
method = "hist", objective = "binary:logistic", nthread = nthread, verbose = 0)
|
||||
|
||||
xgb.plot.shap(agaricus.test$data, model = bst, features = "odor=none")
|
||||
contr <- predict(bst, agaricus.test$data, predcontrib = TRUE)
|
||||
@ -135,12 +140,11 @@ xgb.ggplot.shap.summary(agaricus.test$data, contr, model = bst, top_n = 12) # S
|
||||
|
||||
# multiclass example - plots for each class separately:
|
||||
nclass <- 3
|
||||
nrounds <- 20
|
||||
x <- as.matrix(iris[, -5])
|
||||
set.seed(123)
|
||||
is.na(x[sample(nrow(x) * 4, 30)]) <- TRUE # introduce some missing values
|
||||
mbst <- xgboost(data = x, label = as.numeric(iris$Species) - 1, nrounds = nrounds,
|
||||
max_depth = 2, eta = 0.3, subsample = .5, nthread = 2,
|
||||
max_depth = 2, eta = 0.3, subsample = .5, nthread = nthread,
|
||||
objective = "multi:softprob", num_class = nclass, verbose = 0)
|
||||
trees0 <- seq(from=0, by=nclass, length.out=nrounds)
|
||||
col <- rgb(0, 0, 1, 0.5)
|
||||
|
||||
@ -31,14 +31,22 @@ releases of XGBoost.
|
||||
\examples{
|
||||
data(agaricus.train, package='xgboost')
|
||||
data(agaricus.test, package='xgboost')
|
||||
|
||||
## Keep the number of threads to 1 for examples
|
||||
nthread <- 1
|
||||
data.table::setDTthreads(nthread)
|
||||
|
||||
train <- agaricus.train
|
||||
test <- agaricus.test
|
||||
bst <- xgboost(data = train$data, label = train$label, max_depth = 2,
|
||||
eta = 1, nthread = 2, nrounds = 2,objective = "binary:logistic")
|
||||
bst <- xgboost(
|
||||
data = train$data, label = train$label, max_depth = 2, eta = 1,
|
||||
nthread = nthread,
|
||||
nrounds = 2,
|
||||
objective = "binary:logistic"
|
||||
)
|
||||
xgb.save(bst, 'xgb.model')
|
||||
bst <- xgb.load('xgb.model')
|
||||
if (file.exists('xgb.model')) file.remove('xgb.model')
|
||||
pred <- predict(bst, test$data)
|
||||
}
|
||||
\seealso{
|
||||
\code{\link{xgb.load}}, \code{\link{xgb.Booster.complete}}.
|
||||
|
||||
@ -25,12 +25,17 @@ Save xgboost model from xgboost or xgb.train
|
||||
\examples{
|
||||
data(agaricus.train, package='xgboost')
|
||||
data(agaricus.test, package='xgboost')
|
||||
|
||||
## Keep the number of threads to 2 for examples
|
||||
nthread <- 2
|
||||
data.table::setDTthreads(nthread)
|
||||
|
||||
train <- agaricus.train
|
||||
test <- agaricus.test
|
||||
bst <- xgboost(data = train$data, label = train$label, max_depth = 2,
|
||||
eta = 1, nthread = 2, nrounds = 2,objective = "binary:logistic")
|
||||
eta = 1, nthread = nthread, nrounds = 2,objective = "binary:logistic")
|
||||
|
||||
raw <- xgb.save.raw(bst)
|
||||
bst <- xgb.load.raw(raw)
|
||||
pred <- predict(bst, test$data)
|
||||
|
||||
}
|
||||
|
||||
@ -250,7 +250,8 @@ customized objective and evaluation metric functions, therefore it is more flexi
than the \code{xgboost} interface.

Parallelization is automatically enabled if \code{OpenMP} is present.
Number of threads can also be manually specified via \code{nthread} parameter.
Number of threads can also be manually specified via the \code{nthread}
parameter.

The evaluation metric is chosen automatically by XGBoost (according to the objective)
when the \code{eval_metric} parameter is not provided.
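A minimal R sketch of the behaviour described above (illustrative only; it simply restates these documented defaults using the same agaricus demo data as the package examples):

library(xgboost)
data(agaricus.train, package = "xgboost")
## nthread set explicitly; if omitted, OpenMP decides the number of threads.
dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label, nthread = 1)
## eval_metric omitted on purpose: XGBoost picks a default based on the objective.
params <- list(objective = "binary:logistic", nthread = 1)
bst <- xgb.train(params, dtrain, nrounds = 2, watchlist = list(train = dtrain))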
@ -286,17 +287,25 @@ The following callbacks are automatically created when certain parameters are se
|
||||
data(agaricus.train, package='xgboost')
|
||||
data(agaricus.test, package='xgboost')
|
||||
|
||||
dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))
|
||||
dtest <- with(agaricus.test, xgb.DMatrix(data, label = label, nthread = 2))
|
||||
## Keep the number of threads to 1 for examples
|
||||
nthread <- 1
|
||||
data.table::setDTthreads(nthread)
|
||||
|
||||
dtrain <- with(
|
||||
agaricus.train, xgb.DMatrix(data, label = label, nthread = nthread)
|
||||
)
|
||||
dtest <- with(
|
||||
agaricus.test, xgb.DMatrix(data, label = label, nthread = nthread)
|
||||
)
|
||||
watchlist <- list(train = dtrain, eval = dtest)
|
||||
|
||||
## A simple xgb.train example:
|
||||
param <- list(max_depth = 2, eta = 1, verbose = 0, nthread = 2,
|
||||
param <- list(max_depth = 2, eta = 1, verbose = 0, nthread = nthread,
|
||||
objective = "binary:logistic", eval_metric = "auc")
|
||||
bst <- xgb.train(param, dtrain, nrounds = 2, watchlist)
|
||||
|
||||
|
||||
## An xgb.train example where custom objective and evaluation metric are used:
|
||||
## An xgb.train example where custom objective and evaluation metric are
|
||||
## used:
|
||||
logregobj <- function(preds, dtrain) {
|
||||
labels <- getinfo(dtrain, "label")
|
||||
preds <- 1/(1 + exp(-preds))
|
||||
@ -312,12 +321,12 @@ evalerror <- function(preds, dtrain) {
|
||||
|
||||
# These functions could be used by passing them either:
|
||||
# as 'objective' and 'eval_metric' parameters in the params list:
|
||||
param <- list(max_depth = 2, eta = 1, verbose = 0, nthread = 2,
|
||||
param <- list(max_depth = 2, eta = 1, verbose = 0, nthread = nthread,
|
||||
objective = logregobj, eval_metric = evalerror)
|
||||
bst <- xgb.train(param, dtrain, nrounds = 2, watchlist)
|
||||
|
||||
# or through the ... arguments:
|
||||
param <- list(max_depth = 2, eta = 1, verbose = 0, nthread = 2)
|
||||
param <- list(max_depth = 2, eta = 1, verbose = 0, nthread = nthread)
|
||||
bst <- xgb.train(param, dtrain, nrounds = 2, watchlist,
|
||||
objective = logregobj, eval_metric = evalerror)
|
||||
|
||||
@ -327,7 +336,7 @@ bst <- xgb.train(param, dtrain, nrounds = 2, watchlist,
|
||||
|
||||
|
||||
## An xgb.train example of using variable learning rates at each iteration:
|
||||
param <- list(max_depth = 2, eta = 1, verbose = 0, nthread = 2,
|
||||
param <- list(max_depth = 2, eta = 1, verbose = 0, nthread = nthread,
|
||||
objective = "binary:logistic", eval_metric = "auc")
|
||||
my_etas <- list(eta = c(0.5, 0.1))
|
||||
bst <- xgb.train(param, dtrain, nrounds = 2, watchlist,
|
||||
@ -339,7 +348,7 @@ bst <- xgb.train(param, dtrain, nrounds = 25, watchlist,
|
||||
|
||||
## An 'xgboost' interface example:
|
||||
bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label,
|
||||
max_depth = 2, eta = 1, nthread = 2, nrounds = 2,
|
||||
max_depth = 2, eta = 1, nthread = nthread, nrounds = 2,
|
||||
objective = "binary:logistic")
|
||||
pred <- predict(bst, agaricus.test$data)
|
||||
|
||||
|
||||
@ -62,6 +62,7 @@ OBJECTS= \
|
||||
$(PKGROOT)/src/gbm/gbtree_model.o \
|
||||
$(PKGROOT)/src/gbm/gblinear.o \
|
||||
$(PKGROOT)/src/gbm/gblinear_model.o \
|
||||
$(PKGROOT)/src/data/adapter.o \
|
||||
$(PKGROOT)/src/data/simple_dmatrix.o \
|
||||
$(PKGROOT)/src/data/data.o \
|
||||
$(PKGROOT)/src/data/sparse_page_raw_format.o \
|
||||
@ -97,9 +98,15 @@ OBJECTS= \
|
||||
$(PKGROOT)/src/context.o \
|
||||
$(PKGROOT)/src/logging.o \
|
||||
$(PKGROOT)/src/global_config.o \
|
||||
$(PKGROOT)/src/collective/allgather.o \
|
||||
$(PKGROOT)/src/collective/allreduce.o \
|
||||
$(PKGROOT)/src/collective/broadcast.o \
|
||||
$(PKGROOT)/src/collective/comm.o \
|
||||
$(PKGROOT)/src/collective/tracker.o \
|
||||
$(PKGROOT)/src/collective/communicator.o \
|
||||
$(PKGROOT)/src/collective/in_memory_communicator.o \
|
||||
$(PKGROOT)/src/collective/in_memory_handler.o \
|
||||
$(PKGROOT)/src/collective/loop.o \
|
||||
$(PKGROOT)/src/collective/socket.o \
|
||||
$(PKGROOT)/src/common/charconv.o \
|
||||
$(PKGROOT)/src/common/column_matrix.o \
|
||||
|
||||
@ -62,6 +62,7 @@ OBJECTS= \
|
||||
$(PKGROOT)/src/gbm/gbtree_model.o \
|
||||
$(PKGROOT)/src/gbm/gblinear.o \
|
||||
$(PKGROOT)/src/gbm/gblinear_model.o \
|
||||
$(PKGROOT)/src/data/adapter.o \
|
||||
$(PKGROOT)/src/data/simple_dmatrix.o \
|
||||
$(PKGROOT)/src/data/data.o \
|
||||
$(PKGROOT)/src/data/sparse_page_raw_format.o \
|
||||
@ -97,9 +98,15 @@ OBJECTS= \
|
||||
$(PKGROOT)/src/context.o \
|
||||
$(PKGROOT)/src/logging.o \
|
||||
$(PKGROOT)/src/global_config.o \
|
||||
$(PKGROOT)/src/collective/allgather.o \
|
||||
$(PKGROOT)/src/collective/allreduce.o \
|
||||
$(PKGROOT)/src/collective/broadcast.o \
|
||||
$(PKGROOT)/src/collective/comm.o \
|
||||
$(PKGROOT)/src/collective/tracker.o \
|
||||
$(PKGROOT)/src/collective/communicator.o \
|
||||
$(PKGROOT)/src/collective/in_memory_communicator.o \
|
||||
$(PKGROOT)/src/collective/in_memory_handler.o \
|
||||
$(PKGROOT)/src/collective/loop.o \
|
||||
$(PKGROOT)/src/collective/socket.o \
|
||||
$(PKGROOT)/src/common/charconv.o \
|
||||
$(PKGROOT)/src/common/column_matrix.o \
|
||||
|
||||
@ -5,7 +5,6 @@
* and edited to conform to xgboost C linter requirements. For details, see
* https://cran.r-project.org/doc/manuals/r-release/R-exts.html#Registering-native-routines
*/
#include <R.h>
#include <Rinternals.h>
#include <stdlib.h>
#include <R_ext/Rdynload.h>

@ -20,7 +20,6 @@
#include "../../src/common/threading_utils.h"

#include "./xgboost_R.h" // Must follow other includes.
#include "Rinternals.h"

/*!
* \brief macro to annotate begin of api
@ -19,15 +19,15 @@ w <- runif(metadata$kRows)
|
||||
version <- packageVersion('xgboost')
|
||||
target_dir <- 'models'
|
||||
|
||||
save_booster <- function (booster, model_name) {
|
||||
booster_bin <- function (model_name) {
|
||||
return (file.path(target_dir, paste('xgboost-', version, '.', model_name, '.bin', sep = '')))
|
||||
save_booster <- function(booster, model_name) {
|
||||
booster_bin <- function(model_name) {
|
||||
return(file.path(target_dir, paste('xgboost-', version, '.', model_name, '.bin', sep = '')))
|
||||
}
|
||||
booster_json <- function (model_name) {
|
||||
return (file.path(target_dir, paste('xgboost-', version, '.', model_name, '.json', sep = '')))
|
||||
booster_json <- function(model_name) {
|
||||
return(file.path(target_dir, paste('xgboost-', version, '.', model_name, '.json', sep = '')))
|
||||
}
|
||||
booster_rds <- function (model_name) {
|
||||
return (file.path(target_dir, paste('xgboost-', version, '.', model_name, '.rds', sep = '')))
|
||||
booster_rds <- function(model_name) {
|
||||
return(file.path(target_dir, paste('xgboost-', version, '.', model_name, '.rds', sep = '')))
|
||||
}
|
||||
xgb.save(booster, booster_bin(model_name))
|
||||
saveRDS(booster, booster_rds(model_name))
|
||||
@ -36,7 +36,7 @@ save_booster <- function (booster, model_name) {
|
||||
}
|
||||
}
|
||||
|
||||
generate_regression_model <- function () {
|
||||
generate_regression_model <- function() {
|
||||
print('Regression')
|
||||
y <- rnorm(metadata$kRows)
|
||||
|
||||
@ -47,7 +47,7 @@ generate_regression_model <- function () {
|
||||
save_booster(booster, 'reg')
|
||||
}
|
||||
|
||||
generate_logistic_model <- function () {
|
||||
generate_logistic_model <- function() {
|
||||
print('Binary classification with logistic loss')
|
||||
y <- sample(0:1, size = metadata$kRows, replace = TRUE)
|
||||
stopifnot(max(y) == 1, min(y) == 0)
|
||||
@ -64,7 +64,7 @@ generate_logistic_model <- function () {
|
||||
}
|
||||
}
|
||||
|
||||
generate_classification_model <- function () {
|
||||
generate_classification_model <- function() {
|
||||
print('Multi-class classification')
|
||||
y <- sample(0:(metadata$kClasses - 1), size = metadata$kRows, replace = TRUE)
|
||||
stopifnot(max(y) == metadata$kClasses - 1, min(y) == 0)
|
||||
@ -77,7 +77,7 @@ generate_classification_model <- function () {
|
||||
save_booster(booster, 'cls')
|
||||
}
|
||||
|
||||
generate_ranking_model <- function () {
|
||||
generate_ranking_model <- function() {
|
||||
print('Learning to rank')
|
||||
y <- sample(0:4, size = metadata$kRows, replace = TRUE)
|
||||
stopifnot(max(y) == 4, min(y) == 0)
|
||||
|
||||
25
R-package/tests/helper_scripts/run-examples.R
Normal file
@ -0,0 +1,25 @@
## Helper script for running individual examples.
library(pkgload)
library(xgboost)

files <- list.files("./man")


run_example_timeit <- function(f) {
path <- paste("./man/", f, sep = "")
print(paste("Test", f))
flush.console()
t0 <- proc.time()
run_example(path)
t1 <- proc.time()
list(file = f, time = t1 - t0)
}

timings <- lapply(files, run_example_timeit)

for (t in timings) {
ratio <- t$time[1] / t$time[3]
if (!is.na(ratio) && !is.infinite(ratio) && ratio >= 2.5) {
print(paste("Offending example:", t$file, ratio))
}
}
|
||||
context("basic functions")
|
||||
|
||||
data(agaricus.train, package = 'xgboost')
|
||||
data(agaricus.test, package = 'xgboost')
|
||||
data(agaricus.train, package = "xgboost")
|
||||
data(agaricus.test, package = "xgboost")
|
||||
train <- agaricus.train
|
||||
test <- agaricus.test
|
||||
set.seed(1994)
|
||||
|
||||
# disable some tests for Win32
|
||||
windows_flag <- .Platform$OS.type == "windows" &&
|
||||
.Machine$sizeof.pointer != 8
|
||||
solaris_flag <- (Sys.info()['sysname'] == "SunOS")
|
||||
.Machine$sizeof.pointer != 8
|
||||
solaris_flag <- (Sys.info()["sysname"] == "SunOS")
|
||||
n_threads <- 1
|
||||
|
||||
|
||||
test_that("train and predict binary classification", {
|
||||
nrounds <- 2
|
||||
expect_output(
|
||||
bst <- xgboost(data = train$data, label = train$label, max_depth = 2,
|
||||
eta = 1, nthread = 2, nrounds = nrounds, objective = "binary:logistic",
|
||||
eval_metric = "error")
|
||||
, "train-error")
|
||||
bst <- xgboost(
|
||||
data = train$data, label = train$label, max_depth = 2,
|
||||
eta = 1, nthread = n_threads, nrounds = nrounds,
|
||||
objective = "binary:logistic", eval_metric = "error"
|
||||
),
|
||||
"train-error"
|
||||
)
|
||||
expect_equal(class(bst), "xgb.Booster")
|
||||
expect_equal(bst$niter, nrounds)
|
||||
expect_false(is.null(bst$evaluation_log))
|
||||
@ -46,26 +51,39 @@ test_that("parameter validation works", {
|
||||
d <- cbind(
|
||||
x1 = rnorm(10),
|
||||
x2 = rnorm(10),
|
||||
x3 = rnorm(10))
|
||||
x3 = rnorm(10)
|
||||
)
|
||||
y <- d[, "x1"] + d[, "x2"]^2 +
|
||||
ifelse(d[, "x3"] > .5, d[, "x3"]^2, 2^d[, "x3"]) +
|
||||
rnorm(10)
|
||||
dtrain <- xgb.DMatrix(data = d, info = list(label = y))
|
||||
dtrain <- xgb.DMatrix(data = d, info = list(label = y), nthread = n_threads)
|
||||
|
||||
correct <- function() {
|
||||
params <- list(max_depth = 2, booster = "dart",
|
||||
rate_drop = 0.5, one_drop = TRUE,
|
||||
objective = "reg:squarederror")
|
||||
params <- list(
|
||||
max_depth = 2,
|
||||
booster = "dart",
|
||||
rate_drop = 0.5,
|
||||
one_drop = TRUE,
|
||||
nthread = n_threads,
|
||||
objective = "reg:squarederror"
|
||||
)
|
||||
xgb.train(params = params, data = dtrain, nrounds = nrounds)
|
||||
}
|
||||
expect_silent(correct())
|
||||
incorrect <- function() {
|
||||
params <- list(max_depth = 2, booster = "dart",
|
||||
rate_drop = 0.5, one_drop = TRUE,
|
||||
objective = "reg:squarederror",
|
||||
foo = "bar", bar = "foo")
|
||||
params <- list(
|
||||
max_depth = 2,
|
||||
booster = "dart",
|
||||
rate_drop = 0.5,
|
||||
one_drop = TRUE,
|
||||
objective = "reg:squarederror",
|
||||
nthread = n_threads,
|
||||
foo = "bar",
|
||||
bar = "foo"
|
||||
)
|
||||
output <- capture.output(
|
||||
xgb.train(params = params, data = dtrain, nrounds = nrounds))
|
||||
xgb.train(params = params, data = dtrain, nrounds = nrounds)
|
||||
)
|
||||
print(output)
|
||||
}
|
||||
expect_output(incorrect(), '\\\\"bar\\\\", \\\\"foo\\\\"')
|
||||
@ -79,7 +97,8 @@ test_that("dart prediction works", {
|
||||
d <- cbind(
|
||||
x1 = rnorm(100),
|
||||
x2 = rnorm(100),
|
||||
x3 = rnorm(100))
|
||||
x3 = rnorm(100)
|
||||
)
|
||||
y <- d[, "x1"] + d[, "x2"]^2 +
|
||||
ifelse(d[, "x3"] > .5, d[, "x3"]^2, 2^d[, "x3"]) +
|
||||
rnorm(100)
|
||||
@ -93,7 +112,7 @@ test_that("dart prediction works", {
|
||||
rate_drop = 0.5,
|
||||
one_drop = TRUE,
|
||||
eta = 1,
|
||||
nthread = 2,
|
||||
nthread = n_threads,
|
||||
nrounds = nrounds,
|
||||
objective = "reg:squarederror"
|
||||
)
|
||||
@ -105,7 +124,7 @@ test_that("dart prediction works", {
|
||||
expect_false(all(matrix(pred_by_xgboost_0, byrow = TRUE) == matrix(pred_by_xgboost_2, byrow = TRUE)))
|
||||
|
||||
set.seed(1994)
|
||||
dtrain <- xgb.DMatrix(data = d, info = list(label = y))
|
||||
dtrain <- xgb.DMatrix(data = d, info = list(label = y), nthread = n_threads)
|
||||
booster_by_train <- xgb.train(
|
||||
params = list(
|
||||
booster = "dart",
|
||||
@ -113,7 +132,7 @@ test_that("dart prediction works", {
|
||||
eta = 1,
|
||||
rate_drop = 0.5,
|
||||
one_drop = TRUE,
|
||||
nthread = 1,
|
||||
nthread = n_threads,
|
||||
objective = "reg:squarederror"
|
||||
),
|
||||
data = dtrain,
|
||||
@ -132,10 +151,13 @@ test_that("train and predict softprob", {
|
||||
lb <- as.numeric(iris$Species) - 1
|
||||
set.seed(11)
|
||||
expect_output(
|
||||
bst <- xgboost(data = as.matrix(iris[, -5]), label = lb,
|
||||
max_depth = 3, eta = 0.5, nthread = 2, nrounds = 5,
|
||||
objective = "multi:softprob", num_class = 3, eval_metric = "merror")
|
||||
, "train-merror")
|
||||
bst <- xgboost(
|
||||
data = as.matrix(iris[, -5]), label = lb,
|
||||
max_depth = 3, eta = 0.5, nthread = n_threads, nrounds = 5,
|
||||
objective = "multi:softprob", num_class = 3, eval_metric = "merror"
|
||||
),
|
||||
"train-merror"
|
||||
)
|
||||
expect_false(is.null(bst$evaluation_log))
|
||||
expect_lt(bst$evaluation_log[, min(train_merror)], 0.025)
|
||||
expect_equal(bst$niter * 3, xgb.ntree(bst))
|
||||
@ -164,9 +186,10 @@ test_that("train and predict softprob", {
|
||||
x3 = rnorm(100)
|
||||
)
|
||||
y <- sample.int(10, 100, replace = TRUE) - 1
|
||||
dtrain <- xgb.DMatrix(data = d, info = list(label = y))
|
||||
dtrain <- xgb.DMatrix(data = d, info = list(label = y), nthread = n_threads)
|
||||
booster <- xgb.train(
|
||||
params = list(tree_method = "hist"), data = dtrain, nrounds = 4, num_class = 10,
|
||||
params = list(tree_method = "hist", nthread = n_threads),
|
||||
data = dtrain, nrounds = 4, num_class = 10,
|
||||
objective = "multi:softprob"
|
||||
)
|
||||
predt <- predict(booster, as.matrix(d), reshape = TRUE, strict_shape = FALSE)
|
||||
@ -178,10 +201,13 @@ test_that("train and predict softmax", {
|
||||
lb <- as.numeric(iris$Species) - 1
|
||||
set.seed(11)
|
||||
expect_output(
|
||||
bst <- xgboost(data = as.matrix(iris[, -5]), label = lb,
|
||||
max_depth = 3, eta = 0.5, nthread = 2, nrounds = 5,
|
||||
objective = "multi:softmax", num_class = 3, eval_metric = "merror")
|
||||
, "train-merror")
|
||||
bst <- xgboost(
|
||||
data = as.matrix(iris[, -5]), label = lb,
|
||||
max_depth = 3, eta = 0.5, nthread = n_threads, nrounds = 5,
|
||||
objective = "multi:softmax", num_class = 3, eval_metric = "merror"
|
||||
),
|
||||
"train-merror"
|
||||
)
|
||||
expect_false(is.null(bst$evaluation_log))
|
||||
expect_lt(bst$evaluation_log[, min(train_merror)], 0.025)
|
||||
expect_equal(bst$niter * 3, xgb.ntree(bst))
|
||||
@ -196,16 +222,19 @@ test_that("train and predict RF", {
|
||||
set.seed(11)
|
||||
lb <- train$label
|
||||
# single iteration
|
||||
bst <- xgboost(data = train$data, label = lb, max_depth = 5,
|
||||
nthread = 2, nrounds = 1, objective = "binary:logistic", eval_metric = "error",
|
||||
num_parallel_tree = 20, subsample = 0.6, colsample_bytree = 0.1)
|
||||
bst <- xgboost(
|
||||
data = train$data, label = lb, max_depth = 5,
|
||||
nthread = n_threads,
|
||||
nrounds = 1, objective = "binary:logistic", eval_metric = "error",
|
||||
num_parallel_tree = 20, subsample = 0.6, colsample_bytree = 0.1
|
||||
)
|
||||
expect_equal(bst$niter, 1)
|
||||
expect_equal(xgb.ntree(bst), 20)
|
||||
|
||||
pred <- predict(bst, train$data)
|
||||
pred_err <- sum((pred > 0.5) != lb) / length(lb)
|
||||
expect_lt(abs(bst$evaluation_log[1, train_error] - pred_err), 10e-6)
|
||||
#expect_lt(pred_err, 0.03)
|
||||
# expect_lt(pred_err, 0.03)
|
||||
|
||||
pred <- predict(bst, train$data, ntreelimit = 20)
|
||||
pred_err_20 <- sum((pred > 0.5) != lb) / length(lb)
|
||||
@ -219,11 +248,13 @@ test_that("train and predict RF with softprob", {
|
||||
lb <- as.numeric(iris$Species) - 1
|
||||
nrounds <- 15
|
||||
set.seed(11)
|
||||
bst <- xgboost(data = as.matrix(iris[, -5]), label = lb,
|
||||
max_depth = 3, eta = 0.9, nthread = 2, nrounds = nrounds,
|
||||
objective = "multi:softprob", eval_metric = "merror",
|
||||
num_class = 3, verbose = 0,
|
||||
num_parallel_tree = 4, subsample = 0.5, colsample_bytree = 0.5)
|
||||
bst <- xgboost(
|
||||
data = as.matrix(iris[, -5]), label = lb,
|
||||
max_depth = 3, eta = 0.9, nthread = n_threads, nrounds = nrounds,
|
||||
objective = "multi:softprob", eval_metric = "merror",
|
||||
num_class = 3, verbose = 0,
|
||||
num_parallel_tree = 4, subsample = 0.5, colsample_bytree = 0.5
|
||||
)
|
||||
expect_equal(bst$niter, 15)
|
||||
expect_equal(xgb.ntree(bst), 15 * 3 * 4)
|
||||
# predict for all iterations:
|
||||
@ -240,18 +271,24 @@ test_that("train and predict RF with softprob", {
|
||||
|
||||
test_that("use of multiple eval metrics works", {
|
||||
expect_output(
|
||||
bst <- xgboost(data = train$data, label = train$label, max_depth = 2,
|
||||
eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic",
|
||||
eval_metric = 'error', eval_metric = 'auc', eval_metric = "logloss")
|
||||
, "train-error.*train-auc.*train-logloss")
|
||||
bst <- xgboost(
|
||||
data = train$data, label = train$label, max_depth = 2,
|
||||
eta = 1, nthread = n_threads, nrounds = 2, objective = "binary:logistic",
|
||||
eval_metric = "error", eval_metric = "auc", eval_metric = "logloss"
|
||||
),
|
||||
"train-error.*train-auc.*train-logloss"
|
||||
)
|
||||
expect_false(is.null(bst$evaluation_log))
|
||||
expect_equal(dim(bst$evaluation_log), c(2, 4))
|
||||
expect_equal(colnames(bst$evaluation_log), c("iter", "train_error", "train_auc", "train_logloss"))
|
||||
expect_output(
|
||||
bst2 <- xgboost(data = train$data, label = train$label, max_depth = 2,
|
||||
eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic",
|
||||
eval_metric = list("error", "auc", "logloss"))
|
||||
, "train-error.*train-auc.*train-logloss")
|
||||
bst2 <- xgboost(
|
||||
data = train$data, label = train$label, max_depth = 2,
|
||||
eta = 1, nthread = n_threads, nrounds = 2, objective = "binary:logistic",
|
||||
eval_metric = list("error", "auc", "logloss")
|
||||
),
|
||||
"train-error.*train-auc.*train-logloss"
|
||||
)
|
||||
expect_false(is.null(bst2$evaluation_log))
|
||||
expect_equal(dim(bst2$evaluation_log), c(2, 4))
|
||||
expect_equal(colnames(bst2$evaluation_log), c("iter", "train_error", "train_auc", "train_logloss"))
|
||||
@ -259,9 +296,11 @@ test_that("use of multiple eval metrics works", {
|
||||
|
||||
|
||||
test_that("training continuation works", {
|
||||
dtrain <- xgb.DMatrix(train$data, label = train$label)
|
||||
dtrain <- xgb.DMatrix(train$data, label = train$label, nthread = n_threads)
|
||||
watchlist <- list(train = dtrain)
|
||||
param <- list(objective = "binary:logistic", max_depth = 2, eta = 1, nthread = 2)
|
||||
param <- list(
|
||||
objective = "binary:logistic", max_depth = 2, eta = 1, nthread = n_threads
|
||||
)
|
||||
|
||||
# for the reference, use 4 iterations at once:
|
||||
set.seed(11)
|
||||
@ -271,30 +310,33 @@ test_that("training continuation works", {
|
||||
bst1 <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0)
|
||||
# continue for two more:
|
||||
bst2 <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0, xgb_model = bst1)
|
||||
if (!windows_flag && !solaris_flag)
|
||||
if (!windows_flag && !solaris_flag) {
|
||||
expect_equal(bst$raw, bst2$raw)
|
||||
}
|
||||
expect_false(is.null(bst2$evaluation_log))
|
||||
expect_equal(dim(bst2$evaluation_log), c(4, 2))
|
||||
expect_equal(bst2$evaluation_log, bst$evaluation_log)
|
||||
# test continuing from raw model data
|
||||
bst2 <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0, xgb_model = bst1$raw)
|
||||
if (!windows_flag && !solaris_flag)
|
||||
if (!windows_flag && !solaris_flag) {
|
||||
expect_equal(bst$raw, bst2$raw)
|
||||
}
|
||||
expect_equal(dim(bst2$evaluation_log), c(2, 2))
|
||||
# test continuing from a model in file
|
||||
xgb.save(bst1, "xgboost.json")
|
||||
bst2 <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0, xgb_model = "xgboost.json")
|
||||
if (!windows_flag && !solaris_flag)
|
||||
if (!windows_flag && !solaris_flag) {
|
||||
expect_equal(bst$raw, bst2$raw)
|
||||
}
|
||||
expect_equal(dim(bst2$evaluation_log), c(2, 2))
|
||||
file.remove("xgboost.json")
|
||||
})
|
||||
|
||||
test_that("model serialization works", {
|
||||
out_path <- "model_serialization"
|
||||
dtrain <- xgb.DMatrix(train$data, label = train$label)
|
||||
dtrain <- xgb.DMatrix(train$data, label = train$label, nthread = n_threads)
|
||||
watchlist <- list(train = dtrain)
|
||||
param <- list(objective = "binary:logistic")
|
||||
param <- list(objective = "binary:logistic", nthread = n_threads)
|
||||
booster <- xgb.train(param, dtrain, nrounds = 4, watchlist)
|
||||
raw <- xgb.serialize(booster)
|
||||
saveRDS(raw, out_path)
|
||||
@ -309,11 +351,14 @@ test_that("model serialization works", {
|
||||
test_that("xgb.cv works", {
|
||||
set.seed(11)
|
||||
expect_output(
|
||||
cv <- xgb.cv(data = train$data, label = train$label, max_depth = 2, nfold = 5,
|
||||
eta = 1., nthread = 2, nrounds = 2, objective = "binary:logistic",
|
||||
eval_metric = "error", verbose = TRUE)
|
||||
, "train-error:")
|
||||
expect_is(cv, 'xgb.cv.synchronous')
|
||||
cv <- xgb.cv(
|
||||
data = train$data, label = train$label, max_depth = 2, nfold = 5,
|
||||
eta = 1., nthread = n_threads, nrounds = 2, objective = "binary:logistic",
|
||||
eval_metric = "error", verbose = TRUE
|
||||
),
|
||||
"train-error:"
|
||||
)
|
||||
expect_is(cv, "xgb.cv.synchronous")
|
||||
expect_false(is.null(cv$evaluation_log))
|
||||
expect_lt(cv$evaluation_log[, min(test_error_mean)], 0.03)
|
||||
expect_lt(cv$evaluation_log[, min(test_error_std)], 0.008)
|
||||
@ -326,15 +371,19 @@ test_that("xgb.cv works", {
|
||||
})
|
||||
|
||||
test_that("xgb.cv works with stratified folds", {
|
||||
dtrain <- xgb.DMatrix(train$data, label = train$label)
|
||||
dtrain <- xgb.DMatrix(train$data, label = train$label, nthread = n_threads)
|
||||
set.seed(314159)
|
||||
cv <- xgb.cv(data = dtrain, max_depth = 2, nfold = 5,
|
||||
eta = 1., nthread = 2, nrounds = 2, objective = "binary:logistic",
|
||||
verbose = TRUE, stratified = FALSE)
|
||||
cv <- xgb.cv(
|
||||
data = dtrain, max_depth = 2, nfold = 5,
|
||||
eta = 1., nthread = n_threads, nrounds = 2, objective = "binary:logistic",
|
||||
verbose = TRUE, stratified = FALSE
|
||||
)
|
||||
set.seed(314159)
|
||||
cv2 <- xgb.cv(data = dtrain, max_depth = 2, nfold = 5,
|
||||
eta = 1., nthread = 2, nrounds = 2, objective = "binary:logistic",
|
||||
verbose = TRUE, stratified = TRUE)
|
||||
cv2 <- xgb.cv(
|
||||
data = dtrain, max_depth = 2, nfold = 5,
|
||||
eta = 1., nthread = n_threads, nrounds = 2, objective = "binary:logistic",
|
||||
verbose = TRUE, stratified = TRUE
|
||||
)
|
||||
# Stratified folds should result in a different evaluation logs
|
||||
expect_true(all(cv$evaluation_log[, test_logloss_mean] != cv2$evaluation_log[, test_logloss_mean]))
|
||||
})
|
||||
@ -342,40 +391,57 @@ test_that("xgb.cv works with stratified folds", {
|
||||
test_that("train and predict with non-strict classes", {
|
||||
# standard dense matrix input
|
||||
train_dense <- as.matrix(train$data)
|
||||
bst <- xgboost(data = train_dense, label = train$label, max_depth = 2,
|
||||
eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic", verbose = 0)
|
||||
bst <- xgboost(
|
||||
data = train_dense, label = train$label, max_depth = 2,
|
||||
eta = 1, nthread = n_threads, nrounds = 2, objective = "binary:logistic",
|
||||
verbose = 0
|
||||
)
|
||||
pr0 <- predict(bst, train_dense)
|
||||
|
||||
# dense matrix-like input of non-matrix class
|
||||
class(train_dense) <- 'shmatrix'
|
||||
class(train_dense) <- "shmatrix"
|
||||
expect_true(is.matrix(train_dense))
|
||||
expect_error(
|
||||
bst <- xgboost(data = train_dense, label = train$label, max_depth = 2,
|
||||
eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic", verbose = 0)
|
||||
, regexp = NA)
|
||||
bst <- xgboost(
|
||||
data = train_dense, label = train$label, max_depth = 2,
|
||||
eta = 1, nthread = n_threads, nrounds = 2, objective = "binary:logistic",
|
||||
verbose = 0
|
||||
),
|
||||
regexp = NA
|
||||
)
|
||||
expect_error(pr <- predict(bst, train_dense), regexp = NA)
|
||||
expect_equal(pr0, pr)
|
||||
|
||||
# dense matrix-like input of non-matrix class with some inheritance
|
||||
class(train_dense) <- c('pphmatrix', 'shmatrix')
|
||||
class(train_dense) <- c("pphmatrix", "shmatrix")
|
||||
expect_true(is.matrix(train_dense))
|
||||
expect_error(
|
||||
bst <- xgboost(data = train_dense, label = train$label, max_depth = 2,
|
||||
eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic", verbose = 0)
|
||||
, regexp = NA)
|
||||
bst <- xgboost(
|
||||
data = train_dense, label = train$label, max_depth = 2,
|
||||
eta = 1, nthread = n_threads, nrounds = 2, objective = "binary:logistic",
|
||||
verbose = 0
|
||||
),
|
||||
regexp = NA
|
||||
)
|
||||
expect_error(pr <- predict(bst, train_dense), regexp = NA)
|
||||
expect_equal(pr0, pr)
|
||||
|
||||
# when someone inherits from xgb.Booster, it should still be possible to use it as xgb.Booster
|
||||
class(bst) <- c('super.Booster', 'xgb.Booster')
|
||||
class(bst) <- c("super.Booster", "xgb.Booster")
|
||||
expect_error(pr <- predict(bst, train_dense), regexp = NA)
|
||||
expect_equal(pr0, pr)
|
||||
})
|
||||
|
||||
test_that("max_delta_step works", {
|
||||
dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
|
||||
dtrain <- xgb.DMatrix(
|
||||
agaricus.train$data, label = agaricus.train$label, nthread = n_threads
|
||||
)
|
||||
watchlist <- list(train = dtrain)
|
||||
param <- list(objective = "binary:logistic", eval_metric = "logloss", max_depth = 2, nthread = 2, eta = 0.5)
|
||||
param <- list(
|
||||
objective = "binary:logistic", eval_metric = "logloss", max_depth = 2,
|
||||
nthread = n_threads,
|
||||
eta = 0.5
|
||||
)
|
||||
nrounds <- 5
|
||||
# model with no restriction on max_delta_step
|
||||
bst1 <- xgb.train(param, dtrain, nrounds, watchlist, verbose = 1)
|
||||
@ -395,14 +461,16 @@ test_that("colsample_bytree works", {
|
||||
test_y <- as.numeric(rowSums(test_x) > 0)
|
||||
colnames(train_x) <- paste0("Feature_", sprintf("%03d", 1:100))
|
||||
colnames(test_x) <- paste0("Feature_", sprintf("%03d", 1:100))
|
||||
dtrain <- xgb.DMatrix(train_x, label = train_y)
|
||||
dtest <- xgb.DMatrix(test_x, label = test_y)
|
||||
dtrain <- xgb.DMatrix(train_x, label = train_y, nthread = n_threads)
|
||||
dtest <- xgb.DMatrix(test_x, label = test_y, nthread = n_threads)
|
||||
watchlist <- list(train = dtrain, eval = dtest)
|
||||
## Use colsample_bytree = 0.01, so that roughly one out of 100 features is chosen for
|
||||
## each tree
|
||||
param <- list(max_depth = 2, eta = 0, nthread = 2,
|
||||
colsample_bytree = 0.01, objective = "binary:logistic",
|
||||
eval_metric = "auc")
|
||||
param <- list(
|
||||
max_depth = 2, eta = 0, nthread = n_threads,
|
||||
colsample_bytree = 0.01, objective = "binary:logistic",
|
||||
eval_metric = "auc"
|
||||
)
|
||||
set.seed(2)
|
||||
bst <- xgb.train(param, dtrain, nrounds = 100, watchlist, verbose = 0)
|
||||
xgb.importance(model = bst)
|
||||
@ -412,9 +480,11 @@ test_that("colsample_bytree works", {
|
||||
})
|
||||
|
||||
test_that("Configuration works", {
|
||||
bst <- xgboost(data = train$data, label = train$label, max_depth = 2,
|
||||
eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic",
|
||||
eval_metric = 'error', eval_metric = 'auc', eval_metric = "logloss")
|
||||
bst <- xgboost(
|
||||
data = train$data, label = train$label, max_depth = 2,
|
||||
eta = 1, nthread = n_threads, nrounds = 2, objective = "binary:logistic",
|
||||
eval_metric = "error", eval_metric = "auc", eval_metric = "logloss"
|
||||
)
|
||||
config <- xgb.config(bst)
|
||||
xgb.config(bst) <- config
|
||||
reloaded_config <- xgb.config(bst)
|
||||
@ -451,22 +521,26 @@ test_that("strict_shape works", {
|
||||
y <- as.numeric(iris$Species) - 1
|
||||
X <- as.matrix(iris[, -5])
|
||||
|
||||
bst <- xgboost(data = X, label = y,
|
||||
max_depth = 2, nrounds = n_rounds,
|
||||
objective = "multi:softprob", num_class = 3, eval_metric = "merror")
|
||||
bst <- xgboost(
|
||||
data = X, label = y,
|
||||
max_depth = 2, nrounds = n_rounds, nthread = n_threads,
|
||||
objective = "multi:softprob", num_class = 3, eval_metric = "merror"
|
||||
)
|
||||
|
||||
test_strict_shape(bst, X, 3)
|
||||
}
|
||||
|
||||
|
||||
test_agaricus <- function() {
|
||||
data(agaricus.train, package = 'xgboost')
|
||||
data(agaricus.train, package = "xgboost")
|
||||
X <- agaricus.train$data
|
||||
y <- agaricus.train$label
|
||||
|
||||
bst <- xgboost(data = X, label = y, max_depth = 2,
|
||||
nrounds = n_rounds, objective = "binary:logistic",
|
||||
eval_metric = 'error', eval_metric = 'auc', eval_metric = "logloss")
|
||||
bst <- xgboost(
|
||||
data = X, label = y, max_depth = 2, nthread = n_threads,
|
||||
nrounds = n_rounds, objective = "binary:logistic",
|
||||
eval_metric = "error", eval_metric = "auc", eval_metric = "logloss"
|
||||
)
|
||||
|
||||
test_strict_shape(bst, X, 1)
|
||||
}
|
||||
@ -481,8 +555,10 @@ test_that("'predict' accepts CSR data", {
|
||||
x_csc <- as(X[1L, , drop = FALSE], "CsparseMatrix")
|
||||
x_csr <- as(x_csc, "RsparseMatrix")
|
||||
x_spv <- as(x_csc, "sparseVector")
|
||||
bst <- xgboost(data = X, label = y, objective = "binary:logistic",
|
||||
nrounds = 5L, verbose = FALSE)
|
||||
bst <- xgboost(
|
||||
data = X, label = y, objective = "binary:logistic",
|
||||
nrounds = 5L, verbose = FALSE, nthread = n_threads,
|
||||
)
|
||||
p_csc <- predict(bst, x_csc)
|
||||
p_csr <- predict(bst, x_csr)
|
||||
p_spv <- predict(bst, x_spv)
|
||||
|
||||
@ -6,6 +6,8 @@ data(agaricus.test, package = 'xgboost')
|
||||
train <- agaricus.train
|
||||
test <- agaricus.test
|
||||
|
||||
n_threads <- 2
|
||||
|
||||
# add some label noise for early stopping tests
|
||||
add.noise <- function(label, frac) {
|
||||
inoise <- sample(length(label), length(label) * frac)
|
||||
@ -15,15 +17,15 @@ add.noise <- function(label, frac) {
|
||||
set.seed(11)
|
||||
ltrain <- add.noise(train$label, 0.2)
|
||||
ltest <- add.noise(test$label, 0.2)
|
||||
dtrain <- xgb.DMatrix(train$data, label = ltrain)
|
||||
dtest <- xgb.DMatrix(test$data, label = ltest)
|
||||
dtrain <- xgb.DMatrix(train$data, label = ltrain, nthread = n_threads)
|
||||
dtest <- xgb.DMatrix(test$data, label = ltest, nthread = n_threads)
|
||||
watchlist <- list(train = dtrain, test = dtest)
|
||||
|
||||
|
||||
err <- function(label, pr) sum((pr > 0.5) != label) / length(label)
|
||||
|
||||
param <- list(objective = "binary:logistic", eval_metric = "error",
|
||||
max_depth = 2, nthread = 2)
|
||||
max_depth = 2, nthread = n_threads)
|
||||
|
||||
|
||||
test_that("cb.print.evaluation works as expected", {
|
||||
@ -103,7 +105,7 @@ test_that("cb.evaluation.log works as expected", {
|
||||
|
||||
|
||||
param <- list(objective = "binary:logistic", eval_metric = "error",
|
||||
max_depth = 4, nthread = 2)
|
||||
max_depth = 4, nthread = n_threads)
|
||||
|
||||
test_that("can store evaluation_log without printing", {
|
||||
expect_silent(
|
||||
@ -179,8 +181,10 @@ test_that("cb.save.model works as expected", {
|
||||
expect_true(file.exists('xgboost_01.json'))
|
||||
expect_true(file.exists('xgboost_02.json'))
|
||||
b1 <- xgb.load('xgboost_01.json')
|
||||
xgb.parameters(b1) <- list(nthread = 2)
|
||||
expect_equal(xgb.ntree(b1), 1)
|
||||
b2 <- xgb.load('xgboost_02.json')
|
||||
xgb.parameters(b2) <- list(nthread = 2)
|
||||
expect_equal(xgb.ntree(b2), 2)
|
||||
|
||||
xgb.config(b2) <- xgb.config(bst)
|
||||
@ -267,7 +271,8 @@ test_that("early stopping works with titanic", {
|
||||
objective = "binary:logistic",
|
||||
eval_metric = "auc",
|
||||
nrounds = 100,
|
||||
early_stopping_rounds = 3
|
||||
early_stopping_rounds = 3,
|
||||
nthread = n_threads
|
||||
)
|
||||
|
||||
expect_true(TRUE) # should not crash
|
||||
@ -308,7 +313,7 @@ test_that("prediction in xgb.cv works", {
|
||||
|
||||
test_that("prediction in xgb.cv works for gblinear too", {
|
||||
set.seed(11)
|
||||
p <- list(booster = 'gblinear', objective = "reg:logistic", nthread = 2)
|
||||
p <- list(booster = 'gblinear', objective = "reg:logistic", nthread = n_threads)
|
||||
cv <- xgb.cv(p, dtrain, nfold = 5, eta = 0.5, nrounds = 2, prediction = TRUE, verbose = 0)
|
||||
expect_false(is.null(cv$evaluation_log))
|
||||
expect_false(is.null(cv$pred))
|
||||
@ -341,7 +346,7 @@ test_that("prediction in xgb.cv for softprob works", {
|
||||
set.seed(11)
|
||||
expect_warning(
|
||||
cv <- xgb.cv(data = as.matrix(iris[, -5]), label = lb, nfold = 4,
|
||||
eta = 0.5, nrounds = 5, max_depth = 3, nthread = 2,
|
||||
eta = 0.5, nrounds = 5, max_depth = 3, nthread = n_threads,
|
||||
subsample = 0.8, gamma = 2, verbose = 0,
|
||||
prediction = TRUE, objective = "multi:softprob", num_class = 3)
|
||||
, NA)
|
||||
|
||||
@ -2,10 +2,16 @@ context('Test models with custom objective')
|
||||
|
||||
set.seed(1994)
|
||||
|
||||
n_threads <- 2
|
||||
|
||||
data(agaricus.train, package = 'xgboost')
|
||||
data(agaricus.test, package = 'xgboost')
|
||||
dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
|
||||
dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label)
|
||||
dtrain <- xgb.DMatrix(
|
||||
agaricus.train$data, label = agaricus.train$label, nthread = n_threads
|
||||
)
|
||||
dtest <- xgb.DMatrix(
|
||||
agaricus.test$data, label = agaricus.test$label, nthread = n_threads
|
||||
)
|
||||
watchlist <- list(eval = dtest, train = dtrain)
|
||||
|
||||
logregobj <- function(preds, dtrain) {
|
||||
@ -22,7 +28,7 @@ evalerror <- function(preds, dtrain) {
|
||||
return(list(metric = "error", value = err))
|
||||
}
|
||||
|
||||
param <- list(max_depth = 2, eta = 1, nthread = 2,
|
||||
param <- list(max_depth = 2, eta = 1, nthread = n_threads,
|
||||
objective = logregobj, eval_metric = evalerror)
|
||||
num_round <- 2
|
||||
|
||||
@ -67,7 +73,7 @@ test_that("custom objective using DMatrix attr works", {
|
||||
test_that("custom objective with multi-class shape", {
|
||||
data <- as.matrix(iris[, -5])
|
||||
label <- as.numeric(iris$Species) - 1
|
||||
dtrain <- xgb.DMatrix(data = data, label = label)
|
||||
dtrain <- xgb.DMatrix(data = data, label = label, nthread = n_threads)
|
||||
n_classes <- 3
|
||||
|
||||
fake_softprob <- function(preds, dtrain) {
|
||||
|
||||
@ -5,19 +5,21 @@ data(agaricus.test, package = "xgboost")
|
||||
test_data <- agaricus.test$data[1:100, ]
|
||||
test_label <- agaricus.test$label[1:100]
|
||||
|
||||
n_threads <- 2
|
||||
|
||||
test_that("xgb.DMatrix: basic construction", {
|
||||
# from sparse matrix
|
||||
dtest1 <- xgb.DMatrix(test_data, label = test_label)
|
||||
dtest1 <- xgb.DMatrix(test_data, label = test_label, nthread = n_threads)
|
||||
|
||||
# from dense matrix
|
||||
dtest2 <- xgb.DMatrix(as.matrix(test_data), label = test_label)
|
||||
dtest2 <- xgb.DMatrix(as.matrix(test_data), label = test_label, nthread = n_threads)
|
||||
expect_equal(getinfo(dtest1, "label"), getinfo(dtest2, "label"))
|
||||
expect_equal(dim(dtest1), dim(dtest2))
|
||||
|
||||
# from dense integer matrix
|
||||
int_data <- as.matrix(test_data)
|
||||
storage.mode(int_data) <- "integer"
|
||||
dtest3 <- xgb.DMatrix(int_data, label = test_label)
|
||||
dtest3 <- xgb.DMatrix(int_data, label = test_label, nthread = n_threads)
|
||||
expect_equal(dim(dtest1), dim(dtest3))
|
||||
|
||||
n_samples <- 100
|
||||
@ -29,15 +31,15 @@ test_that("xgb.DMatrix: basic construction", {
|
||||
X <- matrix(X, nrow = n_samples)
|
||||
y <- rbinom(n = n_samples, size = 1, prob = 1 / 2)
|
||||
|
||||
fd <- xgb.DMatrix(X, label = y, missing = 1)
|
||||
fd <- xgb.DMatrix(X, label = y, missing = 1, nthread = n_threads)
|
||||
|
||||
dgc <- as(X, "dgCMatrix")
|
||||
fdgc <- xgb.DMatrix(dgc, label = y, missing = 1.0)
|
||||
fdgc <- xgb.DMatrix(dgc, label = y, missing = 1.0, nthread = n_threads)
|
||||
|
||||
dgr <- as(X, "dgRMatrix")
|
||||
fdgr <- xgb.DMatrix(dgr, label = y, missing = 1)
|
||||
fdgr <- xgb.DMatrix(dgr, label = y, missing = 1, nthread = n_threads)
|
||||
|
||||
params <- list(tree_method = "hist")
|
||||
params <- list(tree_method = "hist", nthread = n_threads)
|
||||
bst_fd <- xgb.train(
|
||||
params, nrounds = 8, fd, watchlist = list(train = fd)
|
||||
)
|
||||
@ -64,12 +66,12 @@ test_that("xgb.DMatrix: NA", {
|
||||
)
|
||||
x[1, "x1"] <- NA
|
||||
|
||||
m <- xgb.DMatrix(x)
|
||||
m <- xgb.DMatrix(x, nthread = n_threads)
|
||||
xgb.DMatrix.save(m, "int.dmatrix")
|
||||
|
||||
x <- matrix(as.numeric(x), nrow = n_samples, ncol = 2)
|
||||
colnames(x) <- c("x1", "x2")
|
||||
m <- xgb.DMatrix(x)
|
||||
m <- xgb.DMatrix(x, nthread = n_threads)
|
||||
|
||||
xgb.DMatrix.save(m, "float.dmatrix")
|
||||
|
||||
@ -94,7 +96,7 @@ test_that("xgb.DMatrix: NA", {
|
||||
|
||||
test_that("xgb.DMatrix: saving, loading", {
|
||||
# save to a local file
|
||||
dtest1 <- xgb.DMatrix(test_data, label = test_label)
|
||||
dtest1 <- xgb.DMatrix(test_data, label = test_label, nthread = n_threads)
|
||||
tmp_file <- tempfile('xgb.DMatrix_')
|
||||
on.exit(unlink(tmp_file))
|
||||
expect_true(xgb.DMatrix.save(dtest1, tmp_file))
|
||||
@ -109,13 +111,17 @@ test_that("xgb.DMatrix: saving, loading", {
|
||||
tmp_file <- tempfile(fileext = ".libsvm")
|
||||
writeLines(tmp, tmp_file)
|
||||
expect_true(file.exists(tmp_file))
|
||||
dtest4 <- xgb.DMatrix(paste(tmp_file, "?format=libsvm", sep = ""), silent = TRUE)
|
||||
dtest4 <- xgb.DMatrix(
|
||||
paste(tmp_file, "?format=libsvm", sep = ""), silent = TRUE, nthread = n_threads
|
||||
)
|
||||
expect_equal(dim(dtest4), c(3, 4))
|
||||
expect_equal(getinfo(dtest4, 'label'), c(0, 1, 0))
|
||||
|
||||
# check that feature info is saved
|
||||
data(agaricus.train, package = 'xgboost')
|
||||
dtrain <- xgb.DMatrix(data = agaricus.train$data, label = agaricus.train$label)
|
||||
dtrain <- xgb.DMatrix(
|
||||
data = agaricus.train$data, label = agaricus.train$label, nthread = n_threads
|
||||
)
|
||||
cnames <- colnames(dtrain)
|
||||
expect_equal(length(cnames), 126)
|
||||
tmp_file <- tempfile('xgb.DMatrix_')
|
||||
@ -129,7 +135,7 @@ test_that("xgb.DMatrix: saving, loading", {
|
||||
})
|
||||
|
||||
test_that("xgb.DMatrix: getinfo & setinfo", {
|
||||
dtest <- xgb.DMatrix(test_data)
|
||||
dtest <- xgb.DMatrix(test_data, nthread = n_threads)
|
||||
expect_true(setinfo(dtest, 'label', test_label))
|
||||
labels <- getinfo(dtest, 'label')
|
||||
expect_equal(test_label, getinfo(dtest, 'label'))
|
||||
@ -156,7 +162,7 @@ test_that("xgb.DMatrix: getinfo & setinfo", {
|
||||
})
|
||||
|
||||
test_that("xgb.DMatrix: slice, dim", {
|
||||
dtest <- xgb.DMatrix(test_data, label = test_label)
|
||||
dtest <- xgb.DMatrix(test_data, label = test_label, nthread = n_threads)
|
||||
expect_equal(dim(dtest), dim(test_data))
|
||||
dsub1 <- slice(dtest, 1:42)
|
||||
expect_equal(nrow(dsub1), 42)
|
||||
@ -171,16 +177,20 @@ test_that("xgb.DMatrix: slice, trailing empty rows", {
|
||||
data(agaricus.train, package = 'xgboost')
|
||||
train_data <- agaricus.train$data
|
||||
train_label <- agaricus.train$label
|
||||
dtrain <- xgb.DMatrix(data = train_data, label = train_label)
|
||||
dtrain <- xgb.DMatrix(
|
||||
data = train_data, label = train_label, nthread = n_threads
|
||||
)
|
||||
slice(dtrain, 6513L)
|
||||
train_data[6513, ] <- 0
|
||||
dtrain <- xgb.DMatrix(data = train_data, label = train_label)
|
||||
dtrain <- xgb.DMatrix(
|
||||
data = train_data, label = train_label, nthread = n_threads
|
||||
)
|
||||
slice(dtrain, 6513L)
|
||||
expect_equal(nrow(dtrain), 6513)
|
||||
})
|
||||
|
||||
test_that("xgb.DMatrix: colnames", {
|
||||
dtest <- xgb.DMatrix(test_data, label = test_label)
|
||||
dtest <- xgb.DMatrix(test_data, label = test_label, nthread = n_threads)
|
||||
expect_equal(colnames(dtest), colnames(test_data))
|
||||
expect_error(colnames(dtest) <- 'asdf')
|
||||
new_names <- make.names(seq_len(ncol(test_data)))
|
||||
@ -196,7 +206,7 @@ test_that("xgb.DMatrix: nrow is correct for a very sparse matrix", {
|
||||
x <- Matrix::rsparsematrix(nr, 100, density = 0.0005)
|
||||
# we want it very sparse, so that last rows are empty
|
||||
expect_lt(max(x@i), nr)
|
||||
dtest <- xgb.DMatrix(x)
|
||||
dtest <- xgb.DMatrix(x, nthread = n_threads)
|
||||
expect_equal(dim(dtest), dim(x))
|
||||
})
|
||||
|
||||
@ -205,8 +215,8 @@ test_that("xgb.DMatrix: print", {
|
||||
|
||||
# core DMatrix with just data and labels
|
||||
dtrain <- xgb.DMatrix(
|
||||
data = agaricus.train$data
|
||||
, label = agaricus.train$label
|
||||
data = agaricus.train$data, label = agaricus.train$label,
|
||||
nthread = n_threads
|
||||
)
|
||||
txt <- capture.output({
|
||||
print(dtrain)
|
||||
@ -222,10 +232,11 @@ test_that("xgb.DMatrix: print", {
|
||||
|
||||
# DMatrix with weights and base_margin
|
||||
dtrain <- xgb.DMatrix(
|
||||
data = agaricus.train$data
|
||||
, label = agaricus.train$label
|
||||
, weight = seq_along(agaricus.train$label)
|
||||
, base_margin = agaricus.train$label
|
||||
data = agaricus.train$data,
|
||||
label = agaricus.train$label,
|
||||
weight = seq_along(agaricus.train$label),
|
||||
base_margin = agaricus.train$label,
|
||||
nthread = n_threads
|
||||
)
|
||||
txt <- capture.output({
|
||||
print(dtrain)
|
||||
@ -234,7 +245,8 @@ test_that("xgb.DMatrix: print", {
|
||||
|
||||
# DMatrix with just features
|
||||
dtrain <- xgb.DMatrix(
|
||||
data = agaricus.train$data
|
||||
data = agaricus.train$data,
|
||||
nthread = n_threads
|
||||
)
|
||||
txt <- capture.output({
|
||||
print(dtrain)
|
||||
@ -245,7 +257,8 @@ test_that("xgb.DMatrix: print", {
|
||||
data_no_colnames <- agaricus.train$data
|
||||
colnames(data_no_colnames) <- NULL
|
||||
dtrain <- xgb.DMatrix(
|
||||
data = data_no_colnames
|
||||
data = data_no_colnames,
|
||||
nthread = n_threads
|
||||
)
|
||||
txt <- capture.output({
|
||||
print(dtrain)
|
||||
|
||||
@ -1,5 +1,7 @@
|
||||
context("feature weights")
|
||||
|
||||
n_threads <- 2
|
||||
|
||||
test_that("training with feature weights works", {
|
||||
nrows <- 1000
|
||||
ncols <- 9
|
||||
@ -10,8 +12,12 @@ test_that("training with feature weights works", {
|
||||
|
||||
test <- function(tm) {
|
||||
names <- paste0("f", 1:ncols)
|
||||
xy <- xgb.DMatrix(data = x, label = y, feature_weights = weights)
|
||||
params <- list(colsample_bynode = 0.4, tree_method = tm, nthread = 1)
|
||||
xy <- xgb.DMatrix(
|
||||
data = x, label = y, feature_weights = weights, nthread = n_threads
|
||||
)
|
||||
params <- list(
|
||||
colsample_bynode = 0.4, tree_method = tm, nthread = n_threads
|
||||
)
|
||||
model <- xgb.train(params = params, data = xy, nrounds = 32)
|
||||
importance <- xgb.importance(model = model, feature_names = names)
|
||||
expect_equal(dim(importance), c(ncols, 4))
|
||||
|
||||
@ -1,13 +1,19 @@
|
||||
context('Test generalized linear models')
|
||||
|
||||
n_threads <- 2
|
||||
|
||||
test_that("gblinear works", {
|
||||
data(agaricus.train, package = 'xgboost')
|
||||
data(agaricus.test, package = 'xgboost')
|
||||
dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
|
||||
dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label)
|
||||
dtrain <- xgb.DMatrix(
|
||||
agaricus.train$data, label = agaricus.train$label, nthread = n_threads
|
||||
)
|
||||
dtest <- xgb.DMatrix(
|
||||
agaricus.test$data, label = agaricus.test$label, nthread = n_threads
|
||||
)
|
||||
|
||||
param <- list(objective = "binary:logistic", eval_metric = "error", booster = "gblinear",
|
||||
nthread = 2, eta = 0.8, alpha = 0.0001, lambda = 0.0001)
|
||||
nthread = n_threads, eta = 0.8, alpha = 0.0001, lambda = 0.0001)
|
||||
watchlist <- list(eval = dtest, train = dtrain)
|
||||
|
||||
n <- 5 # iterations
|
||||
@ -48,12 +54,16 @@ test_that("gblinear works", {
|
||||
test_that("gblinear early stopping works", {
|
||||
data(agaricus.train, package = 'xgboost')
|
||||
data(agaricus.test, package = 'xgboost')
|
||||
dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
|
||||
dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label)
|
||||
dtrain <- xgb.DMatrix(
|
||||
agaricus.train$data, label = agaricus.train$label, nthread = n_threads
|
||||
)
|
||||
dtest <- xgb.DMatrix(
|
||||
agaricus.test$data, label = agaricus.test$label, nthread = n_threads
|
||||
)
|
||||
|
||||
param <- list(
|
||||
objective = "binary:logistic", eval_metric = "error", booster = "gblinear",
|
||||
nthread = 2, eta = 0.8, alpha = 0.0001, lambda = 0.0001,
|
||||
nthread = n_threads, eta = 0.8, alpha = 0.0001, lambda = 0.0001,
|
||||
updater = "coord_descent"
|
||||
)
|
||||
|
||||
|
||||
@ -171,6 +171,7 @@ test_that("SHAPs sum to predictions, with or without DART", {
|
||||
fit <- xgboost(
|
||||
params = c(
|
||||
list(
|
||||
nthread = 2,
|
||||
booster = booster,
|
||||
objective = "reg:squarederror",
|
||||
eval_metric = "rmse"),
|
||||
@ -257,7 +258,7 @@ test_that("xgb.Booster serializing as R object works", {
|
||||
.skip_if_vcd_not_available()
|
||||
saveRDS(bst.Tree, 'xgb.model.rds')
|
||||
bst <- readRDS('xgb.model.rds')
|
||||
dtrain <- xgb.DMatrix(sparse_matrix, label = label)
|
||||
dtrain <- xgb.DMatrix(sparse_matrix, label = label, nthread = 2)
|
||||
expect_equal(predict(bst.Tree, dtrain), predict(bst, dtrain), tolerance = float_tolerance)
|
||||
expect_equal(xgb.dump(bst.Tree), xgb.dump(bst))
|
||||
xgb.save(bst, 'xgb.model')
|
||||
@ -363,7 +364,8 @@ test_that("xgb.importance works with and without feature names", {
|
||||
data = as.matrix(data.frame(x = c(0, 1))),
|
||||
label = c(1, 2),
|
||||
nrounds = 1,
|
||||
base_score = 0.5
|
||||
base_score = 0.5,
|
||||
nthread = 2
|
||||
)
|
||||
df <- xgb.model.dt.tree(model = m)
|
||||
expect_equal(df$Feature, "Leaf")
|
||||
|
||||
@ -2,6 +2,8 @@ require(xgboost)
|
||||
|
||||
context("interaction constraints")
|
||||
|
||||
n_threads <- 2
|
||||
|
||||
set.seed(1024)
|
||||
x1 <- rnorm(1000, 1)
|
||||
x2 <- rnorm(1000, 1)
|
||||
@ -45,11 +47,18 @@ test_that("interaction constraints scientific representation", {
|
||||
d <- matrix(rexp(rows, rate = .1), nrow = rows, ncol = cols)
|
||||
y <- rnorm(rows)
|
||||
|
||||
dtrain <- xgb.DMatrix(data = d, info = list(label = y))
|
||||
dtrain <- xgb.DMatrix(data = d, info = list(label = y), nthread = n_threads)
|
||||
inc <- list(c(seq.int(from = 0, to = cols, by = 1)))
|
||||
|
||||
with_inc <- xgb.train(data = dtrain, tree_method = 'hist',
|
||||
interaction_constraints = inc, nrounds = 10)
|
||||
without_inc <- xgb.train(data = dtrain, tree_method = 'hist', nrounds = 10)
|
||||
with_inc <- xgb.train(
|
||||
data = dtrain,
|
||||
tree_method = 'hist',
|
||||
interaction_constraints = inc,
|
||||
nrounds = 10,
|
||||
nthread = n_threads
|
||||
)
|
||||
without_inc <- xgb.train(
|
||||
data = dtrain, tree_method = 'hist', nrounds = 10, nthread = n_threads
|
||||
)
|
||||
expect_equal(xgb.save.raw(with_inc), xgb.save.raw(without_inc))
|
||||
})
|
||||
|
||||
@ -1,6 +1,7 @@
|
||||
context('Test prediction of feature interactions')
|
||||
|
||||
set.seed(123)
|
||||
n_threads <- 2
|
||||
|
||||
test_that("predict feature interactions works", {
|
||||
# simulate some binary data and a linear outcome with an interaction term
|
||||
@ -19,8 +20,10 @@ test_that("predict feature interactions works", {
|
||||
|
||||
y <- f_int(X)
|
||||
|
||||
dm <- xgb.DMatrix(X, label = y)
|
||||
param <- list(eta = 0.1, max_depth = 4, base_score = mean(y), lambda = 0, nthread = 2)
|
||||
dm <- xgb.DMatrix(X, label = y, nthread = n_threads)
|
||||
param <- list(
|
||||
eta = 0.1, max_depth = 4, base_score = mean(y), lambda = 0, nthread = n_threads
|
||||
)
|
||||
b <- xgb.train(param, dm, 100)
|
||||
|
||||
pred <- predict(b, dm, outputmargin = TRUE)
|
||||
@ -99,11 +102,13 @@ test_that("SHAP contribution values are not NAN", {
|
||||
verbose = 0,
|
||||
params = list(
|
||||
objective = "reg:squarederror",
|
||||
eval_metric = "rmse"),
|
||||
eval_metric = "rmse",
|
||||
nthread = n_threads
|
||||
),
|
||||
data = as.matrix(subset(d, fold == 2)[, ivs]),
|
||||
label = subset(d, fold == 2)$y,
|
||||
nthread = 1,
|
||||
nrounds = 3)
|
||||
nrounds = 3
|
||||
)
|
||||
|
||||
shaps <- as.data.frame(predict(fit,
|
||||
newdata = as.matrix(subset(d, fold == 1)[, ivs]),
|
||||
@ -116,8 +121,12 @@ test_that("SHAP contribution values are not NAN", {
|
||||
|
||||
|
||||
test_that("multiclass feature interactions work", {
|
||||
dm <- xgb.DMatrix(as.matrix(iris[, -5]), label = as.numeric(iris$Species) - 1)
|
||||
param <- list(eta = 0.1, max_depth = 4, objective = 'multi:softprob', num_class = 3)
|
||||
dm <- xgb.DMatrix(
|
||||
as.matrix(iris[, -5]), label = as.numeric(iris$Species) - 1, nthread = n_threads
|
||||
)
|
||||
param <- list(
|
||||
eta = 0.1, max_depth = 4, objective = 'multi:softprob', num_class = 3, nthread = n_threads
|
||||
)
|
||||
b <- xgb.train(param, dm, 40)
|
||||
pred <- t(
|
||||
array(
|
||||
@ -166,6 +175,7 @@ test_that("SHAP single sample works", {
|
||||
max_depth = 2,
|
||||
nrounds = 4,
|
||||
objective = "binary:logistic",
|
||||
nthread = n_threads
|
||||
)
|
||||
|
||||
predt <- predict(
|
||||
|
||||
@ -9,7 +9,8 @@ test_that("load/save raw works", {
|
||||
nrounds <- 8
|
||||
booster <- xgboost(
|
||||
data = train$data, label = train$label,
|
||||
nrounds = nrounds, objective = "binary:logistic"
|
||||
nrounds = nrounds, objective = "binary:logistic",
|
||||
nthread = 2
|
||||
)
|
||||
|
||||
json_bytes <- xgb.save.raw(booster, raw_format = "json")
|
||||
|
||||
@ -9,20 +9,20 @@ metadata <- list(
|
||||
kClasses = 3
|
||||
)
|
||||
|
||||
run_model_param_check <- function (config) {
|
||||
run_model_param_check <- function(config) {
|
||||
testthat::expect_equal(config$learner$learner_model_param$num_feature, '4')
|
||||
testthat::expect_equal(config$learner$learner_train_param$booster, 'gbtree')
|
||||
}
|
||||
|
||||
get_num_tree <- function (booster) {
|
||||
get_num_tree <- function(booster) {
|
||||
dump <- xgb.dump(booster)
|
||||
m <- regexec('booster\\[[0-9]+\\]', dump, perl = TRUE)
|
||||
m <- regmatches(dump, m)
|
||||
num_tree <- Reduce('+', lapply(m, length))
|
||||
return (num_tree)
|
||||
return(num_tree)
|
||||
}
|
||||
|
||||
run_booster_check <- function (booster, name) {
|
||||
run_booster_check <- function(booster, name) {
|
||||
# If given a handle, we need to call xgb.Booster.complete() prior to using xgb.config().
|
||||
if (inherits(booster, "xgb.Booster") && xgboost:::is.null.handle(booster$handle)) {
|
||||
booster <- xgb.Booster.complete(booster)
|
||||
@ -66,9 +66,9 @@ test_that("Models from previous versions of XGBoost can be loaded", {
|
||||
unzip(zipfile, exdir = extract_dir, overwrite = TRUE)
|
||||
model_dir <- file.path(extract_dir, 'models')
|
||||
|
||||
pred_data <- xgb.DMatrix(matrix(c(0, 0, 0, 0), nrow = 1, ncol = 4))
|
||||
pred_data <- xgb.DMatrix(matrix(c(0, 0, 0, 0), nrow = 1, ncol = 4), nthread = 2)
|
||||
|
||||
lapply(list.files(model_dir), function (x) {
|
||||
lapply(list.files(model_dir), function(x) {
|
||||
model_file <- file.path(model_dir, x)
|
||||
m <- regexec("xgboost-([0-9\\.]+)\\.([a-z]+)\\.[a-z]+", model_file, perl = TRUE)
|
||||
m <- regmatches(model_file, m)[[1]]
|
||||
@ -87,6 +87,7 @@ test_that("Models from previous versions of XGBoost can be loaded", {
|
||||
booster <- readRDS(model_file)
|
||||
} else {
|
||||
booster <- xgb.load(model_file)
|
||||
xgb.parameters(booster) <- list(nthread = 2)
|
||||
}
|
||||
predict(booster, newdata = pred_data)
|
||||
run_booster_check(booster, name)
|
||||
|
||||
@ -3,8 +3,12 @@ context('Test model params and call are exposed to R')
|
||||
data(agaricus.train, package = 'xgboost')
|
||||
data(agaricus.test, package = 'xgboost')
|
||||
|
||||
dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
|
||||
dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label)
|
||||
dtrain <- xgb.DMatrix(
|
||||
agaricus.train$data, label = agaricus.train$label, nthread = 2
|
||||
)
|
||||
dtest <- xgb.DMatrix(
|
||||
agaricus.test$data, label = agaricus.test$label, nthread = 2
|
||||
)
|
||||
|
||||
bst <- xgboost(data = dtrain,
|
||||
max_depth = 2,
|
||||
|
||||
@ -4,8 +4,10 @@ set.seed(1994)
|
||||
|
||||
test_that("Poisson regression works", {
|
||||
data(mtcars)
|
||||
bst <- xgboost(data = as.matrix(mtcars[, -11]), label = mtcars[, 11],
|
||||
objective = 'count:poisson', nrounds = 10, verbose = 0)
|
||||
bst <- xgboost(
|
||||
data = as.matrix(mtcars[, -11]), label = mtcars[, 11],
|
||||
objective = 'count:poisson', nrounds = 10, verbose = 0, nthread = 2
|
||||
)
|
||||
expect_equal(class(bst), "xgb.Booster")
|
||||
pred <- predict(bst, as.matrix(mtcars[, -11]))
|
||||
expect_equal(length(pred), 32)
|
||||
|
||||
@ -1,5 +1,7 @@
|
||||
context('Learning to rank')
|
||||
|
||||
n_threads <- 2
|
||||
|
||||
test_that('Test ranking with unweighted data', {
|
||||
X <- Matrix::sparseMatrix(
|
||||
i = c(2, 3, 7, 9, 12, 15, 17, 18)
|
||||
@ -9,10 +11,10 @@ test_that('Test ranking with unweighted data', {
|
||||
)
|
||||
y <- c(0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0)
|
||||
group <- c(5, 5, 5, 5)
|
||||
dtrain <- xgb.DMatrix(X, label = y, group = group)
|
||||
dtrain <- xgb.DMatrix(X, label = y, group = group, nthread = n_threads)
|
||||
|
||||
params <- list(eta = 1, tree_method = 'exact', objective = 'rank:pairwise', max_depth = 1,
|
||||
eval_metric = 'auc', eval_metric = 'aucpr')
|
||||
eval_metric = 'auc', eval_metric = 'aucpr', nthread = n_threads)
|
||||
bst <- xgb.train(params, dtrain, nrounds = 10, watchlist = list(train = dtrain))
|
||||
# Check if the metric is monotone increasing
|
||||
expect_true(all(diff(bst$evaluation_log$train_auc) >= 0))
|
||||
@ -29,10 +31,14 @@ test_that('Test ranking with weighted data', {
|
||||
y <- c(0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0)
|
||||
group <- c(5, 5, 5, 5)
|
||||
weight <- c(1.0, 2.0, 3.0, 4.0)
|
||||
dtrain <- xgb.DMatrix(X, label = y, group = group, weight = weight)
|
||||
dtrain <- xgb.DMatrix(
|
||||
X, label = y, group = group, weight = weight, nthread = n_threads
|
||||
)
|
||||
|
||||
params <- list(eta = 1, tree_method = 'exact', objective = 'rank:pairwise', max_depth = 1,
|
||||
eval_metric = 'auc', eval_metric = 'aucpr')
|
||||
params <- list(
|
||||
eta = 1, tree_method = "exact", objective = "rank:pairwise", max_depth = 1,
|
||||
eval_metric = "auc", eval_metric = "aucpr", nthread = n_threads
|
||||
)
|
||||
bst <- xgb.train(params, dtrain, nrounds = 10, watchlist = list(train = dtrain))
|
||||
# Check if the metric is monotone increasing
|
||||
expect_true(all(diff(bst$evaluation_log$train_auc) >= 0))
|
||||
@ -41,7 +47,7 @@ test_that('Test ranking with weighted data', {
|
||||
pred <- predict(bst, newdata = dtrain, ntreelimit = i)
|
||||
# is_sorted[i]: is i-th group correctly sorted by the ranking predictor?
|
||||
is_sorted <- lapply(seq(1, 20, by = 5),
|
||||
function (k) {
|
||||
function(k) {
|
||||
ind <- order(-pred[k:(k + 4)])
|
||||
z <- y[ind + (k - 1)]
|
||||
all(diff(z) <= 0) # Check if z is monotone decreasing
|
||||
|
||||
@ -16,6 +16,7 @@ test_that("Can save and load models with Unicode paths", {
|
||||
path <- file.path(tmpdir, x)
|
||||
xgb.save(bst, path)
|
||||
bst2 <- xgb.load(path)
|
||||
xgb.parameters(bst2) <- list(nthread = 2)
|
||||
expect_equal(predict(bst, test$data), predict(bst2, test$data))
|
||||
})
|
||||
})
|
||||
|
||||
@ -2,8 +2,15 @@ context("update trees in an existing model")
|
||||
|
||||
data(agaricus.train, package = 'xgboost')
|
||||
data(agaricus.test, package = 'xgboost')
|
||||
dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
|
||||
dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label)
|
||||
|
||||
n_threads <- 1
|
||||
|
||||
dtrain <- xgb.DMatrix(
|
||||
agaricus.train$data, label = agaricus.train$label, nthread = n_threads
|
||||
)
|
||||
dtest <- xgb.DMatrix(
|
||||
agaricus.test$data, label = agaricus.test$label, nthread = n_threads
|
||||
)
|
||||
|
||||
# Disable flaky tests for 32-bit Windows.
|
||||
# See https://github.com/dmlc/xgboost/issues/3720
|
||||
@ -14,7 +21,7 @@ test_that("updating the model works", {
|
||||
|
||||
# no-subsampling
|
||||
p1 <- list(
|
||||
objective = "binary:logistic", max_depth = 2, eta = 0.05, nthread = 2,
|
||||
objective = "binary:logistic", max_depth = 2, eta = 0.05, nthread = n_threads,
|
||||
updater = "grow_colmaker,prune"
|
||||
)
|
||||
set.seed(11)
|
||||
@ -86,9 +93,11 @@ test_that("updating the model works", {
|
||||
})
|
||||
|
||||
test_that("updating works for multiclass & multitree", {
|
||||
dtr <- xgb.DMatrix(as.matrix(iris[, -5]), label = as.numeric(iris$Species) - 1)
|
||||
dtr <- xgb.DMatrix(
|
||||
as.matrix(iris[, -5]), label = as.numeric(iris$Species) - 1, nthread = n_threads
|
||||
)
|
||||
watchlist <- list(train = dtr)
|
||||
p0 <- list(max_depth = 2, eta = 0.5, nthread = 2, subsample = 0.6,
|
||||
p0 <- list(max_depth = 2, eta = 0.5, nthread = n_threads, subsample = 0.6,
|
||||
objective = "multi:softprob", num_class = 3, num_parallel_tree = 2,
|
||||
base_score = 0)
|
||||
set.seed(121)
|
||||
|
||||
@ -31,6 +31,8 @@ require(data.table)
|
||||
if (!require('vcd')) {
|
||||
install.packages('vcd')
|
||||
}
|
||||
|
||||
data.table::setDTthreads(2)
|
||||
```
|
||||
|
||||
> **VCD** package is used for one of its embedded dataset only.
|
||||
@ -297,23 +299,25 @@ test <- agaricus.test
|
||||
|
||||
#Random Forest - 1000 trees
|
||||
bst <- xgboost(
|
||||
data = train$data
|
||||
, label = train$label
|
||||
, max_depth = 4
|
||||
, num_parallel_tree = 1000
|
||||
, subsample = 0.5
|
||||
, colsample_bytree = 0.5
|
||||
, nrounds = 1
|
||||
, objective = "binary:logistic"
|
||||
data = train$data,
|
||||
label = train$label,
|
||||
max_depth = 4,
|
||||
num_parallel_tree = 1000,
|
||||
subsample = 0.5,
|
||||
colsample_bytree = 0.5,
|
||||
nrounds = 1,
|
||||
objective = "binary:logistic",
|
||||
nthread = 2
|
||||
)
|
||||
|
||||
#Boosting - 3 rounds
|
||||
bst <- xgboost(
|
||||
data = train$data
|
||||
, label = train$label
|
||||
, max_depth = 4
|
||||
, nrounds = 3
|
||||
, objective = "binary:logistic"
|
||||
data = train$data,
|
||||
label = train$label,
|
||||
max_depth = 4,
|
||||
nrounds = 3,
|
||||
objective = "binary:logistic",
|
||||
nthread = 2
|
||||
)
|
||||
```
|
||||
|
||||
|
||||
@ -86,9 +86,10 @@ data(agaricus.test, package='xgboost')
|
||||
train <- agaricus.train
|
||||
test <- agaricus.test
|
||||
bst <- xgboost(data = train$data, label = train$label, max_depth = 2, eta = 1,
|
||||
nrounds = 2, objective = "binary:logistic")
|
||||
nrounds = 2, objective = "binary:logistic", nthread = 2)
|
||||
xgb.save(bst, 'model.save')
|
||||
bst = xgb.load('model.save')
|
||||
xgb.parameters(bst) <- list(nthread = 2)
|
||||
pred <- predict(bst, test$data)
|
||||
@
|
||||
|
||||
@ -127,7 +128,7 @@ training from initial prediction value, weighted training instance.
|
||||
|
||||
We can use \verb@xgb.DMatrix@ to construct an \verb@xgb.DMatrix@ object:
|
||||
<<xgb.DMatrix>>=
|
||||
dtrain <- xgb.DMatrix(train$data, label = train$label)
|
||||
dtrain <- xgb.DMatrix(train$data, label = train$label, nthread = 2)
|
||||
class(dtrain)
|
||||
head(getinfo(dtrain,'label'))
|
||||
@
|
||||
@ -161,9 +162,9 @@ evalerror <- function(preds, dtrain) {
|
||||
return(list(metric = "MSE", value = err))
|
||||
}
|
||||
|
||||
dtest <- xgb.DMatrix(test$data, label = test$label)
|
||||
dtest <- xgb.DMatrix(test$data, label = test$label, nthread = 2)
|
||||
watchlist <- list(eval = dtest, train = dtrain)
|
||||
param <- list(max_depth = 2, eta = 1)
|
||||
param <- list(max_depth = 2, eta = 1, nthread = 2)
|
||||
|
||||
bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, logregobj, evalerror, maximize = FALSE)
|
||||
@
|
||||
|
||||
@ -173,13 +173,13 @@ Alternatively, you can put your dataset in a *dense* matrix, i.e. a basic **R**
|
||||
|
||||
```{r trainingDense, message=F, warning=F}
|
||||
bstDense <- xgboost(
|
||||
data = as.matrix(train$data)
|
||||
, label = train$label
|
||||
, max_depth = 2
|
||||
, eta = 1
|
||||
, nthread = 2
|
||||
, nrounds = 2
|
||||
, objective = "binary:logistic"
|
||||
data = as.matrix(train$data),
|
||||
label = train$label,
|
||||
max_depth = 2,
|
||||
eta = 1,
|
||||
nthread = 2,
|
||||
nrounds = 2,
|
||||
objective = "binary:logistic"
|
||||
)
|
||||
```
|
||||
|
||||
@ -188,14 +188,14 @@ bstDense <- xgboost(
|
||||
**XGBoost** offers a way to group them in a `xgb.DMatrix`. You can even add other meta data in it. It will be useful for the most advanced features we will discover later.
|
||||
|
||||
```{r trainingDmatrix, message=F, warning=F}
|
||||
dtrain <- xgb.DMatrix(data = train$data, label = train$label)
|
||||
dtrain <- xgb.DMatrix(data = train$data, label = train$label, nthread = 2)
|
||||
bstDMatrix <- xgboost(
|
||||
data = dtrain
|
||||
, max_depth = 2
|
||||
, eta = 1
|
||||
, nthread = 2
|
||||
, nrounds = 2
|
||||
, objective = "binary:logistic"
|
||||
data = dtrain,
|
||||
max_depth = 2,
|
||||
eta = 1,
|
||||
nthread = 2,
|
||||
nrounds = 2,
|
||||
objective = "binary:logistic"
|
||||
)
|
||||
```
|
||||
|
||||
@ -314,8 +314,8 @@ Most of the features below have been implemented to help you to improve your mod
|
||||
For the following advanced features, we need to put data in `xgb.DMatrix` as explained above.
|
||||
|
||||
```{r DMatrix, message=F, warning=F}
|
||||
dtrain <- xgb.DMatrix(data = train$data, label = train$label)
|
||||
dtest <- xgb.DMatrix(data = test$data, label = test$label)
|
||||
dtrain <- xgb.DMatrix(data = train$data, label = train$label, nthread = 2)
|
||||
dtest <- xgb.DMatrix(data = test$data, label = test$label, nthread = 2)
|
||||
```
|
||||
|
||||
### Measure learning progress with xgb.train
|
||||
@ -476,6 +476,7 @@ An interesting test to see how identical our saved model is to the original one
|
||||
```{r loadModel, message=F, warning=F}
|
||||
# load binary model to R
|
||||
bst2 <- xgb.load("xgboost.model")
|
||||
xgb.parameters(bst2) <- list(nthread = 2)
|
||||
pred2 <- predict(bst2, test$data)
|
||||
|
||||
# And now the test
|
||||
@ -500,6 +501,7 @@ print(class(rawVec))
|
||||
|
||||
# load binary model to R
|
||||
bst3 <- xgb.load(rawVec)
|
||||
xgb.parameters(bst3) <- list(nthread = 2)
|
||||
pred3 <- predict(bst3, test$data)
|
||||
|
||||
# pred2 should be identical to pred
|
||||
|
||||
@ -175,7 +175,7 @@ bst_preds == bst_from_json_preds
|
||||
|
||||
None are exactly equal again. What is going on here? Well, since we are using the value `1` in the calculations, we have introduced a double into the calculation. Because of this, all float values are promoted to 64-bit doubles and the 64-bit version of the exponentiation operator `exp` is also used. On the other hand, xgboost uses the 32-bit version of the exponentiation operator in its [sigmoid function](https://github.com/dmlc/xgboost/blob/54980b8959680a0da06a3fc0ec776e47c8cbb0a1/src/common/math.h#L25-L27).
|
||||
|
||||
How do we fix this? We have to ensure we use the correct data types everywhere and the correct operators. If we use only floats, the float library that we have loaded will ensure the 32-bit float exponentiation operator is applied.
|
||||
How do we fix this? We have to ensure we use the correct data types everywhere and the correct operators. If we use only floats, the float library that we have loaded will ensure the 32-bit float exponentiation operator is applied.
|
||||
```{r}
|
||||
# calculate the predictions casting doubles to floats
|
||||
bst_from_json_preds <- ifelse(
|
||||
|
||||
@ -1,16 +1,17 @@
|
||||
function (run_doxygen)
|
||||
function(run_doxygen)
|
||||
find_package(Doxygen REQUIRED)
|
||||
|
||||
if (NOT DOXYGEN_DOT_FOUND)
|
||||
if(NOT DOXYGEN_DOT_FOUND)
|
||||
message(FATAL_ERROR "Command `dot` not found. Please install graphviz.")
|
||||
endif (NOT DOXYGEN_DOT_FOUND)
|
||||
endif()
|
||||
|
||||
configure_file(
|
||||
${xgboost_SOURCE_DIR}/doc/Doxyfile.in
|
||||
${CMAKE_CURRENT_BINARY_DIR}/Doxyfile @ONLY)
|
||||
add_custom_target( doc_doxygen ALL
|
||||
add_custom_target(
|
||||
doc_doxygen ALL
|
||||
COMMAND ${DOXYGEN_EXECUTABLE} ${CMAKE_CURRENT_BINARY_DIR}/Doxyfile
|
||||
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
|
||||
COMMENT "Generate C APIs documentation."
|
||||
VERBATIM)
|
||||
endfunction (run_doxygen)
|
||||
endfunction()
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
function (find_prefetch_intrinsics)
|
||||
function(find_prefetch_intrinsics)
|
||||
include(CheckCXXSourceCompiles)
|
||||
check_cxx_source_compiles("
|
||||
#include <xmmintrin.h>
|
||||
@ -19,4 +19,4 @@ function (find_prefetch_intrinsics)
|
||||
" XGBOOST_BUILTIN_PREFETCH_PRESENT)
|
||||
set(XGBOOST_MM_PREFETCH_PRESENT ${XGBOOST_MM_PREFETCH_PRESENT} PARENT_SCOPE)
|
||||
set(XGBOOST_BUILTIN_PREFETCH_PRESENT ${XGBOOST_BUILTIN_PREFETCH_PRESENT} PARENT_SCOPE)
|
||||
endfunction (find_prefetch_intrinsics)
|
||||
endfunction()
|
||||
|
||||
@ -12,9 +12,9 @@ macro(enable_sanitizer sanitizer)
|
||||
elseif(${sanitizer} MATCHES "thread")
|
||||
find_package(TSan)
|
||||
set(SAN_COMPILE_FLAGS "${SAN_COMPILE_FLAGS} -fsanitize=thread")
|
||||
if (TSan_FOUND)
|
||||
if(TSan_FOUND)
|
||||
link_libraries(${TSan_LIBRARY})
|
||||
endif (TSan_FOUND)
|
||||
endif()
|
||||
|
||||
elseif(${sanitizer} MATCHES "leak")
|
||||
find_package(LSan)
|
||||
@ -33,16 +33,16 @@ macro(enable_sanitizers SANITIZERS)
|
||||
# Check sanitizers compatibility.
|
||||
# Idealy, we should use if(san IN_LIST SANITIZERS) ... endif()
|
||||
# But I haven't figure out how to make it work.
|
||||
foreach ( _san ${SANITIZERS} )
|
||||
foreach( _san ${SANITIZERS} )
|
||||
string(TOLOWER ${_san} _san)
|
||||
if (_san MATCHES "thread")
|
||||
if (${_use_other_sanitizers})
|
||||
if(_san MATCHES "thread")
|
||||
if(${_use_other_sanitizers})
|
||||
message(FATAL_ERROR
|
||||
"thread sanitizer is not compatible with ${_san} sanitizer.")
|
||||
endif()
|
||||
set(_use_thread_sanitizer 1)
|
||||
else ()
|
||||
if (${_use_thread_sanitizer})
|
||||
else()
|
||||
if(${_use_thread_sanitizer})
|
||||
message(FATAL_ERROR
|
||||
"${_san} sanitizer is not compatible with thread sanitizer.")
|
||||
endif()
|
||||
|
||||
@ -11,7 +11,7 @@ function(auto_source_group SOURCES)
|
||||
|
||||
source_group("${GROUP}" FILES "${FILE}")
|
||||
endforeach()
|
||||
endfunction(auto_source_group)
|
||||
endfunction()
|
||||
|
||||
# Force static runtime for MSVC
|
||||
function(msvc_use_static_runtime)
|
||||
@ -50,7 +50,7 @@ function(msvc_use_static_runtime)
|
||||
endif()
|
||||
endforeach()
|
||||
endif()
|
||||
endfunction(msvc_use_static_runtime)
|
||||
endfunction()
|
||||
|
||||
# Set output directory of target, ignoring debug or release
|
||||
function(set_output_directory target dir)
|
||||
@ -70,17 +70,17 @@ function(set_output_directory target dir)
|
||||
ARCHIVE_OUTPUT_DIRECTORY_RELEASE ${dir}
|
||||
ARCHIVE_OUTPUT_DIRECTORY_RELWITHDEBINFO ${dir}
|
||||
ARCHIVE_OUTPUT_DIRECTORY_MINSIZEREL ${dir})
|
||||
endfunction(set_output_directory)
|
||||
endfunction()
|
||||
|
||||
# Set a default build type to release if none was specified
|
||||
function(set_default_configuration_release)
|
||||
if(CMAKE_CONFIGURATION_TYPES STREQUAL "Debug;Release;MinSizeRel;RelWithDebInfo") # multiconfig generator?
|
||||
set(CMAKE_CONFIGURATION_TYPES Release CACHE STRING "" FORCE)
|
||||
elseif(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
|
||||
message(STATUS "Setting build type to 'Release' as none was specified.")
|
||||
set(CMAKE_BUILD_TYPE Release CACHE STRING "Choose the type of build." FORCE )
|
||||
endif()
|
||||
endfunction(set_default_configuration_release)
|
||||
elseif(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
|
||||
message(STATUS "Setting build type to 'Release' as none was specified.")
|
||||
set(CMAKE_BUILD_TYPE Release CACHE STRING "Choose the type of build." FORCE)
|
||||
endif()
|
||||
endfunction()
|
||||
|
||||
# Generate nvcc compiler flags given a list of architectures
|
||||
# Also generates PTX for the most recent architecture for forwards compatibility
|
||||
@ -90,9 +90,9 @@ function(format_gencode_flags flags out)
|
||||
endif()
|
||||
# Set up architecture flags
|
||||
if(NOT flags)
|
||||
if (CUDA_VERSION VERSION_GREATER_EQUAL "11.8")
|
||||
if(CUDA_VERSION VERSION_GREATER_EQUAL "11.8")
|
||||
set(flags "50;60;70;80;90")
|
||||
elseif (CUDA_VERSION VERSION_GREATER_EQUAL "11.0")
|
||||
elseif(CUDA_VERSION VERSION_GREATER_EQUAL "11.0")
|
||||
set(flags "50;60;70;80")
|
||||
elseif(CUDA_VERSION VERSION_GREATER_EQUAL "10.0")
|
||||
set(flags "35;50;60;70")
|
||||
@ -103,7 +103,7 @@ function(format_gencode_flags flags out)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if (CMAKE_VERSION VERSION_GREATER_EQUAL "3.18")
|
||||
if(CMAKE_VERSION VERSION_GREATER_EQUAL "3.18")
|
||||
cmake_policy(SET CMP0104 NEW)
|
||||
list(GET flags -1 latest_arch)
|
||||
list(TRANSFORM flags APPEND "-real")
|
||||
@ -121,8 +121,8 @@ function(format_gencode_flags flags out)
|
||||
set(${out} "${${out}}--generate-code=arch=compute_${ver},code=compute_${ver};")
|
||||
set(${out} "${${out}}" PARENT_SCOPE)
|
||||
message(STATUS "CUDA GEN_CODE: ${GEN_CODE}")
|
||||
endif (CMAKE_VERSION VERSION_GREATER_EQUAL "3.18")
|
||||
endfunction(format_gencode_flags flags)
|
||||
endif()
|
||||
endfunction()
|
||||
|
||||
# Set CUDA related flags to target. Must be used after code `format_gencode_flags`.
|
||||
function(xgboost_set_cuda_flags target)
|
||||
@ -133,35 +133,35 @@ function(xgboost_set_cuda_flags target)
|
||||
$<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler=${OpenMP_CXX_FLAGS}>
|
||||
$<$<COMPILE_LANGUAGE:CUDA>:-Xfatbin=-compress-all>)
|
||||
|
||||
if (USE_PER_THREAD_DEFAULT_STREAM)
|
||||
if(USE_PER_THREAD_DEFAULT_STREAM)
|
||||
target_compile_options(${target} PRIVATE
|
||||
$<$<COMPILE_LANGUAGE:CUDA>:--default-stream per-thread>)
|
||||
endif (USE_PER_THREAD_DEFAULT_STREAM)
|
||||
endif()
|
||||
|
||||
if (CMAKE_VERSION VERSION_GREATER_EQUAL "3.18")
|
||||
if(CMAKE_VERSION VERSION_GREATER_EQUAL "3.18")
|
||||
set_property(TARGET ${target} PROPERTY CUDA_ARCHITECTURES ${CMAKE_CUDA_ARCHITECTURES})
|
||||
endif (CMAKE_VERSION VERSION_GREATER_EQUAL "3.18")
|
||||
endif()
|
||||
|
||||
if (FORCE_COLORED_OUTPUT)
|
||||
if (FORCE_COLORED_OUTPUT AND (CMAKE_GENERATOR STREQUAL "Ninja") AND
|
||||
if(FORCE_COLORED_OUTPUT)
|
||||
if(FORCE_COLORED_OUTPUT AND (CMAKE_GENERATOR STREQUAL "Ninja") AND
|
||||
((CMAKE_CXX_COMPILER_ID STREQUAL "GNU") OR
|
||||
(CMAKE_CXX_COMPILER_ID STREQUAL "Clang")))
|
||||
target_compile_options(${target} PRIVATE
|
||||
$<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler=-fdiagnostics-color=always>)
|
||||
endif()
|
||||
endif (FORCE_COLORED_OUTPUT)
|
||||
endif()
|
||||
|
||||
if (USE_DEVICE_DEBUG)
|
||||
if(USE_DEVICE_DEBUG)
|
||||
target_compile_options(${target} PRIVATE
|
||||
$<$<AND:$<CONFIG:DEBUG>,$<COMPILE_LANGUAGE:CUDA>>:-G;-src-in-ptx>)
|
||||
else (USE_DEVICE_DEBUG)
|
||||
else()
|
||||
target_compile_options(${target} PRIVATE
|
||||
$<$<COMPILE_LANGUAGE:CUDA>:-lineinfo>)
|
||||
endif (USE_DEVICE_DEBUG)
|
||||
endif()
|
||||
|
||||
if (USE_NVTX)
|
||||
if(USE_NVTX)
|
||||
target_compile_definitions(${target} PRIVATE -DXGBOOST_USE_NVTX=1)
|
||||
endif (USE_NVTX)
|
||||
endif()
|
||||
|
||||
target_compile_definitions(${target} PRIVATE -DXGBOOST_USE_CUDA=1)
|
||||
target_include_directories(
|
||||
@ -169,17 +169,17 @@ function(xgboost_set_cuda_flags target)
|
||||
${xgboost_SOURCE_DIR}/gputreeshap
|
||||
${CUDAToolkit_INCLUDE_DIRS})
|
||||
|
||||
if (MSVC)
|
||||
if(MSVC)
|
||||
target_compile_options(${target} PRIVATE
|
||||
$<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler=/utf-8>)
|
||||
endif (MSVC)
|
||||
endif()
|
||||
|
||||
set_target_properties(${target} PROPERTIES
|
||||
CUDA_STANDARD 17
|
||||
CUDA_STANDARD_REQUIRED ON
|
||||
CUDA_SEPARABLE_COMPILATION OFF
|
||||
CUDA_RUNTIME_LIBRARY Static)
|
||||
endfunction(xgboost_set_cuda_flags)
|
||||
endfunction()
|
||||
|
||||
# Set HIP related flags to target.
|
||||
function(xgboost_set_hip_flags target)
|
||||
@ -199,16 +199,16 @@ function(xgboost_set_hip_flags target)
|
||||
endfunction(xgboost_set_hip_flags)
|
||||
|
||||
macro(xgboost_link_nccl target)
|
||||
if (BUILD_STATIC_LIB)
|
||||
if(BUILD_STATIC_LIB)
|
||||
target_include_directories(${target} PUBLIC ${NCCL_INCLUDE_DIR})
|
||||
target_compile_definitions(${target} PUBLIC -DXGBOOST_USE_NCCL=1)
|
||||
target_link_libraries(${target} PUBLIC ${NCCL_LIBRARY})
|
||||
else ()
|
||||
else()
|
||||
target_include_directories(${target} PRIVATE ${NCCL_INCLUDE_DIR})
|
||||
target_compile_definitions(${target} PRIVATE -DXGBOOST_USE_NCCL=1)
|
||||
target_link_libraries(${target} PRIVATE ${NCCL_LIBRARY})
|
||||
endif (BUILD_STATIC_LIB)
|
||||
endmacro(xgboost_link_nccl)
|
||||
endif()
|
||||
endmacro()
|
||||
|
||||
# compile options
|
||||
macro(xgboost_target_properties target)
|
||||
@ -217,110 +217,106 @@ macro(xgboost_target_properties target)
|
||||
CXX_STANDARD_REQUIRED ON
|
||||
POSITION_INDEPENDENT_CODE ON)
|
||||
|
||||
if (HIDE_CXX_SYMBOLS)
|
||||
if(HIDE_CXX_SYMBOLS)
|
||||
#-- Hide all C++ symbols
|
||||
set_target_properties(${target} PROPERTIES
|
||||
C_VISIBILITY_PRESET hidden
|
||||
CXX_VISIBILITY_PRESET hidden
|
||||
CUDA_VISIBILITY_PRESET hidden
|
||||
)
|
||||
endif (HIDE_CXX_SYMBOLS)
|
||||
endif()
|
||||
|
||||
if (ENABLE_ALL_WARNINGS)
|
||||
if(ENABLE_ALL_WARNINGS)
|
||||
target_compile_options(${target} PUBLIC
|
||||
$<IF:$<COMPILE_LANGUAGE:CUDA>,
|
||||
-Xcompiler=-Wall -Xcompiler=-Wextra -Xcompiler=-Wno-expansion-to-defined,
|
||||
-Wall -Wextra -Wno-expansion-to-defined>
|
||||
)
|
||||
target_compile_options(${target} PUBLIC
|
||||
$<IF:$<COMPILE_LANGUAGE:HIP>,
|
||||
-Wall -Wextra >
|
||||
)
|
||||
endif(ENABLE_ALL_WARNINGS)
|
||||
endif()
|
||||
|
||||
target_compile_options(${target}
|
||||
PRIVATE
|
||||
$<$<AND:$<CXX_COMPILER_ID:MSVC>,$<COMPILE_LANGUAGE:CXX>>:/MP>
|
||||
$<$<AND:$<NOT:$<CXX_COMPILER_ID:MSVC>>,$<COMPILE_LANGUAGE:CXX>>:-funroll-loops>)
|
||||
|
||||
if (MSVC)
|
||||
if(MSVC)
|
||||
target_compile_options(${target} PRIVATE
|
||||
$<$<NOT:$<COMPILE_LANGUAGE:CUDA>>:/utf-8>
|
||||
-D_CRT_SECURE_NO_WARNINGS
|
||||
-D_CRT_SECURE_NO_DEPRECATE
|
||||
)
|
||||
endif (MSVC)
|
||||
endif()
|
||||
|
||||
if (WIN32 AND MINGW)
|
||||
if(WIN32 AND MINGW)
|
||||
target_compile_options(${target} PUBLIC -static-libstdc++)
|
||||
endif (WIN32 AND MINGW)
|
||||
endmacro(xgboost_target_properties)
|
||||
endif()
|
||||
endmacro()
|
||||
|
||||
# Custom definitions used in xgboost.
|
||||
macro(xgboost_target_defs target)
|
||||
if (NOT ${target} STREQUAL "dmlc") # skip dmlc core for custom logging.
|
||||
if(NOT ${target} STREQUAL "dmlc") # skip dmlc core for custom logging.
|
||||
target_compile_definitions(${target}
|
||||
PRIVATE
|
||||
-DDMLC_LOG_CUSTOMIZE=1
|
||||
$<$<NOT:$<CXX_COMPILER_ID:MSVC>>:_MWAITXINTRIN_H_INCLUDED>)
|
||||
endif ()
|
||||
if (USE_DEBUG_OUTPUT)
|
||||
endif()
|
||||
if(USE_DEBUG_OUTPUT)
|
||||
target_compile_definitions(${target} PRIVATE -DXGBOOST_USE_DEBUG_OUTPUT=1)
|
||||
endif (USE_DEBUG_OUTPUT)
|
||||
if (XGBOOST_MM_PREFETCH_PRESENT)
|
||||
endif()
|
||||
if(XGBOOST_MM_PREFETCH_PRESENT)
|
||||
target_compile_definitions(${target}
|
||||
PRIVATE
|
||||
-DXGBOOST_MM_PREFETCH_PRESENT=1)
|
||||
endif(XGBOOST_MM_PREFETCH_PRESENT)
|
||||
if (XGBOOST_BUILTIN_PREFETCH_PRESENT)
|
||||
endif()
|
||||
if(XGBOOST_BUILTIN_PREFETCH_PRESENT)
|
||||
target_compile_definitions(${target}
|
||||
PRIVATE
|
||||
-DXGBOOST_BUILTIN_PREFETCH_PRESENT=1)
|
||||
endif (XGBOOST_BUILTIN_PREFETCH_PRESENT)
|
||||
endif()
|
||||
|
||||
if (PLUGIN_RMM)
|
||||
if(PLUGIN_RMM)
|
||||
target_compile_definitions(objxgboost PUBLIC -DXGBOOST_USE_RMM=1)
|
||||
endif (PLUGIN_RMM)
|
||||
endmacro(xgboost_target_defs)
|
||||
endif()
|
||||
endmacro()
|
||||
|
||||
# handles dependencies
|
||||
macro(xgboost_target_link_libraries target)
|
||||
if (BUILD_STATIC_LIB)
|
||||
if(BUILD_STATIC_LIB)
|
||||
target_link_libraries(${target} PUBLIC Threads::Threads ${CMAKE_THREAD_LIBS_INIT})
|
||||
else()
|
||||
target_link_libraries(${target} PRIVATE Threads::Threads ${CMAKE_THREAD_LIBS_INIT})
|
||||
endif (BUILD_STATIC_LIB)
|
||||
endif()
|
||||
|
||||
if (USE_OPENMP)
|
||||
if (BUILD_STATIC_LIB)
|
||||
if(USE_OPENMP)
|
||||
if(BUILD_STATIC_LIB)
|
||||
target_link_libraries(${target} PUBLIC OpenMP::OpenMP_CXX)
|
||||
else()
|
||||
target_link_libraries(${target} PRIVATE OpenMP::OpenMP_CXX)
|
||||
endif (BUILD_STATIC_LIB)
|
||||
endif (USE_OPENMP)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if (USE_CUDA)
|
||||
if(USE_CUDA)
|
||||
xgboost_set_cuda_flags(${target})
|
||||
target_link_libraries(${target} PUBLIC CUDA::cudart_static)
|
||||
endif (USE_CUDA)
|
||||
endif()
|
||||
|
||||
if (USE_HIP)
|
||||
xgboost_set_hip_flags(${target})
|
||||
endif (USE_HIP)
|
||||
|
||||
if (PLUGIN_RMM)
|
||||
if(PLUGIN_RMM)
|
||||
target_link_libraries(${target} PRIVATE rmm::rmm)
|
||||
endif (PLUGIN_RMM)
|
||||
endif()
|
||||
|
||||
if (USE_NCCL)
|
||||
if(USE_NCCL)
|
||||
xgboost_link_nccl(${target})
|
||||
endif (USE_NCCL)
|
||||
endif()
|
||||
|
||||
if (USE_NVTX)
|
||||
if(USE_NVTX)
|
||||
target_link_libraries(${target} PRIVATE CUDA::nvToolsExt)
|
||||
endif (USE_NVTX)
|
||||
endif()
|
||||
|
||||
if (MINGW)
|
||||
if(MINGW)
|
||||
target_link_libraries(${target} PRIVATE wsock32 ws2_32)
|
||||
endif (MINGW)
|
||||
endmacro(xgboost_target_link_libraries)
|
||||
endif()
|
||||
endmacro()
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
function (write_version)
|
||||
function(write_version)
|
||||
message(STATUS "xgboost VERSION: ${xgboost_VERSION}")
|
||||
configure_file(
|
||||
${xgboost_SOURCE_DIR}/cmake/version_config.h.in
|
||||
${xgboost_SOURCE_DIR}/include/xgboost/version_config.h @ONLY)
|
||||
endfunction (write_version)
|
||||
endfunction()
|
||||
|
||||
@ -66,7 +66,7 @@ function(create_rlib_for_msvc)
|
||||
execute_process(COMMAND ${DLLTOOL_EXE}
|
||||
"--input-def" "${CMAKE_CURRENT_BINARY_DIR}/R.def"
|
||||
"--output-lib" "${CMAKE_CURRENT_BINARY_DIR}/R.lib")
|
||||
endfunction(create_rlib_for_msvc)
|
||||
endfunction()
|
||||
|
||||
|
||||
# detection for OSX
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
if (NVML_LIBRARY)
|
||||
if(NVML_LIBRARY)
|
||||
unset(NVML_LIBRARY CACHE)
|
||||
endif(NVML_LIBRARY)
|
||||
endif()
|
||||
|
||||
set(NVML_LIB_NAME nvml)
|
||||
|
||||
|
||||
@ -35,20 +35,20 @@
|
||||
#
|
||||
# This module assumes that the user has already called find_package(CUDA)
|
||||
|
||||
if (NCCL_LIBRARY)
|
||||
if(NCCL_LIBRARY)
|
||||
if(NOT USE_NCCL_LIB_PATH)
|
||||
# Don't cache NCCL_LIBRARY to enable switching between static and shared.
|
||||
unset(NCCL_LIBRARY CACHE)
|
||||
endif(NOT USE_NCCL_LIB_PATH)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if (BUILD_WITH_SHARED_NCCL)
|
||||
if(BUILD_WITH_SHARED_NCCL)
|
||||
# libnccl.so
|
||||
set(NCCL_LIB_NAME nccl)
|
||||
else ()
|
||||
else()
|
||||
# libnccl_static.a
|
||||
set(NCCL_LIB_NAME nccl_static)
|
||||
endif (BUILD_WITH_SHARED_NCCL)
|
||||
endif()
|
||||
|
||||
find_path(NCCL_INCLUDE_DIR
|
||||
NAMES nccl.h
|
||||
|
||||
@ -3,11 +3,11 @@ find_package(xgboost REQUIRED)
|
||||
|
||||
# xgboost is built as static libraries, all cxx dependencies need to be linked into the
|
||||
# executable.
|
||||
if (XGBOOST_BUILD_STATIC_LIB)
|
||||
if(XGBOOST_BUILD_STATIC_LIB)
|
||||
enable_language(CXX)
|
||||
# find again for those cxx libraries.
|
||||
find_package(xgboost REQUIRED)
|
||||
endif(XGBOOST_BUILD_STATIC_LIB)
|
||||
endif()
|
||||
|
||||
add_executable(api-demo c-api-demo.c)
|
||||
target_link_libraries(api-demo PRIVATE xgboost::xgboost)
|
||||
|
||||
@ -4,11 +4,11 @@ find_package(xgboost REQUIRED)
|
||||
|
||||
# xgboost is built as static libraries, all cxx dependencies need to be linked into the
|
||||
# executable.
|
||||
if (XGBOOST_BUILD_STATIC_LIB)
|
||||
if(XGBOOST_BUILD_STATIC_LIB)
|
||||
enable_language(CXX)
|
||||
# find again for those cxx libraries.
|
||||
find_package(xgboost REQUIRED)
|
||||
endif(XGBOOST_BUILD_STATIC_LIB)
|
||||
endif()
|
||||
|
||||
add_executable(inference-demo inference.c)
|
||||
target_link_libraries(inference-demo PRIVATE xgboost::xgboost)
|
||||
|
||||
@ -104,7 +104,7 @@ def check_point_callback():
|
||||
# Use callback class from xgboost.callback
|
||||
# Feel free to subclass/customize it to suit your need.
|
||||
check_point = xgb.callback.TrainingCheckPoint(
|
||||
directory=tmpdir, iterations=rounds, name="model"
|
||||
directory=tmpdir, interval=rounds, name="model"
|
||||
)
|
||||
xgb.train(
|
||||
{"objective": "binary:logistic"},
|
||||
@ -118,7 +118,7 @@ def check_point_callback():
|
||||
# This version of checkpoint saves everything including parameters and
|
||||
# model. See: doc/tutorials/saving_model.rst
|
||||
check_point = xgb.callback.TrainingCheckPoint(
|
||||
directory=tmpdir, iterations=rounds, as_pickle=True, name="model"
|
||||
directory=tmpdir, interval=rounds, as_pickle=True, name="model"
|
||||
)
|
||||
xgb.train(
|
||||
{"objective": "binary:logistic"},
|
||||
|
||||
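For reference, a minimal hedged sketch of the renamed ``interval`` argument in use (the synthetic data, the 2-round interval, and the 6 boosting rounds below are illustrative only, not part of the demo above):

```python
import tempfile

import numpy as np
import xgboost as xgb

# Synthetic data purely for illustration.
rng = np.random.default_rng(0)
X = rng.normal(size=(256, 8))
y = rng.integers(0, 2, size=256)
dtrain = xgb.DMatrix(X, label=y)

with tempfile.TemporaryDirectory() as tmpdir:
    # Write a model checkpoint every 2 boosting rounds.
    check_point = xgb.callback.TrainingCheckPoint(
        directory=tmpdir, interval=2, name="model"
    )
    xgb.train(
        {"objective": "binary:logistic"},
        dtrain,
        num_boost_round=6,
        callbacks=[check_point],
    )
```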
@ -24,8 +24,8 @@ param <- list("objective" = "binary:logitraw",
|
||||
"nthread" = 16)
|
||||
watchlist <- list("train" = xgmat)
|
||||
nrounds <- 120
|
||||
print ("loading data end, start to boost trees")
|
||||
print("loading data end, start to boost trees")
|
||||
bst <- xgb.train(param, xgmat, nrounds, watchlist)
|
||||
# save out model
|
||||
xgb.save(bst, "higgs.model")
|
||||
print ('finish training')
|
||||
print('finish training')
|
||||
|
||||
@ -39,11 +39,11 @@ for (i in seq_along(threads)){
|
||||
"nthread" = thread)
|
||||
watchlist <- list("train" = xgmat)
|
||||
nrounds <- 120
|
||||
print ("loading data end, start to boost trees")
|
||||
print("loading data end, start to boost trees")
|
||||
bst <- xgb.train(param, xgmat, nrounds, watchlist)
|
||||
# save out model
|
||||
xgb.save(bst, "higgs.model")
|
||||
print ('finish training')
|
||||
print('finish training')
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
@ -85,8 +85,8 @@ shutdown server
|
||||
## Training with GPUs
|
||||
|
||||
To demo with Federated Learning using GPUs, make sure your machine has at least 2 GPUs.
|
||||
Build XGBoost with the federated learning plugin enabled along with CUDA, but with NCCL
|
||||
turned off (see the [README](../../plugin/federated/README.md)).
|
||||
Build XGBoost with the federated learning plugin enabled along with CUDA
|
||||
(see the [README](../../plugin/federated/README.md)).
|
||||
|
||||
Modify `config/config_fed_client.json` and set `use_gpus` to `true`, then repeat the steps
|
||||
Modify `../config/config_fed_client.json` and set `use_gpus` to `true`, then repeat the steps
|
||||
above.
|
||||
|
||||
@ -67,7 +67,7 @@ class XGBoostTrainer(Executor):
|
||||
dtest = xgb.DMatrix('agaricus.txt.test?format=libsvm')
|
||||
|
||||
# Specify parameters via map, definition are same as c++ version
|
||||
param = {'max_depth': 2, 'eta': 1, 'objective': 'binary:logistic'}
|
||||
param = {'tree_method': 'hist', 'max_depth': 2, 'eta': 1, 'objective': 'binary:logistic'}
|
||||
if self._use_gpus:
|
||||
self.log_info(fl_ctx, f'Training with GPU {rank}')
|
||||
param['device'] = f"cuda:{rank}"
|
||||
|
||||
@ -56,4 +56,9 @@ shutdown server
|
||||
|
||||
## Training with GPUs
|
||||
|
||||
Currently GPUs are not yet supported by vertical federated XGBoost.
|
||||
To demo with Vertical Federated Learning using GPUs, make sure your machine has at least 2 GPUs.
|
||||
Build XGBoost with the federated learning plugin enabled along with CUDA
|
||||
(see the [README](../../plugin/federated/README.md)).
|
||||
|
||||
Modify `../config/config_fed_client.json` and set `use_gpus` to `true`, then repeat the steps
|
||||
above.
|
||||
|
||||
@ -77,13 +77,14 @@ class XGBoostTrainer(Executor):
|
||||
'gamma': 1.0,
|
||||
'max_depth': 8,
|
||||
'min_child_weight': 100,
|
||||
'tree_method': 'approx',
|
||||
'tree_method': 'hist',
|
||||
'grow_policy': 'depthwise',
|
||||
'objective': 'binary:logistic',
|
||||
'eval_metric': 'auc',
|
||||
}
|
||||
if self._use_gpus:
|
||||
self.log_info(fl_ctx, 'GPUs are not currently supported by vertical federated XGBoost')
|
||||
self.log_info(fl_ctx, f'Training with GPU {rank}')
|
||||
param['device'] = f"cuda:{rank}"
|
||||
|
||||
# specify validations set to watch performance
|
||||
watchlist = [(dtest, "eval"), (dtrain, "train")]
|
||||
|
||||
@ -250,8 +250,8 @@ echo "<hash> <artifact>" | shasum -a 256 --check
|
||||
```
|
||||
|
||||
**Experimental binary packages for R with CUDA enabled**
|
||||
* xgboost_r_gpu_linux_1.7.5.tar.gz: [Download]({r_gpu_linux_url})
|
||||
* xgboost_r_gpu_win64_1.7.5.tar.gz: [Download]({r_gpu_win64_url})
|
||||
* xgboost_r_gpu_linux_{release}.tar.gz: [Download]({r_gpu_linux_url})
|
||||
* xgboost_r_gpu_win64_{release}.tar.gz: [Download]({r_gpu_win64_url})
|
||||
|
||||
**Source tarball**
|
||||
* xgboost.tar.gz: [Download]({src_tarball})"""
|
||||
@ -296,12 +296,13 @@ def main(args: argparse.Namespace) -> None:
|
||||
git.submodule("update")
|
||||
commit_hash = latest_hash()
|
||||
|
||||
if not os.path.exists(args.outdir):
|
||||
os.mkdir(args.outdir)
|
||||
outdir = os.path.abspath(args.outdir)
|
||||
if not os.path.exists(outdir):
|
||||
os.mkdir(outdir)
|
||||
|
||||
# source tarball
|
||||
hashes: List[str] = []
|
||||
tarname, h = make_src_package(release, args.outdir)
|
||||
tarname, h = make_src_package(release, outdir)
|
||||
hashes.append(h)
|
||||
|
||||
# CUDA R packages
|
||||
@ -310,18 +311,18 @@ def main(args: argparse.Namespace) -> None:
|
||||
branch,
|
||||
"" if rc is None else rc + str(rc_ver),
|
||||
commit_hash,
|
||||
args.outdir,
|
||||
outdir,
|
||||
)
|
||||
hashes.extend(hr)
|
||||
|
||||
# Python source wheel
|
||||
make_pysrc_wheel(release, rc, rc_ver, args.outdir)
|
||||
make_pysrc_wheel(release, rc, rc_ver, outdir)
|
||||
|
||||
# Python binary wheels
|
||||
download_py_packages(branch, major, minor, commit_hash, args.outdir)
|
||||
download_py_packages(branch, major, minor, commit_hash, outdir)
|
||||
|
||||
# Write end note
|
||||
release_note(release, hashes, urls, tarname, args.outdir)
|
||||
release_note(release, hashes, urls, tarname, outdir)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
@ -80,6 +80,24 @@ R package versioning
|
||||
====================
|
||||
See :ref:`release`.
|
||||
|
||||
Testing R package with different compilers
|
||||
==========================================
|
||||
|
||||
You can change the default compiler of R by changing the configuration file in home
|
||||
directory. For instance, if you want to test XGBoost built with clang++ instead of g++ on
|
||||
Linux, put the following in your ``~/.R/Makevars`` file:
|
||||
|
||||
.. code-block:: sh
|
||||
|
||||
CC=clang-15
|
||||
CXX17=clang++-15
|
||||
|
||||
Be aware that the variable name should match with the name used by ``R CMD``:
|
||||
|
||||
.. code-block:: sh
|
||||
|
||||
R CMD config CXX17
|
||||
|
||||
Registering native routines in R
|
||||
================================
|
||||
According to `R extension manual <https://cran.r-project.org/doc/manuals/r-release/R-exts.html#Registering-native-routines>`_,
|
||||
|
||||
@ -35,7 +35,7 @@ Building sdists
|
||||
|
||||
In the case of XGBoost, an sdist contains both the Python code as well as
|
||||
the C++ code, so that the core part of XGBoost can be compiled into the
|
||||
shared libary ``libxgboost.so`` [#shared_lib_name]_.
|
||||
shared library ``libxgboost.so`` [#shared_lib_name]_.
|
||||
|
||||
You can obtain an sdist as follows:
|
||||
|
||||
|
||||
@ -16,7 +16,14 @@ Adding a new unit test
|
||||
|
||||
Python package: pytest
|
||||
======================
|
||||
Add your test under the directory `tests/python/ <https://github.com/dmlc/xgboost/tree/master/tests/python>`_ or `tests/python-gpu/ <https://github.com/dmlc/xgboost/tree/master/tests/python-gpu>`_ (if you are testing GPU code). Refer to `the PyTest tutorial <https://docs.pytest.org/en/latest/getting-started.html>`_ to learn how to write tests for Python code.
|
||||
Add your test under one of the following directories:
|
||||
|
||||
- `tests/python/ <https://github.com/dmlc/xgboost/tree/master/tests/python>`_
|
||||
- `tests/python-gpu/ <https://github.com/dmlc/xgboost/tree/master/tests/python-gpu>`_ (if you are testing GPU code)
|
||||
- `tests/test_distributed <https://github.com/dmlc/xgboost/tree/master/tests/test_distributed>`_ (if a distributed framework is used).
|
||||
|
||||
Refer to `the PyTest tutorial <https://docs.pytest.org/en/latest/getting-started.html>`_
|
||||
to learn how to write tests for Python code.
|
||||
|
||||
You may try running your test by following instructions in :ref:`this section <running_pytest>`.
|
||||
|
||||
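For illustration, a minimal hedged sketch of such a test (the file name and assertion below are hypothetical and not part of the test suite):

.. code-block:: python

    # tests/python/test_smoke_example.py  (hypothetical file name)
    import numpy as np
    import xgboost as xgb


    def test_training_smoke() -> None:
        rng = np.random.default_rng(1994)
        X = rng.normal(size=(128, 4))
        y = rng.integers(0, 2, size=128)
        dtrain = xgb.DMatrix(X, label=y)
        booster = xgb.train(
            {"objective": "binary:logistic", "nthread": 2}, dtrain, num_boost_round=2
        )
        assert booster.num_boosted_rounds() == 2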
@ -56,19 +63,26 @@ Run
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
make Rcheck
|
||||
python ./tests/ci_build/test_r_package.py --task=check
|
||||
|
||||
at the root of the project directory.
|
||||
at the root of the project directory. The command builds and checks the XGBoost
R package. Alternatively, if you want to just run the tests, you can use the following
commands after installing XGBoost:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
cd R-package/tests/
|
||||
Rscript testthat.R
|
||||
|
||||
.. _running_jvm_tests:
|
||||
|
||||
JVM packages
|
||||
============
|
||||
As part of the building process, tests are run:
|
||||
Maven is used to build the package and run the tests:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
mvn package
|
||||
mvn test
|
||||
|
||||
.. _running_pytest:
|
||||
|
||||
@ -99,6 +113,14 @@ In addition, to test CUDA code, run:
|
||||
|
||||
(For this step, you should have compiled XGBoost with CUDA enabled.)
|
||||
|
||||
For testing with distributed frameworks like ``Dask`` and ``PySpark``:
|
||||
|
||||
.. code:: bash
|
||||
|
||||
# Tell Python where to find XGBoost module
|
||||
export PYTHONPATH=./python-package
|
||||
pytest -v -s --fulltrace tests/test_distributed
|
||||
|
||||
.. _running_gtest:
|
||||
|
||||
C++: Google Test
|
||||
@ -110,21 +132,13 @@ To build and run C++ unit tests enable tests while running CMake:
|
||||
|
||||
mkdir build
|
||||
cd build
|
||||
cmake -DGOOGLE_TEST=ON -DUSE_DMLC_GTEST=ON ..
|
||||
make
|
||||
make test
|
||||
cmake -GNinja -DGOOGLE_TEST=ON -DUSE_DMLC_GTEST=ON -DUSE_CUDA=ON -DUSE_NCCL=ON ..
|
||||
ninja
|
||||
./testxgboost
|
||||
|
||||
To enable tests for CUDA code, add ``-DUSE_CUDA=ON`` and ``-DUSE_NCCL=ON`` (CUDA toolkit required):
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
mkdir build
|
||||
cd build
|
||||
cmake -DGOOGLE_TEST=ON -DUSE_DMLC_GTEST=ON -DUSE_CUDA=ON -DUSE_NCCL=ON ..
|
||||
make
|
||||
make test
|
||||
|
||||
One can also run all unit test using ctest tool which provides higher flexibility. For example:
|
||||
Flags like ``USE_CUDA`` and ``USE_DMLC_GTEST`` are optional. For more info about how to build
XGBoost from source, see :doc:`</build>`. One can also run all unit tests using the ctest tool,
which provides higher flexibility. For example:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
@ -157,14 +171,14 @@ sanitizer is not compatible with the other two sanitizers.
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
cmake -DUSE_SANITIZER=ON -DENABLED_SANITIZERS="address;leak" /path/to/xgboost
|
||||
cmake -DUSE_SANITIZER=ON -DENABLED_SANITIZERS="address;undefined" /path/to/xgboost
|
||||
|
||||
By default, CMake will search regular system paths for sanitizers, you can also
|
||||
supply a specified SANITIZER_PATH.
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
cmake -DUSE_SANITIZER=ON -DENABLED_SANITIZERS="address;leak" \
|
||||
cmake -DUSE_SANITIZER=ON -DENABLED_SANITIZERS="address;undefined" \
|
||||
-DSANITIZER_PATH=/path/to/sanitizers /path/to/xgboost
|
||||
|
||||
How to use sanitizers with CUDA support
|
||||
@ -181,7 +195,7 @@ environment variable:
|
||||
Other sanitizer runtime options
|
||||
===============================
|
||||
|
||||
By default undefined sanitizer doesn't print out the backtrace. You can enable it by
|
||||
By default undefined sanitizer doesn't print out the backtrace. You can enable it by
|
||||
exporting environment variable:
|
||||
|
||||
.. code-block::
|
||||
|
||||
@ -146,3 +146,48 @@ instance we might accidentally call ``clf.set_params()`` inside a predict functi
|
||||
|
||||
with ThreadPoolExecutor(max_workers=10) as e:
|
||||
e.submit(predict_fn, ...)
|
||||
|
||||
*****************************
|
||||
Privacy-Preserving Prediction
|
||||
*****************************
|
||||
|
||||
`Concrete ML`_ is a third-party open-source library developed by `Zama`_ that provides gradient
|
||||
boosting classes similar to ours, but predicting directly over encrypted data, thanks to
|
||||
Fully Homomorphic Encryption. A simple example would be as follows:
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
from sklearn.datasets import make_classification
|
||||
from sklearn.model_selection import train_test_split
|
||||
from concrete.ml.sklearn import XGBClassifier
|
||||
|
||||
x, y = make_classification(n_samples=100, class_sep=2, n_features=30, random_state=42)
|
||||
X_train, X_test, y_train, y_test = train_test_split(
|
||||
x, y, test_size=10, random_state=42
|
||||
)
|
||||
|
||||
# Train in the clear and quantize the weights
|
||||
model = XGBClassifier()
|
||||
model.fit(X_train, y_train)
|
||||
|
||||
# Simulate the predictions in the clear
|
||||
y_pred_clear = model.predict(X_test)
|
||||
|
||||
# Compile in FHE
|
||||
model.compile(X_train)
|
||||
|
||||
# Generate keys
|
||||
model.fhe_circuit.keygen()
|
||||
|
||||
# Run the inference on encrypted inputs!
|
||||
y_pred_fhe = model.predict(X_test, fhe="execute")
|
||||
|
||||
print("In clear :", y_pred_clear)
|
||||
print("In FHE :", y_pred_fhe)
|
||||
print(f"Similarity: {int((y_pred_fhe == y_pred_clear).mean()*100)}%")
|
||||
|
||||
More information and examples are given in the `Concrete ML documentation`_.
|
||||
|
||||
.. _Zama: https://www.zama.ai/
|
||||
.. _Concrete ML: https://github.com/zama-ai/concrete-ml
|
||||
.. _Concrete ML documentation: https://docs.zama.ai/concrete-ml
|
||||
|
||||
@ -172,9 +172,8 @@ Support Matrix
|
||||
+-------------------------+-----------+-------------------+-----------+-----------+--------------------+-------------+
|
||||
| modin.Series | NPA | FF | NPA | NPA | FF | |
|
||||
+-------------------------+-----------+-------------------+-----------+-----------+--------------------+-------------+
|
||||
| pyarrow.Table | T | F | | NPA | FF | |
|
||||
| pyarrow.Table | NPA | NPA | NPA | NPA | NPA | NPA |
|
||||
+-------------------------+-----------+-------------------+-----------+-----------+--------------------+-------------+
|
||||
| pyarrow.dataset.Dataset | T | F | | | F | |
|
||||
+-------------------------+-----------+-------------------+-----------+-----------+--------------------+-------------+
|
||||
| _\_array\_\_ | NPA | F | NPA | NPA | H | |
|
||||
+-------------------------+-----------+-------------------+-----------+-----------+--------------------+-------------+
|
||||
|
||||
@ -30,3 +30,4 @@ See `Awesome XGBoost <https://github.com/dmlc/xgboost/tree/master/demo>`_ for mo
|
||||
input_format
|
||||
param_tuning
|
||||
custom_metric_obj
|
||||
privacy_preserving
|
||||
@ -58,6 +58,7 @@ Notice that the samples are sorted based on their query index in a non-decreasin
|
||||
sorted_idx = np.argsort(qid)
|
||||
X = X[sorted_idx, :]
|
||||
y = y[sorted_idx]
|
||||
qid = qid[sorted_idx]
|
||||
|
||||
The simplest way to train a ranking model is by using the scikit-learn estimator interface. Continuing the previous snippet, we can train a simple ranking model without tuning:
|
||||
|
||||
|
||||
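The tutorial's actual snippet at this point falls outside this hunk; as a rough, hedged sketch of the scikit-learn estimator interface it refers to (the parameters are illustrative):

.. code-block:: python

    import xgboost as xgb

    # Continuing from the X, y and qid arrays prepared above; settings are illustrative only.
    ranker = xgb.XGBRanker(tree_method="hist", objective="rank:ndcg")
    ranker.fit(X, y, qid=qid)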
97
doc/tutorials/privacy_preserving.rst
Normal file
97
doc/tutorials/privacy_preserving.rst
Normal file
@ -0,0 +1,97 @@
|
||||
#############################################
|
||||
Privacy Preserving Inference with Concrete ML
|
||||
#############################################
|
||||
|
||||
`Concrete ML`_ is a specialized library developed by Zama that allows the execution of machine learning models on encrypted data through `Fully Homomorphic Encryption (FHE) <https://www.youtube.com/watch?v=FFox2S4uqEo>`_, thereby preserving data privacy.
|
||||
|
||||
To use models such as XGBClassifier, use the following import:
|
||||
|
||||
.. code:: python
|
||||
|
||||
from concrete.ml.sklearn import XGBClassifier
|
||||
|
||||
***************************************
|
||||
Performing Privacy Preserving Inference
|
||||
***************************************
|
||||
|
||||
Initialization of an XGBClassifier can be done as follows:
|
||||
|
||||
.. code:: python
|
||||
|
||||
classifier = XGBClassifier(n_bits=6, [other_hyperparameters])

where ``n_bits`` determines the precision of the input features. Note that a higher value of ``n_bits`` increases the precision of the input features and possibly the final model accuracy, but it also results in a longer FHE execution time.
|
||||
|
||||
Other hyper-parameters available in the xgboost library can also be used.
|
||||
|
||||
******************************
|
||||
Model Training and Compilation
|
||||
******************************
|
||||
|
||||
As with other scikit-learn style models, it can be trained with the ``.fit()`` method.
|
||||
|
||||
.. code:: python
|
||||
|
||||
classifier.fit(X_train, y_train)
|
||||
|
||||
After training, the model can be compiled with a calibration dataset, potentially a subset of the training data:
|
||||
|
||||
.. code:: python
|
||||
|
||||
classifier.compile(X_calibrate)
|
||||
|
||||
This calibration dataset, ``X_calibrate``, is used by Concrete ML to compute the precision (bit-width) of each intermediate value in the model. This is a necessary step to optimize the equivalent FHE circuit.
|
||||
|
||||
****************************
|
||||
FHE Simulation and Execution
|
||||
****************************
|
||||
|
||||
To verify model accuracy in encrypted computations, you can run an FHE simulation:
|
||||
|
||||
.. code:: python
|
||||
|
||||
predictions = classifier.predict(X_test, fhe="simulate")
|
||||
|
||||
This simulation can be used to evaluate the model. The accuracy obtained in this simulation step is representative of the actual FHE execution, without having to pay the cost of a full FHE run.
|
||||
|
||||
When the model is ready, actual Fully Homomorphic Encryption execution can be performed:
|
||||
|
||||
.. code:: python
|
||||
|
||||
predictions = classifier.predict(X_test, fhe="execute")

Note that using ``fhe="execute"`` is a convenient way to assess the model in FHE, but for real deployment, separate functions to encrypt (on the client), run in FHE (on the server), and finally decrypt (on the client) have to be used for end-to-end privacy-preserving inference.
|
||||
|
||||
Concrete ML provides a deployment API to facilitate this process, ensuring end-to-end privacy.

To go further with the deployment API, you can read:

- the `deployment documentation <https://docs.zama.ai/concrete-ml/advanced-topics/client_server>`_
- the `deployment notebook <https://github.com/zama-ai/concrete-ml/blob/17779ca571d20b001caff5792eb11e76fe2c19ba/docs/advanced_examples/ClientServer.ipynb>`_
*******************************
Parameter Tuning in Concrete ML
*******************************

Concrete ML is compatible with standard scikit-learn utilities such as ``GridSearchCV`` and other hyper-parameter tuning techniques.
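
As a sketch (the parameter values are illustrative assumptions), a grid search over regular XGBoost hyper-parameters could look like:

.. code:: python

  from sklearn.model_selection import GridSearchCV

  param_grid = {"n_estimators": [20, 50], "max_depth": [2, 4]}
  search = GridSearchCV(XGBClassifier(n_bits=6), param_grid, cv=3)
  search.fit(X_train, y_train)
  print(search.best_params_)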
******************
Examples and Demos
******************

- `Sentiment analysis (based on transformers + xgboost) <https://huggingface.co/spaces/zama-fhe/encrypted_sentiment_analysis>`_
- `XGBoost Classifier <https://github.com/zama-ai/concrete-ml/blob/6966c84b9698d5418209b346900f81d1270c64bd/docs/advanced_examples/XGBClassifier.ipynb>`_
- `XGBoost Regressor <https://github.com/zama-ai/concrete-ml/blob/6966c84b9698d5418209b346900f81d1270c64bd/docs/advanced_examples/XGBRegressor.ipynb>`_

**********
Conclusion
**********

Concrete ML provides a framework for executing privacy-preserving inference by leveraging Fully Homomorphic Encryption, allowing secure and private computations on encrypted data.

More information and examples are given in the `Concrete ML documentation`_.

.. _Concrete ML: https://github.com/zama-ai/concrete-ml
.. _`Concrete ML documentation`: https://docs.zama.ai/concrete-ml
@ -144,9 +144,7 @@ XGB_DLL int XGDMatrixCreateFromFile(const char *fname, int silent, DMatrixHandle
|
||||
* See :doc:`/tutorials/input_format` for more info.
|
||||
* \endverbatim
|
||||
* - silent (optional): Whether to print message during loading. Default to true.
|
||||
* - data_split_mode (optional): Whether to split by row or column. In distributed mode, the
|
||||
* file is split accordingly; otherwise this is only an indicator on how the file was split
|
||||
* beforehand. Default to row.
|
||||
* - data_split_mode (optional): Whether the file was split by row or column beforehand for distributed computing. Default to row.
|
||||
* \param out a loaded data matrix
|
||||
* \return 0 when success, -1 when failure happens
|
||||
*/
|
||||
@ -174,6 +172,7 @@ XGB_DLL int XGDMatrixCreateFromCSREx(const size_t *indptr, const unsigned *indic
|
||||
* \param config JSON encoded configuration. Required values are:
|
||||
* - missing: Which value to represent missing value.
|
||||
* - nthread (optional): Number of threads used for initializing DMatrix.
|
||||
* - data_split_mode (optional): Whether the data was split by row or column beforehand. Default to row.
|
||||
* \param out created dmatrix
|
||||
* \return 0 when success, -1 when failure happens
|
||||
*/
|
||||
@ -186,6 +185,7 @@ XGB_DLL int XGDMatrixCreateFromCSR(char const *indptr, char const *indices, char
|
||||
* \param config JSON encoded configuration. Required values are:
|
||||
* - missing: Which value to represent missing value.
|
||||
* - nthread (optional): Number of threads used for initializing DMatrix.
|
||||
* - data_split_mode (optional): Whether the data was split by row or column beforehand. Default to row.
|
||||
* \param out created dmatrix
|
||||
* \return 0 when success, -1 when failure happens
|
||||
*/
|
||||
@ -200,6 +200,7 @@ XGB_DLL int XGDMatrixCreateFromDense(char const *data, char const *config, DMatr
|
||||
* \param config JSON encoded configuration. Supported values are:
|
||||
* - missing: Which value to represent missing value.
|
||||
* - nthread (optional): Number of threads used for initializing DMatrix.
|
||||
* - data_split_mode (optional): Whether the data was split by row or column beforehand. Default to row.
|
||||
* \param out created dmatrix
|
||||
* \return 0 when success, -1 when failure happens
|
||||
*/
|
||||
@ -266,6 +267,7 @@ XGB_DLL int XGDMatrixCreateFromDT(void** data,
|
||||
* \param config JSON encoded configuration. Required values are:
|
||||
* - missing: Which value to represent missing value.
|
||||
* - nthread (optional): Number of threads used for initializing DMatrix.
|
||||
* - data_split_mode (optional): Whether the data was split by row or column beforehand. Default to row.
|
||||
* \param out created dmatrix
|
||||
* \return 0 when success, -1 when failure happens
|
||||
*/
|
||||
@ -278,6 +280,7 @@ XGB_DLL int XGDMatrixCreateFromCudaColumnar(char const *data, char const *config
|
||||
* \param config JSON encoded configuration. Required values are:
|
||||
* - missing: Which value to represent missing value.
|
||||
* - nthread (optional): Number of threads used for initializing DMatrix.
|
||||
* - data_split_mode (optional): Whether the data was split by row or column beforehand. Default to row.
|
||||
* \param out created dmatrix
|
||||
* \return 0 when success, -1 when failure happens
|
||||
*/
|
||||
@ -552,24 +555,6 @@ XGB_DLL int XGProxyDMatrixSetDataCSR(DMatrixHandle handle, char const *indptr,
|
||||
|
||||
/** @} */ // End of Streaming
|
||||
|
||||
XGB_DLL int XGImportArrowRecordBatch(DataIterHandle data_handle, void *ptr_array, void *ptr_schema);
|
||||
|
||||
/*!
|
||||
* \brief Construct DMatrix from arrow using callbacks. Arrow related C API is not stable
|
||||
* and subject to change in the future.
|
||||
*
|
||||
* \param next Callback function for fetching arrow records.
|
||||
* \param config JSON encoded configuration. Required values are:
|
||||
* - missing: Which value to represent missing value.
|
||||
* - nbatch: Number of batches in arrow table.
|
||||
* - nthread (optional): Number of threads used for initializing DMatrix.
|
||||
* \param out The created DMatrix.
|
||||
*
|
||||
* \return 0 when success, -1 when failure happens
|
||||
*/
|
||||
XGB_DLL int XGDMatrixCreateFromArrowCallback(XGDMatrixCallbackNext *next, char const *config,
|
||||
DMatrixHandle *out);
|
||||
|
||||
/*!
|
||||
* \brief create a new dmatrix from sliced content of existing matrix
|
||||
* \param handle instance of data matrix to be sliced
|
||||
@ -808,6 +793,16 @@ XGB_DLL int XGDMatrixNumCol(DMatrixHandle handle, bst_ulong *out);
|
||||
*/
|
||||
XGB_DLL int XGDMatrixNumNonMissing(DMatrixHandle handle, bst_ulong *out);
|
||||
|
||||
/*!
|
||||
* \brief Get the data split mode from DMatrix.
|
||||
*
|
||||
* \param handle the handle to the DMatrix
|
||||
* \param out The output of the data split mode
|
||||
*
|
||||
* \return 0 when success, -1 when failure happens
|
||||
*/
|
||||
XGB_DLL int XGDMatrixDataSplitMode(DMatrixHandle handle, bst_ulong *out);
|
||||
|
||||
/**
|
||||
* \brief Get the predictors from DMatrix as CSR matrix for testing. If this is a
|
||||
* quantized DMatrix, quantized values are returned instead.
|
||||
@ -1276,15 +1271,6 @@ XGB_DLL int XGBoosterLoadModelFromBuffer(BoosterHandle handle,
|
||||
XGB_DLL int XGBoosterSaveModelToBuffer(BoosterHandle handle, char const *config, bst_ulong *out_len,
|
||||
char const **out_dptr);
|
||||
|
||||
/*!
|
||||
* \brief Save booster to a buffer in binary format.
|
||||
*
|
||||
* \deprecated since 1.6.0
|
||||
* \see XGBoosterSaveModelToBuffer()
|
||||
*/
|
||||
XGB_DLL int XGBoosterGetModelRaw(BoosterHandle handle, bst_ulong *out_len,
|
||||
const char **out_dptr);
|
||||
|
||||
/*!
|
||||
* \brief Memory snapshot based serialization method. Saves everything states
|
||||
* into buffer.
|
||||
@ -1308,24 +1294,6 @@ XGB_DLL int XGBoosterSerializeToBuffer(BoosterHandle handle, bst_ulong *out_len,
|
||||
XGB_DLL int XGBoosterUnserializeFromBuffer(BoosterHandle handle,
|
||||
const void *buf, bst_ulong len);
|
||||
|
||||
/*!
|
||||
* \brief Initialize the booster from rabit checkpoint.
|
||||
* This is used in distributed training API.
|
||||
* \param handle handle
|
||||
* \param version The output version of the model.
|
||||
* \return 0 when success, -1 when failure happens
|
||||
*/
|
||||
XGB_DLL int XGBoosterLoadRabitCheckpoint(BoosterHandle handle,
|
||||
int* version);
|
||||
|
||||
/*!
|
||||
* \brief Save the current checkpoint to rabit.
|
||||
* \param handle handle
|
||||
* \return 0 when success, -1 when failure happens
|
||||
*/
|
||||
XGB_DLL int XGBoosterSaveRabitCheckpoint(BoosterHandle handle);
|
||||
|
||||
|
||||
/*!
|
||||
* \brief Save XGBoost's internal configuration into a JSON document. Currently the
|
||||
* support is experimental, function signature may change in the future without
|
||||
@ -1554,29 +1522,19 @@ XGB_DLL int XGBoosterFeatureScore(BoosterHandle handle, const char *config,
|
||||
* \param config JSON encoded configuration. Accepted JSON keys are:
|
||||
* - xgboost_communicator: The type of the communicator. Can be set as an environment variable.
|
||||
* * rabit: Use Rabit. This is the default if the type is unspecified.
|
||||
* * mpi: Use MPI.
|
||||
* * federated: Use the gRPC interface for Federated Learning.
|
||||
* Only applicable to the Rabit communicator (these are case-sensitive):
|
||||
* - rabit_tracker_uri: Hostname of the tracker.
|
||||
* - rabit_tracker_port: Port number of the tracker.
|
||||
* - rabit_task_id: ID of the current task, can be used to obtain deterministic rank assignment.
|
||||
* - rabit_world_size: Total number of workers.
|
||||
* - rabit_hadoop_mode: Enable Hadoop support.
|
||||
* - rabit_tree_reduce_minsize: Minimal size for tree reduce.
|
||||
* - rabit_reduce_ring_mincount: Minimal count to perform ring reduce.
|
||||
* - rabit_reduce_buffer: Size of the reduce buffer.
|
||||
* - rabit_bootstrap_cache: Size of the bootstrap cache.
|
||||
* - rabit_debug: Enable debugging.
|
||||
* - rabit_timeout: Enable timeout.
|
||||
* - rabit_timeout_sec: Timeout in seconds.
|
||||
* - rabit_enable_tcp_no_delay: Enable TCP no delay on Unix platforms.
|
||||
* Only applicable to the Rabit communicator (these are case-sensitive, and can be set as
|
||||
* environment variables):
|
||||
* - DMLC_TRACKER_URI: Hostname of the tracker.
|
||||
* - DMLC_TRACKER_PORT: Port number of the tracker.
|
||||
* - DMLC_TASK_ID: ID of the current task, can be used to obtain deterministic rank assignment.
|
||||
* - DMLC_ROLE: Role of the current task, "worker" or "server".
|
||||
* - DMLC_NUM_ATTEMPT: Number of attempts after task failure.
|
||||
* - DMLC_WORKER_CONNECT_RETRY: Number of retries to connect to the tracker.
|
||||
* Only applicable to the Federated communicator (use upper case for environment variables, use
|
||||
* lower case for runtime configuration):
|
||||
|
||||
@ -157,4 +157,13 @@ struct Result {
|
||||
[[nodiscard]] inline auto Fail(std::string msg, std::error_code errc, Result&& prev) {
|
||||
return Result{std::move(msg), std::move(errc), std::forward<Result>(prev)};
|
||||
}
|
||||
|
||||
// We don't have monad, a simple helper would do.
|
||||
template <typename Fn>
|
||||
Result operator<<(Result&& r, Fn&& fn) {
|
||||
if (!r.OK()) {
|
||||
return std::forward<Result>(r);
|
||||
}
|
||||
return fn();
|
||||
}
|
||||
} // namespace xgboost::collective
|
||||
|
||||
@ -215,9 +215,9 @@ class SockAddrV4 {
|
||||
static SockAddrV4 Loopback();
|
||||
static SockAddrV4 InaddrAny();
|
||||
|
||||
in_port_t Port() const { return ntohs(addr_.sin_port); }
|
||||
[[nodiscard]] in_port_t Port() const { return ntohs(addr_.sin_port); }
|
||||
|
||||
std::string Addr() const {
|
||||
[[nodiscard]] std::string Addr() const {
|
||||
char buf[INET_ADDRSTRLEN];
|
||||
auto const *s = system::inet_ntop(static_cast<std::int32_t>(SockDomain::kV4), &addr_.sin_addr,
|
||||
buf, INET_ADDRSTRLEN);
|
||||
@ -226,7 +226,7 @@ class SockAddrV4 {
|
||||
}
|
||||
return {buf};
|
||||
}
|
||||
sockaddr_in const &Handle() const { return addr_; }
|
||||
[[nodiscard]] sockaddr_in const &Handle() const { return addr_; }
|
||||
};
|
||||
|
||||
/**
|
||||
@ -243,13 +243,13 @@ class SockAddress {
|
||||
explicit SockAddress(SockAddrV6 const &addr) : v6_{addr}, domain_{SockDomain::kV6} {}
|
||||
explicit SockAddress(SockAddrV4 const &addr) : v4_{addr} {}
|
||||
|
||||
auto Domain() const { return domain_; }
|
||||
[[nodiscard]] auto Domain() const { return domain_; }
|
||||
|
||||
bool IsV4() const { return Domain() == SockDomain::kV4; }
|
||||
bool IsV6() const { return !IsV4(); }
|
||||
[[nodiscard]] bool IsV4() const { return Domain() == SockDomain::kV4; }
|
||||
[[nodiscard]] bool IsV6() const { return !IsV4(); }
|
||||
|
||||
auto const &V4() const { return v4_; }
|
||||
auto const &V6() const { return v6_; }
|
||||
[[nodiscard]] auto const &V4() const { return v4_; }
|
||||
[[nodiscard]] auto const &V6() const { return v6_; }
|
||||
};
|
||||
|
||||
/**
|
||||
@ -261,6 +261,7 @@ class TCPSocket {
|
||||
|
||||
private:
|
||||
HandleT handle_{InvalidSocket()};
|
||||
bool non_blocking_{false};
|
||||
// There's no reliable way to extract the domain from a socket without first binding that
// socket on macOS.
|
||||
#if defined(__APPLE__)
|
||||
@ -276,7 +277,7 @@ class TCPSocket {
|
||||
/**
|
||||
* \brief Return the socket domain.
|
||||
*/
|
||||
auto Domain() const -> SockDomain {
|
||||
[[nodiscard]] auto Domain() const -> SockDomain {
|
||||
auto ret_iafamily = [](std::int32_t domain) {
|
||||
switch (domain) {
|
||||
case AF_INET:
|
||||
@ -321,10 +322,10 @@ class TCPSocket {
|
||||
#endif // platforms
|
||||
}
|
||||
|
||||
bool IsClosed() const { return handle_ == InvalidSocket(); }
|
||||
[[nodiscard]] bool IsClosed() const { return handle_ == InvalidSocket(); }
|
||||
|
||||
/** \brief get last error code if any */
|
||||
Result GetSockError() const {
|
||||
/** @brief get last error code if any */
|
||||
[[nodiscard]] Result GetSockError() const {
|
||||
std::int32_t optval = 0;
|
||||
socklen_t len = sizeof(optval);
|
||||
auto ret = getsockopt(handle_, SOL_SOCKET, SO_ERROR, reinterpret_cast<char *>(&optval), &len);
|
||||
@ -340,7 +341,7 @@ class TCPSocket {
|
||||
}
|
||||
|
||||
/** \brief check if anything bad happens */
|
||||
bool BadSocket() const {
|
||||
[[nodiscard]] bool BadSocket() const {
|
||||
if (IsClosed()) {
|
||||
return true;
|
||||
}
|
||||
@ -352,24 +353,63 @@ class TCPSocket {
|
||||
return false;
|
||||
}
|
||||
|
||||
void SetNonBlock(bool non_block) {
|
||||
[[nodiscard]] Result NonBlocking(bool non_block) {
|
||||
#if defined(_WIN32)
|
||||
u_long mode = non_block ? 1 : 0;
|
||||
xgboost_CHECK_SYS_CALL(ioctlsocket(handle_, FIONBIO, &mode), NO_ERROR);
|
||||
if (ioctlsocket(handle_, FIONBIO, &mode) != NO_ERROR) {
|
||||
return system::FailWithCode("Failed to set socket to non-blocking.");
|
||||
}
|
||||
#else
|
||||
std::int32_t flag = fcntl(handle_, F_GETFL, 0);
|
||||
if (flag == -1) {
|
||||
system::ThrowAtError("fcntl");
|
||||
auto rc = flag;
|
||||
if (rc == -1) {
|
||||
return system::FailWithCode("Failed to get socket flag.");
|
||||
}
|
||||
if (non_block) {
|
||||
flag |= O_NONBLOCK;
|
||||
} else {
|
||||
flag &= ~O_NONBLOCK;
|
||||
}
|
||||
if (fcntl(handle_, F_SETFL, flag) == -1) {
|
||||
system::ThrowAtError("fcntl");
|
||||
rc = fcntl(handle_, F_SETFL, flag);
|
||||
if (rc == -1) {
|
||||
return system::FailWithCode("Failed to set socket to non-blocking.");
|
||||
}
|
||||
#endif // _WIN32
|
||||
non_blocking_ = non_block;
|
||||
return Success();
|
||||
}
|
||||
[[nodiscard]] bool NonBlocking() const { return non_blocking_; }
|
||||
[[nodiscard]] Result RecvTimeout(std::chrono::seconds timeout) {
|
||||
// https://stackoverflow.com/questions/2876024/linux-is-there-a-read-or-recv-from-socket-with-timeout
|
||||
#if defined(_WIN32)
|
||||
DWORD tv = timeout.count() * 1000;
|
||||
auto rc =
|
||||
setsockopt(Handle(), SOL_SOCKET, SO_RCVTIMEO, reinterpret_cast<char *>(&tv), sizeof(tv));
|
||||
#else
|
||||
struct timeval tv;
|
||||
tv.tv_sec = timeout.count();
|
||||
tv.tv_usec = 0;
|
||||
auto rc = setsockopt(Handle(), SOL_SOCKET, SO_RCVTIMEO, reinterpret_cast<char const *>(&tv),
|
||||
sizeof(tv));
|
||||
#endif
|
||||
if (rc != 0) {
|
||||
return system::FailWithCode("Failed to set timeout on recv.");
|
||||
}
|
||||
return Success();
|
||||
}
|
||||
|
||||
[[nodiscard]] Result SetBufSize(std::int32_t n_bytes) {
|
||||
auto rc = setsockopt(this->Handle(), SOL_SOCKET, SO_SNDBUF, reinterpret_cast<char *>(&n_bytes),
|
||||
sizeof(n_bytes));
|
||||
if (rc != 0) {
|
||||
return system::FailWithCode("Failed to set send buffer size.");
|
||||
}
|
||||
rc = setsockopt(this->Handle(), SOL_SOCKET, SO_RCVBUF, reinterpret_cast<char *>(&n_bytes),
|
||||
sizeof(n_bytes));
|
||||
if (rc != 0) {
|
||||
return system::FailWithCode("Failed to set recv buffer size.");
|
||||
}
|
||||
return Success();
|
||||
}
|
||||
|
||||
void SetKeepAlive() {
|
||||
@ -391,14 +431,31 @@ class TCPSocket {
|
||||
* \brief Accept new connection, returns a new TCP socket for the new connection.
|
||||
*/
|
||||
TCPSocket Accept() {
|
||||
HandleT newfd = accept(handle_, nullptr, nullptr);
|
||||
if (newfd == InvalidSocket()) {
|
||||
HandleT newfd = accept(Handle(), nullptr, nullptr);
|
||||
#if defined(_WIN32)
|
||||
auto interrupt = WSAEINTR;
|
||||
#else
|
||||
auto interrupt = EINTR;
|
||||
#endif
|
||||
if (newfd == InvalidSocket() && system::LastError() != interrupt) {
|
||||
system::ThrowAtError("accept");
|
||||
}
|
||||
TCPSocket newsock{newfd};
|
||||
return newsock;
|
||||
}
|
||||
|
||||
[[nodiscard]] Result Accept(TCPSocket *out, SockAddrV4 *addr) {
|
||||
struct sockaddr_in caddr;
|
||||
socklen_t caddr_len = sizeof(caddr);
|
||||
HandleT newfd = accept(Handle(), reinterpret_cast<sockaddr *>(&caddr), &caddr_len);
|
||||
if (newfd == InvalidSocket()) {
|
||||
return system::FailWithCode("Failed to accept.");
|
||||
}
|
||||
*addr = SockAddrV4{caddr};
|
||||
*out = TCPSocket{newfd};
|
||||
return Success();
|
||||
}
|
||||
|
||||
~TCPSocket() {
|
||||
if (!IsClosed()) {
|
||||
Close();
|
||||
@ -413,9 +470,9 @@ class TCPSocket {
|
||||
return *this;
|
||||
}
|
||||
/**
|
||||
* \brief Return the native socket file descriptor.
|
||||
* @brief Return the native socket file descriptor.
|
||||
*/
|
||||
HandleT const &Handle() const { return handle_; }
|
||||
[[nodiscard]] HandleT const &Handle() const { return handle_; }
|
||||
/**
|
||||
* \brief Listen to incoming requests. Should be called after bind.
|
||||
*/
|
||||
@ -423,7 +480,7 @@ class TCPSocket {
|
||||
/**
|
||||
* \brief Bind socket to INADDR_ANY, return the port selected by the OS.
|
||||
*/
|
||||
in_port_t BindHost() {
|
||||
[[nodiscard]] in_port_t BindHost() {
|
||||
if (Domain() == SockDomain::kV6) {
|
||||
auto addr = SockAddrV6::InaddrAny();
|
||||
auto handle = reinterpret_cast<sockaddr const *>(&addr.Handle());
|
||||
@ -448,10 +505,53 @@ class TCPSocket {
|
||||
return ntohs(res_addr.sin_port);
|
||||
}
|
||||
}
|
||||
|
||||
[[nodiscard]] auto Port() const {
|
||||
if (this->Domain() == SockDomain::kV4) {
|
||||
sockaddr_in res_addr;
|
||||
socklen_t addrlen = sizeof(res_addr);
|
||||
auto code = getsockname(handle_, reinterpret_cast<sockaddr *>(&res_addr), &addrlen);
|
||||
if (code != 0) {
|
||||
return std::make_pair(system::FailWithCode("getsockname"), std::int32_t{0});
|
||||
}
|
||||
return std::make_pair(Success(), std::int32_t{ntohs(res_addr.sin_port)});
|
||||
} else {
|
||||
sockaddr_in6 res_addr;
|
||||
socklen_t addrlen = sizeof(res_addr);
|
||||
auto code = getsockname(handle_, reinterpret_cast<sockaddr *>(&res_addr), &addrlen);
|
||||
if (code != 0) {
|
||||
return std::make_pair(system::FailWithCode("getsockname"), std::int32_t{0});
|
||||
}
|
||||
return std::make_pair(Success(), std::int32_t{ntohs(res_addr.sin6_port)});
|
||||
}
|
||||
}
|
||||
|
||||
[[nodiscard]] Result Bind(StringView ip, std::int32_t *port) {
|
||||
// bind socket handle_ to ip
|
||||
auto addr = MakeSockAddress(ip, 0);
|
||||
std::int32_t errc{0};
|
||||
if (addr.IsV4()) {
|
||||
auto handle = reinterpret_cast<sockaddr const *>(&addr.V4().Handle());
|
||||
errc = bind(handle_, handle, sizeof(std::remove_reference_t<decltype(addr.V4().Handle())>));
|
||||
} else {
|
||||
auto handle = reinterpret_cast<sockaddr const *>(&addr.V6().Handle());
|
||||
errc = bind(handle_, handle, sizeof(std::remove_reference_t<decltype(addr.V6().Handle())>));
|
||||
}
|
||||
if (errc != 0) {
|
||||
return system::FailWithCode("Failed to bind socket.");
|
||||
}
|
||||
auto [rc, new_port] = this->Port();
|
||||
if (!rc.OK()) {
|
||||
return std::move(rc);
|
||||
}
|
||||
*port = new_port;
|
||||
return Success();
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Send data, without error then all data should be sent.
|
||||
*/
|
||||
auto SendAll(void const *buf, std::size_t len) {
|
||||
[[nodiscard]] auto SendAll(void const *buf, std::size_t len) {
|
||||
char const *_buf = reinterpret_cast<const char *>(buf);
|
||||
std::size_t ndone = 0;
|
||||
while (ndone < len) {
|
||||
@ -470,7 +570,7 @@ class TCPSocket {
|
||||
/**
|
||||
* \brief Receive data, without error then all data should be received.
|
||||
*/
|
||||
auto RecvAll(void *buf, std::size_t len) {
|
||||
[[nodiscard]] auto RecvAll(void *buf, std::size_t len) {
|
||||
char *_buf = reinterpret_cast<char *>(buf);
|
||||
std::size_t ndone = 0;
|
||||
while (ndone < len) {
|
||||
@ -524,7 +624,15 @@ class TCPSocket {
|
||||
*/
|
||||
void Close() {
|
||||
if (InvalidSocket() != handle_) {
|
||||
#if defined(_WIN32)
|
||||
auto rc = system::CloseSocket(handle_);
|
||||
// it's possible that we close TCP sockets after finalizing WSA due to detached thread.
|
||||
if (rc != 0 && system::LastError() != WSANOTINITIALISED) {
|
||||
system::ThrowAtError("close", rc);
|
||||
}
|
||||
#else
|
||||
xgboost_CHECK_SYS_CALL(system::CloseSocket(handle_), 0);
|
||||
#endif
|
||||
handle_ = InvalidSocket();
|
||||
}
|
||||
}
|
||||
@ -546,6 +654,24 @@ class TCPSocket {
|
||||
socket.domain_ = domain;
|
||||
#endif // defined(__APPLE__)
|
||||
return socket;
|
||||
#endif // defined(xgboost_IS_MINGW)
|
||||
}
|
||||
|
||||
static TCPSocket *CreatePtr(SockDomain domain) {
|
||||
#if defined(xgboost_IS_MINGW)
|
||||
MingWError();
|
||||
return nullptr;
|
||||
#else
|
||||
auto fd = socket(static_cast<std::int32_t>(domain), SOCK_STREAM, 0);
|
||||
if (fd == InvalidSocket()) {
|
||||
system::ThrowAtError("socket");
|
||||
}
|
||||
auto socket = new TCPSocket{fd};
|
||||
|
||||
#if defined(__APPLE__)
|
||||
socket->domain_ = domain;
|
||||
#endif // defined(__APPLE__)
|
||||
return socket;
|
||||
#endif // defined(xgboost_IS_MINGW)
|
||||
}
|
||||
};
|
||||
@ -567,12 +693,36 @@ class TCPSocket {
|
||||
xgboost::collective::TCPSocket *out_conn);
|
||||
|
||||
/**
|
||||
* \brief Get the local host name.
|
||||
* @brief Get the local host name.
|
||||
*/
|
||||
inline std::string GetHostName() {
|
||||
char buf[HOST_NAME_MAX];
|
||||
xgboost_CHECK_SYS_CALL(gethostname(&buf[0], HOST_NAME_MAX), 0);
|
||||
return buf;
|
||||
[[nodiscard]] Result GetHostName(std::string *p_out);
|
||||
|
||||
/**
|
||||
* @brief inet_ntop
|
||||
*/
|
||||
template <typename H>
|
||||
Result INetNToP(H const &host, std::string *p_out) {
|
||||
std::string &ip = *p_out;
|
||||
switch (host->h_addrtype) {
|
||||
case AF_INET: {
|
||||
auto addr = reinterpret_cast<struct in_addr *>(host->h_addr_list[0]);
|
||||
char str[INET_ADDRSTRLEN];
|
||||
inet_ntop(AF_INET, addr, str, INET_ADDRSTRLEN);
|
||||
ip = str;
|
||||
break;
|
||||
}
|
||||
case AF_INET6: {
|
||||
auto addr = reinterpret_cast<struct in6_addr *>(host->h_addr_list[0]);
|
||||
char str[INET6_ADDRSTRLEN];
|
||||
inet_ntop(AF_INET6, addr, str, INET6_ADDRSTRLEN);
|
||||
ip = str;
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
return Fail("Invalid address type.");
|
||||
}
|
||||
}
|
||||
return Success();
|
||||
}
|
||||
} // namespace collective
|
||||
} // namespace xgboost
|
||||
|
||||
@ -29,31 +29,37 @@ struct DeviceSym {
|
||||
* viewing types like `linalg::TensorView`.
|
||||
*/
|
||||
struct DeviceOrd {
|
||||
// Constant representing the device ID of CPU.
|
||||
static bst_d_ordinal_t constexpr CPUOrdinal() { return -1; }
|
||||
static bst_d_ordinal_t constexpr InvalidOrdinal() { return -2; }
|
||||
|
||||
enum Type : std::int16_t { kCPU = 0, kCUDA = 1 } device{kCPU};
|
||||
// CUDA device ordinal.
|
||||
bst_d_ordinal_t ordinal{-1};
|
||||
bst_d_ordinal_t ordinal{CPUOrdinal()};
|
||||
|
||||
[[nodiscard]] bool IsCUDA() const { return device == kCUDA; }
|
||||
[[nodiscard]] bool IsCPU() const { return device == kCPU; }
|
||||
|
||||
DeviceOrd() = default;
|
||||
constexpr DeviceOrd() = default;
|
||||
constexpr DeviceOrd(Type type, bst_d_ordinal_t ord) : device{type}, ordinal{ord} {}
|
||||
|
||||
DeviceOrd(DeviceOrd const& that) = default;
|
||||
DeviceOrd& operator=(DeviceOrd const& that) = default;
|
||||
DeviceOrd(DeviceOrd&& that) = default;
|
||||
DeviceOrd& operator=(DeviceOrd&& that) = default;
|
||||
constexpr DeviceOrd(DeviceOrd const& that) = default;
|
||||
constexpr DeviceOrd& operator=(DeviceOrd const& that) = default;
|
||||
constexpr DeviceOrd(DeviceOrd&& that) = default;
|
||||
constexpr DeviceOrd& operator=(DeviceOrd&& that) = default;
|
||||
|
||||
/**
|
||||
* @brief Constructor for CPU.
|
||||
*/
|
||||
[[nodiscard]] constexpr static auto CPU() { return DeviceOrd{kCPU, -1}; }
|
||||
[[nodiscard]] constexpr static auto CPU() { return DeviceOrd{kCPU, CPUOrdinal()}; }
|
||||
/**
|
||||
* @brief Constructor for CUDA device.
|
||||
*
|
||||
* @param ordinal CUDA device ordinal.
|
||||
*/
|
||||
[[nodiscard]] static auto CUDA(bst_d_ordinal_t ordinal) { return DeviceOrd{kCUDA, ordinal}; }
|
||||
[[nodiscard]] static constexpr auto CUDA(bst_d_ordinal_t ordinal) {
|
||||
return DeviceOrd{kCUDA, ordinal};
|
||||
}
|
||||
|
||||
[[nodiscard]] bool operator==(DeviceOrd const& that) const {
|
||||
return device == that.device && ordinal == that.ordinal;
|
||||
@ -78,25 +84,26 @@ struct DeviceOrd {
|
||||
|
||||
static_assert(sizeof(DeviceOrd) == sizeof(std::int32_t));
|
||||
|
||||
std::ostream& operator<<(std::ostream& os, DeviceOrd ord);
|
||||
|
||||
/**
|
||||
* @brief Runtime context for XGBoost. Contains information like threads and device.
|
||||
*/
|
||||
struct Context : public XGBoostParameter<Context> {
|
||||
private:
|
||||
// User interfacing parameter for device ordinal
|
||||
std::string device{DeviceSym::CPU()}; // NOLINT
|
||||
// The device object for the current context. We are in the middle of replacing the
|
||||
// `gpu_id` with this device field.
|
||||
// The device ordinal set by user
|
||||
DeviceOrd device_{DeviceOrd::CPU()};
|
||||
|
||||
public:
|
||||
// Constant representing the device ID of CPU.
|
||||
static bst_d_ordinal_t constexpr kCpuId = -1;
|
||||
static bst_d_ordinal_t constexpr InvalidOrdinal() { return -2; }
|
||||
static std::int64_t constexpr kDefaultSeed = 0;
|
||||
|
||||
public:
|
||||
Context();
|
||||
|
||||
void Init(Args const& kwargs);
|
||||
|
||||
template <typename Container>
|
||||
Args UpdateAllowUnknown(Container const& kwargs) {
|
||||
auto args = XGBoostParameter<Context>::UpdateAllowUnknown(kwargs);
|
||||
@ -104,7 +111,6 @@ struct Context : public XGBoostParameter<Context> {
|
||||
return args;
|
||||
}
|
||||
|
||||
std::int32_t gpu_id{kCpuId};
|
||||
// The number of threads to use if OpenMP is enabled. If equals 0, use the system default.
|
||||
std::int32_t nthread{0}; // NOLINT
|
||||
// stored random seed
|
||||
@ -116,7 +122,8 @@ struct Context : public XGBoostParameter<Context> {
|
||||
bool validate_parameters{false};
|
||||
|
||||
/**
|
||||
* @brief Configure the parameter `gpu_id'.
|
||||
* @brief Configure the parameter `device'. Deprecated, will remove once `gpu_id` is
|
||||
* removed.
|
||||
*
|
||||
* @param require_gpu Whether GPU is explicitly required by the user through other
|
||||
* configurations.
|
||||
@ -212,9 +219,7 @@ struct Context : public XGBoostParameter<Context> {
|
||||
private:
|
||||
void SetDeviceOrdinal(Args const& kwargs);
|
||||
Context& SetDevice(DeviceOrd d) {
|
||||
this->device_ = d;
|
||||
this->gpu_id = d.ordinal; // this can be removed once we move away from `gpu_id`.
|
||||
this->device = d.Name();
|
||||
this->device = (this->device_ = d).Name();
|
||||
return *this;
|
||||
}
|
||||
|
||||
|
||||
@ -106,10 +106,10 @@ class MetaInfo {
|
||||
MetaInfo& operator=(MetaInfo&& that) = default;
|
||||
MetaInfo& operator=(MetaInfo const& that) = delete;
|
||||
|
||||
/*!
|
||||
* \brief Validate all metainfo.
|
||||
/**
|
||||
* @brief Validate all metainfo.
|
||||
*/
|
||||
void Validate(int32_t device) const;
|
||||
void Validate(DeviceOrd device) const;
|
||||
|
||||
MetaInfo Slice(common::Span<int32_t const> ridxs) const;
|
||||
|
||||
@ -559,8 +559,7 @@ class DMatrix {
|
||||
*
|
||||
* \param uri The URI of input.
|
||||
* \param silent Whether print information during loading.
|
||||
* \param data_split_mode In distributed mode, split the input according this mode; otherwise,
|
||||
* it's just an indicator on how the input was split beforehand.
|
||||
* \param data_split_mode Indicate how the data was split beforehand.
|
||||
* \return The created DMatrix.
|
||||
*/
|
||||
static DMatrix* Load(const std::string& uri, bool silent = true,
|
||||
|
||||
@ -88,9 +88,9 @@ class HostDeviceVector {
|
||||
static_assert(std::is_standard_layout<T>::value, "HostDeviceVector admits only POD types");
|
||||
|
||||
public:
|
||||
explicit HostDeviceVector(size_t size = 0, T v = T(), int device = -1);
|
||||
HostDeviceVector(std::initializer_list<T> init, int device = -1);
|
||||
explicit HostDeviceVector(const std::vector<T>& init, int device = -1);
|
||||
explicit HostDeviceVector(size_t size = 0, T v = T(), DeviceOrd device = DeviceOrd::CPU());
|
||||
HostDeviceVector(std::initializer_list<T> init, DeviceOrd device = DeviceOrd::CPU());
|
||||
explicit HostDeviceVector(const std::vector<T>& init, DeviceOrd device = DeviceOrd::CPU());
|
||||
~HostDeviceVector();
|
||||
|
||||
HostDeviceVector(const HostDeviceVector<T>&) = delete;
|
||||
@ -99,17 +99,9 @@ class HostDeviceVector {
|
||||
HostDeviceVector<T>& operator=(const HostDeviceVector<T>&) = delete;
|
||||
HostDeviceVector<T>& operator=(HostDeviceVector<T>&&);
|
||||
|
||||
bool Empty() const { return Size() == 0; }
|
||||
size_t Size() const;
|
||||
int DeviceIdx() const;
|
||||
DeviceOrd Device() const {
|
||||
auto idx = this->DeviceIdx();
|
||||
if (idx == DeviceOrd::CPU().ordinal) {
|
||||
return DeviceOrd::CPU();
|
||||
} else {
|
||||
return DeviceOrd::CUDA(idx);
|
||||
}
|
||||
}
|
||||
[[nodiscard]] bool Empty() const { return Size() == 0; }
|
||||
[[nodiscard]] std::size_t Size() const;
|
||||
[[nodiscard]] DeviceOrd Device() const;
|
||||
common::Span<T> DeviceSpan();
|
||||
common::Span<const T> ConstDeviceSpan() const;
|
||||
common::Span<const T> DeviceSpan() const { return ConstDeviceSpan(); }
|
||||
@ -135,13 +127,12 @@ class HostDeviceVector {
|
||||
const std::vector<T>& ConstHostVector() const;
|
||||
const std::vector<T>& HostVector() const {return ConstHostVector(); }
|
||||
|
||||
bool HostCanRead() const;
|
||||
bool HostCanWrite() const;
|
||||
bool DeviceCanRead() const;
|
||||
bool DeviceCanWrite() const;
|
||||
GPUAccess DeviceAccess() const;
|
||||
[[nodiscard]] bool HostCanRead() const;
|
||||
[[nodiscard]] bool HostCanWrite() const;
|
||||
[[nodiscard]] bool DeviceCanRead() const;
|
||||
[[nodiscard]] bool DeviceCanWrite() const;
|
||||
[[nodiscard]] GPUAccess DeviceAccess() const;
|
||||
|
||||
void SetDevice(int device) const;
|
||||
void SetDevice(DeviceOrd device) const;
|
||||
|
||||
void Resize(size_t new_size, T v = T());
|
||||
|
||||
@ -372,6 +372,19 @@ class Json {
|
||||
/*! \brief Use your own JsonWriter. */
|
||||
static void Dump(Json json, JsonWriter* writer);
|
||||
|
||||
template <typename Container = std::string>
|
||||
static Container Dump(Json json) {
|
||||
if constexpr (std::is_same_v<Container, std::string>) {
|
||||
std::string str;
|
||||
Dump(json, &str);
|
||||
return str;
|
||||
} else {
|
||||
std::vector<char> str;
|
||||
Dump(json, &str);
|
||||
return str;
|
||||
}
|
||||
}
|
||||
|
||||
Json() = default;
|
||||
|
||||
// number
|
||||
@ -595,44 +608,6 @@ using Boolean = JsonBoolean;
|
||||
using String = JsonString;
|
||||
using Null = JsonNull;
|
||||
|
||||
// Utils tailored for XGBoost.
|
||||
namespace detail {
|
||||
template <typename Head>
|
||||
bool TypeCheckImpl(Json const& value) {
|
||||
return IsA<Head>(value);
|
||||
}
|
||||
|
||||
template <typename Head, typename... JT>
|
||||
std::enable_if_t<sizeof...(JT) != 0, bool> TypeCheckImpl(Json const& value) {
|
||||
return IsA<Head>(value) || TypeCheckImpl<JT...>(value);
|
||||
}
|
||||
|
||||
template <typename Head>
|
||||
std::string TypeCheckError() {
|
||||
return "`" + Head{}.TypeStr() + "`";
|
||||
}
|
||||
|
||||
template <typename Head, typename... JT>
|
||||
std::enable_if_t<sizeof...(JT) != 0, std::string> TypeCheckError() {
|
||||
return "`" + Head{}.TypeStr() + "`, " + TypeCheckError<JT...>();
|
||||
}
|
||||
} // namespace detail
|
||||
|
||||
/**
|
||||
* \brief Type check for JSON-based parameters
|
||||
*
|
||||
* \tparam JT Expected JSON types.
|
||||
* \param value Value to be checked.
|
||||
*/
|
||||
template <typename... JT>
|
||||
void TypeCheck(Json const& value, StringView name) {
|
||||
if (!detail::TypeCheckImpl<JT...>(value)) {
|
||||
LOG(FATAL) << "Invalid type for: `" << name << "`, expecting one of the: {`"
|
||||
<< detail::TypeCheckError<JT...>() << "}, got: `" << value.GetValue().TypeStr()
|
||||
<< "`";
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Convert XGBoost parameter to JSON object.
|
||||
*
|
||||
|
||||
@ -603,13 +603,13 @@ auto MakeTensorView(Context const *ctx, Order order, common::Span<T> data, S &&.
|
||||
|
||||
template <typename T, typename... S>
|
||||
auto MakeTensorView(Context const *ctx, HostDeviceVector<T> *data, S &&...shape) {
|
||||
auto span = ctx->IsCPU() ? data->HostSpan() : data->DeviceSpan();
|
||||
auto span = ctx->IsCUDA() ? data->DeviceSpan() : data->HostSpan();
|
||||
return MakeTensorView(ctx->Device(), span, std::forward<S>(shape)...);
|
||||
}
|
||||
|
||||
template <typename T, typename... S>
|
||||
auto MakeTensorView(Context const *ctx, HostDeviceVector<T> const *data, S &&...shape) {
|
||||
auto span = ctx->IsCPU() ? data->ConstHostSpan() : data->ConstDeviceSpan();
|
||||
auto span = ctx->IsCUDA() ? data->ConstDeviceSpan() : data->ConstHostSpan();
|
||||
return MakeTensorView(ctx->Device(), span, std::forward<S>(shape)...);
|
||||
}
|
||||
|
||||
@ -659,13 +659,13 @@ auto MakeVec(T *ptr, size_t s, DeviceOrd device = DeviceOrd::CPU()) {
|
||||
|
||||
template <typename T>
|
||||
auto MakeVec(HostDeviceVector<T> *data) {
|
||||
return MakeVec(data->DeviceIdx() == -1 ? data->HostPointer() : data->DevicePointer(),
|
||||
data->Size(), data->Device());
|
||||
return MakeVec(data->Device().IsCPU() ? data->HostPointer() : data->DevicePointer(), data->Size(),
|
||||
data->Device());
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
auto MakeVec(HostDeviceVector<T> const *data) {
|
||||
return MakeVec(data->DeviceIdx() == -1 ? data->ConstHostPointer() : data->ConstDevicePointer(),
|
||||
return MakeVec(data->Device().IsCPU() ? data->ConstHostPointer() : data->ConstDevicePointer(),
|
||||
data->Size(), data->Device());
|
||||
}
|
||||
|
||||
@ -757,13 +757,13 @@ class Tensor {
|
||||
Order order_{Order::kC};
|
||||
|
||||
template <typename I, std::int32_t D>
|
||||
void Initialize(I const (&shape)[D], std::int32_t device) {
|
||||
void Initialize(I const (&shape)[D], DeviceOrd device) {
|
||||
static_assert(D <= kDim, "Invalid shape.");
|
||||
std::copy(shape, shape + D, shape_);
|
||||
for (auto i = D; i < kDim; ++i) {
|
||||
shape_[i] = 1;
|
||||
}
|
||||
if (device >= 0) {
|
||||
if (device.IsCUDA()) {
|
||||
data_.SetDevice(device);
|
||||
data_.ConstDevicePointer(); // Pull to device;
|
||||
}
|
||||
@ -780,14 +780,11 @@ class Tensor {
|
||||
* See \ref TensorView for parameters of this constructor.
|
||||
*/
|
||||
template <typename I, int32_t D>
|
||||
explicit Tensor(I const (&shape)[D], std::int32_t device, Order order = kC)
|
||||
: Tensor{common::Span<I const, D>{shape}, device, order} {}
|
||||
template <typename I, int32_t D>
|
||||
explicit Tensor(I const (&shape)[D], DeviceOrd device, Order order = kC)
|
||||
: Tensor{common::Span<I const, D>{shape}, device.ordinal, order} {}
|
||||
: Tensor{common::Span<I const, D>{shape}, device, order} {}
|
||||
|
||||
template <typename I, size_t D>
|
||||
explicit Tensor(common::Span<I const, D> shape, std::int32_t device, Order order = kC)
|
||||
explicit Tensor(common::Span<I const, D> shape, DeviceOrd device, Order order = kC)
|
||||
: order_{order} {
|
||||
// No device unroll as this is a host only function.
|
||||
std::copy(shape.data(), shape.data() + D, shape_);
|
||||
@ -795,11 +792,11 @@ class Tensor {
|
||||
shape_[i] = 1;
|
||||
}
|
||||
auto size = detail::CalcSize(shape_);
|
||||
if (device >= 0) {
|
||||
if (device.IsCUDA()) {
|
||||
data_.SetDevice(device);
|
||||
}
|
||||
data_.Resize(size);
|
||||
if (device >= 0) {
|
||||
if (device.IsCUDA()) {
|
||||
data_.DevicePointer(); // Pull to device
|
||||
}
|
||||
}
|
||||
@ -807,7 +804,7 @@ class Tensor {
|
||||
* Initialize from 2 host iterators.
|
||||
*/
|
||||
template <typename It, typename I, int32_t D>
|
||||
explicit Tensor(It begin, It end, I const (&shape)[D], std::int32_t device, Order order = kC)
|
||||
explicit Tensor(It begin, It end, I const (&shape)[D], DeviceOrd device, Order order = kC)
|
||||
: order_{order} {
|
||||
auto &h_vec = data_.HostVector();
|
||||
h_vec.insert(h_vec.begin(), begin, end);
|
||||
@ -816,7 +813,7 @@ class Tensor {
|
||||
}
|
||||
|
||||
template <typename I, int32_t D>
|
||||
explicit Tensor(std::initializer_list<T> data, I const (&shape)[D], std::int32_t device,
|
||||
explicit Tensor(std::initializer_list<T> data, I const (&shape)[D], DeviceOrd device,
|
||||
Order order = kC)
|
||||
: order_{order} {
|
||||
auto &h_vec = data_.HostVector();
|
||||
@ -824,10 +821,6 @@ class Tensor {
|
||||
// shape
|
||||
this->Initialize(shape, device);
|
||||
}
|
||||
template <typename I, int32_t D>
|
||||
explicit Tensor(std::initializer_list<T> data, I const (&shape)[D], DeviceOrd device,
|
||||
Order order = kC)
|
||||
: Tensor{data, shape, device.ordinal, order} {}
|
||||
/**
|
||||
* \brief Index operator. Not thread safe, should not be used in performance critical
|
||||
* region. For more efficient indexing, consider getting a view first.
|
||||
@ -944,9 +937,7 @@ class Tensor {
|
||||
/**
|
||||
* \brief Set device ordinal for this tensor.
|
||||
*/
|
||||
void SetDevice(int32_t device) const { data_.SetDevice(device); }
|
||||
void SetDevice(DeviceOrd device) const { data_.SetDevice(device); }
|
||||
[[nodiscard]] int32_t DeviceIdx() const { return data_.DeviceIdx(); }
|
||||
[[nodiscard]] DeviceOrd Device() const { return data_.Device(); }
|
||||
};
|
||||
|
||||
@ -962,7 +953,7 @@ using Vector = Tensor<T, 1>;
|
||||
template <typename T, typename... Index>
|
||||
auto Empty(Context const *ctx, Index &&...index) {
|
||||
Tensor<T, sizeof...(Index)> t;
|
||||
t.SetDevice(ctx->gpu_id);
|
||||
t.SetDevice(ctx->Device());
|
||||
t.Reshape(index...);
|
||||
return t;
|
||||
}
|
||||
@ -973,7 +964,7 @@ auto Empty(Context const *ctx, Index &&...index) {
|
||||
template <typename T, typename... Index>
|
||||
auto Constant(Context const *ctx, T v, Index &&...index) {
|
||||
Tensor<T, sizeof...(Index)> t;
|
||||
t.SetDevice(ctx->gpu_id);
|
||||
t.SetDevice(ctx->Device());
|
||||
t.Reshape(index...);
|
||||
t.Data()->Fill(std::move(v));
|
||||
return t;
|
||||
@ -990,8 +981,8 @@ auto Zeros(Context const *ctx, Index &&...index) {
|
||||
// Only first axis is supported for now.
|
||||
template <typename T, int32_t D>
|
||||
void Stack(Tensor<T, D> *l, Tensor<T, D> const &r) {
|
||||
if (r.DeviceIdx() >= 0) {
|
||||
l->SetDevice(r.DeviceIdx());
|
||||
if (r.Device().IsCUDA()) {
|
||||
l->SetDevice(r.Device());
|
||||
}
|
||||
l->ModifyInplace([&](HostDeviceVector<T> *data, common::Span<size_t, D> shape) {
|
||||
for (size_t i = 1; i < D; ++i) {
|
||||
|
||||
@ -52,9 +52,9 @@ class PredictionContainer : public DMatrixCache<PredictionCacheEntry> {
|
||||
|
||||
public:
|
||||
PredictionContainer() : DMatrixCache<PredictionCacheEntry>{DefaultSize()} {}
|
||||
PredictionCacheEntry& Cache(std::shared_ptr<DMatrix> m, std::int32_t device) {
|
||||
PredictionCacheEntry& Cache(std::shared_ptr<DMatrix> m, DeviceOrd device) {
|
||||
auto p_cache = this->CacheItem(m);
|
||||
if (device != Context::kCpuId) {
|
||||
if (device.IsCUDA()) {
|
||||
p_cache->predictions.SetDevice(device);
|
||||
}
|
||||
return *p_cache;
|
||||
|
||||
@ -29,7 +29,7 @@ struct StringView {
|
||||
public:
|
||||
constexpr StringView() = default;
|
||||
constexpr StringView(CharT const* str, std::size_t size) : str_{str}, size_{size} {}
|
||||
explicit StringView(std::string const& str) : str_{str.c_str()}, size_{str.size()} {}
|
||||
StringView(std::string const& str) : str_{str.c_str()}, size_{str.size()} {} // NOLINT
|
||||
constexpr StringView(CharT const* str) // NOLINT
|
||||
: str_{str}, size_{str == nullptr ? 0ul : Traits::length(str)} {}
|
||||
|
||||
|
||||
@ -4,16 +4,16 @@ list(APPEND JVM_SOURCES
|
||||
${PROJECT_SOURCE_DIR}/jvm-packages/xgboost4j/src/native/xgboost4j.cpp
|
||||
${PROJECT_SOURCE_DIR}/jvm-packages/xgboost4j-gpu/src/native/xgboost4j-gpu.cpp)
|
||||
|
||||
if (USE_CUDA)
|
||||
if(USE_CUDA)
|
||||
list(APPEND JVM_SOURCES
|
||||
${PROJECT_SOURCE_DIR}/jvm-packages/xgboost4j-gpu/src/native/xgboost4j-gpu.cu)
|
||||
endif (USE_CUDA)
|
||||
endif()
|
||||
|
||||
add_library(xgboost4j SHARED ${JVM_SOURCES} ${XGBOOST_OBJ_SOURCES})
|
||||
|
||||
if (ENABLE_ALL_WARNINGS)
|
||||
if(ENABLE_ALL_WARNINGS)
|
||||
target_compile_options(xgboost4j PUBLIC -Wall -Wextra)
|
||||
endif (ENABLE_ALL_WARNINGS)
|
||||
endif()
|
||||
|
||||
target_link_libraries(xgboost4j PRIVATE objxgboost)
|
||||
target_include_directories(xgboost4j
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright (c) 2014-2022 by Contributors
|
||||
Copyright (c) 2014-2023 by Contributors
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
@ -32,57 +32,53 @@ class ExternalCheckpointManagerSuite extends AnyFunSuite with TmpFolderPerSuite
|
||||
}
|
||||
|
||||
private def createNewModels():
|
||||
(String, XGBoostClassificationModel, XGBoostClassificationModel) = {
|
||||
(String, XGBoostClassificationModel, XGBoostClassificationModel) = {
|
||||
val tmpPath = createTmpFolder("test").toAbsolutePath.toString
|
||||
val (model4, model8) = {
|
||||
val (model2, model4) = {
|
||||
val training = buildDataFrame(Classification.train)
|
||||
val paramMap = produceParamMap(tmpPath, 2)
|
||||
(new XGBoostClassifier(paramMap ++ Seq("num_round" -> 2)).fit(training),
|
||||
new XGBoostClassifier(paramMap ++ Seq("num_round" -> 4)).fit(training))
|
||||
}
|
||||
(tmpPath, model4, model8)
|
||||
(tmpPath, model2, model4)
|
||||
}
|
||||
|
||||
test("test update/load models") {
|
||||
val (tmpPath, model4, model8) = createNewModels()
|
||||
val (tmpPath, model2, model4) = createNewModels()
|
||||
val manager = new ExternalCheckpointManager(tmpPath, FileSystem.get(sc.hadoopConfiguration))
|
||||
|
||||
manager.updateCheckpoint(model4._booster.booster)
|
||||
manager.updateCheckpoint(model2._booster.booster)
|
||||
var files = FileSystem.get(sc.hadoopConfiguration).listStatus(new Path(tmpPath))
|
||||
assert(files.length == 1)
|
||||
assert(files.head.getPath.getName == "4.model")
|
||||
assert(manager.loadCheckpointAsScalaBooster().getVersion == 4)
|
||||
assert(files.head.getPath.getName == "1.model")
|
||||
assert(manager.loadCheckpointAsScalaBooster().getNumBoostedRound == 2)
|
||||
|
||||
manager.updateCheckpoint(model8._booster)
|
||||
manager.updateCheckpoint(model4._booster)
|
||||
files = FileSystem.get(sc.hadoopConfiguration).listStatus(new Path(tmpPath))
|
||||
assert(files.length == 1)
|
||||
assert(files.head.getPath.getName == "8.model")
|
||||
assert(manager.loadCheckpointAsScalaBooster().getVersion == 8)
|
||||
assert(files.head.getPath.getName == "3.model")
|
||||
assert(manager.loadCheckpointAsScalaBooster().getNumBoostedRound == 4)
|
||||
}
|
||||
|
||||
test("test cleanUpHigherVersions") {
|
||||
val (tmpPath, model4, model8) = createNewModels()
|
||||
val (tmpPath, model2, model4) = createNewModels()
|
||||
|
||||
val manager = new ExternalCheckpointManager(tmpPath, FileSystem.get(sc.hadoopConfiguration))
|
||||
manager.updateCheckpoint(model8._booster)
|
||||
manager.cleanUpHigherVersions(8)
|
||||
assert(new File(s"$tmpPath/8.model").exists())
|
||||
manager.updateCheckpoint(model4._booster)
|
||||
manager.cleanUpHigherVersions(3)
|
||||
assert(new File(s"$tmpPath/3.model").exists())
|
||||
|
||||
manager.cleanUpHigherVersions(4)
|
||||
assert(!new File(s"$tmpPath/8.model").exists())
|
||||
manager.cleanUpHigherVersions(2)
|
||||
assert(!new File(s"$tmpPath/3.model").exists())
|
||||
}
|
||||
|
||||
test("test checkpoint rounds") {
|
||||
import scala.collection.JavaConverters._
|
||||
val (tmpPath, model4, model8) = createNewModels()
|
||||
val (tmpPath, model2, model4) = createNewModels()
|
||||
val manager = new ExternalCheckpointManager(tmpPath, FileSystem.get(sc.hadoopConfiguration))
|
||||
assertResult(Seq(7))(
|
||||
manager.getCheckpointRounds(0, 7).asScala)
|
||||
assertResult(Seq(2, 4, 6, 7))(
|
||||
manager.getCheckpointRounds(2, 7).asScala)
|
||||
manager.updateCheckpoint(model4._booster)
|
||||
assertResult(Seq(4, 6, 7))(
|
||||
manager.getCheckpointRounds(2, 7).asScala)
|
||||
assertResult(Seq(2))(manager.getCheckpointRounds(0, 0, 3).asScala)
|
||||
assertResult(Seq(0, 2, 4, 6))(manager.getCheckpointRounds(0, 2, 7).asScala)
|
||||
assertResult(Seq(0, 2, 4, 6, 7))(manager.getCheckpointRounds(0, 2, 8).asScala)
|
||||
}
|
||||
|
||||
|
||||
@ -109,8 +105,8 @@ class ExternalCheckpointManagerSuite extends AnyFunSuite with TmpFolderPerSuite
|
||||
// Check only one model is kept after training
|
||||
val files = FileSystem.get(sc.hadoopConfiguration).listStatus(new Path(tmpPath))
|
||||
assert(files.length == 1)
|
||||
assert(files.head.getPath.getName == "8.model")
|
||||
val tmpModel = SXGBoost.loadModel(s"$tmpPath/8.model")
|
||||
assert(files.head.getPath.getName == "4.model")
|
||||
val tmpModel = SXGBoost.loadModel(s"$tmpPath/4.model")
|
||||
// Train next model based on prev model
|
||||
val nextModel = new XGBoostClassifier(paramMap ++ Seq("num_round" -> 8)).fit(training)
|
||||
assert(error(tmpModel) >= error(prevModel._booster))
|
||||
|
||||
Some files were not shown because too many files have changed in this diff.