Compare commits


252 Commits

Author SHA1 Message Date
Jiaming Yuan
f75c007f27 Make 1.6.0 release. (#7813) 2022-04-16 08:43:21 +08:00
Jiaming Yuan
816e788b29 [backport] #7808 #7810 (#7811)
* [jvm-packages] add hostIp and python exec for rabit tracker (#7808)

* Fix training continuation with categorical model. (#7810)

* Make sure the task is initialized before construction of tree updater.

This is a quick fix meant to be backported to 1.6; for a full fix we should pass the model
param into the tree updater by reference instead.

Co-authored-by: Bobby Wang <wbo4958@gmail.com>
2022-04-15 19:56:42 +08:00
Jiaming Yuan
3ee3b18a22 [doc] fix a typo in jvm/index.rst (#7806) [skip ci] (#7807)
Co-authored-by: Bobby Wang <wbo4958@gmail.com>
2022-04-14 10:41:54 +08:00
Jiaming Yuan
ece4dc457b [backport] Backport jvm changes to 1.6. (#7803)
* [doc] improve xgboost4j-spark-gpu doc [skip ci] (#7793)


Co-authored-by: Sameer Raheja <sameerz@users.noreply.github.com>

* [jvm-packages] fix evaluation when featuresCols is used (#7798)

Co-authored-by: Bobby Wang <wbo4958@gmail.com>
Co-authored-by: Sameer Raheja <sameerz@users.noreply.github.com>
2022-04-13 17:35:29 +08:00
Jiaming Yuan
67298ccd03 [backport] Backport JVM fixes and document update to 1.6 (#7792)
* [jvm-packages] unify setFeaturesCol API for XGBoostRegressor (#7784)

* [jvm-packages] add doc for xgboost4j-spark-gpu (#7779)


Co-authored-by: Jiaming Yuan <jm.yuan@outlook.com>

* [jvm-packages] remove the dep of com.fasterxml.jackson (#7791)

* [jvm-packages] xgboost4j-spark should work when featuresCols is specified (#7789)

Co-authored-by: Bobby Wang <wbo4958@gmail.com>
2022-04-08 14:18:46 +08:00
Philip Hyunsu Cho
78d231264a [CI] Enable faulthandler to show details when 0xC0000005 error occurs (#7771) 2022-03-30 19:16:54 -07:00
Jiaming Yuan
4615fa51ef Drop support for deprecated CUDA architecture. (#7767)
* Drop support for deprecated CUDA architecture.

* Check file size at release branch.

* Use 200 MB limit

Co-authored-by: Philip Hyunsu Cho <chohyu01@cs.washington.edu>
2022-03-30 15:16:35 -07:00
Jiaming Yuan
4bd5a33b10 Make rc1 release. (#7764) 2022-03-30 21:32:40 +08:00
Jiaming Yuan
9150fdbd4d Support pandas nullable types. (#7760) 2022-03-30 08:51:52 +08:00
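As a quick illustration of what this enables, a minimal sketch with made-up toy data, assuming the 1.6 Python API:

```python
import numpy as np
import pandas as pd
import xgboost as xgb

# Columns using pandas extension (nullable) dtypes; pd.NA marks missing values.
df = pd.DataFrame(
    {
        "a": pd.array([1, 2, pd.NA, 4], dtype="Int64"),
        "b": pd.array([True, pd.NA, False, True], dtype="boolean"),
    }
)
y = np.array([0.0, 1.0, 1.0, 0.0])

# The DMatrix constructor accepts these dtypes directly, treating pd.NA as missing.
dtrain = xgb.DMatrix(df, label=y)
booster = xgb.train({"objective": "reg:squarederror"}, dtrain, num_boost_round=2)
```
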
Jiaming Yuan
d4796482b5 Fix failures on R hub and Win builder. (#7763)
* Update date.
* Workaround amalgamation build with clang. (SimpleDMatrix instantiation)
* Workaround compiler error with driver push.
* Revert autoconf requirement.
* Fix model IO on 32-bit environment. (i386)
* Clarify the function name.
2022-03-30 07:14:33 +08:00
Jiaming Yuan
a50b84244e Cleanup configuration for constraints. (#7758) 2022-03-29 04:22:46 +08:00
Jiaming Yuan
3c9b04460a Move num_parallel_tree to model parameter. (#7751)
The size of the forest should be a property of the model itself rather than a training
hyper-parameter.
2022-03-29 02:32:42 +08:00
Jiaming Yuan
8b3ecfca25 Mitigate flaky tests. (#7749)
* Skip the non-increasing test with external memory when subsample is used.
* Increase the number of bins for the boost-from-prediction test. This mitigates the effect of
  non-deterministic partitioning.
2022-03-28 21:20:50 +08:00
Christian Marquardt
39c5616af2 Added CPPFLAGS and LDFLAGS to the testing for OpenMP during R installation from source. (#7759) 2022-03-28 19:14:07 +08:00
Haoming Chen
b37ff3d492 Fix cox objective test by using XGBOOST_PARALLEL_STABLE_SORT (#7756) 2022-03-26 17:58:30 +08:00
Jiaming Yuan
b3ba0e8708 Check cupy lazily. (#7752) 2022-03-26 06:09:58 +08:00
Jiaming Yuan
af0cf88921 Workaround compiler error. (#7745) 2022-03-25 17:05:14 +08:00
Jiaming Yuan
64575591d8 Use context in SetInfo. (#7687)
* Use the name `Context`.
* Pass a context object into `SetInfo`.
* Add context to proxy matrix.
* Add context to iterative DMatrix.

This is to remove the use of the default number of threads during `SetInfo`, as a follow-up on
removing the global omp variable and in preparation for CUDA stream semantics.  Currently, XGBoost
uses the legacy CUDA stream; we will gradually remove it in the future in favor of non-blocking streams.
2022-03-24 22:16:26 +08:00
Oleksandr Pryimak
f5b20286e2 [jvm-packages] Launch dev jvm image under my user (#4676)
Co-authored-by: Philip Hyunsu Cho <chohyu01@cs.washington.edu>
2022-03-23 10:39:51 -07:00
Chengyang
c92ab2ce49 Add type hints to core.py (#7707)
Co-authored-by: Chengyang Gu <bridgream@gmail.com>
Co-authored-by: jiamingy <jm.yuan@outlook.com>
2022-03-23 21:12:14 +08:00
Philip Hyunsu Cho
66cb4afc6c Update install doc (#7747) 2022-03-23 17:20:01 +08:00
Aging
f20ffa8db3 Update JVM dev build Dockerfile and shell script (#6792)
Co-authored-by: Zhuo Yuzhen <yuzhuo@paypal.com>
2022-03-22 16:39:10 -07:00
Jiaming Yuan
4d81c741e9 External memory support for hist (#7531)
* Generate column matrix from gHistIndex.
* Avoid synchronization with the sparse page once the cache is written.
* Cleanups: Remove member variables/functions, change the update routine to look like approx and gpu_hist.
* Remove pruner.
2022-03-22 00:13:20 +08:00
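For context, external memory in the Python package is driven by the data-iterator interface; the following is a rough sketch under that assumption (class name, cache path, and data are made up):

```python
import os
import numpy as np
import xgboost as xgb

class Batches(xgb.DataIter):
    """Yield data batch by batch so XGBoost can page the cache to disk."""

    def __init__(self, batches):
        self._batches = batches
        self._it = 0
        # cache_prefix tells XGBoost where to write the page cache on disk.
        super().__init__(cache_prefix=os.path.join(".", "cache"))

    def next(self, input_data):
        if self._it == len(self._batches):
            return 0  # signal the end of iteration
        X, y = self._batches[self._it]
        input_data(data=X, label=y)
        self._it += 1
        return 1

    def reset(self):
        self._it = 0

rng = np.random.default_rng(0)
batches = [(rng.normal(size=(256, 8)), rng.normal(size=(256,))) for _ in range(4)]
dtrain = xgb.DMatrix(Batches(batches))
xgb.train({"tree_method": "hist"}, dtrain, num_boost_round=4)
```
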
Jiaming Yuan
cd55823112 Demo for using custom objective with multi-target regression. (#7736) 2022-03-20 17:44:25 +08:00
Jiaming Yuan
996cc705af Small cleanup to hist tree method. (#7735)
* Remove special optimization using number of bins.
* Remove 1-based index for column sampling.
* Remove data layout.
* Unify update prediction cache.
2022-03-20 03:44:55 +08:00
Jiaming Yuan
718472dbe2 [CI] Upgrade GitHub action Windows workers. (#7739) 2022-03-20 01:44:33 +08:00
Jiaming Yuan
9a400731d9 Replace device sync with stream sync. (#7737) 2022-03-19 23:22:23 +08:00
Jiaming Yuan
da351621a1 [R] Fix parsing decision stump. (#7689) 2022-03-17 01:08:22 +08:00
Jiaming Yuan
e78a38b837 Sort sparse page index when constructing DMatrix. (#7731) 2022-03-16 18:01:05 +08:00
Xiaochang Wu
613ec36c5a Support building SimpleDMatrix from Arrow data format (#7512)
* Integrate with Arrow C data API.
* Support Arrow dataset.
* Support Arrow table.

Co-authored-by: Xiaochang Wu <xiaochang.wu@intel.com>
Co-authored-by: Jiaming Yuan <jm.yuan@outlook.com>
Co-authored-by: Zhang Zhang <zhang.zhang@intel.com>
2022-03-15 13:25:19 +08:00
William Hicks
6b6849b001 Correct xgboost-config directory for inclusion in other projects (#7730) 2022-03-15 03:18:44 +08:00
Jiaming Yuan
98d6faefd6 Implement slope for Pseudo-Huber. (#7727)
* Add objective and metric.
* Some refactoring for CPU/GPU dispatching using linalg module.
2022-03-14 21:42:38 +08:00
Daniel Clausen
4dafb5fac8 [JVM-Packages] Add support for detecting musl-based Linux (#7624)
Co-authored-by: Marc Philipp <marc@gradle.com>
2022-03-14 00:37:27 +08:00
Haoming Chen
04fc575c0e Run tests in a temporary directory (#7723)
Fix some tests to run in a temporary directory in case the root
directory is not writable. Note that most tests already run in a
temporary directory, so this PR just makes them consistent.
2022-03-12 21:24:36 +08:00
Haoming Chen
55463b76c1 Initialize TreeUpdater ctx_ with nullptr (#7722) 2022-03-10 22:33:32 +08:00
Jiaming Yuan
a62a3d991d [dask] prediction with categorical data. (#7708) 2022-03-10 00:21:48 +08:00
Pradipta Ghosh
68b6d6bbe2 Fix for Feature shape mismatch error (#7715) 2022-03-03 21:36:29 +08:00
Cheng Li
a92e0f6240 multi groups in the constraints (#7711) 2022-03-01 18:10:15 +08:00
Jiaming Yuan
1d468e20a4 Optimize GPU evaluation function for categorical data. (#7705)
* Use transform and cache.
2022-02-28 17:46:29 +08:00
Jiaming Yuan
18a4af63aa Update documents and tests. (#7659)
* Revise documents after recent refactoring and cat support.
* Add tests for behavior of max_depth and max_leaves.
2022-02-26 03:57:47 +08:00
Jiaming Yuan
5eed2990ad Fix file descriptor leak. (#7704) 2022-02-25 17:49:33 +08:00
Philip Hyunsu Cho
1b25dd59f9 Use CUDA 11 in clang-tidy (#7701)
* Show command args when clang-tidy fails

* Add option to specify CUDA args

* Use clang-tidy 11

* [CI] Use CUDA 11
2022-02-24 15:15:07 -08:00
Jiaming Yuan
83a66b4994 Support categorical data for hist. (#7695)
* Extract partitioner from hist.
* Implement categorical data support by passing the gradient index directly into the partitioner.
* Organize/update document.
* Remove code for negative hessian.
2022-02-25 03:47:14 +08:00
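A minimal sketch of how this surfaces in the Python package, assuming the experimental `enable_categorical` flag now works with `tree_method="hist"` (toy data):

```python
import pandas as pd
import xgboost as xgb

X = pd.DataFrame(
    {
        "color": pd.Series(["red", "green", "blue", "green"], dtype="category"),
        "size": [1.0, 2.0, 3.0, 4.0],
    }
)
y = [0, 1, 1, 0]

# enable_categorical is experimental; with this PR it works with
# tree_method="hist" in addition to "gpu_hist".
clf = xgb.XGBClassifier(tree_method="hist", enable_categorical=True, n_estimators=4)
clf.fit(X, y)
```
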
Jiaming Yuan
f60d95b0ba [R] Construct booster object in load.raw. (#7686) 2022-02-24 10:06:18 +08:00
Bobby Wang
89aa8ddf52 [jvm-packages] fix the prediction issue for multi:softmax (#7694) 2022-02-24 01:09:45 +08:00
Jiaming Yuan
6762c45494 Small cleanup to gradient index and hist. (#7668)
* Code comments.
* Const accessor to index.
* Remove some weird variables in the `Index` class.
* Simplify the `MemStackAllocator`.
2022-02-23 11:37:21 +08:00
Jiaming Yuan
49c74a5369 Update R package description. (#7691)
* Change role.
* Remove cmake file when building the package.
2022-02-23 08:36:37 +08:00
Bobby Wang
e3e6de5ed9 [jvm-packages] unify the set features API (#7692)
xgboost4j-spark provides two sets of APIs for setting features, one for CPU and another for GPU, which may cause confusion.

This PR removes the GPU API and adds an overloaded CPU function setFeaturesCol that accepts an Array[String] parameter.
2022-02-23 03:37:25 +08:00
Jiaming Yuan
c859764d29 [doc] Clarify that states in callbacks are mutated. (#7685)
* Fix copy for cv.  This prevents inserting default callbacks into the input list.
* Clarify the behavior of callbacks in training/cv.
* Fix typos in doc.
2022-02-22 11:45:00 +08:00
Jiaming Yuan
584bae1fc6 Fix document build with scikit-learn (#7684)
* Require sphinx >= 4.4 for RTD.

* Install sklearn.
2022-02-22 08:58:54 +08:00
Jiaming Yuan
e56d1779e1 Require Python 3.7. (#7682)
* Update setup.py.
2022-02-21 05:46:48 +08:00
Jiaming Yuan
549f3bd781 Honor CPU counts from CFS. (#7654) 2022-02-21 03:13:26 +08:00
Jiaming Yuan
671b3c8d8e Fix typo. (#7680) 2022-02-20 03:42:47 +08:00
Jiaming Yuan
b2341eab0c [R] Fix broken links. (#7670) 2022-02-20 00:55:48 +08:00
Bobby Wang
131858e7cb [jvm-packages] Do not repartition when nWorker = 1 (#7676) 2022-02-19 21:45:54 +08:00
Jiaming Yuan
f08c5dcb06 Cleanup some pylint errors. (#7667)
* Cleanup some pylint errors.

* Cleanup pylint errors in rabit modules.
* Make data iter an abstract class and cleanup private access.
* Cleanup no-self-use for booster.
2022-02-19 18:53:12 +08:00
Jiaming Yuan
b76c5d54bf Define export symbols in callback module. (#7665) 2022-02-19 18:52:41 +08:00
Jiaming Yuan
7366d3b20c Ensure models with categorical splits don't use old binary format. (#7666) 2022-02-19 08:05:28 +08:00
Jiaming Yuan
14d61b0141 [doc] Update document for building from source. (#7664)
- Mention standard install command for R package.
- Remove repeated "get source" step.
- Remove troubleshooting on Windows.  It's outdated considering VS 2022 is already out.
2022-02-19 04:57:03 +08:00
Jiaming Yuan
d625dc2047 Work around nvcc error. (#7673) 2022-02-19 01:41:46 +08:00
Jiaming Yuan
3877043d41 Avoid print for R package. (#7672) 2022-02-18 08:06:24 +08:00
Jiaming Yuan
711f7f3851 Avoid std::terminate for R package. (#7661)
This is required by CRAN policies.
2022-02-17 01:27:20 +08:00
Jiaming Yuan
12949c6b31 [R] Implement feature weights. (#7660) 2022-02-16 22:20:52 +08:00
Philip Hyunsu Cho
0149f81a5a [CI] Fix S3 upload (#7662) 2022-02-16 01:35:27 -08:00
Jiaming Yuan
93eebe8664 [doc] Fix broken link. [skip ci] (#7655) 2022-02-15 14:07:34 +08:00
Jiaming Yuan
0da7d872ef [doc] Update for prediction. (#7648) 2022-02-15 05:01:55 +08:00
Jiaming Yuan
0d0abe1845 Support optimal partitioning for GPU hist. (#7652)
* Implement `MaxCategory` in quantile.
* Implement partition-based split for GPU evaluation.  Currently, it's based on the existing evaluation function.
* Extract an evaluator from GPU Hist to store the needed states.
* Added some CUDA stream/event utilities.
* Update document with references.
* Fixed a bug in approx evaluator where the number of data points is less than the number of categories.
2022-02-15 03:03:12 +08:00
Jiaming Yuan
2369d55e9a Add tests for prediction cache. (#7650)
* Extract the test from approx for other tree methods.
* Add note on how it works.
2022-02-15 00:28:00 +08:00
Jiaming Yuan
5cd1f71b51 [dask] Improve configuration for port. (#7645)
- Try port 0 to let the OS return the available port.
- Add port configuration.
2022-02-14 21:34:34 +08:00
Jiaming Yuan
b52c4e13b0 [dask] Fix empty partition with pandas input. (#7644)
An empty partition is different from an empty dataset.  In the former case, each worker has
non-empty dask collections, but each collection might contain empty partitions.
2022-02-14 19:35:51 +08:00
Jiaming Yuan
1f020a6097 Add maintainer for R package. (#7649) 2022-02-12 23:45:30 +08:00
Jiaming Yuan
1441a6cd27 [CI] Update R cache. (#7646) 2022-02-11 19:50:11 +08:00
Jiaming Yuan
2775c2a1ab Prepare external memory support for hist. (#7638)
This PR prepares the GHistIndexMatrix to host the column matrix used by the hist tree method, by accepting the sparse_threshold parameter.

Some cleanups are made to ensure the correct batch param is passed into DMatrix, along with additional tests for the correctness of SimpleDMatrix.
2022-02-10 16:58:02 +08:00
dependabot[bot]
87c01f49d8 Bump hadoop-common from 2.7.3 to 2.10.1 in /jvm-packages/xgboost4j-flink (#7641)
Bumps hadoop-common from 2.7.3 to 2.10.1.

---
updated-dependencies:
- dependency-name: org.apache.hadoop:hadoop-common
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot] <support@github.com>

Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2022-02-09 17:07:35 -08:00
Jiaming Yuan
fe4ce920b2 [dask] Cleanup dask module. (#7634)
* Add a new utility for mapping function onto workers.
* Unify the type for feature names.
* Clean up the iterator.
* Fix prediction with DaskDMatrix worker specification.
* Fix base margin with DeviceQuantileDMatrix.
* Support vs 2022 in setup.py.
2022-02-08 20:41:46 +08:00
Jiaming Yuan
926af9951e Add missing train parameter for sklearn interface. (#7629)
Some other parameters are still missing and rely on **kwargs, for instance, parameters from
dart.
2022-02-08 13:20:19 +08:00
Jiaming Yuan
3e693e4f97 [dask] Fix nthread config with dask sklearn wrapper. (#7633) 2022-02-08 06:38:32 +08:00
Ed Shee
d152c59a9c fixed broken link to Seldon XGBoost server (#7628) 2022-02-05 01:03:29 +08:00
Philip Hyunsu Cho
34a238ca98 [CI] Clean up Python wheel build pipeline (#7626)
* [CI] Always upload artifacts to [branch_name]/

* [CI] Move detailed setup inside build_python_wheels.sh

* Fix typo
2022-02-03 00:55:44 -08:00
Philip Hyunsu Cho
f6e6d0b2c0 [CI] Build Python wheels for MacOS (x86_64 and arm64) (#7621)
* Build Python wheels for OSX (x86_64 and arm64)

* Use Conda's libomp when running Python tests

* fix

* Add comment to explain CIBW_TARGET_OSX_ARM64

* Update release script

* Add comments in build_python_wheels.sh

* Document wheel pipeline
2022-02-02 17:35:48 -08:00
Philip Hyunsu Cho
271a7c5d43 [Doc] fix typo in install doc (#7623) 2022-01-31 13:35:56 -08:00
Philip Hyunsu Cho
c621775f34 Replace all uses of deprecated function sklearn.datasets.load_boston (#7373)
* Replace all uses of deprecated function sklearn.datasets.load_boston

* More renaming

* Fix bad name

* Update assertion

* Fix n boosted rounds.

* Avoid over regularization.

* Rebase.

* Avoid over regularization.

* Whac-a-mole

Co-authored-by: fis <jm.yuan@outlook.com>
2022-01-30 04:27:57 -08:00
Philip Hyunsu Cho
b4340abf56 Add special handling for multi:softmax in sklearn predict (#7607)
* Add special handling for multi:softmax in sklearn predict

* Add test coverage
2022-01-29 15:54:49 -08:00
david-cortes
7f738e7f6f [R] Accept CSR data for predictions (#7615) 2022-01-30 00:54:57 +08:00
Michael Chirico
549bd419bb use exit hook to remove temp file (#7611)
This guarantees the removal will trigger even on unexpected early exits.
2022-01-29 16:06:52 +08:00
Philip Hyunsu Cho
f21301c749 [Doc] Add instruction to install XGBoost for Apple Silicon using Conda (#7612) 2022-01-28 01:06:39 -08:00
Jiaming Yuan
81210420c6 Remove omp_get_max_threads (#7608)
This is the last PR for removing the omp global variable.

* Add context object to the `DMatrix`.  This bridges `DMatrix` with https://github.com/dmlc/xgboost/issues/7308 .
* Require context to be available at the construction time of booster.
* Add `n_threads` support for R csc DMatrix constructor.
* Remove `omp_get_max_threads` in R glue code.
* Remove threading utilities that rely on omp global variable.
2022-01-28 16:09:22 +08:00
Philip Hyunsu Cho
028bdc1740 [R] Fix typo in docstring (#7606) 2022-01-26 23:33:25 +08:00
Jiaming Yuan
e060519d4f Avoid regenerating the gradient index for approx. (#7591) 2022-01-26 21:41:30 +08:00
Jiaming Yuan
5d7818e75d Remove omp_get_max_threads in tree updaters. (#7590) 2022-01-26 19:55:47 +08:00
Jiaming Yuan
24789429fd Support latest pandas Index type. (#7595) 2022-01-26 18:20:10 +08:00
AJ Schmidt
511805c981 Compress fatbins (#7601)
* compress CUDA device code

Co-authored-by: ptaylor <paul.e.taylor@me.com>
2022-01-25 18:30:59 +08:00
Jiaming Yuan
6967ef7267 Remove omp_get_max_threads in objective. (#7589) 2022-01-24 04:35:49 +08:00
Jiaming Yuan
5817840858 Remove omp_get_max_threads in data. (#7588) 2022-01-24 02:44:07 +08:00
Jiaming Yuan
f84291c1e1 Fix max_cat_to_onehot doc annotation [skip ci] (#7592) 2022-01-23 16:33:23 +08:00
Jiaming Yuan
d262503781 [R] Implement new save raw in R. (#7571) 2022-01-22 20:55:47 +08:00
Jiaming Yuan
ef4dae4c0e [dask] Add scheduler address to dask config. (#7581)
- Add user configuration.
- Bring back the logic of using the scheduler address from dask.  This was removed when we were trying to support GKE; now we bring it back and let xgboost try it if the direct guess or the host IP from user config fails.
2022-01-22 01:56:32 +08:00
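A sketch of the resulting user configuration; the config key `xgboost.scheduler_address` is my reading of the package's dask documentation, not quoted from this PR:

```python
import dask
import dask.array as da
from dask.distributed import Client
import xgboost as xgb

if __name__ == "__main__":
    # The config key below is an assumption based on the package's dask
    # documentation; it overrides the address guessed for the rabit tracker.
    with dask.config.set({"xgboost.scheduler_address": "127.0.0.1"}):
        with Client(n_workers=2, threads_per_worker=1) as client:
            X = da.random.random((1000, 10), chunks=(250, 10))
            y = da.random.random(1000, chunks=250)
            dtrain = xgb.dask.DaskDMatrix(client, X, y)
            out = xgb.dask.train(client, {"tree_method": "hist"}, dtrain,
                                 num_boost_round=4)
            print(out["booster"].num_boosted_rounds())
```
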
Jiaming Yuan
5ddd4a9d06 Small cleanup to tests. (#7585)
* Use random port in dask tests to avoid warnings for occupied port.
* Increase the difficulty of AUC tests.
2022-01-21 06:26:57 +00:00
Philip Hyunsu Cho
9fd510faa5 [CI] Clarify steps for publishing artifacts to Maven Central (#7582) 2022-01-20 14:23:07 -08:00
Jiaming Yuan
529cf8a54a Configure cub version automatically. (#7579)
Note that when the cub shipped with CUDA is used, XGBoost performs checks on input size
instead of using the internal cub function to accept inputs larger than the maximum integer.
2022-01-20 19:49:26 +08:00
Jiaming Yuan
ac7a36367c [jvm-packages] Implement new save_raw in jvm-packages. (#7570)
* New `toByteArray` that accepts a parameter for format.
2022-01-19 16:00:14 +08:00
Jiaming Yuan
b4ec1682c6 Update document for multi output and categorical. (#7574)
* Group together categorical related parameters.
* Update documents about multioutput and categorical.
2022-01-19 04:35:17 +08:00
Jiaming Yuan
dac9eb13bd Implement new save_raw in Python. (#7572)
* Expose the new C API function to Python.
* Remove old document and helper script.
* Small optimization to the `save_raw` and Json ctors.
2022-01-19 02:27:51 +08:00
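Concretely, `save_raw` gained a format argument; a minimal sketch assuming the 1.6 Python signature (`raw_format`):

```python
import numpy as np
import xgboost as xgb

rng = np.random.default_rng(0)
X, y = rng.normal(size=(100, 4)), rng.normal(size=(100,))
booster = xgb.train({}, xgb.DMatrix(X, label=y), num_boost_round=2)

raw_ubj = booster.save_raw(raw_format="ubj")    # binary UBJSON
raw_json = booster.save_raw(raw_format="json")  # plain JSON text

# A raw buffer can be loaded back through the Booster constructor.
restored = xgb.Booster(model_file=raw_ubj)
```
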
Jiaming Yuan
9f20a3315e Test with latest numpy. (#7573) 2022-01-19 00:46:23 +08:00
Jiaming Yuan
bb56bb9a13 Fix merge conflict. (#7577) 2022-01-18 23:01:34 +08:00
Jiaming Yuan
cc06fab9a7 Support distributed CPU env for categorical data. (#7575)
* Add support for cat data in sketch allreduce.
* Share tests between CPU and GPU.
2022-01-18 21:56:07 +08:00
Jiaming Yuan
deab0e32ba Validate out of range categorical value. (#7576)
* Use float in CPU categorical set to preserve the input value.
* Check out of range values.
2022-01-18 20:16:19 +08:00
Jiaming Yuan
d6ea5cc1ed Cover approx tree method for categorical data tests. (#7569)
* Add tree to df tests.
* Add plotting tests.
* Add histogram tests.
2022-01-16 11:31:40 +08:00
Jiaming Yuan
465dc63833 Fix tree param feature type. (#7565) 2022-01-16 04:46:29 +08:00
Jiaming Yuan
a1bcd33a3b [breaking] Change internal model serialization to UBJSON. (#7556)
* Use typed array for models.
* Change the memory snapshot format.
* Add new C API for saving to raw format.
2022-01-16 02:11:53 +08:00
Jiaming Yuan
13b0fa4b97 Implement get_group. (#7564) 2022-01-16 02:07:42 +08:00
Jiaming Yuan
52277cc3da Rename build info function to be consistent with rest of the API. (#7553) 2022-01-14 00:39:28 +08:00
Jiaming Yuan
e94b766310 Fix early stopping with linear model. (#7554) 2022-01-13 21:53:06 +08:00
Jiaming Yuan
e5e47c3c99 Clarify the behavior of invalid categorical value handling. (#7529) 2022-01-13 16:11:52 +08:00
Philip Hyunsu Cho
20c0d60ac7 Restore functionality of max_depth=0 in hist (#7551)
* Restore functionality of max_depth=0 in hist

* Add test case
2022-01-11 01:37:44 +08:00
Jiaming Yuan
2db808021d Silence some warnings for unused variables. (#7548) 2022-01-11 01:16:26 +08:00
Jiaming Yuan
c635d4c46a Implement ubjson. (#7549)
* Implement ubjson.

This is a partial implementation of UBJSON with support for typed arrays.  Some missing
features are `f64`, typed objects, and the no-op.
2022-01-10 23:24:23 +08:00
Jiaming Yuan
001503186c Rewrite approx (#7214)
This PR rewrites the approx tree method to use the codebase from hist for better performance and code sharing.

The rewrite has many benefits:
- Support for both `max_leaves` and `max_depth`.
- Support for `grow_policy`.
- Support for mono constraint.
- Support for feature weights.
- Support for easier bin configuration (`max_bin`).
- Support for categorical data.
- Faster performance on most datasets (often many times faster).
- Support for prediction cache.
- Significantly better performance for external memory.
- Unites the code base between approx and hist.
2022-01-10 21:15:05 +08:00
Jiaming Yuan
ed95e77752 [jvm-packages] Update JNI header. (#7550) 2022-01-10 14:59:40 +08:00
Jiaming Yuan
91c1a1c52f Fix index type for bitfield. (#7541) 2022-01-05 19:23:29 +08:00
Jiaming Yuan
0df2ae63c7 Fix num_boosted_rounds for linear model. (#7538)
* Add note.

* Fix n boosted rounds.
2022-01-05 03:29:33 +08:00
Jiaming Yuan
28af6f9abb Remove omp_get_max_threads in gbm and linear. (#7537)
* Use ctx in gbm.

* Use ctx threads in gbm and linear.
2022-01-05 03:28:52 +08:00
Jiaming Yuan
eea094e1bc Remove some warnings from clang. (#7533)
* Unused variable.
* Unnecessary virtual function.
2022-01-05 03:28:21 +08:00
Jiaming Yuan
ec56d5869b [doc] Include dask examples into doc. (#7530) 2022-01-05 03:27:22 +08:00
Jiaming Yuan
54582f641a [doc] Use cross references in sphinx doc. (#7522)
* Use cross references instead of URL.
* Fix auto doc for callback.
2022-01-05 03:21:25 +08:00
Jiaming Yuan
eb1efb54b5 Define feature_names_in_. (#7526)
* Define `feature_names_in_`.
* Raise attribute error if it's not defined.
2022-01-05 01:35:34 +08:00
Jiaming Yuan
8f0a42a266 Initial support for multi-label classification. (#7521)
* Add support in sklearn classifier.
2022-01-04 23:58:21 +08:00
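A rough sketch of the new usage with made-up data; whether additional parameters are required may vary by version:

```python
import numpy as np
import xgboost as xgb

rng = np.random.default_rng(0)
X = rng.normal(size=(128, 8))
# Multi-label: each sample may carry several binary labels at once.
y = rng.integers(0, 2, size=(128, 3))

clf = xgb.XGBClassifier(tree_method="hist", n_estimators=8)
clf.fit(X, y)
assert clf.predict(X).shape == (128, 3)
```
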
Jiaming Yuan
68cdbc9c16 Remove omp_get_max_threads in CPU predictor. (#7519)
This is part of the ongoing effort to remove the dependency on global omp variables.
2022-01-04 22:12:15 +08:00
Ikko Ashimine
5516281881 Fix typo in tree_model.cc (#7539)
occurance -> occurrence
2021-12-30 20:12:25 +08:00
Randall Britten
a4a0ebb85d [doc] Lowercase omega for per tree complexity (#7532)
As suggested on issue #7480
2021-12-29 23:05:54 +08:00
Louis Desreumaux
3886c3dd8f Remove macro definitions of snprintf and vsnprintf (#7536) 2021-12-26 08:05:59 +08:00
Ginko Balboa
29bfa94bb6 Fix external memory with gpu_hist and subsampling combination bug. (#7481)
Instead of accessing data from the `original_page_`, access the data from the first page of the available batch.

fix #7476

Co-authored-by: jiamingy <jm.yuan@outlook.com>
2021-12-24 11:15:35 +08:00
Jiaming Yuan
7f399eac8b Use double for GPU Hist node sum. (#7507) 2021-12-22 08:41:35 +08:00
Jiaming Yuan
eabec370e4 [R] Fix single sample prediction. (#7524) 2021-12-21 14:11:07 +08:00
Bobby Wang
e8c1eb99e4 [jvm-package] Clean up the legacy gpu support tests (#7523) 2021-12-21 09:15:51 +08:00
Xiaochang Wu
59bd1ab17e Skip callback demo test if matplotlib is not installed (#7520) 2021-12-19 08:20:38 +08:00
Jiaming Yuan
58a6723eb1 Initial support for multioutput regression. (#7514)
* Add num target model parameter, which is configured from input labels.
* Change elementwise metric and indexing for weights.
* Add demo.
* Add tests.
2021-12-18 09:28:38 +08:00
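Similarly for regression, a minimal sketch with made-up data:

```python
import numpy as np
import xgboost as xgb

rng = np.random.default_rng(0)
X = rng.normal(size=(256, 10))
y = rng.normal(size=(256, 2))  # two regression targets learned at once

reg = xgb.XGBRegressor(tree_method="hist", n_estimators=8)
reg.fit(X, y)
assert reg.predict(X).shape == (256, 2)
```
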
Jiaming Yuan
9ab73f737e Extract Sketch Entry from hist maker. (#7503)
* Extract Sketch Entry from hist maker.

* Add a new sketch container for sorted inputs.
* Optimize bin search.
2021-12-18 05:36:56 +08:00
Qingyun Wu
b4a1236cfc [doc] Update the link to the tuning example in FLAML 2021-12-17 14:31:00 +08:00
Bobby Wang
24e25802a7 [jvm-packages] Add Rapids plugin support (#7491)
* Add GPU pre-processing pipeline.
2021-12-17 13:11:12 +08:00
Jiaming Yuan
5b1161bb64 Convert labels into tensor. (#7456)
* Add a new ctor to tensor for `initializer_list`.
* Change labels from host device vector to tensor.
* Rename the field from `labels_` to `labels` since it's a public member.
2021-12-17 00:58:35 +08:00
Jiaming Yuan
6f8a4633b7 Fix Python typehint with upgraded mypy. (#7513) 2021-12-16 23:08:08 +08:00
Jiaming Yuan
70b12d898a [dask] Fix ddqdm with empty partition. (#7510)
* Fix empty partition.

* war.
2021-12-16 20:37:29 +08:00
Jiaming Yuan
a512b4b394 [doc] Promote dask from experimental. [skip ci] (#7509) 2021-12-16 14:17:06 +08:00
Jiaming Yuan
05497a9141 [dask] Fix asyncio. (#7508) 2021-12-13 01:48:25 +08:00
Jiaming Yuan
01152f89ee Remove unused parameters. (#7499) 2021-12-09 14:24:51 +08:00
Harvey
1864fab592 Minor edits to Parameters doc page. (#7500)
* bost -> both

* doc improvement

* use original filename

* syntax highlight false

* missed a few highlights
2021-12-07 15:46:44 +08:00
Jiaming Yuan
021f8bf28b Fix pylint. (#7498) 2021-12-07 13:23:30 +08:00
Jiaming Yuan
eee527d264 Add approx partitioner. (#7467) 2021-11-27 15:22:06 +08:00
Jiaming Yuan
85cbd32c5a Add range-based slicing to tensor view. (#7453) 2021-11-27 13:42:36 +08:00
danmarinescu
6f38f5affa Updated CMake version requirement in build.rst (#7487)
The documentation states that to build from source you need CMake 3.13 or higher. However, according to https://github.com/dmlc/xgboost/blob/master/CMakeLists.txt#L1, CMake 3.14 or higher is required.
2021-11-27 09:58:01 +08:00
Jiaming Yuan
557ffc4bf5 Reduce base margin to 2 dim for now. (#7455) 2021-11-27 00:46:13 +08:00
Jiaming Yuan
bf7bb575b4 Test CPU histogram with cat data. (#7465) 2021-11-27 00:43:28 +08:00
Bobby Wang
24be04e848 [jvm-packages] Add DeviceQuantileDMatrix to Scala binding (#7459) 2021-11-24 20:23:18 +08:00
Philip Hyunsu Cho
619c450a49 [CI] Add missing step extract_branch (#7479) 2021-11-24 17:35:59 +08:00
Jiaming Yuan
820e1c01ef Fix macos package upload. (#7475)
* Split up the tests.
2021-11-24 03:43:49 +08:00
Jiaming Yuan
488f12a996 Fix github macos package upload. (#7474) 2021-11-24 00:29:11 +08:00
Jiaming Yuan
c024c42dce Modernize XGBoost Python document. (#7468)
* Use sphinx gallery to integrate examples.
* Remove mock objects.
* Add dask doc inventory.
2021-11-23 23:24:52 +08:00
Philip Hyunsu Cho
96a9848c9e [CI] Fix continuous delivery pipeline for MacOS (#7472) 2021-11-23 22:22:08 +08:00
Jiaming Yuan
b124a27f57 Support scipy sparse in dask. (#7457) 2021-11-23 16:45:36 +08:00
Jiaming Yuan
5262e933f7 Remove unnecessary constexpr. (#7466) 2021-11-23 16:42:08 +08:00
Philip Hyunsu Cho
0c67685e43 [CI] Add a helper script to aid Maven release (#7470)
* [CI] Add a helper script to aid Maven release

* Move script to dev/ [skip ci]

* Update command [skip ci]
2021-11-23 00:11:07 -08:00
Harvey
0552ca8021 Fix typo (#7469) 2021-11-23 08:58:45 +08:00
Jiaming Yuan
176110a22d Support external memory in CPU histogram building. (#7372) 2021-11-23 01:13:33 +08:00
Jiaming Yuan
d33854af1b [Breaking] Accept multi-dim meta info. (#7405)
This PR changes base_margin into a 3-dim array, with one dimension reserved for multi-target classification. Also, a breaking change is made to binary serialization due to the extra dimension, along with a fix for saving the feature weights. Lastly, it unifies the prediction initialization between CPU and GPU. After this PR, the meta info setters in Python will be based on the array interface.
2021-11-18 23:02:54 +08:00
Jiaming Yuan
9fb4338964 Add test for eta and mitigate float error. (#7446)
* Add eta test.
* Don't skip test.
2021-11-18 20:42:48 +08:00
Bobby Wang
7cfb310eb4 Rework transform (#7440)
extract the common part of transform code from XGBoostClassifier
and XGBoostRegressor
2021-11-18 15:48:57 +08:00
Philip Hyunsu Cho
2adf222fb2 [CI] CI cost saving (#7407)
* [CI] Drop CUDA 10.1; Require 11.0

* Change NCCL version

* Use CUDA 10.1 for clang-tidy, for now

* Remove JDK 11 and 12

* Fix NCCL version

* Don't require 11.0 just yet, until clang-tidy is fixed

* Skip MultiClassesSerializationTest.GpuHist
2021-11-17 21:02:20 -08:00
Jiaming Yuan
b0015fda96 Fix R CRAN failures. (#7404)
* Remove hist builder dtor.

* Initialize values.

* Tolerance.

* Remove the use of nthread in col maker.
2021-11-16 10:51:12 +08:00
Jiaming Yuan
55ee272ea8 Extend array interface to handle ndarray. (#7434)
* Extend array interface to handle ndarray.

The `ArrayInterface` class is extended to support multi-dim array inputs. Previously this
class handled only 2-dim inputs (a vector is also a matrix).  This PR specifies the expected
dimension at compile time, and the array interface can perform various checks automatically
on input data. Also, adapters like CSR are more rigorous about their input.  Lastly, row
vectors and column vectors are handled without intervention from the caller.
2021-11-16 09:52:15 +08:00
Jiaming Yuan
e27f543deb Set use_logger in tracker to false. (#7438) 2021-11-16 05:12:42 +08:00
Jiaming Yuan
d4274bc556 Fix typo. (#7433) 2021-11-15 01:28:11 +08:00
Jiaming Yuan
a7057fa64c Implement typed storage for tensor. (#7429)
* Add `Tensor` class.
* Add elementwise kernel for CPU and GPU.
* Add unravel index.
* Move some computation to compile time.
2021-11-14 18:53:13 +08:00
Kian Meng Ang
d27a11ff87 Fix typos in python package (#7432) 2021-11-14 17:20:19 +08:00
Jiaming Yuan
8cc75f1576 Cleanup Python tests. (#7426) 2021-11-14 15:47:05 +08:00
Jiaming Yuan
38ca96c9fc [CI] Install igraph as binary. (#7417) 2021-11-12 19:04:28 +08:00
Jiaming Yuan
46726ec176 Expose build info (#7399) 2021-11-12 18:22:46 +08:00
Jiaming Yuan
937fa282b5 Extract string view. (#7416)
* Add equality operators.
* Return a view in substr.
* Add proper iterator types.
2021-11-12 18:22:30 +08:00
Jiaming Yuan
ca6f980932 Check number of trees in inplace predict. (#7409) 2021-11-12 18:20:23 +08:00
Jiaming Yuan
97d7582457 Delay breaking changes to 1.6. (#7420)
The patch is too big to be backported.
2021-11-12 16:46:03 +08:00
Bobby Wang
cb685607b2 [jvm-packages] Rework the train pipeline (#7401)
1. Add PreXGBoost to build RDD[Watches] from Dataset
2. Feed RDD[Watches] built from PreXGBoost to XGBoost to train
2021-11-10 17:51:38 +08:00
Jiaming Yuan
8df0a252b7 [doc] Update document for GPU. [skip ci] (#7403)
* Remove outdated workaround and description.
2021-11-09 02:05:55 +08:00
Jiaming Yuan
d7d1b6e3a6 CPU evaluation for cat data. (#7393)
* Implementation for one hot based.
* Implementation for partition based. (LightGBM)
2021-11-06 14:41:35 +08:00
Jiaming Yuan
6ede12412c Update dmlc-core and use data iter for GPU sampling tests. (#7398)
* Update dmlc-core.
* New parquet parser in dmlc-core.
* Use data iter for GPU sampling tests.
2021-11-06 05:12:49 +08:00
Jiaming Yuan
c968217ca8 [R] Fix global feature importance and predict with 1 sample. (#7394)
* [R] Fix global feature importance.

* Add an implementation for the tree index.  The parameter is not documented in the C API since we
should work on porting model slicing to R instead of supporting more uses of the tree
index.

* Fix the difference between "gain" and "total_gain".

* debug.

* Fix prediction.
2021-11-05 10:07:00 +08:00
Jiaming Yuan
48aff0eabd [doc][jvm-packages] Update information about Python tracker. [skip ci] (#7396) 2021-11-05 05:55:13 +08:00
Jiaming Yuan
b06040b6d0 Implement a general array view. (#7365)
* Replace existing matrix and vector view.

This is to prepare for handling higher dimension data and prediction when we support multi-target models.
2021-11-05 04:16:11 +08:00
Jiaming Yuan
232144ca09 Add note about CRAN release [skip ci] (#7395) 2021-11-05 00:34:14 +08:00
Jiaming Yuan
4100827971 Pass information about objective to tree methods. (#7385)
* Define the `ObjInfo` and pass it down to every tree updater.
2021-11-04 01:52:44 +08:00
Jiaming Yuan
ccdabe4512 Support building gradient index with cat data. (#7371) 2021-11-03 22:37:37 +08:00
Jiaming Yuan
57a4b4ff64 Handle OMP_THREAD_LIMIT. (#7390) 2021-11-03 15:44:38 +08:00
Jiaming Yuan
e6ab594e14 Change shebang used in CLI demo. (#7389)
Change from the system Python to the environment python3.  On Ubuntu 20.04, only `python3` is
available and there is no `python`.  So at least `python3` is consistent across Python
virtual envs, Ubuntu, and anaconda.
2021-11-02 22:11:19 +08:00
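The change boils down to the portable shebang form:

```python
#!/usr/bin/env python3
# Resolve python3 through the environment (virtualenv, conda, distro) rather
# than hard-coding a system interpreter path such as /usr/bin/python.
```
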
Jiaming Yuan
a55d43ccfd Add test for invalid categorical data values. (#7380)
* Add test for invalid categorical data values.

* Add check during sketching.
2021-11-02 18:00:52 +08:00
Jiaming Yuan
c74df31bf9 Cleanup the train function. (#7377)
* Move attribute setter to callback.
* Remove the internal train function.
* Remove unnecessary initialization.
2021-11-02 18:00:26 +08:00
Jiaming Yuan
154b15060e Move callbacks from fit to __init__. (#7375) 2021-11-02 17:51:42 +08:00
Jiaming Yuan
32e673d8c4 Support building with CTK11.5. (#7379)
* Support building with CTK11.5.

* Require system cub installation for CTK11.4+.
* Check thrust version for segmented sort.
2021-11-02 16:22:26 +08:00
Jiaming Yuan
a13321148a Support multi-class with base margin. (#7381)
This was already partially supported but never properly tested, so the only way to use it was to call `numpy.ndarray.flatten` on `base_margin` before passing it into XGBoost. This PR adds proper support
for most of the data types, along with tests.
2021-11-02 13:38:00 +08:00
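For illustration, the shape convention this enables (toy sketch, assuming the 1.6 API):

```python
import numpy as np
import xgboost as xgb

rng = np.random.default_rng(0)
X = rng.normal(size=(100, 5))
y = rng.integers(0, 3, size=(100,))
# One margin column per class; previously this had to be flattened first.
margin = rng.normal(size=(100, 3))

dtrain = xgb.DMatrix(X, label=y, base_margin=margin)
xgb.train({"objective": "multi:softprob", "num_class": 3}, dtrain, num_boost_round=2)
```
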
Jiaming Yuan
6295dc3b67 Fix span reverse iterator. (#7387)
* Fix span reverse iterator.

* Disable `rbegin` on device code to avoid calling host function.
* Add `trbegin` and friends.
2021-11-02 13:35:59 +08:00
Jiaming Yuan
8211e5f341 Add clang-format config. (#7383)
Generated using `clang-format -style=google -dump-config > .clang-format`, with column
width changed from 80 to 100 to be consistent with existing cpplint check.
2021-11-02 13:34:38 +08:00
Jiaming Yuan
0f7a9b42f1 Use double precision in metric calculation. (#7364) 2021-11-02 12:00:32 +08:00
Jiaming Yuan
239dbb3c0a Move macos test to github action. (#7382)
Co-authored-by: Hyunsu Cho <chohyu01@cs.washington.edu>
2021-10-30 14:40:32 +08:00
Bobby Wang
b81ebbef62 [jvm-packages] Fix json4s binary compatibility issue (#7376)
Spark 3.2 depends on json4s 3.7.0-M11, which has changed some implicit functions'
signatures. As a result, xgboost4j built against Spark 3.0/3.1
fails when saving the model.
2021-10-30 03:20:57 +08:00
Jiaming Yuan
c6769488b3 Typehint for subset of core API. (#7348) 2021-10-28 20:47:04 +08:00
Jiaming Yuan
45aef75cca Move skl eval_metric and early_stopping rounds to model params. (#6751)
A new parameter `custom_metric` is added to `train` and `cv` to distinguish its behaviour from the old `feval`, and `feval` is deprecated.  The new `custom_metric` receives transformed predictions when a built-in objective is used.  This enables XGBoost to use cost functions from other libraries like scikit-learn directly, without going through the definition of the link function.

`eval_metric` and `early_stopping_rounds` in the sklearn interface are moved from `fit` to `__init__` and are now saved as part of the scikit-learn model.  The old ones in the `fit` function are now deprecated. The new `eval_metric` in `__init__` has the same new behaviour as `custom_metric`.

Added more detailed documents on the behaviour of custom objectives and metrics.
2021-10-28 17:20:20 +08:00
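A sketch of the resulting usage, assuming the 1.6 sklearn interface:

```python
from sklearn.datasets import make_classification
from sklearn.metrics import log_loss
import xgboost as xgb

X, y = make_classification(n_samples=500, random_state=0)

clf = xgb.XGBClassifier(
    n_estimators=100,
    # A sklearn cost function can be used directly: it receives transformed
    # (probability) predictions when a built-in objective is used.
    eval_metric=log_loss,
    # Now a model parameter, saved with the model, instead of a fit() argument.
    early_stopping_rounds=5,
)
clf.fit(X[:400], y[:400], eval_set=[(X[400:], y[400:])])
```
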
Jiaming Yuan
6b074add66 Update setup.py. (#7360)
* Add new classifiers.
* Typehint.
2021-10-28 14:58:31 +08:00
Jiaming Yuan
3c4aa9b2ea [breaking] Remove label encoder deprecated in 1.3. (#7357) 2021-10-28 13:24:29 +08:00
Jiaming Yuan
d05754f558 Avoid OMP reduction in AUC. (#7362) 2021-10-28 05:03:52 +08:00
Jiaming Yuan
ac9bfaa4f2 Handle missing values in dataframe with category dtype. (#7331)
* Replace -1 in pandas initializer.
* Unify `IsValid` functor.
* Mimic pandas data handling in cuDF glue code.
* Check invalid categories.
* Fix DDM sketching.
2021-10-28 03:33:54 +08:00
Jiaming Yuan
2eee87423c Remove old custom objective demo. (#7369)
We have two new custom objective demos covering both regression and classification, with
accompanying tutorials in the documents.
2021-10-27 16:31:48 +08:00
Jiaming Yuan
b9414b6477 Update GPU doc for PR-AUC. [skip ci] (#7368) 2021-10-27 16:31:07 +08:00
Jiaming Yuan
d4349426d8 Re-implement PR-AUC. (#7297)
* Support binary/multi-class classification, ranking.
* Add documents.
* Handle missing data.
2021-10-26 13:07:50 +08:00
nicovdijk
a6bcd54b47 [jvm-packages] Fix for space in sys.executable path in create_jni.py (#7358) 2021-10-25 13:45:11 +08:00
Jiaming Yuan
fd61c61071 Avoid omp reduction in rank metric. (#7349) 2021-10-22 14:13:34 +08:00
Jiaming Yuan
e36b066344 [doc] Document the status of RTD hosting. [skip ci] (#7353) 2021-10-22 14:12:55 +08:00
Jiaming Yuan
864d236a82 [doc] Remove num_pbuffer. [skip ci] (#7356) 2021-10-22 14:12:32 +08:00
nicovdijk
31a307cf6b [XGBoost4J-Spark] Serialization for custom objective and eval (#7274)
* added type hints to custom_obj and custom_eval for Spark persistence


Co-authored-by: Bobby Wang <wbo4958@gmail.com>
2021-10-21 16:22:23 +08:00
Jiaming Yuan
7593fa9982 1.5 release note. [skip ci] (#7271)
Co-authored-by: Philip Hyunsu Cho <chohyu01@cs.washington.edu>
2021-10-21 13:43:31 +08:00
Jiaming Yuan
d1f00fb0b7 Stricter validation for group. (#7345) 2021-10-21 12:13:33 +08:00
nicovdijk
74bab6e504 Control logging for early stopping using shouldPrint() (#7326) 2021-10-21 12:12:06 +08:00
Jiaming Yuan
8d7c6366d7 Accept histogram cut instead gradient index in evaluation. (#7336) 2021-10-20 18:04:46 +08:00
Jiaming Yuan
15685996fc [doc] Small improvements for categorical data document. (#7330) 2021-10-20 18:04:32 +08:00
Jiaming Yuan
f999897615 [dask] Use nthread in DMatrix construction. (#7337)
This is consistent with the thread overriding behavior.
2021-10-20 15:16:40 +08:00
Philip Hyunsu Cho
b8e8f0fcd9 [doc] Use latest Sphinx RTD theme (#7347) 2021-10-20 00:04:43 -07:00
Jiaming Yuan
3b0b74fa94 [doc] Use RTD theme. (#7346) 2021-10-19 23:49:19 -07:00
Jiaming Yuan
376b448015 [doc] Fix broken links. (#7341)
* Fix most of the link checks from sphinx.
* Remove duplicate explicit target name.
2021-10-20 14:45:30 +08:00
Jiaming Yuan
f53da412aa Add typehint to tracker. (#7338) 2021-10-20 12:49:36 +08:00
Jiaming Yuan
5ff210ed75 Small fix for the release doc and script. [skip ci] (#7332)
Add Philip as co-maintainer of maven packages.
2021-10-20 12:49:12 +08:00
Jiaming Yuan
c42e3fbcf3 [doc] Fix early stopping document. (#7334) 2021-10-18 11:21:16 -07:00
Bobby Wang
4fd149b3a2 [jvm-packages] update checkstyle (#7335)
* [jvm-packages] update scalastyle

1. bump scalastyle-maven-plugin and maven-checkstyle-plugin to latest
2. remove unused imports

* fix code style check
2021-10-18 18:42:01 +08:00
Jiaming Yuan
fbb0dc4275 Remove auto configuration of seed_per_iteration. (#7009)
* Remove auto configuration of seed_per_iteration.

This should be related to model recovery from rabit, which is removed.

* Document.
2021-10-17 15:58:57 +08:00
Jiaming Yuan
fb1a9e6bc5 Avoid omp reduction in coordinate descent and aft metrics. (#7316)
Aside from the omp issue, parameter configuration for the aft metric is simplified.
2021-10-17 15:55:49 +08:00
Jiaming Yuan
f56e2e9a66 Support categorical data with pandas Dataframe in inplace prediction (#7322) 2021-10-17 14:32:06 +08:00
Jiaming Yuan
8e619010d0 Extract CPUExpandEntry and HistParam. (#7321)
* Remove kRootNid.
* Check for empty hessian.
2021-10-17 14:22:25 +08:00
Jiaming Yuan
6cdcfe8128 Improve external memory demo. (#7320)
* Use npy format.
* Add evaluation.
* Use make_regression.
2021-10-17 11:25:24 +08:00
Jiaming Yuan
e6a142fe70 Fix document about best_iteration (#7324) 2021-10-14 15:30:46 -07:00
Jiaming Yuan
4ddf8d001c Deterministic result for element-wise/mclass metrics. (#7303)
Remove openmp reduction.
2021-10-13 14:22:40 +08:00
Jiaming Yuan
406c70ba0e [doc] Fix typo. [skip ci] (#7311) 2021-10-12 19:10:18 +08:00
Jiaming Yuan
0bd8f21e4e Add document for categorical data. (#7307) 2021-10-12 16:10:59 +08:00
Jiaming Yuan
a7d0c66457 Remove unused code. (#7293) 2021-10-12 15:04:41 +08:00
Jiaming Yuan
130df8cdda Add tests for tree grow policy. (#7302) 2021-10-12 15:04:06 +08:00
Jiaming Yuan
5b17bb0031 Fix prediction with cat data in sklearn interface. (#7306)
* Specify DMatrix parameter for pre-processing dataframe.
* Add document about the behaviour of prediction.
2021-10-12 14:31:12 +08:00
Jiaming Yuan
89d87e5331 Update GPU Tree SHAP (#7304) 2021-10-11 21:39:50 +08:00
Jiaming Yuan
298af6f409 Fix weighted samples in multi-class AUC. (#7300) 2021-10-11 15:12:29 +08:00
Jiaming Yuan
69d3b1b8b4 Remove old callback deprecated in 1.3. (#7280) 2021-10-08 17:24:59 +08:00
Jiaming Yuan
578de9f762 Fix cv verbose_eval (#7291) 2021-10-08 12:28:38 +08:00
Jiaming Yuan
f7caac2563 Bump version to 1.6.0 in master. (#7259) 2021-10-07 16:09:26 +08:00
Jiaming Yuan
e2660ab8f3 Extend release script with R packages. [skip ci] (#7278) 2021-10-07 16:08:42 +08:00
Yuan Tang
cc459755be Update affiliation (#7289) 2021-10-07 16:07:34 +08:00
Jiaming Yuan
d8cb395380 Fix gamma neg log likelihood. (#7275) 2021-10-05 16:57:08 +08:00
Jiaming Yuan
b3b03200e2 Remove old warning in 1.3 (#7279) 2021-10-01 08:05:50 +08:00
Philip Hyunsu Cho
2a0368b7ca Add CMake option to use /MD runtime (#7277) 2021-09-30 13:13:57 +08:00
Jiaming Yuan
b2d8431aea [R] Fix document for nthread. (#7263) 2021-09-28 11:46:24 +08:00
454 changed files with 23726 additions and 10877 deletions

.clang-format (new file, 214 lines)

@@ -0,0 +1,214 @@
---
Language: Cpp
# BasedOnStyle: Google
AccessModifierOffset: -1
AlignAfterOpenBracket: Align
AlignArrayOfStructures: None
AlignConsecutiveMacros: None
AlignConsecutiveAssignments: None
AlignConsecutiveBitFields: None
AlignConsecutiveDeclarations: None
AlignEscapedNewlines: Left
AlignOperands: Align
AlignTrailingComments: true
AllowAllArgumentsOnNextLine: true
AllowAllParametersOfDeclarationOnNextLine: true
AllowShortEnumsOnASingleLine: true
AllowShortBlocksOnASingleLine: Never
AllowShortCaseLabelsOnASingleLine: false
AllowShortFunctionsOnASingleLine: All
AllowShortLambdasOnASingleLine: All
AllowShortIfStatementsOnASingleLine: WithoutElse
AllowShortLoopsOnASingleLine: true
AlwaysBreakAfterDefinitionReturnType: None
AlwaysBreakAfterReturnType: None
AlwaysBreakBeforeMultilineStrings: true
AlwaysBreakTemplateDeclarations: Yes
AttributeMacros:
- __capability
BinPackArguments: true
BinPackParameters: true
BraceWrapping:
AfterCaseLabel: false
AfterClass: false
AfterControlStatement: Never
AfterEnum: false
AfterFunction: false
AfterNamespace: false
AfterObjCDeclaration: false
AfterStruct: false
AfterUnion: false
AfterExternBlock: false
BeforeCatch: false
BeforeElse: false
BeforeLambdaBody: false
BeforeWhile: false
IndentBraces: false
SplitEmptyFunction: true
SplitEmptyRecord: true
SplitEmptyNamespace: true
BreakBeforeBinaryOperators: None
BreakBeforeConceptDeclarations: true
BreakBeforeBraces: Attach
BreakBeforeInheritanceComma: false
BreakInheritanceList: BeforeColon
BreakBeforeTernaryOperators: true
BreakConstructorInitializersBeforeComma: false
BreakConstructorInitializers: BeforeColon
BreakAfterJavaFieldAnnotations: false
BreakStringLiterals: true
ColumnLimit: 100
CommentPragmas: '^ IWYU pragma:'
QualifierAlignment: Leave
CompactNamespaces: false
ConstructorInitializerIndentWidth: 4
ContinuationIndentWidth: 4
Cpp11BracedListStyle: true
DeriveLineEnding: true
DerivePointerAlignment: true
DisableFormat: false
EmptyLineAfterAccessModifier: Never
EmptyLineBeforeAccessModifier: LogicalBlock
ExperimentalAutoDetectBinPacking: false
PackConstructorInitializers: NextLine
BasedOnStyle: ''
ConstructorInitializerAllOnOneLineOrOnePerLine: false
AllowAllConstructorInitializersOnNextLine: true
FixNamespaceComments: true
ForEachMacros:
- foreach
- Q_FOREACH
- BOOST_FOREACH
IfMacros:
- KJ_IF_MAYBE
IncludeBlocks: Regroup
IncludeCategories:
- Regex: '^<ext/.*\.h>'
Priority: 2
SortPriority: 0
CaseSensitive: false
- Regex: '^<.*\.h>'
Priority: 1
SortPriority: 0
CaseSensitive: false
- Regex: '^<.*'
Priority: 2
SortPriority: 0
CaseSensitive: false
- Regex: '.*'
Priority: 3
SortPriority: 0
CaseSensitive: false
IncludeIsMainRegex: '([-_](test|unittest))?$'
IncludeIsMainSourceRegex: ''
IndentAccessModifiers: false
IndentCaseLabels: true
IndentCaseBlocks: false
IndentGotoLabels: true
IndentPPDirectives: None
IndentExternBlock: AfterExternBlock
IndentRequires: false
IndentWidth: 2
IndentWrappedFunctionNames: false
InsertTrailingCommas: None
JavaScriptQuotes: Leave
JavaScriptWrapImports: true
KeepEmptyLinesAtTheStartOfBlocks: false
LambdaBodyIndentation: Signature
MacroBlockBegin: ''
MacroBlockEnd: ''
MaxEmptyLinesToKeep: 1
NamespaceIndentation: None
ObjCBinPackProtocolList: Never
ObjCBlockIndentWidth: 2
ObjCBreakBeforeNestedBlockParam: true
ObjCSpaceAfterProperty: false
ObjCSpaceBeforeProtocolList: true
PenaltyBreakAssignment: 2
PenaltyBreakBeforeFirstCallParameter: 1
PenaltyBreakComment: 300
PenaltyBreakFirstLessLess: 120
PenaltyBreakString: 1000
PenaltyBreakTemplateDeclaration: 10
PenaltyExcessCharacter: 1000000
PenaltyReturnTypeOnItsOwnLine: 200
PenaltyIndentedWhitespace: 0
PointerAlignment: Left
PPIndentWidth: -1
RawStringFormats:
- Language: Cpp
Delimiters:
- cc
- CC
- cpp
- Cpp
- CPP
- 'c++'
- 'C++'
CanonicalDelimiter: ''
BasedOnStyle: google
- Language: TextProto
Delimiters:
- pb
- PB
- proto
- PROTO
EnclosingFunctions:
- EqualsProto
- EquivToProto
- PARSE_PARTIAL_TEXT_PROTO
- PARSE_TEST_PROTO
- PARSE_TEXT_PROTO
- ParseTextOrDie
- ParseTextProtoOrDie
- ParseTestProto
- ParsePartialTestProto
CanonicalDelimiter: pb
BasedOnStyle: google
ReferenceAlignment: Pointer
ReflowComments: true
ShortNamespaceLines: 1
SortIncludes: CaseSensitive
SortJavaStaticImport: Before
SortUsingDeclarations: true
SpaceAfterCStyleCast: false
SpaceAfterLogicalNot: false
SpaceAfterTemplateKeyword: true
SpaceBeforeAssignmentOperators: true
SpaceBeforeCaseColon: false
SpaceBeforeCpp11BracedList: false
SpaceBeforeCtorInitializerColon: true
SpaceBeforeInheritanceColon: true
SpaceBeforeParens: ControlStatements
SpaceAroundPointerQualifiers: Default
SpaceBeforeRangeBasedForLoopColon: true
SpaceInEmptyBlock: false
SpaceInEmptyParentheses: false
SpacesBeforeTrailingComments: 2
SpacesInAngles: Never
SpacesInConditionalStatement: false
SpacesInContainerLiterals: true
SpacesInCStyleCastParentheses: false
SpacesInLineCommentPrefix:
Minimum: 1
Maximum: -1
SpacesInParentheses: false
SpacesInSquareBrackets: false
SpaceBeforeSquareBrackets: false
BitFieldColonSpacing: Both
Standard: Auto
StatementAttributeLikeMacros:
- Q_EMIT
StatementMacros:
- Q_UNUSED
- QT_REQUIRE_VERSION
TabWidth: 8
UseCRLF: false
UseTab: Never
WhitespaceSensitiveMacros:
- STRINGIZE
- PP_STRINGIZE
- BOOST_PP_STRINGIZE
- NS_SWIFT_NAME
- CF_SWIFT_NAME
...

(GitHub Actions workflow; file name not shown)

@@ -21,10 +21,7 @@ jobs:
submodules: 'true'
- name: Install system packages
run: |
# Use libomp 11.1.0: https://github.com/dmlc/xgboost/issues/7039
wget https://raw.githubusercontent.com/Homebrew/homebrew-core/679923b4eb48a8dc7ecc1f05d06063cd79b3fc00/Formula/libomp.rb -O $(find $(brew --repository) -name libomp.rb)
brew install ninja libomp
brew pin libomp
- name: Build gtest binary
run: |
mkdir build

(GitHub Actions workflow; file name not shown)

@@ -17,10 +17,7 @@ jobs:
- name: Install osx system dependencies
if: matrix.os == 'macos-10.15'
run: |
# Use libomp 11.1.0: https://github.com/dmlc/xgboost/issues/7039
wget https://raw.githubusercontent.com/Homebrew/homebrew-core/679923b4eb48a8dc7ecc1f05d06063cd79b3fc00/Formula/libomp.rb -O $(find $(brew --repository) -name libomp.rb)
brew install ninja libomp
brew pin libomp
- name: Install Ubuntu system dependencies
if: matrix.os == 'ubuntu-latest'
run: |
@@ -51,7 +48,7 @@ jobs:
strategy:
matrix:
config:
- {os: windows-2016, python-version: '3.8'}
- {os: windows-latest, python-version: '3.8'}
steps:
- uses: actions/checkout@v2
@@ -76,7 +73,7 @@ jobs:
run: |
mkdir build_msvc
cd build_msvc
cmake .. -G"Visual Studio 15 2017" -DCMAKE_CONFIGURATION_TYPES="Release" -A x64 -DGOOGLE_TEST=ON -DUSE_DMLC_GTEST=ON
cmake .. -G"Visual Studio 17 2022" -DCMAKE_CONFIGURATION_TYPES="Release" -A x64 -DGOOGLE_TEST=ON -DUSE_DMLC_GTEST=ON
cmake --build . --config Release --parallel $(nproc)
- name: Install Python package
@@ -119,14 +116,16 @@ jobs:
conda list
- name: Build XGBoost on macos
shell: bash -l {0}
run: |
wget https://raw.githubusercontent.com/Homebrew/homebrew-core/679923b4eb48a8dc7ecc1f05d06063cd79b3fc00/Formula/libomp.rb -O $(find $(brew --repository) -name libomp.rb)
brew install ninja libomp
brew pin libomp
brew install ninja
mkdir build
cd build
cmake .. -GNinja -DGOOGLE_TEST=ON -DUSE_DMLC_GTEST=ON
# Set prefix, to use OpenMP library from Conda env
# See https://github.com/dmlc/xgboost/issues/7039#issuecomment-1025038228
# to learn why we don't use libomp from Homebrew.
cmake .. -GNinja -DGOOGLE_TEST=ON -DUSE_DMLC_GTEST=ON -DCMAKE_PREFIX_PATH=$CONDA_PREFIX
ninja
- name: Install Python package
@@ -141,24 +140,3 @@ jobs:
shell: bash -l {0}
run: |
pytest -s -v ./tests/python
- name: Rename Python wheel
shell: bash -l {0}
run: |
TAG=macosx_10_15_x86_64.macosx_11_0_x86_64.macosx_12_0_x86_64
python tests/ci_build/rename_whl.py python-package/dist/*.whl ${{ github.sha }} ${TAG}
- name: Extract branch name
shell: bash
run: echo "##[set-output name=branch;]$(echo ${GITHUB_REF#refs/heads/})"
id: extract_branch
if: github.ref == 'refs/heads/master' || contains(github.ref, 'refs/heads/release_')
- name: Upload Python wheel
shell: bash -l {0}
if: github.ref == 'refs/heads/master' || contains(github.ref, 'refs/heads/release_')
run: |
python -m awscli s3 cp python-package/dist/*.whl s3://xgboost-nightly-builds/${{ steps.extract_branch.outputs.branch }}/ --acl public-read
env:
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID_IAM_S3_UPLOADER }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY_IAM_S3_UPLOADER }}

.github/workflows/python_wheels.yml (new file, 38 lines)

@@ -0,0 +1,38 @@
name: XGBoost-Python-Wheels
on: [push, pull_request]
jobs:
python-wheels:
name: Build wheel for ${{ matrix.platform_id }}
runs-on: ${{ matrix.os }}
strategy:
matrix:
include:
- os: macos-latest
platform_id: macosx_x86_64
- os: macos-latest
platform_id: macosx_arm64
steps:
- uses: actions/checkout@v2
with:
submodules: 'true'
- name: Setup Python
uses: actions/setup-python@v2
with:
python-version: '3.9'
- name: Build wheels
run: bash tests/ci_build/build_python_wheels.sh ${{ matrix.platform_id }} ${{ github.sha }}
- name: Extract branch name
shell: bash
run: echo "##[set-output name=branch;]$(echo ${GITHUB_REF#refs/heads/})"
id: extract_branch
if: github.ref == 'refs/heads/master' || contains(github.ref, 'refs/heads/release_')
- name: Upload Python wheel
if: github.ref == 'refs/heads/master' || contains(github.ref, 'refs/heads/release_')
run: |
python -m pip install awscli
python -m awscli s3 cp wheelhouse/*.whl s3://xgboost-nightly-builds/${{ steps.extract_branch.outputs.branch }}/ --acl public-read
env:
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID_IAM_S3_UPLOADER }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY_IAM_S3_UPLOADER }}

(GitHub Actions workflow; file name not shown)

@@ -31,8 +31,8 @@ jobs:
uses: actions/cache@v2
with:
path: ${{ env.R_LIBS_USER }}
key: ${{ runner.os }}-r-${{ matrix.config.r }}-2-${{ hashFiles('R-package/DESCRIPTION') }}
restore-keys: ${{ runner.os }}-r-${{ matrix.config.r }}-2-${{ hashFiles('R-package/DESCRIPTION') }}
key: ${{ runner.os }}-r-${{ matrix.config.r }}-3-${{ hashFiles('R-package/DESCRIPTION') }}
restore-keys: ${{ runner.os }}-r-${{ matrix.config.r }}-3-${{ hashFiles('R-package/DESCRIPTION') }}
- name: Install dependencies
shell: Rscript {0}
@@ -59,9 +59,9 @@ jobs:
fail-fast: false
matrix:
config:
- {os: windows-2016, r: 'release', compiler: 'mingw', build: 'autotools'}
- {os: windows-2016, r: 'release', compiler: 'msvc', build: 'cmake'}
- {os: windows-2016, r: 'release', compiler: 'mingw', build: 'cmake'}
- {os: windows-latest, r: 'release', compiler: 'mingw', build: 'autotools'}
- {os: windows-latest, r: 'release', compiler: 'msvc', build: 'cmake'}
- {os: windows-latest, r: 'release', compiler: 'mingw', build: 'cmake'}
env:
R_REMOTES_NO_ERRORS_FROM_WARNINGS: true
RSPM: ${{ matrix.config.rspm }}
@@ -79,8 +79,8 @@ jobs:
uses: actions/cache@v2
with:
path: ${{ env.R_LIBS_USER }}
key: ${{ runner.os }}-r-${{ matrix.config.r }}-2-${{ hashFiles('R-package/DESCRIPTION') }}
restore-keys: ${{ runner.os }}-r-${{ matrix.config.r }}-2-${{ hashFiles('R-package/DESCRIPTION') }}
key: ${{ runner.os }}-r-${{ matrix.config.r }}-3-${{ hashFiles('R-package/DESCRIPTION') }}
restore-keys: ${{ runner.os }}-r-${{ matrix.config.r }}-3-${{ hashFiles('R-package/DESCRIPTION') }}
- name: Install dependencies
shell: Rscript {0}
@@ -90,7 +90,7 @@ jobs:
dependencies = c('Depends', 'Imports', 'LinkingTo'))
- name: Install igraph on Windows
shell: Rscript {0}
if: matrix.config.os == 'windows-2016'
if: matrix.config.os == 'windows-latest'
run: |
install.packages('igraph', type='binary', dependencies = c('Depends', 'Imports', 'LinkingTo'))
@@ -131,8 +131,8 @@ jobs:
uses: actions/cache@v2
with:
path: ${{ env.R_LIBS_USER }}
key: ${{ runner.os }}-r-${{ matrix.config.r }}-2-${{ hashFiles('R-package/DESCRIPTION') }}
restore-keys: ${{ runner.os }}-r-${{ matrix.config.r }}-2-${{ hashFiles('R-package/DESCRIPTION') }}
key: ${{ runner.os }}-r-${{ matrix.config.r }}-3-${{ hashFiles('R-package/DESCRIPTION') }}
restore-keys: ${{ runner.os }}-r-${{ matrix.config.r }}-3-${{ hashFiles('R-package/DESCRIPTION') }}
- name: Install dependencies
shell: Rscript {0}

.gitignore (6 lines changed)

@@ -63,6 +63,7 @@ nb-configuration*
# Eclipse
.project
.cproject
.classpath
.pydevproject
.settings/
build
@@ -125,3 +126,8 @@ credentials.csv
*.pub
*.rdp
*_rsa
# Visual Studio code + extensions
.vscode
.metals
.bloop

CMakeLists.txt

@@ -1,5 +1,5 @@
cmake_minimum_required(VERSION 3.14 FATAL_ERROR)
project(xgboost LANGUAGES CXX C VERSION 1.5.2)
project(xgboost LANGUAGES CXX C VERSION 1.6.0)
include(cmake/Utils.cmake)
list(APPEND CMAKE_MODULE_PATH "${xgboost_SOURCE_DIR}/cmake/modules")
cmake_policy(SET CMP0022 NEW)
@@ -28,6 +28,7 @@ set_default_configuration_release()
option(BUILD_C_DOC "Build documentation for C APIs using Doxygen." OFF)
option(USE_OPENMP "Build with OpenMP support." ON)
option(BUILD_STATIC_LIB "Build static library" OFF)
option(FORCE_SHARED_CRT "Build with dynamic CRT on Windows (/MD)" OFF)
option(RABIT_BUILD_MPI "Build MPI" OFF)
## Bindings
option(JVM_BINDINGS "Build JVM bindings" OFF)
@@ -137,7 +138,7 @@ if (USE_CUDA)
add_subdirectory(${PROJECT_SOURCE_DIR}/gputreeshap)
if ((${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL 11.4) AND (NOT BUILD_WITH_CUDA_CUB))
message(SEND_ERROR "`BUILD_WITH_CUDA_CUB` should be set to `ON` for CUDA >= 11.4")
set(BUILD_WITH_CUDA_CUB ON)
endif ()
endif (USE_CUDA)
@@ -164,6 +165,9 @@ endif (USE_NCCL)
# dmlc-core
msvc_use_static_runtime()
if (FORCE_SHARED_CRT)
set(DMLC_FORCE_SHARED_CRT ON)
endif ()
add_subdirectory(${xgboost_SOURCE_DIR}/dmlc-core)
if (MSVC)
@@ -300,7 +304,7 @@ write_basic_package_version_file(
COMPATIBILITY AnyNewerVersion)
install(
FILES
${CMAKE_BINARY_DIR}/cmake/xgboost-config.cmake
${CMAKE_CURRENT_BINARY_DIR}/cmake/xgboost-config.cmake
${CMAKE_BINARY_DIR}/cmake/xgboost-config-version.cmake
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/xgboost)

CONTRIBUTORS.md

@@ -10,8 +10,8 @@ The Project Management Committee(PMC) consists group of active committers that m
- Tianqi is a Ph.D. student working on large-scale machine learning. He is the creator of the project.
* [Michael Benesty](https://github.com/pommedeterresautee)
- Michael is a lawyer and data scientist in France. He is the creator of XGBoost interactive analysis module in R.
* [Yuan Tang](https://github.com/terrytangyuan), Ant Group
- Yuan is a software engineer in Ant Group. He contributed mostly in R and Python packages.
* [Yuan Tang](https://github.com/terrytangyuan), Akuity
- Yuan is a founding engineer at Akuity. He contributed mostly in R and Python packages.
* [Nan Zhu](https://github.com/CodingCat), Uber
- Nan is a software engineer in Uber. He contributed mostly in JVM packages.
* [Jiaming Yuan](https://github.com/trivialfis)

Jenkinsfile (46 lines changed)

@@ -7,7 +7,7 @@
dockerRun = 'tests/ci_build/ci_build.sh'
// Which CUDA version to use when building reference distribution wheel
ref_cuda_ver = '10.1'
ref_cuda_ver = '11.0'
import groovy.transform.Field
@@ -58,14 +58,12 @@ pipeline {
'build-cpu': { BuildCPU() },
'build-cpu-arm64': { BuildCPUARM64() },
'build-cpu-rabit-mock': { BuildCPUMock() },
// Build reference, distribution-ready Python wheel with CUDA 10.1
// Build reference, distribution-ready Python wheel with CUDA 11.0
// using CentOS 7 image
'build-gpu-cuda10.1': { BuildCUDA(cuda_version: '10.1') },
// The build-gpu-* builds below use Ubuntu image
'build-gpu-cuda11.0': { BuildCUDA(cuda_version: '11.0', build_rmm: true) },
'build-gpu-rpkg': { BuildRPackageWithCUDA(cuda_version: '10.1') },
'build-jvm-packages-gpu-cuda10.1': { BuildJVMPackagesWithCUDA(spark_version: '3.0.0', cuda_version: '11.0') },
'build-jvm-packages': { BuildJVMPackages(spark_version: '3.0.0') },
'build-gpu-rpkg': { BuildRPackageWithCUDA(cuda_version: '11.0') },
'build-jvm-packages-gpu-cuda11.0': { BuildJVMPackagesWithCUDA(spark_version: '3.0.1', cuda_version: '11.0') },
'build-jvm-packages': { BuildJVMPackages(spark_version: '3.0.1') },
'build-jvm-doc': { BuildJVMDoc() }
])
}
@@ -79,13 +77,10 @@ pipeline {
'test-python-cpu': { TestPythonCPU() },
'test-python-cpu-arm64': { TestPythonCPUARM64() },
// artifact_cuda_version doesn't apply to RMM tests; RMM tests will always match CUDA version between artifact and host env
'test-python-gpu-cuda11.0-cross': { TestPythonGPU(artifact_cuda_version: '10.1', host_cuda_version: '11.0', test_rmm: true) },
'test-python-gpu-cuda11.0': { TestPythonGPU(artifact_cuda_version: '11.0', host_cuda_version: '11.0') },
'test-python-mgpu-cuda11.0': { TestPythonGPU(artifact_cuda_version: '10.1', host_cuda_version: '11.0', multi_gpu: true, test_rmm: true) },
'test-python-gpu-cuda11.0': { TestPythonGPU(artifact_cuda_version: '11.0', host_cuda_version: '11.0', test_rmm: true) },
'test-python-mgpu-cuda11.0': { TestPythonGPU(artifact_cuda_version: '11.0', host_cuda_version: '11.0', multi_gpu: true, test_rmm: true) },
'test-cpp-gpu-cuda11.0': { TestCppGPU(artifact_cuda_version: '11.0', host_cuda_version: '11.0', test_rmm: true) },
'test-jvm-jdk8': { CrossTestJVMwithJDK(jdk_version: '8', spark_version: '3.0.0') },
'test-jvm-jdk11': { CrossTestJVMwithJDK(jdk_version: '11') },
'test-jvm-jdk12': { CrossTestJVMwithJDK(jdk_version: '12') }
'test-jvm-jdk8': { CrossTestJVMwithJDK(jdk_version: '8', spark_version: '3.0.0') }
])
}
}
@@ -128,9 +123,9 @@ def ClangTidy() {
echo "Running clang-tidy job..."
def container_type = "clang_tidy"
def docker_binary = "docker"
def dockerArgs = "--build-arg CUDA_VERSION_ARG=10.1"
def dockerArgs = "--build-arg CUDA_VERSION_ARG=11.0"
sh """
${dockerRun} ${container_type} ${docker_binary} ${dockerArgs} python3 tests/ci_build/tidy.py
${dockerRun} ${container_type} ${docker_binary} ${dockerArgs} python3 tests/ci_build/tidy.py --cuda-archs 75
"""
deleteDir()
}
@@ -184,8 +179,9 @@ def BuildCPUARM64() {
stash name: "xgboost_whl_arm64_cpu", includes: 'python-package/dist/*.whl'
if (env.BRANCH_NAME == 'master' || env.BRANCH_NAME.startsWith('release')) {
echo 'Uploading Python wheel...'
path = ("${BRANCH_NAME}" == 'master') ? '' : "${BRANCH_NAME}/"
s3Upload bucket: 'xgboost-nightly-builds', path: path, acl: 'PublicRead', workingDir: 'python-package/dist', includePathPattern:'**/*.whl'
sh """
${dockerRun} ${container_type} ${docker_binary} bash -c "source activate aarch64_test && python -m awscli s3 cp python-package/dist/*.whl s3://xgboost-nightly-builds/${BRANCH_NAME}/ --acl public-read --no-progress"
"""
}
stash name: 'xgboost_cli_arm64', includes: 'xgboost'
deleteDir()
@@ -237,8 +233,9 @@ def BuildCUDA(args) {
stash name: "xgboost_whl_cuda${args.cuda_version}", includes: 'python-package/dist/*.whl'
if (args.cuda_version == ref_cuda_ver && (env.BRANCH_NAME == 'master' || env.BRANCH_NAME.startsWith('release'))) {
echo 'Uploading Python wheel...'
path = ("${BRANCH_NAME}" == 'master') ? '' : "${BRANCH_NAME}/"
s3Upload bucket: 'xgboost-nightly-builds', path: path, acl: 'PublicRead', workingDir: 'python-package/dist', includePathPattern:'**/*.whl'
sh """
${dockerRun} ${container_type} ${docker_binary} ${docker_args} python -m awscli s3 cp python-package/dist/*.whl s3://xgboost-nightly-builds/${BRANCH_NAME}/ --acl public-read --no-progress
"""
}
echo 'Stashing C++ test executable (testxgboost)...'
stash name: "xgboost_cpp_tests_cuda${args.cuda_version}", includes: 'build/testxgboost'
@@ -273,8 +270,9 @@ def BuildRPackageWithCUDA(args) {
${dockerRun} ${container_type} ${docker_binary} ${docker_args} tests/ci_build/build_r_pkg_with_cuda.sh ${commit_id}
"""
echo 'Uploading R tarball...'
path = ("${BRANCH_NAME}" == 'master') ? '' : "${BRANCH_NAME}/"
s3Upload bucket: 'xgboost-nightly-builds', path: path, acl: 'PublicRead', includePathPattern:'xgboost_r_gpu_linux_*.tar.gz'
sh """
${dockerRun} ${container_type} ${docker_binary} ${docker_args} python -m awscli s3 cp xgboost_r_gpu_linux_*.tar.gz s3://xgboost-nightly-builds/${BRANCH_NAME}/ --acl public-read --no-progress
"""
}
deleteDir()
}
@@ -330,7 +328,9 @@ def BuildJVMDoc() {
"""
if (env.BRANCH_NAME == 'master' || env.BRANCH_NAME.startsWith('release')) {
echo 'Uploading doc...'
s3Upload file: "jvm-packages/${BRANCH_NAME}.tar.bz2", bucket: 'xgboost-docs', acl: 'PublicRead', path: "${BRANCH_NAME}.tar.bz2"
sh """
${dockerRun} ${container_type} ${docker_binary} python -m awscli s3 cp jvm-packages/${BRANCH_NAME}.tar.bz2 s3://xgboost-docs/${BRANCH_NAME}.tar.bz2 --acl public-read --no-progress
"""
}
deleteDir()
}
@@ -445,7 +445,7 @@ def DeployJVMPackages(args) {
if (env.BRANCH_NAME == 'master' || env.BRANCH_NAME.startsWith('release')) {
echo 'Deploying to xgboost-maven-repo S3 repo...'
sh """
${dockerRun} jvm_gpu_build docker --build-arg CUDA_VERSION_ARG=10.1 tests/ci_build/deploy_jvm_packages.sh ${args.spark_version}
${dockerRun} jvm_gpu_build docker --build-arg CUDA_VERSION_ARG=11.0 tests/ci_build/deploy_jvm_packages.sh ${args.spark_version}
"""
}
deleteDir()

Jenkinsfile-win64

@@ -40,8 +40,8 @@ pipeline {
steps {
script {
parallel ([
'build-win64-cuda10.1': { BuildWin64() },
'build-rpkg-win64-cuda10.1': { BuildRPackageWithCUDAWin64() }
'build-win64-cuda11.0': { BuildWin64() },
'build-rpkg-win64-cuda11.0': { BuildRPackageWithCUDAWin64() }
])
}
}
@@ -51,7 +51,7 @@ pipeline {
steps {
script {
parallel ([
'test-win64-cuda10.1': { TestWin64() },
'test-win64-cuda11.0': { TestWin64() },
])
}
}
@@ -75,7 +75,7 @@ def checkoutSrcs() {
}
def BuildWin64() {
node('win64 && cuda10_unified') {
node('win64 && cuda11_unified') {
deleteDir()
unstash name: 'srcs'
echo "Building XGBoost for Windows AMD64 target..."
@@ -107,7 +107,7 @@ def BuildWin64() {
stash name: 'xgboost_whl', includes: 'python-package/dist/*.whl'
if (env.BRANCH_NAME == 'master' || env.BRANCH_NAME.startsWith('release')) {
echo 'Uploading Python wheel...'
path = ("${BRANCH_NAME}" == 'master') ? '' : "${BRANCH_NAME}/"
path = "${BRANCH_NAME}/"
s3Upload bucket: 'xgboost-nightly-builds', path: path, acl: 'PublicRead', workingDir: 'python-package/dist', includePathPattern:'**/*.whl'
}
echo 'Stashing C++ test executable (testxgboost)...'
@@ -118,7 +118,7 @@ def BuildWin64() {
}
def BuildRPackageWithCUDAWin64() {
node('win64 && cuda10_unified') {
node('win64 && cuda11_unified') {
deleteDir()
unstash name: 'srcs'
bat "nvcc --version"
@@ -127,7 +127,7 @@ def BuildRPackageWithCUDAWin64() {
bash tests/ci_build/build_r_pkg_with_cuda_win64.sh ${commit_id}
"""
echo 'Uploading R tarball...'
path = ("${BRANCH_NAME}" == 'master') ? '' : "${BRANCH_NAME}/"
path = "${BRANCH_NAME}/"
s3Upload bucket: 'xgboost-nightly-builds', path: path, acl: 'PublicRead', includePathPattern:'xgboost_r_gpu_win64_*.tar.gz'
}
deleteDir()
@@ -135,7 +135,7 @@ def BuildRPackageWithCUDAWin64() {
}
def TestWin64() {
node('win64 && cuda10_unified') {
node('win64 && cuda11_unified') {
deleteDir()
unstash name: 'srcs'
unstash name: 'xgboost_whl'
@@ -153,9 +153,9 @@ def TestWin64() {
conda activate ${env_name} && for /R %%i in (python-package\\dist\\*.whl) DO python -m pip install "%%i"
"""
echo "Running Python tests..."
bat "conda activate ${env_name} && python -m pytest -v -s -rxXs --fulltrace tests\\python"
bat "conda activate ${env_name} && python -X faulthandler -m pytest -v -s -rxXs --fulltrace tests\\python"
bat """
conda activate ${env_name} && python -m pytest -v -s -rxXs --fulltrace -m "(not slow) and (not mgpu)" tests\\python-gpu
conda activate ${env_name} && python -X faulthandler -m pytest -v -s -rxXs --fulltrace -m "(not slow) and (not mgpu)" tests\\python-gpu
"""
bat "conda env remove --name ${env_name}"
deleteDir()

Makefile

@@ -93,11 +93,14 @@ mypy:
cd python-package; \
mypy ./xgboost/dask.py && \
mypy ./xgboost/rabit.py && \
mypy ./xgboost/tracker.py && \
mypy ./xgboost/sklearn.py && \
mypy ../demo/guide-python/external_memory.py && \
mypy ../demo/guide-python/categorical.py && \
mypy ../demo/guide-python/cat_in_the_dat.py && \
mypy ../tests/python-gpu/test_gpu_with_dask.py && \
mypy ../tests/python/test_data_iterator.py && \
mypy ../tests/python-gpu/test_gpu_data_iterator.py && \
mypy ./xgboost/sklearn.py || exit 1; \
mypy ../tests/python-gpu/test_gpu_data_iterator.py || exit 1; \
mypy . || true ;
clean:
@@ -150,6 +153,7 @@ Rpack: clean_all
bash R-package/remove_warning_suppression_pragma.sh
bash xgboost/remove_warning_suppression_pragma.sh
rm xgboost/remove_warning_suppression_pragma.sh
rm xgboost/CMakeLists.txt
rm -rfv xgboost/tests/helper_scripts/
R ?= R

NEWS.md

@@ -3,6 +3,241 @@ XGBoost Change Log
This file records the changes in xgboost library in reverse chronological order.
## v1.5.0 (2021 Oct 11)
This release comes with many exciting new features and optimizations, along with some bug
fixes. We will describe the experimental categorical data support and the external memory
interface independently. Package-specific new features will be listed in respective
sections.
### Development on categorical data support
In version 1.3, XGBoost introduced an experimental feature for handling categorical data
natively, without one-hot encoding. XGBoost can fit categorical splits in decision
trees. (Currently, the generated splits will be of form `x \in {v}`, where the input is
compared to a single category value. A future version of XGBoost will generate splits that
compare the input against a list of multiple category values.)
Most of the other features, including prediction, SHAP value computation, feature
importance, and model plotting, were revised to natively handle categorical splits. Also,
all Python interfaces, including the native interface with and without quantized `DMatrix`,
the scikit-learn interface, and the Dask interface, now accept categorical data in a wide
range of data structures, including numpy/cupy arrays and cuDF/pandas/modin dataframes. In
practice, the following are required for enabling categorical data support during
training:
- Use Python package.
- Use `gpu_hist` to train the model.
- Use JSON model file format for saving the model.
Once the model is trained, it can be used with most of the features that are available in
the Python package. For a quick introduction, see
https://xgboost.readthedocs.io/en/latest/tutorials/categorical.html
Related PRs: (#7011, #7001, #7042, #7041, #7047, #7043, #7036, #7054, #7053, #7065, #7213, #7228, #7220, #7221, #7231, #7306)
* Next steps
- Revise the CPU training algorithm to handle categorical data natively and generate categorical splits
- Extend the CPU and GPU algorithms to generate categorical splits of form `x \in S`
where the input is compared with multiple category values. (#7081)
### External memory
This release features a brand-new interface and implementation for external memory (also
known as out-of-core training). (#6901, #7064, #7088, #7089, #7087, #7092, #7070,
#7216). The new implementation leverages the data iterator interface, which is currently
used to create `DeviceQuantileDMatrix`. For a quick introduction, see
https://xgboost.readthedocs.io/en/latest/tutorials/external_memory.html#data-iterator.
During the development of this new interface, `lz4` compression was removed. (#7076)
Please note that external memory support is still experimental and not ready for
production use yet. All future development will focus on this new interface and users are
advised to migrate. (You are using the old interface if you are using a URL suffix to use
external memory.)
### New features in Python package
* Support numpy array interface and all numeric types from numpy in `DMatrix`
construction and `inplace_predict` (#6998, #7003). XGBoost no longer copies data
when the input is a numpy array view.
* The early stopping callback in Python has a new `min_delta` parameter to control the
stopping behavior (#7137)
* Python package now supports calculating feature scores for the linear model, which is
also available in the R package. (#7048)
* Python interface now supports configuring constraints using feature names instead of
feature indices.
* Typehint support for more Python code including scikit-learn interface and rabit
module. (#6799, #7240)
* Add tutorial for XGBoost-Ray (#6884)
### New features in R package
* In 1.4 we introduced a new prediction function in the C API, which is used by the Python
package. This release revises the R package to use the new prediction function as well.
A new parameter `iteration_range` for the predict function is available, which can be
used for specifying the range of trees for running prediction. (#6819, #7126)
* R package now supports the `nthread` parameter in `DMatrix` construction. (#7127) A short sketch of both R additions follows below.
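A minimal R sketch of the two additions above (hedged: `bst` is assumed to be a booster already trained for 100 rounds, and `x`, `y` are placeholder training data):

  # construct the DMatrix with an explicit thread count (#7127)
  dtrain <- xgb.DMatrix(data = x, label = y, nthread = 2)
  # predict with only the first 20 rounds; the index is 1-based and the
  # range is half-open, so c(1, 21) selects rounds [1, 21)
  preds <- predict(bst, dtrain, iterationrange = c(1, 21))

Note that the R predict method spells the parameter `iterationrange`, without the underscore, as the documentation fix later in this compare makes explicit.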
### New features in JVM packages
* Support GPU dataframe and `DeviceQuantileDMatrix` (#7195). Constructing `DMatrix`
with GPU data structures and the interface for quantized `DMatrix` were first
introduced in the Python package and are now available in the xgboost4j package.
* JVM packages now support saving and getting early stopping attributes. (#7095) Here is a
quick [example](https://github.com/dmlc/xgboost/jvm-packages/xgboost4j-example/src/main/java/ml/dmlc/xgboost4j/java/example/EarlyStopping.java "example") in Java (#7252).
### General new features
* We now have a pre-built binary package for R on Windows with GPU support. (#7185)
* CUDA compute capability 8.6 is now part of the default CMake build configuration with
newly added support for CUDA 11.4. (#7131, #7182, #7254)
* XGBoost can be compiled using system CUB provided by CUDA 11.x installation. (#7232)
### Optimizations
The performance for both `hist` and `gpu_hist` has been significantly improved in 1.5
with the following optimizations:
* GPU multi-class model training now supports prediction cache. (#6860)
* GPU histogram building is sped up and the overall training time is 2-3 times faster on
large datasets (#7180, #7198). In addition, we removed the parameter `deterministic_histogram` and now
the GPU algorithm is always deterministic.
* CPU hist has an optimized procedure for data sampling (#6922)
* More performance optimization in regression and binary classification objectives on
CPU (#7206)
* Tree model dump is now performed in parallel (#7040)
### Breaking changes
* `n_gpus` was deprecated in the 1.0 release and is now removed.
* Feature grouping in the CPU hist tree method, which had been disabled long ago, is
removed. (#7018)
* C API for Quantile DMatrix is changed to be consistent with the new external memory
implementation. (#7082)
### Notable general bug fixes
* XGBoost no longer changes the global CUDA device ordinal when `gpu_id` is specified (#6891,
#6987)
* Fix `gamma` negative likelihood evaluation metric. (#7275)
* Fix the integer value of `verbose_eval` for the `xgboost.cv` function in Python. (#7291)
* Remove extra sync in CPU hist for dense data, which can lead to incorrect tree node
statistics. (#7120, #7128)
* Fix a bug in GPU hist when data size is larger than `UINT32_MAX` with missing
values. (#7026)
* Fix a thread safety issue in prediction with the `softmax` objective. (#7104)
* Fix a thread safety issue in CPU SHAP value computation. (#7050) Please note that all
prediction functions in Python are thread-safe.
* Fix model slicing. (#7149, #7078)
* Work around a bug in old GCC which can lead to segfault during construction of
DMatrix. (#7161)
* Fix histogram truncation in GPU hist, which can lead to slightly-off results. (#7181)
* Fix loading GPU linear model pickle files on CPU-only machine. (#7154)
* Check input value is duplicated when CPU quantile queue is full (#7091)
* Fix parameter loading with training continuation. (#7121)
* Fix CMake interface for exposing C library by specifying dependencies. (#7099)
* Callback and early stopping are explicitly disabled for the scikit-learn interface
random forest estimator. (#7236)
* Fix compilation error on x86 (32-bit machine) (#6964)
* Fix CPU memory usage with extremely sparse datasets (#7255)
* Fix a bug in GPU multi-class AUC implementation with weighted data (#7300)
### Python package
Other than the items mentioned in the previous sections, there are some Python-specific
improvements.
* Change development release postfix to `dev` (#6988)
* Fix early stopping behavior with MAPE metric (#7061)
* Fixed incorrect feature mismatch error message (#6949)
* Add predictor to skl constructor. (#7000, #7159)
* Re-enable feature validation in predict proba. (#7177)
* The scikit-learn interface regression estimator can now pass the scikit-learn estimator
check and is fully compatible with scikit-learn utilities. `__sklearn_is_fitted__` is
implemented as part of the changes (#7130, #7230)
* Conform to the latest pylint. (#7071, #7241)
* Support the latest pandas range index in DMatrix construction. (#7074)
* Fix DMatrix construction from pandas series. (#7243)
* Fix typo and grammatical mistake in error message (#7134)
* [dask] disable work stealing explicitly for training tasks (#6794)
* [dask] Set dataframe index in predict. (#6944)
* [dask] Fix prediction on df with latest dask. (#6969)
* [dask] Fix dask predict on `DaskDMatrix` with `iteration_range`. (#7005)
* [dask] Disallow importing non-dask estimators from xgboost.dask (#7133)
### R package
Improvements other than new features on R package:
* Optimization for updating R handles in-place (#6903)
* Removed the magrittr dependency. (#6855, #6906, #6928)
* The R package now hides all C++ symbols to avoid conflicts. (#7245)
* Other maintenance, including code cleanups and document updates. (#6863, #6915, #6930, #6966, #6967)
### JVM packages
Improvements other than new features on JVM packages:
* Constructors with implicit missing value are deprecated due to confusing behaviors. (#7225)
* Reduce scala-compiler, scalatest dependency scopes (#6730)
* Make the Java library loader emit helpful error messages on missing dependencies. (#6926)
* JVM packages now use the Python tracker in XGBoost instead of dmlc. The one in XGBoost
is shared between JVM packages and Python Dask and enjoys better maintenance (#7132)
* Fix "key not found: train" error (#6842)
* Fix model loading from stream (#7067)
### General document improvements
* Overhaul the installation documents. (#6877)
* A few demos are added for AFT with dask (#6853), callback with dask (#6995), inference
in C (#7151), and `process_type` (#7135).
* Fix PDF format of document. (#7143)
* Clarify the behavior of `use_rmm`. (#6808)
* Clarify prediction function. (#6813)
* Improve tutorial on feature interactions (#7219)
* Add small example for dask sklearn interface. (#6970)
* Update Python intro. (#7235)
* Some fixes/updates (#6810, #6856, #6935, #6948, #6976, #7084, #7097, #7170, #7173, #7174, #7226, #6979, #6809, #6796, #6979)
### Maintenance
* Some refactoring around CPU hist, which leads to better performance but is listed under general maintenance tasks:
- Extract evaluate splits from CPU hist. (#7079)
- Merge lossguide and depthwise strategies for CPU hist (#7007)
- Simplify sparse and dense CPU hist kernels (#7029)
- Extract histogram builder from CPU Hist. (#7152)
* Others
- Fix `gpu_id` with custom objective. (#7015)
- Fix typos in AUC. (#6795)
- Use constexpr in `dh::CopyIf`. (#6828)
- Update dmlc-core. (#6862)
- Bump version to 1.5.0 snapshot in master. (#6875)
- Relax shotgun test. (#6900)
- Guard against index error in prediction. (#6982)
- Hide symbols in CI build + hide symbols for C and CUDA (#6798)
- Persist data in dask test. (#7077)
- Fix typo in arguments of PartitionBuilder::Init (#7113)
- Fix typo in src/common/hist.cc BuildHistKernel (#7116)
- Use upstream URI in distributed quantile tests. (#7129)
- Include cpack (#7160)
- Remove synchronization in monitor. (#7164)
- Remove unused code. (#7175)
- Fix building on CUDA 11.0. (#7187)
- Better error message for `ncclUnhandledCudaError`. (#7190)
- Add noexcept to JSON objects. (#7205)
- Improve wording for warning (#7248)
- Fix typo in release script. [skip ci] (#7238)
- Relax shotgun test. (#6918)
- Relax test for decision stump in distributed environment. (#6919)
- [dask] speed up tests (#7020)
### CI
* [CI] Rotate access keys for uploading MacOS artifacts from Travis CI (#7253)
* Reduce Travis environment setup time. (#6912)
* Restore R cache on github action. (#6985)
* [CI] Remove stray build artifact to avoid error in artifact packaging (#6994)
* [CI] Move appveyor tests to action (#6986)
* Remove appveyor badge. [skip ci] (#7035)
* [CI] Configure RAPIDS, dask, modin (#7033)
* Test on s390x. (#7038)
* [CI] Upgrade to CMake 3.14 (#7060)
* [CI] Update R cache. (#7102)
* [CI] Pin libomp to 11.1.0 (#7107)
* [CI] Upgrade build image to CentOS 7 + GCC 8; require CUDA 10.1 and later (#7141)
* [dask] Work around segfault in prediction. (#7112)
* [dask] Remove the workaround for segfault. (#7146)
* [CI] Fix hanging Python setup in Windows CI (#7186)
* [CI] Clean up in beginning of each task in Win CI (#7189)
* Fix travis. (#7237)
### Acknowledgement
* **Contributors**: Adam Pocock (@Craigacp), Jeff H (@JeffHCross), Johan Hansson (@JohanWork), Jose Manuel Llorens (@JoseLlorensRipolles), Benjamin Szőke (@Livius90), @ReeceGoding, @ShvetsKS, Robert Zabel (@ZabelTech), Ali (@ali5h), Andrew Ziem (@az0), Andy Adinets (@canonizer), @david-cortes, Daniel Saxton (@dsaxton), Emil Sadek (@esadek), @farfarawayzyt, Gil Forsyth (@gforsyth), @giladmaya, @graue70, Philip Hyunsu Cho (@hcho3), James Lamb (@jameslamb), José Morales (@jmoralez), Kai Fricke (@krfricke), Christian Lorentzen (@lorentzenchr), Mads R. B. Kristensen (@madsbk), Anton Kostin (@masguit42), Martin Petříček (@mpetricek-corp), @naveenkb, Taewoo Kim (@oOTWK), Viktor Szathmáry (@phraktle), Robert Maynard (@robertmaynard), TP Boudreau (@tpboudreau), Jiaming Yuan (@trivialfis), Paul Taylor (@trxcllnt), @vslaykovsky, Bobby Wang (@wbo4958),
* **Reviewers**: Nan Zhu (@CodingCat), Adam Pocock (@Craigacp), Jose Manuel Llorens (@JoseLlorensRipolles), Kodi Arfer (@Kodiologist), Benjamin Szőke (@Livius90), Mark Guryanov (@MarkGuryanov), Rory Mitchell (@RAMitchell), @ReeceGoding, @ShvetsKS, Egor Smirnov (@SmirnovEgorRu), Andrew Ziem (@az0), @candalfigomoro, Andy Adinets (@canonizer), Dante Gama Dessavre (@dantegd), @david-cortes, Daniel Saxton (@dsaxton), @farfarawayzyt, Gil Forsyth (@gforsyth), Harutaka Kawamura (@harupy), Philip Hyunsu Cho (@hcho3), @jakirkham, James Lamb (@jameslamb), José Morales (@jmoralez), James Bourbeau (@jrbourbeau), Christian Lorentzen (@lorentzenchr), Martin Petříček (@mpetricek-corp), Nikolay Petrov (@napetrov), @naveenkb, Viktor Szathmáry (@phraktle), Robin Teuwens (@rteuwens), Yuan Tang (@terrytangyuan), TP Boudreau (@tpboudreau), Jiaming Yuan (@trivialfis), @vkuzmin-uber, Bobby Wang (@wbo4958), William Hicks (@wphicks)
## v1.4.2 (2021.05.13)
This is a patch release for Python package with following fixes:

R-package/DESCRIPTION

@@ -1,12 +1,12 @@
Package: xgboost
Type: Package
Title: Extreme Gradient Boosting
Version: 1.5.2.1
Date: 2022-1-17
Version: 1.6.0.1
Date: 2022-03-29
Authors@R: c(
person("Tianqi", "Chen", role = c("aut"),
email = "tianqi.tchen@gmail.com"),
person("Tong", "He", role = c("aut", "cre"),
person("Tong", "He", role = c("aut"),
email = "hetong007@gmail.com"),
person("Michael", "Benesty", role = c("aut"),
email = "michael@benesty.fr"),
@@ -26,7 +26,8 @@ Authors@R: c(
person("Min", "Lin", role = c("aut")),
person("Yifeng", "Geng", role = c("aut")),
person("Yutian", "Li", role = c("aut")),
person("Jiaming", "Yuan", role = c("aut")),
person("Jiaming", "Yuan", role = c("aut", "cre"),
email = "jm.yuan@outlook.com"),
person("XGBoost contributors", role = c("cph"),
comment = "base XGBoost implementation")
)

R-package/R/xgb.Booster.R

@@ -162,7 +162,11 @@ xgb.Booster.complete <- function(object, saveraw = TRUE) {
#' Predicted values based on either xgboost model or model handle object.
#'
#' @param object Object of class \code{xgb.Booster} or \code{xgb.Booster.handle}
#' @param newdata takes \code{matrix}, \code{dgCMatrix}, local data file or \code{xgb.DMatrix}.
#' @param newdata takes \code{matrix}, \code{dgCMatrix}, \code{dgRMatrix}, \code{dsparseVector},
#' local data file or \code{xgb.DMatrix}.
#'
#' For single-row predictions on sparse data, it's recommended to use CSR format. If passing
#' a sparse vector, it will take it as a row vector.
#' @param missing Missing is only used when input is dense matrix. Pick a float value that represents
#' missing values in data (e.g., sometimes 0 or some other extreme value is used).
#' @param outputmargin whether the prediction should be returned in the form of the original untransformed
@@ -180,7 +184,7 @@ xgb.Booster.complete <- function(object, saveraw = TRUE) {
#' training predicting will perform dropout.
#' @param iterationrange Specifies which layer of trees are used in prediction. For
#' example, if a random forest is trained with 100 rounds. Specifying
#' `iteration_range=(1, 21)`, then only the forests built during [1, 21) (half open set)
#' `iterationrange=(1, 21)`, then only the forests built during [1, 21) (half open set)
#' rounds are used in this prediction. It's 1-based index just like R vector. When set
#' to \code{c(1, 1)} XGBoost will use all trees.
#' @param strict_shape Default is \code{FALSE}. When it's set to \code{TRUE}, output
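As a usage note for the sparse input types documented above, a minimal sketch (assuming `bst` is a booster fitted on a dgCMatrix `X`; the coercions come from the Matrix package):

  library(Matrix)
  # single-row prediction: CSR is recommended, avoiding a CSC slice per row
  x_csr <- as(X[1L, , drop = FALSE], "RsparseMatrix")
  p_csr <- predict(bst, x_csr)
  # a sparse vector is taken as a single row vector
  x_spv <- as(X[1L, , drop = FALSE], "sparseVector")
  p_spv <- predict(bst, x_spv)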

R-package/R/xgb.DMatrix.R

@@ -4,8 +4,10 @@
#' Supported input file formats are either a LIBSVM text file or a binary file that was created previously by
#' \code{\link{xgb.DMatrix.save}}).
#'
#' @param data a \code{matrix} object (either numeric or integer), a \code{dgCMatrix} object, or a character
#' string representing a filename.
#' @param data a \code{matrix} object (either numeric or integer), a \code{dgCMatrix} object,
#' a \code{dgRMatrix} object (only when making predictions from a fitted model),
#' a \code{dsparseVector} object (only when making predictions from a fitted model, will be
#' interpreted as a row vector), or a character string representing a filename.
#' @param info a named list of additional information to store in the \code{xgb.DMatrix} object.
#' See \code{\link{setinfo}} for the specific allowed kinds of information.
#' @param missing a float value to represent missing values in data (used only when input is a dense matrix).
@@ -33,8 +35,21 @@ xgb.DMatrix <- function(data, info = list(), missing = NA, silent = FALSE, nthre
handle <- .Call(XGDMatrixCreateFromMat_R, data, missing, as.integer(NVL(nthread, -1)))
cnames <- colnames(data)
} else if (inherits(data, "dgCMatrix")) {
handle <- .Call(XGDMatrixCreateFromCSC_R, data@p, data@i, data@x, nrow(data))
handle <- .Call(
XGDMatrixCreateFromCSC_R, data@p, data@i, data@x, nrow(data), as.integer(NVL(nthread, -1))
)
cnames <- colnames(data)
} else if (inherits(data, "dgRMatrix")) {
handle <- .Call(
XGDMatrixCreateFromCSR_R, data@p, data@j, data@x, ncol(data), as.integer(NVL(nthread, -1))
)
cnames <- colnames(data)
} else if (inherits(data, "dsparseVector")) {
indptr <- c(0L, as.integer(length(data@i)))
ind <- as.integer(data@i) - 1L
handle <- .Call(
XGDMatrixCreateFromCSR_R, indptr, ind, data@x, length(data), as.integer(NVL(nthread, -1))
)
} else {
stop("xgb.DMatrix does not support construction from ", typeof(data))
}
@@ -272,6 +287,13 @@ setinfo.xgb.DMatrix <- function(object, name, info, ...) {
.Call(XGDMatrixSetInfo_R, object, name, as.integer(info))
return(TRUE)
}
if (name == "feature_weights") {
if (length(info) != ncol(object)) {
stop("The number of feature weights must equal to the number of columns in the input data")
}
.Call(XGDMatrixSetInfo_R, object, name, as.numeric(info))
return(TRUE)
}
stop("setinfo: unknown info name ", name)
return(FALSE)
}
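A short sketch tying together the two changes in this file, the threaded sparse construction and the new `feature_weights` field (hypothetical data; the weight vector must have one entry per column):

  library(Matrix)
  x <- matrix(rnorm(100 * 4), ncol = 4)
  y <- rnorm(100)
  # CSC construction now accepts a thread count
  dtrain <- xgb.DMatrix(data = as(x, "CsparseMatrix"), label = y, nthread = 2)
  # per-column weights, consumed by colsample_by* sampling
  setinfo(dtrain, "feature_weights", c(1, 1, 2, 4))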

R-package/R/xgb.load.R

@@ -5,7 +5,7 @@
#' @param modelfile the name of the binary input file.
#'
#' @details
#' The input file is expected to contain a model saved in an xgboost-internal binary format
#' The input file is expected to contain a model saved in an xgboost model format
#' using either \code{\link{xgb.save}} or \code{\link{cb.save.model}} in R, or using some
#' appropriate methods from other xgboost interfaces. E.g., a model trained in Python and
#' saved from there in xgboost format, could be loaded from R.
@@ -38,6 +38,13 @@ xgb.load <- function(modelfile) {
handle <- xgb.Booster.handle(modelfile = modelfile)
# re-use modelfile if it is raw so we do not need to serialize
if (typeof(modelfile) == "raw") {
warning(
paste(
"The support for loading raw booster with `xgb.load` will be ",
"discontinued in upcoming release. Use `xgb.load.raw` or",
" `xgb.unserialize` instead. "
)
)
bst <- xgb.handleToBooster(handle, modelfile)
} else {
bst <- xgb.handleToBooster(handle, NULL)

R-package/R/xgb.load.raw.R

@@ -3,12 +3,21 @@
#' User can generate raw memory buffer by calling xgb.save.raw
#'
#' @param buffer the buffer returned by xgb.save.raw
#' @param as_booster Return the loaded model as xgb.Booster instead of xgb.Booster.handle.
#'
#' @export
xgb.load.raw <- function(buffer) {
xgb.load.raw <- function(buffer, as_booster = FALSE) {
cachelist <- list()
handle <- .Call(XGBoosterCreate_R, cachelist)
.Call(XGBoosterLoadModelFromRaw_R, handle, buffer)
class(handle) <- "xgb.Booster.handle"
return (handle)
if (as_booster) {
booster <- list(handle = handle, raw = NULL)
class(booster) <- "xgb.Booster"
booster <- xgb.Booster.complete(booster, saveraw = TRUE)
return(booster)
} else {
return (handle)
}
}
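A quick sketch of the new flag (here `raw` is assumed to come from `xgb.save.raw` on a fitted model, and `newdata` is a placeholder for prediction input):

  # default: returns an xgb.Booster.handle, as before
  handle <- xgb.load.raw(raw)
  # as_booster = TRUE returns a complete xgb.Booster instead
  bst2 <- xgb.load.raw(raw, as_booster = TRUE)
  preds <- predict(bst2, newdata)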

R-package/R/xgb.model.dt.tree.R

@@ -87,7 +87,7 @@ xgb.model.dt.tree <- function(feature_names = NULL, model = NULL, text = NULL,
}
if (length(text) < 2 ||
sum(grepl('yes=(\\d+),no=(\\d+)', text)) < 1) {
sum(grepl('leaf=(\\d+)', text)) < 1) {
stop("Non-tree model detected! This function can only be used with tree models.")
}
@@ -116,16 +116,28 @@ xgb.model.dt.tree <- function(feature_names = NULL, model = NULL, text = NULL,
branch_rx <- paste0("f(\\d+)<(", anynumber_regex, ")\\] yes=(\\d+),no=(\\d+),missing=(\\d+),",
"gain=(", anynumber_regex, "),cover=(", anynumber_regex, ")")
branch_cols <- c("Feature", "Split", "Yes", "No", "Missing", "Quality", "Cover")
td[isLeaf == FALSE,
(branch_cols) := {
matches <- regmatches(t, regexec(branch_rx, t))
# skip some indices with spurious capture groups from anynumber_regex
xtr <- do.call(rbind, matches)[, c(2, 3, 5, 6, 7, 8, 10), drop = FALSE]
xtr[, 3:5] <- add.tree.id(xtr[, 3:5], Tree)
as.data.table(xtr)
}]
td[
isLeaf == FALSE,
(branch_cols) := {
matches <- regmatches(t, regexec(branch_rx, t))
# skip some indices with spurious capture groups from anynumber_regex
xtr <- do.call(rbind, matches)[, c(2, 3, 5, 6, 7, 8, 10), drop = FALSE]
xtr[, 3:5] <- add.tree.id(xtr[, 3:5], Tree)
if (length(xtr) == 0) {
as.data.table(
list(Feature = "NA", Split = "NA", Yes = "NA", No = "NA", Missing = "NA", Quality = "NA", Cover = "NA")
)
} else {
as.data.table(xtr)
}
}
]
# assign feature_names when available
if (!is.null(feature_names)) {
is_stump <- function() {
return(length(td$Feature) == 1 && is.na(td$Feature))
}
if (!is.null(feature_names) && !is_stump()) {
if (length(feature_names) <= max(as.numeric(td$Feature), na.rm = TRUE))
stop("feature_names has less elements than there are features used in the model")
td[isLeaf == FALSE, Feature := feature_names[as.numeric(Feature) + 1]]
@@ -134,12 +146,18 @@ xgb.model.dt.tree <- function(feature_names = NULL, model = NULL, text = NULL,
# parse leaf lines
leaf_rx <- paste0("leaf=(", anynumber_regex, "),cover=(", anynumber_regex, ")")
leaf_cols <- c("Feature", "Quality", "Cover")
td[isLeaf == TRUE,
(leaf_cols) := {
matches <- regmatches(t, regexec(leaf_rx, t))
xtr <- do.call(rbind, matches)[, c(2, 4)]
c("Leaf", as.data.table(xtr))
}]
td[
isLeaf == TRUE,
(leaf_cols) := {
matches <- regmatches(t, regexec(leaf_rx, t))
xtr <- do.call(rbind, matches)[, c(2, 4)]
if (length(xtr) == 2) {
c("Leaf", as.data.table(xtr[1]), as.data.table(xtr[2]))
} else {
c("Leaf", as.data.table(xtr))
}
}
]
# convert some columns to numeric
numeric_cols <- c("Split", "Quality", "Cover")

R-package/R/xgb.plot.tree.R

@@ -98,18 +98,22 @@ xgb.plot.tree <- function(feature_names = NULL, model = NULL, trees = NULL, plot
data = dt$Feature,
fontcolor = "black")
edges <- DiagrammeR::create_edge_df(
from = match(rep(dt[Feature != "Leaf", c(ID)], 2), dt$ID),
to = match(dt[Feature != "Leaf", c(Yes, No)], dt$ID),
label = c(
dt[Feature != "Leaf", paste("<", Split)],
rep("", nrow(dt[Feature != "Leaf"]))
),
style = c(
dt[Feature != "Leaf", ifelse(Missing == Yes, "bold", "solid")],
dt[Feature != "Leaf", ifelse(Missing == No, "bold", "solid")]
),
rel = "leading_to")
if (nrow(dt[Feature != "Leaf"]) != 0) {
edges <- DiagrammeR::create_edge_df(
from = match(rep(dt[Feature != "Leaf", c(ID)], 2), dt$ID),
to = match(dt[Feature != "Leaf", c(Yes, No)], dt$ID),
label = c(
dt[Feature != "Leaf", paste("<", Split)],
rep("", nrow(dt[Feature != "Leaf"]))
),
style = c(
dt[Feature != "Leaf", ifelse(Missing == Yes, "bold", "solid")],
dt[Feature != "Leaf", ifelse(Missing == No, "bold", "solid")]
),
rel = "leading_to")
} else {
edges <- NULL
}
graph <- DiagrammeR::create_graph(
nodes_df = nodes,

R-package/R/xgb.save.raw.R

@@ -4,6 +4,14 @@
#' Save xgboost model from xgboost or xgb.train
#'
#' @param model the model object.
#' @param raw_format The format for encoding the booster. Available options are
#' \itemize{
#' \item \code{json}: Encode the booster into JSON text document.
#' \item \code{ubj}: Encode the booster into Universal Binary JSON.
#' \item \code{deprecated}: Encode the booster into old customized binary format.
#' }
#'
#' Right now the default is \code{deprecated} but will be changed to \code{ubj} in an upcoming release.
#'
#' @examples
#' data(agaricus.train, package='xgboost')
@@ -17,7 +25,8 @@
#' pred <- predict(bst, test$data)
#'
#' @export
xgb.save.raw <- function(model) {
xgb.save.raw <- function(model, raw_format = "deprecated") {
handle <- xgb.get.handle(model)
.Call(XGBoosterModelToRaw_R, handle)
args <- list(format = raw_format)
.Call(XGBoosterSaveModelToRaw_R, handle, jsonlite::toJSON(args, auto_unbox = TRUE))
}
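A short sketch of the three encodings (assuming a fitted booster `bst`; the format strings are the ones listed in the documentation above):

  json_bytes <- xgb.save.raw(bst, raw_format = "json")  # JSON text document
  ubj_bytes <- xgb.save.raw(bst, raw_format = "ubj")    # Universal Binary JSON
  old_bytes <- xgb.save.raw(bst)                        # default is still "deprecated"
  # all three round-trip through xgb.load.raw
  bst2 <- xgb.load.raw(ubj_bytes, as_booster = TRUE)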

R-package/configure

@@ -1,6 +1,6 @@
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
# Generated by GNU Autoconf 2.69 for xgboost 0.6-3.
# Generated by GNU Autoconf 2.69 for xgboost 1.6-0.
#
#
# Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc.
@@ -576,8 +576,8 @@ MAKEFLAGS=
# Identity of this package.
PACKAGE_NAME='xgboost'
PACKAGE_TARNAME='xgboost'
PACKAGE_VERSION='0.6-3'
PACKAGE_STRING='xgboost 0.6-3'
PACKAGE_VERSION='1.6-0'
PACKAGE_STRING='xgboost 1.6-0'
PACKAGE_BUGREPORT=''
PACKAGE_URL=''
@@ -1195,7 +1195,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF
\`configure' configures xgboost 0.6-3 to adapt to many kinds of systems.
\`configure' configures xgboost 1.6-0 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]...
@@ -1257,7 +1257,7 @@ fi
if test -n "$ac_init_help"; then
case $ac_init_help in
short | recursive ) echo "Configuration of xgboost 0.6-3:";;
short | recursive ) echo "Configuration of xgboost 1.6-0:";;
esac
cat <<\_ACEOF
@@ -1336,7 +1336,7 @@ fi
test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then
cat <<\_ACEOF
xgboost configure 0.6-3
xgboost configure 1.6-0
generated by GNU Autoconf 2.69
Copyright (C) 2012 Free Software Foundation, Inc.
@@ -1479,7 +1479,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
It was created by xgboost $as_me 0.6-3, which was
It was created by xgboost $as_me 1.6-0, which was
generated by GNU Autoconf 2.69. Invocation command line was
$ $0 $@
@@ -2725,7 +2725,7 @@ main ()
return 0;
}
_ACEOF
${CC} -o conftest conftest.c ${OPENMP_LIB} ${OPENMP_CXXFLAGS} 2>/dev/null && ./conftest && ac_pkg_openmp=yes
${CC} -o conftest conftest.c ${CPPFLAGS} ${LDFLAGS} ${OPENMP_LIB} ${OPENMP_CXXFLAGS} 2>/dev/null && ./conftest && ac_pkg_openmp=yes
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: ${ac_pkg_openmp}" >&5
$as_echo "${ac_pkg_openmp}" >&6; }
if test "${ac_pkg_openmp}" = no; then
@@ -3287,7 +3287,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
# report actual input values of CONFIG_FILES etc. instead of their
# values after options handling.
ac_log="
This file was extended by xgboost $as_me 0.6-3, which was
This file was extended by xgboost $as_me 1.6-0, which was
generated by GNU Autoconf 2.69. Invocation command line was
CONFIG_FILES = $CONFIG_FILES
@@ -3340,7 +3340,7 @@ _ACEOF
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
ac_cs_version="\\
xgboost config.status 0.6-3
xgboost config.status 1.6-0
configured by $0, generated by GNU Autoconf 2.69,
with options \\"\$ac_cs_config\\"
@@ -3900,4 +3900,3 @@ if test -n "$ac_unrecognized_opts" && test "$enable_option_checking" != no; then
$as_echo "$as_me: WARNING: unrecognized options: $ac_unrecognized_opts" >&2;}
fi

R-package/configure.ac

@@ -2,7 +2,7 @@
AC_PREREQ(2.69)
AC_INIT([xgboost],[0.6-3],[],[xgboost],[])
AC_INIT([xgboost],[1.6-0],[],[xgboost],[])
# Use this line to set CC variable to a C compiler
AC_PROG_CC
@@ -33,7 +33,7 @@ then
ac_pkg_openmp=no
AC_MSG_CHECKING([whether OpenMP will work in a package])
AC_LANG_CONFTEST([AC_LANG_PROGRAM([[#include <omp.h>]], [[ return (omp_get_max_threads() <= 1); ]])])
${CC} -o conftest conftest.c ${OPENMP_LIB} ${OPENMP_CXXFLAGS} 2>/dev/null && ./conftest && ac_pkg_openmp=yes
${CC} -o conftest conftest.c ${CPPFLAGS} ${LDFLAGS} ${OPENMP_LIB} ${OPENMP_CXXFLAGS} 2>/dev/null && ./conftest && ac_pkg_openmp=yes
AC_MSG_RESULT([${ac_pkg_openmp}])
if test "${ac_pkg_openmp}" = no; then
OPENMP_CXXFLAGS=''


@@ -63,7 +63,7 @@ print(paste("sum(abs(pred2-pred))=", sum(abs(pred2 - pred))))
# save model to R's raw vector
raw <- xgb.save.raw(bst)
# load binary model to R
bst3 <- xgb.load(raw)
bst3 <- xgb.load.raw(raw)
pred3 <- predict(bst3, test$data)
# pred3 should be identical to pred
print(paste("sum(abs(pred3-pred))=", sum(abs(pred3 - pred))))

R-package/man/predict.xgb.Booster.Rd

@@ -27,7 +27,11 @@
\arguments{
\item{object}{Object of class \code{xgb.Booster} or \code{xgb.Booster.handle}}
\item{newdata}{takes \code{matrix}, \code{dgCMatrix}, local data file or \code{xgb.DMatrix}.}
\item{newdata}{takes \code{matrix}, \code{dgCMatrix}, \code{dgRMatrix}, \code{dsparseVector},
local data file or \code{xgb.DMatrix}.
For single-row predictions on sparse data, it's recommended to use CSR format. If passing
a sparse vector, it will take it as a row vector.}
\item{missing}{Missing is only used when input is dense matrix. Pick a float value that represents
missing values in data (e.g., sometimes 0 or some other extreme value is used).}
@@ -55,7 +59,7 @@ training predicting will perform dropout.}
\item{iterationrange}{Specifies which layer of trees are used in prediction. For
example, if a random forest is trained with 100 rounds. Specifying
`iteration_range=(1, 21)`, then only the forests built during [1, 21) (half open set)
`iterationrange=(1, 21)`, then only the forests built during [1, 21) (half open set)
rounds are used in this prediction. It's 1-based index just like R vector. When set
to \code{c(1, 1)} XGBoost will use all trees.}

R-package/man/xgb.DMatrix.Rd

@@ -14,8 +14,10 @@ xgb.DMatrix(
)
}
\arguments{
\item{data}{a \code{matrix} object (either numeric or integer), a \code{dgCMatrix} object, or a character
string representing a filename.}
\item{data}{a \code{matrix} object (either numeric or integer), a \code{dgCMatrix} object,
a \code{dgRMatrix} object (only when making predictions from a fitted model),
a \code{dsparseVector} object (only when making predictions from a fitted model, will be
interpreted as a row vector), or a character string representing a filename.}
\item{info}{a named list of additional information to store in the \code{xgb.DMatrix} object.
See \code{\link{setinfo}} for the specific allowed kinds of information.}

R-package/man/xgb.load.Rd

@@ -16,7 +16,7 @@ An object of \code{xgb.Booster} class.
Load xgboost model from the binary model file.
}
\details{
The input file is expected to contain a model saved in an xgboost-internal binary format
The input file is expected to contain a model saved in an xgboost model format
using either \code{\link{xgb.save}} or \code{\link{cb.save.model}} in R, or using some
appropriate methods from other xgboost interfaces. E.g., a model trained in Python and
saved from there in xgboost format, could be loaded from R.

R-package/man/xgb.load.raw.Rd

@@ -4,10 +4,12 @@
\alias{xgb.load.raw}
\title{Load serialised xgboost model from R's raw vector}
\usage{
xgb.load.raw(buffer)
xgb.load.raw(buffer, as_booster = FALSE)
}
\arguments{
\item{buffer}{the buffer returned by xgb.save.raw}
\item{as_booster}{Return the loaded model as xgb.Booster instead of xgb.Booster.handle.}
}
\description{
User can generate raw memory buffer by calling xgb.save.raw

R-package/man/xgb.save.raw.Rd

@@ -5,10 +5,19 @@
\title{Save xgboost model to R's raw vector,
user can call xgb.load.raw to load the model back from raw vector}
\usage{
xgb.save.raw(model)
xgb.save.raw(model, raw_format = "deprecated")
}
\arguments{
\item{model}{the model object.}
\item{raw_format}{The format for encoding the booster. Available options are
\itemize{
\item \code{json}: Encode the booster into JSON text document.
\item \code{ubj}: Encode the booster into Universal Binary JSON.
\item \code{deprecated}: Encode the booster into old customized binary format.
}
Right now the default is \code{deprecated} but will be changed to \code{ubj} in an upcoming release.}
}
\description{
Save xgboost model from xgboost or xgb.train

R-package/src/init.c

@@ -24,12 +24,12 @@ extern SEXP XGBoosterEvalOneIter_R(SEXP, SEXP, SEXP, SEXP);
extern SEXP XGBoosterGetAttrNames_R(SEXP);
extern SEXP XGBoosterGetAttr_R(SEXP, SEXP);
extern SEXP XGBoosterLoadModelFromRaw_R(SEXP, SEXP);
extern SEXP XGBoosterSaveModelToRaw_R(SEXP handle, SEXP config);
extern SEXP XGBoosterLoadModel_R(SEXP, SEXP);
extern SEXP XGBoosterSaveJsonConfig_R(SEXP handle);
extern SEXP XGBoosterLoadJsonConfig_R(SEXP handle, SEXP value);
extern SEXP XGBoosterSerializeToBuffer_R(SEXP handle);
extern SEXP XGBoosterUnserializeFromBuffer_R(SEXP handle, SEXP raw);
extern SEXP XGBoosterModelToRaw_R(SEXP);
extern SEXP XGBoosterPredict_R(SEXP, SEXP, SEXP, SEXP, SEXP);
extern SEXP XGBoosterPredictFromDMatrix_R(SEXP, SEXP, SEXP);
extern SEXP XGBoosterSaveModel_R(SEXP, SEXP);
@@ -37,7 +37,8 @@ extern SEXP XGBoosterSetAttr_R(SEXP, SEXP, SEXP);
extern SEXP XGBoosterSetParam_R(SEXP, SEXP, SEXP);
extern SEXP XGBoosterUpdateOneIter_R(SEXP, SEXP, SEXP);
extern SEXP XGCheckNullPtr_R(SEXP);
extern SEXP XGDMatrixCreateFromCSC_R(SEXP, SEXP, SEXP, SEXP);
extern SEXP XGDMatrixCreateFromCSC_R(SEXP, SEXP, SEXP, SEXP, SEXP);
extern SEXP XGDMatrixCreateFromCSR_R(SEXP, SEXP, SEXP, SEXP, SEXP);
extern SEXP XGDMatrixCreateFromFile_R(SEXP, SEXP);
extern SEXP XGDMatrixCreateFromMat_R(SEXP, SEXP, SEXP);
extern SEXP XGDMatrixGetInfo_R(SEXP, SEXP);
@@ -59,12 +60,12 @@ static const R_CallMethodDef CallEntries[] = {
{"XGBoosterGetAttrNames_R", (DL_FUNC) &XGBoosterGetAttrNames_R, 1},
{"XGBoosterGetAttr_R", (DL_FUNC) &XGBoosterGetAttr_R, 2},
{"XGBoosterLoadModelFromRaw_R", (DL_FUNC) &XGBoosterLoadModelFromRaw_R, 2},
{"XGBoosterSaveModelToRaw_R", (DL_FUNC) &XGBoosterSaveModelToRaw_R, 2},
{"XGBoosterLoadModel_R", (DL_FUNC) &XGBoosterLoadModel_R, 2},
{"XGBoosterSaveJsonConfig_R", (DL_FUNC) &XGBoosterSaveJsonConfig_R, 1},
{"XGBoosterLoadJsonConfig_R", (DL_FUNC) &XGBoosterLoadJsonConfig_R, 2},
{"XGBoosterSerializeToBuffer_R", (DL_FUNC) &XGBoosterSerializeToBuffer_R, 1},
{"XGBoosterUnserializeFromBuffer_R", (DL_FUNC) &XGBoosterUnserializeFromBuffer_R, 2},
{"XGBoosterModelToRaw_R", (DL_FUNC) &XGBoosterModelToRaw_R, 1},
{"XGBoosterPredict_R", (DL_FUNC) &XGBoosterPredict_R, 5},
{"XGBoosterPredictFromDMatrix_R", (DL_FUNC) &XGBoosterPredictFromDMatrix_R, 3},
{"XGBoosterSaveModel_R", (DL_FUNC) &XGBoosterSaveModel_R, 2},
@@ -72,7 +73,8 @@ static const R_CallMethodDef CallEntries[] = {
{"XGBoosterSetParam_R", (DL_FUNC) &XGBoosterSetParam_R, 3},
{"XGBoosterUpdateOneIter_R", (DL_FUNC) &XGBoosterUpdateOneIter_R, 3},
{"XGCheckNullPtr_R", (DL_FUNC) &XGCheckNullPtr_R, 1},
{"XGDMatrixCreateFromCSC_R", (DL_FUNC) &XGDMatrixCreateFromCSC_R, 4},
{"XGDMatrixCreateFromCSC_R", (DL_FUNC) &XGDMatrixCreateFromCSC_R, 5},
{"XGDMatrixCreateFromCSR_R", (DL_FUNC) &XGDMatrixCreateFromCSR_R, 5},
{"XGDMatrixCreateFromFile_R", (DL_FUNC) &XGDMatrixCreateFromFile_R, 2},
{"XGDMatrixCreateFromMat_R", (DL_FUNC) &XGDMatrixCreateFromMat_R, 3},
{"XGDMatrixGetInfo_R", (DL_FUNC) &XGDMatrixGetInfo_R, 2},

R-package/src/xgboost_R.cc

@@ -1,16 +1,23 @@
// Copyright (c) 2014 by Contributors
#include <dmlc/logging.h>
#include <dmlc/omp.h>
/**
* Copyright 2014-2022 by XGBoost Contributors
*/
#include <dmlc/common.h>
#include <dmlc/omp.h>
#include <xgboost/c_api.h>
#include <vector>
#include <xgboost/data.h>
#include <xgboost/generic_parameters.h>
#include <xgboost/logging.h>
#include <cstdio>
#include <cstring>
#include <sstream>
#include <string>
#include <utility>
#include <cstring>
#include <cstdio>
#include <sstream>
#include <vector>
#include "../../src/c_api/c_api_error.h"
#include "../../src/common/threading_utils.h"
#include "./xgboost_R.h"
/*!
@@ -37,8 +44,21 @@
error(XGBGetLastError()); \
}
using dmlc::BeginPtr;
using namespace dmlc;
xgboost::GenericParameter const *BoosterCtx(BoosterHandle handle) {
CHECK_HANDLE();
auto *learner = static_cast<xgboost::Learner *>(handle);
CHECK(learner);
return learner->Ctx();
}
xgboost::GenericParameter const *DMatrixCtx(DMatrixHandle handle) {
CHECK_HANDLE();
auto p_m = static_cast<std::shared_ptr<xgboost::DMatrix> *>(handle);
CHECK(p_m);
return p_m->get()->Ctx();
}
XGB_DLL SEXP XGCheckNullPtr_R(SEXP handle) {
return ScalarLogical(R_ExternalPtrAddr(handle) == NULL);
@@ -94,18 +114,13 @@ XGB_DLL SEXP XGDMatrixCreateFromMat_R(SEXP mat, SEXP missing, SEXP n_threads) {
din = REAL(mat);
}
std::vector<float> data(nrow * ncol);
dmlc::OMPException exc;
int32_t threads = xgboost::common::OmpGetNumThreads(asInteger(n_threads));
#pragma omp parallel for schedule(static) num_threads(threads)
for (omp_ulong i = 0; i < nrow; ++i) {
exc.Run([&]() {
for (size_t j = 0; j < ncol; ++j) {
data[i * ncol +j] = is_int ? static_cast<float>(iin[i + nrow * j]) : din[i + nrow * j];
}
});
}
exc.Rethrow();
xgboost::common::ParallelFor(nrow, threads, [&](xgboost::omp_ulong i) {
for (size_t j = 0; j < ncol; ++j) {
data[i * ncol + j] = is_int ? static_cast<float>(iin[i + nrow * j]) : din[i + nrow * j];
}
});
DMatrixHandle handle;
CHECK_CALL(XGDMatrixCreateFromMat_omp(BeginPtr(data), nrow, ncol,
asReal(missing), &handle, threads));
@@ -117,7 +132,7 @@ XGB_DLL SEXP XGDMatrixCreateFromMat_R(SEXP mat, SEXP missing, SEXP n_threads) {
}
XGB_DLL SEXP XGDMatrixCreateFromCSC_R(SEXP indptr, SEXP indices, SEXP data,
SEXP num_row) {
SEXP num_row, SEXP n_threads) {
SEXP ret;
R_API_BEGIN();
const int *p_indptr = INTEGER(indptr);
@@ -133,15 +148,11 @@ XGB_DLL SEXP XGDMatrixCreateFromCSC_R(SEXP indptr, SEXP indices, SEXP data,
for (size_t i = 0; i < nindptr; ++i) {
col_ptr_[i] = static_cast<size_t>(p_indptr[i]);
}
dmlc::OMPException exc;
#pragma omp parallel for schedule(static)
for (int64_t i = 0; i < static_cast<int64_t>(ndata); ++i) {
exc.Run([&]() {
indices_[i] = static_cast<unsigned>(p_indices[i]);
data_[i] = static_cast<float>(p_data[i]);
});
}
exc.Rethrow();
int32_t threads = xgboost::common::OmpGetNumThreads(asInteger(n_threads));
xgboost::common::ParallelFor(ndata, threads, [&](xgboost::omp_ulong i) {
indices_[i] = static_cast<unsigned>(p_indices[i]);
data_[i] = static_cast<float>(p_data[i]);
});
DMatrixHandle handle;
CHECK_CALL(XGDMatrixCreateFromCSCEx(BeginPtr(col_ptr_), BeginPtr(indices_),
BeginPtr(data_), nindptr, ndata,
@@ -153,6 +164,39 @@ XGB_DLL SEXP XGDMatrixCreateFromCSC_R(SEXP indptr, SEXP indices, SEXP data,
return ret;
}
XGB_DLL SEXP XGDMatrixCreateFromCSR_R(SEXP indptr, SEXP indices, SEXP data,
SEXP num_col, SEXP n_threads) {
SEXP ret;
R_API_BEGIN();
const int *p_indptr = INTEGER(indptr);
const int *p_indices = INTEGER(indices);
const double *p_data = REAL(data);
size_t nindptr = static_cast<size_t>(length(indptr));
size_t ndata = static_cast<size_t>(length(data));
size_t ncol = static_cast<size_t>(INTEGER(num_col)[0]);
std::vector<size_t> row_ptr_(nindptr);
std::vector<unsigned> indices_(ndata);
std::vector<float> data_(ndata);
for (size_t i = 0; i < nindptr; ++i) {
row_ptr_[i] = static_cast<size_t>(p_indptr[i]);
}
int32_t threads = xgboost::common::OmpGetNumThreads(asInteger(n_threads));
xgboost::common::ParallelFor(ndata, threads, [&](xgboost::omp_ulong i) {
indices_[i] = static_cast<unsigned>(p_indices[i]);
data_[i] = static_cast<float>(p_data[i]);
});
DMatrixHandle handle;
CHECK_CALL(XGDMatrixCreateFromCSREx(BeginPtr(row_ptr_), BeginPtr(indices_),
BeginPtr(data_), nindptr, ndata,
ncol, &handle));
ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue));
R_RegisterCFinalizerEx(ret, _DMatrixFinalizer, TRUE);
R_API_END();
UNPROTECT(1);
return ret;
}
XGB_DLL SEXP XGDMatrixSliceDMatrix_R(SEXP handle, SEXP idxset) {
SEXP ret;
R_API_BEGIN();
@@ -186,31 +230,20 @@ XGB_DLL SEXP XGDMatrixSetInfo_R(SEXP handle, SEXP field, SEXP array) {
R_API_BEGIN();
int len = length(array);
const char *name = CHAR(asChar(field));
dmlc::OMPException exc;
auto ctx = DMatrixCtx(R_ExternalPtrAddr(handle));
if (!strcmp("group", name)) {
std::vector<unsigned> vec(len);
#pragma omp parallel for schedule(static)
for (int i = 0; i < len; ++i) {
exc.Run([&]() {
vec[i] = static_cast<unsigned>(INTEGER(array)[i]);
});
}
exc.Rethrow();
CHECK_CALL(XGDMatrixSetUIntInfo(R_ExternalPtrAddr(handle),
CHAR(asChar(field)),
BeginPtr(vec), len));
xgboost::common::ParallelFor(len, ctx->Threads(), [&](xgboost::omp_ulong i) {
vec[i] = static_cast<unsigned>(INTEGER(array)[i]);
});
CHECK_CALL(
XGDMatrixSetUIntInfo(R_ExternalPtrAddr(handle), CHAR(asChar(field)), BeginPtr(vec), len));
} else {
std::vector<float> vec(len);
#pragma omp parallel for schedule(static)
for (int i = 0; i < len; ++i) {
exc.Run([&]() {
vec[i] = REAL(array)[i];
});
}
exc.Rethrow();
CHECK_CALL(XGDMatrixSetFloatInfo(R_ExternalPtrAddr(handle),
CHAR(asChar(field)),
BeginPtr(vec), len));
xgboost::common::ParallelFor(len, ctx->Threads(),
[&](xgboost::omp_ulong i) { vec[i] = REAL(array)[i]; });
CHECK_CALL(
XGDMatrixSetFloatInfo(R_ExternalPtrAddr(handle), CHAR(asChar(field)), BeginPtr(vec), len));
}
R_API_END();
return R_NilValue;
@@ -313,15 +346,11 @@ XGB_DLL SEXP XGBoosterBoostOneIter_R(SEXP handle, SEXP dtrain, SEXP grad, SEXP h
<< "gradient and hess must have same length";
int len = length(grad);
std::vector<float> tgrad(len), thess(len);
dmlc::OMPException exc;
#pragma omp parallel for schedule(static)
for (int j = 0; j < len; ++j) {
exc.Run([&]() {
tgrad[j] = REAL(grad)[j];
thess[j] = REAL(hess)[j];
});
}
exc.Rethrow();
auto ctx = BoosterCtx(R_ExternalPtrAddr(handle));
xgboost::common::ParallelFor(len, ctx->Threads(), [&](xgboost::omp_ulong j) {
tgrad[j] = REAL(grad)[j];
thess[j] = REAL(hess)[j];
});
CHECK_CALL(XGBoosterBoostOneIter(R_ExternalPtrAddr(handle),
R_ExternalPtrAddr(dtrain),
BeginPtr(tgrad), BeginPtr(thess),
@@ -398,11 +427,10 @@ XGB_DLL SEXP XGBoosterPredictFromDMatrix_R(SEXP handle, SEXP dmat, SEXP json_con
len *= out_shape[i];
}
r_out_result = PROTECT(allocVector(REALSXP, len));
#pragma omp parallel for
for (omp_ulong i = 0; i < len; ++i) {
auto ctx = BoosterCtx(R_ExternalPtrAddr(handle));
xgboost::common::ParallelFor(len, ctx->Threads(), [&](xgboost::omp_ulong i) {
REAL(r_out_result)[i] = out_result[i];
}
});
r_out = PROTECT(allocVector(VECSXP, 2));
@@ -429,21 +457,6 @@ XGB_DLL SEXP XGBoosterSaveModel_R(SEXP handle, SEXP fname) {
return R_NilValue;
}
XGB_DLL SEXP XGBoosterModelToRaw_R(SEXP handle) {
SEXP ret;
R_API_BEGIN();
bst_ulong olen;
const char *raw;
CHECK_CALL(XGBoosterGetModelRaw(R_ExternalPtrAddr(handle), &olen, &raw));
ret = PROTECT(allocVector(RAWSXP, olen));
if (olen != 0) {
memcpy(RAW(ret), raw, olen);
}
R_API_END();
UNPROTECT(1);
return ret;
}
XGB_DLL SEXP XGBoosterLoadModelFromRaw_R(SEXP handle, SEXP raw) {
R_API_BEGIN();
CHECK_CALL(XGBoosterLoadModelFromBuffer(R_ExternalPtrAddr(handle),
@@ -453,6 +466,22 @@ XGB_DLL SEXP XGBoosterLoadModelFromRaw_R(SEXP handle, SEXP raw) {
return R_NilValue;
}
XGB_DLL SEXP XGBoosterSaveModelToRaw_R(SEXP handle, SEXP json_config) {
SEXP ret;
R_API_BEGIN();
bst_ulong olen;
char const *c_json_config = CHAR(asChar(json_config));
char const *raw;
CHECK_CALL(XGBoosterSaveModelToBuffer(R_ExternalPtrAddr(handle), c_json_config, &olen, &raw))
ret = PROTECT(allocVector(RAWSXP, olen));
if (olen != 0) {
std::memcpy(RAW(ret), raw, olen);
}
R_API_END();
UNPROTECT(1);
return ret;
}
XGB_DLL SEXP XGBoosterSaveJsonConfig_R(SEXP handle) {
const char* ret;
R_API_BEGIN();
@@ -599,7 +628,6 @@ XGB_DLL SEXP XGBoosterFeatureScore_R(SEXP handle, SEXP json_config) {
CHECK_CALL(XGBoosterFeatureScore(R_ExternalPtrAddr(handle), c_json_config,
&out_n_features, &out_features,
&out_dim, &out_shape, &out_scores));
out_shape_sexp = PROTECT(allocVector(INTSXP, out_dim));
size_t len = 1;
for (size_t i = 0; i < out_dim; ++i) {
@@ -608,10 +636,10 @@ XGB_DLL SEXP XGBoosterFeatureScore_R(SEXP handle, SEXP json_config) {
}
out_scores_sexp = PROTECT(allocVector(REALSXP, len));
#pragma omp parallel for
for (omp_ulong i = 0; i < len; ++i) {
auto ctx = BoosterCtx(R_ExternalPtrAddr(handle));
xgboost::common::ParallelFor(len, ctx->Threads(), [&](xgboost::omp_ulong i) {
REAL(out_scores_sexp)[i] = out_scores[i];
}
});
out_features_sexp = PROTECT(allocVector(STRSXP, out_n_features));
for (size_t i = 0; i < out_n_features; ++i) {

R-package/src/xgboost_R.h

@@ -1,5 +1,5 @@
/*!
* Copyright 2014 (c) by Contributors
* Copyright 2014-2022 by XGBoost Contributors
* \file xgboost_R.h
* \author Tianqi Chen
* \brief R wrapper of xgboost
@@ -59,12 +59,23 @@ XGB_DLL SEXP XGDMatrixCreateFromMat_R(SEXP mat,
* \param indices row indices
* \param data content of the data
* \param num_row number of rows (when set to 0, it is guessed from the data)
* \param n_threads Number of threads used to construct DMatrix from csc matrix.
* \return created dmatrix
*/
XGB_DLL SEXP XGDMatrixCreateFromCSC_R(SEXP indptr,
SEXP indices,
SEXP data,
SEXP num_row);
XGB_DLL SEXP XGDMatrixCreateFromCSC_R(SEXP indptr, SEXP indices, SEXP data, SEXP num_row,
SEXP n_threads);
/*!
* \brief create a matrix content from CSR format
* \param indptr pointer to row headers
* \param indices column indices
* \param data content of the data
* \param num_col number of columns (when set to 0, it is guessed from the data)
* \param n_threads Number of threads used to construct DMatrix from csr matrix.
* \return created dmatrix
*/
XGB_DLL SEXP XGDMatrixCreateFromCSR_R(SEXP indptr, SEXP indices, SEXP data, SEXP num_col,
SEXP n_threads);
/*!
* \brief create a new dmatrix from sliced content of existing matrix
@@ -209,11 +220,21 @@ XGB_DLL SEXP XGBoosterSaveModel_R(SEXP handle, SEXP fname);
XGB_DLL SEXP XGBoosterLoadModelFromRaw_R(SEXP handle, SEXP raw);
/*!
* \brief save model into R's raw array
* \brief Save model into R's raw array
*
* \param handle handle
* \return raw array
* \param json_config JSON encoded string storing parameters for the function. Following
* keys are expected in the JSON document:
*
* "format": str
* - json: Output booster will be encoded as JSON.
* - ubj: Output booster will be encoded as Universal Binary JSON.
* - deprecated: Output booster will be encoded as the old custom binary format. Do not use
* this format except for compatibility reasons.
*
* \return Raw array
*/
XGB_DLL SEXP XGBoosterModelToRaw_R(SEXP handle);
XGB_DLL SEXP XGBoosterSaveModelToRaw_R(SEXP handle, SEXP json_config);
/*!
* \brief Save internal parameters as a JSON string

R-package/tests/testthat/test_basic.R

@@ -1,4 +1,5 @@
require(xgboost)
library(Matrix)
context("basic functions")
@@ -459,3 +460,18 @@ test_that("strict_shape works", {
test_iris()
test_agaricus()
})
test_that("'predict' accepts CSR data", {
X <- agaricus.train$data
y <- agaricus.train$label
x_csc <- as(X[1L, , drop = FALSE], "CsparseMatrix")
x_csr <- as(x_csc, "RsparseMatrix")
x_spv <- as(x_csc, "sparseVector")
bst <- xgboost(data = X, label = y, objective = "binary:logistic",
nrounds = 5L, verbose = FALSE)
p_csc <- predict(bst, x_csc)
p_csr <- predict(bst, x_csr)
p_spv <- predict(bst, x_spv)
expect_equal(p_csc, p_csr)
expect_equal(p_csc, p_spv)
})


@@ -27,6 +27,7 @@ test_that("xgb.DMatrix: saving, loading", {
# save to a local file
dtest1 <- xgb.DMatrix(test_data, label = test_label)
tmp_file <- tempfile('xgb.DMatrix_')
on.exit(unlink(tmp_file))
expect_true(xgb.DMatrix.save(dtest1, tmp_file))
# read from a local file
expect_output(dtest3 <- xgb.DMatrix(tmp_file), "entries loaded from")
@@ -41,7 +42,6 @@ test_that("xgb.DMatrix: saving, loading", {
dtest4 <- xgb.DMatrix(tmp_file, silent = TRUE)
expect_equal(dim(dtest4), c(3, 4))
expect_equal(getinfo(dtest4, 'label'), c(0, 1, 0))
unlink(tmp_file)
})
test_that("xgb.DMatrix: getinfo & setinfo", {


@@ -0,0 +1,27 @@
library(xgboost)
context("feature weights")
test_that("training with feature weights works", {
nrows <- 1000
ncols <- 9
set.seed(2022)
x <- matrix(rnorm(nrows * ncols), nrow = nrows)
y <- rowSums(x)
weights <- seq(from = 1, to = ncols)
test <- function(tm) {
names <- paste0("f", 1:ncols)
xy <- xgb.DMatrix(data = x, label = y, feature_weights = weights)
params <- list(colsample_bynode = 0.4, tree_method = tm, nthread = 1)
model <- xgb.train(params = params, data = xy, nrounds = 32)
importance <- xgb.importance(model = model, feature_names = names)
expect_equal(dim(importance), c(ncols, 4))
importance <- importance[order(importance$Feature)]
expect_lt(importance[1, Frequency], importance[9, Frequency])
}
for (tm in c("hist", "approx", "exact")) {
test(tm)
}
})


@@ -340,6 +340,16 @@ test_that("xgb.importance works with and without feature names", {
imp
}
expect_equal(importance_from_dump(), importance, tolerance = 1e-6)
## decision stump
m <- xgboost::xgboost(
data = as.matrix(data.frame(x = c(0, 1))),
label = c(1, 2),
nrounds = 1
)
df <- xgb.model.dt.tree(model = m)
expect_equal(df$Feature, "Leaf")
expect_equal(df$Cover, 2)
})
test_that("xgb.importance works with GLM model", {


@@ -0,0 +1,30 @@
context("Test model IO.")
## some other tests are in test_basic.R
require(xgboost)
require(testthat)
data(agaricus.train, package = "xgboost")
data(agaricus.test, package = "xgboost")
train <- agaricus.train
test <- agaricus.test
test_that("load/save raw works", {
nrounds <- 8
booster <- xgboost(
data = train$data, label = train$label,
nrounds = nrounds, objective = "binary:logistic"
)
json_bytes <- xgb.save.raw(booster, raw_format = "json")
ubj_bytes <- xgb.save.raw(booster, raw_format = "ubj")
old_bytes <- xgb.save.raw(booster, raw_format = "deprecated")
from_json <- xgb.load.raw(json_bytes, as_booster = TRUE)
from_ubj <- xgb.load.raw(ubj_bytes, as_booster = TRUE)
json2old <- xgb.save.raw(from_json, raw_format = "deprecated")
ubj2old <- xgb.save.raw(from_ubj, raw_format = "deprecated")
expect_equal(json2old, ubj2old)
expect_equal(json2old, old_bytes)
})
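A rough Python-side analogue of the round trip above, assuming a recent release where
``Booster.save_raw`` accepts ``raw_format`` and ``Booster`` can be constructed from a
``bytearray`` (both appeared around 1.6); a sketch, not part of the diff:

    import numpy as np
    import xgboost as xgb

    rng = np.random.RandomState(1994)
    X, y = rng.randn(256, 8), rng.randint(0, 2, size=256)
    booster = xgb.train(
        {"objective": "binary:logistic"}, xgb.DMatrix(X, label=y), num_boost_round=8
    )

    json_bytes = booster.save_raw(raw_format="json")
    ubj_bytes = booster.save_raw(raw_format="ubj")

    # Both encodings should restore the same model.
    from_json = xgb.Booster(model_file=bytearray(json_bytes))
    from_ubj = xgb.Booster(model_file=bytearray(ubj_bytes))
    assert from_json.save_raw(raw_format="json") == from_ubj.save_raw(raw_format="json")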


@@ -33,8 +33,8 @@
#include "../src/gbm/gblinear_model.cc"
// data
#include "../src/data/data.cc"
#include "../src/data/simple_dmatrix.cc"
#include "../src/data/data.cc"
#include "../src/data/sparse_page_raw_format.cc"
#include "../src/data/ellpack_page.cc"
#include "../src/data/gradient_index.cc"
@@ -48,16 +48,18 @@
#include "../src/predictor/cpu_predictor.cc"
// trees
#include "../src/tree/constraints.cc"
#include "../src/tree/hist/param.cc"
#include "../src/tree/param.cc"
#include "../src/tree/tree_model.cc"
#include "../src/tree/tree_updater.cc"
#include "../src/tree/updater_approx.cc"
#include "../src/tree/updater_colmaker.cc"
#include "../src/tree/updater_quantile_hist.cc"
#include "../src/tree/updater_histmaker.cc"
#include "../src/tree/updater_prune.cc"
#include "../src/tree/updater_quantile_hist.cc"
#include "../src/tree/updater_refresh.cc"
#include "../src/tree/updater_sync.cc"
#include "../src/tree/updater_histmaker.cc"
#include "../src/tree/constraints.cc"
// linear
#include "../src/linear/linear_updater.cc"
@@ -75,9 +77,11 @@
#include "../src/common/quantile.cc"
#include "../src/common/host_device_vector.cc"
#include "../src/common/hist_util.cc"
#include "../src/common/json.cc"
#include "../src/common/io.cc"
#include "../src/common/json.cc"
#include "../src/common/pseudo_huber.cc"
#include "../src/common/survival_util.cc"
#include "../src/common/threading_utils.cc"
#include "../src/common/version.cc"
// c_api


@@ -15,7 +15,7 @@ endfunction(auto_source_group)
# Force static runtime for MSVC
function(msvc_use_static_runtime)
if(MSVC)
if(MSVC AND (NOT BUILD_SHARED_LIBS) AND (NOT FORCE_SHARED_CRT))
set(variables
CMAKE_C_FLAGS_DEBUG
CMAKE_C_FLAGS_MINSIZEREL
@@ -91,9 +91,9 @@ function(format_gencode_flags flags out)
# Set up architecture flags
if(NOT flags)
if (CUDA_VERSION VERSION_GREATER_EQUAL "11.1")
set(flags "50;52;60;61;70;75;80;86")
set(flags "52;60;61;70;75;80;86")
elseif (CUDA_VERSION VERSION_GREATER_EQUAL "11.0")
set(flags "35;50;52;60;61;70;75;80")
set(flags "52;60;61;70;75;80")
elseif(CUDA_VERSION VERSION_GREATER_EQUAL "10.0")
set(flags "35;50;52;60;61;70;75")
elseif(CUDA_VERSION VERSION_GREATER_EQUAL "9.0")
@@ -105,9 +105,10 @@ function(format_gencode_flags flags out)
if (CMAKE_VERSION VERSION_GREATER_EQUAL "3.18")
cmake_policy(SET CMP0104 NEW)
foreach(ver ${flags})
set(CMAKE_CUDA_ARCHITECTURES "${ver}-real;${ver}-virtual;${CMAKE_CUDA_ARCHITECTURES}")
endforeach()
list(POP_BACK flags latest_arch)
list(TRANSFORM flags APPEND "-real")
list(APPEND flags ${latest_arch})
set(CMAKE_CUDA_ARCHITECTURES ${flags})
set(CMAKE_CUDA_ARCHITECTURES "${CMAKE_CUDA_ARCHITECTURES}" PARENT_SCOPE)
message(STATUS "CMAKE_CUDA_ARCHITECTURES: ${CMAKE_CUDA_ARCHITECTURES}")
else()
@@ -136,7 +137,8 @@ function(xgboost_set_cuda_flags target)
$<$<COMPILE_LANGUAGE:CUDA>:--expt-extended-lambda>
$<$<COMPILE_LANGUAGE:CUDA>:--expt-relaxed-constexpr>
$<$<COMPILE_LANGUAGE:CUDA>:${GEN_CODE}>
$<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler=${OpenMP_CXX_FLAGS}>)
$<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler=${OpenMP_CXX_FLAGS}>
$<$<COMPILE_LANGUAGE:CUDA>:-Xfatbin=-compress-all>)
if (CMAKE_VERSION VERSION_GREATER_EQUAL "3.18")
set_property(TARGET ${target} PROPERTY CUDA_ARCHITECTURES ${CMAKE_CUDA_ARCHITECTURES})


@@ -1,4 +1,4 @@
#!/usr/bin/python
#!/usr/bin/env python3
def loadfmap( fname ):
fmap = {}


@@ -1,4 +1,5 @@
#!/usr/bin/python
#!/usr/bin/env python3
import sys
import random
@@ -26,4 +27,3 @@ for l in fi:
fi.close()
ftr.close()
fte.close()


@@ -1,4 +1,4 @@
#!/usr/bin/python
#!/usr/bin/env python3
fo = open('machine.txt', 'w')
cnt = 6


@@ -1,4 +1,5 @@
#!/usr/bin/python
#!/usr/bin/env python3
import sys
import random


@@ -1,3 +1,5 @@
#!/usr/bin/env python3
import sys
fo = open(sys.argv[2], 'w')


@@ -136,7 +136,7 @@ Send a PR to add a one sentence description:)
- XGBoost is used in [Kaggle Script](https://www.kaggle.com/scripts) to solve data science challenges.
- Distribute XGBoost as Rest API server from Jupyter notebook with [BentoML](https://github.com/bentoml/bentoml). [Link to notebook](https://github.com/bentoml/BentoML/blob/master/examples/xgboost-predict-titanic-survival/XGBoost-titanic-survival-prediction.ipynb)
- [Seldon predictive service powered by XGBoost](http://docs.seldon.io/iris-demo.html)
- [Seldon predictive service powered by XGBoost](https://docs.seldon.io/projects/seldon-core/en/latest/servers/xgboost.html)
- XGBoost Distributed is used in [ODPS Cloud Service by Alibaba](https://yq.aliyun.com/articles/6355) (in Chinese)
- XGBoost is incorporated as part of [Graphlab Create](https://dato.com/products/create/) for scalable machine learning.
- [Hanjing Su](https://www.52cs.org) from Tencent data platform team: "We use distributed XGBoost for click through prediction in wechat shopping and lookalikes. The problems involve hundreds millions of users and thousands of features. XGBoost is cleanly designed and can be easily integrated into our production environment, reducing our cost in developments."
@@ -146,7 +146,7 @@ Send a PR to add a one sentence description:)
- [BayesBoost](https://github.com/mpearmain/BayesBoost) - Bayesian Optimization using xgboost and sklearn API
- [FLAML](https://github.com/microsoft/FLAML) - An open source AutoML library
designed to automatically produce accurate machine learning models with low computational cost. FLAML includes [XGBoost as one of the default learners](https://github.com/microsoft/FLAML/blob/main/flaml/model.py) and can also be used as a fast hyperparameter tuning tool for XGBoost ([code example](https://github.com/microsoft/FLAML/blob/main/notebook/flaml_xgboost.ipynb)).
designed to automatically produce accurate machine learning models with low computational cost. FLAML includes [XGBoost as one of the default learners](https://github.com/microsoft/FLAML/blob/main/flaml/model.py) and can also be used as a fast hyperparameter tuning tool for XGBoost ([code example](https://microsoft.github.io/FLAML/docs/Examples/AutoML-for-XGBoost)).
- [gp_xgboost_gridsearch](https://github.com/vatsan/gp_xgboost_gridsearch) - In-database parallel grid-search for XGBoost on [Greenplum](https://github.com/greenplum-db/gpdb) using PL/Python
- [tpot](https://github.com/rhiever/tpot) - A Python tool that automatically creates and optimizes machine learning pipelines using genetic programming.


@@ -1,6 +0,0 @@
Dask
====
This directory contains some demonstrations for using `dask` with `XGBoost`.
For an overview, see
https://xgboost.readthedocs.io/en/latest/tutorials/dask.html .

demo/dask/README.rst

@@ -0,0 +1,5 @@
XGBoost Dask Feature Walkthrough
================================
This directory contains some demonstrations for using `dask` with `XGBoost`. For an
overview, see :doc:`/tutorials/dask`


@@ -1,3 +1,9 @@
"""
Example of training survival model with Dask on CPU
===================================================
"""
import xgboost as xgb
import os
from xgboost.dask import DaskDMatrix


@@ -1,3 +1,8 @@
"""
Example of training with Dask on CPU
====================================
"""
import xgboost as xgb
from xgboost.dask import DaskDMatrix
from dask.distributed import Client


@@ -1,4 +1,7 @@
"""Example of using callbacks in Dask"""
"""
Example of using callbacks with Dask
====================================
"""
import numpy as np
import xgboost as xgb
from xgboost.dask import DaskDMatrix


@@ -1,3 +1,7 @@
"""
Example of training with Dask on GPU
====================================
"""
from dask_cuda import LocalCUDACluster
from dask.distributed import Client
from dask import array as da


@@ -1,6 +1,7 @@
'''Dask interface demo:
Use scikit-learn regressor interface with CPU histogram tree method.'''
"""
Use scikit-learn regressor interface with CPU histogram tree method
===================================================================
"""
from dask.distributed import Client
from dask.distributed import LocalCluster
from dask import array as da
@@ -16,7 +17,7 @@ def main(client):
y = da.random.random(m, partition_size)
regressor = xgboost.dask.DaskXGBRegressor(verbosity=1, n_estimators=2)
regressor.set_params(tree_method='hist')
regressor.set_params(tree_method="hist")
# assigning client here is optional
regressor.client = client
@@ -26,13 +27,13 @@ def main(client):
bst = regressor.get_booster()
history = regressor.evals_result()
print('Evaluation history:', history)
print("Evaluation history:", history)
# returned prediction is always a dask array.
assert isinstance(prediction, da.Array)
return bst # returning the trained model
return bst # returning the trained model
if __name__ == '__main__':
if __name__ == "__main__":
# or use other clusters for scaling
with LocalCluster(n_workers=4, threads_per_worker=1) as cluster:
with Client(cluster) as client:


@@ -1,6 +1,7 @@
'''Dask interface demo:
Use scikit-learn regressor interface with GPU histogram tree method.'''
"""
Use scikit-learn regressor interface with GPU histogram tree method
===================================================================
"""
from dask.distributed import Client
# It's recommended to use dask_cuda for GPU assignment


@@ -1,18 +0,0 @@
XGBoost Python Feature Walkthrough
==================================
* [Basic walkthrough of wrappers](basic_walkthrough.py)
* [Customize loss function, and evaluation metric](custom_objective.py)
* [Re-implement RMSLE as customized metric and objective](custom_rmsle.py)
* [Re-Implement `multi:softmax` objective as customized objective](custom_softmax.py)
* [Boosting from existing prediction](boost_from_prediction.py)
* [Predicting using first n trees](predict_first_ntree.py)
* [Generalized Linear Model](generalized_linear_model.py)
* [Cross validation](cross_validation.py)
* [Predicting leaf indices](predict_leaf_indices.py)
* [Sklearn Wrapper](sklearn_examples.py)
* [Sklearn Parallel](sklearn_parallel.py)
* [Sklearn access evals result](sklearn_evals_result.py)
* [Access evals result](evals_result.py)
* [External Memory](external_memory.py)
* [Training continuation](continuation.py)
* [Feature weights for column sampling](feature_weights.py)


@@ -0,0 +1,5 @@
XGBoost Python Feature Walkthrough
==================================
This is a collection of examples for using the XGBoost Python package.


@@ -1,3 +1,7 @@
"""
Getting started with XGBoost
============================
"""
import numpy as np
import scipy.sparse
import pickle


@@ -1,3 +1,7 @@
"""
Demo for boosting from prediction
=================================
"""
import os
import xgboost as xgb


@@ -1,5 +1,6 @@
'''
Demo for using and defining callback functions.
Demo for using and defining callback functions
==============================================
.. versionadded:: 1.3.0
'''


@@ -0,0 +1,124 @@
"""
Train XGBoost with cat_in_the_dat dataset
=========================================
A simple demo for categorical data support using the dataset from the Kaggle categorical
data tutorial.
The excellent tutorial is at:
https://www.kaggle.com/shahules/an-overview-of-encoding-techniques
And the data can be found at:
https://www.kaggle.com/shahules/an-overview-of-encoding-techniques/data
Also, see the tutorial for using XGBoost with categorical data:
:doc:`/tutorials/categorical`.
.. versionadded:: 1.6.0
"""
from __future__ import annotations
from time import time
import os
from tempfile import TemporaryDirectory
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score
import xgboost as xgb
def load_cat_in_the_dat() -> tuple[pd.DataFrame, pd.Series]:
"""Assuming you have already downloaded the data into `input` directory."""
df_train = pd.read_csv("./input/cat-in-the-dat/train.csv")
print(
"train data set has got {} rows and {} columns".format(
df_train.shape[0], df_train.shape[1]
)
)
X = df_train.drop(["target"], axis=1)
y = df_train["target"]
for i in range(0, 5):
X["bin_" + str(i)] = X["bin_" + str(i)].astype("category")
for i in range(0, 5):
X["nom_" + str(i)] = X["nom_" + str(i)].astype("category")
for i in range(5, 10):
X["nom_" + str(i)] = X["nom_" + str(i)].apply(int, base=16)
for i in range(0, 6):
X["ord_" + str(i)] = X["ord_" + str(i)].astype("category")
print(
"train data set has got {} rows and {} columns".format(X.shape[0], X.shape[1])
)
return X, y
params = {
"tree_method": "gpu_hist",
"use_label_encoder": False,
"n_estimators": 32,
"colsample_bylevel": 0.7,
}
def categorical_model(X: pd.DataFrame, y: pd.Series, output_dir: str) -> None:
"""Train using builtin categorical data support from XGBoost"""
X_train, X_test, y_train, y_test = train_test_split(
X, y, random_state=1994, test_size=0.2
)
# Specify `enable_categorical` to True.
clf = xgb.XGBClassifier(
**params,
eval_metric="auc",
enable_categorical=True,
max_cat_to_onehot=1, # We use optimal partitioning exclusively
)
clf.fit(X_train, y_train, eval_set=[(X_test, y_test), (X_train, y_train)])
clf.save_model(os.path.join(output_dir, "categorical.json"))
y_score = clf.predict_proba(X_test)[:, 1] # proba of positive samples
auc = roc_auc_score(y_test, y_score)
print("AUC of using builtin categorical data support:", auc)
def onehot_encoding_model(X: pd.DataFrame, y: pd.Series, output_dir: str) -> None:
"""Train using one-hot encoded data."""
X_train, X_test, y_train, y_test = train_test_split(
X, y, random_state=42, test_size=0.2
)
# Specify `enable_categorical` to False as we are using encoded data.
clf = xgb.XGBClassifier(**params, eval_metric="auc", enable_categorical=False)
clf.fit(
X_train,
y_train,
eval_set=[(X_test, y_test), (X_train, y_train)],
)
clf.save_model(os.path.join(output_dir, "one-hot.json"))
y_score = clf.predict_proba(X_test)[:, 1] # proba of positive samples
auc = roc_auc_score(y_test, y_score)
print("AUC of using onehot encoding:", auc)
if __name__ == "__main__":
X, y = load_cat_in_the_dat()
with TemporaryDirectory() as tmpdir:
start = time()
categorical_model(X, y, tmpdir)
end = time()
print("Duration:categorical", end - start)
X = pd.get_dummies(X)
start = time()
onehot_encoding_model(X, y, tmpdir)
end = time()
print("Duration:onehot", end - start)


@@ -1,9 +1,17 @@
"""Experimental support for categorical data. After 1.5 XGBoost `gpu_hist` tree method
has experimental support for one-hot encoding based tree split.
"""
Getting started with categorical data
=====================================
Experimental support for categorical data. After 1.5 XGBoost `gpu_hist` tree method has
experimental support for one-hot encoding based tree split, and in 1.6 `approx` support
was added.
Previously, users needed to run an encoder themselves before passing the data into XGBoost,
which creates a sparse matrix and potentially increase memory usage. This demo showcases
the experimental categorical data support, more advanced features are planned.
which creates a sparse matrix and potentially increases memory usage. This demo
showcases the experimental categorical data support; more advanced features are planned.
Also, see :doc:`the tutorial </tutorials/categorical>` for using XGBoost with
categorical data.
.. versionadded:: 1.5.0
@@ -47,8 +55,11 @@ def main() -> None:
# For scikit-learn interface, the input data must be pandas DataFrame or cudf
# DataFrame with categorical features
X, y = make_categorical(100, 10, 4, False)
# Specify `enable_categorical` to True.
reg = xgb.XGBRegressor(tree_method="gpu_hist", enable_categorical=True)
# Specify `enable_categorical` to True; we also use one-hot encoding based splits
# here for demonstration. For details see the documentation for `max_cat_to_onehot`.
reg = xgb.XGBRegressor(
tree_method="gpu_hist", enable_categorical=True, max_cat_to_onehot=5
)
reg.fit(X, y, eval_set=[(X, y)])
# Pass in already encoded data


@@ -1,5 +1,6 @@
"""
Demo for training continuation.
Demo for training continuation
==============================
"""
from sklearn.datasets import load_breast_cancer


@@ -1,3 +1,7 @@
"""
Demo for using cross validation
===============================
"""
import os
import numpy as np
import xgboost as xgb


@@ -1,61 +0,0 @@
###
# advanced: customized loss function
#
import os
import numpy as np
import xgboost as xgb
print('start running example to used customized objective function')
CURRENT_DIR = os.path.dirname(__file__)
dtrain = xgb.DMatrix(os.path.join(CURRENT_DIR, '../data/agaricus.txt.train'))
dtest = xgb.DMatrix(os.path.join(CURRENT_DIR, '../data/agaricus.txt.test'))
# note: what we are getting is margin value in prediction you must know what
# you are doing
param = {'max_depth': 2, 'eta': 1, 'objective': 'reg:logistic'}
watchlist = [(dtest, 'eval'), (dtrain, 'train')]
num_round = 10
# user define objective function, given prediction, return gradient and second
# order gradient this is log likelihood loss
def logregobj(preds, dtrain):
labels = dtrain.get_label()
preds = 1.0 / (1.0 + np.exp(-preds)) # transform raw leaf weight
grad = preds - labels
hess = preds * (1.0 - preds)
return grad, hess
# user defined evaluation function, return a pair metric_name, result
# NOTE: when you do customized loss function, the default prediction value is
# margin, which means the prediction is score before logistic transformation.
def evalerror(preds, dtrain):
labels = dtrain.get_label()
preds = 1.0 / (1.0 + np.exp(-preds)) # transform raw leaf weight
# return a pair metric_name, result. The metric name must not contain a
# colon (:) or a space
return 'my-error', float(sum(labels != (preds > 0.5))) / len(labels)
py_evals_result = {}
# training with customized objective, we can also do step by step training
# simply look at training.py's implementation of train
py_params = param.copy()
py_params.update({'disable_default_eval_metric': True})
py_logreg = xgb.train(py_params, dtrain, num_round, watchlist, obj=logregobj,
feval=evalerror, evals_result=py_evals_result)
evals_result = {}
params = param.copy()
params.update({'eval_metric': 'error'})
logreg = xgb.train(params, dtrain, num_boost_round=num_round, evals=watchlist,
evals_result=evals_result)
for i in range(len(py_evals_result['train']['my-error'])):
np.testing.assert_almost_equal(py_evals_result['train']['my-error'],
evals_result['train']['error'])


@@ -1,16 +1,19 @@
'''Demo for defining customized metric and objective. Notice that for
simplicity reason weight is not used in following example. In this
script, we implement the Squared Log Error (SLE) objective and RMSLE metric as customized
functions, then compare it with native implementation in XGBoost.
"""
Demo for defining a custom regression objective and metric
==========================================================
See doc/tutorials/custom_metric_obj.rst for a step by step
walkthrough, with other details.
Demo for defining a customized metric and objective. Notice that for simplicity,
weight is not used in the following example. In this script, we implement the Squared Log
Error (SLE) objective and RMSLE metric as customized functions, then compare them with the
native implementation in XGBoost.
The `SLE` objective reduces impact of outliers in training dataset,
hence here we also compare its performance with standard squared
error.
See :doc:`/tutorials/custom_metric_obj` for a step by step walkthrough, with other
details.
'''
The `SLE` objective reduces the impact of outliers in the training dataset, hence here we
also compare its performance with the standard squared error.
"""
import numpy as np
import xgboost as xgb
from typing import Tuple, Dict, List
@@ -144,7 +147,7 @@ def py_rmsle(dtrain: xgb.DMatrix, dtest: xgb.DMatrix) -> Dict:
dtrain=dtrain,
num_boost_round=kBoostRound,
obj=squared_log,
feval=rmsle,
custom_metric=rmsle,
evals=[(dtrain, 'dtrain'), (dtest, 'dtest')],
evals_result=results)
@@ -171,9 +174,6 @@ def plot_history(rmse_evals, rmsle_evals, py_rmsle_evals):
ax2.plot(x, py_rmsle_evals['dtest']['PyRMSLE'], label='test-PyRMSLE')
ax2.legend()
plt.show()
plt.close()
def main(args):
dtrain, dtest = generate_data()
@@ -183,9 +183,10 @@ def main(args):
if args.plot != 0:
plot_history(rmse_evals, rmsle_evals, py_rmsle_evals)
plt.show()
if __name__ == '__main__':
if __name__ == "__main__":
parser = argparse.ArgumentParser(
description='Arguments for custom RMSLE objective function demo.')
parser.add_argument(


@@ -1,7 +1,12 @@
'''Demo for creating customized multi-class objective function. This demo is
only applicable after (excluding) XGBoost 1.0.0, as before this version XGBoost
returns transformed prediction for multi-class objective function. More
details in comments.
'''
Demo for creating customized multi-class objective function
===========================================================
This demo is only applicable to XGBoost versions newer than 1.0.0, as before this version
XGBoost returned the transformed prediction for multi-class objective functions. More
details are in the comments.
See :doc:`/tutorials/custom_metric_obj` for a detailed tutorial and notes.
'''
@@ -95,7 +100,12 @@ def predict(booster: xgb.Booster, X):
def merror(predt: np.ndarray, dtrain: xgb.DMatrix):
y = dtrain.get_label()
# Like custom objective, the predt is untransformed leaf weight
# Like custom objective, the predt is untransformed leaf weight when custom objective
# is provided.
# With the use of the `custom_metric` parameter in the train function, the custom metric
# receives raw input only when a custom objective is also used. Otherwise the custom
# metric receives transformed predictions.
assert predt.shape == (kRows, kClasses)
out = np.zeros(kRows)
for r in range(predt.shape[0]):
@@ -134,7 +144,7 @@ def main(args):
m,
num_boost_round=kRounds,
obj=softprob_obj,
feval=merror,
custom_metric=merror,
evals_result=custom_results,
evals=[(m, 'train')])
@@ -143,6 +153,7 @@ def main(args):
native_results = {}
# Use the same objective function defined in XGBoost.
booster_native = xgb.train({'num_class': kClasses,
"objective": "multi:softmax",
'eval_metric': 'merror'},
m,
num_boost_round=kRounds,


@@ -1,6 +1,7 @@
##
# This script demonstrate how to access the eval metrics in xgboost
##
"""
This script demonstrates how to access the eval metrics
======================================================
"""
import os
import xgboost as xgb


@@ -1,30 +1,37 @@
"""Experimental support for external memory. This is similar to the one in
`quantile_data_iterator.py`, but for external memory instead of Quantile DMatrix. The
feature is not ready for production use yet.
"""
Experimental support for external memory
========================================
This is similar to the one in `quantile_data_iterator.py`, but for external memory
instead of Quantile DMatrix. The feature is not ready for production use yet.
.. versionadded:: 1.5.0
See :doc:`the tutorial </tutorials/external_memory>` for more details.
"""
import os
import xgboost
from typing import Callable, List, Tuple
from sklearn.datasets import make_regression
import tempfile
import numpy as np
def make_batches(
n_samples_per_batch: int, n_features: int, n_batches: int
) -> Tuple[List[np.ndarray], List[np.ndarray]]:
"""Generate random batches."""
X = []
y = []
n_samples_per_batch: int, n_features: int, n_batches: int, tmpdir: str,
) -> List[Tuple[str, str]]:
files: List[Tuple[str, str]] = []
rng = np.random.RandomState(1994)
for i in range(n_batches):
_X = rng.randn(n_samples_per_batch, n_features)
_y = rng.randn(n_samples_per_batch)
X.append(_X)
y.append(_y)
return X, y
X, y = make_regression(n_samples_per_batch, n_features, random_state=rng)
X_path = os.path.join(tmpdir, "X-" + str(i) + ".npy")
y_path = os.path.join(tmpdir, "y-" + str(i) + ".npy")
np.save(X_path, X)
np.save(y_path, y)
files.append((X_path, y_path))
return files
class Iterator(xgboost.DataIter):
@@ -38,8 +45,8 @@ class Iterator(xgboost.DataIter):
def load_file(self) -> Tuple[np.ndarray, np.ndarray]:
X_path, y_path = self._file_paths[self._it]
X = np.loadtxt(X_path)
y = np.loadtxt(y_path)
X = np.load(X_path)
y = np.load(y_path)
assert X.shape[0] == y.shape[0]
return X, y
@@ -66,24 +73,21 @@ class Iterator(xgboost.DataIter):
def main(tmpdir: str) -> xgboost.Booster:
# generate some random data for demo
batches = make_batches(1024, 17, 31)
files = []
for i, (X, y) in enumerate(zip(*batches)):
X_path = os.path.join(tmpdir, "X-" + str(i) + ".txt")
np.savetxt(X_path, X)
y_path = os.path.join(tmpdir, "y-" + str(i) + ".txt")
np.savetxt(y_path, y)
files.append((X_path, y_path))
files = make_batches(1024, 17, 31, tmpdir)
it = Iterator(files)
# For non-data arguments, specify them here once instead of passing them via the `next`
# method.
missing = np.NaN
Xy = xgboost.DMatrix(it, missing=missing, enable_categorical=False)
# Other tree methods including ``hist`` and ``gpu_hist`` also work, but has some
# caveats. This is still an experimental feature.
booster = xgboost.train({"tree_method": "approx"}, Xy)
# Other tree methods including ``hist`` and ``gpu_hist`` also work, see tutorial in
# doc for details.
booster = xgboost.train(
{"tree_method": "approx", "max_depth": 2},
Xy,
evals=[(Xy, "Train")],
num_boost_round=10,
)
return booster


@@ -1,4 +1,6 @@
'''Using feature weight to change column sampling.
'''
Demo for using feature weight to change column sampling
=======================================================
.. versionadded:: 1.3.0
'''
@@ -25,7 +27,7 @@ def main(args):
dtrain.set_info(feature_weights=fw)
bst = xgboost.train({'tree_method': 'hist',
'colsample_bynode': 0.5},
'colsample_bynode': 0.2},
dtrain, num_boost_round=10,
evals=[(dtrain, 'd')])
feature_map = bst.get_fscore()


@@ -1,3 +1,7 @@
"""
Demo for gamma regression
=========================
"""
import xgboost as xgb
import numpy as np


@@ -1,3 +1,7 @@
"""
Demo for GLM
============
"""
import os
import xgboost as xgb
##


@@ -0,0 +1,111 @@
"""
A demo for multi-output regression
==================================
The demo is adapted from scikit-learn:
https://scikit-learn.org/stable/auto_examples/ensemble/plot_random_forest_regression_multioutput.html#sphx-glr-auto-examples-ensemble-plot-random-forest-regression-multioutput-py
See :doc:`/tutorials/multioutput` for more information.
"""
import argparse
from typing import Dict, Tuple, List
import numpy as np
from matplotlib import pyplot as plt
import xgboost as xgb
def plot_predt(y: np.ndarray, y_predt: np.ndarray, name: str) -> None:
s = 25
plt.scatter(y[:, 0], y[:, 1], c="navy", s=s, edgecolor="black", label="data")
plt.scatter(
y_predt[:, 0], y_predt[:, 1], c="cornflowerblue", s=s, edgecolor="black"
)
plt.xlim([-1, 2])
plt.ylim([-1, 2])
plt.show()
def gen_circle() -> Tuple[np.ndarray, np.ndarray]:
"Generate a sample dataset that y is a 2 dim circle."
rng = np.random.RandomState(1994)
X = np.sort(200 * rng.rand(100, 1) - 100, axis=0)
y = np.array([np.pi * np.sin(X).ravel(), np.pi * np.cos(X).ravel()]).T
y[::5, :] += 0.5 - rng.rand(20, 2)
y = y - y.min()
y = y / y.max()
return X, y
def rmse_model(plot_result: bool):
"""Draw a circle with 2-dim coordinate as target variables."""
X, y = gen_circle()
# Train a regressor on it
reg = xgb.XGBRegressor(tree_method="hist", n_estimators=64)
reg.fit(X, y, eval_set=[(X, y)])
y_predt = reg.predict(X)
if plot_result:
plot_predt(y, y_predt, "multi")
def custom_rmse_model(plot_result: bool) -> None:
"""Train using Python implementation of Squared Error."""
# As this support is experimental, custom objectives don't support matrix-shaped
# gradient and hessian yet; this will be changed in a future release.
def gradient(predt: np.ndarray, dtrain: xgb.DMatrix) -> np.ndarray:
"""Compute the gradient squared error."""
y = dtrain.get_label().reshape(predt.shape)
return (predt - y).reshape(y.size)
def hessian(predt: np.ndarray, dtrain: xgb.DMatrix) -> np.ndarray:
"""Compute the hessian for squared error."""
return np.ones(predt.shape).reshape(predt.size)
def squared_log(
predt: np.ndarray, dtrain: xgb.DMatrix
) -> Tuple[np.ndarray, np.ndarray]:
grad = gradient(predt, dtrain)
hess = hessian(predt, dtrain)
return grad, hess
def rmse(predt: np.ndarray, dtrain: xgb.DMatrix) -> Tuple[str, float]:
y = dtrain.get_label().reshape(predt.shape)
v = np.sqrt(np.sum(np.power(y - predt, 2)))
return "PyRMSE", v
X, y = gen_circle()
Xy = xgb.DMatrix(X, y)
results: Dict[str, Dict[str, List[float]]] = {}
# Make sure the `num_target` is passed to XGBoost when custom objective is used.
# When builtin objective is used, XGBoost can figure out the number of targets
# automatically.
booster = xgb.train(
{
"tree_method": "hist",
"num_target": y.shape[1],
},
dtrain=Xy,
num_boost_round=100,
obj=squared_log,
evals=[(Xy, "Train")],
evals_result=results,
custom_metric=rmse,
)
y_predt = booster.inplace_predict(X)
if plot_result:
plot_predt(y, y_predt, "multi")
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("--plot", choices=[0, 1], type=int, default=1)
args = parser.parse_args()
# Train with builtin RMSE objective
rmse_model(args.plot == 1)
# Train with custom objective.
custom_rmse_model(args.plot == 1)


@@ -1,3 +1,7 @@
"""
Demo for prediction using number of trees
=========================================
"""
import os
import numpy as np
import xgboost as xgb


@@ -1,3 +1,7 @@
"""
Demo for obtaining leaf index
=============================
"""
import os
import xgboost as xgb
@@ -12,7 +16,9 @@ bst = xgb.train(param, dtrain, num_round, watchlist)
print('start testing predict the leaf indices')
# predict using first 2 tree
leafindex = bst.predict(dtest, ntree_limit=2, pred_leaf=True)
leafindex = bst.predict(
dtest, iteration_range=(0, 2), pred_leaf=True, strict_shape=True
)
print(leafindex.shape)
print(leafindex)
# predict all trees


@@ -1,4 +1,6 @@
'''A demo for defining data iterator.
'''
Demo for using data iterator with Quantile DMatrix
==================================================
.. versionadded:: 1.2.0


@@ -1,6 +1,7 @@
##
# This script demonstrate how to access the xgboost eval metrics by using sklearn
##
"""
Demo for accessing the xgboost eval metrics by using sklearn interface
======================================================================
"""
import xgboost as xgb
import numpy as np


@@ -1,4 +1,7 @@
'''
Collection of examples for using sklearn interface
==================================================
Created on 1 Apr 2015
@author: Jamie Hall
@@ -9,7 +12,7 @@ import xgboost as xgb
import numpy as np
from sklearn.model_selection import KFold, train_test_split, GridSearchCV
from sklearn.metrics import confusion_matrix, mean_squared_error
from sklearn.datasets import load_iris, load_digits, load_boston
from sklearn.datasets import load_iris, load_digits, fetch_california_housing
rng = np.random.RandomState(31337)
@@ -35,10 +38,8 @@ for train_index, test_index in kf.split(X):
actuals = y[test_index]
print(confusion_matrix(actuals, predictions))
print("Boston Housing: regression")
boston = load_boston()
y = boston['target']
X = boston['data']
print("California Housing: regression")
X, y = fetch_california_housing(return_X_y=True)
kf = KFold(n_splits=2, shuffle=True, random_state=rng)
for train_index, test_index in kf.split(X):
xgb_model = xgb.XGBRegressor(n_jobs=1).fit(X[train_index], y[train_index])
@@ -47,8 +48,6 @@ for train_index, test_index in kf.split(X):
print(mean_squared_error(actuals, predictions))
print("Parameter optimization")
y = boston['target']
X = boston['data']
xgb_model = xgb.XGBRegressor(n_jobs=1)
clf = GridSearchCV(xgb_model,
{'max_depth': [2, 4, 6],
@@ -60,8 +59,8 @@ print(clf.best_params_)
# The sklearn API models are picklable
print("Pickling sklearn API models")
# must open in binary format to pickle
pickle.dump(clf, open("best_boston.pkl", "wb"))
clf2 = pickle.load(open("best_boston.pkl", "rb"))
pickle.dump(clf, open("best_calif.pkl", "wb"))
clf2 = pickle.load(open("best_calif.pkl", "rb"))
print(np.allclose(clf.predict(X), clf2.predict(X)))
# Early-stopping


@@ -1,14 +1,15 @@
"""
Demo for using xgboost with sklearn
===================================
"""
from sklearn.model_selection import GridSearchCV
from sklearn.datasets import load_boston
from sklearn.datasets import fetch_california_housing
import xgboost as xgb
import multiprocessing
if __name__ == "__main__":
print("Parallel Parameter optimization")
boston = load_boston()
y = boston['target']
X = boston['data']
X, y = fetch_california_housing(return_X_y=True)
xgb_model = xgb.XGBRegressor(n_jobs=multiprocessing.cpu_count() // 2)
clf = GridSearchCV(xgb_model, {'max_depth': [2, 4, 6],
'n_estimators': [50, 100, 200]}, verbose=1,


@@ -1,17 +1,21 @@
"""Demo for using `process_type` with `prune` and `refresh`. Modifying existing trees is
not a well established use for XGBoost, so feel free to experiment.
"""
Demo for using `process_type` with `prune` and `refresh`
========================================================
Modifying existing trees is not a well-established use case for XGBoost, so feel free to
experiment.
"""
import xgboost as xgb
from sklearn.datasets import load_boston
from sklearn.datasets import fetch_california_housing
import numpy as np
def main():
n_rounds = 32
X, y = load_boston(return_X_y=True)
X, y = fetch_california_housing(return_X_y=True)
# Train a model first
X_train = X[: X.shape[0] // 2]

dev/prepare_jvm_release.py

@@ -0,0 +1,102 @@
import os
import sys
import errno
import subprocess
import glob
import shutil
from contextlib import contextmanager
def normpath(path):
"""Normalize UNIX path to a native path."""
normalized = os.path.join(*path.split("/"))
if os.path.isabs(path):
return os.path.abspath("/") + normalized
else:
return normalized
def cp(source, target):
source = normpath(source)
target = normpath(target)
print("cp {0} {1}".format(source, target))
shutil.copy(source, target)
def maybe_makedirs(path):
path = normpath(path)
print("mkdir -p " + path)
try:
os.makedirs(path)
except OSError as e:
if e.errno != errno.EEXIST:
raise
@contextmanager
def cd(path):
path = normpath(path)
cwd = os.getcwd()
os.chdir(path)
print("cd " + path)
try:
yield path
finally:
os.chdir(cwd)
def run(command, **kwargs):
print(command)
subprocess.check_call(command, shell=True, **kwargs)
def main():
with cd("jvm-packages/"):
print("====copying pure-Python tracker====")
for use_cuda in [True, False]:
xgboost4j = "xgboost4j-gpu" if use_cuda else "xgboost4j"
cp("../python-package/xgboost/tracker.py", f"{xgboost4j}/src/main/resources")
print("====copying resources for testing====")
with cd("../demo/CLI/regression"):
run(f"{sys.executable} mapfeat.py")
run(f"{sys.executable} mknfold.py machine.txt 1")
for use_cuda in [True, False]:
xgboost4j = "xgboost4j-gpu" if use_cuda else "xgboost4j"
xgboost4j_spark = "xgboost4j-spark-gpu" if use_cuda else "xgboost4j-spark"
maybe_makedirs(f"{xgboost4j}/src/test/resources")
maybe_makedirs(f"{xgboost4j_spark}/src/test/resources")
for file in glob.glob("../demo/data/agaricus.*"):
cp(file, f"{xgboost4j}/src/test/resources")
cp(file, f"{xgboost4j_spark}/src/test/resources")
for file in glob.glob("../demo/CLI/regression/machine.txt.t*"):
cp(file, f"{xgboost4j_spark}/src/test/resources")
print("====Creating directories to hold native binaries====")
for os, arch in [("linux", "x86_64"), ("windows", "x86_64"), ("macos", "x86_64")]:
output_dir = f"xgboost4j/src/main/resources/lib/{os}/{arch}"
maybe_makedirs(output_dir)
for os, arch in [("linux", "x86_64")]:
output_dir = f"xgboost4j-gpu/src/main/resources/lib/{os}/{arch}"
maybe_makedirs(output_dir)
print("====Next Steps====")
print("1. Gain upload right to Maven Central repo.")
print("1-1. Sign up for a JIRA account at Sonatype: ")
print("1-2. File a JIRA ticket: "
"https://issues.sonatype.org/secure/CreateIssue.jspa?issuetype=21&pid=10134. Example: "
"https://issues.sonatype.org/browse/OSSRH-67724")
print("2. Store the Sonatype credentials in .m2/settings.xml. See insturctions in "
"https://central.sonatype.org/publish/publish-maven/")
print("3. Obtain Linux and Windows binaries from the CI server")
print("3-1. Get xgboost4j_[commit].dll from "
"https://s3-us-west-2.amazonaws.com/xgboost-nightly-builds/list.html. Rename it to"
"xgboost4j.dll.")
print("3-2. For Linux binaries, go to "
"https://s3-us-west-2.amazonaws.com/xgboost-maven-repo/list.html and navigate to the "
"release/ directory. Find and download two JAR files: xgboost4j_2.12-[version].jar and "
"xgboost4j-gpu_2.12-[version].jar. Use unzip command to extract libxgboost4j.so (one "
"version compiled with GPU support and another compiled without).")
print("4. Put the binaries in xgboost4j(-gpu)/src/main/resources/lib/[os]/[arch]")
print("5. Now on a Mac machine, run:")
print(" GPG_TTY=$(tty) mvn deploy -Prelease -DskipTests")
print("6. Log into https://oss.sonatype.org/. On the left menu panel, click Staging "
"Repositories. Visit the URL https://oss.sonatype.org/content/repositories/mldmlc-1085 "
"to inspect the staged JAR files. Finally, press Release button to publish the "
"artifacts to the Maven Central repository.")
if __name__ == "__main__":
main()


@@ -3,6 +3,7 @@
tqdm, sh are required to run this script.
"""
from urllib.request import urlretrieve
from typing import cast, Tuple
import argparse
from typing import List
from sh.contrib import git
@@ -33,6 +34,7 @@ def show_progress(block_num, block_size, total_size):
def retrieve(url, filename=None):
print(f"{url} -> {filename}")
return urlretrieve(url, filename, reporthook=show_progress)
@@ -49,7 +51,7 @@ def download_wheels(
dir_URL: str,
src_filename_prefix: str,
target_filename_prefix: str,
) -> List:
) -> List[str]:
"""Download all binary wheels. dir_URL is the URL for remote directory storing the release
wheels
@@ -63,7 +65,6 @@ def download_wheels(
target_wheel = target_filename_prefix + platform + ".whl"
filename = os.path.join(DIST, target_wheel)
filenames.append(filename)
print("Downloading from:", url, "to:", filename)
retrieve(url=url, filename=filename)
ret = subprocess.run(["twine", "check", filename], capture_output=True)
assert ret.returncode == 0, "Failed twine check"
@@ -74,29 +75,14 @@ def download_wheels(
return filenames
def check_path():
root = os.path.abspath(os.path.curdir)
assert os.path.basename(root) == "xgboost", "Must be run on project root."
def main(args: argparse.Namespace) -> None:
check_path()
rel = version.StrictVersion(args.release)
def download_py_packages(major: int, minor: int, commit_hash: str):
platforms = [
"win_amd64",
"manylinux2014_x86_64",
"manylinux2014_aarch64",
"macosx_10_14_x86_64.macosx_10_15_x86_64.macosx_11_0_x86_64",
"macosx_10_15_x86_64.macosx_11_0_x86_64.macosx_12_0_x86_64",
"macosx_12_0_arm64"
]
print("Release:", rel)
major, minor, patch = rel.version
branch = "release_" + str(major) + "." + str(minor) + ".0"
git.clean("-xdf")
git.checkout(branch)
git.pull("origin", branch)
git.submodule("update")
commit_hash = latest_hash()
dir_URL = PREFIX + str(major) + "." + str(minor) + ".0" + "/"
src_filename_prefix = "xgboost-" + args.release + "%2B" + commit_hash + "-py3-none-"
@@ -119,10 +105,74 @@ Following steps should be done manually:
)
def download_r_packages(release: str, rc: str, commit: str) -> None:
platforms = ["win64", "linux"]
dirname = "./r-packages"
if not os.path.exists(dirname):
os.mkdir(dirname)
filenames = []
for plat in platforms:
url = f"{PREFIX}{release}/xgboost_r_gpu_{plat}_{commit}.tar.gz"
if not rc:
filename = f"xgboost_r_gpu_{plat}_{release}.tar.gz"
else:
filename = f"xgboost_r_gpu_{plat}_{release}-{rc}.tar.gz"
target = os.path.join(dirname, filename)
retrieve(url=url, filename=target)
filenames.append(target)
print("Finished downloading R packages:", filenames)
def check_path():
root = os.path.abspath(os.path.curdir)
assert os.path.basename(root) == "xgboost", "Must be run on project root."
def main(args: argparse.Namespace) -> None:
check_path()
rel = version.LooseVersion(args.release)
print("Release:", rel)
if len(rel.version) == 3:
# Major release
major, minor, patch = version.StrictVersion(args.release).version
rc = None
rc_ver = None
else:
# RC release
major, minor, patch, rc, rc_ver = cast(
Tuple[int, int, int, str, int], rel.version
)
assert rc == "rc"
release = str(major) + "." + str(minor) + "." + str(patch)
branch = "release_" + release
git.clean("-xdf")
git.checkout(branch)
git.pull("origin", branch)
git.submodule("update")
commit_hash = latest_hash()
download_r_packages(
release, "" if rc is None else rc + str(rc_ver), commit_hash
)
download_py_packages(major, minor, commit_hash)
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument(
"--release", type=str, required=True, help="Version tag, e.g. '1.3.2'."
"--release",
type=str,
required=True,
help="Version tag, e.g. '1.3.2', or '1.5.0rc1'"
)
args = parser.parse_args()
main(args)


@@ -1,23 +1,31 @@
div.breathe-sectiondef.container {
width: 100%;
@import url('theme.css');
/* Logo background */
.wy-side-nav-search, .wy-side-nav-search img {
background-color: #ffffff !important;
}
div.literal-block-wrapper.container {
width: 100%;
.highlight {
background: #f1f3f4;
}
.red {
color: red;
.navbar {
background: #ffffff;
}
table {
border: 0;
.navbar-nav {
background: #ffffff;
}
td, th {
padding: 1px 8px 1px 5px;
border-top: 0;
border-bottom: 1px solid #aaa;
border-left: 0;
border-right: 0;
/* side bar */
.wy-nav-side {
background: #f1f3f4;
}
.wy-menu-vertical a {
color: #707070;
}
.wy-side-nav-search div.version {
color: #404040;
}


@@ -13,6 +13,8 @@ systems. If the instructions do not work for you, please feel free to ask quest
.. contents:: Contents
.. _get_source:
*************************
Obtaining the Source Code
*************************
@@ -52,7 +54,7 @@ This shared library is used by different language bindings (with some additions
on the binding you choose). The minimal building requirement is
- A recent C++ compiler supporting C++11 (g++-5.0 or higher)
- CMake 3.13 or higher.
- CMake 3.14 or higher.
For a list of CMake options like GPU support, see ``#-- Options`` in CMakeLists.txt on top
level of source tree.
@@ -79,33 +81,11 @@ Obtain ``libomp`` from `Homebrew <https://brew.sh/>`_:
brew install libomp
The rest is the same as building on Linux.
Now clone the repository:
.. code-block:: bash
git clone --recursive https://github.com/dmlc/xgboost
Create the ``build/`` directory and invoke CMake. After invoking CMake, you can build XGBoost with ``make``:
.. code-block:: bash
mkdir build
cd build
cmake ..
make -j4
You may now continue to :ref:`build_python`.
Building on Windows
===================
You need to first clone the XGBoost repo with the ``--recursive`` option to fetch the submodules.
We recommend you use `Git for Windows <https://git-for-windows.github.io/>`_, as it comes with a standard Bash shell. This greatly eases the installation process.
.. code-block:: bash
git submodule init
git submodule update
XGBoost supports compilation with Microsoft Visual Studio and MinGW. To build with Visual
Studio, we will need CMake. Make sure to install a recent version of CMake. Then run the
@@ -177,14 +157,6 @@ On Windows, run CMake as follows:
(Change the ``-G`` option appropriately if you have a different version of Visual Studio installed.)
.. note:: Visual Studio 2017 Win64 Generator may not work
Choosing the Visual Studio 2017 generator may cause compilation failure. When it happens, specify the 2015 compiler by adding the ``-T`` option:
.. code-block:: bash
cmake .. -G"Visual Studio 15 2017 Win64" -T v140,cuda=8.0 -DUSE_CUDA=ON
The above cmake configuration run will create an ``xgboost.sln`` solution file in the build directory. Build this solution in release mode as a x64 build, either from Visual studio or from command line:
.. code-block:: bash
@@ -328,9 +300,9 @@ So you may want to build XGBoost with GCC at your own risk. This presents some
4. Don't use ``-march=native`` gcc flag. Using it causes the Python interpreter to crash if the DLL was actually used.
5. You may need to provide the lib with the runtime libs. If ``mingw32/bin`` is not in ``PATH``, build a wheel (``python setup.py bdist_wheel``), open it with an archiver and put the needed dlls to the directory where ``xgboost.dll`` is situated. Then you can install the wheel with ``pip``.
*******************************
Building R Package From Source.
*******************************
******************************
Building R Package From Source
******************************
By default, the package installed by running ``install.packages`` is built from source.
Here we list some other options for installing development version.
@@ -341,23 +313,28 @@ Installing the development version (Linux / Mac OSX)
Make sure you have installed git and a recent C++ compiler supporting C++11 (See above
sections for requirements of building C++ core).
Due to the use of git-submodules, ``devtools::install_github`` can no longer be used to install the latest version of R package.
Thus, one has to run git to check out the code first:
Due to the use of git-submodules, ``devtools::install_github`` can no longer be used to
install the latest version of R package. Thus, one has to run git to check out the code
first, see :ref:`get_source` on how to initialize the git repository for XGBoost. The
simplest way to install the R package after obtaining the source code is:
.. code-block:: bash
cd R-package
R CMD INSTALL .
But if you want to use CMake build for better performance (which has the logic for
detecting available CPU instructions) or greater flexibility around compile flags, the
above snippet can be replaced by:
.. code-block:: bash
git clone --recursive https://github.com/dmlc/xgboost
cd xgboost
git submodule init
git submodule update
mkdir build
cd build
cmake .. -DR_LIB=ON
make -j$(nproc)
make install
If all else fails, try `Building the shared library`_ to see whether the problem is specific to the R
package or not. Notice that the R package is installed by CMake directly.
Installing the development version with Visual Studio (Windows)
===============================================================
@@ -523,14 +500,7 @@ XGBoost uses `Sphinx <https://www.sphinx-doc.org/en/stable/>`_ for documentation
* Python dependencies
- sphinx
- breathe
- guzzle_sphinx_theme
- recommonmark
- mock
- sh
- graphviz
- matplotlib
Check out the ``requirements.txt`` file under ``doc/``
Under ``xgboost/doc`` directory, run ``make <format>`` with ``<format>`` replaced by the format you want. For a list of supported formats, run ``make help`` under the same directory.


@@ -2,11 +2,11 @@
XGBoost C Package
#################
XGBoost implements a set of C API designed for various bindings, we maintain its
stability and the CMake/make build interface. See ``demo/c-api/README.md`` for an
overview and related examples. Also one can generate doxygen document by providing
``-DBUILD_C_DOC=ON`` as parameter to ``CMake`` during build, or simply look at function
comments in ``include/xgboost/c_api.h``.
XGBoost implements a set of C APIs designed for various bindings; we maintain their stability
and the CMake/make build interface. See :doc:`/tutorials/c_api_tutorial` for an
introduction and ``demo/c-api/`` for related examples. One can also generate doxygen
documentation by passing ``-DBUILD_C_DOC=ON`` to ``CMake`` during build, or
simply look at the function comments in ``include/xgboost/c_api.h``.
* `C API documentation (latest master branch) <https://xgboost.readthedocs.io/en/latest/dev/c__api_8h.html>`_
* `C API documentation (last stable release) <https://xgboost.readthedocs.io/en/stable/dev/c__api_8h.html>`_


@@ -19,7 +19,6 @@ import sys
import re
import os
import subprocess
import guzzle_sphinx_theme
git_branch = os.getenv('SPHINX_GIT_BRANCH', default=None)
if not git_branch:
@@ -33,6 +32,7 @@ if not git_branch:
else:
git_branch = [git_branch]
print('git_branch = {}'.format(git_branch[0]))
try:
filename, _ = urllib.request.urlretrieve(
'https://s3-us-west-2.amazonaws.com/xgboost-docs/{}.tar.bz2'.format(
@@ -62,12 +62,6 @@ libpath = os.path.join(curr_path, '../python-package/')
sys.path.insert(0, libpath)
sys.path.insert(0, curr_path)
# -- mock out modules
import mock # NOQA
MOCK_MODULES = ['scipy', 'scipy.sparse', 'sklearn', 'pandas']
for mod_name in MOCK_MODULES:
sys.modules[mod_name] = mock.Mock()
# -- General configuration ------------------------------------------------
# General information about the project.
@@ -90,10 +84,19 @@ extensions = [
'sphinx.ext.napoleon',
'sphinx.ext.mathjax',
'sphinx.ext.intersphinx',
"sphinx_gallery.gen_gallery",
'breathe',
'recommonmark'
]
sphinx_gallery_conf = {
# path to your example scripts
"examples_dirs": ["../demo/guide-python", "../demo/dask"],
# path to where to save gallery generated output
"gallery_dirs": ["python/examples", "python/dask-examples"],
"matplotlib_animations": True,
}
autodoc_typehints = "description"
graphviz_output_format = 'png'
@@ -169,17 +172,13 @@ todo_include_todos = False
# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
html_theme_path = guzzle_sphinx_theme.html_theme_path()
html_theme = 'guzzle_sphinx_theme'
html_theme = "sphinx_rtd_theme"
html_theme_options = {"logo_only": True}
# Register the theme as an extension to generate a sitemap.xml
extensions.append("guzzle_sphinx_theme")
# Guzzle theme options (see theme.conf for more information)
html_theme_options = {
# Set the name of the project to appear in the sidebar
"project_nav_name": "XGBoost"
}
html_logo = "https://raw.githubusercontent.com/dmlc/dmlc.github.io/master/img/logo-m/xgboost.png"
html_css_files = ["css/custom.css"]
html_sidebars = {
'**': ['logo-text.html', 'globaltoc.html', 'searchbox.html']
@@ -201,16 +200,17 @@ latex_elements = {
# (source start file, target name, title,
# author, documentclass [howto, manual, or own class]).
latex_documents = [
(master_doc, '%s.tex' % project, project,
author, 'manual'),
(master_doc, '%s.tex' % project, project, author, 'manual'),
]
intersphinx_mapping = {
'python': ('https://docs.python.org/3.6', None),
'numpy': ('http://docs.scipy.org/doc/numpy/', None),
'scipy': ('http://docs.scipy.org/doc/scipy/reference/', None),
'pandas': ('http://pandas-docs.github.io/pandas-docs-travis/', None),
'sklearn': ('http://scikit-learn.org/stable', None)
"python": ("https://docs.python.org/3.6", None),
"numpy": ("https://docs.scipy.org/doc/numpy/", None),
"scipy": ("https://docs.scipy.org/doc/scipy/reference/", None),
"pandas": ("http://pandas-docs.github.io/pandas-docs-travis/", None),
"sklearn": ("https://scikit-learn.org/stable", None),
"dask": ("https://docs.dask.org/en/stable/", None),
"distributed": ("https://distributed.dask.org/en/stable/", None),
}


@@ -25,3 +25,15 @@ requests and every update to branches. A few tests however require manual activa
details about noLD. This is a requirement for keeping XGBoost on CRAN (the R package index).
To invoke this test suite for a particular pull request, simply add a review comment
``/gha run r-nold-test``. (Ordinary comment won't work. It needs to be a review comment.)
GitHub Actions is also used to build Python wheels targeting MacOS Intel and Apple Silicon. See
`.github/workflows/python_wheels.yml
<https://github.com/dmlc/xgboost/tree/master/.github/workflows/python_wheels.yml>`_. The
``python_wheels`` pipeline sets up environment variables prefixed ``CIBW_*`` to indicate the target
OS and processor. The pipeline then invokes the script ``build_python_wheels.sh``, which in turn
calls ``cibuildwheel`` to build the wheel. ``cibuildwheel`` is a library that sets up a
suitable Python environment for each OS and processor target. Since we don't have Apple Silicon
machines in GitHub Actions, cross-compilation is needed; ``cibuildwheel`` takes care of the complex
task of cross-compiling a Python wheel. (Note that ``cibuildwheel`` will call
``setup.py bdist_wheel``. Since XGBoost has a native library component, ``setup.py`` contains
glue code to call CMake and a C++ compiler to build the native library on the fly.)
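For intuition, here is a hedged sketch of how such a pipeline drives ``cibuildwheel``;
the exact variables XGBoost's workflow sets live in ``python_wheels.yml``, and the values
below (wheel selector, package directory) are illustrative assumptions only:

    import os
    import subprocess

    env = dict(os.environ)
    # cibuildwheel reads CIBW_* variables from the environment.
    env["CIBW_BUILD"] = "cp38-macosx_arm64"   # hypothetical wheel selector
    env["CIBW_ARCHS_MACOS"] = "arm64"         # cross-compile for Apple Silicon
    # Build wheels for the package located in python-package/.
    subprocess.check_call(
        ["cibuildwheel", "--platform", "macos", "python-package"], env=env
    )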


@@ -134,3 +134,49 @@ Similarly, if you want to exclude C++ source from linting:
cd /path/to/xgboost/
python3 tests/ci_build/tidy.py --cpp=0
**********************************
Guide for handling user input data
**********************************
This is a non-comprehensive guide for handling user input data. XGBoost has a wide variety
of natively supported data structures, mostly coming from higher-level language bindings. The
inputs range from basic contiguous 1-dimensional memory buffers to more sophisticated data
structures like columnar data with validity masks. Raw input data is used in 2 places:
firstly the construction of various ``DMatrix`` objects, and secondly in-place
prediction. For a plain memory buffer, there's not much to discuss since it's just a
pointer with a size. But for general n-dimensional arrays and columnar data, there are many
subtleties. XGBoost has 3 different data structures for handling optionally masked arrays
(tensors); for consuming user inputs, ``ArrayInterface`` should be chosen. There are many
existing functions that accept only plain pointers due to legacy reasons (XGBoost started
as a much simpler library and didn't care about memory usage that much back then). The
``ArrayInterface`` is an in-memory representation of the ``__array_interface__`` protocol
defined by numpy or the ``__cuda_array_interface__`` defined by numba. The following is a
checklist of things to keep in mind when accepting related user inputs:
- [ ] Is it strided? (identified by the ``strides`` field)
- [ ] If it's a vector, is it a row vector or a column vector? (Identified by both ``shape``
and ``strides``.)
- [ ] Is the data type supported? Half type and 128-bit integer types should be converted
before going into XGBoost.
- [ ] Does it have more than 1 dimension? (identified by the ``shape`` field)
- [ ] Are some of the dimensions trivial? (``shape[dim] <= 1``)
- [ ] Does it have a mask? (identified by the ``mask`` field)
- [ ] Can the mask be broadcast? (unsupported at the moment)
- [ ] Is it on CUDA memory? (identified by the ``data`` field, and optionally ``stream``)
Most of the checks are handled by ``ArrayInterface`` during construction, except for the
data type issue, since it doesn't know how to cast pointers of C builtin types. But for
safety reasons one should still try to write related tests for all of the items. The data
type issue should be taken care of in the language binding for each specific data input. For
single-chunk columnar format, each column is just a masked array, so it should be treated
uniformly as a normal array. For the input predictor ``X``, we have adapters for each type
of input. Some are compositions of the others; for instance, a CSR matrix has 3 potentially
strided arrays for ``indptr``, ``indices`` and ``values``. No assumption should be made
about these components (all the checkboxes should be considered), and slicing a row of a CSR
matrix should calculate the offset of each field based on its respective stride.
For meta info like labels, which is growing both in size and complexity, we accept only
masked arrays at the moment (no specialized adapter). One should be careful about the input
data shape; for base margin it can be 2-dimensional or higher if we have multiple targets in
the future. The getters in ``DMatrix`` return only 1-dimensional flattened vectors at the
moment, which can be improved in the future when needed. A sketch of how the protocol fields
can be inspected is shown below.
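As an illustration of the protocol itself (not the C++ ``ArrayInterface`` class), the fields
in the checklist can be inspected from Python, since numpy exposes ``__array_interface__``
directly:
.. code-block:: python

    import numpy as np

    # A strided view: every other row of a 4x3 float32 array.
    arr = np.arange(12, dtype=np.float32).reshape(4, 3)[::2]
    interface = arr.__array_interface__

    print(interface["shape"])    # (2, 3)
    print(interface["strides"])  # (24, 4): skipping a row advances 2 * 3 * 4 bytes
    print(interface["typestr"])  # '<f4', little-endian 32-bit float

    # A C-contiguous array reports its strides as None.
    assert np.ascontiguousarray(arr).__array_interface__["strides"] is None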

View File

@@ -19,7 +19,7 @@ Documents
make html
inside the ``doc/`` directory.
inside the ``doc/`` directory. The online document is hosted by `Read the Docs <https://readthedocs.org/>`__ where the imported project is managed by `Hyunsu Cho <https://github.com/hcho3>`__ and `Jiaming Yuan <https://github.com/trivialfis>`__.
********
Examples

View File

@@ -18,13 +18,22 @@ Making a Release
1. Create an issue for the release, noting the estimated date and expected features or major fixes, pin that issue.
2. Bump release version.
1. Modify ``CMakeLists.txt`` in source tree and ``cmake/Python_version.in`` if needed, run CMake.
2. Modify ``DESCRIPTION`` in R-package.
3. Run ``change_version.sh`` in ``jvm-packages/dev``
3. Commit the change and create a PR on GitHub against the release branch. Port the bumped version to the default branch, optionally with the postfix ``SNAPSHOT``.
4. Create a tag on release branch, either on GitHub or locally.
5. Make a release on GitHub tag page, which might be done with previous step if the tag is created on GitHub.
6. Submit pip, CRAN, and Maven packages.
- The pip package is maintained by [Hyunsu Cho](http://hyunsu-cho.io/) and [Jiaming Yuan](https://github.com/trivialfis). There's a helper script for downloading pre-built wheels on ``xgboost/dev/release-pypi.py`` along with simple instructions for using ``twine``.
- The CRAN package is maintained by [Tong He](https://github.com/hetong007).
- The Maven package is maintained by [Nan Zhu](https://github.com/CodingCat).
+ The pip package is maintained by `Hyunsu Cho <https://github.com/hcho3>`__ and `Jiaming Yuan <https://github.com/trivialfis>`__. There's a helper script for downloading pre-built wheels and R packages at ``xgboost/dev/release-pypi-r.py``, along with simple instructions for using ``twine``.
+ The CRAN package is maintained by `Tong He <https://github.com/hetong007>`_ and `Jiaming Yuan <https://github.com/trivialfis>`__.
Before submitting a release, one should test the package on `R-hub <https://builder.r-hub.io/>`__ and `win-builder <https://win-builder.r-project.org/>`__ first. Please note that the R-hub Windows instance doesn't have the exact same environment as the one hosted on win-builder.
+ The Maven package is maintained by `Nan Zhu <https://github.com/CodingCat>`_ and `Hyunsu Cho <https://github.com/hcho3>`_.

View File

@@ -4,10 +4,10 @@ XGBoost GPU Support
This page contains information about GPU algorithms supported in XGBoost.
.. note:: CUDA 10.0, Compute Capability 3.5 required
.. note:: CUDA 10.1, Compute Capability 3.5 required
The GPU algorithms in XGBoost require a graphics card with compute capability 3.5 or higher, with
CUDA toolkits 10.0 or later.
CUDA toolkits 10.1 or later.
(See `this list <https://en.wikipedia.org/wiki/CUDA#GPUs_supported>`_ to look up compute capability of your GPU card.)
*********************************************
@@ -179,7 +179,7 @@ Following table shows current support status for evaluation metrics on the GPU.
+------------------------------+-------------+
| auc | |tick| |
+------------------------------+-------------+
| aucpr | |cross| |
| aucpr | |tick| |
+------------------------------+-------------+
| ndcg | |tick| |
+------------------------------+-------------+
@@ -224,25 +224,19 @@ Training time on 1,000,000 rows x 50 columns of random data with 500 boosting it
Memory usage
============
The following are some guidelines on the device memory usage of the `gpu_hist` updater.
If you train xgboost in a loop you may notice xgboost is not freeing device memory after each training iteration. This is because memory is allocated over the lifetime of the booster object and does not get freed until the booster is freed. A workaround is to serialise the booster object after training. See `demo/gpu_acceleration/memory.py` for a simple example.
The following are some guidelines on the device memory usage of the `gpu_hist` tree method.
Memory inside xgboost training is generally allocated for two reasons - storing the dataset and working memory.
The dataset itself is stored on device in a compressed ELLPACK format. The ELLPACK format is a type of sparse matrix that stores elements with a constant row stride. This format is convenient for parallel computation when compared to CSR because the row index of each element is known directly from its address in memory. The disadvantage of the ELLPACK format is that it becomes less memory efficient if the maximum row length is significantly more than the average row length. Elements are quantised and stored as integers. These integers are compressed to a minimum bit length. Depending on the number of features, we usually don't need the full range of a 32 bit integer to store elements and so compress this down. The compressed, quantised ELLPACK format will commonly use 1/4 the space of a CSR matrix stored in floating point.
In some cases the full CSR matrix stored in floating point needs to be allocated on the device. This currently occurs for prediction in multiclass classification. If this is a problem, consider setting `'predictor'='cpu_predictor'`. This also occurs when the external data itself comes from a source on device, e.g. a cudf DataFrame. These are known issues we hope to resolve.
Working memory is allocated inside the algorithm proportional to the number of rows to keep track of gradients, tree positions and other per-row statistics. Memory is allocated for histogram bins proportional to the number of bins, number of features and nodes in the tree. For performance reasons we keep histograms in memory from previous nodes in the tree; when a certain threshold of memory usage is passed, we stop doing this to conserve memory, at some performance loss.
The quantile finding algorithm also uses some amount of working device memory. It is able to operate in batches, but is not currently well optimised for sparse data.
If you are getting out-of-memory errors on a big dataset, try the :doc:`external memory version </tutorials/external_memory>`.
If you are getting out-of-memory errors on a big dataset, try :py:class:`xgboost.DeviceQuantileDMatrix` (sketched below) or the :doc:`external memory version </tutorials/external_memory>`.
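A minimal sketch of the :py:class:`xgboost.DeviceQuantileDMatrix` route, assuming the data
already lives on the device as cupy arrays (the random data here is for illustration only):
.. code-block:: python

    import cupy as cp
    import xgboost as xgb

    X = cp.random.rand(10000, 32).astype(cp.float32)
    y = cp.random.rand(10000).astype(cp.float32)

    # Quantize directly on device; avoids materializing a second full copy of the data.
    dtrain = xgb.DeviceQuantileDMatrix(X, label=y, max_bin=256)
    booster = xgb.train({"tree_method": "gpu_hist"}, dtrain, num_boost_round=10)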
Developer notes
===============
The application may be profiled with annotations by specifying USE_NTVX to cmake and providing the path to the stand-alone nvtx header via NVTX_HEADER_DIR. Regions covered by the 'Monitor' class in CUDA code will automatically appear in the nsight profiler.
The application may be profiled with annotations by specifying USE_NVTX to CMake. Regions covered by the 'Monitor' class in CUDA code will automatically appear in the Nsight profiler when `verbosity` is set to 3.
**********
References

View File

@@ -16,7 +16,7 @@ Stable Release
Python
------
Pre-built binary are uploaded to PyPI (Python Package Index) for each release. Supported platforms are Linux (x86_64, aarch64), Windows (x86_64) and MacOS (x86_64).
Pre-built binaries are uploaded to PyPI (Python Package Index) for each release. Supported platforms are Linux (x86_64, aarch64), Windows (x86_64) and MacOS (x86_64, Apple Silicon).
.. code-block:: bash
@@ -29,17 +29,40 @@ into permission errors. Python pre-built binary capability for each platform:
.. |tick| unicode:: U+2714
.. |cross| unicode:: U+2718
+-------------------+---------+----------------------+
| Platform | GPU | Multi-Node-Multi-GPU |
+===================+=========+======================+
| Linux x86_64 | |tick| | |tick| |
+-------------------+---------+----------------------+
| Linux aarch64 | |cross| | |cross| |
+-------------------+---------+----------------------+
| MacOS | |cross| | |cross| |
+-------------------+---------+----------------------+
| Windows | |tick| | |cross| |
+-------------------+---------+----------------------+
+---------------------+---------+----------------------+
| Platform | GPU | Multi-Node-Multi-GPU |
+=====================+=========+======================+
| Linux x86_64 | |tick| | |tick| |
+---------------------+---------+----------------------+
| Linux aarch64 | |cross| | |cross| |
+---------------------+---------+----------------------+
| MacOS x86_64 | |cross| | |cross| |
+---------------------+---------+----------------------+
| MacOS Apple Silicon | |cross| | |cross| |
+---------------------+---------+----------------------+
| Windows | |tick| | |cross| |
+---------------------+---------+----------------------+
Conda
*****
You may use the Conda package manager to install XGBoost:
.. code-block:: bash
conda install -c conda-forge py-xgboost
Conda should be able to detect the existence of a GPU on your machine and install the correct variant of XGBoost. If you run into issues, try indicating the variant explicitly:
.. code-block:: bash
# CPU only
conda install -c conda-forge py-xgboost-cpu
# Use NVIDIA GPU
conda install -c conda-forge py-xgboost-gpu
Visit the `Miniconda website <https://docs.conda.io/en/latest/miniconda.html>`_ to obtain Conda.
R
-
@@ -78,7 +101,7 @@ R
JVM
---
You can use XGBoost4J in your Java/Scala application by adding XGBoost4J as a dependency:
* XGBoost4j/XGBoost4j-Spark
.. code-block:: xml
:caption: Maven
@@ -111,6 +134,39 @@ You can use XGBoost4J in your Java/Scala application by adding XGBoost4J as a de
"ml.dmlc" %% "xgboost4j-spark" % "latest_version_num"
)
* XGBoost4j-GPU/XGBoost4j-Spark-GPU
.. code-block:: xml
:caption: Maven
<properties>
...
<!-- Specify Scala version in package name -->
<scala.binary.version>2.12</scala.binary.version>
</properties>
<dependencies>
...
<dependency>
<groupId>ml.dmlc</groupId>
<artifactId>xgboost4j-gpu_${scala.binary.version}</artifactId>
<version>latest_version_num</version>
</dependency>
<dependency>
<groupId>ml.dmlc</groupId>
<artifactId>xgboost4j-spark-gpu_${scala.binary.version}</artifactId>
<version>latest_version_num</version>
</dependency>
</dependencies>
.. code-block:: scala
:caption: sbt
libraryDependencies ++= Seq(
"ml.dmlc" %% "xgboost4j-gpu" % "latest_version_num",
"ml.dmlc" %% "xgboost4j-spark-gpu" % "latest_version_num"
)
This will check out the latest stable version from Maven Central.
For the latest release version number, please check `release page <https://github.com/dmlc/xgboost/releases>`_.
@@ -162,7 +218,7 @@ and Windows.) Download it and run the following commands:
JVM
---
First add the following Maven repository hosted by the XGBoost project:
* XGBoost4j/XGBoost4j-Spark
.. code-block:: xml
:caption: Maven
@@ -211,6 +267,40 @@ Then add XGBoost4J as a dependency:
"ml.dmlc" %% "xgboost4j-spark" % "latest_version_num-SNAPSHOT"
)
* XGBoost4j-GPU/XGBoost4j-Spark-GPU
.. code-block:: xml
:caption: Maven
<properties>
...
<!-- Specify Scala version in package name -->
<scala.binary.version>2.12</scala.binary.version>
</properties>
<dependencies>
...
<dependency>
<groupId>ml.dmlc</groupId>
<artifactId>xgboost4j-gpu_${scala.binary.version}</artifactId>
<version>latest_version_num-SNAPSHOT</version>
</dependency>
<dependency>
<groupId>ml.dmlc</groupId>
<artifactId>xgboost4j-spark-gpu_${scala.binary.version}</artifactId>
<version>latest_version_num-SNAPSHOT</version>
</dependency>
</dependencies>
.. code-block:: scala
:caption: sbt
libraryDependencies ++= Seq(
"ml.dmlc" %% "xgboost4j-gpu" % "latest_version_num-SNAPSHOT",
"ml.dmlc" %% "xgboost4j-spark-gpu" % "latest_version_num-SNAPSHOT"
)
Look up the ``version`` field in `pom.xml <https://github.com/dmlc/xgboost/blob/master/jvm-packages/pom.xml>`_ to get the correct version number.
The SNAPSHOT JARs are hosted by the XGBoost project. Every commit in the ``master`` branch will automatically trigger generation of a new SNAPSHOT JAR. You can control how often Maven should upgrade your SNAPSHOT installation by specifying ``updatePolicy``. See `here <http://maven.apache.org/pom.html#Repositories>`_ for details.

View File

@@ -35,6 +35,7 @@ Contents
java_intro
XGBoost4J-Spark Tutorial <xgboost4j_spark_tutorial>
XGBoost4J-Spark-GPU Tutorial <xgboost4j_spark_gpu_tutorial>
Code Examples <https://github.com/dmlc/xgboost/tree/master/jvm-packages/xgboost4j-example>
XGBoost4J Java API <javadocs/index>
XGBoost4J Scala API <scaladocs/xgboost4j/index>

View File

@@ -0,0 +1,246 @@
#############################################
XGBoost4J-Spark-GPU Tutorial (version 1.6.0+)
#############################################
**XGBoost4J-Spark-GPU** is an open source library aiming to accelerate distributed XGBoost training on an Apache Spark cluster from
end to end with GPUs by leveraging the `RAPIDS Accelerator for Apache Spark <https://nvidia.github.io/spark-rapids/>`_ product.
This tutorial will show you how to use **XGBoost4J-Spark-GPU**.
.. contents::
:backlinks: none
:local:
************************************************
Build an ML Application with XGBoost4J-Spark-GPU
************************************************
Add XGBoost to Your Project
===========================
Before we go into the tour of how to use XGBoost4J-Spark-GPU, you should first consult
:ref:`Installation from Maven repository <install_jvm_packages>` in order to add XGBoost4J-Spark-GPU as
a dependency for your project. We provide both stable releases and snapshots.
Data Preparation
================
In this section, we use the `Iris <https://archive.ics.uci.edu/ml/datasets/iris>`_ dataset as an example to
showcase how we use Apache Spark to transform a raw dataset and make it fit the data interface of XGBoost.
The Iris dataset is shipped in CSV format. Each instance contains 4 features, "sepal length", "sepal width",
"petal length" and "petal width". In addition, it contains the "class" column, which is essentially the
label with three possible values: "Iris Setosa", "Iris Versicolour" and "Iris Virginica".
Read Dataset with Spark's Built-In Reader
-----------------------------------------
.. code-block:: scala
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.types.{DoubleType, StringType, StructField, StructType}
val spark = SparkSession.builder().getOrCreate()
val labelName = "class"
val schema = new StructType(Array(
StructField("sepal length", DoubleType, true),
StructField("sepal width", DoubleType, true),
StructField("petal length", DoubleType, true),
StructField("petal width", DoubleType, true),
StructField(labelName, StringType, true)))
val xgbInput = spark.read.option("header", "false")
.schema(schema)
.csv(dataPath)
In the first line, we create an instance of a `SparkSession <https://spark.apache.org/docs/latest/sql-getting-started.html#starting-point-sparksession>`_
which is the entry point of any Spark application working with DataFrames. The ``schema`` variable
defines the schema of the DataFrame wrapping Iris data. With this explicitly set schema, we
can define the column names as well as their types; otherwise the column names would be
the default ones derived by Spark, such as ``_col0``, etc. Finally, we can use Spark's
built-in CSV reader to load the Iris CSV file as a DataFrame named ``xgbInput``.
Apache Spark also contains many built-in readers for other formats, such as ORC, Parquet, Avro and JSON.
Transform Raw Iris Dataset
--------------------------
To make the Iris dataset recognizable to XGBoost, we need to encode the String-typed
label, i.e. "class", into a Double-typed label.
One way to convert the String-typed label to Double is to use Spark's built-in feature transformer
`StringIndexer <https://spark.apache.org/docs/2.3.1/api/scala/index.html#org.apache.spark.ml.feature.StringIndexer>`_.
But this feature is not accelerated in RAPIDS Accelerator, which means it will fall back
to CPU. Instead, we use an alternative way to achieve the same goal with the following code:
.. code-block:: scala
import org.apache.spark.sql.expressions.Window
import org.apache.spark.sql.functions._
val spec = Window.orderBy(labelName)
val Array(train, test) = xgbInput
.withColumn("tmpClassName", dense_rank().over(spec) - 1)
.drop(labelName)
.withColumnRenamed("tmpClassName", labelName)
.randomSplit(Array(0.7, 0.3), seed = 1)
train.show(5)
.. code-block:: none
+------------+-----------+------------+-----------+-----+
|sepal length|sepal width|petal length|petal width|class|
+------------+-----------+------------+-----------+-----+
| 4.3| 3.0| 1.1| 0.1| 0|
| 4.4| 2.9| 1.4| 0.2| 0|
| 4.4| 3.0| 1.3| 0.2| 0|
| 4.4| 3.2| 1.3| 0.2| 0|
| 4.6| 3.2| 1.4| 0.2| 0|
+------------+-----------+------------+-----------+-----+
With window operations, we have mapped the string column of labels to label indices.
Training
========
The GPU version of XGBoost-Spark supports both regression and classification
models. Although we use the Iris dataset in this tutorial to show how to use
``XGBoost/XGBoost4J-Spark-GPU`` to solve a multi-class classification problem, the
usage for regression is very similar to classification.
To train an XGBoost model for classification, we first need to declare an ``XGBoostClassifier``:
.. code-block:: scala
import ml.dmlc.xgboost4j.scala.spark.XGBoostClassifier
val xgbParam = Map(
"objective" -> "multi:softprob",
"num_class" -> 3,
"num_round" -> 100,
"tree_method" -> "gpu_hist",
"num_workers" -> 1)
val featuresNames = schema.fieldNames.filter(name => name != labelName)
val xgbClassifier = new XGBoostClassifier(xgbParam)
.setFeaturesCol(featuresNames)
.setLabelCol(labelName)
The available parameters for training an XGBoost model can be found :doc:`here </parameter>`.
Similar to the XGBoost4J-Spark package, in addition to the default set of parameters,
XGBoost4J-Spark-GPU also supports the camel-case variant of these parameters to be
consistent with Spark's MLlib naming convention.
Specifically, each parameter in :doc:`this page </parameter>` has its equivalent form in
XGBoost4J-Spark-GPU with camel case. For example, to set ``max_depth`` for each tree, you can pass
the parameter just like we did in the above code snippet (as ``max_depth`` wrapped in a Map), or
you can do it through setters in ``XGBoostClassifier``:
.. code-block:: scala
val xgbClassifier = new XGBoostClassifier(xgbParam)
.setFeaturesCol(featuresNames)
.setLabelCol(labelName)
xgbClassifier.setMaxDepth(2)
.. note::
In contrast with XGBoost4j-Spark, which accepts both a feature column with VectorUDT type and
an array of feature column names, XGBoost4j-Spark-GPU only accepts an array of feature
column names via ``setFeaturesCol(value: Array[String])``.
After setting the XGBoostClassifier parameters and the feature/label columns, we can build a
transformer, ``XGBoostClassificationModel``, by fitting the XGBoostClassifier with the input
DataFrame. This ``fit`` operation is essentially the training process, and the generated
model can then be used in other tasks like prediction.
.. code-block:: scala
val xgbClassificationModel = xgbClassifier.fit(train)
Prediction
==========
When we get a model, either an XGBoostClassificationModel or an XGBoostRegressionModel, it takes a DataFrame as input,
reads the column containing feature vectors, predicts for each feature vector, and outputs a new DataFrame
with the following columns by default:
* XGBoostClassificationModel will output margins (``rawPredictionCol``), probabilities (``probabilityCol``) and the eventual prediction labels (``predictionCol``) for each possible label.
* XGBoostRegressionModel will output a predicted label (``predictionCol``).
.. code-block:: scala
val xgbClassificationModel = xgbClassifier.fit(train)
val results = xgbClassificationModel.transform(test)
results.show()
With the above code snippet, we get a DataFrame as the result, which contains the margin, the probability for each class,
and the prediction for each instance.
.. code-block:: none
+------------+-----------+------------------+-------------------+-----+--------------------+--------------------+----------+
|sepal length|sepal width| petal length| petal width|class| rawPrediction| probability|prediction|
+------------+-----------+------------------+-------------------+-----+--------------------+--------------------+----------+
| 4.5| 2.3| 1.3|0.30000000000000004| 0|[3.16666603088378...|[0.98853939771652...| 0.0|
| 4.6| 3.1| 1.5| 0.2| 0|[3.25857257843017...|[0.98969423770904...| 0.0|
| 4.8| 3.1| 1.6| 0.2| 0|[3.25857257843017...|[0.98969423770904...| 0.0|
| 4.8| 3.4| 1.6| 0.2| 0|[3.25857257843017...|[0.98969423770904...| 0.0|
| 4.8| 3.4|1.9000000000000001| 0.2| 0|[3.25857257843017...|[0.98969423770904...| 0.0|
| 4.9| 2.4| 3.3| 1.0| 1|[-2.1498908996582...|[0.00596602633595...| 1.0|
| 4.9| 2.5| 4.5| 1.7| 2|[-2.1498908996582...|[0.00596602633595...| 1.0|
| 5.0| 3.5| 1.3|0.30000000000000004| 0|[3.25857257843017...|[0.98969423770904...| 0.0|
| 5.1| 2.5| 3.0| 1.1| 1|[3.16666603088378...|[0.98853939771652...| 0.0|
| 5.1| 3.3| 1.7| 0.5| 0|[3.25857257843017...|[0.98969423770904...| 0.0|
| 5.1| 3.5| 1.4| 0.2| 0|[3.25857257843017...|[0.98969423770904...| 0.0|
| 5.1| 3.8| 1.6| 0.2| 0|[3.25857257843017...|[0.98969423770904...| 0.0|
| 5.2| 3.4| 1.4| 0.2| 0|[3.25857257843017...|[0.98969423770904...| 0.0|
| 5.2| 3.5| 1.5| 0.2| 0|[3.25857257843017...|[0.98969423770904...| 0.0|
| 5.2| 4.1| 1.5| 0.1| 0|[3.25857257843017...|[0.98969423770904...| 0.0|
| 5.4| 3.9| 1.7| 0.4| 0|[3.25857257843017...|[0.98969423770904...| 0.0|
| 5.5| 2.4| 3.8| 1.1| 1|[-2.1498908996582...|[0.00596602633595...| 1.0|
| 5.5| 4.2| 1.4| 0.2| 0|[3.25857257843017...|[0.98969423770904...| 0.0|
| 5.7| 2.5| 5.0| 2.0| 2|[-2.1498908996582...|[0.00280966912396...| 2.0|
| 5.7| 3.0| 4.2| 1.2| 1|[-2.1498908996582...|[0.00643939292058...| 1.0|
+------------+-----------+------------------+-------------------+-----+--------------------+--------------------+----------+
**********************
Submit the application
**********************
Here's an example of submitting an end-to-end XGBoost4J-Spark-GPU application to an
Apache Spark Standalone cluster, assuming the application main class is ``Iris`` and the
application jar is ``iris-1.0.0.jar``:
.. code-block:: bash
cudf_version=22.02.0
rapids_version=22.02.0
xgboost_version=1.6.0
main_class=Iris
app_jar=iris-1.0.0.jar
spark-submit \
--master $master \
--packages ai.rapids:cudf:${cudf_version},com.nvidia:rapids-4-spark_2.12:${rapids_version},ml.dmlc:xgboost4j-gpu_2.12:${xgboost_version},ml.dmlc:xgboost4j-spark-gpu_2.12:${xgboost_version} \
--conf spark.executor.cores=12 \
--conf spark.task.cpus=1 \
--conf spark.executor.resource.gpu.amount=1 \
--conf spark.task.resource.gpu.amount=0.08 \
--conf spark.rapids.sql.csv.read.double.enabled=true \
--conf spark.rapids.sql.hasNans=false \
--conf spark.plugins=com.nvidia.spark.SQLPlugin \
--class ${main_class} \
${app_jar}
* First, we need to specify the RAPIDS Accelerator, cudf, xgboost4j-gpu and xgboost4j-spark-gpu packages via ``--packages``.
* Second, the RAPIDS Accelerator is a Spark plugin, so we need to configure it by specifying ``spark.plugins=com.nvidia.spark.SQLPlugin``.
For details about other RAPIDS Accelerator configurations, please refer to the `configuration <https://nvidia.github.io/spark-rapids/docs/configs.html>`_ page.
For RAPIDS Accelerator frequently asked questions, please refer to the
`FAQ <https://nvidia.github.io/spark-rapids/docs/FAQ.html#frequently-asked-questions>`_.

View File

@@ -45,7 +45,7 @@ Installation from maven repo
.. note:: Use of Python in XGBoost4J-Spark
By default, we use the tracker in `dmlc-core <https://github.com/dmlc/dmlc-core/tree/master/tracker>`_ to drive the training with XGBoost4J-Spark. It requires Python 2.7+. We also have an experimental Scala version of tracker which can be enabled by passing the parameter ``tracker_conf`` as ``scala``.
By default, we use the tracker in the `Python package <https://github.com/dmlc/xgboost/blob/master/python-package/xgboost/tracker.py>`_ to drive the training with XGBoost4J-Spark. It requires Python 3.6+. We also have an experimental Scala version of the tracker, which can be enabled by passing the parameter ``tracker_conf`` as ``scala``.
Data Preparation
================
@@ -127,6 +127,11 @@ Now, we have a DataFrame containing only two columns, "features" which contains
"sepal length", "sepal width", "petal length" and "petal width" and "classIndex" which has Double-typed
labels. A DataFrame like this (containing vector-represented features and numeric labels) can be fed to XGBoost4J-Spark's training engine directly.
.. note::
There is no need to assemble the feature columns from version 1.6.0+. Instead, users can specify an array of
feature column names by ``setFeaturesCol(value: Array[String])`` and XGBoost4j-Spark will assemble them internally.
Dealing with missing values
~~~~~~~~~~~~~~~~~~~~~~~~~~~

View File

@@ -97,7 +97,7 @@
"default_left": {
"type": "array",
"items": {
"type": "boolean"
"type": "integer"
}
},
"categories": {
@@ -168,6 +168,9 @@
"num_trees": {
"type": "string"
},
"num_parallel_tree": {
"type": "string"
},
"size_leaf_vector": {
"type": "string"
}
@@ -204,6 +207,14 @@
}
}
},
"pseduo_huber_param": {
"type": "object",
"properties": {
"huber_slope": {
"type": "string"
}
}
},
"aft_loss_param": {
"type": "object",
"properties": {

View File

@@ -16,10 +16,13 @@ Before running XGBoost, we must set three types of parameters: general parameter
:backlinks: none
:local:
.. _global_config:
********************
Global Configuration
********************
The following parameters can be set in the global scope, using ``xgb.config_context()`` (Python) or ``xgb.set.config()`` (R).
The following parameters can be set in the global scope, using :py:func:`xgboost.config_context()` (Python) or ``xgb.set.config()`` (R); a minimal Python example follows the parameter list.
* ``verbosity``: Verbosity of printing messages. Valid values are 0 (silent), 1 (warning), 2 (info), and 3 (debug).
* ``use_rmm``: Whether to use RAPIDS Memory Manager (RMM) to allocate GPU memory. This option is only applicable when XGBoost is built (compiled) with the RMM plugin enabled. Valid values are ``true`` and ``false``.
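A minimal sketch of both styles in Python (the values shown are arbitrary):
.. code-block:: python

    import xgboost as xgb

    # Temporarily change the global configuration within a scope.
    with xgb.config_context(verbosity=2):
        pass  # train or predict here with more verbose logging

    # Or set it globally for the rest of the process.
    xgb.set_config(verbosity=1)
    print(xgb.get_config()["verbosity"])  # 1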
@@ -38,7 +41,7 @@ General Parameters
is displayed as warning message. If there's unexpected behaviour, please try to
increase value of verbosity.
* ``validate_parameters`` [default to false, except for Python, R and CLI interface]
* ``validate_parameters`` [default to ``false``, except for Python, R and CLI interface]
- When set to True, XGBoost will perform validation of input parameters to check whether
a parameter is used or not. The feature is still experimental. It's expected to have
@@ -49,14 +52,10 @@ General Parameters
- Number of parallel threads used to run XGBoost. When choosing it, please keep thread
contention and hyperthreading in mind.
* ``disable_default_eval_metric`` [default=``false``]
* ``disable_default_eval_metric`` [default= ``false``]
- Flag to disable default metric. Set to 1 or ``true`` to disable.
* ``num_pbuffer`` [set automatically by XGBoost, no need to be set by user]
- Size of prediction buffer, normally set to number of training instances. The buffers are used to save the prediction results of last boosting step.
* ``num_feature`` [set automatically by XGBoost, no need to be set by user]
- Feature dimension used in boosting, set to maximum dimension of the feature
@@ -75,8 +74,8 @@ Parameters for Tree Booster
* ``max_depth`` [default=6]
- Maximum depth of a tree. Increasing this value will make the model more complex and more likely to overfit. 0 is only accepted in ``lossguide`` growing policy when tree_method is set as ``hist`` or ``gpu_hist`` and it indicates no limit on depth. Beware that XGBoost aggressively consumes memory when training a deep tree.
- range: [0,∞] (0 is only accepted in ``lossguide`` growing policy when tree_method is set as ``hist`` or ``gpu_hist``)
- Maximum depth of a tree. Increasing this value will make the model more complex and more likely to overfit. 0 indicates no limit on depth. Beware that XGBoost aggressively consumes memory when training a deep tree. ``exact`` tree method requires non-zero value.
- range: [0,∞]
* ``min_child_weight`` [default=1]
@@ -116,10 +115,9 @@ Parameters for Tree Booster
'colsample_bynode':0.5}`` with 64 features will leave 8 features to choose from at
each split.
On Python interface, when using ``hist``, ``gpu_hist`` or ``exact`` tree method, one
can set the ``feature_weights`` for DMatrix to define the probability of each feature
being selected when using column sampling. There's a similar parameter for ``fit``
method in sklearn interface.
Using the Python or the R package, one can set the ``feature_weights`` for DMatrix to
define the probability of each feature being selected when using column sampling, as shown
in the sketch below. There's a similar parameter for the ``fit`` method in the sklearn interface.
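A minimal Python sketch, with random data for illustration only:
.. code-block:: python

    import numpy as np
    import xgboost as xgb

    X = np.random.rand(100, 4)
    y = np.random.rand(100)

    dtrain = xgb.DMatrix(X, label=y)
    # Make the first feature twice as likely to be sampled as each of the others.
    dtrain.set_info(feature_weights=np.array([2.0, 1.0, 1.0, 1.0]))

    booster = xgb.train(
        {"tree_method": "hist", "colsample_bynode": 0.5}, dtrain, num_boost_round=10
    )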
* ``lambda`` [default=1, alias: ``reg_lambda``]
@@ -155,7 +153,7 @@ Parameters for Tree Booster
* ``sketch_eps`` [default=0.03]
- Only used for ``tree_method=approx``.
- Only used for ``updater=grow_local_histmaker``.
- This roughly translates into ``O(1 / sketch_eps)`` number of bins.
Compared to directly selecting the number of bins, this comes with a theoretical guarantee on sketch accuracy.
- Usually the user does not have to tune this.
@@ -166,7 +164,7 @@ Parameters for Tree Booster
- Control the balance of positive and negative weights, useful for unbalanced classes. A typical value to consider: ``sum(negative instances) / sum(positive instances)``. See :doc:`Parameters Tuning </tutorials/param_tuning>` for more discussion. Also, see Higgs Kaggle competition demo for examples: `R <https://github.com/dmlc/xgboost/blob/master/demo/kaggle-higgs/higgs-train.R>`_, `py1 <https://github.com/dmlc/xgboost/blob/master/demo/kaggle-higgs/higgs-numpy.py>`_, `py2 <https://github.com/dmlc/xgboost/blob/master/demo/kaggle-higgs/higgs-cv.py>`_, `py3 <https://github.com/dmlc/xgboost/blob/master/demo/guide-python/cross_validation.py>`_.
* ``updater`` [default= ``grow_colmaker,prune``]
* ``updater``
- A comma separated string defining the sequence of tree updaters to run, providing a modular way to construct and to modify the trees. This is an advanced parameter that is usually set automatically, depending on some other parameters. However, it could be also set explicitly by a user. The following updaters exist:
@@ -179,8 +177,6 @@ Parameters for Tree Booster
- ``refresh``: refreshes tree's statistics and/or leaf values based on the current data. Note that no random subsampling of data rows is performed.
- ``prune``: prunes the splits where loss < min_split_loss (or gamma) and nodes that have depth greater than ``max_depth``.
- In a distributed setting, the implicit updater sequence value would be adjusted to ``grow_histmaker,prune`` by default, and you can set ``tree_method`` as ``hist`` to use ``grow_histmaker``.
* ``refresh_leaf`` [default=1]
- This is a parameter of the ``refresh`` updater. When this flag is 1, tree leaves as well as tree nodes' stats are updated. When it is 0, only node stats are updated.
@@ -196,7 +192,7 @@ Parameters for Tree Booster
* ``grow_policy`` [default= ``depthwise``]
- Controls a way new nodes are added to the tree.
- Currently supported only if ``tree_method`` is set to ``hist`` or ``gpu_hist``.
- Currently supported only if ``tree_method`` is set to ``hist``, ``approx`` or ``gpu_hist``.
- Choices: ``depthwise``, ``lossguide``
- ``depthwise``: split at nodes closest to the root.
@@ -204,15 +200,15 @@ Parameters for Tree Booster
* ``max_leaves`` [default=0]
- Maximum number of nodes to be added. Only relevant when ``grow_policy=lossguide`` is set.
- Maximum number of nodes to be added. Not used by ``exact`` tree method.
* ``max_bin``, [default=256]
- Only used if ``tree_method`` is set to ``hist`` or ``gpu_hist``.
- Only used if ``tree_method`` is set to ``hist``, ``approx`` or ``gpu_hist``.
- Maximum number of discrete bins to bucket continuous features.
- Increasing this number improves the optimality of splits at the cost of higher computation time.
* ``predictor``, [default=``auto``]
* ``predictor``, [default= ``auto``]
- The type of predictor algorithm to use. Provides the same results but allows the use of GPU or CPU.
@@ -225,26 +221,39 @@ Parameters for Tree Booster
recommended for performing prediction tasks.
* ``num_parallel_tree``, [default=1]
- Number of parallel trees constructed during each iteration. This option is used to support boosted random forest.
* ``monotone_constraints``
- Constraint of variable monotonicity. See tutorial for more information.
- Constraint of variable monotonicity. See :doc:`/tutorials/monotonic` for more information.
* ``interaction_constraints``
- Constraints for interaction representing permitted interactions. The constraints must
be specified in the form of a nested list, e.g. ``[[0, 1], [2, 3, 4]]``, where each inner
list is a group of indices of features that are allowed to interact with each other.
See tutorial for more information
See :doc:`/tutorials/feature_interaction_constraint` for more information; a minimal sketch follows below.
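Both constraints are passed as ordinary training parameters; here is a minimal Python sketch
(the concrete constraint values are arbitrary):
.. code-block:: python

    import xgboost as xgb

    params = {
        "tree_method": "hist",
        # Predictions must be increasing in feature 0 and decreasing in feature 1.
        "monotone_constraints": "(1,-1)",
        # Features 0 and 1 may interact with each other; so may features 2, 3 and 4.
        "interaction_constraints": "[[0, 1], [2, 3, 4]]",
    }
    # booster = xgb.train(params, dtrain)  # dtrain: a DMatrix built elsewhere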
Additional parameters for ``hist`` and ``gpu_hist`` tree method
================================================================
Additional parameters for ``hist``, ``gpu_hist`` and ``approx`` tree method
===========================================================================
* ``single_precision_histogram``, [default=``false``]
* ``single_precision_histogram``, [default= ``false``]
- Use single precision to build histograms instead of double precision.
* ``max_cat_to_onehot``
.. versionadded:: 1.6
.. note:: The support for this parameter is experimental.
- A threshold for deciding whether XGBoost should use one-hot encoding based splits for
categorical data. When the number of categories is less than the threshold, one-hot
encoding is chosen; otherwise the categories will be partitioned into children nodes.
Only relevant for regression and binary classification. Also, the ``exact`` tree method
is not supported. See the sketch below.
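A minimal sketch through the Python sklearn interface, assuming a pandas categorical column
(toy data for illustration only):
.. code-block:: python

    import pandas as pd
    import xgboost as xgb

    X = pd.DataFrame({"c": pd.Categorical(["a", "b", "a", "c", "b", "a"])})
    y = [0, 1, 0, 1, 1, 0]

    clf = xgb.XGBClassifier(
        tree_method="hist",
        enable_categorical=True,  # treat pandas categorical columns as categorical data
        max_cat_to_onehot=4,      # one-hot encode while the number of categories is below 4
    )
    clf.fit(X, y)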
Additional parameters for Dart Booster (``booster=dart``)
=========================================================
@@ -252,12 +261,12 @@ Additional parameters for Dart Booster (``booster=dart``)
If the booster object is DART type, ``predict()`` will perform dropouts, i.e. only
some of the trees will be evaluated. This will produce incorrect results if ``data`` is
not the training data. To obtain correct results on test sets, set ``ntree_limit`` to
not the training data. To obtain correct results on test sets, set ``iteration_range`` to
a nonzero value, e.g.
.. code-block:: python
preds = bst.predict(dtest, ntree_limit=num_round)
preds = bst.predict(dtest, iteration_range=(0, num_round))
* ``sample_type`` [default= ``uniform``]
@@ -329,15 +338,6 @@ Parameters for Linear Booster (``booster=gblinear``)
- The number of top features to select in ``greedy`` and ``thrifty`` feature selector. The value of 0 means using all the features.
Parameters for Tweedie Regression (``objective=reg:tweedie``)
=============================================================
* ``tweedie_variance_power`` [default=1.5]
- Parameter that controls the variance of the Tweedie distribution ``var(y) ~ E(y)^tweedie_variance_power``
- range: (1,2)
- Set closer to 2 to shift towards a gamma distribution
- Set closer to 1 to shift towards a Poisson distribution.
************************
Learning Task Parameters
************************
@@ -347,14 +347,14 @@ Specify the learning task and the corresponding learning objective. The objectiv
- ``reg:squarederror``: regression with squared loss.
- ``reg:squaredlogerror``: regression with squared log loss :math:`\frac{1}{2}[log(pred + 1) - log(label + 1)]^2`. All input labels are required to be greater than -1. Also, see metric ``rmsle`` for possible issue with this objective.
- ``reg:logistic``: logistic regression
- ``reg:logistic``: logistic regression.
- ``reg:pseudohubererror``: regression with Pseudo Huber loss, a twice differentiable alternative to absolute loss.
- ``binary:logistic``: logistic regression for binary classification, output probability
- ``binary:logitraw``: logistic regression for binary classification, output score before logistic transformation
- ``binary:hinge``: hinge loss for binary classification. This makes predictions of 0 or 1, rather than producing probabilities.
- ``count:poisson`` --poisson regression for count data, output mean of Poisson distribution
- ``count:poisson``: Poisson regression for count data, output mean of Poisson distribution.
- ``max_delta_step`` is set to 0.7 by default in Poisson regression (used to safeguard optimization)
+ ``max_delta_step`` is set to 0.7 by default in Poisson regression (used to safeguard optimization)
- ``survival:cox``: Cox regression for right censored survival time data (negative values are considered right censored).
Note that predictions are returned on the hazard ratio scale (i.e., as HR = exp(marginal_prediction) in the proportional hazard function ``h(t) = h0(t) * HR``).
@@ -397,9 +397,13 @@ Specify the learning task and the corresponding learning objective. The objectiv
- When used with multi-class classification, objective should be ``multi:softprob`` instead of ``multi:softmax``, as the latter doesn't output probability. Also the AUC is calculated by 1-vs-rest with reference class weighted by class prevalence.
- When used with LTR task, the AUC is computed by comparing pairs of documents to count correctly sorted pairs. This corresponds to pairwise learning to rank. The implementation has some issues with average AUC around groups and distributed workers not being well-defined.
- On a single machine the AUC calculation is exact. In a distributed environment the AUC is a weighted average over the AUC of training rows on each node - therefore, distributed AUC is an approximation sensitive to the distribution of data across workers. Use another metric in distributed environments if precision and reproducibility are important.
- If input dataset contains only negative or positive samples the output is `NaN`.
- When the input dataset contains only negative or positive samples, the output is `NaN`. The behavior is implementation defined; for instance, ``scikit-learn`` returns :math:`0.5` instead.
- ``aucpr``: `Area under the PR curve <https://en.wikipedia.org/wiki/Precision_and_recall>`_.
Available for classification and learning-to-rank tasks.
After XGBoost 1.6, both the requirements and restrictions for using ``aucpr`` in classification problems are similar to ``auc``. For ranking tasks, only the binary relevance label :math:`y \in [0, 1]` is supported. Different from ``map`` (mean average precision), ``aucpr`` calculates the *interpolated* area under the precision-recall curve using continuous interpolation.
- ``aucpr``: `Area under the PR curve <https://en.wikipedia.org/wiki/Precision_and_recall>`_. Available for binary classification and learning-to-rank tasks.
- ``ndcg``: `Normalized Discounted Cumulative Gain <http://en.wikipedia.org/wiki/NDCG>`_
- ``map``: `Mean Average Precision <http://en.wikipedia.org/wiki/Mean_average_precision#Mean_average_precision>`_
- ``ndcg@n``, ``map@n``: 'n' can be assigned as an integer to cut off the top positions in the lists for evaluation.
@@ -418,9 +422,23 @@ Specify the learning task and the corresponding learning objective. The objectiv
- Random number seed. This parameter is ignored in the R package; use `set.seed()` instead.
* ``seed_per_iteration`` [default=false]
* ``seed_per_iteration`` [default= ``false``]
- Seed PRNG deterministically via iterator number; this option will be switched on automatically in distributed mode.
- Seed PRNG deterministically via iterator number.
Parameters for Tweedie Regression (``objective=reg:tweedie``)
=============================================================
* ``tweedie_variance_power`` [default=1.5]
- Parameter that controls the variance of the Tweedie distribution ``var(y) ~ E(y)^tweedie_variance_power``
- range: (1,2)
- Set closer to 2 to shift towards a gamma distribution
- Set closer to 1 to shift towards a Poisson distribution.
Parameter for using Pseudo-Huber (``reg:pseudohubererror``)
===========================================================
* ``huber_slope`` [default=1.0]
  - A parameter used for Pseudo-Huber loss to define the :math:`\delta` term. (See the sketch below.)
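A minimal Python sketch showing how these objective-specific parameters are passed (the
values shown are the defaults):
.. code-block:: python

    import xgboost as xgb

    tweedie_params = {"objective": "reg:tweedie", "tweedie_variance_power": 1.5}
    huber_params = {"objective": "reg:pseudohubererror", "huber_slope": 1.0}
    # booster = xgb.train(tweedie_params, dtrain)  # dtrain: a DMatrix built elsewhere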
***********************
Command Line Parameters

doc/python/.gitignore vendored Normal file
View File

@@ -0,0 +1,2 @@
examples
dask-examples

View File

@@ -2,10 +2,11 @@
Callback Functions
##################
This document gives a basic walkthrough of callback function used in XGBoost Python
package. In XGBoost 1.3, a new callback interface is designed for Python package, which
provides the flexibility of designing various extension for training. Also, XGBoost has a
number of pre-defined callbacks for supporting early stopping, checkpoints etc.
This document gives a basic walkthrough of the :ref:`callback API <callback_api>` used in the
XGBoost Python package. In XGBoost 1.3, a new callback interface was designed for the Python
package, which provides the flexibility of designing various extensions for training.
Also, XGBoost has a number of pre-defined callbacks for supporting early stopping,
checkpoints etc.
Using builtin callbacks
@@ -14,8 +15,8 @@ Using builtin callbacks
By default, training methods in XGBoost have parameters like ``early_stopping_rounds`` and
``verbose``/``verbose_eval``, when specified the training procedure will define the
corresponding callbacks internally. For example, when ``early_stopping_rounds`` is
specified, ``EarlyStopping`` callback is invoked inside iteration loop. You can also pass
this callback function directly into XGBoost:
specified, :py:class:`EarlyStopping <xgboost.callback.EarlyStopping>` callback is invoked
inside iteration loop. You can also pass this callback function directly into XGBoost:
.. code-block:: python
@@ -54,6 +55,7 @@ this callback function directly into XGBoost:
Defining your own callback
--------------------------
XGBoost provides an callback interface class: ``xgboost.callback.TrainingCallback``, user
defined callbacks should inherit this class and override corresponding methods. There's a
working example in `demo/guide-python/callbacks.py <https://github.com/dmlc/xgboost/tree/master/demo/guide-python/callbacks.py>`_
XGBoost provides a callback interface class: :py:class:`TrainingCallback
<xgboost.callback.TrainingCallback>`; user-defined callbacks should inherit this class and
override the corresponding methods. There's a working example in
:ref:`sphx_glr_python_examples_callbacks.py`, and a minimal sketch below.
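A minimal sketch of a user-defined callback (the class name is hypothetical):
.. code-block:: python

    import xgboost as xgb

    class IterationLogger(xgb.callback.TrainingCallback):
        """Log the iteration number after each boosting round."""

        def after_iteration(self, model, epoch, evals_log):
            print(f"finished iteration {epoch}")
            return False  # returning True would stop training early

    # Usage: xgb.train(params, dtrain, callbacks=[IterationLogger()])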

View File

@@ -1,79 +0,0 @@
'''This is a simple script that converts a pickled XGBoost
Scikit-Learn interface object from 0.90 to a native model. Pickle
format is not stable as it's a direct serialization of Python object.
We advise not to use it when stability is needed.
'''
import pickle
import json
import os
import argparse
import numpy as np
import xgboost
import warnings
def save_label_encoder(le):
'''Save the label encoder in XGBClassifier'''
meta = dict()
for k, v in le.__dict__.items():
if isinstance(v, np.ndarray):
meta[k] = v.tolist()
else:
meta[k] = v
return meta
def xgboost_skl_90to100(skl_model):
'''Extract the model and related metadata in SKL model.'''
model = {}
with open(skl_model, 'rb') as fd:
old = pickle.load(fd)
if not isinstance(old, xgboost.XGBModel):
raise TypeError(
'The script only handles Scikit-Learn interface objects')
# Save Scikit-Learn specific Python attributes into a JSON document.
for k, v in old.__dict__.items():
if k == '_le':
model[k] = save_label_encoder(v)
elif k == 'classes_':
model[k] = v.tolist()
elif k == '_Booster':
continue
else:
try:
json.dumps({k: v})
model[k] = v
except TypeError:
warnings.warn(str(k) + ' is not saved in Scikit-Learn meta.')
booster = old.get_booster()
# Store the JSON serialization as an attribute
booster.set_attr(scikit_learn=json.dumps(model))
# Save it into a native model.
i = 0
while True:
path = 'xgboost_native_model_from_' + skl_model + '-' + str(i) + '.bin'
if os.path.exists(path):
i += 1
continue
booster.save_model(path)
break
if __name__ == '__main__':
assert xgboost.__version__ != '1.0.0', ('Please use the XGBoost version'
' that generates this pickle.')
parser = argparse.ArgumentParser(
description=('A simple script to convert pickle generated by'
' XGBoost 0.90 to XGBoost 1.0.0 model (not pickle).'))
parser.add_argument(
'--old-pickle',
type=str,
help='Path to old pickle file of Scikit-Learn interface object. '
'Will output a native model converted from this pickle file',
required=True)
args = parser.parse_args()
xgboost_skl_90to100(args.old_pickle)

View File

@@ -13,4 +13,5 @@ Contents
python_api
callbacks
model
Python examples <https://github.com/dmlc/xgboost/tree/master/demo/guide-python>
examples/index
dask-examples/index

View File

@@ -77,15 +77,29 @@ Plotting API
Callback API
------------
.. autofunction:: xgboost.callback.TrainingCallback
.. automodule:: xgboost.callback
.. autoclass:: xgboost.callback.TrainingCallback
:members:
.. autofunction:: xgboost.callback.EvaluationMonitor
.. autoclass:: xgboost.callback.EvaluationMonitor
:members:
:inherited-members:
:show-inheritance:
.. autofunction:: xgboost.callback.EarlyStopping
.. autoclass:: xgboost.callback.EarlyStopping
:members:
:inherited-members:
:show-inheritance:
.. autofunction:: xgboost.callback.LearningRateScheduler
.. autoclass:: xgboost.callback.LearningRateScheduler
:members:
:inherited-members:
:show-inheritance:
.. autofunction:: xgboost.callback.TrainingCheckPoint
.. autoclass:: xgboost.callback.TrainingCheckPoint
:members:
:inherited-members:
:show-inheritance:
.. _dask_api:

Some files were not shown because too many files have changed in this diff.