Bump release version to 1.7.2. (#8569)

Properly await async method client.wait_for_workers (#8558) (#8567)
* Properly await async method client.wait_for_workers * ignore mypy error. Co-authored-by: jiamingy <jm.yuan@outlook.com> Co-authored-by: Matthew Rocklin <mrocklin@gmail.com>
2022-12-08 21:46:26 +08:00 · 2022-12-07 23:25:05 +08:00 · 2022-12-07 03:19:35 +08:00 · 2022-12-07 02:07:37 +08:00 · 2022-12-06 21:35:26 +08:00 · 2022-12-06 18:21:14 +08:00
52 changed files with 467 additions and 212 deletions
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -75,19 +75,18 @@ jobs:
    - uses: actions/checkout@v2
      with:
        submodules: 'true'
-    - name: Install system packages
+    - uses: mamba-org/provision-with-micromamba@f347426e5745fe3dfc13ec5baf20496990d0281f # v14
      run: |
        sudo apt-get install -y --no-install-recommends ninja-build
    - uses: conda-incubator/setup-miniconda@v2
      with:
-        auto-update-conda: true
+        cache-downloads: true
-        python-version: ${{ matrix.python-version }}
+        cache-env: true
-        activate-environment: test
+        environment-name: cpp_test
        environment-file: tests/ci_build/conda_env/cpp_test.yml
    - name: Display Conda env
      shell: bash -l {0}
      run: |
        conda info
        conda list
    - name: Build and install XGBoost static library
      shell: bash -l {0}
      run: |
@@ -109,6 +108,7 @@ jobs:
        cd ..
        rm -rf ./build
        popd
    - name: Build and install XGBoost shared library
      shell: bash -l {0}
      run: |
--- a/.github/workflows/python_tests.yml
+++ b/.github/workflows/python_tests.yml
@@ -41,12 +41,46 @@ jobs:
      run: |
        python tests/ci_build/lint_python.py --format=0 --type-check=0 --pylint=1
-  python-sdist-test:
+  python-sdist-test-on-Linux:
    # Mismatched glibcxx version between system and conda forge.
    runs-on: ${{ matrix.os }}
    name: Test installing XGBoost Python source package on ${{ matrix.os }}
    strategy:
      matrix:
-        os: [ubuntu-latest, macos-11, windows-latest]
+        os: [ubuntu-latest]
    steps:
    - uses: actions/checkout@e2f20e631ae6d7dd3b768f56a5d2af784dd54791 # v2.5.0
      with:
        submodules: 'true'
    - uses: mamba-org/provision-with-micromamba@f347426e5745fe3dfc13ec5baf20496990d0281f # v14
      with:
        cache-downloads: true
        cache-env: false
        environment-name: sdist_test
        environment-file: tests/ci_build/conda_env/sdist_test.yml
    - name: Display Conda env
      shell: bash -l {0}
      run: |
        conda info
        conda list
    - name: Build and install XGBoost
      shell: bash -l {0}
      run: |
        cd python-package
        python --version
        python setup.py sdist
        pip install -v ./dist/xgboost-*.tar.gz
        cd ..
        python -c 'import xgboost'
  python-sdist-test:
    # Use system toolchain instead of conda toolchain for macos and windows.
    # MacOS has linker error if clang++ from conda-forge is used
    runs-on: ${{ matrix.os }}
    name: Test installing XGBoost Python source package on ${{ matrix.os }}
    strategy:
      matrix:
        os: [macos-11, windows-latest]
        python-version: ["3.8"]
    steps:
    - uses: actions/checkout@v2
@@ -56,11 +90,7 @@ jobs:
      if: matrix.os == 'macos-11'
      run: |
        brew install ninja libomp
-    - name: Install Ubuntu system dependencies
+    - uses: conda-incubator/setup-miniconda@35d1405e78aa3f784fe3ce9a2eb378d5eeb62169 # v2.1.1
      if: matrix.os == 'ubuntu-latest'
      run: |
        sudo apt-get install -y --no-install-recommends ninja-build
    - uses: conda-incubator/setup-miniconda@v2
      with:
        auto-update-conda: true
        python-version: ${{ matrix.python-version }}
@@ -80,6 +110,58 @@ jobs:
        cd ..
        python -c 'import xgboost'
  python-tests-on-macos:
    name: Test XGBoost Python package on ${{ matrix.config.os }}
    runs-on: ${{ matrix.config.os }}
    timeout-minutes: 60
    strategy:
      matrix:
        config:
          - {os: macos-11}
    steps:
    - uses: actions/checkout@e2f20e631ae6d7dd3b768f56a5d2af784dd54791 # v2.5.0
      with:
        submodules: 'true'
    - uses: mamba-org/provision-with-micromamba@f347426e5745fe3dfc13ec5baf20496990d0281f # v14
      with:
        cache-downloads: true
        cache-env: false
        environment-name: macos_test
        environment-file: tests/ci_build/conda_env/macos_cpu_test.yml
    - name: Display Conda env
      shell: bash -l {0}
      run: |
        conda info
        conda list
    - name: Build XGBoost on macos
      shell: bash -l {0}
      run: |
        brew install ninja
        mkdir build
        cd build
        # Set prefix, to use OpenMP library from Conda env
        # See https://github.com/dmlc/xgboost/issues/7039#issuecomment-1025038228
        # to learn why we don't use libomp from Homebrew.
        cmake .. -GNinja -DCMAKE_PREFIX_PATH=$CONDA_PREFIX
        ninja
    - name: Install Python package
      shell: bash -l {0}
      run: |
        cd python-package
        python --version
        python setup.py install
    - name: Test Python package
      shell: bash -l {0}
      run: |
        pytest -s -v -rxXs --durations=0 ./tests/python
  python-tests-on-win:
    name: Test XGBoost Python package on ${{ matrix.config.os }}
    runs-on: ${{ matrix.config.os }}
@@ -125,56 +207,4 @@ jobs:
    - name: Test Python package
      shell: bash -l {0}
      run: |
-        pytest -s -v ./tests/python
+        pytest -s -v -rxXs --durations=0 ./tests/python
  python-tests-on-macos:
    name: Test XGBoost Python package on ${{ matrix.config.os }}
    runs-on: ${{ matrix.config.os }}
    timeout-minutes: 90
    strategy:
      matrix:
        config:
          - {os: macos-11, python-version "3.8" }
    steps:
    - uses: actions/checkout@v2
      with:
        submodules: 'true'
    - uses: conda-incubator/setup-miniconda@v2
      with:
        auto-update-conda: true
        python-version: ${{ matrix.config.python-version }}
        activate-environment: macos_test
        environment-file: tests/ci_build/conda_env/macos_cpu_test.yml
    - name: Display Conda env
      shell: bash -l {0}
      run: |
        conda info
        conda list
    - name: Build XGBoost on macos
      shell: bash -l {0}
      run: |
        brew install ninja
        mkdir build
        cd build
        # Set prefix, to use OpenMP library from Conda env
        # See https://github.com/dmlc/xgboost/issues/7039#issuecomment-1025038228
        # to learn why we don't use libomp from Homebrew.
        cmake .. -GNinja -DGOOGLE_TEST=ON -DUSE_DMLC_GTEST=ON -DCMAKE_PREFIX_PATH=$CONDA_PREFIX
        ninja
    - name: Install Python package
      shell: bash -l {0}
      run: |
        cd python-package
        python --version
        python setup.py install
    - name: Test Python package
      shell: bash -l {0}
      run: |
        pytest -s -v ./tests/python
--- a/.github/workflows/r_tests.yml
+++ b/.github/workflows/r_tests.yml
@@ -5,6 +5,7 @@ on: [push, pull_request]
 env:
  R_PACKAGES: c('XML', 'data.table', 'ggplot2', 'DiagrammeR', 'Ckmeans.1d.dp', 'vcd', 'testthat', 'lintr', 'knitr', 'rmarkdown', 'e1071', 'cplm', 'devtools', 'float', 'titanic')
  GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
  _R_CHECK_EXAMPLE_TIMING_CPU_TO_ELAPSED_THRESHOLD_: 2.5
 permissions:
  contents: read # to fetch code (actions/checkout)
@@ -68,6 +69,7 @@ jobs:
          - {os: windows-latest, r: 'release', compiler: 'mingw', build: 'cmake'}
    env:
      R_REMOTES_NO_ERRORS_FROM_WARNINGS: true
      _R_CHECK_EXAMPLE_TIMING_CPU_TO_ELAPSED_THRESHOLD_: 2.5
      RSPM: ${{ matrix.config.rspm }}
    steps:
@@ -121,6 +123,10 @@ jobs:
        config:
          - {r: 'release'}
    env:
      _R_CHECK_EXAMPLE_TIMING_CPU_TO_ELAPSED_THRESHOLD_: 2.5
      MAKE: "make -j$(nproc)"
    steps:
    - uses: actions/checkout@v2
      with:
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,5 +1,5 @@
 cmake_minimum_required(VERSION 3.14 FATAL_ERROR)
-project(xgboost LANGUAGES CXX C VERSION 1.7.1)
+project(xgboost LANGUAGES CXX C VERSION 1.7.2)
 include(cmake/Utils.cmake)
 list(APPEND CMAKE_MODULE_PATH "${xgboost_SOURCE_DIR}/cmake/modules")
 cmake_policy(SET CMP0022 NEW)
--- a/1
+++ b/1
@@ -126,7 +126,6 @@ Rpack: clean_all
 	cat R-package/src/Makevars.in|sed '2s/.*/PKGROOT=./' > xgboost/src/Makevars.in
 	cat R-package/src/Makevars.win|sed '2s/.*/PKGROOT=./' > xgboost/src/Makevars.win
 	rm -f xgboost/src/Makevars.win-e   # OSX sed create this extra file; remove it
 	rm -f xgboost/cleanup
 	bash R-package/remove_warning_suppression_pragma.sh
 	bash xgboost/remove_warning_suppression_pragma.sh
 	rm xgboost/remove_warning_suppression_pragma.sh
--- a/R-package/DESCRIPTION
+++ b/R-package/DESCRIPTION
@@ -1,8 +1,8 @@
 Package: xgboost
 Type: Package
 Title: Extreme Gradient Boosting
-Version: 1.7.1.1
+Version: 1.7.2.1
-Date: 2022-11-03
+Date: 2022-12-08
 Authors@R: c(
  person("Tianqi", "Chen", role = c("aut"),
         email = "tianqi.tchen@gmail.com"),
@@ -66,5 +66,5 @@ Imports:
    methods,
    data.table (>= 1.9.6),
    jsonlite (>= 1.0),
-RoxygenNote: 7.1.1
+RoxygenNote: 7.2.1
 SystemRequirements: GNU make, C++14
--- a/R-package/R/callbacks.R
+++ b/R-package/R/callbacks.R
@@ -544,9 +544,11 @@ cb.cv.predict <- function(save_models = FALSE) {
 #'
 #' @return
 #' Results are stored in the \code{coefs} element of the closure.
-#' The \code{\link{xgb.gblinear.history}} convenience function provides an easy way to access it.
+#' The \code{\link{xgb.gblinear.history}} convenience function provides an easy
 #' way to access it.
 #' With \code{xgb.train}, it is either a dense of a sparse matrix.
-#' While with \code{xgb.cv}, it is a list (an element per each fold) of such matrices.
+#' While with \code{xgb.cv}, it is a list (an element per each fold) of such
 #' matrices.
 #'
 #' @seealso
 #' \code{\link{callbacks}}, \code{\link{xgb.gblinear.history}}.
@@ -558,7 +560,7 @@ cb.cv.predict <- function(save_models = FALSE) {
 #' # without considering the 2nd order interactions:
 #' x <- model.matrix(Species ~ .^2, iris)[,-1]
 #' colnames(x)
-#' dtrain <- xgb.DMatrix(scale(x), label = 1*(iris$Species == "versicolor"))
+#' dtrain <- xgb.DMatrix(scale(x), label = 1*(iris$Species == "versicolor"), nthread = 2)
 #' param <- list(booster = "gblinear", objective = "reg:logistic", eval_metric = "auc",
 #'               lambda = 0.0003, alpha = 0.0003, nthread = 2)
 #' # For 'shotgun', which is a default linear updater, using high eta values may result in
@@ -583,14 +585,14 @@ cb.cv.predict <- function(save_models = FALSE) {
 #'
 #' # For xgb.cv:
 #' bst <- xgb.cv(param, dtrain, nfold = 5, nrounds = 100, eta = 0.8,
-#'              callbacks = list(cb.gblinear.history()))
+#'               callbacks = list(cb.gblinear.history()))
 #' # coefficients in the CV fold #3
 #' matplot(xgb.gblinear.history(bst)[[3]], type = 'l')
 #'
 #'
 #' #### Multiclass classification:
 #' #
-#' dtrain <- xgb.DMatrix(scale(x), label = as.numeric(iris$Species) - 1)
+#' dtrain <- xgb.DMatrix(scale(x), label = as.numeric(iris$Species) - 1, nthread = 2)
 #' param <- list(booster = "gblinear", objective = "multi:softprob", num_class = 3,
 #'               lambda = 0.0003, alpha = 0.0003, nthread = 2)
 #' # For the default linear updater 'shotgun' it sometimes is helpful
--- a/R-package/R/xgb.DMatrix.R
+++ b/R-package/R/xgb.DMatrix.R
@@ -18,7 +18,7 @@
 #'
 #' @examples
 #' data(agaricus.train, package='xgboost')
-#' dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label))
+#' dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))
 #' xgb.DMatrix.save(dtrain, 'xgb.DMatrix.data')
 #' dtrain <- xgb.DMatrix('xgb.DMatrix.data')
 #' if (file.exists('xgb.DMatrix.data')) file.remove('xgb.DMatrix.data')
@@ -110,7 +110,7 @@ xgb.get.DMatrix <- function(data, label = NULL, missing = NA, weight = NULL, nth
 #' @examples
 #' data(agaricus.train, package='xgboost')
 #' train <- agaricus.train
-#' dtrain <- xgb.DMatrix(train$data, label=train$label)
+#' dtrain <- xgb.DMatrix(train$data, label=train$label, nthread = 2)
 #'
 #' stopifnot(nrow(dtrain) == nrow(train$data))
 #' stopifnot(ncol(dtrain) == ncol(train$data))
@@ -138,7 +138,7 @@ dim.xgb.DMatrix <- function(x) {
 #' @examples
 #' data(agaricus.train, package='xgboost')
 #' train <- agaricus.train
-#' dtrain <- xgb.DMatrix(train$data, label=train$label)
+#' dtrain <- xgb.DMatrix(train$data, label=train$label, nthread = 2)
 #' dimnames(dtrain)
 #' colnames(dtrain)
 #' colnames(dtrain) <- make.names(1:ncol(train$data))
@@ -193,7 +193,7 @@ dimnames.xgb.DMatrix <- function(x) {
 #'
 #' @examples
 #' data(agaricus.train, package='xgboost')
-#' dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label))
+#' dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))
 #'
 #' labels <- getinfo(dtrain, 'label')
 #' setinfo(dtrain, 'label', 1-labels)
@@ -249,7 +249,7 @@ getinfo.xgb.DMatrix <- function(object, name, ...) {
 #'
 #' @examples
 #' data(agaricus.train, package='xgboost')
-#' dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label))
+#' dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))
 #'
 #' labels <- getinfo(dtrain, 'label')
 #' setinfo(dtrain, 'label', 1-labels)
@@ -345,7 +345,7 @@ setinfo.xgb.DMatrix <- function(object, name, info, ...) {
 #'
 #' @examples
 #' data(agaricus.train, package='xgboost')
-#' dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label))
+#' dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))
 #'
 #' dsub <- slice(dtrain, 1:42)
 #' labels1 <- getinfo(dsub, 'label')
@@ -401,7 +401,7 @@ slice.xgb.DMatrix <- function(object, idxset, ...) {
 #'
 #' @examples
 #' data(agaricus.train, package='xgboost')
-#' dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label))
+#' dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))
 #'
 #' dtrain
 #' print(dtrain, verbose=TRUE)
--- a/R-package/R/xgb.DMatrix.save.R
+++ b/R-package/R/xgb.DMatrix.save.R
@@ -7,7 +7,7 @@
 #'
 #' @examples
 #' data(agaricus.train, package='xgboost')
-#' dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label))
+#' dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))
 #' xgb.DMatrix.save(dtrain, 'xgb.DMatrix.data')
 #' dtrain <- xgb.DMatrix('xgb.DMatrix.data')
 #' if (file.exists('xgb.DMatrix.data')) file.remove('xgb.DMatrix.data')
--- a/R-package/R/xgb.create.features.R
+++ b/R-package/R/xgb.create.features.R
@@ -48,8 +48,8 @@
 #' @examples
 #' data(agaricus.train, package='xgboost')
 #' data(agaricus.test, package='xgboost')
-#' dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label))
+#' dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))
-#' dtest <- with(agaricus.test, xgb.DMatrix(data, label = label))
+#' dtest <- with(agaricus.test, xgb.DMatrix(data, label = label, nthread = 2))
 #'
 #' param <- list(max_depth=2, eta=1, silent=1, objective='binary:logistic')
 #' nrounds = 4
@@ -65,8 +65,12 @@
 #' new.features.test <- xgb.create.features(model = bst, agaricus.test$data)
 #'
 #' # learning with new features
-#' new.dtrain <- xgb.DMatrix(data = new.features.train, label = agaricus.train$label)
+#' new.dtrain <- xgb.DMatrix(
-#' new.dtest <- xgb.DMatrix(data = new.features.test, label = agaricus.test$label)
+#'   data = new.features.train, label = agaricus.train$label, nthread = 2
 #' )
 #' new.dtest <- xgb.DMatrix(
 #'   data = new.features.test, label = agaricus.test$label, nthread = 2
 #' )
 #' watchlist <- list(train = new.dtrain)
 #' bst <- xgb.train(params = param, data = new.dtrain, nrounds = nrounds, nthread = 2)
 #'
@@ -79,7 +83,7 @@
 #'           accuracy.after, "!\n"))
 #'
 #' @export
-xgb.create.features <- function(model, data, ...){
+xgb.create.features <- function(model, data, ...) {
  check.deprecation(...)
  pred_with_leaf <- predict(model, data, predleaf = TRUE)
  cols <- lapply(as.data.frame(pred_with_leaf), factor)
--- a/R-package/R/xgb.cv.R
+++ b/R-package/R/xgb.cv.R
@@ -110,9 +110,9 @@
 #'
 #' @examples
 #' data(agaricus.train, package='xgboost')
-#' dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label))
+#' dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))
 #' cv <- xgb.cv(data = dtrain, nrounds = 3, nthread = 2, nfold = 5, metrics = list("rmse","auc"),
-#'                   max_depth = 3, eta = 1, objective = "binary:logistic")
+#'              max_depth = 3, eta = 1, objective = "binary:logistic")
 #' print(cv)
 #' print(cv, verbose=TRUE)
 #'
@@ -192,7 +192,7 @@ xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, missing =
  # create the booster-folds
  # train_folds
-  dall <- xgb.get.DMatrix(data, label, missing)
+  dall <- xgb.get.DMatrix(data, label, missing, nthread = params$nthread)
  bst_folds <- lapply(seq_along(folds), function(k) {
    dtest  <- slice(dall, folds[[k]])
    # code originally contributed by @RolandASc on stackoverflow
--- a/R-package/R/xgb.train.R
+++ b/R-package/R/xgb.train.R
@@ -192,8 +192,8 @@
 #' data(agaricus.train, package='xgboost')
 #' data(agaricus.test, package='xgboost')
 #'
-#' dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label))
+#' dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))
-#' dtest <- with(agaricus.test, xgb.DMatrix(data, label = label))
+#' dtest <- with(agaricus.test, xgb.DMatrix(data, label = label, nthread = 2))
 #' watchlist <- list(train = dtrain, eval = dtest)
 #'
 #' ## A simple xgb.train example:
--- a/R-package/configure
+++ b/R-package/configure
@@ -1,6 +1,6 @@
 #! /bin/sh
 # Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.69 for xgboost 1.7.1.
+# Generated by GNU Autoconf 2.69 for xgboost 1.7.2.
 #
 #
 # Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc.
@@ -576,8 +576,8 @@ MAKEFLAGS=
 # Identity of this package.
 PACKAGE_NAME='xgboost'
 PACKAGE_TARNAME='xgboost'
-PACKAGE_VERSION='1.7.1'
+PACKAGE_VERSION='1.7.2'
-PACKAGE_STRING='xgboost 1.7.1'
+PACKAGE_STRING='xgboost 1.7.2'
 PACKAGE_BUGREPORT=''
 PACKAGE_URL=''
@@ -1195,7 +1195,7 @@ if test "$ac_init_help" = "long"; then
  # Omit some internal or obsolete options to make the list less imposing.
  # This message is too long to be a string in the A/UX 3.1 sh.
  cat <<_ACEOF
-\`configure' configures xgboost 1.7.1 to adapt to many kinds of systems.
+\`configure' configures xgboost 1.7.2 to adapt to many kinds of systems.
 Usage: $0 [OPTION]... [VAR=VALUE]...
@@ -1257,7 +1257,7 @@ fi
 if test -n "$ac_init_help"; then
  case $ac_init_help in
-     short | recursive ) echo "Configuration of xgboost 1.7.1:";;
+     short | recursive ) echo "Configuration of xgboost 1.7.2:";;
   esac
  cat <<\_ACEOF
@@ -1336,7 +1336,7 @@ fi
 test -n "$ac_init_help" && exit $ac_status
 if $ac_init_version; then
  cat <<\_ACEOF
-xgboost configure 1.7.1
+xgboost configure 1.7.2
 generated by GNU Autoconf 2.69
 Copyright (C) 2012 Free Software Foundation, Inc.
@@ -1479,7 +1479,7 @@ cat >config.log <<_ACEOF
 This file contains any messages produced by compilers while
 running configure, to aid debugging if configure makes a mistake.
-It was created by xgboost $as_me 1.7.1, which was
+It was created by xgboost $as_me 1.7.2, which was
 generated by GNU Autoconf 2.69.  Invocation command line was
  $ $0 $@
@@ -3294,7 +3294,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
 # report actual input values of CONFIG_FILES etc. instead of their
 # values after options handling.
 ac_log="
-This file was extended by xgboost $as_me 1.7.1, which was
+This file was extended by xgboost $as_me 1.7.2, which was
 generated by GNU Autoconf 2.69.  Invocation command line was
  CONFIG_FILES    = $CONFIG_FILES
@@ -3347,7 +3347,7 @@ _ACEOF
 cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
 ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
 ac_cs_version="\\
-xgboost config.status 1.7.1
+xgboost config.status 1.7.2
 configured by $0, generated by GNU Autoconf 2.69,
  with options \\"\$ac_cs_config\\"
--- a/R-package/configure.ac
+++ b/R-package/configure.ac
@@ -2,7 +2,7 @@
 AC_PREREQ(2.69)
-AC_INIT([xgboost],[1.7.1],[],[xgboost],[])
+AC_INIT([xgboost],[1.7.2],[],[xgboost],[])
 # Use this line to set CC variable to a C compiler
 AC_PROG_CC
--- a/R-package/man/cb.gblinear.history.Rd
+++ b/R-package/man/cb.gblinear.history.Rd
@@ -15,9 +15,11 @@ selected per iteration.}
 }
 \value{
 Results are stored in the \code{coefs} element of the closure.
-The \code{\link{xgb.gblinear.history}} convenience function provides an easy way to access it.
+The \code{\link{xgb.gblinear.history}} convenience function provides an easy
 way to access it.
 With \code{xgb.train}, it is either a dense of a sparse matrix.
-While with \code{xgb.cv}, it is a list (an element per each fold) of such matrices.
+While with \code{xgb.cv}, it is a list (an element per each fold) of such
 matrices.
 }
 \description{
 Callback closure for collecting the model coefficients history of a gblinear booster
@@ -38,7 +40,7 @@ Callback function expects the following values to be set in its calling frame:
 # without considering the 2nd order interactions:
 x <- model.matrix(Species ~ .^2, iris)[,-1]
 colnames(x)
-dtrain <- xgb.DMatrix(scale(x), label = 1*(iris$Species == "versicolor"))
+dtrain <- xgb.DMatrix(scale(x), label = 1*(iris$Species == "versicolor"), nthread = 2)
 param <- list(booster = "gblinear", objective = "reg:logistic", eval_metric = "auc",
              lambda = 0.0003, alpha = 0.0003, nthread = 2)
 # For 'shotgun', which is a default linear updater, using high eta values may result in
@@ -63,14 +65,14 @@ matplot(xgb.gblinear.history(bst), type = 'l')
 # For xgb.cv:
 bst <- xgb.cv(param, dtrain, nfold = 5, nrounds = 100, eta = 0.8,
-             callbacks = list(cb.gblinear.history()))
+              callbacks = list(cb.gblinear.history()))
 # coefficients in the CV fold #3
 matplot(xgb.gblinear.history(bst)[[3]], type = 'l')
 #### Multiclass classification:
 #
-dtrain <- xgb.DMatrix(scale(x), label = as.numeric(iris$Species) - 1)
+dtrain <- xgb.DMatrix(scale(x), label = as.numeric(iris$Species) - 1, nthread = 2)
 param <- list(booster = "gblinear", objective = "multi:softprob", num_class = 3,
              lambda = 0.0003, alpha = 0.0003, nthread = 2)
 # For the default linear updater 'shotgun' it sometimes is helpful
--- a/R-package/man/dim.xgb.DMatrix.Rd
+++ b/R-package/man/dim.xgb.DMatrix.Rd
@@ -19,7 +19,7 @@ be directly used with an \code{xgb.DMatrix} object.
 \examples{
 data(agaricus.train, package='xgboost')
 train <- agaricus.train
-dtrain <- xgb.DMatrix(train$data, label=train$label)
+dtrain <- xgb.DMatrix(train$data, label=train$label, nthread = 2)
 stopifnot(nrow(dtrain) == nrow(train$data))
 stopifnot(ncol(dtrain) == ncol(train$data))
--- a/R-package/man/dimnames.xgb.DMatrix.Rd
+++ b/R-package/man/dimnames.xgb.DMatrix.Rd
@@ -26,7 +26,7 @@ Since row names are irrelevant, it is recommended to use \code{colnames} directl
 \examples{
 data(agaricus.train, package='xgboost')
 train <- agaricus.train
-dtrain <- xgb.DMatrix(train$data, label=train$label)
+dtrain <- xgb.DMatrix(train$data, label=train$label, nthread = 2)
 dimnames(dtrain)
 colnames(dtrain)
 colnames(dtrain) <- make.names(1:ncol(train$data))
--- a/R-package/man/getinfo.Rd
+++ b/R-package/man/getinfo.Rd
@@ -34,7 +34,7 @@ The \code{name} field can be one of the following:
 }
 \examples{
 data(agaricus.train, package='xgboost')
-dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label))
+dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))
 labels <- getinfo(dtrain, 'label')
 setinfo(dtrain, 'label', 1-labels)
--- a/R-package/man/print.xgb.DMatrix.Rd
+++ b/R-package/man/print.xgb.DMatrix.Rd
@@ -19,7 +19,7 @@ Currently it displays dimensions and presence of info-fields and colnames.
 }
 \examples{
 data(agaricus.train, package='xgboost')
-dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label))
+dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))
 dtrain
 print(dtrain, verbose=TRUE)
--- a/R-package/man/setinfo.Rd
+++ b/R-package/man/setinfo.Rd
@@ -33,7 +33,7 @@ The \code{name} field can be one of the following:
 }
 \examples{
 data(agaricus.train, package='xgboost')
-dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label))
+dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))
 labels <- getinfo(dtrain, 'label')
 setinfo(dtrain, 'label', 1-labels)
--- a/R-package/man/slice.xgb.DMatrix.Rd
+++ b/R-package/man/slice.xgb.DMatrix.Rd
@@ -28,7 +28,7 @@ original xgb.DMatrix object
 }
 \examples{
 data(agaricus.train, package='xgboost')
-dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label))
+dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))
 dsub <- slice(dtrain, 1:42)
 labels1 <- getinfo(dsub, 'label')
--- a/R-package/man/xgb.DMatrix.Rd
+++ b/R-package/man/xgb.DMatrix.Rd
@@ -38,7 +38,7 @@ Supported input file formats are either a LIBSVM text file or a binary file that
 }
 \examples{
 data(agaricus.train, package='xgboost')
-dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label))
+dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))
 xgb.DMatrix.save(dtrain, 'xgb.DMatrix.data')
 dtrain <- xgb.DMatrix('xgb.DMatrix.data')
 if (file.exists('xgb.DMatrix.data')) file.remove('xgb.DMatrix.data')
--- a/R-package/man/xgb.DMatrix.save.Rd
+++ b/R-package/man/xgb.DMatrix.save.Rd
@@ -16,7 +16,7 @@ Save xgb.DMatrix object to binary file
 }
 \examples{
 data(agaricus.train, package='xgboost')
-dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label))
+dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))
 xgb.DMatrix.save(dtrain, 'xgb.DMatrix.data')
 dtrain <- xgb.DMatrix('xgb.DMatrix.data')
 if (file.exists('xgb.DMatrix.data')) file.remove('xgb.DMatrix.data')
--- a/R-package/man/xgb.create.features.Rd
+++ b/R-package/man/xgb.create.features.Rd
@@ -59,8 +59,8 @@ a rule on certain features."
 \examples{
 data(agaricus.train, package='xgboost')
 data(agaricus.test, package='xgboost')
-dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label))
+dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))
-dtest <- with(agaricus.test, xgb.DMatrix(data, label = label))
+dtest <- with(agaricus.test, xgb.DMatrix(data, label = label, nthread = 2))
 param <- list(max_depth=2, eta=1, silent=1, objective='binary:logistic')
 nrounds = 4
@@ -76,8 +76,12 @@ new.features.train <- xgb.create.features(model = bst, agaricus.train$data)
 new.features.test <- xgb.create.features(model = bst, agaricus.test$data)
 # learning with new features
-new.dtrain <- xgb.DMatrix(data = new.features.train, label = agaricus.train$label)
+new.dtrain <- xgb.DMatrix(
-new.dtest <- xgb.DMatrix(data = new.features.test, label = agaricus.test$label)
+  data = new.features.train, label = agaricus.train$label, nthread = 2
 )
 new.dtest <- xgb.DMatrix(
  data = new.features.test, label = agaricus.test$label, nthread = 2
 )
 watchlist <- list(train = new.dtrain)
 bst <- xgb.train(params = param, data = new.dtrain, nrounds = nrounds, nthread = 2)
--- a/R-package/man/xgb.cv.Rd
+++ b/R-package/man/xgb.cv.Rd
@@ -158,9 +158,9 @@ Adapted from \url{https://en.wikipedia.org/wiki/Cross-validation_\%28statistics\
 }
 \examples{
 data(agaricus.train, package='xgboost')
-dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label))
+dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))
 cv <- xgb.cv(data = dtrain, nrounds = 3, nthread = 2, nfold = 5, metrics = list("rmse","auc"),
-                  max_depth = 3, eta = 1, objective = "binary:logistic")
+             max_depth = 3, eta = 1, objective = "binary:logistic")
 print(cv)
 print(cv, verbose=TRUE)
--- a/R-package/man/xgb.train.Rd
+++ b/R-package/man/xgb.train.Rd
@@ -241,8 +241,8 @@ The following callbacks are automatically created when certain parameters are se
 data(agaricus.train, package='xgboost')
 data(agaricus.test, package='xgboost')
-dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label))
+dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))
-dtest <- with(agaricus.test, xgb.DMatrix(data, label = label))
+dtest <- with(agaricus.test, xgb.DMatrix(data, label = label, nthread = 2))
 watchlist <- list(train = dtrain, eval = dtest)
 ## A simple xgb.train example:
--- a/doc/contrib/release.rst
+++ b/doc/contrib/release.rst
@@ -4,7 +4,7 @@ XGBoost Release Policy
 =======================
 Versioning Policy
---------------------------
+-----------------
 Starting from XGBoost 1.0.0, each XGBoost release will be versioned as [MAJOR].[FEATURE].[MAINTENANCE]
@@ -34,6 +34,20 @@ Making a Release
   + The CRAN package is maintained by `Tong He <https://github.com/hetong007>`_ and `Jiaming Yuan <https://github.com/trivialfis>`__.
     Before submitting a release, one should test the package on `R-hub <https://builder.r-hub.io/>`__ and `win-builder <https://win-builder.r-project.org/>`__ first.  Please note that the R-hub Windows instance doesn't have the exact same environment as the one hosted on win-builder.
   + The Maven package is maintained by `Nan Zhu <https://github.com/CodingCat>`_ and `Hyunsu Cho <https://github.com/hcho3>`_.
 R CRAN Package
 --------------
 Before submitting a release, one should test the package on `R-hub <https://builder.r-hub.io/>`__ and `win-builder <https://win-builder.r-project.org/>`__ first.  Please note that the R-hub Windows instance doesn't have the exact same environment as the one hosted on win-builder.
 According to the `CRAN policy <https://cran.r-project.org/web/packages/policies.html>`__:
    If running a package uses multiple threads/cores it must never use more than two simultaneously: the check farm is a shared resource and will typically be running many checks simultaneously.
 We need to check the number of CPUs used in examples. Export ``_R_CHECK_EXAMPLE_TIMING_CPU_TO_ELAPSED_THRESHOLD_=2.5`` before running ``R CMD check --as-cran`` `[1] <#references>`__ and make sure the machine you are using has enough CPU cores to reveal any potential policy violation.
 References
 ----------
 [1] https://stat.ethz.ch/pipermail/r-package-devel/2022q4/008610.html
--- a/doc/parameter.rst
+++ b/doc/parameter.rst
@@ -44,8 +44,7 @@ General Parameters
 * ``validate_parameters`` [default to ``false``, except for Python, R and CLI interface]
  - When set to True, XGBoost will perform validation of input parameters to check whether
-    a parameter is used or not.  The feature is still experimental.  It's expected to have
+    a parameter is used or not.
    some false positives.
 * ``nthread`` [default to maximum number of threads available if not set]
@@ -233,24 +232,21 @@ Parameters for Categorical Feature
 These parameters are only used for training with categorical data. See
 :doc:`/tutorials/categorical` for more information.
 .. note:: These parameters are experimental. ``exact`` tree method is not yet supported.
 * ``max_cat_to_onehot``
  .. versionadded:: 1.6.0
  .. note:: This parameter is experimental. ``exact`` tree method is not yet supported.
  - A threshold for deciding whether XGBoost should use one-hot encoding based split for
    categorical data.  When number of categories is lesser than the threshold then one-hot
    encoding is chosen, otherwise the categories will be partitioned into children nodes.
    Only relevant for regression and binary classification. Also, ``exact`` tree method is
    not supported
 * ``max_cat_threshold``
  .. versionadded:: 1.7.0
  .. note:: This parameter is experimental. ``exact`` tree method is not yet supported.
  - Maximum number of categories considered for each split. Used only by partition-based
    splits for preventing over-fitting.
--- a/doc/python/python_api.rst
+++ b/doc/python/python_api.rst
@@ -25,9 +25,6 @@ Core Data Structure
 .. autoclass:: xgboost.QuantileDMatrix
    :show-inheritance:
 .. autoclass:: xgboost.DeviceQuantileDMatrix
    :show-inheritance:
 .. autoclass:: xgboost.Booster
    :members:
    :show-inheritance:
@@ -115,7 +112,7 @@ Dask API
    :inherited-members:
    :show-inheritance:
-.. autoclass:: xgboost.dask.DaskDeviceQuantileDMatrix
+.. autoclass:: xgboost.dask.DaskQuantileDMatrix
    :members:
    :inherited-members:
    :show-inheritance:
--- a/doc/tutorials/dask.rst
+++ b/doc/tutorials/dask.rst
@@ -564,7 +564,7 @@ Here are some pratices on reducing memory usage with dask and xgboost.
  nice summary.
 - When using GPU input, like dataframe loaded by ``dask_cudf``, you can try
-  :py:class:`xgboost.dask.DaskDeviceQuantileDMatrix` as a drop in replacement for ``DaskDMatrix``
+  :py:class:`xgboost.dask.DaskQuantileDMatrix` as a drop in replacement for ``DaskDMatrix``
  to reduce overall memory usage.  See
  :ref:`sphx_glr_python_dask-examples_gpu_training.py` for an example.
--- a/2
+++ b/2
--- a/include/xgboost/collective/socket.h
+++ b/include/xgboost/collective/socket.h
@@ -287,11 +287,22 @@ class TCPSocket {
 #elif defined(__APPLE__)
    return domain_;
 #elif defined(__unix__)
 #ifndef __PASE__
    std::int32_t domain;
    socklen_t len = sizeof(domain);
    xgboost_CHECK_SYS_CALL(
        getsockopt(handle_, SOL_SOCKET, SO_DOMAIN, reinterpret_cast<char *>(&domain), &len), 0);
    return ret_iafamily(domain);
 #else
    struct sockaddr sa;
    socklen_t sizeofsa = sizeof(sa);
    xgboost_CHECK_SYS_CALL(
      getsockname(handle_, &sa, &sizeofsa), 0);
    if (sizeofsa < sizeof(uchar_t)*2) {
      return ret_iafamily(AF_INET);
    }
    return ret_iafamily(sa.sa_family);
 #endif   // __PASE__
 #else
    LOG(FATAL) << "Unknown platform.";
    return ret_iafamily(AF_INET);
--- a/include/xgboost/version_config.h
+++ b/include/xgboost/version_config.h
@@ -6,6 +6,6 @@
 #define XGBOOST_VER_MAJOR 1
 #define XGBOOST_VER_MINOR 7
-#define XGBOOST_VER_PATCH 0
+#define XGBOOST_VER_PATCH 2
 #endif  // XGBOOST_VERSION_CONFIG_H_
--- a/jvm-packages/pom.xml
+++ b/jvm-packages/pom.xml
@@ -6,7 +6,7 @@
    <groupId>ml.dmlc</groupId>
    <artifactId>xgboost-jvm_2.12</artifactId>
-    <version>1.7.1</version>
+    <version>1.7.2</version>
    <packaging>pom</packaging>
    <name>XGBoost JVM Package</name>
    <description>JVM Package for XGBoost</description>
--- a/jvm-packages/xgboost4j-example/pom.xml
+++ b/jvm-packages/xgboost4j-example/pom.xml
@@ -6,10 +6,10 @@
    <parent>
        <groupId>ml.dmlc</groupId>
        <artifactId>xgboost-jvm_2.12</artifactId>
-        <version>1.7.1</version>
+        <version>1.7.2</version>
    </parent>
    <artifactId>xgboost4j-example_2.12</artifactId>
-    <version>1.7.1</version>
+    <version>1.7.2</version>
    <packaging>jar</packaging>
    <build>
        <plugins>
@@ -26,7 +26,7 @@
        <dependency>
            <groupId>ml.dmlc</groupId>
            <artifactId>xgboost4j-spark_${scala.binary.version}</artifactId>
-            <version>1.7.1</version>
+            <version>1.7.2</version>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
@@ -37,7 +37,7 @@
        <dependency>
            <groupId>ml.dmlc</groupId>
            <artifactId>xgboost4j-flink_${scala.binary.version}</artifactId>
-            <version>1.7.1</version>
+            <version>1.7.2</version>
        </dependency>
        <dependency>
            <groupId>org.apache.commons</groupId>
--- a/jvm-packages/xgboost4j-flink/pom.xml
+++ b/jvm-packages/xgboost4j-flink/pom.xml
@@ -6,10 +6,10 @@
    <parent>
        <groupId>ml.dmlc</groupId>
        <artifactId>xgboost-jvm_2.12</artifactId>
-        <version>1.7.1</version>
+        <version>1.7.2</version>
    </parent>
    <artifactId>xgboost4j-flink_2.12</artifactId>
-    <version>1.7.1</version>
+    <version>1.7.2</version>
    <build>
        <plugins>
            <plugin>
@@ -26,7 +26,7 @@
        <dependency>
            <groupId>ml.dmlc</groupId>
            <artifactId>xgboost4j_${scala.binary.version}</artifactId>
-            <version>1.7.1</version>
+            <version>1.7.2</version>
        </dependency>
        <dependency>
            <groupId>org.apache.commons</groupId>
--- a/jvm-packages/xgboost4j-gpu/pom.xml
+++ b/jvm-packages/xgboost4j-gpu/pom.xml
@@ -6,10 +6,10 @@
    <parent>
        <groupId>ml.dmlc</groupId>
        <artifactId>xgboost-jvm_2.12</artifactId>
-        <version>1.7.1</version>
+        <version>1.7.2</version>
    </parent>
    <artifactId>xgboost4j-gpu_2.12</artifactId>
-    <version>1.7.1</version>
+    <version>1.7.2</version>
    <packaging>jar</packaging>
    <dependencies>
--- a/jvm-packages/xgboost4j-gpu/src/native/xgboost4j-gpu.cu
+++ b/jvm-packages/xgboost4j-gpu/src/native/xgboost4j-gpu.cu
@@ -1,7 +1,7 @@
 #include <jni.h>
 #include <thrust/system/cuda/experimental/pinned_allocator.h>
 #include "../../../../src/common/device_helpers.cuh"
 #include "../../../../src/common/cuda_pinned_allocator.h"
 #include "../../../../src/data/array_interface.h"
 #include "jvm_utils.h"
 #include <xgboost/c_api.h>
@@ -131,7 +131,7 @@ class DataIteratorProxy {
  bool cache_on_host_{true}; // TODO(Bobby): Make this optional.
  template <typename T>
-  using Alloc = thrust::system::cuda::experimental::pinned_allocator<T>;
+  using Alloc = xgboost::common::cuda::pinned_allocator<T>;
  template <typename U>
  using HostVector = std::vector<U, Alloc<U>>;
--- a/jvm-packages/xgboost4j-spark-gpu/pom.xml
+++ b/jvm-packages/xgboost4j-spark-gpu/pom.xml
@@ -6,7 +6,7 @@
    <parent>
        <groupId>ml.dmlc</groupId>
        <artifactId>xgboost-jvm_2.12</artifactId>
-        <version>1.7.1</version>
+        <version>1.7.2</version>
    </parent>
    <artifactId>xgboost4j-spark-gpu_2.12</artifactId>
    <build>
@@ -24,7 +24,7 @@
        <dependency>
            <groupId>ml.dmlc</groupId>
            <artifactId>xgboost4j-gpu_${scala.binary.version}</artifactId>
-            <version>1.7.1</version>
+            <version>1.7.2</version>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
--- a/jvm-packages/xgboost4j-spark/pom.xml
+++ b/jvm-packages/xgboost4j-spark/pom.xml
@@ -6,7 +6,7 @@
    <parent>
        <groupId>ml.dmlc</groupId>
        <artifactId>xgboost-jvm_2.12</artifactId>
-        <version>1.7.1</version>
+        <version>1.7.2</version>
    </parent>
    <artifactId>xgboost4j-spark_2.12</artifactId>
    <build>
@@ -24,7 +24,7 @@
        <dependency>
            <groupId>ml.dmlc</groupId>
            <artifactId>xgboost4j_${scala.binary.version}</artifactId>
-            <version>1.7.1</version>
+            <version>1.7.2</version>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
--- a/jvm-packages/xgboost4j/pom.xml
+++ b/jvm-packages/xgboost4j/pom.xml
@@ -6,10 +6,10 @@
    <parent>
        <groupId>ml.dmlc</groupId>
        <artifactId>xgboost-jvm_2.12</artifactId>
-        <version>1.7.1</version>
+        <version>1.7.2</version>
    </parent>
    <artifactId>xgboost4j_2.12</artifactId>
-    <version>1.7.1</version>
+    <version>1.7.2</version>
    <packaging>jar</packaging>
    <dependencies>
--- a/python-package/xgboost/VERSION
+++ b/python-package/xgboost/VERSION
@@ -1 +1 @@
-1.7.1
+1.7.2
--- a/python-package/xgboost/compat.py
+++ b/python-package/xgboost/compat.py
@@ -43,6 +43,7 @@ except ImportError:
    pandas_concat = None
    PANDAS_INSTALLED = False
 # sklearn
 try:
    from sklearn.base import BaseEstimator as XGBModelBase
@@ -72,6 +73,22 @@ except ImportError:
    XGBStratifiedKFold = None
 _logger = logging.getLogger(__name__)
 def is_cudf_available() -> bool:
    """Check whether the cuDF package can be imported.

    Returns ``False`` when no import spec for ``cudf`` is found, or when the
    import itself raises ``ImportError`` (the failure is logged together with
    its traceback).  Returns ``True`` only after ``import cudf`` succeeds.
    """
    # Cheap lookup first: skip the import attempt when no ``cudf`` spec exists.
    if importlib.util.find_spec("cudf") is None:
        return False
    try:
        # Imported only to verify that the import works; the module is not used here.
        import cudf
        return True
    except ImportError:
        _logger.exception("Importing cuDF failed, use DMatrix instead of QDM")
        return False
 class XGBoostLabelEncoder(LabelEncoder):
    """Label encoder with JSON serialization methods."""
--- a/python-package/xgboost/dask.py
+++ b/python-package/xgboost/dask.py
@@ -853,7 +853,7 @@ async def _get_rabit_args(
        sched_addr = None
    # make sure all workers are online so that we can obtain reliable scheduler_info
-    client.wait_for_workers(n_workers)
+    await client.wait_for_workers(n_workers)  # type: ignore
    env = await client.run_on_scheduler(
        _start_tracker, n_workers, sched_addr, user_addr
    )
--- a/python-package/xgboost/spark/core.py
+++ b/python-package/xgboost/spark/core.py
@@ -1,7 +1,7 @@
 # type: ignore
 """Xgboost pyspark integration submodule for core code."""
 # pylint: disable=fixme, too-many-ancestors, protected-access, no-member, invalid-name
-# pylint: disable=too-few-public-methods, too-many-lines
+# pylint: disable=too-few-public-methods, too-many-lines, too-many-branches
 import json
 from typing import Iterator, Optional, Tuple
@@ -32,6 +32,7 @@ from pyspark.sql.types import (
    ShortType,
 )
 from scipy.special import expit, softmax  # pylint: disable=no-name-in-module
 from xgboost.compat import is_cudf_available
 from xgboost.core import Booster
 from xgboost.training import train as worker_train
@@ -728,6 +729,10 @@ class _SparkXGBEstimator(Estimator, _SparkXGBParams, MLReadable, MLWritable):
            else:
                dataset = dataset.repartition(num_workers)
        if self.isDefined(self.qid_col) and self.getOrDefault(self.qid_col):
            # XGBoost requires qid to be sorted for each partition
            dataset = dataset.sortWithinPartitions(alias.qid, ascending=True)
        train_params = self._get_distributed_train_params(dataset)
        booster_params, train_call_kwargs_params = self._get_xgb_train_call_args(
            train_params
@@ -755,7 +760,8 @@ class _SparkXGBEstimator(Estimator, _SparkXGBParams, MLReadable, MLWritable):
            k: v for k, v in train_call_kwargs_params.items() if v is not None
        }
        dmatrix_kwargs = {k: v for k, v in dmatrix_kwargs.items() if v is not None}
-        use_qdm = booster_params.get("tree_method", None) in ("hist", "gpu_hist")
+
        use_hist = booster_params.get("tree_method", None) in ("hist", "gpu_hist")
        def _train_booster(pandas_df_iter):
            """Takes in an RDD partition and outputs a booster for that partition after
@@ -769,6 +775,15 @@ class _SparkXGBEstimator(Estimator, _SparkXGBParams, MLReadable, MLWritable):
            gpu_id = None
            # If cuDF is not installed, use DMatrix instead of QDM,
            # because without cuDF, DMatrix performs better than QDM.
            # Note: `is_cudf_available` is checked on the Spark worker side because
            # a Spark worker might have a different Python environment from the driver.
            if use_gpu:
                use_qdm = use_hist and is_cudf_available()
            else:
                use_qdm = use_hist
            if use_qdm and (booster_params.get("max_bin", None) is not None):
                dmatrix_kwargs["max_bin"] = booster_params["max_bin"]
--- a/src/common/cuda_pinned_allocator.h
+++ b/src/common/cuda_pinned_allocator.h
@@ -0,0 +1,91 @@
 /*!
 * Copyright 2022 by XGBoost Contributors
 * \file cuda_pinned_allocator.h
 * \brief cuda pinned allocator for usage with thrust containers
 */
 #pragma once
 #include <cstddef>
 #include <limits>
 #include "common.h"
 namespace xgboost {
 namespace common {
 namespace cuda {
 // \p pinned_allocator is a CUDA-specific host memory allocator
 //  that employs \c cudaMallocHost for allocation.
 //
 // This implementation is ported from the experimental/pinned_allocator
 // that Thrust used to provide.
 //
 //  \see https://en.cppreference.com/w/cpp/memory/allocator
 template <typename T>
 class pinned_allocator;
 // Explicit specialization for \c void. It only declares the member typedefs and
 // \c rebind; no reference typedefs and no allocate()/deallocate() are provided here.
 template <>
 class pinned_allocator<void> {
 public:
  using value_type      = void;            // NOLINT: The type of the elements in the allocator
  using pointer         = void*;           // NOLINT: The type returned by address() / allocate()
  using const_pointer   = const void*;     // NOLINT: The type returned by address()
  using size_type       = std::size_t;     // NOLINT: The type used for the size of the allocation
  using difference_type = std::ptrdiff_t;  // NOLINT: The type of the distance between two pointers
  // Maps this allocator to the pinned_allocator for another element type U.
  template <typename U>
  struct rebind {                       // NOLINT
    using other = pinned_allocator<U>;  // NOLINT: The rebound type
  };
 };
 // Host memory allocator for elements of type T. Storage is obtained with
 // \c cudaMallocHost and released with \c cudaFreeHost. The class declares no data
 // members, and operator== always returns true, so all instances are interchangeable.
 template <typename T>
 class pinned_allocator {
 public:
  using value_type      = T;               // NOLINT: The type of the elements in the allocator
  using pointer         = T*;              // NOLINT: The type returned by address() / allocate()
  using const_pointer   = const T*;        // NOLINT: The type returned by address()
  using reference       = T&;              // NOLINT: The parameter type for address()
  using const_reference = const T&;        // NOLINT: The parameter type for address()
  using size_type       = std::size_t;     // NOLINT: The type used for the size of the allocation
  using difference_type = std::ptrdiff_t;  // NOLINT: The type of the distance between two pointers
  // Maps this allocator to the pinned_allocator for another element type U.
  template <typename U>
  struct rebind {                       // NOLINT
    using other = pinned_allocator<U>;  // NOLINT: The rebound type
  };
  // Constructors and destructor have empty bodies: there is no state to set up or copy.
  XGBOOST_DEVICE inline pinned_allocator() {}; // NOLINT: host/device markup ignored on defaulted functions
  XGBOOST_DEVICE inline ~pinned_allocator() {} // NOLINT: host/device markup ignored on defaulted functions
  XGBOOST_DEVICE inline pinned_allocator(pinned_allocator const&) {} // NOLINT: host/device markup ignored on defaulted functions
  // Converting constructor from an allocator of a different element type.
  template <typename U>
  XGBOOST_DEVICE inline pinned_allocator(pinned_allocator<U> const&) {} // NOLINT
  // Return the address of \p r.
  XGBOOST_DEVICE inline pointer address(reference r) { return &r; } // NOLINT
  XGBOOST_DEVICE inline const_pointer address(const_reference r) { return &r; } // NOLINT
  // Allocate storage for \p cnt objects of type T via cudaMallocHost.
  // The second (hint) parameter is unnamed and ignored.
  // Throws std::bad_alloc when \p cnt exceeds max_size(); the CUDA return code is
  // passed to dh::safe_cuda (its failure behaviour is defined outside this file).
  inline pointer allocate(size_type cnt, const_pointer = nullptr) { // NOLINT
    if (cnt > this->max_size()) { throw std::bad_alloc(); }  // end if
    pointer result(nullptr);
    dh::safe_cuda(cudaMallocHost(reinterpret_cast<void**>(&result), cnt * sizeof(value_type)));
    return result;
  }
  // Release storage at \p p with cudaFreeHost; the size argument is ignored.
  inline void deallocate(pointer p, size_type) { dh::safe_cuda(cudaFreeHost(p)); } // NOLINT
  // Largest element count for which cnt * sizeof(T) still fits in size_type.
  // NOTE(review): the parentheses around numeric_limits<...>::max presumably guard
  // against a function-like `max` macro -- confirm.
  inline size_type max_size() const { return (std::numeric_limits<size_type>::max)() / sizeof(T); } // NOLINT
  // Always true: the argument is not inspected.
  XGBOOST_DEVICE inline bool operator==(pinned_allocator const& x) const { return true; }
  XGBOOST_DEVICE inline bool operator!=(pinned_allocator const& x) const {
    return !operator==(x);
  }
 };
 }  // namespace cuda
 }  // namespace common
 }  // namespace xgboost
--- a/src/data/array_interface.h
+++ b/src/data/array_interface.h
@@ -101,7 +101,7 @@ class ArrayInterfaceHandler {
  template <typename PtrType>
  static PtrType GetPtrFromArrayData(Object::Map const &obj) {
    auto data_it = obj.find("data");
-    if (data_it == obj.cend()) {
+    if (data_it == obj.cend() || IsA<Null>(data_it->second)) {
      LOG(FATAL) << "Empty data passed in.";
    }
    auto p_data = reinterpret_cast<PtrType>(
@@ -111,7 +111,7 @@ class ArrayInterfaceHandler {
  static void Validate(Object::Map const &array) {
    auto version_it = array.find("version");
-    if (version_it == array.cend()) {
+    if (version_it == array.cend() || IsA<Null>(version_it->second)) {
      LOG(FATAL) << "Missing `version' field for array interface";
    }
    if (get<Integer const>(version_it->second) > 3) {
@@ -119,17 +119,19 @@ class ArrayInterfaceHandler {
    }
    auto typestr_it = array.find("typestr");
-    if (typestr_it == array.cend()) {
+    if (typestr_it == array.cend() || IsA<Null>(typestr_it->second)) {
      LOG(FATAL) << "Missing `typestr' field for array interface";
    }
    auto typestr = get<String const>(typestr_it->second);
    CHECK(typestr.size() == 3 || typestr.size() == 4) << ArrayInterfaceErrors::TypestrFormat();
-    if (array.find("shape") == array.cend()) {
+    auto shape_it = array.find("shape");
    if (shape_it == array.cend() || IsA<Null>(shape_it->second)) {
      LOG(FATAL) << "Missing `shape' field for array interface";
    }
-    if (array.find("data") == array.cend()) {
+    auto data_it = array.find("data");
    if (data_it == array.cend() || IsA<Null>(data_it->second)) {
      LOG(FATAL) << "Missing `data' field for array interface";
    }
  }
@@ -139,8 +141,9 @@ class ArrayInterfaceHandler {
  static size_t ExtractMask(Object::Map const &column,
                            common::Span<RBitField8::value_type> *p_out) {
    auto &s_mask = *p_out;
-    if (column.find("mask") != column.cend()) {
+    auto const &mask_it = column.find("mask");
-      auto const &j_mask = get<Object const>(column.at("mask"));
+    if (mask_it != column.cend() && !IsA<Null>(mask_it->second)) {
      auto const &j_mask = get<Object const>(mask_it->second);
      Validate(j_mask);
      auto p_mask = GetPtrFromArrayData<RBitField8::value_type *>(j_mask);
@@ -173,8 +176,9 @@ class ArrayInterfaceHandler {
      // assume 1 byte alignment.
      size_t const span_size = RBitField8::ComputeStorageSize(n_bits);
-      if (j_mask.find("strides") != j_mask.cend()) {
+      auto strides_it = j_mask.find("strides");
-        auto strides = get<Array const>(column.at("strides"));
+      if (strides_it != j_mask.cend() && !IsA<Null>(strides_it->second)) {
        auto strides = get<Array const>(strides_it->second);
        CHECK_EQ(strides.size(), 1) << ArrayInterfaceErrors::Dimension(1);
        CHECK_EQ(get<Integer>(strides.at(0)), type_length) << ArrayInterfaceErrors::Contiguous();
      }
@@ -401,7 +405,9 @@ class ArrayInterface {
                            << "XGBoost doesn't support internal broadcasting.";
      }
    } else {
-      CHECK(array.find("mask") == array.cend()) << "Masked array is not yet supported.";
+      auto mask_it = array.find("mask");
      CHECK(mask_it == array.cend() || IsA<Null>(mask_it->second))
          << "Masked array is not yet supported.";
    }
    auto stream_it = array.find("stream");
--- a/src/tree/gpu_hist/evaluate_splits.cuh
+++ b/src/tree/gpu_hist/evaluate_splits.cuh
@@ -3,10 +3,10 @@
 */
 #ifndef EVALUATE_SPLITS_CUH_
 #define EVALUATE_SPLITS_CUH_
 #include <thrust/system/cuda/experimental/pinned_allocator.h>
 #include <xgboost/span.h>
 #include "../../common/categorical.h"
 #include "../../common/cuda_pinned_allocator.h"
 #include "../split_evaluator.h"
 #include "../updater_gpu_common.cuh"
 #include "expand_entry.cuh"
@@ -57,7 +57,7 @@ struct CatAccessor {
 class GPUHistEvaluator {
  using CatST = common::CatBitField::value_type;  // categorical storage type
  // use pinned memory to stage the categories, used for sort based splits.
-  using Alloc = thrust::system::cuda::experimental::pinned_allocator<CatST>;
+  using Alloc = xgboost::common::cuda::pinned_allocator<CatST>;
 private:
  TreeEvaluator tree_evaluator_;
--- a/tests/ci_build/conda_env/cpp_test.yml
+++ b/tests/ci_build/conda_env/cpp_test.yml
@@ -0,0 +1,11 @@
 # conda environment for CPP test on Linux distributions
 name: cpp_test
 channels:
 - defaults
 - conda-forge
 dependencies:
 - cmake
 - ninja
 - c-compiler
 - cxx-compiler
 - gtest
--- a/tests/ci_build/conda_env/sdist_test.yml
+++ b/tests/ci_build/conda_env/sdist_test.yml
@@ -0,0 +1,13 @@
 # conda environment for source distribution test.
 name: sdist_test
 channels:
 - defaults
 - conda-forge
 dependencies:
 - python=3.8
 - pip
 - wheel
 - cmake
 - ninja
 - c-compiler
 - cxx-compiler
--- a/tests/cpp/data/test_array_interface.cc
+++ b/tests/cpp/data/test_array_interface.cc
@@ -33,9 +33,8 @@ TEST(ArrayInterface, Error) {
  Json column { Object() };
  std::vector<Json> j_shape {Json(Integer(static_cast<Integer::Int>(kRows)))};
  column["shape"] = Array(j_shape);
-  std::vector<Json> j_data {
+  std::vector<Json> j_data{Json(Integer(reinterpret_cast<Integer::Int>(nullptr))),
-    Json(Integer(reinterpret_cast<Integer::Int>(nullptr))),
+                           Json(Boolean(false))};
        Json(Boolean(false))};
  auto const& column_obj = get<Object>(column);
  std::string typestr{"<f4"};
@@ -45,6 +44,10 @@ TEST(ArrayInterface, Error) {
  EXPECT_THROW(ArrayInterfaceHandler::ExtractData(column_obj, n), dmlc::Error);
  column["version"] = 3;
  // missing data
  EXPECT_THROW(ArrayInterfaceHandler::ExtractData(column_obj, n),
               dmlc::Error);
  // null data
  column["data"] = Null{};
  EXPECT_THROW(ArrayInterfaceHandler::ExtractData(column_obj, n),
               dmlc::Error);
  column["data"] = j_data;
@@ -63,6 +66,11 @@ TEST(ArrayInterface, Error) {
      Json(Boolean(false))};
  column["data"] = j_data;
  EXPECT_NO_THROW(ArrayInterfaceHandler::ExtractData(column_obj, n));
  // null data in mask
  column["mask"] = Object{};
  column["mask"]["data"] = Null{};
  common::Span<RBitField8::value_type> s_mask;
  EXPECT_THROW(ArrayInterfaceHandler::ExtractMask(column_obj, &s_mask), dmlc::Error);
 }
 TEST(ArrayInterface, GetElement) {
--- a/tests/python/test_spark/test_spark_local.py
+++ b/tests/python/test_spark/test_spark_local.py
@@ -390,28 +390,6 @@ class XgboostLocalTest(SparkTestCase):
                "expected_prediction_with_base_margin",
            ],
        )
        self.ranker_df_train = self.session.createDataFrame(
            [
                (Vectors.dense(1.0, 2.0, 3.0), 0, 0),
                (Vectors.dense(4.0, 5.0, 6.0), 1, 0),
                (Vectors.dense(9.0, 4.0, 8.0), 2, 0),
                (Vectors.sparse(3, {1: 1.0, 2: 5.5}), 0, 1),
                (Vectors.sparse(3, {1: 6.0, 2: 7.5}), 1, 1),
                (Vectors.sparse(3, {1: 8.0, 2: 9.5}), 2, 1),
            ],
            ["features", "label", "qid"],
        )
        self.ranker_df_test = self.session.createDataFrame(
            [
                (Vectors.dense(1.5, 2.0, 3.0), 0, -1.87988),
                (Vectors.dense(4.5, 5.0, 6.0), 0, 0.29556),
                (Vectors.dense(9.0, 4.5, 8.0), 0, 2.36570),
                (Vectors.sparse(3, {1: 1.0, 2: 6.0}), 1, -1.87988),
                (Vectors.sparse(3, {1: 6.0, 2: 7.0}), 1, -0.30612),
                (Vectors.sparse(3, {1: 8.0, 2: 10.5}), 1, 2.44826),
            ],
            ["features", "qid", "expected_prediction"],
        )
        self.reg_df_sparse_train = self.session.createDataFrame(
            [
@@ -1039,15 +1017,6 @@ class XgboostLocalTest(SparkTestCase):
        for row1, row2 in zip(pred_result, pred_result2):
            self.assertTrue(np.allclose(row1.probability, row2.probability, rtol=1e-3))
    def test_ranker(self):
        ranker = SparkXGBRanker(qid_col="qid")
        assert ranker.getOrDefault(ranker.objective) == "rank:pairwise"
        model = ranker.fit(self.ranker_df_train)
        pred_result = model.transform(self.ranker_df_test).collect()
        for row in pred_result:
            assert np.isclose(row.prediction, row.expected_prediction, rtol=1e-3)
    def test_empty_validation_data(self) -> None:
        for tree_method in [
            "hist",
@@ -1130,3 +1099,63 @@ class XgboostLocalTest(SparkTestCase):
    def test_unsupported_params(self):
        with pytest.raises(ValueError, match="evals_result"):
            SparkXGBClassifier(evals_result={})
 class XgboostRankerLocalTest(SparkTestCase):
    """Tests for ``SparkXGBRanker`` using small in-memory Spark DataFrames."""

    def setUp(self):
        """Build the ranking DataFrames used by the tests in this class."""
        self.session.conf.set("spark.sql.execution.arrow.maxRecordsPerBatch", "8")
        # Training data: two query groups (qid 0 and 1) with three rows each.
        self.ranker_df_train = self.session.createDataFrame(
            [
                (Vectors.dense(1.0, 2.0, 3.0), 0, 0),
                (Vectors.dense(4.0, 5.0, 6.0), 1, 0),
                (Vectors.dense(9.0, 4.0, 8.0), 2, 0),
                (Vectors.sparse(3, {1: 1.0, 2: 5.5}), 0, 1),
                (Vectors.sparse(3, {1: 6.0, 2: 7.5}), 1, 1),
                (Vectors.sparse(3, {1: 8.0, 2: 9.5}), 2, 1),
            ],
            ["features", "label", "qid"],
        )
        # Test data: the last column holds the prediction that `test_ranker` compares against.
        self.ranker_df_test = self.session.createDataFrame(
            [
                (Vectors.dense(1.5, 2.0, 3.0), 0, -1.87988),
                (Vectors.dense(4.5, 5.0, 6.0), 0, 0.29556),
                (Vectors.dense(9.0, 4.5, 8.0), 0, 2.36570),
                (Vectors.sparse(3, {1: 1.0, 2: 6.0}), 1, -1.87988),
                (Vectors.sparse(3, {1: 6.0, 2: 7.0}), 1, -0.30612),
                (Vectors.sparse(3, {1: 8.0, 2: 10.5}), 1, 2.44826),
            ],
            ["features", "qid", "expected_prediction"],
        )
        # Training data whose qid values are listed in descending order (9, 8, 7, 6)
        # and repeated four times, so the input rows are not sorted by qid.
        self.ranker_df_train_1 = self.session.createDataFrame(
            [
                (Vectors.sparse(3, {1: 1.0, 2: 5.5}), 0, 9),
                (Vectors.sparse(3, {1: 6.0, 2: 7.5}), 1, 9),
                (Vectors.sparse(3, {1: 8.0, 2: 9.5}), 2, 9),
                (Vectors.dense(1.0, 2.0, 3.0), 0, 8),
                (Vectors.dense(4.0, 5.0, 6.0), 1, 8),
                (Vectors.dense(9.0, 4.0, 8.0), 2, 8),
                (Vectors.sparse(3, {1: 1.0, 2: 5.5}), 0, 7),
                (Vectors.sparse(3, {1: 6.0, 2: 7.5}), 1, 7),
                (Vectors.sparse(3, {1: 8.0, 2: 9.5}), 2, 7),
                (Vectors.dense(1.0, 2.0, 3.0), 0, 6),
                (Vectors.dense(4.0, 5.0, 6.0), 1, 6),
                (Vectors.dense(9.0, 4.0, 8.0), 2, 6),
            ]
            * 4,
            ["features", "label", "qid"],
        )
    def test_ranker(self):
        """Fit a ranker and compare its predictions with the expected values (rtol=1e-3)."""
        ranker = SparkXGBRanker(qid_col="qid")
        assert ranker.getOrDefault(ranker.objective) == "rank:pairwise"
        model = ranker.fit(self.ranker_df_train)
        pred_result = model.transform(self.ranker_df_test).collect()
        for row in pred_result:
            assert np.isclose(row.prediction, row.expected_prediction, rtol=1e-3)
    def test_ranker_qid_sorted(self):
        """Fit with four workers on data that is not sorted by qid.

        Prediction values are not asserted; the test only requires that fitting
        and transforming complete.
        """
        ranker = SparkXGBRanker(qid_col="qid", num_workers=4)
        assert ranker.getOrDefault(ranker.objective) == "rank:pairwise"
        model = ranker.fit(self.ranker_df_train_1)
        model.transform(self.ranker_df_test).collect()
Author	SHA1	Message	Date
Jiaming Yuan	62ed8b5fef	Bump release version to 1.7.2. (#8569 )	2022-12-08 21:46:26 +08:00
Jiaming Yuan	a980e10744	Properly await async method client.wait_for_workers (#8558 ) (#8567 ) * Properly await async method client.wait_for_workers * ignore mypy error. Co-authored-by: jiamingy <jm.yuan@outlook.com> Co-authored-by: Matthew Rocklin <mrocklin@gmail.com>	2022-12-07 23:25:05 +08:00
Jiaming Yuan	59c54e361b	[pyspark] Make QDM optional based on cuDF check (#8471 ) (#8556 ) Co-authored-by: WeichenXu <weichen.xu@databricks.com>	2022-12-07 03:19:35 +08:00
Jiaming Yuan	60a8c8ebba	[pyspark] sort qid for SparkRanker (#8497 ) (#8555 ) * [pyspark] sort qid for SparkRandker * resolve comments Co-authored-by: Bobby Wang <wbo4958@gmail.com>	2022-12-07 02:07:37 +08:00
Jiaming Yuan	58bc225657	[backport] [CI] Fix github action mismatched glibcxx. (#8551 ) (#8552 ) Split up the Linux test to use the toolchain from conda forge.	2022-12-06 21:35:26 +08:00
Jiaming Yuan	850b53100f	[backport] [doc] Fix outdated document [skip ci] (#8527 ) (#8553 ) * [doc] Fix document around categorical parameters. [skip ci] * note on validate parameter [skip ci] * Fix dask doc as well [skip ci]	2022-12-06 18:21:14 +08:00
Philip Hyunsu Cho	67b657dad0	SO_DOMAIN do not support on IBM i, using getsockname instead (#8437 ) (#8500 )	2022-11-30 11:47:59 -08:00
Philip Hyunsu Cho	db14e3feb7	Support null value in CUDA array interface. (#8486 ) (#8499 )	2022-11-30 11:44:54 -08:00
Robert Maynard	9372370dda	Work with newer thrust and libcudacxx (#8432 ) * Thrust 1.17 removes the experimental/pinned_allocator. When xgboost is brought into a large project it can be compiled against Thrust 1.17+ which don't offer this experimental allocator. To ensure that going forward xgboost works in all environments we provide a xgboost namespaced version of the pinned_allocator that previously was in Thrust. * Update gputreeshap to work with libcudacxx 1.9	2022-11-11 01:15:25 +08:00
Jiaming Yuan	1136a7e0c3	Fix CRAN note on cleanup. (#8447 )	2022-11-09 14:22:54 +08:00
Jiaming Yuan	a347cd512b	[backport] [R] Fix CRAN test notes. (#8428 ) (#8440 ) - Limit the number of used CPU cores in examples. - Add a note for the constraint. - Bring back the cleanup script.	2022-11-09 07:12:46 +08:00
Jiaming Yuan	9ff0c0832a	Fix 1.7.1 version file. (#8427 )	2022-11-06 03:19:54 +08:00