Compare commits

..

12 Commits

Author SHA1 Message Date
Jiaming Yuan
62ed8b5fef Bump release version to 1.7.2. (#8569) 2022-12-08 21:46:26 +08:00
Jiaming Yuan
a980e10744 Properly await async method client.wait_for_workers (#8558) (#8567)
* Properly await async method client.wait_for_workers

* ignore mypy error.

Co-authored-by: jiamingy <jm.yuan@outlook.com>

Co-authored-by: Matthew Rocklin <mrocklin@gmail.com>
2022-12-07 23:25:05 +08:00
Jiaming Yuan
59c54e361b [pyspark] Make QDM optional based on cuDF check (#8471) (#8556)
Co-authored-by: WeichenXu <weichen.xu@databricks.com>
2022-12-07 03:19:35 +08:00
Jiaming Yuan
60a8c8ebba [pyspark] sort qid for SparkRanker (#8497) (#8555)
* [pyspark] sort qid for SparkRandker

* resolve comments

Co-authored-by: Bobby Wang <wbo4958@gmail.com>
2022-12-07 02:07:37 +08:00
Jiaming Yuan
58bc225657 [backport] [CI] Fix github action mismatched glibcxx. (#8551) (#8552)
Split up the Linux test to use the toolchain from conda forge.
2022-12-06 21:35:26 +08:00
Jiaming Yuan
850b53100f [backport] [doc] Fix outdated document [skip ci] (#8527) (#8553)
* [doc] Fix document around categorical parameters. [skip ci]

* note on validate parameter [skip ci]

* Fix dask doc as well [skip ci]
2022-12-06 18:21:14 +08:00
Philip Hyunsu Cho
67b657dad0 SO_DOMAIN do not support on IBM i, using getsockname instead (#8437) (#8500) 2022-11-30 11:47:59 -08:00
Philip Hyunsu Cho
db14e3feb7 Support null value in CUDA array interface. (#8486) (#8499) 2022-11-30 11:44:54 -08:00
Robert Maynard
9372370dda Work with newer thrust and libcudacxx (#8432)
* Thrust 1.17 removes the experimental/pinned_allocator.

When xgboost is brought into a large project it can
be compiled against Thrust 1.17+ which don't offer
this experimental allocator.

To ensure that going forward xgboost works in all environments we provide a xgboost namespaced version of
the pinned_allocator that previously was in Thrust.

* Update gputreeshap to work with libcudacxx 1.9
2022-11-11 01:15:25 +08:00
Jiaming Yuan
1136a7e0c3 Fix CRAN note on cleanup. (#8447) 2022-11-09 14:22:54 +08:00
Jiaming Yuan
a347cd512b [backport] [R] Fix CRAN test notes. (#8428) (#8440)
- Limit the number of used CPU cores in examples.
- Add a note for the constraint.
- Bring back the cleanup script.
2022-11-09 07:12:46 +08:00
Jiaming Yuan
9ff0c0832a Fix 1.7.1 version file. (#8427) 2022-11-06 03:19:54 +08:00
52 changed files with 467 additions and 212 deletions

View File

@@ -75,19 +75,18 @@ jobs:
- uses: actions/checkout@v2 - uses: actions/checkout@v2
with: with:
submodules: 'true' submodules: 'true'
- name: Install system packages - uses: mamba-org/provision-with-micromamba@f347426e5745fe3dfc13ec5baf20496990d0281f # v14
run: |
sudo apt-get install -y --no-install-recommends ninja-build
- uses: conda-incubator/setup-miniconda@v2
with: with:
auto-update-conda: true cache-downloads: true
python-version: ${{ matrix.python-version }} cache-env: true
activate-environment: test environment-name: cpp_test
environment-file: tests/ci_build/conda_env/cpp_test.yml
- name: Display Conda env - name: Display Conda env
shell: bash -l {0} shell: bash -l {0}
run: | run: |
conda info conda info
conda list conda list
- name: Build and install XGBoost static library - name: Build and install XGBoost static library
shell: bash -l {0} shell: bash -l {0}
run: | run: |
@@ -109,6 +108,7 @@ jobs:
cd .. cd ..
rm -rf ./build rm -rf ./build
popd popd
- name: Build and install XGBoost shared library - name: Build and install XGBoost shared library
shell: bash -l {0} shell: bash -l {0}
run: | run: |

View File

@@ -41,12 +41,46 @@ jobs:
run: | run: |
python tests/ci_build/lint_python.py --format=0 --type-check=0 --pylint=1 python tests/ci_build/lint_python.py --format=0 --type-check=0 --pylint=1
python-sdist-test: python-sdist-test-on-Linux:
# Mismatched glibcxx version between system and conda forge.
runs-on: ${{ matrix.os }} runs-on: ${{ matrix.os }}
name: Test installing XGBoost Python source package on ${{ matrix.os }} name: Test installing XGBoost Python source package on ${{ matrix.os }}
strategy: strategy:
matrix: matrix:
os: [ubuntu-latest, macos-11, windows-latest] os: [ubuntu-latest]
steps:
- uses: actions/checkout@e2f20e631ae6d7dd3b768f56a5d2af784dd54791 # v2.5.0
with:
submodules: 'true'
- uses: mamba-org/provision-with-micromamba@f347426e5745fe3dfc13ec5baf20496990d0281f # v14
with:
cache-downloads: true
cache-env: false
environment-name: sdist_test
environment-file: tests/ci_build/conda_env/sdist_test.yml
- name: Display Conda env
shell: bash -l {0}
run: |
conda info
conda list
- name: Build and install XGBoost
shell: bash -l {0}
run: |
cd python-package
python --version
python setup.py sdist
pip install -v ./dist/xgboost-*.tar.gz
cd ..
python -c 'import xgboost'
python-sdist-test:
# Use system toolchain instead of conda toolchain for macos and windows.
# MacOS has linker error if clang++ from conda-forge is used
runs-on: ${{ matrix.os }}
name: Test installing XGBoost Python source package on ${{ matrix.os }}
strategy:
matrix:
os: [macos-11, windows-latest]
python-version: ["3.8"] python-version: ["3.8"]
steps: steps:
- uses: actions/checkout@v2 - uses: actions/checkout@v2
@@ -56,11 +90,7 @@ jobs:
if: matrix.os == 'macos-11' if: matrix.os == 'macos-11'
run: | run: |
brew install ninja libomp brew install ninja libomp
- name: Install Ubuntu system dependencies - uses: conda-incubator/setup-miniconda@35d1405e78aa3f784fe3ce9a2eb378d5eeb62169 # v2.1.1
if: matrix.os == 'ubuntu-latest'
run: |
sudo apt-get install -y --no-install-recommends ninja-build
- uses: conda-incubator/setup-miniconda@v2
with: with:
auto-update-conda: true auto-update-conda: true
python-version: ${{ matrix.python-version }} python-version: ${{ matrix.python-version }}
@@ -80,6 +110,58 @@ jobs:
cd .. cd ..
python -c 'import xgboost' python -c 'import xgboost'
python-tests-on-macos:
name: Test XGBoost Python package on ${{ matrix.config.os }}
runs-on: ${{ matrix.config.os }}
timeout-minutes: 60
strategy:
matrix:
config:
- {os: macos-11}
steps:
- uses: actions/checkout@e2f20e631ae6d7dd3b768f56a5d2af784dd54791 # v2.5.0
with:
submodules: 'true'
- uses: mamba-org/provision-with-micromamba@f347426e5745fe3dfc13ec5baf20496990d0281f # v14
with:
cache-downloads: true
cache-env: false
environment-name: macos_test
environment-file: tests/ci_build/conda_env/macos_cpu_test.yml
- name: Display Conda env
shell: bash -l {0}
run: |
conda info
conda list
- name: Build XGBoost on macos
shell: bash -l {0}
run: |
brew install ninja
mkdir build
cd build
# Set prefix, to use OpenMP library from Conda env
# See https://github.com/dmlc/xgboost/issues/7039#issuecomment-1025038228
# to learn why we don't use libomp from Homebrew.
cmake .. -GNinja -DCMAKE_PREFIX_PATH=$CONDA_PREFIX
ninja
- name: Install Python package
shell: bash -l {0}
run: |
cd python-package
python --version
python setup.py install
- name: Test Python package
shell: bash -l {0}
run: |
pytest -s -v -rxXs --durations=0 ./tests/python
python-tests-on-win: python-tests-on-win:
name: Test XGBoost Python package on ${{ matrix.config.os }} name: Test XGBoost Python package on ${{ matrix.config.os }}
runs-on: ${{ matrix.config.os }} runs-on: ${{ matrix.config.os }}
@@ -125,56 +207,4 @@ jobs:
- name: Test Python package - name: Test Python package
shell: bash -l {0} shell: bash -l {0}
run: | run: |
pytest -s -v ./tests/python pytest -s -v -rxXs --durations=0 ./tests/python
python-tests-on-macos:
name: Test XGBoost Python package on ${{ matrix.config.os }}
runs-on: ${{ matrix.config.os }}
timeout-minutes: 90
strategy:
matrix:
config:
- {os: macos-11, python-version "3.8" }
steps:
- uses: actions/checkout@v2
with:
submodules: 'true'
- uses: conda-incubator/setup-miniconda@v2
with:
auto-update-conda: true
python-version: ${{ matrix.config.python-version }}
activate-environment: macos_test
environment-file: tests/ci_build/conda_env/macos_cpu_test.yml
- name: Display Conda env
shell: bash -l {0}
run: |
conda info
conda list
- name: Build XGBoost on macos
shell: bash -l {0}
run: |
brew install ninja
mkdir build
cd build
# Set prefix, to use OpenMP library from Conda env
# See https://github.com/dmlc/xgboost/issues/7039#issuecomment-1025038228
# to learn why we don't use libomp from Homebrew.
cmake .. -GNinja -DGOOGLE_TEST=ON -DUSE_DMLC_GTEST=ON -DCMAKE_PREFIX_PATH=$CONDA_PREFIX
ninja
- name: Install Python package
shell: bash -l {0}
run: |
cd python-package
python --version
python setup.py install
- name: Test Python package
shell: bash -l {0}
run: |
pytest -s -v ./tests/python

View File

@@ -5,6 +5,7 @@ on: [push, pull_request]
env: env:
R_PACKAGES: c('XML', 'data.table', 'ggplot2', 'DiagrammeR', 'Ckmeans.1d.dp', 'vcd', 'testthat', 'lintr', 'knitr', 'rmarkdown', 'e1071', 'cplm', 'devtools', 'float', 'titanic') R_PACKAGES: c('XML', 'data.table', 'ggplot2', 'DiagrammeR', 'Ckmeans.1d.dp', 'vcd', 'testthat', 'lintr', 'knitr', 'rmarkdown', 'e1071', 'cplm', 'devtools', 'float', 'titanic')
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
_R_CHECK_EXAMPLE_TIMING_CPU_TO_ELAPSED_THRESHOLD_: 2.5
permissions: permissions:
contents: read # to fetch code (actions/checkout) contents: read # to fetch code (actions/checkout)
@@ -68,6 +69,7 @@ jobs:
- {os: windows-latest, r: 'release', compiler: 'mingw', build: 'cmake'} - {os: windows-latest, r: 'release', compiler: 'mingw', build: 'cmake'}
env: env:
R_REMOTES_NO_ERRORS_FROM_WARNINGS: true R_REMOTES_NO_ERRORS_FROM_WARNINGS: true
_R_CHECK_EXAMPLE_TIMING_CPU_TO_ELAPSED_THRESHOLD_: 2.5
RSPM: ${{ matrix.config.rspm }} RSPM: ${{ matrix.config.rspm }}
steps: steps:
@@ -121,6 +123,10 @@ jobs:
config: config:
- {r: 'release'} - {r: 'release'}
env:
_R_CHECK_EXAMPLE_TIMING_CPU_TO_ELAPSED_THRESHOLD_: 2.5
MAKE: "make -j$(nproc)"
steps: steps:
- uses: actions/checkout@v2 - uses: actions/checkout@v2
with: with:

View File

@@ -1,5 +1,5 @@
cmake_minimum_required(VERSION 3.14 FATAL_ERROR) cmake_minimum_required(VERSION 3.14 FATAL_ERROR)
project(xgboost LANGUAGES CXX C VERSION 1.7.1) project(xgboost LANGUAGES CXX C VERSION 1.7.2)
include(cmake/Utils.cmake) include(cmake/Utils.cmake)
list(APPEND CMAKE_MODULE_PATH "${xgboost_SOURCE_DIR}/cmake/modules") list(APPEND CMAKE_MODULE_PATH "${xgboost_SOURCE_DIR}/cmake/modules")
cmake_policy(SET CMP0022 NEW) cmake_policy(SET CMP0022 NEW)

View File

@@ -126,7 +126,6 @@ Rpack: clean_all
cat R-package/src/Makevars.in|sed '2s/.*/PKGROOT=./' > xgboost/src/Makevars.in cat R-package/src/Makevars.in|sed '2s/.*/PKGROOT=./' > xgboost/src/Makevars.in
cat R-package/src/Makevars.win|sed '2s/.*/PKGROOT=./' > xgboost/src/Makevars.win cat R-package/src/Makevars.win|sed '2s/.*/PKGROOT=./' > xgboost/src/Makevars.win
rm -f xgboost/src/Makevars.win-e # OSX sed create this extra file; remove it rm -f xgboost/src/Makevars.win-e # OSX sed create this extra file; remove it
rm -f xgboost/cleanup
bash R-package/remove_warning_suppression_pragma.sh bash R-package/remove_warning_suppression_pragma.sh
bash xgboost/remove_warning_suppression_pragma.sh bash xgboost/remove_warning_suppression_pragma.sh
rm xgboost/remove_warning_suppression_pragma.sh rm xgboost/remove_warning_suppression_pragma.sh

View File

@@ -1,8 +1,8 @@
Package: xgboost Package: xgboost
Type: Package Type: Package
Title: Extreme Gradient Boosting Title: Extreme Gradient Boosting
Version: 1.7.1.1 Version: 1.7.2.1
Date: 2022-11-03 Date: 2022-12-08
Authors@R: c( Authors@R: c(
person("Tianqi", "Chen", role = c("aut"), person("Tianqi", "Chen", role = c("aut"),
email = "tianqi.tchen@gmail.com"), email = "tianqi.tchen@gmail.com"),
@@ -66,5 +66,5 @@ Imports:
methods, methods,
data.table (>= 1.9.6), data.table (>= 1.9.6),
jsonlite (>= 1.0), jsonlite (>= 1.0),
RoxygenNote: 7.1.1 RoxygenNote: 7.2.1
SystemRequirements: GNU make, C++14 SystemRequirements: GNU make, C++14

View File

@@ -544,9 +544,11 @@ cb.cv.predict <- function(save_models = FALSE) {
#' #'
#' @return #' @return
#' Results are stored in the \code{coefs} element of the closure. #' Results are stored in the \code{coefs} element of the closure.
#' The \code{\link{xgb.gblinear.history}} convenience function provides an easy way to access it. #' The \code{\link{xgb.gblinear.history}} convenience function provides an easy
#' way to access it.
#' With \code{xgb.train}, it is either a dense of a sparse matrix. #' With \code{xgb.train}, it is either a dense of a sparse matrix.
#' While with \code{xgb.cv}, it is a list (an element per each fold) of such matrices. #' While with \code{xgb.cv}, it is a list (an element per each fold) of such
#' matrices.
#' #'
#' @seealso #' @seealso
#' \code{\link{callbacks}}, \code{\link{xgb.gblinear.history}}. #' \code{\link{callbacks}}, \code{\link{xgb.gblinear.history}}.
@@ -558,7 +560,7 @@ cb.cv.predict <- function(save_models = FALSE) {
#' # without considering the 2nd order interactions: #' # without considering the 2nd order interactions:
#' x <- model.matrix(Species ~ .^2, iris)[,-1] #' x <- model.matrix(Species ~ .^2, iris)[,-1]
#' colnames(x) #' colnames(x)
#' dtrain <- xgb.DMatrix(scale(x), label = 1*(iris$Species == "versicolor")) #' dtrain <- xgb.DMatrix(scale(x), label = 1*(iris$Species == "versicolor"), nthread = 2)
#' param <- list(booster = "gblinear", objective = "reg:logistic", eval_metric = "auc", #' param <- list(booster = "gblinear", objective = "reg:logistic", eval_metric = "auc",
#' lambda = 0.0003, alpha = 0.0003, nthread = 2) #' lambda = 0.0003, alpha = 0.0003, nthread = 2)
#' # For 'shotgun', which is a default linear updater, using high eta values may result in #' # For 'shotgun', which is a default linear updater, using high eta values may result in
@@ -590,7 +592,7 @@ cb.cv.predict <- function(save_models = FALSE) {
#' #'
#' #### Multiclass classification: #' #### Multiclass classification:
#' # #' #
#' dtrain <- xgb.DMatrix(scale(x), label = as.numeric(iris$Species) - 1) #' dtrain <- xgb.DMatrix(scale(x), label = as.numeric(iris$Species) - 1, nthread = 2)
#' param <- list(booster = "gblinear", objective = "multi:softprob", num_class = 3, #' param <- list(booster = "gblinear", objective = "multi:softprob", num_class = 3,
#' lambda = 0.0003, alpha = 0.0003, nthread = 2) #' lambda = 0.0003, alpha = 0.0003, nthread = 2)
#' # For the default linear updater 'shotgun' it sometimes is helpful #' # For the default linear updater 'shotgun' it sometimes is helpful

View File

@@ -18,7 +18,7 @@
#' #'
#' @examples #' @examples
#' data(agaricus.train, package='xgboost') #' data(agaricus.train, package='xgboost')
#' dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label)) #' dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))
#' xgb.DMatrix.save(dtrain, 'xgb.DMatrix.data') #' xgb.DMatrix.save(dtrain, 'xgb.DMatrix.data')
#' dtrain <- xgb.DMatrix('xgb.DMatrix.data') #' dtrain <- xgb.DMatrix('xgb.DMatrix.data')
#' if (file.exists('xgb.DMatrix.data')) file.remove('xgb.DMatrix.data') #' if (file.exists('xgb.DMatrix.data')) file.remove('xgb.DMatrix.data')
@@ -110,7 +110,7 @@ xgb.get.DMatrix <- function(data, label = NULL, missing = NA, weight = NULL, nth
#' @examples #' @examples
#' data(agaricus.train, package='xgboost') #' data(agaricus.train, package='xgboost')
#' train <- agaricus.train #' train <- agaricus.train
#' dtrain <- xgb.DMatrix(train$data, label=train$label) #' dtrain <- xgb.DMatrix(train$data, label=train$label, nthread = 2)
#' #'
#' stopifnot(nrow(dtrain) == nrow(train$data)) #' stopifnot(nrow(dtrain) == nrow(train$data))
#' stopifnot(ncol(dtrain) == ncol(train$data)) #' stopifnot(ncol(dtrain) == ncol(train$data))
@@ -138,7 +138,7 @@ dim.xgb.DMatrix <- function(x) {
#' @examples #' @examples
#' data(agaricus.train, package='xgboost') #' data(agaricus.train, package='xgboost')
#' train <- agaricus.train #' train <- agaricus.train
#' dtrain <- xgb.DMatrix(train$data, label=train$label) #' dtrain <- xgb.DMatrix(train$data, label=train$label, nthread = 2)
#' dimnames(dtrain) #' dimnames(dtrain)
#' colnames(dtrain) #' colnames(dtrain)
#' colnames(dtrain) <- make.names(1:ncol(train$data)) #' colnames(dtrain) <- make.names(1:ncol(train$data))
@@ -193,7 +193,7 @@ dimnames.xgb.DMatrix <- function(x) {
#' #'
#' @examples #' @examples
#' data(agaricus.train, package='xgboost') #' data(agaricus.train, package='xgboost')
#' dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label)) #' dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))
#' #'
#' labels <- getinfo(dtrain, 'label') #' labels <- getinfo(dtrain, 'label')
#' setinfo(dtrain, 'label', 1-labels) #' setinfo(dtrain, 'label', 1-labels)
@@ -249,7 +249,7 @@ getinfo.xgb.DMatrix <- function(object, name, ...) {
#' #'
#' @examples #' @examples
#' data(agaricus.train, package='xgboost') #' data(agaricus.train, package='xgboost')
#' dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label)) #' dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))
#' #'
#' labels <- getinfo(dtrain, 'label') #' labels <- getinfo(dtrain, 'label')
#' setinfo(dtrain, 'label', 1-labels) #' setinfo(dtrain, 'label', 1-labels)
@@ -345,7 +345,7 @@ setinfo.xgb.DMatrix <- function(object, name, info, ...) {
#' #'
#' @examples #' @examples
#' data(agaricus.train, package='xgboost') #' data(agaricus.train, package='xgboost')
#' dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label)) #' dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))
#' #'
#' dsub <- slice(dtrain, 1:42) #' dsub <- slice(dtrain, 1:42)
#' labels1 <- getinfo(dsub, 'label') #' labels1 <- getinfo(dsub, 'label')
@@ -401,7 +401,7 @@ slice.xgb.DMatrix <- function(object, idxset, ...) {
#' #'
#' @examples #' @examples
#' data(agaricus.train, package='xgboost') #' data(agaricus.train, package='xgboost')
#' dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label)) #' dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))
#' #'
#' dtrain #' dtrain
#' print(dtrain, verbose=TRUE) #' print(dtrain, verbose=TRUE)

View File

@@ -7,7 +7,7 @@
#' #'
#' @examples #' @examples
#' data(agaricus.train, package='xgboost') #' data(agaricus.train, package='xgboost')
#' dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label)) #' dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))
#' xgb.DMatrix.save(dtrain, 'xgb.DMatrix.data') #' xgb.DMatrix.save(dtrain, 'xgb.DMatrix.data')
#' dtrain <- xgb.DMatrix('xgb.DMatrix.data') #' dtrain <- xgb.DMatrix('xgb.DMatrix.data')
#' if (file.exists('xgb.DMatrix.data')) file.remove('xgb.DMatrix.data') #' if (file.exists('xgb.DMatrix.data')) file.remove('xgb.DMatrix.data')

View File

@@ -48,8 +48,8 @@
#' @examples #' @examples
#' data(agaricus.train, package='xgboost') #' data(agaricus.train, package='xgboost')
#' data(agaricus.test, package='xgboost') #' data(agaricus.test, package='xgboost')
#' dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label)) #' dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))
#' dtest <- with(agaricus.test, xgb.DMatrix(data, label = label)) #' dtest <- with(agaricus.test, xgb.DMatrix(data, label = label, nthread = 2))
#' #'
#' param <- list(max_depth=2, eta=1, silent=1, objective='binary:logistic') #' param <- list(max_depth=2, eta=1, silent=1, objective='binary:logistic')
#' nrounds = 4 #' nrounds = 4
@@ -65,8 +65,12 @@
#' new.features.test <- xgb.create.features(model = bst, agaricus.test$data) #' new.features.test <- xgb.create.features(model = bst, agaricus.test$data)
#' #'
#' # learning with new features #' # learning with new features
#' new.dtrain <- xgb.DMatrix(data = new.features.train, label = agaricus.train$label) #' new.dtrain <- xgb.DMatrix(
#' new.dtest <- xgb.DMatrix(data = new.features.test, label = agaricus.test$label) #' data = new.features.train, label = agaricus.train$label, nthread = 2
#' )
#' new.dtest <- xgb.DMatrix(
#' data = new.features.test, label = agaricus.test$label, nthread = 2
#' )
#' watchlist <- list(train = new.dtrain) #' watchlist <- list(train = new.dtrain)
#' bst <- xgb.train(params = param, data = new.dtrain, nrounds = nrounds, nthread = 2) #' bst <- xgb.train(params = param, data = new.dtrain, nrounds = nrounds, nthread = 2)
#' #'
@@ -79,7 +83,7 @@
#' accuracy.after, "!\n")) #' accuracy.after, "!\n"))
#' #'
#' @export #' @export
xgb.create.features <- function(model, data, ...){ xgb.create.features <- function(model, data, ...) {
check.deprecation(...) check.deprecation(...)
pred_with_leaf <- predict(model, data, predleaf = TRUE) pred_with_leaf <- predict(model, data, predleaf = TRUE)
cols <- lapply(as.data.frame(pred_with_leaf), factor) cols <- lapply(as.data.frame(pred_with_leaf), factor)

View File

@@ -110,7 +110,7 @@
#' #'
#' @examples #' @examples
#' data(agaricus.train, package='xgboost') #' data(agaricus.train, package='xgboost')
#' dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label)) #' dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))
#' cv <- xgb.cv(data = dtrain, nrounds = 3, nthread = 2, nfold = 5, metrics = list("rmse","auc"), #' cv <- xgb.cv(data = dtrain, nrounds = 3, nthread = 2, nfold = 5, metrics = list("rmse","auc"),
#' max_depth = 3, eta = 1, objective = "binary:logistic") #' max_depth = 3, eta = 1, objective = "binary:logistic")
#' print(cv) #' print(cv)
@@ -192,7 +192,7 @@ xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, missing =
# create the booster-folds # create the booster-folds
# train_folds # train_folds
dall <- xgb.get.DMatrix(data, label, missing) dall <- xgb.get.DMatrix(data, label, missing, nthread = params$nthread)
bst_folds <- lapply(seq_along(folds), function(k) { bst_folds <- lapply(seq_along(folds), function(k) {
dtest <- slice(dall, folds[[k]]) dtest <- slice(dall, folds[[k]])
# code originally contributed by @RolandASc on stackoverflow # code originally contributed by @RolandASc on stackoverflow

View File

@@ -192,8 +192,8 @@
#' data(agaricus.train, package='xgboost') #' data(agaricus.train, package='xgboost')
#' data(agaricus.test, package='xgboost') #' data(agaricus.test, package='xgboost')
#' #'
#' dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label)) #' dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))
#' dtest <- with(agaricus.test, xgb.DMatrix(data, label = label)) #' dtest <- with(agaricus.test, xgb.DMatrix(data, label = label, nthread = 2))
#' watchlist <- list(train = dtrain, eval = dtest) #' watchlist <- list(train = dtrain, eval = dtest)
#' #'
#' ## A simple xgb.train example: #' ## A simple xgb.train example:

18
R-package/configure vendored
View File

@@ -1,6 +1,6 @@
#! /bin/sh #! /bin/sh
# Guess values for system-dependent variables and create Makefiles. # Guess values for system-dependent variables and create Makefiles.
# Generated by GNU Autoconf 2.69 for xgboost 1.7.1. # Generated by GNU Autoconf 2.69 for xgboost 1.7.2.
# #
# #
# Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc. # Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc.
@@ -576,8 +576,8 @@ MAKEFLAGS=
# Identity of this package. # Identity of this package.
PACKAGE_NAME='xgboost' PACKAGE_NAME='xgboost'
PACKAGE_TARNAME='xgboost' PACKAGE_TARNAME='xgboost'
PACKAGE_VERSION='1.7.1' PACKAGE_VERSION='1.7.2'
PACKAGE_STRING='xgboost 1.7.1' PACKAGE_STRING='xgboost 1.7.2'
PACKAGE_BUGREPORT='' PACKAGE_BUGREPORT=''
PACKAGE_URL='' PACKAGE_URL=''
@@ -1195,7 +1195,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing. # Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh. # This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF cat <<_ACEOF
\`configure' configures xgboost 1.7.1 to adapt to many kinds of systems. \`configure' configures xgboost 1.7.2 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]... Usage: $0 [OPTION]... [VAR=VALUE]...
@@ -1257,7 +1257,7 @@ fi
if test -n "$ac_init_help"; then if test -n "$ac_init_help"; then
case $ac_init_help in case $ac_init_help in
short | recursive ) echo "Configuration of xgboost 1.7.1:";; short | recursive ) echo "Configuration of xgboost 1.7.2:";;
esac esac
cat <<\_ACEOF cat <<\_ACEOF
@@ -1336,7 +1336,7 @@ fi
test -n "$ac_init_help" && exit $ac_status test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then if $ac_init_version; then
cat <<\_ACEOF cat <<\_ACEOF
xgboost configure 1.7.1 xgboost configure 1.7.2
generated by GNU Autoconf 2.69 generated by GNU Autoconf 2.69
Copyright (C) 2012 Free Software Foundation, Inc. Copyright (C) 2012 Free Software Foundation, Inc.
@@ -1479,7 +1479,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake. running configure, to aid debugging if configure makes a mistake.
It was created by xgboost $as_me 1.7.1, which was It was created by xgboost $as_me 1.7.2, which was
generated by GNU Autoconf 2.69. Invocation command line was generated by GNU Autoconf 2.69. Invocation command line was
$ $0 $@ $ $0 $@
@@ -3294,7 +3294,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
# report actual input values of CONFIG_FILES etc. instead of their # report actual input values of CONFIG_FILES etc. instead of their
# values after options handling. # values after options handling.
ac_log=" ac_log="
This file was extended by xgboost $as_me 1.7.1, which was This file was extended by xgboost $as_me 1.7.2, which was
generated by GNU Autoconf 2.69. Invocation command line was generated by GNU Autoconf 2.69. Invocation command line was
CONFIG_FILES = $CONFIG_FILES CONFIG_FILES = $CONFIG_FILES
@@ -3347,7 +3347,7 @@ _ACEOF
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`" ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
ac_cs_version="\\ ac_cs_version="\\
xgboost config.status 1.7.1 xgboost config.status 1.7.2
configured by $0, generated by GNU Autoconf 2.69, configured by $0, generated by GNU Autoconf 2.69,
with options \\"\$ac_cs_config\\" with options \\"\$ac_cs_config\\"

View File

@@ -2,7 +2,7 @@
AC_PREREQ(2.69) AC_PREREQ(2.69)
AC_INIT([xgboost],[1.7.1],[],[xgboost],[]) AC_INIT([xgboost],[1.7.2],[],[xgboost],[])
# Use this line to set CC variable to a C compiler # Use this line to set CC variable to a C compiler
AC_PROG_CC AC_PROG_CC

View File

@@ -15,9 +15,11 @@ selected per iteration.}
} }
\value{ \value{
Results are stored in the \code{coefs} element of the closure. Results are stored in the \code{coefs} element of the closure.
The \code{\link{xgb.gblinear.history}} convenience function provides an easy way to access it. The \code{\link{xgb.gblinear.history}} convenience function provides an easy
way to access it.
With \code{xgb.train}, it is either a dense of a sparse matrix. With \code{xgb.train}, it is either a dense of a sparse matrix.
While with \code{xgb.cv}, it is a list (an element per each fold) of such matrices. While with \code{xgb.cv}, it is a list (an element per each fold) of such
matrices.
} }
\description{ \description{
Callback closure for collecting the model coefficients history of a gblinear booster Callback closure for collecting the model coefficients history of a gblinear booster
@@ -38,7 +40,7 @@ Callback function expects the following values to be set in its calling frame:
# without considering the 2nd order interactions: # without considering the 2nd order interactions:
x <- model.matrix(Species ~ .^2, iris)[,-1] x <- model.matrix(Species ~ .^2, iris)[,-1]
colnames(x) colnames(x)
dtrain <- xgb.DMatrix(scale(x), label = 1*(iris$Species == "versicolor")) dtrain <- xgb.DMatrix(scale(x), label = 1*(iris$Species == "versicolor"), nthread = 2)
param <- list(booster = "gblinear", objective = "reg:logistic", eval_metric = "auc", param <- list(booster = "gblinear", objective = "reg:logistic", eval_metric = "auc",
lambda = 0.0003, alpha = 0.0003, nthread = 2) lambda = 0.0003, alpha = 0.0003, nthread = 2)
# For 'shotgun', which is a default linear updater, using high eta values may result in # For 'shotgun', which is a default linear updater, using high eta values may result in
@@ -70,7 +72,7 @@ matplot(xgb.gblinear.history(bst)[[3]], type = 'l')
#### Multiclass classification: #### Multiclass classification:
# #
dtrain <- xgb.DMatrix(scale(x), label = as.numeric(iris$Species) - 1) dtrain <- xgb.DMatrix(scale(x), label = as.numeric(iris$Species) - 1, nthread = 2)
param <- list(booster = "gblinear", objective = "multi:softprob", num_class = 3, param <- list(booster = "gblinear", objective = "multi:softprob", num_class = 3,
lambda = 0.0003, alpha = 0.0003, nthread = 2) lambda = 0.0003, alpha = 0.0003, nthread = 2)
# For the default linear updater 'shotgun' it sometimes is helpful # For the default linear updater 'shotgun' it sometimes is helpful

View File

@@ -19,7 +19,7 @@ be directly used with an \code{xgb.DMatrix} object.
\examples{ \examples{
data(agaricus.train, package='xgboost') data(agaricus.train, package='xgboost')
train <- agaricus.train train <- agaricus.train
dtrain <- xgb.DMatrix(train$data, label=train$label) dtrain <- xgb.DMatrix(train$data, label=train$label, nthread = 2)
stopifnot(nrow(dtrain) == nrow(train$data)) stopifnot(nrow(dtrain) == nrow(train$data))
stopifnot(ncol(dtrain) == ncol(train$data)) stopifnot(ncol(dtrain) == ncol(train$data))

View File

@@ -26,7 +26,7 @@ Since row names are irrelevant, it is recommended to use \code{colnames} directl
\examples{ \examples{
data(agaricus.train, package='xgboost') data(agaricus.train, package='xgboost')
train <- agaricus.train train <- agaricus.train
dtrain <- xgb.DMatrix(train$data, label=train$label) dtrain <- xgb.DMatrix(train$data, label=train$label, nthread = 2)
dimnames(dtrain) dimnames(dtrain)
colnames(dtrain) colnames(dtrain)
colnames(dtrain) <- make.names(1:ncol(train$data)) colnames(dtrain) <- make.names(1:ncol(train$data))

View File

@@ -34,7 +34,7 @@ The \code{name} field can be one of the following:
} }
\examples{ \examples{
data(agaricus.train, package='xgboost') data(agaricus.train, package='xgboost')
dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label)) dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))
labels <- getinfo(dtrain, 'label') labels <- getinfo(dtrain, 'label')
setinfo(dtrain, 'label', 1-labels) setinfo(dtrain, 'label', 1-labels)

View File

@@ -19,7 +19,7 @@ Currently it displays dimensions and presence of info-fields and colnames.
} }
\examples{ \examples{
data(agaricus.train, package='xgboost') data(agaricus.train, package='xgboost')
dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label)) dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))
dtrain dtrain
print(dtrain, verbose=TRUE) print(dtrain, verbose=TRUE)

View File

@@ -33,7 +33,7 @@ The \code{name} field can be one of the following:
} }
\examples{ \examples{
data(agaricus.train, package='xgboost') data(agaricus.train, package='xgboost')
dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label)) dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))
labels <- getinfo(dtrain, 'label') labels <- getinfo(dtrain, 'label')
setinfo(dtrain, 'label', 1-labels) setinfo(dtrain, 'label', 1-labels)

View File

@@ -28,7 +28,7 @@ original xgb.DMatrix object
} }
\examples{ \examples{
data(agaricus.train, package='xgboost') data(agaricus.train, package='xgboost')
dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label)) dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))
dsub <- slice(dtrain, 1:42) dsub <- slice(dtrain, 1:42)
labels1 <- getinfo(dsub, 'label') labels1 <- getinfo(dsub, 'label')

View File

@@ -38,7 +38,7 @@ Supported input file formats are either a LIBSVM text file or a binary file that
} }
\examples{ \examples{
data(agaricus.train, package='xgboost') data(agaricus.train, package='xgboost')
dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label)) dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))
xgb.DMatrix.save(dtrain, 'xgb.DMatrix.data') xgb.DMatrix.save(dtrain, 'xgb.DMatrix.data')
dtrain <- xgb.DMatrix('xgb.DMatrix.data') dtrain <- xgb.DMatrix('xgb.DMatrix.data')
if (file.exists('xgb.DMatrix.data')) file.remove('xgb.DMatrix.data') if (file.exists('xgb.DMatrix.data')) file.remove('xgb.DMatrix.data')

View File

@@ -16,7 +16,7 @@ Save xgb.DMatrix object to binary file
} }
\examples{ \examples{
data(agaricus.train, package='xgboost') data(agaricus.train, package='xgboost')
dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label)) dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))
xgb.DMatrix.save(dtrain, 'xgb.DMatrix.data') xgb.DMatrix.save(dtrain, 'xgb.DMatrix.data')
dtrain <- xgb.DMatrix('xgb.DMatrix.data') dtrain <- xgb.DMatrix('xgb.DMatrix.data')
if (file.exists('xgb.DMatrix.data')) file.remove('xgb.DMatrix.data') if (file.exists('xgb.DMatrix.data')) file.remove('xgb.DMatrix.data')

View File

@@ -59,8 +59,8 @@ a rule on certain features."
\examples{ \examples{
data(agaricus.train, package='xgboost') data(agaricus.train, package='xgboost')
data(agaricus.test, package='xgboost') data(agaricus.test, package='xgboost')
dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label)) dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))
dtest <- with(agaricus.test, xgb.DMatrix(data, label = label)) dtest <- with(agaricus.test, xgb.DMatrix(data, label = label, nthread = 2))
param <- list(max_depth=2, eta=1, silent=1, objective='binary:logistic') param <- list(max_depth=2, eta=1, silent=1, objective='binary:logistic')
nrounds = 4 nrounds = 4
@@ -76,8 +76,12 @@ new.features.train <- xgb.create.features(model = bst, agaricus.train$data)
new.features.test <- xgb.create.features(model = bst, agaricus.test$data) new.features.test <- xgb.create.features(model = bst, agaricus.test$data)
# learning with new features # learning with new features
new.dtrain <- xgb.DMatrix(data = new.features.train, label = agaricus.train$label) new.dtrain <- xgb.DMatrix(
new.dtest <- xgb.DMatrix(data = new.features.test, label = agaricus.test$label) data = new.features.train, label = agaricus.train$label, nthread = 2
)
new.dtest <- xgb.DMatrix(
data = new.features.test, label = agaricus.test$label, nthread = 2
)
watchlist <- list(train = new.dtrain) watchlist <- list(train = new.dtrain)
bst <- xgb.train(params = param, data = new.dtrain, nrounds = nrounds, nthread = 2) bst <- xgb.train(params = param, data = new.dtrain, nrounds = nrounds, nthread = 2)

View File

@@ -158,7 +158,7 @@ Adapted from \url{https://en.wikipedia.org/wiki/Cross-validation_\%28statistics\
} }
\examples{ \examples{
data(agaricus.train, package='xgboost') data(agaricus.train, package='xgboost')
dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label)) dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))
cv <- xgb.cv(data = dtrain, nrounds = 3, nthread = 2, nfold = 5, metrics = list("rmse","auc"), cv <- xgb.cv(data = dtrain, nrounds = 3, nthread = 2, nfold = 5, metrics = list("rmse","auc"),
max_depth = 3, eta = 1, objective = "binary:logistic") max_depth = 3, eta = 1, objective = "binary:logistic")
print(cv) print(cv)

View File

@@ -241,8 +241,8 @@ The following callbacks are automatically created when certain parameters are se
data(agaricus.train, package='xgboost') data(agaricus.train, package='xgboost')
data(agaricus.test, package='xgboost') data(agaricus.test, package='xgboost')
dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label)) dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))
dtest <- with(agaricus.test, xgb.DMatrix(data, label = label)) dtest <- with(agaricus.test, xgb.DMatrix(data, label = label, nthread = 2))
watchlist <- list(train = dtrain, eval = dtest) watchlist <- list(train = dtrain, eval = dtest)
## A simple xgb.train example: ## A simple xgb.train example:

View File

@@ -4,7 +4,7 @@ XGBoost Release Policy
======================= =======================
Versioning Policy Versioning Policy
--------------------------- -----------------
Starting from XGBoost 1.0.0, each XGBoost release will be versioned as [MAJOR].[FEATURE].[MAINTENANCE] Starting from XGBoost 1.0.0, each XGBoost release will be versioned as [MAJOR].[FEATURE].[MAINTENANCE]
@@ -34,6 +34,20 @@ Making a Release
+ The CRAN package is maintained by `Tong He <https://github.com/hetong007>`_ and `Jiaming Yuan <https://github.com/trivialfis>`__. + The CRAN package is maintained by `Tong He <https://github.com/hetong007>`_ and `Jiaming Yuan <https://github.com/trivialfis>`__.
Before submitting a release, one should test the package on `R-hub <https://builder.r-hub.io/>`__ and `win-builder <https://win-builder.r-project.org/>`__ first. Please note that the R-hub Windows instance doesn't have the exact same environment as the one hosted on win-builder.
+ The Maven package is maintained by `Nan Zhu <https://github.com/CodingCat>`_ and `Hyunsu Cho <https://github.com/hcho3>`_. + The Maven package is maintained by `Nan Zhu <https://github.com/CodingCat>`_ and `Hyunsu Cho <https://github.com/hcho3>`_.
R CRAN Package
--------------
Before submitting a release, one should test the package on `R-hub <https://builder.r-hub.io/>`__ and `win-builder <https://win-builder.r-project.org/>`__ first. Please note that the R-hub Windows instance doesn't have the exact same environment as the one hosted on win-builder.
According to the `CRAN policy <https://cran.r-project.org/web/packages/policies.html>`__:
If running a package uses multiple threads/cores it must never use more than two simultaneously: the check farm is a shared resource and will typically be running many checks simultaneously.
We need to check the number of CPUs used in examples. Export ``_R_CHECK_EXAMPLE_TIMING_CPU_TO_ELAPSED_THRESHOLD_=2.5`` before running ``R CMD check --as-cran`` `[1] <#references>`__ and make sure the machine you are using has enough CPU cores to reveal any potential policy violation.
References
----------
[1] https://stat.ethz.ch/pipermail/r-package-devel/2022q4/008610.html

View File

@@ -44,8 +44,7 @@ General Parameters
* ``validate_parameters`` [default to ``false``, except for Python, R and CLI interface] * ``validate_parameters`` [default to ``false``, except for Python, R and CLI interface]
- When set to True, XGBoost will perform validation of input parameters to check whether - When set to True, XGBoost will perform validation of input parameters to check whether
a parameter is used or not. The feature is still experimental. It's expected to have a parameter is used or not.
some false positives.
* ``nthread`` [default to maximum number of threads available if not set] * ``nthread`` [default to maximum number of threads available if not set]
@@ -233,24 +232,21 @@ Parameters for Categorical Feature
These parameters are only used for training with categorical data. See These parameters are only used for training with categorical data. See
:doc:`/tutorials/categorical` for more information. :doc:`/tutorials/categorical` for more information.
.. note:: These parameters are experimental. ``exact`` tree method is not yet supported.
* ``max_cat_to_onehot`` * ``max_cat_to_onehot``
.. versionadded:: 1.6.0 .. versionadded:: 1.6.0
.. note:: This parameter is experimental. ``exact`` tree method is not yet supported.
- A threshold for deciding whether XGBoost should use one-hot encoding based split for - A threshold for deciding whether XGBoost should use one-hot encoding based split for
categorical data. When number of categories is lesser than the threshold then one-hot categorical data. When number of categories is lesser than the threshold then one-hot
encoding is chosen, otherwise the categories will be partitioned into children nodes. encoding is chosen, otherwise the categories will be partitioned into children nodes.
Only relevant for regression and binary classification. Also, ``exact`` tree method is
not supported
* ``max_cat_threshold`` * ``max_cat_threshold``
.. versionadded:: 1.7.0 .. versionadded:: 1.7.0
.. note:: This parameter is experimental. ``exact`` tree method is not yet supported.
- Maximum number of categories considered for each split. Used only by partition-based - Maximum number of categories considered for each split. Used only by partition-based
splits for preventing over-fitting. splits for preventing over-fitting.

View File

@@ -25,9 +25,6 @@ Core Data Structure
.. autoclass:: xgboost.QuantileDMatrix .. autoclass:: xgboost.QuantileDMatrix
:show-inheritance: :show-inheritance:
.. autoclass:: xgboost.DeviceQuantileDMatrix
:show-inheritance:
.. autoclass:: xgboost.Booster .. autoclass:: xgboost.Booster
:members: :members:
:show-inheritance: :show-inheritance:
@@ -115,7 +112,7 @@ Dask API
:inherited-members: :inherited-members:
:show-inheritance: :show-inheritance:
.. autoclass:: xgboost.dask.DaskDeviceQuantileDMatrix .. autoclass:: xgboost.dask.DaskQuantileDMatrix
:members: :members:
:inherited-members: :inherited-members:
:show-inheritance: :show-inheritance:

View File

@@ -564,7 +564,7 @@ Here are some pratices on reducing memory usage with dask and xgboost.
nice summary. nice summary.
- When using GPU input, like dataframe loaded by ``dask_cudf``, you can try - When using GPU input, like dataframe loaded by ``dask_cudf``, you can try
:py:class:`xgboost.dask.DaskDeviceQuantileDMatrix` as a drop in replacement for ``DaskDMatrix`` :py:class:`xgboost.dask.DaskQuantileDMatrix` as a drop in replacement for ``DaskDMatrix``
to reduce overall memory usage. See to reduce overall memory usage. See
:ref:`sphx_glr_python_dask-examples_gpu_training.py` for an example. :ref:`sphx_glr_python_dask-examples_gpu_training.py` for an example.

View File

@@ -287,11 +287,22 @@ class TCPSocket {
#elif defined(__APPLE__) #elif defined(__APPLE__)
return domain_; return domain_;
#elif defined(__unix__) #elif defined(__unix__)
#ifndef __PASE__
std::int32_t domain; std::int32_t domain;
socklen_t len = sizeof(domain); socklen_t len = sizeof(domain);
xgboost_CHECK_SYS_CALL( xgboost_CHECK_SYS_CALL(
getsockopt(handle_, SOL_SOCKET, SO_DOMAIN, reinterpret_cast<char *>(&domain), &len), 0); getsockopt(handle_, SOL_SOCKET, SO_DOMAIN, reinterpret_cast<char *>(&domain), &len), 0);
return ret_iafamily(domain); return ret_iafamily(domain);
#else
struct sockaddr sa;
socklen_t sizeofsa = sizeof(sa);
xgboost_CHECK_SYS_CALL(
getsockname(handle_, &sa, &sizeofsa), 0);
if (sizeofsa < sizeof(uchar_t)*2) {
return ret_iafamily(AF_INET);
}
return ret_iafamily(sa.sa_family);
#endif // __PASE__
#else #else
LOG(FATAL) << "Unknown platform."; LOG(FATAL) << "Unknown platform.";
return ret_iafamily(AF_INET); return ret_iafamily(AF_INET);

View File

@@ -6,6 +6,6 @@
#define XGBOOST_VER_MAJOR 1 #define XGBOOST_VER_MAJOR 1
#define XGBOOST_VER_MINOR 7 #define XGBOOST_VER_MINOR 7
#define XGBOOST_VER_PATCH 0 #define XGBOOST_VER_PATCH 2
#endif // XGBOOST_VERSION_CONFIG_H_ #endif // XGBOOST_VERSION_CONFIG_H_

View File

@@ -6,7 +6,7 @@
<groupId>ml.dmlc</groupId> <groupId>ml.dmlc</groupId>
<artifactId>xgboost-jvm_2.12</artifactId> <artifactId>xgboost-jvm_2.12</artifactId>
<version>1.7.1</version> <version>1.7.2</version>
<packaging>pom</packaging> <packaging>pom</packaging>
<name>XGBoost JVM Package</name> <name>XGBoost JVM Package</name>
<description>JVM Package for XGBoost</description> <description>JVM Package for XGBoost</description>

View File

@@ -6,10 +6,10 @@
<parent> <parent>
<groupId>ml.dmlc</groupId> <groupId>ml.dmlc</groupId>
<artifactId>xgboost-jvm_2.12</artifactId> <artifactId>xgboost-jvm_2.12</artifactId>
<version>1.7.1</version> <version>1.7.2</version>
</parent> </parent>
<artifactId>xgboost4j-example_2.12</artifactId> <artifactId>xgboost4j-example_2.12</artifactId>
<version>1.7.1</version> <version>1.7.2</version>
<packaging>jar</packaging> <packaging>jar</packaging>
<build> <build>
<plugins> <plugins>
@@ -26,7 +26,7 @@
<dependency> <dependency>
<groupId>ml.dmlc</groupId> <groupId>ml.dmlc</groupId>
<artifactId>xgboost4j-spark_${scala.binary.version}</artifactId> <artifactId>xgboost4j-spark_${scala.binary.version}</artifactId>
<version>1.7.1</version> <version>1.7.2</version>
</dependency> </dependency>
<dependency> <dependency>
<groupId>org.apache.spark</groupId> <groupId>org.apache.spark</groupId>
@@ -37,7 +37,7 @@
<dependency> <dependency>
<groupId>ml.dmlc</groupId> <groupId>ml.dmlc</groupId>
<artifactId>xgboost4j-flink_${scala.binary.version}</artifactId> <artifactId>xgboost4j-flink_${scala.binary.version}</artifactId>
<version>1.7.1</version> <version>1.7.2</version>
</dependency> </dependency>
<dependency> <dependency>
<groupId>org.apache.commons</groupId> <groupId>org.apache.commons</groupId>

View File

@@ -6,10 +6,10 @@
<parent> <parent>
<groupId>ml.dmlc</groupId> <groupId>ml.dmlc</groupId>
<artifactId>xgboost-jvm_2.12</artifactId> <artifactId>xgboost-jvm_2.12</artifactId>
<version>1.7.1</version> <version>1.7.2</version>
</parent> </parent>
<artifactId>xgboost4j-flink_2.12</artifactId> <artifactId>xgboost4j-flink_2.12</artifactId>
<version>1.7.1</version> <version>1.7.2</version>
<build> <build>
<plugins> <plugins>
<plugin> <plugin>
@@ -26,7 +26,7 @@
<dependency> <dependency>
<groupId>ml.dmlc</groupId> <groupId>ml.dmlc</groupId>
<artifactId>xgboost4j_${scala.binary.version}</artifactId> <artifactId>xgboost4j_${scala.binary.version}</artifactId>
<version>1.7.1</version> <version>1.7.2</version>
</dependency> </dependency>
<dependency> <dependency>
<groupId>org.apache.commons</groupId> <groupId>org.apache.commons</groupId>

View File

@@ -6,10 +6,10 @@
<parent> <parent>
<groupId>ml.dmlc</groupId> <groupId>ml.dmlc</groupId>
<artifactId>xgboost-jvm_2.12</artifactId> <artifactId>xgboost-jvm_2.12</artifactId>
<version>1.7.1</version> <version>1.7.2</version>
</parent> </parent>
<artifactId>xgboost4j-gpu_2.12</artifactId> <artifactId>xgboost4j-gpu_2.12</artifactId>
<version>1.7.1</version> <version>1.7.2</version>
<packaging>jar</packaging> <packaging>jar</packaging>
<dependencies> <dependencies>

View File

@@ -1,7 +1,7 @@
#include <jni.h> #include <jni.h>
#include <thrust/system/cuda/experimental/pinned_allocator.h>
#include "../../../../src/common/device_helpers.cuh" #include "../../../../src/common/device_helpers.cuh"
#include "../../../../src/common/cuda_pinned_allocator.h"
#include "../../../../src/data/array_interface.h" #include "../../../../src/data/array_interface.h"
#include "jvm_utils.h" #include "jvm_utils.h"
#include <xgboost/c_api.h> #include <xgboost/c_api.h>
@@ -131,7 +131,7 @@ class DataIteratorProxy {
bool cache_on_host_{true}; // TODO(Bobby): Make this optional. bool cache_on_host_{true}; // TODO(Bobby): Make this optional.
template <typename T> template <typename T>
using Alloc = thrust::system::cuda::experimental::pinned_allocator<T>; using Alloc = xgboost::common::cuda::pinned_allocator<T>;
template <typename U> template <typename U>
using HostVector = std::vector<U, Alloc<U>>; using HostVector = std::vector<U, Alloc<U>>;

View File

@@ -6,7 +6,7 @@
<parent> <parent>
<groupId>ml.dmlc</groupId> <groupId>ml.dmlc</groupId>
<artifactId>xgboost-jvm_2.12</artifactId> <artifactId>xgboost-jvm_2.12</artifactId>
<version>1.7.1</version> <version>1.7.2</version>
</parent> </parent>
<artifactId>xgboost4j-spark-gpu_2.12</artifactId> <artifactId>xgboost4j-spark-gpu_2.12</artifactId>
<build> <build>
@@ -24,7 +24,7 @@
<dependency> <dependency>
<groupId>ml.dmlc</groupId> <groupId>ml.dmlc</groupId>
<artifactId>xgboost4j-gpu_${scala.binary.version}</artifactId> <artifactId>xgboost4j-gpu_${scala.binary.version}</artifactId>
<version>1.7.1</version> <version>1.7.2</version>
</dependency> </dependency>
<dependency> <dependency>
<groupId>org.apache.spark</groupId> <groupId>org.apache.spark</groupId>

View File

@@ -6,7 +6,7 @@
<parent> <parent>
<groupId>ml.dmlc</groupId> <groupId>ml.dmlc</groupId>
<artifactId>xgboost-jvm_2.12</artifactId> <artifactId>xgboost-jvm_2.12</artifactId>
<version>1.7.1</version> <version>1.7.2</version>
</parent> </parent>
<artifactId>xgboost4j-spark_2.12</artifactId> <artifactId>xgboost4j-spark_2.12</artifactId>
<build> <build>
@@ -24,7 +24,7 @@
<dependency> <dependency>
<groupId>ml.dmlc</groupId> <groupId>ml.dmlc</groupId>
<artifactId>xgboost4j_${scala.binary.version}</artifactId> <artifactId>xgboost4j_${scala.binary.version}</artifactId>
<version>1.7.1</version> <version>1.7.2</version>
</dependency> </dependency>
<dependency> <dependency>
<groupId>org.apache.spark</groupId> <groupId>org.apache.spark</groupId>

View File

@@ -6,10 +6,10 @@
<parent> <parent>
<groupId>ml.dmlc</groupId> <groupId>ml.dmlc</groupId>
<artifactId>xgboost-jvm_2.12</artifactId> <artifactId>xgboost-jvm_2.12</artifactId>
<version>1.7.1</version> <version>1.7.2</version>
</parent> </parent>
<artifactId>xgboost4j_2.12</artifactId> <artifactId>xgboost4j_2.12</artifactId>
<version>1.7.1</version> <version>1.7.2</version>
<packaging>jar</packaging> <packaging>jar</packaging>
<dependencies> <dependencies>

View File

@@ -1 +1 @@
1.7.1 1.7.2

View File

@@ -43,6 +43,7 @@ except ImportError:
pandas_concat = None pandas_concat = None
PANDAS_INSTALLED = False PANDAS_INSTALLED = False
# sklearn # sklearn
try: try:
from sklearn.base import BaseEstimator as XGBModelBase from sklearn.base import BaseEstimator as XGBModelBase
@@ -72,6 +73,22 @@ except ImportError:
XGBStratifiedKFold = None XGBStratifiedKFold = None
_logger = logging.getLogger(__name__)
def is_cudf_available() -> bool:
"""Check cuDF package available or not"""
if importlib.util.find_spec("cudf") is None:
return False
try:
import cudf
return True
except ImportError:
_logger.exception("Importing cuDF failed, use DMatrix instead of QDM")
return False
class XGBoostLabelEncoder(LabelEncoder): class XGBoostLabelEncoder(LabelEncoder):
"""Label encoder with JSON serialization methods.""" """Label encoder with JSON serialization methods."""

View File

@@ -853,7 +853,7 @@ async def _get_rabit_args(
sched_addr = None sched_addr = None
# make sure all workers are online so that we can obtain reliable scheduler_info # make sure all workers are online so that we can obtain reliable scheduler_info
client.wait_for_workers(n_workers) await client.wait_for_workers(n_workers) # type: ignore
env = await client.run_on_scheduler( env = await client.run_on_scheduler(
_start_tracker, n_workers, sched_addr, user_addr _start_tracker, n_workers, sched_addr, user_addr
) )

View File

@@ -1,7 +1,7 @@
# type: ignore # type: ignore
"""Xgboost pyspark integration submodule for core code.""" """Xgboost pyspark integration submodule for core code."""
# pylint: disable=fixme, too-many-ancestors, protected-access, no-member, invalid-name # pylint: disable=fixme, too-many-ancestors, protected-access, no-member, invalid-name
# pylint: disable=too-few-public-methods, too-many-lines # pylint: disable=too-few-public-methods, too-many-lines, too-many-branches
import json import json
from typing import Iterator, Optional, Tuple from typing import Iterator, Optional, Tuple
@@ -32,6 +32,7 @@ from pyspark.sql.types import (
ShortType, ShortType,
) )
from scipy.special import expit, softmax # pylint: disable=no-name-in-module from scipy.special import expit, softmax # pylint: disable=no-name-in-module
from xgboost.compat import is_cudf_available
from xgboost.core import Booster from xgboost.core import Booster
from xgboost.training import train as worker_train from xgboost.training import train as worker_train
@@ -728,6 +729,10 @@ class _SparkXGBEstimator(Estimator, _SparkXGBParams, MLReadable, MLWritable):
else: else:
dataset = dataset.repartition(num_workers) dataset = dataset.repartition(num_workers)
if self.isDefined(self.qid_col) and self.getOrDefault(self.qid_col):
# XGBoost requires qid to be sorted for each partition
dataset = dataset.sortWithinPartitions(alias.qid, ascending=True)
train_params = self._get_distributed_train_params(dataset) train_params = self._get_distributed_train_params(dataset)
booster_params, train_call_kwargs_params = self._get_xgb_train_call_args( booster_params, train_call_kwargs_params = self._get_xgb_train_call_args(
train_params train_params
@@ -755,7 +760,8 @@ class _SparkXGBEstimator(Estimator, _SparkXGBParams, MLReadable, MLWritable):
k: v for k, v in train_call_kwargs_params.items() if v is not None k: v for k, v in train_call_kwargs_params.items() if v is not None
} }
dmatrix_kwargs = {k: v for k, v in dmatrix_kwargs.items() if v is not None} dmatrix_kwargs = {k: v for k, v in dmatrix_kwargs.items() if v is not None}
use_qdm = booster_params.get("tree_method", None) in ("hist", "gpu_hist")
use_hist = booster_params.get("tree_method", None) in ("hist", "gpu_hist")
def _train_booster(pandas_df_iter): def _train_booster(pandas_df_iter):
"""Takes in an RDD partition and outputs a booster for that partition after """Takes in an RDD partition and outputs a booster for that partition after
@@ -769,6 +775,15 @@ class _SparkXGBEstimator(Estimator, _SparkXGBParams, MLReadable, MLWritable):
gpu_id = None gpu_id = None
# If cuDF is not installed, then using DMatrix instead of QDM,
# because without cuDF, DMatrix performs better than QDM.
# Note: Checking `is_cudf_available` in spark worker side because
# spark worker might has different python environment with driver side.
if use_gpu:
use_qdm = use_hist and is_cudf_available()
else:
use_qdm = use_hist
if use_qdm and (booster_params.get("max_bin", None) is not None): if use_qdm and (booster_params.get("max_bin", None) is not None):
dmatrix_kwargs["max_bin"] = booster_params["max_bin"] dmatrix_kwargs["max_bin"] = booster_params["max_bin"]

View File

@@ -0,0 +1,91 @@
/*!
* Copyright 2022 by XGBoost Contributors
* \file common.h
* \brief cuda pinned allocator for usage with thrust containers
*/
#pragma once
#include <cstddef>
#include <limits>
#include "common.h"
namespace xgboost {
namespace common {
namespace cuda {
// \p pinned_allocator is a CUDA-specific host memory allocator
// that employs \c cudaMallocHost for allocation.
//
// This implementation is ported from the experimental/pinned_allocator
// that Thrust used to provide.
//
// \see https://en.cppreference.com/w/cpp/memory/allocator
template <typename T>
class pinned_allocator;
template <>
class pinned_allocator<void> {
public:
using value_type = void; // NOLINT: The type of the elements in the allocator
using pointer = void*; // NOLINT: The type returned by address() / allocate()
using const_pointer = const void*; // NOLINT: The type returned by address()
using size_type = std::size_t; // NOLINT: The type used for the size of the allocation
using difference_type = std::ptrdiff_t; // NOLINT: The type of the distance between two pointers
template <typename U>
struct rebind { // NOLINT
using other = pinned_allocator<U>; // NOLINT: The rebound type
};
};
template <typename T>
class pinned_allocator {
public:
using value_type = T; // NOLINT: The type of the elements in the allocator
using pointer = T*; // NOLINT: The type returned by address() / allocate()
using const_pointer = const T*; // NOLINT: The type returned by address()
using reference = T&; // NOLINT: The parameter type for address()
using const_reference = const T&; // NOLINT: The parameter type for address()
using size_type = std::size_t; // NOLINT: The type used for the size of the allocation
using difference_type = std::ptrdiff_t; // NOLINT: The type of the distance between two pointers
template <typename U>
struct rebind { // NOLINT
using other = pinned_allocator<U>; // NOLINT: The rebound type
};
XGBOOST_DEVICE inline pinned_allocator() {}; // NOLINT: host/device markup ignored on defaulted functions
XGBOOST_DEVICE inline ~pinned_allocator() {} // NOLINT: host/device markup ignored on defaulted functions
XGBOOST_DEVICE inline pinned_allocator(pinned_allocator const&) {} // NOLINT: host/device markup ignored on defaulted functions
template <typename U>
XGBOOST_DEVICE inline pinned_allocator(pinned_allocator<U> const&) {} // NOLINT
XGBOOST_DEVICE inline pointer address(reference r) { return &r; } // NOLINT
XGBOOST_DEVICE inline const_pointer address(const_reference r) { return &r; } // NOLINT
inline pointer allocate(size_type cnt, const_pointer = nullptr) { // NOLINT
if (cnt > this->max_size()) { throw std::bad_alloc(); } // end if
pointer result(nullptr);
dh::safe_cuda(cudaMallocHost(reinterpret_cast<void**>(&result), cnt * sizeof(value_type)));
return result;
}
inline void deallocate(pointer p, size_type) { dh::safe_cuda(cudaFreeHost(p)); } // NOLINT
inline size_type max_size() const { return (std::numeric_limits<size_type>::max)() / sizeof(T); } // NOLINT
XGBOOST_DEVICE inline bool operator==(pinned_allocator const& x) const { return true; }
XGBOOST_DEVICE inline bool operator!=(pinned_allocator const& x) const {
return !operator==(x);
}
};
} // namespace cuda
} // namespace common
} // namespace xgboost

View File

@@ -101,7 +101,7 @@ class ArrayInterfaceHandler {
template <typename PtrType> template <typename PtrType>
static PtrType GetPtrFromArrayData(Object::Map const &obj) { static PtrType GetPtrFromArrayData(Object::Map const &obj) {
auto data_it = obj.find("data"); auto data_it = obj.find("data");
if (data_it == obj.cend()) { if (data_it == obj.cend() || IsA<Null>(data_it->second)) {
LOG(FATAL) << "Empty data passed in."; LOG(FATAL) << "Empty data passed in.";
} }
auto p_data = reinterpret_cast<PtrType>( auto p_data = reinterpret_cast<PtrType>(
@@ -111,7 +111,7 @@ class ArrayInterfaceHandler {
static void Validate(Object::Map const &array) { static void Validate(Object::Map const &array) {
auto version_it = array.find("version"); auto version_it = array.find("version");
if (version_it == array.cend()) { if (version_it == array.cend() || IsA<Null>(version_it->second)) {
LOG(FATAL) << "Missing `version' field for array interface"; LOG(FATAL) << "Missing `version' field for array interface";
} }
if (get<Integer const>(version_it->second) > 3) { if (get<Integer const>(version_it->second) > 3) {
@@ -119,17 +119,19 @@ class ArrayInterfaceHandler {
} }
auto typestr_it = array.find("typestr"); auto typestr_it = array.find("typestr");
if (typestr_it == array.cend()) { if (typestr_it == array.cend() || IsA<Null>(typestr_it->second)) {
LOG(FATAL) << "Missing `typestr' field for array interface"; LOG(FATAL) << "Missing `typestr' field for array interface";
} }
auto typestr = get<String const>(typestr_it->second); auto typestr = get<String const>(typestr_it->second);
CHECK(typestr.size() == 3 || typestr.size() == 4) << ArrayInterfaceErrors::TypestrFormat(); CHECK(typestr.size() == 3 || typestr.size() == 4) << ArrayInterfaceErrors::TypestrFormat();
if (array.find("shape") == array.cend()) { auto shape_it = array.find("shape");
if (shape_it == array.cend() || IsA<Null>(shape_it->second)) {
LOG(FATAL) << "Missing `shape' field for array interface"; LOG(FATAL) << "Missing `shape' field for array interface";
} }
if (array.find("data") == array.cend()) { auto data_it = array.find("data");
if (data_it == array.cend() || IsA<Null>(data_it->second)) {
LOG(FATAL) << "Missing `data' field for array interface"; LOG(FATAL) << "Missing `data' field for array interface";
} }
} }
@@ -139,8 +141,9 @@ class ArrayInterfaceHandler {
static size_t ExtractMask(Object::Map const &column, static size_t ExtractMask(Object::Map const &column,
common::Span<RBitField8::value_type> *p_out) { common::Span<RBitField8::value_type> *p_out) {
auto &s_mask = *p_out; auto &s_mask = *p_out;
if (column.find("mask") != column.cend()) { auto const &mask_it = column.find("mask");
auto const &j_mask = get<Object const>(column.at("mask")); if (mask_it != column.cend() && !IsA<Null>(mask_it->second)) {
auto const &j_mask = get<Object const>(mask_it->second);
Validate(j_mask); Validate(j_mask);
auto p_mask = GetPtrFromArrayData<RBitField8::value_type *>(j_mask); auto p_mask = GetPtrFromArrayData<RBitField8::value_type *>(j_mask);
@@ -173,8 +176,9 @@ class ArrayInterfaceHandler {
// assume 1 byte alignment. // assume 1 byte alignment.
size_t const span_size = RBitField8::ComputeStorageSize(n_bits); size_t const span_size = RBitField8::ComputeStorageSize(n_bits);
if (j_mask.find("strides") != j_mask.cend()) { auto strides_it = j_mask.find("strides");
auto strides = get<Array const>(column.at("strides")); if (strides_it != j_mask.cend() && !IsA<Null>(strides_it->second)) {
auto strides = get<Array const>(strides_it->second);
CHECK_EQ(strides.size(), 1) << ArrayInterfaceErrors::Dimension(1); CHECK_EQ(strides.size(), 1) << ArrayInterfaceErrors::Dimension(1);
CHECK_EQ(get<Integer>(strides.at(0)), type_length) << ArrayInterfaceErrors::Contiguous(); CHECK_EQ(get<Integer>(strides.at(0)), type_length) << ArrayInterfaceErrors::Contiguous();
} }
@@ -401,7 +405,9 @@ class ArrayInterface {
<< "XGBoost doesn't support internal broadcasting."; << "XGBoost doesn't support internal broadcasting.";
} }
} else { } else {
CHECK(array.find("mask") == array.cend()) << "Masked array is not yet supported."; auto mask_it = array.find("mask");
CHECK(mask_it == array.cend() || IsA<Null>(mask_it->second))
<< "Masked array is not yet supported.";
} }
auto stream_it = array.find("stream"); auto stream_it = array.find("stream");

View File

@@ -3,10 +3,10 @@
*/ */
#ifndef EVALUATE_SPLITS_CUH_ #ifndef EVALUATE_SPLITS_CUH_
#define EVALUATE_SPLITS_CUH_ #define EVALUATE_SPLITS_CUH_
#include <thrust/system/cuda/experimental/pinned_allocator.h>
#include <xgboost/span.h> #include <xgboost/span.h>
#include "../../common/categorical.h" #include "../../common/categorical.h"
#include "../../common/cuda_pinned_allocator.h"
#include "../split_evaluator.h" #include "../split_evaluator.h"
#include "../updater_gpu_common.cuh" #include "../updater_gpu_common.cuh"
#include "expand_entry.cuh" #include "expand_entry.cuh"
@@ -57,7 +57,7 @@ struct CatAccessor {
class GPUHistEvaluator { class GPUHistEvaluator {
using CatST = common::CatBitField::value_type; // categorical storage type using CatST = common::CatBitField::value_type; // categorical storage type
// use pinned memory to stage the categories, used for sort based splits. // use pinned memory to stage the categories, used for sort based splits.
using Alloc = thrust::system::cuda::experimental::pinned_allocator<CatST>; using Alloc = xgboost::common::cuda::pinned_allocator<CatST>;
private: private:
TreeEvaluator tree_evaluator_; TreeEvaluator tree_evaluator_;

View File

@@ -0,0 +1,11 @@
# conda environment for CPP test on Linux distributions
name: cpp_test
channels:
- defaults
- conda-forge
dependencies:
- cmake
- ninja
- c-compiler
- cxx-compiler
- gtest

View File

@@ -0,0 +1,13 @@
# conda environment for source distribution test.
name: sdist_test
channels:
- defaults
- conda-forge
dependencies:
- python=3.8
- pip
- wheel
- cmake
- ninja
- c-compiler
- cxx-compiler

View File

@@ -33,8 +33,7 @@ TEST(ArrayInterface, Error) {
Json column { Object() }; Json column { Object() };
std::vector<Json> j_shape {Json(Integer(static_cast<Integer::Int>(kRows)))}; std::vector<Json> j_shape {Json(Integer(static_cast<Integer::Int>(kRows)))};
column["shape"] = Array(j_shape); column["shape"] = Array(j_shape);
std::vector<Json> j_data { std::vector<Json> j_data{Json(Integer(reinterpret_cast<Integer::Int>(nullptr))),
Json(Integer(reinterpret_cast<Integer::Int>(nullptr))),
Json(Boolean(false))}; Json(Boolean(false))};
auto const& column_obj = get<Object>(column); auto const& column_obj = get<Object>(column);
@@ -45,6 +44,10 @@ TEST(ArrayInterface, Error) {
EXPECT_THROW(ArrayInterfaceHandler::ExtractData(column_obj, n), dmlc::Error); EXPECT_THROW(ArrayInterfaceHandler::ExtractData(column_obj, n), dmlc::Error);
column["version"] = 3; column["version"] = 3;
// missing data // missing data
EXPECT_THROW(ArrayInterfaceHandler::ExtractData(column_obj, n),
dmlc::Error);
// null data
column["data"] = Null{};
EXPECT_THROW(ArrayInterfaceHandler::ExtractData(column_obj, n), EXPECT_THROW(ArrayInterfaceHandler::ExtractData(column_obj, n),
dmlc::Error); dmlc::Error);
column["data"] = j_data; column["data"] = j_data;
@@ -63,6 +66,11 @@ TEST(ArrayInterface, Error) {
Json(Boolean(false))}; Json(Boolean(false))};
column["data"] = j_data; column["data"] = j_data;
EXPECT_NO_THROW(ArrayInterfaceHandler::ExtractData(column_obj, n)); EXPECT_NO_THROW(ArrayInterfaceHandler::ExtractData(column_obj, n));
// null data in mask
column["mask"] = Object{};
column["mask"]["data"] = Null{};
common::Span<RBitField8::value_type> s_mask;
EXPECT_THROW(ArrayInterfaceHandler::ExtractMask(column_obj, &s_mask), dmlc::Error);
} }
TEST(ArrayInterface, GetElement) { TEST(ArrayInterface, GetElement) {

View File

@@ -390,28 +390,6 @@ class XgboostLocalTest(SparkTestCase):
"expected_prediction_with_base_margin", "expected_prediction_with_base_margin",
], ],
) )
self.ranker_df_train = self.session.createDataFrame(
[
(Vectors.dense(1.0, 2.0, 3.0), 0, 0),
(Vectors.dense(4.0, 5.0, 6.0), 1, 0),
(Vectors.dense(9.0, 4.0, 8.0), 2, 0),
(Vectors.sparse(3, {1: 1.0, 2: 5.5}), 0, 1),
(Vectors.sparse(3, {1: 6.0, 2: 7.5}), 1, 1),
(Vectors.sparse(3, {1: 8.0, 2: 9.5}), 2, 1),
],
["features", "label", "qid"],
)
self.ranker_df_test = self.session.createDataFrame(
[
(Vectors.dense(1.5, 2.0, 3.0), 0, -1.87988),
(Vectors.dense(4.5, 5.0, 6.0), 0, 0.29556),
(Vectors.dense(9.0, 4.5, 8.0), 0, 2.36570),
(Vectors.sparse(3, {1: 1.0, 2: 6.0}), 1, -1.87988),
(Vectors.sparse(3, {1: 6.0, 2: 7.0}), 1, -0.30612),
(Vectors.sparse(3, {1: 8.0, 2: 10.5}), 1, 2.44826),
],
["features", "qid", "expected_prediction"],
)
self.reg_df_sparse_train = self.session.createDataFrame( self.reg_df_sparse_train = self.session.createDataFrame(
[ [
@@ -1039,15 +1017,6 @@ class XgboostLocalTest(SparkTestCase):
for row1, row2 in zip(pred_result, pred_result2): for row1, row2 in zip(pred_result, pred_result2):
self.assertTrue(np.allclose(row1.probability, row2.probability, rtol=1e-3)) self.assertTrue(np.allclose(row1.probability, row2.probability, rtol=1e-3))
def test_ranker(self):
ranker = SparkXGBRanker(qid_col="qid")
assert ranker.getOrDefault(ranker.objective) == "rank:pairwise"
model = ranker.fit(self.ranker_df_train)
pred_result = model.transform(self.ranker_df_test).collect()
for row in pred_result:
assert np.isclose(row.prediction, row.expected_prediction, rtol=1e-3)
def test_empty_validation_data(self) -> None: def test_empty_validation_data(self) -> None:
for tree_method in [ for tree_method in [
"hist", "hist",
@@ -1130,3 +1099,63 @@ class XgboostLocalTest(SparkTestCase):
def test_unsupported_params(self): def test_unsupported_params(self):
with pytest.raises(ValueError, match="evals_result"): with pytest.raises(ValueError, match="evals_result"):
SparkXGBClassifier(evals_result={}) SparkXGBClassifier(evals_result={})
class XgboostRankerLocalTest(SparkTestCase):
def setUp(self):
self.session.conf.set("spark.sql.execution.arrow.maxRecordsPerBatch", "8")
self.ranker_df_train = self.session.createDataFrame(
[
(Vectors.dense(1.0, 2.0, 3.0), 0, 0),
(Vectors.dense(4.0, 5.0, 6.0), 1, 0),
(Vectors.dense(9.0, 4.0, 8.0), 2, 0),
(Vectors.sparse(3, {1: 1.0, 2: 5.5}), 0, 1),
(Vectors.sparse(3, {1: 6.0, 2: 7.5}), 1, 1),
(Vectors.sparse(3, {1: 8.0, 2: 9.5}), 2, 1),
],
["features", "label", "qid"],
)
self.ranker_df_test = self.session.createDataFrame(
[
(Vectors.dense(1.5, 2.0, 3.0), 0, -1.87988),
(Vectors.dense(4.5, 5.0, 6.0), 0, 0.29556),
(Vectors.dense(9.0, 4.5, 8.0), 0, 2.36570),
(Vectors.sparse(3, {1: 1.0, 2: 6.0}), 1, -1.87988),
(Vectors.sparse(3, {1: 6.0, 2: 7.0}), 1, -0.30612),
(Vectors.sparse(3, {1: 8.0, 2: 10.5}), 1, 2.44826),
],
["features", "qid", "expected_prediction"],
)
self.ranker_df_train_1 = self.session.createDataFrame(
[
(Vectors.sparse(3, {1: 1.0, 2: 5.5}), 0, 9),
(Vectors.sparse(3, {1: 6.0, 2: 7.5}), 1, 9),
(Vectors.sparse(3, {1: 8.0, 2: 9.5}), 2, 9),
(Vectors.dense(1.0, 2.0, 3.0), 0, 8),
(Vectors.dense(4.0, 5.0, 6.0), 1, 8),
(Vectors.dense(9.0, 4.0, 8.0), 2, 8),
(Vectors.sparse(3, {1: 1.0, 2: 5.5}), 0, 7),
(Vectors.sparse(3, {1: 6.0, 2: 7.5}), 1, 7),
(Vectors.sparse(3, {1: 8.0, 2: 9.5}), 2, 7),
(Vectors.dense(1.0, 2.0, 3.0), 0, 6),
(Vectors.dense(4.0, 5.0, 6.0), 1, 6),
(Vectors.dense(9.0, 4.0, 8.0), 2, 6),
]
* 4,
["features", "label", "qid"],
)
def test_ranker(self):
ranker = SparkXGBRanker(qid_col="qid")
assert ranker.getOrDefault(ranker.objective) == "rank:pairwise"
model = ranker.fit(self.ranker_df_train)
pred_result = model.transform(self.ranker_df_test).collect()
for row in pred_result:
assert np.isclose(row.prediction, row.expected_prediction, rtol=1e-3)
def test_ranker_qid_sorted(self):
ranker = SparkXGBRanker(qid_col="qid", num_workers=4)
assert ranker.getOrDefault(ranker.objective) == "rank:pairwise"
model = ranker.fit(self.ranker_df_train_1)
model.transform(self.ranker_df_test).collect()