Compare commits
22 Commits
dependabot
...
v1.7.2
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
62ed8b5fef | ||
|
|
a980e10744 | ||
|
|
59c54e361b | ||
|
|
60a8c8ebba | ||
|
|
58bc225657 | ||
|
|
850b53100f | ||
|
|
67b657dad0 | ||
|
|
db14e3feb7 | ||
|
|
9372370dda | ||
|
|
1136a7e0c3 | ||
|
|
a347cd512b | ||
|
|
9ff0c0832a | ||
|
|
534c940a7e | ||
|
|
5b76acccff | ||
|
|
4bc59ef7c3 | ||
|
|
e43cd60c0e | ||
|
|
3f92970a39 | ||
|
|
e17f7010bf | ||
|
|
aa30ce10da | ||
|
|
153d995b58 | ||
|
|
463313d9be | ||
|
|
7cf58a2c65 |
14
.github/workflows/main.yml
vendored
14
.github/workflows/main.yml
vendored
@@ -75,19 +75,18 @@ jobs:
|
|||||||
- uses: actions/checkout@v2
|
- uses: actions/checkout@v2
|
||||||
with:
|
with:
|
||||||
submodules: 'true'
|
submodules: 'true'
|
||||||
- name: Install system packages
|
- uses: mamba-org/provision-with-micromamba@f347426e5745fe3dfc13ec5baf20496990d0281f # v14
|
||||||
run: |
|
|
||||||
sudo apt-get install -y --no-install-recommends ninja-build
|
|
||||||
- uses: conda-incubator/setup-miniconda@v2
|
|
||||||
with:
|
with:
|
||||||
auto-update-conda: true
|
cache-downloads: true
|
||||||
python-version: ${{ matrix.python-version }}
|
cache-env: true
|
||||||
activate-environment: test
|
environment-name: cpp_test
|
||||||
|
environment-file: tests/ci_build/conda_env/cpp_test.yml
|
||||||
- name: Display Conda env
|
- name: Display Conda env
|
||||||
shell: bash -l {0}
|
shell: bash -l {0}
|
||||||
run: |
|
run: |
|
||||||
conda info
|
conda info
|
||||||
conda list
|
conda list
|
||||||
|
|
||||||
- name: Build and install XGBoost static library
|
- name: Build and install XGBoost static library
|
||||||
shell: bash -l {0}
|
shell: bash -l {0}
|
||||||
run: |
|
run: |
|
||||||
@@ -109,6 +108,7 @@ jobs:
|
|||||||
cd ..
|
cd ..
|
||||||
rm -rf ./build
|
rm -rf ./build
|
||||||
popd
|
popd
|
||||||
|
|
||||||
- name: Build and install XGBoost shared library
|
- name: Build and install XGBoost shared library
|
||||||
shell: bash -l {0}
|
shell: bash -l {0}
|
||||||
run: |
|
run: |
|
||||||
|
|||||||
150
.github/workflows/python_tests.yml
vendored
150
.github/workflows/python_tests.yml
vendored
@@ -41,12 +41,46 @@ jobs:
|
|||||||
run: |
|
run: |
|
||||||
python tests/ci_build/lint_python.py --format=0 --type-check=0 --pylint=1
|
python tests/ci_build/lint_python.py --format=0 --type-check=0 --pylint=1
|
||||||
|
|
||||||
python-sdist-test:
|
python-sdist-test-on-Linux:
|
||||||
|
# Mismatched glibcxx version between system and conda forge.
|
||||||
runs-on: ${{ matrix.os }}
|
runs-on: ${{ matrix.os }}
|
||||||
name: Test installing XGBoost Python source package on ${{ matrix.os }}
|
name: Test installing XGBoost Python source package on ${{ matrix.os }}
|
||||||
strategy:
|
strategy:
|
||||||
matrix:
|
matrix:
|
||||||
os: [ubuntu-latest, macos-11, windows-latest]
|
os: [ubuntu-latest]
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@e2f20e631ae6d7dd3b768f56a5d2af784dd54791 # v2.5.0
|
||||||
|
with:
|
||||||
|
submodules: 'true'
|
||||||
|
- uses: mamba-org/provision-with-micromamba@f347426e5745fe3dfc13ec5baf20496990d0281f # v14
|
||||||
|
with:
|
||||||
|
cache-downloads: true
|
||||||
|
cache-env: false
|
||||||
|
environment-name: sdist_test
|
||||||
|
environment-file: tests/ci_build/conda_env/sdist_test.yml
|
||||||
|
- name: Display Conda env
|
||||||
|
shell: bash -l {0}
|
||||||
|
run: |
|
||||||
|
conda info
|
||||||
|
conda list
|
||||||
|
- name: Build and install XGBoost
|
||||||
|
shell: bash -l {0}
|
||||||
|
run: |
|
||||||
|
cd python-package
|
||||||
|
python --version
|
||||||
|
python setup.py sdist
|
||||||
|
pip install -v ./dist/xgboost-*.tar.gz
|
||||||
|
cd ..
|
||||||
|
python -c 'import xgboost'
|
||||||
|
|
||||||
|
python-sdist-test:
|
||||||
|
# Use system toolchain instead of conda toolchain for macos and windows.
|
||||||
|
# MacOS has linker error if clang++ from conda-forge is used
|
||||||
|
runs-on: ${{ matrix.os }}
|
||||||
|
name: Test installing XGBoost Python source package on ${{ matrix.os }}
|
||||||
|
strategy:
|
||||||
|
matrix:
|
||||||
|
os: [macos-11, windows-latest]
|
||||||
python-version: ["3.8"]
|
python-version: ["3.8"]
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v2
|
- uses: actions/checkout@v2
|
||||||
@@ -56,11 +90,7 @@ jobs:
|
|||||||
if: matrix.os == 'macos-11'
|
if: matrix.os == 'macos-11'
|
||||||
run: |
|
run: |
|
||||||
brew install ninja libomp
|
brew install ninja libomp
|
||||||
- name: Install Ubuntu system dependencies
|
- uses: conda-incubator/setup-miniconda@35d1405e78aa3f784fe3ce9a2eb378d5eeb62169 # v2.1.1
|
||||||
if: matrix.os == 'ubuntu-latest'
|
|
||||||
run: |
|
|
||||||
sudo apt-get install -y --no-install-recommends ninja-build
|
|
||||||
- uses: conda-incubator/setup-miniconda@v2
|
|
||||||
with:
|
with:
|
||||||
auto-update-conda: true
|
auto-update-conda: true
|
||||||
python-version: ${{ matrix.python-version }}
|
python-version: ${{ matrix.python-version }}
|
||||||
@@ -80,6 +110,58 @@ jobs:
|
|||||||
cd ..
|
cd ..
|
||||||
python -c 'import xgboost'
|
python -c 'import xgboost'
|
||||||
|
|
||||||
|
python-tests-on-macos:
|
||||||
|
name: Test XGBoost Python package on ${{ matrix.config.os }}
|
||||||
|
runs-on: ${{ matrix.config.os }}
|
||||||
|
timeout-minutes: 60
|
||||||
|
strategy:
|
||||||
|
matrix:
|
||||||
|
config:
|
||||||
|
- {os: macos-11}
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@e2f20e631ae6d7dd3b768f56a5d2af784dd54791 # v2.5.0
|
||||||
|
with:
|
||||||
|
submodules: 'true'
|
||||||
|
|
||||||
|
- uses: mamba-org/provision-with-micromamba@f347426e5745fe3dfc13ec5baf20496990d0281f # v14
|
||||||
|
with:
|
||||||
|
cache-downloads: true
|
||||||
|
cache-env: false
|
||||||
|
environment-name: macos_test
|
||||||
|
environment-file: tests/ci_build/conda_env/macos_cpu_test.yml
|
||||||
|
|
||||||
|
- name: Display Conda env
|
||||||
|
shell: bash -l {0}
|
||||||
|
run: |
|
||||||
|
conda info
|
||||||
|
conda list
|
||||||
|
|
||||||
|
- name: Build XGBoost on macos
|
||||||
|
shell: bash -l {0}
|
||||||
|
run: |
|
||||||
|
brew install ninja
|
||||||
|
|
||||||
|
mkdir build
|
||||||
|
cd build
|
||||||
|
# Set prefix, to use OpenMP library from Conda env
|
||||||
|
# See https://github.com/dmlc/xgboost/issues/7039#issuecomment-1025038228
|
||||||
|
# to learn why we don't use libomp from Homebrew.
|
||||||
|
cmake .. -GNinja -DCMAKE_PREFIX_PATH=$CONDA_PREFIX
|
||||||
|
ninja
|
||||||
|
|
||||||
|
- name: Install Python package
|
||||||
|
shell: bash -l {0}
|
||||||
|
run: |
|
||||||
|
cd python-package
|
||||||
|
python --version
|
||||||
|
python setup.py install
|
||||||
|
|
||||||
|
- name: Test Python package
|
||||||
|
shell: bash -l {0}
|
||||||
|
run: |
|
||||||
|
pytest -s -v -rxXs --durations=0 ./tests/python
|
||||||
|
|
||||||
python-tests-on-win:
|
python-tests-on-win:
|
||||||
name: Test XGBoost Python package on ${{ matrix.config.os }}
|
name: Test XGBoost Python package on ${{ matrix.config.os }}
|
||||||
runs-on: ${{ matrix.config.os }}
|
runs-on: ${{ matrix.config.os }}
|
||||||
@@ -125,56 +207,4 @@ jobs:
|
|||||||
- name: Test Python package
|
- name: Test Python package
|
||||||
shell: bash -l {0}
|
shell: bash -l {0}
|
||||||
run: |
|
run: |
|
||||||
pytest -s -v ./tests/python
|
pytest -s -v -rxXs --durations=0 ./tests/python
|
||||||
|
|
||||||
python-tests-on-macos:
|
|
||||||
name: Test XGBoost Python package on ${{ matrix.config.os }}
|
|
||||||
runs-on: ${{ matrix.config.os }}
|
|
||||||
timeout-minutes: 90
|
|
||||||
strategy:
|
|
||||||
matrix:
|
|
||||||
config:
|
|
||||||
- {os: macos-11, python-version "3.8" }
|
|
||||||
|
|
||||||
steps:
|
|
||||||
- uses: actions/checkout@v2
|
|
||||||
with:
|
|
||||||
submodules: 'true'
|
|
||||||
|
|
||||||
- uses: conda-incubator/setup-miniconda@v2
|
|
||||||
with:
|
|
||||||
auto-update-conda: true
|
|
||||||
python-version: ${{ matrix.config.python-version }}
|
|
||||||
activate-environment: macos_test
|
|
||||||
environment-file: tests/ci_build/conda_env/macos_cpu_test.yml
|
|
||||||
|
|
||||||
- name: Display Conda env
|
|
||||||
shell: bash -l {0}
|
|
||||||
run: |
|
|
||||||
conda info
|
|
||||||
conda list
|
|
||||||
|
|
||||||
- name: Build XGBoost on macos
|
|
||||||
shell: bash -l {0}
|
|
||||||
run: |
|
|
||||||
brew install ninja
|
|
||||||
|
|
||||||
mkdir build
|
|
||||||
cd build
|
|
||||||
# Set prefix, to use OpenMP library from Conda env
|
|
||||||
# See https://github.com/dmlc/xgboost/issues/7039#issuecomment-1025038228
|
|
||||||
# to learn why we don't use libomp from Homebrew.
|
|
||||||
cmake .. -GNinja -DGOOGLE_TEST=ON -DUSE_DMLC_GTEST=ON -DCMAKE_PREFIX_PATH=$CONDA_PREFIX
|
|
||||||
ninja
|
|
||||||
|
|
||||||
- name: Install Python package
|
|
||||||
shell: bash -l {0}
|
|
||||||
run: |
|
|
||||||
cd python-package
|
|
||||||
python --version
|
|
||||||
python setup.py install
|
|
||||||
|
|
||||||
- name: Test Python package
|
|
||||||
shell: bash -l {0}
|
|
||||||
run: |
|
|
||||||
pytest -s -v ./tests/python
|
|
||||||
|
|||||||
6
.github/workflows/r_tests.yml
vendored
6
.github/workflows/r_tests.yml
vendored
@@ -5,6 +5,7 @@ on: [push, pull_request]
|
|||||||
env:
|
env:
|
||||||
R_PACKAGES: c('XML', 'data.table', 'ggplot2', 'DiagrammeR', 'Ckmeans.1d.dp', 'vcd', 'testthat', 'lintr', 'knitr', 'rmarkdown', 'e1071', 'cplm', 'devtools', 'float', 'titanic')
|
R_PACKAGES: c('XML', 'data.table', 'ggplot2', 'DiagrammeR', 'Ckmeans.1d.dp', 'vcd', 'testthat', 'lintr', 'knitr', 'rmarkdown', 'e1071', 'cplm', 'devtools', 'float', 'titanic')
|
||||||
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
|
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
|
||||||
|
_R_CHECK_EXAMPLE_TIMING_CPU_TO_ELAPSED_THRESHOLD_: 2.5
|
||||||
|
|
||||||
permissions:
|
permissions:
|
||||||
contents: read # to fetch code (actions/checkout)
|
contents: read # to fetch code (actions/checkout)
|
||||||
@@ -68,6 +69,7 @@ jobs:
|
|||||||
- {os: windows-latest, r: 'release', compiler: 'mingw', build: 'cmake'}
|
- {os: windows-latest, r: 'release', compiler: 'mingw', build: 'cmake'}
|
||||||
env:
|
env:
|
||||||
R_REMOTES_NO_ERRORS_FROM_WARNINGS: true
|
R_REMOTES_NO_ERRORS_FROM_WARNINGS: true
|
||||||
|
_R_CHECK_EXAMPLE_TIMING_CPU_TO_ELAPSED_THRESHOLD_: 2.5
|
||||||
RSPM: ${{ matrix.config.rspm }}
|
RSPM: ${{ matrix.config.rspm }}
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
@@ -121,6 +123,10 @@ jobs:
|
|||||||
config:
|
config:
|
||||||
- {r: 'release'}
|
- {r: 'release'}
|
||||||
|
|
||||||
|
env:
|
||||||
|
_R_CHECK_EXAMPLE_TIMING_CPU_TO_ELAPSED_THRESHOLD_: 2.5
|
||||||
|
MAKE: "make -j$(nproc)"
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v2
|
- uses: actions/checkout@v2
|
||||||
with:
|
with:
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
cmake_minimum_required(VERSION 3.14 FATAL_ERROR)
|
cmake_minimum_required(VERSION 3.14 FATAL_ERROR)
|
||||||
project(xgboost LANGUAGES CXX C VERSION 1.7.0)
|
project(xgboost LANGUAGES CXX C VERSION 1.7.2)
|
||||||
include(cmake/Utils.cmake)
|
include(cmake/Utils.cmake)
|
||||||
list(APPEND CMAKE_MODULE_PATH "${xgboost_SOURCE_DIR}/cmake/modules")
|
list(APPEND CMAKE_MODULE_PATH "${xgboost_SOURCE_DIR}/cmake/modules")
|
||||||
cmake_policy(SET CMP0022 NEW)
|
cmake_policy(SET CMP0022 NEW)
|
||||||
@@ -171,8 +171,24 @@ if (USE_OPENMP)
|
|||||||
# Require CMake 3.16+ on Mac OSX, as previous versions of CMake had trouble locating
|
# Require CMake 3.16+ on Mac OSX, as previous versions of CMake had trouble locating
|
||||||
# OpenMP on Mac. See https://github.com/dmlc/xgboost/pull/5146#issuecomment-568312706
|
# OpenMP on Mac. See https://github.com/dmlc/xgboost/pull/5146#issuecomment-568312706
|
||||||
cmake_minimum_required(VERSION 3.16)
|
cmake_minimum_required(VERSION 3.16)
|
||||||
endif (APPLE)
|
find_package(OpenMP)
|
||||||
find_package(OpenMP REQUIRED)
|
if (NOT OpenMP_FOUND)
|
||||||
|
# Try again with extra path info; required for libomp 15+ from Homebrew
|
||||||
|
execute_process(COMMAND brew --prefix libomp
|
||||||
|
OUTPUT_VARIABLE HOMEBREW_LIBOMP_PREFIX
|
||||||
|
OUTPUT_STRIP_TRAILING_WHITESPACE)
|
||||||
|
set(OpenMP_C_FLAGS
|
||||||
|
"-Xpreprocessor -fopenmp -I${HOMEBREW_LIBOMP_PREFIX}/include")
|
||||||
|
set(OpenMP_CXX_FLAGS
|
||||||
|
"-Xpreprocessor -fopenmp -I${HOMEBREW_LIBOMP_PREFIX}/include")
|
||||||
|
set(OpenMP_C_LIB_NAMES omp)
|
||||||
|
set(OpenMP_CXX_LIB_NAMES omp)
|
||||||
|
set(OpenMP_omp_LIBRARY ${HOMEBREW_LIBOMP_PREFIX}/lib/libomp.dylib)
|
||||||
|
find_package(OpenMP REQUIRED)
|
||||||
|
endif ()
|
||||||
|
else ()
|
||||||
|
find_package(OpenMP REQUIRED)
|
||||||
|
endif ()
|
||||||
endif (USE_OPENMP)
|
endif (USE_OPENMP)
|
||||||
#Add for IBM i
|
#Add for IBM i
|
||||||
if (${CMAKE_SYSTEM_NAME} MATCHES "OS400")
|
if (${CMAKE_SYSTEM_NAME} MATCHES "OS400")
|
||||||
|
|||||||
@@ -1,8 +1,8 @@
|
|||||||
Package: xgboost
|
Package: xgboost
|
||||||
Type: Package
|
Type: Package
|
||||||
Title: Extreme Gradient Boosting
|
Title: Extreme Gradient Boosting
|
||||||
Version: 1.7.0.1
|
Version: 1.7.2.1
|
||||||
Date: 2022-10-18
|
Date: 2022-12-08
|
||||||
Authors@R: c(
|
Authors@R: c(
|
||||||
person("Tianqi", "Chen", role = c("aut"),
|
person("Tianqi", "Chen", role = c("aut"),
|
||||||
email = "tianqi.tchen@gmail.com"),
|
email = "tianqi.tchen@gmail.com"),
|
||||||
@@ -66,5 +66,5 @@ Imports:
|
|||||||
methods,
|
methods,
|
||||||
data.table (>= 1.9.6),
|
data.table (>= 1.9.6),
|
||||||
jsonlite (>= 1.0),
|
jsonlite (>= 1.0),
|
||||||
RoxygenNote: 7.1.1
|
RoxygenNote: 7.2.1
|
||||||
SystemRequirements: GNU make, C++14
|
SystemRequirements: GNU make, C++14
|
||||||
|
|||||||
@@ -544,9 +544,11 @@ cb.cv.predict <- function(save_models = FALSE) {
|
|||||||
#'
|
#'
|
||||||
#' @return
|
#' @return
|
||||||
#' Results are stored in the \code{coefs} element of the closure.
|
#' Results are stored in the \code{coefs} element of the closure.
|
||||||
#' The \code{\link{xgb.gblinear.history}} convenience function provides an easy way to access it.
|
#' The \code{\link{xgb.gblinear.history}} convenience function provides an easy
|
||||||
|
#' way to access it.
|
||||||
#' With \code{xgb.train}, it is either a dense of a sparse matrix.
|
#' With \code{xgb.train}, it is either a dense of a sparse matrix.
|
||||||
#' While with \code{xgb.cv}, it is a list (an element per each fold) of such matrices.
|
#' While with \code{xgb.cv}, it is a list (an element per each fold) of such
|
||||||
|
#' matrices.
|
||||||
#'
|
#'
|
||||||
#' @seealso
|
#' @seealso
|
||||||
#' \code{\link{callbacks}}, \code{\link{xgb.gblinear.history}}.
|
#' \code{\link{callbacks}}, \code{\link{xgb.gblinear.history}}.
|
||||||
@@ -558,7 +560,7 @@ cb.cv.predict <- function(save_models = FALSE) {
|
|||||||
#' # without considering the 2nd order interactions:
|
#' # without considering the 2nd order interactions:
|
||||||
#' x <- model.matrix(Species ~ .^2, iris)[,-1]
|
#' x <- model.matrix(Species ~ .^2, iris)[,-1]
|
||||||
#' colnames(x)
|
#' colnames(x)
|
||||||
#' dtrain <- xgb.DMatrix(scale(x), label = 1*(iris$Species == "versicolor"))
|
#' dtrain <- xgb.DMatrix(scale(x), label = 1*(iris$Species == "versicolor"), nthread = 2)
|
||||||
#' param <- list(booster = "gblinear", objective = "reg:logistic", eval_metric = "auc",
|
#' param <- list(booster = "gblinear", objective = "reg:logistic", eval_metric = "auc",
|
||||||
#' lambda = 0.0003, alpha = 0.0003, nthread = 2)
|
#' lambda = 0.0003, alpha = 0.0003, nthread = 2)
|
||||||
#' # For 'shotgun', which is a default linear updater, using high eta values may result in
|
#' # For 'shotgun', which is a default linear updater, using high eta values may result in
|
||||||
@@ -583,14 +585,14 @@ cb.cv.predict <- function(save_models = FALSE) {
|
|||||||
#'
|
#'
|
||||||
#' # For xgb.cv:
|
#' # For xgb.cv:
|
||||||
#' bst <- xgb.cv(param, dtrain, nfold = 5, nrounds = 100, eta = 0.8,
|
#' bst <- xgb.cv(param, dtrain, nfold = 5, nrounds = 100, eta = 0.8,
|
||||||
#' callbacks = list(cb.gblinear.history()))
|
#' callbacks = list(cb.gblinear.history()))
|
||||||
#' # coefficients in the CV fold #3
|
#' # coefficients in the CV fold #3
|
||||||
#' matplot(xgb.gblinear.history(bst)[[3]], type = 'l')
|
#' matplot(xgb.gblinear.history(bst)[[3]], type = 'l')
|
||||||
#'
|
#'
|
||||||
#'
|
#'
|
||||||
#' #### Multiclass classification:
|
#' #### Multiclass classification:
|
||||||
#' #
|
#' #
|
||||||
#' dtrain <- xgb.DMatrix(scale(x), label = as.numeric(iris$Species) - 1)
|
#' dtrain <- xgb.DMatrix(scale(x), label = as.numeric(iris$Species) - 1, nthread = 2)
|
||||||
#' param <- list(booster = "gblinear", objective = "multi:softprob", num_class = 3,
|
#' param <- list(booster = "gblinear", objective = "multi:softprob", num_class = 3,
|
||||||
#' lambda = 0.0003, alpha = 0.0003, nthread = 2)
|
#' lambda = 0.0003, alpha = 0.0003, nthread = 2)
|
||||||
#' # For the default linear updater 'shotgun' it sometimes is helpful
|
#' # For the default linear updater 'shotgun' it sometimes is helpful
|
||||||
|
|||||||
@@ -18,7 +18,7 @@
|
|||||||
#'
|
#'
|
||||||
#' @examples
|
#' @examples
|
||||||
#' data(agaricus.train, package='xgboost')
|
#' data(agaricus.train, package='xgboost')
|
||||||
#' dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label))
|
#' dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))
|
||||||
#' xgb.DMatrix.save(dtrain, 'xgb.DMatrix.data')
|
#' xgb.DMatrix.save(dtrain, 'xgb.DMatrix.data')
|
||||||
#' dtrain <- xgb.DMatrix('xgb.DMatrix.data')
|
#' dtrain <- xgb.DMatrix('xgb.DMatrix.data')
|
||||||
#' if (file.exists('xgb.DMatrix.data')) file.remove('xgb.DMatrix.data')
|
#' if (file.exists('xgb.DMatrix.data')) file.remove('xgb.DMatrix.data')
|
||||||
@@ -110,7 +110,7 @@ xgb.get.DMatrix <- function(data, label = NULL, missing = NA, weight = NULL, nth
|
|||||||
#' @examples
|
#' @examples
|
||||||
#' data(agaricus.train, package='xgboost')
|
#' data(agaricus.train, package='xgboost')
|
||||||
#' train <- agaricus.train
|
#' train <- agaricus.train
|
||||||
#' dtrain <- xgb.DMatrix(train$data, label=train$label)
|
#' dtrain <- xgb.DMatrix(train$data, label=train$label, nthread = 2)
|
||||||
#'
|
#'
|
||||||
#' stopifnot(nrow(dtrain) == nrow(train$data))
|
#' stopifnot(nrow(dtrain) == nrow(train$data))
|
||||||
#' stopifnot(ncol(dtrain) == ncol(train$data))
|
#' stopifnot(ncol(dtrain) == ncol(train$data))
|
||||||
@@ -138,7 +138,7 @@ dim.xgb.DMatrix <- function(x) {
|
|||||||
#' @examples
|
#' @examples
|
||||||
#' data(agaricus.train, package='xgboost')
|
#' data(agaricus.train, package='xgboost')
|
||||||
#' train <- agaricus.train
|
#' train <- agaricus.train
|
||||||
#' dtrain <- xgb.DMatrix(train$data, label=train$label)
|
#' dtrain <- xgb.DMatrix(train$data, label=train$label, nthread = 2)
|
||||||
#' dimnames(dtrain)
|
#' dimnames(dtrain)
|
||||||
#' colnames(dtrain)
|
#' colnames(dtrain)
|
||||||
#' colnames(dtrain) <- make.names(1:ncol(train$data))
|
#' colnames(dtrain) <- make.names(1:ncol(train$data))
|
||||||
@@ -193,7 +193,7 @@ dimnames.xgb.DMatrix <- function(x) {
|
|||||||
#'
|
#'
|
||||||
#' @examples
|
#' @examples
|
||||||
#' data(agaricus.train, package='xgboost')
|
#' data(agaricus.train, package='xgboost')
|
||||||
#' dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label))
|
#' dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))
|
||||||
#'
|
#'
|
||||||
#' labels <- getinfo(dtrain, 'label')
|
#' labels <- getinfo(dtrain, 'label')
|
||||||
#' setinfo(dtrain, 'label', 1-labels)
|
#' setinfo(dtrain, 'label', 1-labels)
|
||||||
@@ -249,7 +249,7 @@ getinfo.xgb.DMatrix <- function(object, name, ...) {
|
|||||||
#'
|
#'
|
||||||
#' @examples
|
#' @examples
|
||||||
#' data(agaricus.train, package='xgboost')
|
#' data(agaricus.train, package='xgboost')
|
||||||
#' dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label))
|
#' dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))
|
||||||
#'
|
#'
|
||||||
#' labels <- getinfo(dtrain, 'label')
|
#' labels <- getinfo(dtrain, 'label')
|
||||||
#' setinfo(dtrain, 'label', 1-labels)
|
#' setinfo(dtrain, 'label', 1-labels)
|
||||||
@@ -345,7 +345,7 @@ setinfo.xgb.DMatrix <- function(object, name, info, ...) {
|
|||||||
#'
|
#'
|
||||||
#' @examples
|
#' @examples
|
||||||
#' data(agaricus.train, package='xgboost')
|
#' data(agaricus.train, package='xgboost')
|
||||||
#' dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label))
|
#' dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))
|
||||||
#'
|
#'
|
||||||
#' dsub <- slice(dtrain, 1:42)
|
#' dsub <- slice(dtrain, 1:42)
|
||||||
#' labels1 <- getinfo(dsub, 'label')
|
#' labels1 <- getinfo(dsub, 'label')
|
||||||
@@ -401,7 +401,7 @@ slice.xgb.DMatrix <- function(object, idxset, ...) {
|
|||||||
#'
|
#'
|
||||||
#' @examples
|
#' @examples
|
||||||
#' data(agaricus.train, package='xgboost')
|
#' data(agaricus.train, package='xgboost')
|
||||||
#' dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label))
|
#' dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))
|
||||||
#'
|
#'
|
||||||
#' dtrain
|
#' dtrain
|
||||||
#' print(dtrain, verbose=TRUE)
|
#' print(dtrain, verbose=TRUE)
|
||||||
|
|||||||
@@ -7,7 +7,7 @@
|
|||||||
#'
|
#'
|
||||||
#' @examples
|
#' @examples
|
||||||
#' data(agaricus.train, package='xgboost')
|
#' data(agaricus.train, package='xgboost')
|
||||||
#' dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label))
|
#' dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))
|
||||||
#' xgb.DMatrix.save(dtrain, 'xgb.DMatrix.data')
|
#' xgb.DMatrix.save(dtrain, 'xgb.DMatrix.data')
|
||||||
#' dtrain <- xgb.DMatrix('xgb.DMatrix.data')
|
#' dtrain <- xgb.DMatrix('xgb.DMatrix.data')
|
||||||
#' if (file.exists('xgb.DMatrix.data')) file.remove('xgb.DMatrix.data')
|
#' if (file.exists('xgb.DMatrix.data')) file.remove('xgb.DMatrix.data')
|
||||||
|
|||||||
@@ -48,8 +48,8 @@
|
|||||||
#' @examples
|
#' @examples
|
||||||
#' data(agaricus.train, package='xgboost')
|
#' data(agaricus.train, package='xgboost')
|
||||||
#' data(agaricus.test, package='xgboost')
|
#' data(agaricus.test, package='xgboost')
|
||||||
#' dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label))
|
#' dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))
|
||||||
#' dtest <- with(agaricus.test, xgb.DMatrix(data, label = label))
|
#' dtest <- with(agaricus.test, xgb.DMatrix(data, label = label, nthread = 2))
|
||||||
#'
|
#'
|
||||||
#' param <- list(max_depth=2, eta=1, silent=1, objective='binary:logistic')
|
#' param <- list(max_depth=2, eta=1, silent=1, objective='binary:logistic')
|
||||||
#' nrounds = 4
|
#' nrounds = 4
|
||||||
@@ -65,8 +65,12 @@
|
|||||||
#' new.features.test <- xgb.create.features(model = bst, agaricus.test$data)
|
#' new.features.test <- xgb.create.features(model = bst, agaricus.test$data)
|
||||||
#'
|
#'
|
||||||
#' # learning with new features
|
#' # learning with new features
|
||||||
#' new.dtrain <- xgb.DMatrix(data = new.features.train, label = agaricus.train$label)
|
#' new.dtrain <- xgb.DMatrix(
|
||||||
#' new.dtest <- xgb.DMatrix(data = new.features.test, label = agaricus.test$label)
|
#' data = new.features.train, label = agaricus.train$label, nthread = 2
|
||||||
|
#' )
|
||||||
|
#' new.dtest <- xgb.DMatrix(
|
||||||
|
#' data = new.features.test, label = agaricus.test$label, nthread = 2
|
||||||
|
#' )
|
||||||
#' watchlist <- list(train = new.dtrain)
|
#' watchlist <- list(train = new.dtrain)
|
||||||
#' bst <- xgb.train(params = param, data = new.dtrain, nrounds = nrounds, nthread = 2)
|
#' bst <- xgb.train(params = param, data = new.dtrain, nrounds = nrounds, nthread = 2)
|
||||||
#'
|
#'
|
||||||
@@ -79,7 +83,7 @@
|
|||||||
#' accuracy.after, "!\n"))
|
#' accuracy.after, "!\n"))
|
||||||
#'
|
#'
|
||||||
#' @export
|
#' @export
|
||||||
xgb.create.features <- function(model, data, ...){
|
xgb.create.features <- function(model, data, ...) {
|
||||||
check.deprecation(...)
|
check.deprecation(...)
|
||||||
pred_with_leaf <- predict(model, data, predleaf = TRUE)
|
pred_with_leaf <- predict(model, data, predleaf = TRUE)
|
||||||
cols <- lapply(as.data.frame(pred_with_leaf), factor)
|
cols <- lapply(as.data.frame(pred_with_leaf), factor)
|
||||||
|
|||||||
@@ -110,9 +110,9 @@
|
|||||||
#'
|
#'
|
||||||
#' @examples
|
#' @examples
|
||||||
#' data(agaricus.train, package='xgboost')
|
#' data(agaricus.train, package='xgboost')
|
||||||
#' dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label))
|
#' dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))
|
||||||
#' cv <- xgb.cv(data = dtrain, nrounds = 3, nthread = 2, nfold = 5, metrics = list("rmse","auc"),
|
#' cv <- xgb.cv(data = dtrain, nrounds = 3, nthread = 2, nfold = 5, metrics = list("rmse","auc"),
|
||||||
#' max_depth = 3, eta = 1, objective = "binary:logistic")
|
#' max_depth = 3, eta = 1, objective = "binary:logistic")
|
||||||
#' print(cv)
|
#' print(cv)
|
||||||
#' print(cv, verbose=TRUE)
|
#' print(cv, verbose=TRUE)
|
||||||
#'
|
#'
|
||||||
@@ -192,7 +192,7 @@ xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, missing =
|
|||||||
|
|
||||||
# create the booster-folds
|
# create the booster-folds
|
||||||
# train_folds
|
# train_folds
|
||||||
dall <- xgb.get.DMatrix(data, label, missing)
|
dall <- xgb.get.DMatrix(data, label, missing, nthread = params$nthread)
|
||||||
bst_folds <- lapply(seq_along(folds), function(k) {
|
bst_folds <- lapply(seq_along(folds), function(k) {
|
||||||
dtest <- slice(dall, folds[[k]])
|
dtest <- slice(dall, folds[[k]])
|
||||||
# code originally contributed by @RolandASc on stackoverflow
|
# code originally contributed by @RolandASc on stackoverflow
|
||||||
|
|||||||
@@ -192,8 +192,8 @@
|
|||||||
#' data(agaricus.train, package='xgboost')
|
#' data(agaricus.train, package='xgboost')
|
||||||
#' data(agaricus.test, package='xgboost')
|
#' data(agaricus.test, package='xgboost')
|
||||||
#'
|
#'
|
||||||
#' dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label))
|
#' dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))
|
||||||
#' dtest <- with(agaricus.test, xgb.DMatrix(data, label = label))
|
#' dtest <- with(agaricus.test, xgb.DMatrix(data, label = label, nthread = 2))
|
||||||
#' watchlist <- list(train = dtrain, eval = dtest)
|
#' watchlist <- list(train = dtrain, eval = dtest)
|
||||||
#'
|
#'
|
||||||
#' ## A simple xgb.train example:
|
#' ## A simple xgb.train example:
|
||||||
|
|||||||
29
R-package/configure
vendored
29
R-package/configure
vendored
@@ -1,6 +1,6 @@
|
|||||||
#! /bin/sh
|
#! /bin/sh
|
||||||
# Guess values for system-dependent variables and create Makefiles.
|
# Guess values for system-dependent variables and create Makefiles.
|
||||||
# Generated by GNU Autoconf 2.69 for xgboost 1.7.0.
|
# Generated by GNU Autoconf 2.69 for xgboost 1.7.2.
|
||||||
#
|
#
|
||||||
#
|
#
|
||||||
# Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc.
|
# Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc.
|
||||||
@@ -576,8 +576,8 @@ MAKEFLAGS=
|
|||||||
# Identity of this package.
|
# Identity of this package.
|
||||||
PACKAGE_NAME='xgboost'
|
PACKAGE_NAME='xgboost'
|
||||||
PACKAGE_TARNAME='xgboost'
|
PACKAGE_TARNAME='xgboost'
|
||||||
PACKAGE_VERSION='1.7.0'
|
PACKAGE_VERSION='1.7.2'
|
||||||
PACKAGE_STRING='xgboost 1.7.0'
|
PACKAGE_STRING='xgboost 1.7.2'
|
||||||
PACKAGE_BUGREPORT=''
|
PACKAGE_BUGREPORT=''
|
||||||
PACKAGE_URL=''
|
PACKAGE_URL=''
|
||||||
|
|
||||||
@@ -1195,7 +1195,7 @@ if test "$ac_init_help" = "long"; then
|
|||||||
# Omit some internal or obsolete options to make the list less imposing.
|
# Omit some internal or obsolete options to make the list less imposing.
|
||||||
# This message is too long to be a string in the A/UX 3.1 sh.
|
# This message is too long to be a string in the A/UX 3.1 sh.
|
||||||
cat <<_ACEOF
|
cat <<_ACEOF
|
||||||
\`configure' configures xgboost 1.7.0 to adapt to many kinds of systems.
|
\`configure' configures xgboost 1.7.2 to adapt to many kinds of systems.
|
||||||
|
|
||||||
Usage: $0 [OPTION]... [VAR=VALUE]...
|
Usage: $0 [OPTION]... [VAR=VALUE]...
|
||||||
|
|
||||||
@@ -1257,7 +1257,7 @@ fi
|
|||||||
|
|
||||||
if test -n "$ac_init_help"; then
|
if test -n "$ac_init_help"; then
|
||||||
case $ac_init_help in
|
case $ac_init_help in
|
||||||
short | recursive ) echo "Configuration of xgboost 1.7.0:";;
|
short | recursive ) echo "Configuration of xgboost 1.7.2:";;
|
||||||
esac
|
esac
|
||||||
cat <<\_ACEOF
|
cat <<\_ACEOF
|
||||||
|
|
||||||
@@ -1336,7 +1336,7 @@ fi
|
|||||||
test -n "$ac_init_help" && exit $ac_status
|
test -n "$ac_init_help" && exit $ac_status
|
||||||
if $ac_init_version; then
|
if $ac_init_version; then
|
||||||
cat <<\_ACEOF
|
cat <<\_ACEOF
|
||||||
xgboost configure 1.7.0
|
xgboost configure 1.7.2
|
||||||
generated by GNU Autoconf 2.69
|
generated by GNU Autoconf 2.69
|
||||||
|
|
||||||
Copyright (C) 2012 Free Software Foundation, Inc.
|
Copyright (C) 2012 Free Software Foundation, Inc.
|
||||||
@@ -1479,7 +1479,7 @@ cat >config.log <<_ACEOF
|
|||||||
This file contains any messages produced by compilers while
|
This file contains any messages produced by compilers while
|
||||||
running configure, to aid debugging if configure makes a mistake.
|
running configure, to aid debugging if configure makes a mistake.
|
||||||
|
|
||||||
It was created by xgboost $as_me 1.7.0, which was
|
It was created by xgboost $as_me 1.7.2, which was
|
||||||
generated by GNU Autoconf 2.69. Invocation command line was
|
generated by GNU Autoconf 2.69. Invocation command line was
|
||||||
|
|
||||||
$ $0 $@
|
$ $0 $@
|
||||||
@@ -2709,8 +2709,15 @@ fi
|
|||||||
|
|
||||||
if test `uname -s` = "Darwin"
|
if test `uname -s` = "Darwin"
|
||||||
then
|
then
|
||||||
OPENMP_CXXFLAGS='-Xclang -fopenmp'
|
if command -v brew &> /dev/null
|
||||||
OPENMP_LIB='-lomp'
|
then
|
||||||
|
HOMEBREW_LIBOMP_PREFIX=`brew --prefix libomp`
|
||||||
|
else
|
||||||
|
# Homebrew not found
|
||||||
|
HOMEBREW_LIBOMP_PREFIX=''
|
||||||
|
fi
|
||||||
|
OPENMP_CXXFLAGS="-Xpreprocessor -fopenmp -I${HOMEBREW_LIBOMP_PREFIX}/include"
|
||||||
|
OPENMP_LIB="-lomp -L${HOMEBREW_LIBOMP_PREFIX}/lib"
|
||||||
ac_pkg_openmp=no
|
ac_pkg_openmp=no
|
||||||
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether OpenMP will work in a package" >&5
|
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether OpenMP will work in a package" >&5
|
||||||
$as_echo_n "checking whether OpenMP will work in a package... " >&6; }
|
$as_echo_n "checking whether OpenMP will work in a package... " >&6; }
|
||||||
@@ -3287,7 +3294,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
|
|||||||
# report actual input values of CONFIG_FILES etc. instead of their
|
# report actual input values of CONFIG_FILES etc. instead of their
|
||||||
# values after options handling.
|
# values after options handling.
|
||||||
ac_log="
|
ac_log="
|
||||||
This file was extended by xgboost $as_me 1.7.0, which was
|
This file was extended by xgboost $as_me 1.7.2, which was
|
||||||
generated by GNU Autoconf 2.69. Invocation command line was
|
generated by GNU Autoconf 2.69. Invocation command line was
|
||||||
|
|
||||||
CONFIG_FILES = $CONFIG_FILES
|
CONFIG_FILES = $CONFIG_FILES
|
||||||
@@ -3340,7 +3347,7 @@ _ACEOF
|
|||||||
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
|
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
|
||||||
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
|
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
|
||||||
ac_cs_version="\\
|
ac_cs_version="\\
|
||||||
xgboost config.status 1.7.0
|
xgboost config.status 1.7.2
|
||||||
configured by $0, generated by GNU Autoconf 2.69,
|
configured by $0, generated by GNU Autoconf 2.69,
|
||||||
with options \\"\$ac_cs_config\\"
|
with options \\"\$ac_cs_config\\"
|
||||||
|
|
||||||
|
|||||||
@@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
AC_PREREQ(2.69)
|
AC_PREREQ(2.69)
|
||||||
|
|
||||||
AC_INIT([xgboost],[1.7.0],[],[xgboost],[])
|
AC_INIT([xgboost],[1.7.2],[],[xgboost],[])
|
||||||
|
|
||||||
# Use this line to set CC variable to a C compiler
|
# Use this line to set CC variable to a C compiler
|
||||||
AC_PROG_CC
|
AC_PROG_CC
|
||||||
@@ -28,8 +28,15 @@ fi
|
|||||||
|
|
||||||
if test `uname -s` = "Darwin"
|
if test `uname -s` = "Darwin"
|
||||||
then
|
then
|
||||||
OPENMP_CXXFLAGS='-Xclang -fopenmp'
|
if command -v brew &> /dev/null
|
||||||
OPENMP_LIB='-lomp'
|
then
|
||||||
|
HOMEBREW_LIBOMP_PREFIX=`brew --prefix libomp`
|
||||||
|
else
|
||||||
|
# Homebrew not found
|
||||||
|
HOMEBREW_LIBOMP_PREFIX=''
|
||||||
|
fi
|
||||||
|
OPENMP_CXXFLAGS="-Xpreprocessor -fopenmp -I${HOMEBREW_LIBOMP_PREFIX}/include"
|
||||||
|
OPENMP_LIB="-lomp -L${HOMEBREW_LIBOMP_PREFIX}/lib"
|
||||||
ac_pkg_openmp=no
|
ac_pkg_openmp=no
|
||||||
AC_MSG_CHECKING([whether OpenMP will work in a package])
|
AC_MSG_CHECKING([whether OpenMP will work in a package])
|
||||||
AC_LANG_CONFTEST([AC_LANG_PROGRAM([[#include <omp.h>]], [[ return (omp_get_max_threads() <= 1); ]])])
|
AC_LANG_CONFTEST([AC_LANG_PROGRAM([[#include <omp.h>]], [[ return (omp_get_max_threads() <= 1); ]])])
|
||||||
|
|||||||
@@ -15,9 +15,11 @@ selected per iteration.}
|
|||||||
}
|
}
|
||||||
\value{
|
\value{
|
||||||
Results are stored in the \code{coefs} element of the closure.
|
Results are stored in the \code{coefs} element of the closure.
|
||||||
The \code{\link{xgb.gblinear.history}} convenience function provides an easy way to access it.
|
The \code{\link{xgb.gblinear.history}} convenience function provides an easy
|
||||||
|
way to access it.
|
||||||
With \code{xgb.train}, it is either a dense or a sparse matrix.
|
With \code{xgb.train}, it is either a dense or a sparse matrix.
|
||||||
While with \code{xgb.cv}, it is a list (an element per each fold) of such matrices.
|
While with \code{xgb.cv}, it is a list (an element per each fold) of such
|
||||||
|
matrices.
|
||||||
}
|
}
|
||||||
\description{
|
\description{
|
||||||
Callback closure for collecting the model coefficients history of a gblinear booster
|
Callback closure for collecting the model coefficients history of a gblinear booster
|
||||||
@@ -38,7 +40,7 @@ Callback function expects the following values to be set in its calling frame:
|
|||||||
# without considering the 2nd order interactions:
|
# without considering the 2nd order interactions:
|
||||||
x <- model.matrix(Species ~ .^2, iris)[,-1]
|
x <- model.matrix(Species ~ .^2, iris)[,-1]
|
||||||
colnames(x)
|
colnames(x)
|
||||||
dtrain <- xgb.DMatrix(scale(x), label = 1*(iris$Species == "versicolor"))
|
dtrain <- xgb.DMatrix(scale(x), label = 1*(iris$Species == "versicolor"), nthread = 2)
|
||||||
param <- list(booster = "gblinear", objective = "reg:logistic", eval_metric = "auc",
|
param <- list(booster = "gblinear", objective = "reg:logistic", eval_metric = "auc",
|
||||||
lambda = 0.0003, alpha = 0.0003, nthread = 2)
|
lambda = 0.0003, alpha = 0.0003, nthread = 2)
|
||||||
# For 'shotgun', which is a default linear updater, using high eta values may result in
|
# For 'shotgun', which is a default linear updater, using high eta values may result in
|
||||||
@@ -63,14 +65,14 @@ matplot(xgb.gblinear.history(bst), type = 'l')
|
|||||||
|
|
||||||
# For xgb.cv:
|
# For xgb.cv:
|
||||||
bst <- xgb.cv(param, dtrain, nfold = 5, nrounds = 100, eta = 0.8,
|
bst <- xgb.cv(param, dtrain, nfold = 5, nrounds = 100, eta = 0.8,
|
||||||
callbacks = list(cb.gblinear.history()))
|
callbacks = list(cb.gblinear.history()))
|
||||||
# coefficients in the CV fold #3
|
# coefficients in the CV fold #3
|
||||||
matplot(xgb.gblinear.history(bst)[[3]], type = 'l')
|
matplot(xgb.gblinear.history(bst)[[3]], type = 'l')
|
||||||
|
|
||||||
|
|
||||||
#### Multiclass classification:
|
#### Multiclass classification:
|
||||||
#
|
#
|
||||||
dtrain <- xgb.DMatrix(scale(x), label = as.numeric(iris$Species) - 1)
|
dtrain <- xgb.DMatrix(scale(x), label = as.numeric(iris$Species) - 1, nthread = 2)
|
||||||
param <- list(booster = "gblinear", objective = "multi:softprob", num_class = 3,
|
param <- list(booster = "gblinear", objective = "multi:softprob", num_class = 3,
|
||||||
lambda = 0.0003, alpha = 0.0003, nthread = 2)
|
lambda = 0.0003, alpha = 0.0003, nthread = 2)
|
||||||
# For the default linear updater 'shotgun' it sometimes is helpful
|
# For the default linear updater 'shotgun' it sometimes is helpful
|
||||||
|
|||||||
@@ -19,7 +19,7 @@ be directly used with an \code{xgb.DMatrix} object.
|
|||||||
\examples{
|
\examples{
|
||||||
data(agaricus.train, package='xgboost')
|
data(agaricus.train, package='xgboost')
|
||||||
train <- agaricus.train
|
train <- agaricus.train
|
||||||
dtrain <- xgb.DMatrix(train$data, label=train$label)
|
dtrain <- xgb.DMatrix(train$data, label=train$label, nthread = 2)
|
||||||
|
|
||||||
stopifnot(nrow(dtrain) == nrow(train$data))
|
stopifnot(nrow(dtrain) == nrow(train$data))
|
||||||
stopifnot(ncol(dtrain) == ncol(train$data))
|
stopifnot(ncol(dtrain) == ncol(train$data))
|
||||||
|
|||||||
@@ -26,7 +26,7 @@ Since row names are irrelevant, it is recommended to use \code{colnames} directl
|
|||||||
\examples{
|
\examples{
|
||||||
data(agaricus.train, package='xgboost')
|
data(agaricus.train, package='xgboost')
|
||||||
train <- agaricus.train
|
train <- agaricus.train
|
||||||
dtrain <- xgb.DMatrix(train$data, label=train$label)
|
dtrain <- xgb.DMatrix(train$data, label=train$label, nthread = 2)
|
||||||
dimnames(dtrain)
|
dimnames(dtrain)
|
||||||
colnames(dtrain)
|
colnames(dtrain)
|
||||||
colnames(dtrain) <- make.names(1:ncol(train$data))
|
colnames(dtrain) <- make.names(1:ncol(train$data))
|
||||||
|
|||||||
@@ -34,7 +34,7 @@ The \code{name} field can be one of the following:
|
|||||||
}
|
}
|
||||||
\examples{
|
\examples{
|
||||||
data(agaricus.train, package='xgboost')
|
data(agaricus.train, package='xgboost')
|
||||||
dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label))
|
dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))
|
||||||
|
|
||||||
labels <- getinfo(dtrain, 'label')
|
labels <- getinfo(dtrain, 'label')
|
||||||
setinfo(dtrain, 'label', 1-labels)
|
setinfo(dtrain, 'label', 1-labels)
|
||||||
|
|||||||
@@ -19,7 +19,7 @@ Currently it displays dimensions and presence of info-fields and colnames.
|
|||||||
}
|
}
|
||||||
\examples{
|
\examples{
|
||||||
data(agaricus.train, package='xgboost')
|
data(agaricus.train, package='xgboost')
|
||||||
dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label))
|
dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))
|
||||||
|
|
||||||
dtrain
|
dtrain
|
||||||
print(dtrain, verbose=TRUE)
|
print(dtrain, verbose=TRUE)
|
||||||
|
|||||||
@@ -33,7 +33,7 @@ The \code{name} field can be one of the following:
|
|||||||
}
|
}
|
||||||
\examples{
|
\examples{
|
||||||
data(agaricus.train, package='xgboost')
|
data(agaricus.train, package='xgboost')
|
||||||
dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label))
|
dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))
|
||||||
|
|
||||||
labels <- getinfo(dtrain, 'label')
|
labels <- getinfo(dtrain, 'label')
|
||||||
setinfo(dtrain, 'label', 1-labels)
|
setinfo(dtrain, 'label', 1-labels)
|
||||||
|
|||||||
@@ -28,7 +28,7 @@ original xgb.DMatrix object
|
|||||||
}
|
}
|
||||||
\examples{
|
\examples{
|
||||||
data(agaricus.train, package='xgboost')
|
data(agaricus.train, package='xgboost')
|
||||||
dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label))
|
dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))
|
||||||
|
|
||||||
dsub <- slice(dtrain, 1:42)
|
dsub <- slice(dtrain, 1:42)
|
||||||
labels1 <- getinfo(dsub, 'label')
|
labels1 <- getinfo(dsub, 'label')
|
||||||
|
|||||||
@@ -38,7 +38,7 @@ Supported input file formats are either a LIBSVM text file or a binary file that
|
|||||||
}
|
}
|
||||||
\examples{
|
\examples{
|
||||||
data(agaricus.train, package='xgboost')
|
data(agaricus.train, package='xgboost')
|
||||||
dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label))
|
dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))
|
||||||
xgb.DMatrix.save(dtrain, 'xgb.DMatrix.data')
|
xgb.DMatrix.save(dtrain, 'xgb.DMatrix.data')
|
||||||
dtrain <- xgb.DMatrix('xgb.DMatrix.data')
|
dtrain <- xgb.DMatrix('xgb.DMatrix.data')
|
||||||
if (file.exists('xgb.DMatrix.data')) file.remove('xgb.DMatrix.data')
|
if (file.exists('xgb.DMatrix.data')) file.remove('xgb.DMatrix.data')
|
||||||
|
|||||||
@@ -16,7 +16,7 @@ Save xgb.DMatrix object to binary file
|
|||||||
}
|
}
|
||||||
\examples{
|
\examples{
|
||||||
data(agaricus.train, package='xgboost')
|
data(agaricus.train, package='xgboost')
|
||||||
dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label))
|
dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))
|
||||||
xgb.DMatrix.save(dtrain, 'xgb.DMatrix.data')
|
xgb.DMatrix.save(dtrain, 'xgb.DMatrix.data')
|
||||||
dtrain <- xgb.DMatrix('xgb.DMatrix.data')
|
dtrain <- xgb.DMatrix('xgb.DMatrix.data')
|
||||||
if (file.exists('xgb.DMatrix.data')) file.remove('xgb.DMatrix.data')
|
if (file.exists('xgb.DMatrix.data')) file.remove('xgb.DMatrix.data')
|
||||||
|
|||||||
@@ -59,8 +59,8 @@ a rule on certain features."
|
|||||||
\examples{
|
\examples{
|
||||||
data(agaricus.train, package='xgboost')
|
data(agaricus.train, package='xgboost')
|
||||||
data(agaricus.test, package='xgboost')
|
data(agaricus.test, package='xgboost')
|
||||||
dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label))
|
dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))
|
||||||
dtest <- with(agaricus.test, xgb.DMatrix(data, label = label))
|
dtest <- with(agaricus.test, xgb.DMatrix(data, label = label, nthread = 2))
|
||||||
|
|
||||||
param <- list(max_depth=2, eta=1, silent=1, objective='binary:logistic')
|
param <- list(max_depth=2, eta=1, silent=1, objective='binary:logistic')
|
||||||
nrounds = 4
|
nrounds = 4
|
||||||
@@ -76,8 +76,12 @@ new.features.train <- xgb.create.features(model = bst, agaricus.train$data)
|
|||||||
new.features.test <- xgb.create.features(model = bst, agaricus.test$data)
|
new.features.test <- xgb.create.features(model = bst, agaricus.test$data)
|
||||||
|
|
||||||
# learning with new features
|
# learning with new features
|
||||||
new.dtrain <- xgb.DMatrix(data = new.features.train, label = agaricus.train$label)
|
new.dtrain <- xgb.DMatrix(
|
||||||
new.dtest <- xgb.DMatrix(data = new.features.test, label = agaricus.test$label)
|
data = new.features.train, label = agaricus.train$label, nthread = 2
|
||||||
|
)
|
||||||
|
new.dtest <- xgb.DMatrix(
|
||||||
|
data = new.features.test, label = agaricus.test$label, nthread = 2
|
||||||
|
)
|
||||||
watchlist <- list(train = new.dtrain)
|
watchlist <- list(train = new.dtrain)
|
||||||
bst <- xgb.train(params = param, data = new.dtrain, nrounds = nrounds, nthread = 2)
|
bst <- xgb.train(params = param, data = new.dtrain, nrounds = nrounds, nthread = 2)
|
||||||
|
|
||||||
|
|||||||
@@ -158,9 +158,9 @@ Adapted from \url{https://en.wikipedia.org/wiki/Cross-validation_\%28statistics\
|
|||||||
}
|
}
|
||||||
\examples{
|
\examples{
|
||||||
data(agaricus.train, package='xgboost')
|
data(agaricus.train, package='xgboost')
|
||||||
dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label))
|
dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))
|
||||||
cv <- xgb.cv(data = dtrain, nrounds = 3, nthread = 2, nfold = 5, metrics = list("rmse","auc"),
|
cv <- xgb.cv(data = dtrain, nrounds = 3, nthread = 2, nfold = 5, metrics = list("rmse","auc"),
|
||||||
max_depth = 3, eta = 1, objective = "binary:logistic")
|
max_depth = 3, eta = 1, objective = "binary:logistic")
|
||||||
print(cv)
|
print(cv)
|
||||||
print(cv, verbose=TRUE)
|
print(cv, verbose=TRUE)
|
||||||
|
|
||||||
|
|||||||
@@ -241,8 +241,8 @@ The following callbacks are automatically created when certain parameters are se
|
|||||||
data(agaricus.train, package='xgboost')
|
data(agaricus.train, package='xgboost')
|
||||||
data(agaricus.test, package='xgboost')
|
data(agaricus.test, package='xgboost')
|
||||||
|
|
||||||
dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label))
|
dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))
|
||||||
dtest <- with(agaricus.test, xgb.DMatrix(data, label = label))
|
dtest <- with(agaricus.test, xgb.DMatrix(data, label = label, nthread = 2))
|
||||||
watchlist <- list(train = dtrain, eval = dtest)
|
watchlist <- list(train = dtrain, eval = dtest)
|
||||||
|
|
||||||
## A simple xgb.train example:
|
## A simple xgb.train example:
|
||||||
|
|||||||
@@ -1 +1 @@
|
|||||||
@xgboost_VERSION_MAJOR@.@xgboost_VERSION_MINOR@.@xgboost_VERSION_PATCH@-dev
|
@xgboost_VERSION_MAJOR@.@xgboost_VERSION_MINOR@.@xgboost_VERSION_PATCH@
|
||||||
|
|||||||
@@ -4,7 +4,7 @@ XGBoost Release Policy
|
|||||||
=======================
|
=======================
|
||||||
|
|
||||||
Versioning Policy
|
Versioning Policy
|
||||||
---------------------------
|
-----------------
|
||||||
|
|
||||||
Starting from XGBoost 1.0.0, each XGBoost release will be versioned as [MAJOR].[FEATURE].[MAINTENANCE]
|
Starting from XGBoost 1.0.0, each XGBoost release will be versioned as [MAJOR].[FEATURE].[MAINTENANCE]
|
||||||
|
|
||||||
@@ -34,6 +34,20 @@ Making a Release
|
|||||||
|
|
||||||
+ The CRAN package is maintained by `Tong He <https://github.com/hetong007>`_ and `Jiaming Yuan <https://github.com/trivialfis>`__.
|
+ The CRAN package is maintained by `Tong He <https://github.com/hetong007>`_ and `Jiaming Yuan <https://github.com/trivialfis>`__.
|
||||||
|
|
||||||
Before submitting a release, one should test the package on `R-hub <https://builder.r-hub.io/>`__ and `win-builder <https://win-builder.r-project.org/>`__ first. Please note that the R-hub Windows instance doesn't have the exact same environment as the one hosted on win-builder.
|
|
||||||
|
|
||||||
+ The Maven package is maintained by `Nan Zhu <https://github.com/CodingCat>`_ and `Hyunsu Cho <https://github.com/hcho3>`_.
|
+ The Maven package is maintained by `Nan Zhu <https://github.com/CodingCat>`_ and `Hyunsu Cho <https://github.com/hcho3>`_.
|
||||||
|
|
||||||
|
|
||||||
|
R CRAN Package
|
||||||
|
--------------
|
||||||
|
Before submitting a release, one should test the package on `R-hub <https://builder.r-hub.io/>`__ and `win-builder <https://win-builder.r-project.org/>`__ first. Please note that the R-hub Windows instance doesn't have the exact same environment as the one hosted on win-builder.
|
||||||
|
|
||||||
|
According to the `CRAN policy <https://cran.r-project.org/web/packages/policies.html>`__:
|
||||||
|
|
||||||
|
If running a package uses multiple threads/cores it must never use more than two simultaneously: the check farm is a shared resource and will typically be running many checks simultaneously.
|
||||||
|
|
||||||
|
We need to check the number of CPUs used in examples. Export ``_R_CHECK_EXAMPLE_TIMING_CPU_TO_ELAPSED_THRESHOLD_=2.5`` before running ``R CMD check --as-cran`` `[1] <#references>`__ and make sure the machine you are using has enough CPU cores to reveal any potential policy violation.
|
||||||
|
|
||||||
|
References
|
||||||
|
----------
|
||||||
|
|
||||||
|
[1] https://stat.ethz.ch/pipermail/r-package-devel/2022q4/008610.html
|
||||||
|
|||||||
@@ -4,36 +4,21 @@ XGBoost GPU Support
|
|||||||
|
|
||||||
This page contains information about GPU algorithms supported in XGBoost.
|
This page contains information about GPU algorithms supported in XGBoost.
|
||||||
|
|
||||||
.. note:: CUDA 10.1, Compute Capability 3.5 required
|
.. note:: CUDA 11.0, Compute Capability 5.0 required (See `this list <https://en.wikipedia.org/wiki/CUDA#GPUs_supported>`_ to look up compute capability of your GPU card.)
|
||||||
|
|
||||||
The GPU algorithms in XGBoost require a graphics card with compute capability 3.5 or higher, with
|
|
||||||
CUDA toolkits 10.1 or later.
|
|
||||||
(See `this list <https://en.wikipedia.org/wiki/CUDA#GPUs_supported>`_ to look up compute capability of your GPU card.)
|
|
||||||
|
|
||||||
*********************************************
|
*********************************************
|
||||||
CUDA Accelerated Tree Construction Algorithms
|
CUDA Accelerated Tree Construction Algorithms
|
||||||
*********************************************
|
*********************************************
|
||||||
Tree construction (training) and prediction can be accelerated with CUDA-capable GPUs.
|
|
||||||
|
Most of the algorithms in XGBoost including training, prediction and evaluation can be accelerated with CUDA-capable GPUs.
|
||||||
|
|
||||||
Usage
|
Usage
|
||||||
=====
|
=====
|
||||||
Specify the ``tree_method`` parameter as one of the following algorithms.
|
Specify the ``tree_method`` parameter as ``gpu_hist``. For details around the ``tree_method`` parameter, see :doc:`tree method </treemethod>`.
|
||||||
|
|
||||||
Algorithms
|
|
||||||
----------
|
|
||||||
|
|
||||||
+-----------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------+
|
|
||||||
| tree_method | Description |
|
|
||||||
+=======================+=======================================================================================================================================================================+
|
|
||||||
| gpu_hist | Equivalent to the XGBoost fast histogram algorithm. Much faster and uses considerably less memory. NOTE: May run very slowly on GPUs older than Pascal architecture. |
|
|
||||||
+-----------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------+
|
|
||||||
|
|
||||||
Supported parameters
|
Supported parameters
|
||||||
--------------------
|
--------------------
|
||||||
|
|
||||||
.. |tick| unicode:: U+2714
|
|
||||||
.. |cross| unicode:: U+2718
|
|
||||||
|
|
||||||
GPU accelerated prediction is enabled by default for the above mentioned ``tree_method`` parameters but can be switched to CPU prediction by setting ``predictor`` to ``cpu_predictor``. This could be useful if you want to conserve GPU memory. Likewise when using CPU algorithms, GPU accelerated prediction can be enabled by setting ``predictor`` to ``gpu_predictor``.
|
GPU accelerated prediction is enabled by default for the above mentioned ``tree_method`` parameters but can be switched to CPU prediction by setting ``predictor`` to ``cpu_predictor``. This could be useful if you want to conserve GPU memory. Likewise when using CPU algorithms, GPU accelerated prediction can be enabled by setting ``predictor`` to ``gpu_predictor``.
|
||||||
|
|
||||||
The device ordinal (which GPU to use if you have many of them) can be selected using the
|
The device ordinal (which GPU to use if you have many of them) can be selected using the
|
||||||
@@ -69,128 +54,9 @@ See examples `here
|
|||||||
|
|
||||||
Multi-node Multi-GPU Training
|
Multi-node Multi-GPU Training
|
||||||
=============================
|
=============================
|
||||||
XGBoost supports fully distributed GPU training using `Dask <https://dask.org/>`_. For
|
|
||||||
getting started see our tutorial :doc:`/tutorials/dask` and worked examples `here
|
|
||||||
<https://github.com/dmlc/xgboost/tree/master/demo/dask>`__, also Python documentation
|
|
||||||
:ref:`dask_api` for complete reference.
|
|
||||||
|
|
||||||
|
XGBoost supports fully distributed GPU training using `Dask <https://dask.org/>`_, ``Spark`` and ``PySpark``. For getting started with Dask see our tutorial :doc:`/tutorials/dask` and worked examples `here <https://github.com/dmlc/xgboost/tree/master/demo/dask>`__, also Python documentation :ref:`dask_api` for complete reference. For usage with ``Spark`` using Scala see :doc:`/jvm/xgboost4j_spark_gpu_tutorial`. Lastly for distributed GPU training with ``PySpark``, see :doc:`/tutorials/spark_estimator`.
|
||||||
|
|
||||||
Objective functions
|
|
||||||
===================
|
|
||||||
Most of the objective functions implemented in XGBoost can be run on GPU. Following table shows current support status.
|
|
||||||
|
|
||||||
+----------------------+-------------+
|
|
||||||
| Objectives | GPU support |
|
|
||||||
+----------------------+-------------+
|
|
||||||
| reg:squarederror | |tick| |
|
|
||||||
+----------------------+-------------+
|
|
||||||
| reg:squaredlogerror | |tick| |
|
|
||||||
+----------------------+-------------+
|
|
||||||
| reg:logistic | |tick| |
|
|
||||||
+----------------------+-------------+
|
|
||||||
| reg:pseudohubererror | |tick| |
|
|
||||||
+----------------------+-------------+
|
|
||||||
| binary:logistic | |tick| |
|
|
||||||
+----------------------+-------------+
|
|
||||||
| binary:logitraw | |tick| |
|
|
||||||
+----------------------+-------------+
|
|
||||||
| binary:hinge | |tick| |
|
|
||||||
+----------------------+-------------+
|
|
||||||
| count:poisson | |tick| |
|
|
||||||
+----------------------+-------------+
|
|
||||||
| reg:gamma | |tick| |
|
|
||||||
+----------------------+-------------+
|
|
||||||
| reg:tweedie | |tick| |
|
|
||||||
+----------------------+-------------+
|
|
||||||
| multi:softmax | |tick| |
|
|
||||||
+----------------------+-------------+
|
|
||||||
| multi:softprob | |tick| |
|
|
||||||
+----------------------+-------------+
|
|
||||||
| survival:cox | |cross| |
|
|
||||||
+----------------------+-------------+
|
|
||||||
| survival:aft | |tick| |
|
|
||||||
+----------------------+-------------+
|
|
||||||
| rank:pairwise | |tick| |
|
|
||||||
+----------------------+-------------+
|
|
||||||
| rank:ndcg | |tick| |
|
|
||||||
+----------------------+-------------+
|
|
||||||
| rank:map | |tick| |
|
|
||||||
+----------------------+-------------+
|
|
||||||
|
|
||||||
Objectives will run on GPU if the GPU updater (``gpu_hist``) is used, otherwise they will run on CPU by
|
|
||||||
default. For unsupported objectives XGBoost will fall back to using CPU implementation by
|
|
||||||
default. Note that when using GPU ranking objective, the result is not deterministic due
|
|
||||||
to the non-associative aspect of floating point summation.
|
|
||||||
|
|
||||||
Metric functions
|
|
||||||
===================
|
|
||||||
Following table shows current support status for evaluation metrics on the GPU.
|
|
||||||
|
|
||||||
+------------------------------+-------------+
|
|
||||||
| Metric | GPU Support |
|
|
||||||
+==============================+=============+
|
|
||||||
| rmse | |tick| |
|
|
||||||
+------------------------------+-------------+
|
|
||||||
| rmsle | |tick| |
|
|
||||||
+------------------------------+-------------+
|
|
||||||
| mae | |tick| |
|
|
||||||
+------------------------------+-------------+
|
|
||||||
| mape | |tick| |
|
|
||||||
+------------------------------+-------------+
|
|
||||||
| mphe | |tick| |
|
|
||||||
+------------------------------+-------------+
|
|
||||||
| logloss | |tick| |
|
|
||||||
+------------------------------+-------------+
|
|
||||||
| error | |tick| |
|
|
||||||
+------------------------------+-------------+
|
|
||||||
| merror | |tick| |
|
|
||||||
+------------------------------+-------------+
|
|
||||||
| mlogloss | |tick| |
|
|
||||||
+------------------------------+-------------+
|
|
||||||
| auc | |tick| |
|
|
||||||
+------------------------------+-------------+
|
|
||||||
| aucpr | |tick| |
|
|
||||||
+------------------------------+-------------+
|
|
||||||
| ndcg | |tick| |
|
|
||||||
+------------------------------+-------------+
|
|
||||||
| map | |tick| |
|
|
||||||
+------------------------------+-------------+
|
|
||||||
| poisson-nloglik | |tick| |
|
|
||||||
+------------------------------+-------------+
|
|
||||||
| gamma-nloglik | |tick| |
|
|
||||||
+------------------------------+-------------+
|
|
||||||
| cox-nloglik | |cross| |
|
|
||||||
+------------------------------+-------------+
|
|
||||||
| aft-nloglik | |tick| |
|
|
||||||
+------------------------------+-------------+
|
|
||||||
| interval-regression-accuracy | |tick| |
|
|
||||||
+------------------------------+-------------+
|
|
||||||
| gamma-deviance | |tick| |
|
|
||||||
+------------------------------+-------------+
|
|
||||||
| tweedie-nloglik | |tick| |
|
|
||||||
+------------------------------+-------------+
|
|
||||||
|
|
||||||
Similar to objective functions, default device for metrics is selected based on tree
|
|
||||||
updater and predictor (which is selected based on tree updater).
|
|
||||||
|
|
||||||
Benchmarks
|
|
||||||
==========
|
|
||||||
You can run benchmarks on synthetic data for binary classification:
|
|
||||||
|
|
||||||
.. code-block:: bash
|
|
||||||
|
|
||||||
python tests/benchmark/benchmark_tree.py --tree_method=gpu_hist
|
|
||||||
python tests/benchmark/benchmark_tree.py --tree_method=hist
|
|
||||||
|
|
||||||
Training time on 1,000,000 rows x 50 columns of random data with 500 boosting iterations and 0.25/0.75 test/train split with AMD Ryzen 7 2700 8 core @3.20GHz and NVIDIA 1080ti yields the following results:
|
|
||||||
|
|
||||||
+--------------+----------+
|
|
||||||
| tree_method | Time (s) |
|
|
||||||
+==============+==========+
|
|
||||||
| gpu_hist | 12.57 |
|
|
||||||
+--------------+----------+
|
|
||||||
| hist | 36.01 |
|
|
||||||
+--------------+----------+
|
|
||||||
|
|
||||||
Memory usage
|
Memory usage
|
||||||
============
|
============
|
||||||
@@ -202,7 +68,7 @@ The dataset itself is stored on device in a compressed ELLPACK format. The ELLPA
|
|||||||
|
|
||||||
Working memory is allocated inside the algorithm proportional to the number of rows to keep track of gradients, tree positions and other per row statistics. Memory is allocated for histogram bins proportional to the number of bins, number of features and nodes in the tree. For performance reasons we keep histograms in memory from previous nodes in the tree, when a certain threshold of memory usage is passed we stop doing this to conserve memory at some performance loss.
|
Working memory is allocated inside the algorithm proportional to the number of rows to keep track of gradients, tree positions and other per row statistics. Memory is allocated for histogram bins proportional to the number of bins, number of features and nodes in the tree. For performance reasons we keep histograms in memory from previous nodes in the tree, when a certain threshold of memory usage is passed we stop doing this to conserve memory at some performance loss.
|
||||||
|
|
||||||
If you are getting out-of-memory errors on a big dataset, try :py:class:`xgboost.DeviceQuantileDMatrix` or the :doc:`external memory version </tutorials/external_memory>`.
|
If you are getting out-of-memory errors on a big dataset, try :py:class:`xgboost.QuantileDMatrix` or the :doc:`external memory version </tutorials/external_memory>`. Note that when ``external memory`` is used for GPU hist, it's best to employ gradient based sampling as well. Last but not least, ``inplace_predict`` can be preferred over ``predict`` when data is already on GPU. Both ``QuantileDMatrix`` and ``inplace_predict`` are automatically enabled if you are using the scikit-learn interface.
|
||||||
|
|
||||||
Developer notes
|
Developer notes
|
||||||
===============
|
===============
|
||||||
|
|||||||
@@ -44,8 +44,7 @@ General Parameters
|
|||||||
* ``validate_parameters`` [default to ``false``, except for Python, R and CLI interface]
|
* ``validate_parameters`` [default to ``false``, except for Python, R and CLI interface]
|
||||||
|
|
||||||
- When set to True, XGBoost will perform validation of input parameters to check whether
|
- When set to True, XGBoost will perform validation of input parameters to check whether
|
||||||
a parameter is used or not. The feature is still experimental. It's expected to have
|
a parameter is used or not.
|
||||||
some false positives.
|
|
||||||
|
|
||||||
* ``nthread`` [default to maximum number of threads available if not set]
|
* ``nthread`` [default to maximum number of threads available if not set]
|
||||||
|
|
||||||
@@ -233,24 +232,21 @@ Parameters for Categorical Feature
|
|||||||
These parameters are only used for training with categorical data. See
|
These parameters are only used for training with categorical data. See
|
||||||
:doc:`/tutorials/categorical` for more information.
|
:doc:`/tutorials/categorical` for more information.
|
||||||
|
|
||||||
|
.. note:: These parameters are experimental. ``exact`` tree method is not yet supported.
|
||||||
|
|
||||||
|
|
||||||
* ``max_cat_to_onehot``
|
* ``max_cat_to_onehot``
|
||||||
|
|
||||||
.. versionadded:: 1.6.0
|
.. versionadded:: 1.6.0
|
||||||
|
|
||||||
.. note:: This parameter is experimental. ``exact`` tree method is not yet supported.
|
|
||||||
|
|
||||||
- A threshold for deciding whether XGBoost should use one-hot encoding based split for
|
- A threshold for deciding whether XGBoost should use one-hot encoding based split for
|
||||||
categorical data. When the number of categories is less than the threshold, one-hot
|
categorical data. When the number of categories is less than the threshold, one-hot
|
||||||
encoding is chosen, otherwise the categories will be partitioned into children nodes.
|
encoding is chosen, otherwise the categories will be partitioned into children nodes.
|
||||||
Only relevant for regression and binary classification. Also, ``exact`` tree method is
|
|
||||||
not supported
|
|
||||||
|
|
||||||
* ``max_cat_threshold``
|
* ``max_cat_threshold``
|
||||||
|
|
||||||
.. versionadded:: 1.7.0
|
.. versionadded:: 1.7.0
|
||||||
|
|
||||||
.. note:: This parameter is experimental. ``exact`` tree method is not yet supported.
|
|
||||||
|
|
||||||
- Maximum number of categories considered for each split. Used only by partition-based
|
- Maximum number of categories considered for each split. Used only by partition-based
|
||||||
splits for preventing over-fitting.
|
splits for preventing over-fitting.
|
||||||
|
|
||||||
|
|||||||
@@ -25,9 +25,6 @@ Core Data Structure
|
|||||||
.. autoclass:: xgboost.QuantileDMatrix
|
.. autoclass:: xgboost.QuantileDMatrix
|
||||||
:show-inheritance:
|
:show-inheritance:
|
||||||
|
|
||||||
.. autoclass:: xgboost.DeviceQuantileDMatrix
|
|
||||||
:show-inheritance:
|
|
||||||
|
|
||||||
.. autoclass:: xgboost.Booster
|
.. autoclass:: xgboost.Booster
|
||||||
:members:
|
:members:
|
||||||
:show-inheritance:
|
:show-inheritance:
|
||||||
@@ -115,7 +112,7 @@ Dask API
|
|||||||
:inherited-members:
|
:inherited-members:
|
||||||
:show-inheritance:
|
:show-inheritance:
|
||||||
|
|
||||||
.. autoclass:: xgboost.dask.DaskDeviceQuantileDMatrix
|
.. autoclass:: xgboost.dask.DaskQuantileDMatrix
|
||||||
:members:
|
:members:
|
||||||
:inherited-members:
|
:inherited-members:
|
||||||
:show-inheritance:
|
:show-inheritance:
|
||||||
|
|||||||
@@ -564,7 +564,7 @@ Here are some practices on reducing memory usage with dask and xgboost.
|
|||||||
nice summary.
|
nice summary.
|
||||||
|
|
||||||
- When using GPU input, like dataframe loaded by ``dask_cudf``, you can try
|
- When using GPU input, like dataframe loaded by ``dask_cudf``, you can try
|
||||||
:py:class:`xgboost.dask.DaskDeviceQuantileDMatrix` as a drop in replacement for ``DaskDMatrix``
|
:py:class:`xgboost.dask.DaskQuantileDMatrix` as a drop in replacement for ``DaskDMatrix``
|
||||||
to reduce overall memory usage. See
|
to reduce overall memory usage. See
|
||||||
:ref:`sphx_glr_python_dask-examples_gpu_training.py` for an example.
|
:ref:`sphx_glr_python_dask-examples_gpu_training.py` for an example.
|
||||||
|
|
||||||
|
|||||||
@@ -83,17 +83,52 @@ generate result dataset with 3 new columns:
|
|||||||
XGBoost PySpark GPU support
|
XGBoost PySpark GPU support
|
||||||
***************************
|
***************************
|
||||||
|
|
||||||
XGBoost PySpark supports GPU training and prediction. To enable GPU support, first you
|
XGBoost PySpark fully supports GPU acceleration. Users are not only able to enable
|
||||||
need to install the XGBoost and the `cuDF <https://docs.rapids.ai/api/cudf/stable/>`_
|
efficient training but also utilize their GPUs for the whole PySpark pipeline including
|
||||||
package. Then you can set `use_gpu` parameter to `True`.
|
ETL and inference. In below sections, we will walk through an example of training on a
|
||||||
|
PySpark standalone GPU cluster. To get started, first we need to install some additional
|
||||||
|
packages, then we can set the `use_gpu` parameter to `True`.
|
||||||
|
|
||||||
Below tutorial demonstrates how to train a model with XGBoost PySpark GPU on Spark
|
Prepare the necessary packages
|
||||||
standalone cluster.
|
==============================
|
||||||
|
|
||||||
|
Aside from the PySpark and XGBoost modules, we also need the `cuDF
|
||||||
|
<https://docs.rapids.ai/api/cudf/stable/>`_ package for handling Spark dataframe. We
|
||||||
|
recommend using either Conda or Virtualenv to manage python dependencies for PySpark
|
||||||
|
jobs. Please refer to `How to Manage Python Dependencies in PySpark
|
||||||
|
<https://www.databricks.com/blog/2020/12/22/how-to-manage-python-dependencies-in-pyspark.html>`_
|
||||||
|
for more details on PySpark dependency management.
|
||||||
|
|
||||||
|
In short, to create a Python environment that can be sent to a remote cluster using
|
||||||
|
virtualenv and pip:
|
||||||
|
|
||||||
|
.. code-block:: bash
|
||||||
|
|
||||||
|
python -m venv xgboost_env
|
||||||
|
source xgboost_env/bin/activate
|
||||||
|
pip install pyarrow pandas venv-pack xgboost
|
||||||
|
# https://rapids.ai/pip.html#install
|
||||||
|
pip install cudf-cu11 --extra-index-url=https://pypi.ngc.nvidia.com
|
||||||
|
venv-pack -o xgboost_env.tar.gz
|
||||||
|
|
||||||
|
With Conda:
|
||||||
|
|
||||||
|
.. code-block:: bash
|
||||||
|
|
||||||
|
conda create -y -n xgboost_env -c conda-forge conda-pack python=3.9
|
||||||
|
conda activate xgboost_env
|
||||||
|
# use conda when the supported version of xgboost (1.7) is released on conda-forge
|
||||||
|
pip install xgboost
|
||||||
|
conda install cudf pyarrow pandas -c rapids -c nvidia -c conda-forge
|
||||||
|
conda pack -f -o xgboost_env.tar.gz
|
||||||
|
|
||||||
|
|
||||||
Write your PySpark application
|
Write your PySpark application
|
||||||
==============================
|
==============================
|
||||||
|
|
||||||
|
Below snippet is a small example for training xgboost model with PySpark. Notice that we are
|
||||||
|
using a list of feature names and the additional parameter ``use_gpu``:
|
||||||
|
|
||||||
.. code-block:: python
|
.. code-block:: python
|
||||||
|
|
||||||
from xgboost.spark import SparkXGBRegressor
|
from xgboost.spark import SparkXGBRegressor
|
||||||
@@ -127,26 +162,11 @@ Write your PySpark application
|
|||||||
predict_df = model.transform(test_df)
|
predict_df = model.transform(test_df)
|
||||||
predict_df.show()
|
predict_df.show()
|
||||||
|
|
||||||
Prepare the necessary packages
|
|
||||||
==============================
|
|
||||||
|
|
||||||
We recommend using Conda or Virtualenv to manage python dependencies
|
|
||||||
in PySpark. Please refer to
|
|
||||||
`How to Manage Python Dependencies in PySpark <https://www.databricks.com/blog/2020/12/22/how-to-manage-python-dependencies-in-pyspark.html>`_.
|
|
||||||
|
|
||||||
.. code-block:: bash
|
|
||||||
|
|
||||||
conda create -y -n xgboost-env -c conda-forge conda-pack python=3.9
|
|
||||||
conda activate xgboost-env
|
|
||||||
pip install xgboost
|
|
||||||
conda install cudf -c rapids -c nvidia -c conda-forge
|
|
||||||
conda pack -f -o xgboost-env.tar.gz
|
|
||||||
|
|
||||||
|
|
||||||
Submit the PySpark application
|
Submit the PySpark application
|
||||||
==============================
|
==============================
|
||||||
|
|
||||||
Assuming you have configured your Spark cluster with GPU support, if not yet, please
|
Assuming you have configured your Spark cluster with GPU support. Otherwise, please
|
||||||
refer to `spark standalone configuration with GPU support <https://nvidia.github.io/spark-rapids/docs/get-started/getting-started-on-prem.html#spark-standalone-cluster>`_.
|
refer to `spark standalone configuration with GPU support <https://nvidia.github.io/spark-rapids/docs/get-started/getting-started-on-prem.html#spark-standalone-cluster>`_.
|
||||||
|
|
||||||
.. code-block:: bash
|
.. code-block:: bash
|
||||||
@@ -158,10 +178,13 @@ refer to `spark standalone configuration with GPU support <https://nvidia.github
|
|||||||
--master spark://<master-ip>:7077 \
|
--master spark://<master-ip>:7077 \
|
||||||
--conf spark.executor.resource.gpu.amount=1 \
|
--conf spark.executor.resource.gpu.amount=1 \
|
||||||
--conf spark.task.resource.gpu.amount=1 \
|
--conf spark.task.resource.gpu.amount=1 \
|
||||||
--archives xgboost-env.tar.gz#environment \
|
--archives xgboost_env.tar.gz#environment \
|
||||||
xgboost_app.py
|
xgboost_app.py
|
||||||
|
|
||||||
|
|
||||||
|
The submit command sends the Python environment created by pip or conda along with the
|
||||||
|
specification of GPU allocation. We will revisit this command later on.
|
||||||
|
|
||||||
Model Persistence
|
Model Persistence
|
||||||
=================
|
=================
|
||||||
|
|
||||||
@@ -186,26 +209,27 @@ To export the underlying booster model used by XGBoost:
|
|||||||
# the same booster object returned by xgboost.train
|
# the same booster object returned by xgboost.train
|
||||||
booster: xgb.Booster = model.get_booster()
|
booster: xgb.Booster = model.get_booster()
|
||||||
booster.predict(...)
|
booster.predict(...)
|
||||||
booster.save_model("model.json")
|
booster.save_model("model.json") # or model.ubj, depending on your choice of format.
|
||||||
|
|
||||||
This booster is shared by other Python interfaces and can be used by other language
|
This booster is not only shared by other Python interfaces but also used by all the
|
||||||
bindings like the C and R packages. Lastly, one can extract a booster file directly from
|
XGBoost bindings including the C, Java, and the R package. Lastly, one can extract the
|
||||||
saved spark estimator without going through the getter:
|
booster file directly from a saved spark estimator without going through the getter:
|
||||||
|
|
||||||
.. code-block:: python
|
.. code-block:: python
|
||||||
|
|
||||||
import xgboost as xgb
|
import xgboost as xgb
|
||||||
bst = xgb.Booster()
|
bst = xgb.Booster()
|
||||||
|
# Loading the model saved in previous snippet
|
||||||
bst.load_model("/tmp/xgboost-pyspark-model/model/part-00000")
|
bst.load_model("/tmp/xgboost-pyspark-model/model/part-00000")
|
||||||
|
|
||||||
Accelerate the whole pipeline of xgboost pyspark
|
|
||||||
================================================
|
|
||||||
|
|
||||||
With `RAPIDS Accelerator for Apache Spark <https://nvidia.github.io/spark-rapids/>`_,
|
Accelerate the whole pipeline for xgboost pyspark
|
||||||
you can accelerate the whole pipeline (ETL, Train, Transform) for xgboost pyspark
|
=================================================
|
||||||
without any code change by leveraging GPU.
|
|
||||||
|
|
||||||
Below is a simple example submit command for enabling GPU acceleration:
|
With `RAPIDS Accelerator for Apache Spark <https://nvidia.github.io/spark-rapids/>`_, you
|
||||||
|
can leverage GPUs to accelerate the whole pipeline (ETL, Train, Transform) for xgboost
|
||||||
|
pyspark without any Python code change. An example submit command is shown below with
|
||||||
|
additional spark configurations and dependencies:
|
||||||
|
|
||||||
.. code-block:: bash
|
.. code-block:: bash
|
||||||
|
|
||||||
@@ -219,8 +243,9 @@ Below is a simple example submit command for enabling GPU acceleration:
|
|||||||
--packages com.nvidia:rapids-4-spark_2.12:22.08.0 \
|
--packages com.nvidia:rapids-4-spark_2.12:22.08.0 \
|
||||||
--conf spark.plugins=com.nvidia.spark.SQLPlugin \
|
--conf spark.plugins=com.nvidia.spark.SQLPlugin \
|
||||||
--conf spark.sql.execution.arrow.maxRecordsPerBatch=1000000 \
|
--conf spark.sql.execution.arrow.maxRecordsPerBatch=1000000 \
|
||||||
--archives xgboost-env.tar.gz#environment \
|
--archives xgboost_env.tar.gz#environment \
|
||||||
xgboost_app.py
|
xgboost_app.py
|
||||||
|
|
||||||
When rapids plugin is enabled, both of the JVM rapids plugin and the cuDF Python are
|
When rapids plugin is enabled, both of the JVM rapids plugin and the cuDF Python package
|
||||||
required for the acceleration.
|
are required. More configuration options can be found in the RAPIDS link above along with
|
||||||
|
details on the plugin.
|
||||||
|
|||||||
Submodule gputreeshap updated: acb5be3c17...787259b412
@@ -287,11 +287,22 @@ class TCPSocket {
|
|||||||
#elif defined(__APPLE__)
|
#elif defined(__APPLE__)
|
||||||
return domain_;
|
return domain_;
|
||||||
#elif defined(__unix__)
|
#elif defined(__unix__)
|
||||||
|
#ifndef __PASE__
|
||||||
std::int32_t domain;
|
std::int32_t domain;
|
||||||
socklen_t len = sizeof(domain);
|
socklen_t len = sizeof(domain);
|
||||||
xgboost_CHECK_SYS_CALL(
|
xgboost_CHECK_SYS_CALL(
|
||||||
getsockopt(handle_, SOL_SOCKET, SO_DOMAIN, reinterpret_cast<char *>(&domain), &len), 0);
|
getsockopt(handle_, SOL_SOCKET, SO_DOMAIN, reinterpret_cast<char *>(&domain), &len), 0);
|
||||||
return ret_iafamily(domain);
|
return ret_iafamily(domain);
|
||||||
|
#else
|
||||||
|
struct sockaddr sa;
|
||||||
|
socklen_t sizeofsa = sizeof(sa);
|
||||||
|
xgboost_CHECK_SYS_CALL(
|
||||||
|
getsockname(handle_, &sa, &sizeofsa), 0);
|
||||||
|
if (sizeofsa < sizeof(uchar_t)*2) {
|
||||||
|
return ret_iafamily(AF_INET);
|
||||||
|
}
|
||||||
|
return ret_iafamily(sa.sa_family);
|
||||||
|
#endif // __PASE__
|
||||||
#else
|
#else
|
||||||
LOG(FATAL) << "Unknown platform.";
|
LOG(FATAL) << "Unknown platform.";
|
||||||
return ret_iafamily(AF_INET);
|
return ret_iafamily(AF_INET);
|
||||||
|
|||||||
@@ -6,6 +6,6 @@
|
|||||||
|
|
||||||
#define XGBOOST_VER_MAJOR 1
|
#define XGBOOST_VER_MAJOR 1
|
||||||
#define XGBOOST_VER_MINOR 7
|
#define XGBOOST_VER_MINOR 7
|
||||||
#define XGBOOST_VER_PATCH 0
|
#define XGBOOST_VER_PATCH 2
|
||||||
|
|
||||||
#endif // XGBOOST_VERSION_CONFIG_H_
|
#endif // XGBOOST_VERSION_CONFIG_H_
|
||||||
|
|||||||
@@ -6,7 +6,7 @@
|
|||||||
|
|
||||||
<groupId>ml.dmlc</groupId>
|
<groupId>ml.dmlc</groupId>
|
||||||
<artifactId>xgboost-jvm_2.12</artifactId>
|
<artifactId>xgboost-jvm_2.12</artifactId>
|
||||||
<version>1.7.0-SNAPSHOT</version>
|
<version>1.7.2</version>
|
||||||
<packaging>pom</packaging>
|
<packaging>pom</packaging>
|
||||||
<name>XGBoost JVM Package</name>
|
<name>XGBoost JVM Package</name>
|
||||||
<description>JVM Package for XGBoost</description>
|
<description>JVM Package for XGBoost</description>
|
||||||
|
|||||||
@@ -6,10 +6,10 @@
|
|||||||
<parent>
|
<parent>
|
||||||
<groupId>ml.dmlc</groupId>
|
<groupId>ml.dmlc</groupId>
|
||||||
<artifactId>xgboost-jvm_2.12</artifactId>
|
<artifactId>xgboost-jvm_2.12</artifactId>
|
||||||
<version>1.7.0-SNAPSHOT</version>
|
<version>1.7.2</version>
|
||||||
</parent>
|
</parent>
|
||||||
<artifactId>xgboost4j-example_2.12</artifactId>
|
<artifactId>xgboost4j-example_2.12</artifactId>
|
||||||
<version>1.7.0-SNAPSHOT</version>
|
<version>1.7.2</version>
|
||||||
<packaging>jar</packaging>
|
<packaging>jar</packaging>
|
||||||
<build>
|
<build>
|
||||||
<plugins>
|
<plugins>
|
||||||
@@ -26,7 +26,7 @@
|
|||||||
<dependency>
|
<dependency>
|
||||||
<groupId>ml.dmlc</groupId>
|
<groupId>ml.dmlc</groupId>
|
||||||
<artifactId>xgboost4j-spark_${scala.binary.version}</artifactId>
|
<artifactId>xgboost4j-spark_${scala.binary.version}</artifactId>
|
||||||
<version>1.7.0-SNAPSHOT</version>
|
<version>1.7.2</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.spark</groupId>
|
<groupId>org.apache.spark</groupId>
|
||||||
@@ -37,7 +37,7 @@
|
|||||||
<dependency>
|
<dependency>
|
||||||
<groupId>ml.dmlc</groupId>
|
<groupId>ml.dmlc</groupId>
|
||||||
<artifactId>xgboost4j-flink_${scala.binary.version}</artifactId>
|
<artifactId>xgboost4j-flink_${scala.binary.version}</artifactId>
|
||||||
<version>1.7.0-SNAPSHOT</version>
|
<version>1.7.2</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.commons</groupId>
|
<groupId>org.apache.commons</groupId>
|
||||||
|
|||||||
@@ -6,10 +6,10 @@
|
|||||||
<parent>
|
<parent>
|
||||||
<groupId>ml.dmlc</groupId>
|
<groupId>ml.dmlc</groupId>
|
||||||
<artifactId>xgboost-jvm_2.12</artifactId>
|
<artifactId>xgboost-jvm_2.12</artifactId>
|
||||||
<version>1.7.0-SNAPSHOT</version>
|
<version>1.7.2</version>
|
||||||
</parent>
|
</parent>
|
||||||
<artifactId>xgboost4j-flink_2.12</artifactId>
|
<artifactId>xgboost4j-flink_2.12</artifactId>
|
||||||
<version>1.7.0-SNAPSHOT</version>
|
<version>1.7.2</version>
|
||||||
<build>
|
<build>
|
||||||
<plugins>
|
<plugins>
|
||||||
<plugin>
|
<plugin>
|
||||||
@@ -26,7 +26,7 @@
|
|||||||
<dependency>
|
<dependency>
|
||||||
<groupId>ml.dmlc</groupId>
|
<groupId>ml.dmlc</groupId>
|
||||||
<artifactId>xgboost4j_${scala.binary.version}</artifactId>
|
<artifactId>xgboost4j_${scala.binary.version}</artifactId>
|
||||||
<version>1.7.0-SNAPSHOT</version>
|
<version>1.7.2</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.commons</groupId>
|
<groupId>org.apache.commons</groupId>
|
||||||
|
|||||||
@@ -6,10 +6,10 @@
|
|||||||
<parent>
|
<parent>
|
||||||
<groupId>ml.dmlc</groupId>
|
<groupId>ml.dmlc</groupId>
|
||||||
<artifactId>xgboost-jvm_2.12</artifactId>
|
<artifactId>xgboost-jvm_2.12</artifactId>
|
||||||
<version>1.7.0-SNAPSHOT</version>
|
<version>1.7.2</version>
|
||||||
</parent>
|
</parent>
|
||||||
<artifactId>xgboost4j-gpu_2.12</artifactId>
|
<artifactId>xgboost4j-gpu_2.12</artifactId>
|
||||||
<version>1.7.0-SNAPSHOT</version>
|
<version>1.7.2</version>
|
||||||
<packaging>jar</packaging>
|
<packaging>jar</packaging>
|
||||||
|
|
||||||
<dependencies>
|
<dependencies>
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
#include <jni.h>
|
#include <jni.h>
|
||||||
#include <thrust/system/cuda/experimental/pinned_allocator.h>
|
|
||||||
|
|
||||||
#include "../../../../src/common/device_helpers.cuh"
|
#include "../../../../src/common/device_helpers.cuh"
|
||||||
|
#include "../../../../src/common/cuda_pinned_allocator.h"
|
||||||
#include "../../../../src/data/array_interface.h"
|
#include "../../../../src/data/array_interface.h"
|
||||||
#include "jvm_utils.h"
|
#include "jvm_utils.h"
|
||||||
#include <xgboost/c_api.h>
|
#include <xgboost/c_api.h>
|
||||||
@@ -131,7 +131,7 @@ class DataIteratorProxy {
|
|||||||
bool cache_on_host_{true}; // TODO(Bobby): Make this optional.
|
bool cache_on_host_{true}; // TODO(Bobby): Make this optional.
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
using Alloc = thrust::system::cuda::experimental::pinned_allocator<T>;
|
using Alloc = xgboost::common::cuda::pinned_allocator<T>;
|
||||||
template <typename U>
|
template <typename U>
|
||||||
using HostVector = std::vector<U, Alloc<U>>;
|
using HostVector = std::vector<U, Alloc<U>>;
|
||||||
|
|
||||||
|
|||||||
@@ -6,7 +6,7 @@
|
|||||||
<parent>
|
<parent>
|
||||||
<groupId>ml.dmlc</groupId>
|
<groupId>ml.dmlc</groupId>
|
||||||
<artifactId>xgboost-jvm_2.12</artifactId>
|
<artifactId>xgboost-jvm_2.12</artifactId>
|
||||||
<version>1.7.0-SNAPSHOT</version>
|
<version>1.7.2</version>
|
||||||
</parent>
|
</parent>
|
||||||
<artifactId>xgboost4j-spark-gpu_2.12</artifactId>
|
<artifactId>xgboost4j-spark-gpu_2.12</artifactId>
|
||||||
<build>
|
<build>
|
||||||
@@ -24,7 +24,7 @@
|
|||||||
<dependency>
|
<dependency>
|
||||||
<groupId>ml.dmlc</groupId>
|
<groupId>ml.dmlc</groupId>
|
||||||
<artifactId>xgboost4j-gpu_${scala.binary.version}</artifactId>
|
<artifactId>xgboost4j-gpu_${scala.binary.version}</artifactId>
|
||||||
<version>1.7.0-SNAPSHOT</version>
|
<version>1.7.2</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.spark</groupId>
|
<groupId>org.apache.spark</groupId>
|
||||||
|
|||||||
@@ -6,7 +6,7 @@
|
|||||||
<parent>
|
<parent>
|
||||||
<groupId>ml.dmlc</groupId>
|
<groupId>ml.dmlc</groupId>
|
||||||
<artifactId>xgboost-jvm_2.12</artifactId>
|
<artifactId>xgboost-jvm_2.12</artifactId>
|
||||||
<version>1.7.0-SNAPSHOT</version>
|
<version>1.7.2</version>
|
||||||
</parent>
|
</parent>
|
||||||
<artifactId>xgboost4j-spark_2.12</artifactId>
|
<artifactId>xgboost4j-spark_2.12</artifactId>
|
||||||
<build>
|
<build>
|
||||||
@@ -24,7 +24,7 @@
|
|||||||
<dependency>
|
<dependency>
|
||||||
<groupId>ml.dmlc</groupId>
|
<groupId>ml.dmlc</groupId>
|
||||||
<artifactId>xgboost4j_${scala.binary.version}</artifactId>
|
<artifactId>xgboost4j_${scala.binary.version}</artifactId>
|
||||||
<version>1.7.0-SNAPSHOT</version>
|
<version>1.7.2</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.spark</groupId>
|
<groupId>org.apache.spark</groupId>
|
||||||
|
|||||||
@@ -6,10 +6,10 @@
|
|||||||
<parent>
|
<parent>
|
||||||
<groupId>ml.dmlc</groupId>
|
<groupId>ml.dmlc</groupId>
|
||||||
<artifactId>xgboost-jvm_2.12</artifactId>
|
<artifactId>xgboost-jvm_2.12</artifactId>
|
||||||
<version>1.7.0-SNAPSHOT</version>
|
<version>1.7.2</version>
|
||||||
</parent>
|
</parent>
|
||||||
<artifactId>xgboost4j_2.12</artifactId>
|
<artifactId>xgboost4j_2.12</artifactId>
|
||||||
<version>1.7.0-SNAPSHOT</version>
|
<version>1.7.2</version>
|
||||||
<packaging>jar</packaging>
|
<packaging>jar</packaging>
|
||||||
|
|
||||||
<dependencies>
|
<dependencies>
|
||||||
|
|||||||
@@ -1 +1 @@
|
|||||||
1.7.0-dev
|
1.7.2
|
||||||
|
|||||||
@@ -4,7 +4,7 @@ Contributors: https://github.com/dmlc/xgboost/blob/master/CONTRIBUTORS.md
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
from . import tracker # noqa
|
from . import tracker # noqa
|
||||||
from . import collective, dask
|
from . import collective, dask, rabit
|
||||||
from .core import (
|
from .core import (
|
||||||
Booster,
|
Booster,
|
||||||
DataIter,
|
DataIter,
|
||||||
|
|||||||
@@ -43,6 +43,7 @@ except ImportError:
|
|||||||
pandas_concat = None
|
pandas_concat = None
|
||||||
PANDAS_INSTALLED = False
|
PANDAS_INSTALLED = False
|
||||||
|
|
||||||
|
|
||||||
# sklearn
|
# sklearn
|
||||||
try:
|
try:
|
||||||
from sklearn.base import BaseEstimator as XGBModelBase
|
from sklearn.base import BaseEstimator as XGBModelBase
|
||||||
@@ -72,6 +73,22 @@ except ImportError:
|
|||||||
XGBStratifiedKFold = None
|
XGBStratifiedKFold = None
|
||||||
|
|
||||||
|
|
||||||
|
_logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
def is_cudf_available() -> bool:
|
||||||
|
"""Check cuDF package available or not"""
|
||||||
|
if importlib.util.find_spec("cudf") is None:
|
||||||
|
return False
|
||||||
|
try:
|
||||||
|
import cudf
|
||||||
|
|
||||||
|
return True
|
||||||
|
except ImportError:
|
||||||
|
_logger.exception("Importing cuDF failed, use DMatrix instead of QDM")
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
class XGBoostLabelEncoder(LabelEncoder):
|
class XGBoostLabelEncoder(LabelEncoder):
|
||||||
"""Label encoder with JSON serialization methods."""
|
"""Label encoder with JSON serialization methods."""
|
||||||
|
|
||||||
|
|||||||
@@ -237,6 +237,7 @@ Error message(s): {os_error_list}
|
|||||||
"""Avoid dependency on packaging (PEP 440)."""
|
"""Avoid dependency on packaging (PEP 440)."""
|
||||||
# 2.0.0-dev or 2.0.0
|
# 2.0.0-dev or 2.0.0
|
||||||
major, minor, patch = ver.split("-")[0].split(".")
|
major, minor, patch = ver.split("-")[0].split(".")
|
||||||
|
patch = patch.split("rc")[0] # 2.0.0rc1
|
||||||
return int(major), int(minor), int(patch)
|
return int(major), int(minor), int(patch)
|
||||||
|
|
||||||
libver = _lib_version(lib)
|
libver = _lib_version(lib)
|
||||||
@@ -2307,7 +2308,7 @@ class Booster:
|
|||||||
_array_interface(csr.indptr),
|
_array_interface(csr.indptr),
|
||||||
_array_interface(csr.indices),
|
_array_interface(csr.indices),
|
||||||
_array_interface(csr.data),
|
_array_interface(csr.data),
|
||||||
ctypes.c_size_t(csr.shape[1]),
|
c_bst_ulong(csr.shape[1]),
|
||||||
from_pystr_to_cstr(json.dumps(args)),
|
from_pystr_to_cstr(json.dumps(args)),
|
||||||
p_handle,
|
p_handle,
|
||||||
ctypes.byref(shape),
|
ctypes.byref(shape),
|
||||||
|
|||||||
@@ -853,7 +853,7 @@ async def _get_rabit_args(
|
|||||||
sched_addr = None
|
sched_addr = None
|
||||||
|
|
||||||
# make sure all workers are online so that we can obtain reliable scheduler_info
|
# make sure all workers are online so that we can obtain reliable scheduler_info
|
||||||
client.wait_for_workers(n_workers)
|
await client.wait_for_workers(n_workers) # type: ignore
|
||||||
env = await client.run_on_scheduler(
|
env = await client.run_on_scheduler(
|
||||||
_start_tracker, n_workers, sched_addr, user_addr
|
_start_tracker, n_workers, sched_addr, user_addr
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -103,7 +103,7 @@ def _from_scipy_csr(
|
|||||||
_array_interface(data.indptr),
|
_array_interface(data.indptr),
|
||||||
_array_interface(data.indices),
|
_array_interface(data.indices),
|
||||||
_array_interface(data.data),
|
_array_interface(data.data),
|
||||||
ctypes.c_size_t(data.shape[1]),
|
c_bst_ulong(data.shape[1]),
|
||||||
config,
|
config,
|
||||||
ctypes.byref(handle),
|
ctypes.byref(handle),
|
||||||
)
|
)
|
||||||
|
|||||||
168
python-package/xgboost/rabit.py
Normal file
168
python-package/xgboost/rabit.py
Normal file
@@ -0,0 +1,168 @@
|
|||||||
|
"""Compatibility shim for xgboost.rabit; to be removed in 2.0"""
|
||||||
|
import logging
|
||||||
|
import warnings
|
||||||
|
from enum import IntEnum, unique
|
||||||
|
from typing import Any, TypeVar, Callable, Optional, List
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
from . import collective
|
||||||
|
|
||||||
|
LOGGER = logging.getLogger("[xgboost.rabit]")
|
||||||
|
|
||||||
|
|
||||||
|
def _deprecation_warning() -> str:
|
||||||
|
return (
|
||||||
|
"The xgboost.rabit submodule is marked as deprecated in 1.7 and will be removed "
|
||||||
|
"in 2.0. Please use xgboost.collective instead."
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def init(args: Optional[List[bytes]] = None) -> None:
|
||||||
|
"""Initialize the rabit library with arguments"""
|
||||||
|
warnings.warn(_deprecation_warning(), FutureWarning)
|
||||||
|
parsed = {}
|
||||||
|
if args:
|
||||||
|
for arg in args:
|
||||||
|
kv = arg.decode().split('=')
|
||||||
|
if len(kv) == 2:
|
||||||
|
parsed[kv[0]] = kv[1]
|
||||||
|
collective.init(**parsed)
|
||||||
|
|
||||||
|
|
||||||
|
def finalize() -> None:
|
||||||
|
"""Finalize the process, notify tracker everything is done."""
|
||||||
|
collective.finalize()
|
||||||
|
|
||||||
|
|
||||||
|
def get_rank() -> int:
|
||||||
|
"""Get rank of current process.
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
rank : int
|
||||||
|
Rank of current process.
|
||||||
|
"""
|
||||||
|
return collective.get_rank()
|
||||||
|
|
||||||
|
|
||||||
|
def get_world_size() -> int:
|
||||||
|
"""Get total number workers.
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
n : int
|
||||||
|
Total number of process.
|
||||||
|
"""
|
||||||
|
return collective.get_world_size()
|
||||||
|
|
||||||
|
|
||||||
|
def is_distributed() -> int:
|
||||||
|
"""If rabit is distributed."""
|
||||||
|
return collective.is_distributed()
|
||||||
|
|
||||||
|
|
||||||
|
def tracker_print(msg: Any) -> None:
|
||||||
|
"""Print message to the tracker.
|
||||||
|
This function can be used to communicate the information of
|
||||||
|
the progress to the tracker
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
msg : str
|
||||||
|
The message to be printed to tracker.
|
||||||
|
"""
|
||||||
|
collective.communicator_print(msg)
|
||||||
|
|
||||||
|
|
||||||
|
def get_processor_name() -> bytes:
|
||||||
|
"""Get the processor name.
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
name : str
|
||||||
|
the name of processor(host)
|
||||||
|
"""
|
||||||
|
return collective.get_processor_name().encode()
|
||||||
|
|
||||||
|
|
||||||
|
T = TypeVar("T") # pylint:disable=invalid-name
|
||||||
|
|
||||||
|
|
||||||
|
def broadcast(data: T, root: int) -> T:
|
||||||
|
"""Broadcast object from one node to all other nodes.
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
data : any type that can be pickled
|
||||||
|
Input data, if current rank does not equal root, this can be None
|
||||||
|
root : int
|
||||||
|
Rank of the node to broadcast data from.
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
object : int
|
||||||
|
the result of broadcast.
|
||||||
|
"""
|
||||||
|
return collective.broadcast(data, root)
|
||||||
|
|
||||||
|
|
||||||
|
@unique
|
||||||
|
class Op(IntEnum):
|
||||||
|
"""Supported operations for rabit."""
|
||||||
|
MAX = 0
|
||||||
|
MIN = 1
|
||||||
|
SUM = 2
|
||||||
|
OR = 3
|
||||||
|
|
||||||
|
|
||||||
|
def allreduce( # pylint:disable=invalid-name
|
||||||
|
data: np.ndarray, op: Op, prepare_fun: Optional[Callable[[np.ndarray], None]] = None
|
||||||
|
) -> np.ndarray:
|
||||||
|
"""Perform allreduce, return the result.
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
data :
|
||||||
|
Input data.
|
||||||
|
op :
|
||||||
|
Reduction operators, can be MIN, MAX, SUM, BITOR
|
||||||
|
prepare_fun :
|
||||||
|
Lazy preprocessing function, if it is not None, prepare_fun(data)
|
||||||
|
will be called by the function before performing allreduce, to initialize the data
|
||||||
|
If the result of Allreduce can be recovered directly,
|
||||||
|
then prepare_fun will NOT be called
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
result :
|
||||||
|
The result of allreduce, have same shape as data
|
||||||
|
Notes
|
||||||
|
-----
|
||||||
|
This function is not thread-safe.
|
||||||
|
"""
|
||||||
|
if prepare_fun is None:
|
||||||
|
return collective.allreduce(data, collective.Op(op))
|
||||||
|
raise Exception("preprocessing function is no longer supported")
|
||||||
|
|
||||||
|
|
||||||
|
def version_number() -> int:
|
||||||
|
"""Returns version number of current stored model.
|
||||||
|
This means how many calls to CheckPoint we made so far.
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
version : int
|
||||||
|
Version number of currently stored model
|
||||||
|
"""
|
||||||
|
return 0
|
||||||
|
|
||||||
|
|
||||||
|
class RabitContext:
|
||||||
|
"""A context controlling rabit initialization and finalization."""
|
||||||
|
|
||||||
|
def __init__(self, args: List[bytes] = None) -> None:
|
||||||
|
if args is None:
|
||||||
|
args = []
|
||||||
|
self.args = args
|
||||||
|
|
||||||
|
def __enter__(self) -> None:
|
||||||
|
init(self.args)
|
||||||
|
assert is_distributed()
|
||||||
|
LOGGER.warning(_deprecation_warning())
|
||||||
|
LOGGER.debug("-------------- rabit say hello ------------------")
|
||||||
|
|
||||||
|
def __exit__(self, *args: List) -> None:
|
||||||
|
finalize()
|
||||||
|
LOGGER.debug("--------------- rabit say bye ------------------")
|
||||||
@@ -1,7 +1,7 @@
|
|||||||
# type: ignore
|
# type: ignore
|
||||||
"""Xgboost pyspark integration submodule for core code."""
|
"""Xgboost pyspark integration submodule for core code."""
|
||||||
# pylint: disable=fixme, too-many-ancestors, protected-access, no-member, invalid-name
|
# pylint: disable=fixme, too-many-ancestors, protected-access, no-member, invalid-name
|
||||||
# pylint: disable=too-few-public-methods, too-many-lines
|
# pylint: disable=too-few-public-methods, too-many-lines, too-many-branches
|
||||||
import json
|
import json
|
||||||
from typing import Iterator, Optional, Tuple
|
from typing import Iterator, Optional, Tuple
|
||||||
|
|
||||||
@@ -32,6 +32,7 @@ from pyspark.sql.types import (
|
|||||||
ShortType,
|
ShortType,
|
||||||
)
|
)
|
||||||
from scipy.special import expit, softmax # pylint: disable=no-name-in-module
|
from scipy.special import expit, softmax # pylint: disable=no-name-in-module
|
||||||
|
from xgboost.compat import is_cudf_available
|
||||||
from xgboost.core import Booster
|
from xgboost.core import Booster
|
||||||
from xgboost.training import train as worker_train
|
from xgboost.training import train as worker_train
|
||||||
|
|
||||||
@@ -728,6 +729,10 @@ class _SparkXGBEstimator(Estimator, _SparkXGBParams, MLReadable, MLWritable):
|
|||||||
else:
|
else:
|
||||||
dataset = dataset.repartition(num_workers)
|
dataset = dataset.repartition(num_workers)
|
||||||
|
|
||||||
|
if self.isDefined(self.qid_col) and self.getOrDefault(self.qid_col):
|
||||||
|
# XGBoost requires qid to be sorted for each partition
|
||||||
|
dataset = dataset.sortWithinPartitions(alias.qid, ascending=True)
|
||||||
|
|
||||||
train_params = self._get_distributed_train_params(dataset)
|
train_params = self._get_distributed_train_params(dataset)
|
||||||
booster_params, train_call_kwargs_params = self._get_xgb_train_call_args(
|
booster_params, train_call_kwargs_params = self._get_xgb_train_call_args(
|
||||||
train_params
|
train_params
|
||||||
@@ -755,7 +760,8 @@ class _SparkXGBEstimator(Estimator, _SparkXGBParams, MLReadable, MLWritable):
|
|||||||
k: v for k, v in train_call_kwargs_params.items() if v is not None
|
k: v for k, v in train_call_kwargs_params.items() if v is not None
|
||||||
}
|
}
|
||||||
dmatrix_kwargs = {k: v for k, v in dmatrix_kwargs.items() if v is not None}
|
dmatrix_kwargs = {k: v for k, v in dmatrix_kwargs.items() if v is not None}
|
||||||
use_qdm = booster_params.get("tree_method", None) in ("hist", "gpu_hist")
|
|
||||||
|
use_hist = booster_params.get("tree_method", None) in ("hist", "gpu_hist")
|
||||||
|
|
||||||
def _train_booster(pandas_df_iter):
|
def _train_booster(pandas_df_iter):
|
||||||
"""Takes in an RDD partition and outputs a booster for that partition after
|
"""Takes in an RDD partition and outputs a booster for that partition after
|
||||||
@@ -769,6 +775,15 @@ class _SparkXGBEstimator(Estimator, _SparkXGBParams, MLReadable, MLWritable):
|
|||||||
|
|
||||||
gpu_id = None
|
gpu_id = None
|
||||||
|
|
||||||
|
# If cuDF is not installed, then using DMatrix instead of QDM,
|
||||||
|
# because without cuDF, DMatrix performs better than QDM.
|
||||||
|
# Note: Checking `is_cudf_available` in spark worker side because
|
||||||
|
# spark worker might has different python environment with driver side.
|
||||||
|
if use_gpu:
|
||||||
|
use_qdm = use_hist and is_cudf_available()
|
||||||
|
else:
|
||||||
|
use_qdm = use_hist
|
||||||
|
|
||||||
if use_qdm and (booster_params.get("max_bin", None) is not None):
|
if use_qdm and (booster_params.get("max_bin", None) is not None):
|
||||||
dmatrix_kwargs["max_bin"] = booster_params["max_bin"]
|
dmatrix_kwargs["max_bin"] = booster_params["max_bin"]
|
||||||
|
|
||||||
|
|||||||
91
src/common/cuda_pinned_allocator.h
Normal file
91
src/common/cuda_pinned_allocator.h
Normal file
@@ -0,0 +1,91 @@
|
|||||||
|
/*!
|
||||||
|
* Copyright 2022 by XGBoost Contributors
|
||||||
|
* \file common.h
|
||||||
|
* \brief cuda pinned allocator for usage with thrust containers
|
||||||
|
*/
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <cstddef>
|
||||||
|
#include <limits>
|
||||||
|
|
||||||
|
#include "common.h"
|
||||||
|
|
||||||
|
namespace xgboost {
|
||||||
|
namespace common {
|
||||||
|
namespace cuda {
|
||||||
|
|
||||||
|
// \p pinned_allocator is a CUDA-specific host memory allocator
|
||||||
|
// that employs \c cudaMallocHost for allocation.
|
||||||
|
//
|
||||||
|
// This implementation is ported from the experimental/pinned_allocator
|
||||||
|
// that Thrust used to provide.
|
||||||
|
//
|
||||||
|
// \see https://en.cppreference.com/w/cpp/memory/allocator
|
||||||
|
template <typename T>
|
||||||
|
class pinned_allocator;
|
||||||
|
|
||||||
|
template <>
|
||||||
|
class pinned_allocator<void> {
|
||||||
|
public:
|
||||||
|
using value_type = void; // NOLINT: The type of the elements in the allocator
|
||||||
|
using pointer = void*; // NOLINT: The type returned by address() / allocate()
|
||||||
|
using const_pointer = const void*; // NOLINT: The type returned by address()
|
||||||
|
using size_type = std::size_t; // NOLINT: The type used for the size of the allocation
|
||||||
|
using difference_type = std::ptrdiff_t; // NOLINT: The type of the distance between two pointers
|
||||||
|
|
||||||
|
template <typename U>
|
||||||
|
struct rebind { // NOLINT
|
||||||
|
using other = pinned_allocator<U>; // NOLINT: The rebound type
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
class pinned_allocator {
|
||||||
|
public:
|
||||||
|
using value_type = T; // NOLINT: The type of the elements in the allocator
|
||||||
|
using pointer = T*; // NOLINT: The type returned by address() / allocate()
|
||||||
|
using const_pointer = const T*; // NOLINT: The type returned by address()
|
||||||
|
using reference = T&; // NOLINT: The parameter type for address()
|
||||||
|
using const_reference = const T&; // NOLINT: The parameter type for address()
|
||||||
|
using size_type = std::size_t; // NOLINT: The type used for the size of the allocation
|
||||||
|
using difference_type = std::ptrdiff_t; // NOLINT: The type of the distance between two pointers
|
||||||
|
|
||||||
|
template <typename U>
|
||||||
|
struct rebind { // NOLINT
|
||||||
|
using other = pinned_allocator<U>; // NOLINT: The rebound type
|
||||||
|
};
|
||||||
|
|
||||||
|
XGBOOST_DEVICE inline pinned_allocator() {}; // NOLINT: host/device markup ignored on defaulted functions
|
||||||
|
XGBOOST_DEVICE inline ~pinned_allocator() {} // NOLINT: host/device markup ignored on defaulted functions
|
||||||
|
XGBOOST_DEVICE inline pinned_allocator(pinned_allocator const&) {} // NOLINT: host/device markup ignored on defaulted functions
|
||||||
|
|
||||||
|
|
||||||
|
template <typename U>
|
||||||
|
XGBOOST_DEVICE inline pinned_allocator(pinned_allocator<U> const&) {} // NOLINT
|
||||||
|
|
||||||
|
XGBOOST_DEVICE inline pointer address(reference r) { return &r; } // NOLINT
|
||||||
|
XGBOOST_DEVICE inline const_pointer address(const_reference r) { return &r; } // NOLINT
|
||||||
|
|
||||||
|
inline pointer allocate(size_type cnt, const_pointer = nullptr) { // NOLINT
|
||||||
|
if (cnt > this->max_size()) { throw std::bad_alloc(); } // end if
|
||||||
|
|
||||||
|
pointer result(nullptr);
|
||||||
|
dh::safe_cuda(cudaMallocHost(reinterpret_cast<void**>(&result), cnt * sizeof(value_type)));
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
inline void deallocate(pointer p, size_type) { dh::safe_cuda(cudaFreeHost(p)); } // NOLINT
|
||||||
|
|
||||||
|
inline size_type max_size() const { return (std::numeric_limits<size_type>::max)() / sizeof(T); } // NOLINT
|
||||||
|
|
||||||
|
XGBOOST_DEVICE inline bool operator==(pinned_allocator const& x) const { return true; }
|
||||||
|
|
||||||
|
XGBOOST_DEVICE inline bool operator!=(pinned_allocator const& x) const {
|
||||||
|
return !operator==(x);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
} // namespace cuda
|
||||||
|
} // namespace common
|
||||||
|
} // namespace xgboost
|
||||||
@@ -101,7 +101,7 @@ class ArrayInterfaceHandler {
|
|||||||
template <typename PtrType>
|
template <typename PtrType>
|
||||||
static PtrType GetPtrFromArrayData(Object::Map const &obj) {
|
static PtrType GetPtrFromArrayData(Object::Map const &obj) {
|
||||||
auto data_it = obj.find("data");
|
auto data_it = obj.find("data");
|
||||||
if (data_it == obj.cend()) {
|
if (data_it == obj.cend() || IsA<Null>(data_it->second)) {
|
||||||
LOG(FATAL) << "Empty data passed in.";
|
LOG(FATAL) << "Empty data passed in.";
|
||||||
}
|
}
|
||||||
auto p_data = reinterpret_cast<PtrType>(
|
auto p_data = reinterpret_cast<PtrType>(
|
||||||
@@ -111,7 +111,7 @@ class ArrayInterfaceHandler {
|
|||||||
|
|
||||||
static void Validate(Object::Map const &array) {
|
static void Validate(Object::Map const &array) {
|
||||||
auto version_it = array.find("version");
|
auto version_it = array.find("version");
|
||||||
if (version_it == array.cend()) {
|
if (version_it == array.cend() || IsA<Null>(version_it->second)) {
|
||||||
LOG(FATAL) << "Missing `version' field for array interface";
|
LOG(FATAL) << "Missing `version' field for array interface";
|
||||||
}
|
}
|
||||||
if (get<Integer const>(version_it->second) > 3) {
|
if (get<Integer const>(version_it->second) > 3) {
|
||||||
@@ -119,17 +119,19 @@ class ArrayInterfaceHandler {
|
|||||||
}
|
}
|
||||||
|
|
||||||
auto typestr_it = array.find("typestr");
|
auto typestr_it = array.find("typestr");
|
||||||
if (typestr_it == array.cend()) {
|
if (typestr_it == array.cend() || IsA<Null>(typestr_it->second)) {
|
||||||
LOG(FATAL) << "Missing `typestr' field for array interface";
|
LOG(FATAL) << "Missing `typestr' field for array interface";
|
||||||
}
|
}
|
||||||
|
|
||||||
auto typestr = get<String const>(typestr_it->second);
|
auto typestr = get<String const>(typestr_it->second);
|
||||||
CHECK(typestr.size() == 3 || typestr.size() == 4) << ArrayInterfaceErrors::TypestrFormat();
|
CHECK(typestr.size() == 3 || typestr.size() == 4) << ArrayInterfaceErrors::TypestrFormat();
|
||||||
|
|
||||||
if (array.find("shape") == array.cend()) {
|
auto shape_it = array.find("shape");
|
||||||
|
if (shape_it == array.cend() || IsA<Null>(shape_it->second)) {
|
||||||
LOG(FATAL) << "Missing `shape' field for array interface";
|
LOG(FATAL) << "Missing `shape' field for array interface";
|
||||||
}
|
}
|
||||||
if (array.find("data") == array.cend()) {
|
auto data_it = array.find("data");
|
||||||
|
if (data_it == array.cend() || IsA<Null>(data_it->second)) {
|
||||||
LOG(FATAL) << "Missing `data' field for array interface";
|
LOG(FATAL) << "Missing `data' field for array interface";
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -139,8 +141,9 @@ class ArrayInterfaceHandler {
|
|||||||
static size_t ExtractMask(Object::Map const &column,
|
static size_t ExtractMask(Object::Map const &column,
|
||||||
common::Span<RBitField8::value_type> *p_out) {
|
common::Span<RBitField8::value_type> *p_out) {
|
||||||
auto &s_mask = *p_out;
|
auto &s_mask = *p_out;
|
||||||
if (column.find("mask") != column.cend()) {
|
auto const &mask_it = column.find("mask");
|
||||||
auto const &j_mask = get<Object const>(column.at("mask"));
|
if (mask_it != column.cend() && !IsA<Null>(mask_it->second)) {
|
||||||
|
auto const &j_mask = get<Object const>(mask_it->second);
|
||||||
Validate(j_mask);
|
Validate(j_mask);
|
||||||
|
|
||||||
auto p_mask = GetPtrFromArrayData<RBitField8::value_type *>(j_mask);
|
auto p_mask = GetPtrFromArrayData<RBitField8::value_type *>(j_mask);
|
||||||
@@ -173,8 +176,9 @@ class ArrayInterfaceHandler {
|
|||||||
// assume 1 byte alignment.
|
// assume 1 byte alignment.
|
||||||
size_t const span_size = RBitField8::ComputeStorageSize(n_bits);
|
size_t const span_size = RBitField8::ComputeStorageSize(n_bits);
|
||||||
|
|
||||||
if (j_mask.find("strides") != j_mask.cend()) {
|
auto strides_it = j_mask.find("strides");
|
||||||
auto strides = get<Array const>(column.at("strides"));
|
if (strides_it != j_mask.cend() && !IsA<Null>(strides_it->second)) {
|
||||||
|
auto strides = get<Array const>(strides_it->second);
|
||||||
CHECK_EQ(strides.size(), 1) << ArrayInterfaceErrors::Dimension(1);
|
CHECK_EQ(strides.size(), 1) << ArrayInterfaceErrors::Dimension(1);
|
||||||
CHECK_EQ(get<Integer>(strides.at(0)), type_length) << ArrayInterfaceErrors::Contiguous();
|
CHECK_EQ(get<Integer>(strides.at(0)), type_length) << ArrayInterfaceErrors::Contiguous();
|
||||||
}
|
}
|
||||||
@@ -401,7 +405,9 @@ class ArrayInterface {
|
|||||||
<< "XGBoost doesn't support internal broadcasting.";
|
<< "XGBoost doesn't support internal broadcasting.";
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
CHECK(array.find("mask") == array.cend()) << "Masked array is not yet supported.";
|
auto mask_it = array.find("mask");
|
||||||
|
CHECK(mask_it == array.cend() || IsA<Null>(mask_it->second))
|
||||||
|
<< "Masked array is not yet supported.";
|
||||||
}
|
}
|
||||||
|
|
||||||
auto stream_it = array.find("stream");
|
auto stream_it = array.find("stream");
|
||||||
|
|||||||
@@ -67,6 +67,10 @@ void EncodeTreeLeafDevice(Context const* ctx, common::Span<bst_node_t const> pos
|
|||||||
auto pinned = pinned_pool.GetSpan<char>(sizeof(size_t) + sizeof(bst_node_t));
|
auto pinned = pinned_pool.GetSpan<char>(sizeof(size_t) + sizeof(bst_node_t));
|
||||||
dh::CUDAStream copy_stream;
|
dh::CUDAStream copy_stream;
|
||||||
size_t* h_num_runs = reinterpret_cast<size_t*>(pinned.subspan(0, sizeof(size_t)).data());
|
size_t* h_num_runs = reinterpret_cast<size_t*>(pinned.subspan(0, sizeof(size_t)).data());
|
||||||
|
|
||||||
|
dh::CUDAEvent e;
|
||||||
|
e.Record(dh::DefaultStream());
|
||||||
|
copy_stream.View().Wait(e);
|
||||||
// flag for whether there's ignored position
|
// flag for whether there's ignored position
|
||||||
bst_node_t* h_first_unique =
|
bst_node_t* h_first_unique =
|
||||||
reinterpret_cast<bst_node_t*>(pinned.subspan(sizeof(size_t), sizeof(bst_node_t)).data());
|
reinterpret_cast<bst_node_t*>(pinned.subspan(sizeof(size_t), sizeof(bst_node_t)).data());
|
||||||
|
|||||||
@@ -3,10 +3,10 @@
|
|||||||
*/
|
*/
|
||||||
#ifndef EVALUATE_SPLITS_CUH_
|
#ifndef EVALUATE_SPLITS_CUH_
|
||||||
#define EVALUATE_SPLITS_CUH_
|
#define EVALUATE_SPLITS_CUH_
|
||||||
#include <thrust/system/cuda/experimental/pinned_allocator.h>
|
|
||||||
#include <xgboost/span.h>
|
#include <xgboost/span.h>
|
||||||
|
|
||||||
#include "../../common/categorical.h"
|
#include "../../common/categorical.h"
|
||||||
|
#include "../../common/cuda_pinned_allocator.h"
|
||||||
#include "../split_evaluator.h"
|
#include "../split_evaluator.h"
|
||||||
#include "../updater_gpu_common.cuh"
|
#include "../updater_gpu_common.cuh"
|
||||||
#include "expand_entry.cuh"
|
#include "expand_entry.cuh"
|
||||||
@@ -57,7 +57,7 @@ struct CatAccessor {
|
|||||||
class GPUHistEvaluator {
|
class GPUHistEvaluator {
|
||||||
using CatST = common::CatBitField::value_type; // categorical storage type
|
using CatST = common::CatBitField::value_type; // categorical storage type
|
||||||
// use pinned memory to stage the categories, used for sort based splits.
|
// use pinned memory to stage the categories, used for sort based splits.
|
||||||
using Alloc = thrust::system::cuda::experimental::pinned_allocator<CatST>;
|
using Alloc = xgboost::common::cuda::pinned_allocator<CatST>;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
TreeEvaluator tree_evaluator_;
|
TreeEvaluator tree_evaluator_;
|
||||||
|
|||||||
11
tests/ci_build/conda_env/cpp_test.yml
Normal file
11
tests/ci_build/conda_env/cpp_test.yml
Normal file
@@ -0,0 +1,11 @@
|
|||||||
|
# conda environment for CPP test on Linux distributions
|
||||||
|
name: cpp_test
|
||||||
|
channels:
|
||||||
|
- defaults
|
||||||
|
- conda-forge
|
||||||
|
dependencies:
|
||||||
|
- cmake
|
||||||
|
- ninja
|
||||||
|
- c-compiler
|
||||||
|
- cxx-compiler
|
||||||
|
- gtest
|
||||||
13
tests/ci_build/conda_env/sdist_test.yml
Normal file
13
tests/ci_build/conda_env/sdist_test.yml
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
# conda environment for source distribution test.
|
||||||
|
name: sdist_test
|
||||||
|
channels:
|
||||||
|
- defaults
|
||||||
|
- conda-forge
|
||||||
|
dependencies:
|
||||||
|
- python=3.8
|
||||||
|
- pip
|
||||||
|
- wheel
|
||||||
|
- cmake
|
||||||
|
- ninja
|
||||||
|
- c-compiler
|
||||||
|
- cxx-compiler
|
||||||
@@ -33,9 +33,8 @@ TEST(ArrayInterface, Error) {
|
|||||||
Json column { Object() };
|
Json column { Object() };
|
||||||
std::vector<Json> j_shape {Json(Integer(static_cast<Integer::Int>(kRows)))};
|
std::vector<Json> j_shape {Json(Integer(static_cast<Integer::Int>(kRows)))};
|
||||||
column["shape"] = Array(j_shape);
|
column["shape"] = Array(j_shape);
|
||||||
std::vector<Json> j_data {
|
std::vector<Json> j_data{Json(Integer(reinterpret_cast<Integer::Int>(nullptr))),
|
||||||
Json(Integer(reinterpret_cast<Integer::Int>(nullptr))),
|
Json(Boolean(false))};
|
||||||
Json(Boolean(false))};
|
|
||||||
|
|
||||||
auto const& column_obj = get<Object>(column);
|
auto const& column_obj = get<Object>(column);
|
||||||
std::string typestr{"<f4"};
|
std::string typestr{"<f4"};
|
||||||
@@ -45,6 +44,10 @@ TEST(ArrayInterface, Error) {
|
|||||||
EXPECT_THROW(ArrayInterfaceHandler::ExtractData(column_obj, n), dmlc::Error);
|
EXPECT_THROW(ArrayInterfaceHandler::ExtractData(column_obj, n), dmlc::Error);
|
||||||
column["version"] = 3;
|
column["version"] = 3;
|
||||||
// missing data
|
// missing data
|
||||||
|
EXPECT_THROW(ArrayInterfaceHandler::ExtractData(column_obj, n),
|
||||||
|
dmlc::Error);
|
||||||
|
// null data
|
||||||
|
column["data"] = Null{};
|
||||||
EXPECT_THROW(ArrayInterfaceHandler::ExtractData(column_obj, n),
|
EXPECT_THROW(ArrayInterfaceHandler::ExtractData(column_obj, n),
|
||||||
dmlc::Error);
|
dmlc::Error);
|
||||||
column["data"] = j_data;
|
column["data"] = j_data;
|
||||||
@@ -63,6 +66,11 @@ TEST(ArrayInterface, Error) {
|
|||||||
Json(Boolean(false))};
|
Json(Boolean(false))};
|
||||||
column["data"] = j_data;
|
column["data"] = j_data;
|
||||||
EXPECT_NO_THROW(ArrayInterfaceHandler::ExtractData(column_obj, n));
|
EXPECT_NO_THROW(ArrayInterfaceHandler::ExtractData(column_obj, n));
|
||||||
|
// null data in mask
|
||||||
|
column["mask"] = Object{};
|
||||||
|
column["mask"]["data"] = Null{};
|
||||||
|
common::Span<RBitField8::value_type> s_mask;
|
||||||
|
EXPECT_THROW(ArrayInterfaceHandler::ExtractMask(column_obj, &s_mask), dmlc::Error);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(ArrayInterface, GetElement) {
|
TEST(ArrayInterface, GetElement) {
|
||||||
|
|||||||
@@ -39,6 +39,37 @@ def test_rabit_communicator():
|
|||||||
assert worker.exitcode == 0
|
assert worker.exitcode == 0
|
||||||
|
|
||||||
|
|
||||||
|
# TODO(rongou): remove this once we remove the rabit api.
|
||||||
|
def run_rabit_api_worker(rabit_env, world_size):
|
||||||
|
with xgb.rabit.RabitContext(rabit_env):
|
||||||
|
assert xgb.rabit.get_world_size() == world_size
|
||||||
|
assert xgb.rabit.is_distributed()
|
||||||
|
assert xgb.rabit.get_processor_name().decode() == socket.gethostname()
|
||||||
|
ret = xgb.rabit.broadcast('test1234', 0)
|
||||||
|
assert str(ret) == 'test1234'
|
||||||
|
ret = xgb.rabit.allreduce(np.asarray([1, 2, 3]), xgb.rabit.Op.SUM)
|
||||||
|
assert np.array_equal(ret, np.asarray([2, 4, 6]))
|
||||||
|
|
||||||
|
|
||||||
|
# TODO(rongou): remove this once we remove the rabit api.
|
||||||
|
def test_rabit_api():
|
||||||
|
world_size = 2
|
||||||
|
tracker = RabitTracker(host_ip='127.0.0.1', n_workers=world_size)
|
||||||
|
tracker.start(world_size)
|
||||||
|
rabit_env = []
|
||||||
|
for k, v in tracker.worker_envs().items():
|
||||||
|
rabit_env.append(f"{k}={v}".encode())
|
||||||
|
workers = []
|
||||||
|
for _ in range(world_size):
|
||||||
|
worker = multiprocessing.Process(target=run_rabit_api_worker,
|
||||||
|
args=(rabit_env, world_size))
|
||||||
|
workers.append(worker)
|
||||||
|
worker.start()
|
||||||
|
for worker in workers:
|
||||||
|
worker.join()
|
||||||
|
assert worker.exitcode == 0
|
||||||
|
|
||||||
|
|
||||||
def run_federated_worker(port, world_size, rank):
|
def run_federated_worker(port, world_size, rank):
|
||||||
with xgb.collective.CommunicatorContext(xgboost_communicator='federated',
|
with xgb.collective.CommunicatorContext(xgboost_communicator='federated',
|
||||||
federated_server_address=f'localhost:{port}',
|
federated_server_address=f'localhost:{port}',
|
||||||
|
|||||||
@@ -390,28 +390,6 @@ class XgboostLocalTest(SparkTestCase):
|
|||||||
"expected_prediction_with_base_margin",
|
"expected_prediction_with_base_margin",
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
self.ranker_df_train = self.session.createDataFrame(
|
|
||||||
[
|
|
||||||
(Vectors.dense(1.0, 2.0, 3.0), 0, 0),
|
|
||||||
(Vectors.dense(4.0, 5.0, 6.0), 1, 0),
|
|
||||||
(Vectors.dense(9.0, 4.0, 8.0), 2, 0),
|
|
||||||
(Vectors.sparse(3, {1: 1.0, 2: 5.5}), 0, 1),
|
|
||||||
(Vectors.sparse(3, {1: 6.0, 2: 7.5}), 1, 1),
|
|
||||||
(Vectors.sparse(3, {1: 8.0, 2: 9.5}), 2, 1),
|
|
||||||
],
|
|
||||||
["features", "label", "qid"],
|
|
||||||
)
|
|
||||||
self.ranker_df_test = self.session.createDataFrame(
|
|
||||||
[
|
|
||||||
(Vectors.dense(1.5, 2.0, 3.0), 0, -1.87988),
|
|
||||||
(Vectors.dense(4.5, 5.0, 6.0), 0, 0.29556),
|
|
||||||
(Vectors.dense(9.0, 4.5, 8.0), 0, 2.36570),
|
|
||||||
(Vectors.sparse(3, {1: 1.0, 2: 6.0}), 1, -1.87988),
|
|
||||||
(Vectors.sparse(3, {1: 6.0, 2: 7.0}), 1, -0.30612),
|
|
||||||
(Vectors.sparse(3, {1: 8.0, 2: 10.5}), 1, 2.44826),
|
|
||||||
],
|
|
||||||
["features", "qid", "expected_prediction"],
|
|
||||||
)
|
|
||||||
|
|
||||||
self.reg_df_sparse_train = self.session.createDataFrame(
|
self.reg_df_sparse_train = self.session.createDataFrame(
|
||||||
[
|
[
|
||||||
@@ -1039,15 +1017,6 @@ class XgboostLocalTest(SparkTestCase):
|
|||||||
for row1, row2 in zip(pred_result, pred_result2):
|
for row1, row2 in zip(pred_result, pred_result2):
|
||||||
self.assertTrue(np.allclose(row1.probability, row2.probability, rtol=1e-3))
|
self.assertTrue(np.allclose(row1.probability, row2.probability, rtol=1e-3))
|
||||||
|
|
||||||
def test_ranker(self):
|
|
||||||
ranker = SparkXGBRanker(qid_col="qid")
|
|
||||||
assert ranker.getOrDefault(ranker.objective) == "rank:pairwise"
|
|
||||||
model = ranker.fit(self.ranker_df_train)
|
|
||||||
pred_result = model.transform(self.ranker_df_test).collect()
|
|
||||||
|
|
||||||
for row in pred_result:
|
|
||||||
assert np.isclose(row.prediction, row.expected_prediction, rtol=1e-3)
|
|
||||||
|
|
||||||
def test_empty_validation_data(self) -> None:
|
def test_empty_validation_data(self) -> None:
|
||||||
for tree_method in [
|
for tree_method in [
|
||||||
"hist",
|
"hist",
|
||||||
@@ -1130,3 +1099,63 @@ class XgboostLocalTest(SparkTestCase):
|
|||||||
def test_unsupported_params(self):
|
def test_unsupported_params(self):
|
||||||
with pytest.raises(ValueError, match="evals_result"):
|
with pytest.raises(ValueError, match="evals_result"):
|
||||||
SparkXGBClassifier(evals_result={})
|
SparkXGBClassifier(evals_result={})
|
||||||
|
|
||||||
|
|
||||||
|
class XgboostRankerLocalTest(SparkTestCase):
|
||||||
|
def setUp(self):
|
||||||
|
self.session.conf.set("spark.sql.execution.arrow.maxRecordsPerBatch", "8")
|
||||||
|
self.ranker_df_train = self.session.createDataFrame(
|
||||||
|
[
|
||||||
|
(Vectors.dense(1.0, 2.0, 3.0), 0, 0),
|
||||||
|
(Vectors.dense(4.0, 5.0, 6.0), 1, 0),
|
||||||
|
(Vectors.dense(9.0, 4.0, 8.0), 2, 0),
|
||||||
|
(Vectors.sparse(3, {1: 1.0, 2: 5.5}), 0, 1),
|
||||||
|
(Vectors.sparse(3, {1: 6.0, 2: 7.5}), 1, 1),
|
||||||
|
(Vectors.sparse(3, {1: 8.0, 2: 9.5}), 2, 1),
|
||||||
|
],
|
||||||
|
["features", "label", "qid"],
|
||||||
|
)
|
||||||
|
self.ranker_df_test = self.session.createDataFrame(
|
||||||
|
[
|
||||||
|
(Vectors.dense(1.5, 2.0, 3.0), 0, -1.87988),
|
||||||
|
(Vectors.dense(4.5, 5.0, 6.0), 0, 0.29556),
|
||||||
|
(Vectors.dense(9.0, 4.5, 8.0), 0, 2.36570),
|
||||||
|
(Vectors.sparse(3, {1: 1.0, 2: 6.0}), 1, -1.87988),
|
||||||
|
(Vectors.sparse(3, {1: 6.0, 2: 7.0}), 1, -0.30612),
|
||||||
|
(Vectors.sparse(3, {1: 8.0, 2: 10.5}), 1, 2.44826),
|
||||||
|
],
|
||||||
|
["features", "qid", "expected_prediction"],
|
||||||
|
)
|
||||||
|
self.ranker_df_train_1 = self.session.createDataFrame(
|
||||||
|
[
|
||||||
|
(Vectors.sparse(3, {1: 1.0, 2: 5.5}), 0, 9),
|
||||||
|
(Vectors.sparse(3, {1: 6.0, 2: 7.5}), 1, 9),
|
||||||
|
(Vectors.sparse(3, {1: 8.0, 2: 9.5}), 2, 9),
|
||||||
|
(Vectors.dense(1.0, 2.0, 3.0), 0, 8),
|
||||||
|
(Vectors.dense(4.0, 5.0, 6.0), 1, 8),
|
||||||
|
(Vectors.dense(9.0, 4.0, 8.0), 2, 8),
|
||||||
|
(Vectors.sparse(3, {1: 1.0, 2: 5.5}), 0, 7),
|
||||||
|
(Vectors.sparse(3, {1: 6.0, 2: 7.5}), 1, 7),
|
||||||
|
(Vectors.sparse(3, {1: 8.0, 2: 9.5}), 2, 7),
|
||||||
|
(Vectors.dense(1.0, 2.0, 3.0), 0, 6),
|
||||||
|
(Vectors.dense(4.0, 5.0, 6.0), 1, 6),
|
||||||
|
(Vectors.dense(9.0, 4.0, 8.0), 2, 6),
|
||||||
|
]
|
||||||
|
* 4,
|
||||||
|
["features", "label", "qid"],
|
||||||
|
)
|
||||||
|
|
||||||
|
def test_ranker(self):
|
||||||
|
ranker = SparkXGBRanker(qid_col="qid")
|
||||||
|
assert ranker.getOrDefault(ranker.objective) == "rank:pairwise"
|
||||||
|
model = ranker.fit(self.ranker_df_train)
|
||||||
|
pred_result = model.transform(self.ranker_df_test).collect()
|
||||||
|
|
||||||
|
for row in pred_result:
|
||||||
|
assert np.isclose(row.prediction, row.expected_prediction, rtol=1e-3)
|
||||||
|
|
||||||
|
def test_ranker_qid_sorted(self):
|
||||||
|
ranker = SparkXGBRanker(qid_col="qid", num_workers=4)
|
||||||
|
assert ranker.getOrDefault(ranker.objective) == "rank:pairwise"
|
||||||
|
model = ranker.fit(self.ranker_df_train_1)
|
||||||
|
model.transform(self.ranker_df_test).collect()
|
||||||
|
|||||||
Reference in New Issue
Block a user