Compare commits
23 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
ccf43d4ba0 | ||
|
|
dd58c2ac47 | ||
|
|
899e4c8988 | ||
|
|
a2085bf223 | ||
|
|
067b704e58 | ||
|
|
1a834b2b85 | ||
|
|
162b48a1a4 | ||
|
|
83a078b7e5 | ||
|
|
575fba651b | ||
|
|
62ed8b5fef | ||
|
|
a980e10744 | ||
|
|
59c54e361b | ||
|
|
60a8c8ebba | ||
|
|
58bc225657 | ||
|
|
850b53100f | ||
|
|
67b657dad0 | ||
|
|
db14e3feb7 | ||
|
|
9372370dda | ||
|
|
1136a7e0c3 | ||
|
|
a347cd512b | ||
|
|
9ff0c0832a | ||
|
|
534c940a7e | ||
|
|
5b76acccff |
14
.github/workflows/main.yml
vendored
14
.github/workflows/main.yml
vendored
@@ -75,19 +75,18 @@ jobs:
|
|||||||
- uses: actions/checkout@v2
|
- uses: actions/checkout@v2
|
||||||
with:
|
with:
|
||||||
submodules: 'true'
|
submodules: 'true'
|
||||||
- name: Install system packages
|
- uses: mamba-org/provision-with-micromamba@f347426e5745fe3dfc13ec5baf20496990d0281f # v14
|
||||||
run: |
|
|
||||||
sudo apt-get install -y --no-install-recommends ninja-build
|
|
||||||
- uses: conda-incubator/setup-miniconda@v2
|
|
||||||
with:
|
with:
|
||||||
auto-update-conda: true
|
cache-downloads: true
|
||||||
python-version: ${{ matrix.python-version }}
|
cache-env: true
|
||||||
activate-environment: test
|
environment-name: cpp_test
|
||||||
|
environment-file: tests/ci_build/conda_env/cpp_test.yml
|
||||||
- name: Display Conda env
|
- name: Display Conda env
|
||||||
shell: bash -l {0}
|
shell: bash -l {0}
|
||||||
run: |
|
run: |
|
||||||
conda info
|
conda info
|
||||||
conda list
|
conda list
|
||||||
|
|
||||||
- name: Build and install XGBoost static library
|
- name: Build and install XGBoost static library
|
||||||
shell: bash -l {0}
|
shell: bash -l {0}
|
||||||
run: |
|
run: |
|
||||||
@@ -109,6 +108,7 @@ jobs:
|
|||||||
cd ..
|
cd ..
|
||||||
rm -rf ./build
|
rm -rf ./build
|
||||||
popd
|
popd
|
||||||
|
|
||||||
- name: Build and install XGBoost shared library
|
- name: Build and install XGBoost shared library
|
||||||
shell: bash -l {0}
|
shell: bash -l {0}
|
||||||
run: |
|
run: |
|
||||||
|
|||||||
150
.github/workflows/python_tests.yml
vendored
150
.github/workflows/python_tests.yml
vendored
@@ -41,12 +41,46 @@ jobs:
|
|||||||
run: |
|
run: |
|
||||||
python tests/ci_build/lint_python.py --format=0 --type-check=0 --pylint=1
|
python tests/ci_build/lint_python.py --format=0 --type-check=0 --pylint=1
|
||||||
|
|
||||||
python-sdist-test:
|
python-sdist-test-on-Linux:
|
||||||
|
# Mismatched glibcxx version between system and conda forge.
|
||||||
runs-on: ${{ matrix.os }}
|
runs-on: ${{ matrix.os }}
|
||||||
name: Test installing XGBoost Python source package on ${{ matrix.os }}
|
name: Test installing XGBoost Python source package on ${{ matrix.os }}
|
||||||
strategy:
|
strategy:
|
||||||
matrix:
|
matrix:
|
||||||
os: [ubuntu-latest, macos-11, windows-latest]
|
os: [ubuntu-latest]
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@e2f20e631ae6d7dd3b768f56a5d2af784dd54791 # v2.5.0
|
||||||
|
with:
|
||||||
|
submodules: 'true'
|
||||||
|
- uses: mamba-org/provision-with-micromamba@f347426e5745fe3dfc13ec5baf20496990d0281f # v14
|
||||||
|
with:
|
||||||
|
cache-downloads: true
|
||||||
|
cache-env: false
|
||||||
|
environment-name: sdist_test
|
||||||
|
environment-file: tests/ci_build/conda_env/sdist_test.yml
|
||||||
|
- name: Display Conda env
|
||||||
|
shell: bash -l {0}
|
||||||
|
run: |
|
||||||
|
conda info
|
||||||
|
conda list
|
||||||
|
- name: Build and install XGBoost
|
||||||
|
shell: bash -l {0}
|
||||||
|
run: |
|
||||||
|
cd python-package
|
||||||
|
python --version
|
||||||
|
python setup.py sdist
|
||||||
|
pip install -v ./dist/xgboost-*.tar.gz
|
||||||
|
cd ..
|
||||||
|
python -c 'import xgboost'
|
||||||
|
|
||||||
|
python-sdist-test:
|
||||||
|
# Use system toolchain instead of conda toolchain for macos and windows.
|
||||||
|
# MacOS has linker error if clang++ from conda-forge is used
|
||||||
|
runs-on: ${{ matrix.os }}
|
||||||
|
name: Test installing XGBoost Python source package on ${{ matrix.os }}
|
||||||
|
strategy:
|
||||||
|
matrix:
|
||||||
|
os: [macos-11, windows-latest]
|
||||||
python-version: ["3.8"]
|
python-version: ["3.8"]
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v2
|
- uses: actions/checkout@v2
|
||||||
@@ -56,11 +90,7 @@ jobs:
|
|||||||
if: matrix.os == 'macos-11'
|
if: matrix.os == 'macos-11'
|
||||||
run: |
|
run: |
|
||||||
brew install ninja libomp
|
brew install ninja libomp
|
||||||
- name: Install Ubuntu system dependencies
|
- uses: conda-incubator/setup-miniconda@35d1405e78aa3f784fe3ce9a2eb378d5eeb62169 # v2.1.1
|
||||||
if: matrix.os == 'ubuntu-latest'
|
|
||||||
run: |
|
|
||||||
sudo apt-get install -y --no-install-recommends ninja-build
|
|
||||||
- uses: conda-incubator/setup-miniconda@v2
|
|
||||||
with:
|
with:
|
||||||
auto-update-conda: true
|
auto-update-conda: true
|
||||||
python-version: ${{ matrix.python-version }}
|
python-version: ${{ matrix.python-version }}
|
||||||
@@ -80,6 +110,58 @@ jobs:
|
|||||||
cd ..
|
cd ..
|
||||||
python -c 'import xgboost'
|
python -c 'import xgboost'
|
||||||
|
|
||||||
|
python-tests-on-macos:
|
||||||
|
name: Test XGBoost Python package on ${{ matrix.config.os }}
|
||||||
|
runs-on: ${{ matrix.config.os }}
|
||||||
|
timeout-minutes: 60
|
||||||
|
strategy:
|
||||||
|
matrix:
|
||||||
|
config:
|
||||||
|
- {os: macos-11}
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@e2f20e631ae6d7dd3b768f56a5d2af784dd54791 # v2.5.0
|
||||||
|
with:
|
||||||
|
submodules: 'true'
|
||||||
|
|
||||||
|
- uses: mamba-org/provision-with-micromamba@f347426e5745fe3dfc13ec5baf20496990d0281f # v14
|
||||||
|
with:
|
||||||
|
cache-downloads: true
|
||||||
|
cache-env: false
|
||||||
|
environment-name: macos_test
|
||||||
|
environment-file: tests/ci_build/conda_env/macos_cpu_test.yml
|
||||||
|
|
||||||
|
- name: Display Conda env
|
||||||
|
shell: bash -l {0}
|
||||||
|
run: |
|
||||||
|
conda info
|
||||||
|
conda list
|
||||||
|
|
||||||
|
- name: Build XGBoost on macos
|
||||||
|
shell: bash -l {0}
|
||||||
|
run: |
|
||||||
|
brew install ninja
|
||||||
|
|
||||||
|
mkdir build
|
||||||
|
cd build
|
||||||
|
# Set prefix, to use OpenMP library from Conda env
|
||||||
|
# See https://github.com/dmlc/xgboost/issues/7039#issuecomment-1025038228
|
||||||
|
# to learn why we don't use libomp from Homebrew.
|
||||||
|
cmake .. -GNinja -DCMAKE_PREFIX_PATH=$CONDA_PREFIX
|
||||||
|
ninja
|
||||||
|
|
||||||
|
- name: Install Python package
|
||||||
|
shell: bash -l {0}
|
||||||
|
run: |
|
||||||
|
cd python-package
|
||||||
|
python --version
|
||||||
|
python setup.py install
|
||||||
|
|
||||||
|
- name: Test Python package
|
||||||
|
shell: bash -l {0}
|
||||||
|
run: |
|
||||||
|
pytest -s -v -rxXs --durations=0 ./tests/python
|
||||||
|
|
||||||
python-tests-on-win:
|
python-tests-on-win:
|
||||||
name: Test XGBoost Python package on ${{ matrix.config.os }}
|
name: Test XGBoost Python package on ${{ matrix.config.os }}
|
||||||
runs-on: ${{ matrix.config.os }}
|
runs-on: ${{ matrix.config.os }}
|
||||||
@@ -125,56 +207,4 @@ jobs:
|
|||||||
- name: Test Python package
|
- name: Test Python package
|
||||||
shell: bash -l {0}
|
shell: bash -l {0}
|
||||||
run: |
|
run: |
|
||||||
pytest -s -v ./tests/python
|
pytest -s -v -rxXs --durations=0 ./tests/python
|
||||||
|
|
||||||
python-tests-on-macos:
|
|
||||||
name: Test XGBoost Python package on ${{ matrix.config.os }}
|
|
||||||
runs-on: ${{ matrix.config.os }}
|
|
||||||
timeout-minutes: 90
|
|
||||||
strategy:
|
|
||||||
matrix:
|
|
||||||
config:
|
|
||||||
- {os: macos-11, python-version "3.8" }
|
|
||||||
|
|
||||||
steps:
|
|
||||||
- uses: actions/checkout@v2
|
|
||||||
with:
|
|
||||||
submodules: 'true'
|
|
||||||
|
|
||||||
- uses: conda-incubator/setup-miniconda@v2
|
|
||||||
with:
|
|
||||||
auto-update-conda: true
|
|
||||||
python-version: ${{ matrix.config.python-version }}
|
|
||||||
activate-environment: macos_test
|
|
||||||
environment-file: tests/ci_build/conda_env/macos_cpu_test.yml
|
|
||||||
|
|
||||||
- name: Display Conda env
|
|
||||||
shell: bash -l {0}
|
|
||||||
run: |
|
|
||||||
conda info
|
|
||||||
conda list
|
|
||||||
|
|
||||||
- name: Build XGBoost on macos
|
|
||||||
shell: bash -l {0}
|
|
||||||
run: |
|
|
||||||
brew install ninja
|
|
||||||
|
|
||||||
mkdir build
|
|
||||||
cd build
|
|
||||||
# Set prefix, to use OpenMP library from Conda env
|
|
||||||
# See https://github.com/dmlc/xgboost/issues/7039#issuecomment-1025038228
|
|
||||||
# to learn why we don't use libomp from Homebrew.
|
|
||||||
cmake .. -GNinja -DGOOGLE_TEST=ON -DUSE_DMLC_GTEST=ON -DCMAKE_PREFIX_PATH=$CONDA_PREFIX
|
|
||||||
ninja
|
|
||||||
|
|
||||||
- name: Install Python package
|
|
||||||
shell: bash -l {0}
|
|
||||||
run: |
|
|
||||||
cd python-package
|
|
||||||
python --version
|
|
||||||
python setup.py install
|
|
||||||
|
|
||||||
- name: Test Python package
|
|
||||||
shell: bash -l {0}
|
|
||||||
run: |
|
|
||||||
pytest -s -v ./tests/python
|
|
||||||
|
|||||||
6
.github/workflows/r_tests.yml
vendored
6
.github/workflows/r_tests.yml
vendored
@@ -5,6 +5,7 @@ on: [push, pull_request]
|
|||||||
env:
|
env:
|
||||||
R_PACKAGES: c('XML', 'data.table', 'ggplot2', 'DiagrammeR', 'Ckmeans.1d.dp', 'vcd', 'testthat', 'lintr', 'knitr', 'rmarkdown', 'e1071', 'cplm', 'devtools', 'float', 'titanic')
|
R_PACKAGES: c('XML', 'data.table', 'ggplot2', 'DiagrammeR', 'Ckmeans.1d.dp', 'vcd', 'testthat', 'lintr', 'knitr', 'rmarkdown', 'e1071', 'cplm', 'devtools', 'float', 'titanic')
|
||||||
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
|
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
|
||||||
|
_R_CHECK_EXAMPLE_TIMING_CPU_TO_ELAPSED_THRESHOLD_: 2.5
|
||||||
|
|
||||||
permissions:
|
permissions:
|
||||||
contents: read # to fetch code (actions/checkout)
|
contents: read # to fetch code (actions/checkout)
|
||||||
@@ -68,6 +69,7 @@ jobs:
|
|||||||
- {os: windows-latest, r: 'release', compiler: 'mingw', build: 'cmake'}
|
- {os: windows-latest, r: 'release', compiler: 'mingw', build: 'cmake'}
|
||||||
env:
|
env:
|
||||||
R_REMOTES_NO_ERRORS_FROM_WARNINGS: true
|
R_REMOTES_NO_ERRORS_FROM_WARNINGS: true
|
||||||
|
_R_CHECK_EXAMPLE_TIMING_CPU_TO_ELAPSED_THRESHOLD_: 2.5
|
||||||
RSPM: ${{ matrix.config.rspm }}
|
RSPM: ${{ matrix.config.rspm }}
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
@@ -121,6 +123,10 @@ jobs:
|
|||||||
config:
|
config:
|
||||||
- {r: 'release'}
|
- {r: 'release'}
|
||||||
|
|
||||||
|
env:
|
||||||
|
_R_CHECK_EXAMPLE_TIMING_CPU_TO_ELAPSED_THRESHOLD_: 2.5
|
||||||
|
MAKE: "make -j$(nproc)"
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v2
|
- uses: actions/checkout@v2
|
||||||
with:
|
with:
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
cmake_minimum_required(VERSION 3.14 FATAL_ERROR)
|
cmake_minimum_required(VERSION 3.14 FATAL_ERROR)
|
||||||
project(xgboost LANGUAGES CXX C VERSION 1.7.0)
|
project(xgboost LANGUAGES CXX C VERSION 1.7.3)
|
||||||
include(cmake/Utils.cmake)
|
include(cmake/Utils.cmake)
|
||||||
list(APPEND CMAKE_MODULE_PATH "${xgboost_SOURCE_DIR}/cmake/modules")
|
list(APPEND CMAKE_MODULE_PATH "${xgboost_SOURCE_DIR}/cmake/modules")
|
||||||
cmake_policy(SET CMP0022 NEW)
|
cmake_policy(SET CMP0022 NEW)
|
||||||
|
|||||||
1
Makefile
1
Makefile
@@ -126,7 +126,6 @@ Rpack: clean_all
|
|||||||
cat R-package/src/Makevars.in|sed '2s/.*/PKGROOT=./' > xgboost/src/Makevars.in
|
cat R-package/src/Makevars.in|sed '2s/.*/PKGROOT=./' > xgboost/src/Makevars.in
|
||||||
cat R-package/src/Makevars.win|sed '2s/.*/PKGROOT=./' > xgboost/src/Makevars.win
|
cat R-package/src/Makevars.win|sed '2s/.*/PKGROOT=./' > xgboost/src/Makevars.win
|
||||||
rm -f xgboost/src/Makevars.win-e # OSX sed creates this extra file; remove it
|
rm -f xgboost/src/Makevars.win-e # OSX sed creates this extra file; remove it
|
||||||
rm -f xgboost/cleanup
|
|
||||||
bash R-package/remove_warning_suppression_pragma.sh
|
bash R-package/remove_warning_suppression_pragma.sh
|
||||||
bash xgboost/remove_warning_suppression_pragma.sh
|
bash xgboost/remove_warning_suppression_pragma.sh
|
||||||
rm xgboost/remove_warning_suppression_pragma.sh
|
rm xgboost/remove_warning_suppression_pragma.sh
|
||||||
|
|||||||
@@ -1,8 +1,8 @@
|
|||||||
Package: xgboost
|
Package: xgboost
|
||||||
Type: Package
|
Type: Package
|
||||||
Title: Extreme Gradient Boosting
|
Title: Extreme Gradient Boosting
|
||||||
Version: 1.7.0.1
|
Version: 1.7.3.1
|
||||||
Date: 2022-10-18
|
Date: 2023-01-06
|
||||||
Authors@R: c(
|
Authors@R: c(
|
||||||
person("Tianqi", "Chen", role = c("aut"),
|
person("Tianqi", "Chen", role = c("aut"),
|
||||||
email = "tianqi.tchen@gmail.com"),
|
email = "tianqi.tchen@gmail.com"),
|
||||||
@@ -66,5 +66,5 @@ Imports:
|
|||||||
methods,
|
methods,
|
||||||
data.table (>= 1.9.6),
|
data.table (>= 1.9.6),
|
||||||
jsonlite (>= 1.0),
|
jsonlite (>= 1.0),
|
||||||
RoxygenNote: 7.1.1
|
RoxygenNote: 7.2.2
|
||||||
SystemRequirements: GNU make, C++14
|
SystemRequirements: GNU make, C++14
|
||||||
|
|||||||
@@ -544,9 +544,11 @@ cb.cv.predict <- function(save_models = FALSE) {
|
|||||||
#'
|
#'
|
||||||
#' @return
|
#' @return
|
||||||
#' Results are stored in the \code{coefs} element of the closure.
|
#' Results are stored in the \code{coefs} element of the closure.
|
||||||
#' The \code{\link{xgb.gblinear.history}} convenience function provides an easy way to access it.
|
#' The \code{\link{xgb.gblinear.history}} convenience function provides an easy
|
||||||
|
#' way to access it.
|
||||||
#' With \code{xgb.train}, it is either a dense or a sparse matrix.
|
#' With \code{xgb.train}, it is either a dense or a sparse matrix.
|
||||||
#' While with \code{xgb.cv}, it is a list (an element per each fold) of such matrices.
|
#' While with \code{xgb.cv}, it is a list (an element per each fold) of such
|
||||||
|
#' matrices.
|
||||||
#'
|
#'
|
||||||
#' @seealso
|
#' @seealso
|
||||||
#' \code{\link{callbacks}}, \code{\link{xgb.gblinear.history}}.
|
#' \code{\link{callbacks}}, \code{\link{xgb.gblinear.history}}.
|
||||||
@@ -558,7 +560,7 @@ cb.cv.predict <- function(save_models = FALSE) {
|
|||||||
#' # without considering the 2nd order interactions:
|
#' # without considering the 2nd order interactions:
|
||||||
#' x <- model.matrix(Species ~ .^2, iris)[,-1]
|
#' x <- model.matrix(Species ~ .^2, iris)[,-1]
|
||||||
#' colnames(x)
|
#' colnames(x)
|
||||||
#' dtrain <- xgb.DMatrix(scale(x), label = 1*(iris$Species == "versicolor"))
|
#' dtrain <- xgb.DMatrix(scale(x), label = 1*(iris$Species == "versicolor"), nthread = 2)
|
||||||
#' param <- list(booster = "gblinear", objective = "reg:logistic", eval_metric = "auc",
|
#' param <- list(booster = "gblinear", objective = "reg:logistic", eval_metric = "auc",
|
||||||
#' lambda = 0.0003, alpha = 0.0003, nthread = 2)
|
#' lambda = 0.0003, alpha = 0.0003, nthread = 2)
|
||||||
#' # For 'shotgun', which is a default linear updater, using high eta values may result in
|
#' # For 'shotgun', which is a default linear updater, using high eta values may result in
|
||||||
@@ -583,14 +585,14 @@ cb.cv.predict <- function(save_models = FALSE) {
|
|||||||
#'
|
#'
|
||||||
#' # For xgb.cv:
|
#' # For xgb.cv:
|
||||||
#' bst <- xgb.cv(param, dtrain, nfold = 5, nrounds = 100, eta = 0.8,
|
#' bst <- xgb.cv(param, dtrain, nfold = 5, nrounds = 100, eta = 0.8,
|
||||||
#' callbacks = list(cb.gblinear.history()))
|
#' callbacks = list(cb.gblinear.history()))
|
||||||
#' # coefficients in the CV fold #3
|
#' # coefficients in the CV fold #3
|
||||||
#' matplot(xgb.gblinear.history(bst)[[3]], type = 'l')
|
#' matplot(xgb.gblinear.history(bst)[[3]], type = 'l')
|
||||||
#'
|
#'
|
||||||
#'
|
#'
|
||||||
#' #### Multiclass classification:
|
#' #### Multiclass classification:
|
||||||
#' #
|
#' #
|
||||||
#' dtrain <- xgb.DMatrix(scale(x), label = as.numeric(iris$Species) - 1)
|
#' dtrain <- xgb.DMatrix(scale(x), label = as.numeric(iris$Species) - 1, nthread = 2)
|
||||||
#' param <- list(booster = "gblinear", objective = "multi:softprob", num_class = 3,
|
#' param <- list(booster = "gblinear", objective = "multi:softprob", num_class = 3,
|
||||||
#' lambda = 0.0003, alpha = 0.0003, nthread = 2)
|
#' lambda = 0.0003, alpha = 0.0003, nthread = 2)
|
||||||
#' # For the default linear updater 'shotgun' it sometimes is helpful
|
#' # For the default linear updater 'shotgun' it sometimes is helpful
|
||||||
|
|||||||
@@ -18,7 +18,7 @@
|
|||||||
#'
|
#'
|
||||||
#' @examples
|
#' @examples
|
||||||
#' data(agaricus.train, package='xgboost')
|
#' data(agaricus.train, package='xgboost')
|
||||||
#' dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label))
|
#' dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))
|
||||||
#' xgb.DMatrix.save(dtrain, 'xgb.DMatrix.data')
|
#' xgb.DMatrix.save(dtrain, 'xgb.DMatrix.data')
|
||||||
#' dtrain <- xgb.DMatrix('xgb.DMatrix.data')
|
#' dtrain <- xgb.DMatrix('xgb.DMatrix.data')
|
||||||
#' if (file.exists('xgb.DMatrix.data')) file.remove('xgb.DMatrix.data')
|
#' if (file.exists('xgb.DMatrix.data')) file.remove('xgb.DMatrix.data')
|
||||||
@@ -110,7 +110,7 @@ xgb.get.DMatrix <- function(data, label = NULL, missing = NA, weight = NULL, nth
|
|||||||
#' @examples
|
#' @examples
|
||||||
#' data(agaricus.train, package='xgboost')
|
#' data(agaricus.train, package='xgboost')
|
||||||
#' train <- agaricus.train
|
#' train <- agaricus.train
|
||||||
#' dtrain <- xgb.DMatrix(train$data, label=train$label)
|
#' dtrain <- xgb.DMatrix(train$data, label=train$label, nthread = 2)
|
||||||
#'
|
#'
|
||||||
#' stopifnot(nrow(dtrain) == nrow(train$data))
|
#' stopifnot(nrow(dtrain) == nrow(train$data))
|
||||||
#' stopifnot(ncol(dtrain) == ncol(train$data))
|
#' stopifnot(ncol(dtrain) == ncol(train$data))
|
||||||
@@ -138,7 +138,7 @@ dim.xgb.DMatrix <- function(x) {
|
|||||||
#' @examples
|
#' @examples
|
||||||
#' data(agaricus.train, package='xgboost')
|
#' data(agaricus.train, package='xgboost')
|
||||||
#' train <- agaricus.train
|
#' train <- agaricus.train
|
||||||
#' dtrain <- xgb.DMatrix(train$data, label=train$label)
|
#' dtrain <- xgb.DMatrix(train$data, label=train$label, nthread = 2)
|
||||||
#' dimnames(dtrain)
|
#' dimnames(dtrain)
|
||||||
#' colnames(dtrain)
|
#' colnames(dtrain)
|
||||||
#' colnames(dtrain) <- make.names(1:ncol(train$data))
|
#' colnames(dtrain) <- make.names(1:ncol(train$data))
|
||||||
@@ -193,7 +193,7 @@ dimnames.xgb.DMatrix <- function(x) {
|
|||||||
#'
|
#'
|
||||||
#' @examples
|
#' @examples
|
||||||
#' data(agaricus.train, package='xgboost')
|
#' data(agaricus.train, package='xgboost')
|
||||||
#' dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label))
|
#' dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))
|
||||||
#'
|
#'
|
||||||
#' labels <- getinfo(dtrain, 'label')
|
#' labels <- getinfo(dtrain, 'label')
|
||||||
#' setinfo(dtrain, 'label', 1-labels)
|
#' setinfo(dtrain, 'label', 1-labels)
|
||||||
@@ -249,7 +249,7 @@ getinfo.xgb.DMatrix <- function(object, name, ...) {
|
|||||||
#'
|
#'
|
||||||
#' @examples
|
#' @examples
|
||||||
#' data(agaricus.train, package='xgboost')
|
#' data(agaricus.train, package='xgboost')
|
||||||
#' dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label))
|
#' dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))
|
||||||
#'
|
#'
|
||||||
#' labels <- getinfo(dtrain, 'label')
|
#' labels <- getinfo(dtrain, 'label')
|
||||||
#' setinfo(dtrain, 'label', 1-labels)
|
#' setinfo(dtrain, 'label', 1-labels)
|
||||||
@@ -345,7 +345,7 @@ setinfo.xgb.DMatrix <- function(object, name, info, ...) {
|
|||||||
#'
|
#'
|
||||||
#' @examples
|
#' @examples
|
||||||
#' data(agaricus.train, package='xgboost')
|
#' data(agaricus.train, package='xgboost')
|
||||||
#' dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label))
|
#' dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))
|
||||||
#'
|
#'
|
||||||
#' dsub <- slice(dtrain, 1:42)
|
#' dsub <- slice(dtrain, 1:42)
|
||||||
#' labels1 <- getinfo(dsub, 'label')
|
#' labels1 <- getinfo(dsub, 'label')
|
||||||
@@ -401,7 +401,7 @@ slice.xgb.DMatrix <- function(object, idxset, ...) {
|
|||||||
#'
|
#'
|
||||||
#' @examples
|
#' @examples
|
||||||
#' data(agaricus.train, package='xgboost')
|
#' data(agaricus.train, package='xgboost')
|
||||||
#' dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label))
|
#' dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))
|
||||||
#'
|
#'
|
||||||
#' dtrain
|
#' dtrain
|
||||||
#' print(dtrain, verbose=TRUE)
|
#' print(dtrain, verbose=TRUE)
|
||||||
|
|||||||
@@ -7,7 +7,7 @@
|
|||||||
#'
|
#'
|
||||||
#' @examples
|
#' @examples
|
||||||
#' data(agaricus.train, package='xgboost')
|
#' data(agaricus.train, package='xgboost')
|
||||||
#' dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label))
|
#' dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))
|
||||||
#' xgb.DMatrix.save(dtrain, 'xgb.DMatrix.data')
|
#' xgb.DMatrix.save(dtrain, 'xgb.DMatrix.data')
|
||||||
#' dtrain <- xgb.DMatrix('xgb.DMatrix.data')
|
#' dtrain <- xgb.DMatrix('xgb.DMatrix.data')
|
||||||
#' if (file.exists('xgb.DMatrix.data')) file.remove('xgb.DMatrix.data')
|
#' if (file.exists('xgb.DMatrix.data')) file.remove('xgb.DMatrix.data')
|
||||||
|
|||||||
@@ -48,8 +48,8 @@
|
|||||||
#' @examples
|
#' @examples
|
||||||
#' data(agaricus.train, package='xgboost')
|
#' data(agaricus.train, package='xgboost')
|
||||||
#' data(agaricus.test, package='xgboost')
|
#' data(agaricus.test, package='xgboost')
|
||||||
#' dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label))
|
#' dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))
|
||||||
#' dtest <- with(agaricus.test, xgb.DMatrix(data, label = label))
|
#' dtest <- with(agaricus.test, xgb.DMatrix(data, label = label, nthread = 2))
|
||||||
#'
|
#'
|
||||||
#' param <- list(max_depth=2, eta=1, silent=1, objective='binary:logistic')
|
#' param <- list(max_depth=2, eta=1, silent=1, objective='binary:logistic')
|
||||||
#' nrounds = 4
|
#' nrounds = 4
|
||||||
@@ -65,8 +65,12 @@
|
|||||||
#' new.features.test <- xgb.create.features(model = bst, agaricus.test$data)
|
#' new.features.test <- xgb.create.features(model = bst, agaricus.test$data)
|
||||||
#'
|
#'
|
||||||
#' # learning with new features
|
#' # learning with new features
|
||||||
#' new.dtrain <- xgb.DMatrix(data = new.features.train, label = agaricus.train$label)
|
#' new.dtrain <- xgb.DMatrix(
|
||||||
#' new.dtest <- xgb.DMatrix(data = new.features.test, label = agaricus.test$label)
|
#' data = new.features.train, label = agaricus.train$label, nthread = 2
|
||||||
|
#' )
|
||||||
|
#' new.dtest <- xgb.DMatrix(
|
||||||
|
#' data = new.features.test, label = agaricus.test$label, nthread = 2
|
||||||
|
#' )
|
||||||
#' watchlist <- list(train = new.dtrain)
|
#' watchlist <- list(train = new.dtrain)
|
||||||
#' bst <- xgb.train(params = param, data = new.dtrain, nrounds = nrounds, nthread = 2)
|
#' bst <- xgb.train(params = param, data = new.dtrain, nrounds = nrounds, nthread = 2)
|
||||||
#'
|
#'
|
||||||
@@ -79,7 +83,7 @@
|
|||||||
#' accuracy.after, "!\n"))
|
#' accuracy.after, "!\n"))
|
||||||
#'
|
#'
|
||||||
#' @export
|
#' @export
|
||||||
xgb.create.features <- function(model, data, ...){
|
xgb.create.features <- function(model, data, ...) {
|
||||||
check.deprecation(...)
|
check.deprecation(...)
|
||||||
pred_with_leaf <- predict(model, data, predleaf = TRUE)
|
pred_with_leaf <- predict(model, data, predleaf = TRUE)
|
||||||
cols <- lapply(as.data.frame(pred_with_leaf), factor)
|
cols <- lapply(as.data.frame(pred_with_leaf), factor)
|
||||||
|
|||||||
@@ -110,9 +110,9 @@
|
|||||||
#'
|
#'
|
||||||
#' @examples
|
#' @examples
|
||||||
#' data(agaricus.train, package='xgboost')
|
#' data(agaricus.train, package='xgboost')
|
||||||
#' dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label))
|
#' dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))
|
||||||
#' cv <- xgb.cv(data = dtrain, nrounds = 3, nthread = 2, nfold = 5, metrics = list("rmse","auc"),
|
#' cv <- xgb.cv(data = dtrain, nrounds = 3, nthread = 2, nfold = 5, metrics = list("rmse","auc"),
|
||||||
#' max_depth = 3, eta = 1, objective = "binary:logistic")
|
#' max_depth = 3, eta = 1, objective = "binary:logistic")
|
||||||
#' print(cv)
|
#' print(cv)
|
||||||
#' print(cv, verbose=TRUE)
|
#' print(cv, verbose=TRUE)
|
||||||
#'
|
#'
|
||||||
@@ -192,7 +192,7 @@ xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, missing =
|
|||||||
|
|
||||||
# create the booster-folds
|
# create the booster-folds
|
||||||
# train_folds
|
# train_folds
|
||||||
dall <- xgb.get.DMatrix(data, label, missing)
|
dall <- xgb.get.DMatrix(data, label, missing, nthread = params$nthread)
|
||||||
bst_folds <- lapply(seq_along(folds), function(k) {
|
bst_folds <- lapply(seq_along(folds), function(k) {
|
||||||
dtest <- slice(dall, folds[[k]])
|
dtest <- slice(dall, folds[[k]])
|
||||||
# code originally contributed by @RolandASc on stackoverflow
|
# code originally contributed by @RolandASc on stackoverflow
|
||||||
|
|||||||
@@ -192,8 +192,8 @@
|
|||||||
#' data(agaricus.train, package='xgboost')
|
#' data(agaricus.train, package='xgboost')
|
||||||
#' data(agaricus.test, package='xgboost')
|
#' data(agaricus.test, package='xgboost')
|
||||||
#'
|
#'
|
||||||
#' dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label))
|
#' dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))
|
||||||
#' dtest <- with(agaricus.test, xgb.DMatrix(data, label = label))
|
#' dtest <- with(agaricus.test, xgb.DMatrix(data, label = label, nthread = 2))
|
||||||
#' watchlist <- list(train = dtrain, eval = dtest)
|
#' watchlist <- list(train = dtrain, eval = dtest)
|
||||||
#'
|
#'
|
||||||
#' ## A simple xgb.train example:
|
#' ## A simple xgb.train example:
|
||||||
|
|||||||
18
R-package/configure
vendored
18
R-package/configure
vendored
@@ -1,6 +1,6 @@
|
|||||||
#! /bin/sh
|
#! /bin/sh
|
||||||
# Guess values for system-dependent variables and create Makefiles.
|
# Guess values for system-dependent variables and create Makefiles.
|
||||||
# Generated by GNU Autoconf 2.69 for xgboost 1.7.0.
|
# Generated by GNU Autoconf 2.69 for xgboost 1.7.3.
|
||||||
#
|
#
|
||||||
#
|
#
|
||||||
# Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc.
|
# Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc.
|
||||||
@@ -576,8 +576,8 @@ MAKEFLAGS=
|
|||||||
# Identity of this package.
|
# Identity of this package.
|
||||||
PACKAGE_NAME='xgboost'
|
PACKAGE_NAME='xgboost'
|
||||||
PACKAGE_TARNAME='xgboost'
|
PACKAGE_TARNAME='xgboost'
|
||||||
PACKAGE_VERSION='1.7.0'
|
PACKAGE_VERSION='1.7.3'
|
||||||
PACKAGE_STRING='xgboost 1.7.0'
|
PACKAGE_STRING='xgboost 1.7.3'
|
||||||
PACKAGE_BUGREPORT=''
|
PACKAGE_BUGREPORT=''
|
||||||
PACKAGE_URL=''
|
PACKAGE_URL=''
|
||||||
|
|
||||||
@@ -1195,7 +1195,7 @@ if test "$ac_init_help" = "long"; then
|
|||||||
# Omit some internal or obsolete options to make the list less imposing.
|
# Omit some internal or obsolete options to make the list less imposing.
|
||||||
# This message is too long to be a string in the A/UX 3.1 sh.
|
# This message is too long to be a string in the A/UX 3.1 sh.
|
||||||
cat <<_ACEOF
|
cat <<_ACEOF
|
||||||
\`configure' configures xgboost 1.7.0 to adapt to many kinds of systems.
|
\`configure' configures xgboost 1.7.3 to adapt to many kinds of systems.
|
||||||
|
|
||||||
Usage: $0 [OPTION]... [VAR=VALUE]...
|
Usage: $0 [OPTION]... [VAR=VALUE]...
|
||||||
|
|
||||||
@@ -1257,7 +1257,7 @@ fi
|
|||||||
|
|
||||||
if test -n "$ac_init_help"; then
|
if test -n "$ac_init_help"; then
|
||||||
case $ac_init_help in
|
case $ac_init_help in
|
||||||
short | recursive ) echo "Configuration of xgboost 1.7.0:";;
|
short | recursive ) echo "Configuration of xgboost 1.7.3:";;
|
||||||
esac
|
esac
|
||||||
cat <<\_ACEOF
|
cat <<\_ACEOF
|
||||||
|
|
||||||
@@ -1336,7 +1336,7 @@ fi
|
|||||||
test -n "$ac_init_help" && exit $ac_status
|
test -n "$ac_init_help" && exit $ac_status
|
||||||
if $ac_init_version; then
|
if $ac_init_version; then
|
||||||
cat <<\_ACEOF
|
cat <<\_ACEOF
|
||||||
xgboost configure 1.7.0
|
xgboost configure 1.7.3
|
||||||
generated by GNU Autoconf 2.69
|
generated by GNU Autoconf 2.69
|
||||||
|
|
||||||
Copyright (C) 2012 Free Software Foundation, Inc.
|
Copyright (C) 2012 Free Software Foundation, Inc.
|
||||||
@@ -1479,7 +1479,7 @@ cat >config.log <<_ACEOF
|
|||||||
This file contains any messages produced by compilers while
|
This file contains any messages produced by compilers while
|
||||||
running configure, to aid debugging if configure makes a mistake.
|
running configure, to aid debugging if configure makes a mistake.
|
||||||
|
|
||||||
It was created by xgboost $as_me 1.7.0, which was
|
It was created by xgboost $as_me 1.7.3, which was
|
||||||
generated by GNU Autoconf 2.69. Invocation command line was
|
generated by GNU Autoconf 2.69. Invocation command line was
|
||||||
|
|
||||||
$ $0 $@
|
$ $0 $@
|
||||||
@@ -3294,7 +3294,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
|
|||||||
# report actual input values of CONFIG_FILES etc. instead of their
|
# report actual input values of CONFIG_FILES etc. instead of their
|
||||||
# values after options handling.
|
# values after options handling.
|
||||||
ac_log="
|
ac_log="
|
||||||
This file was extended by xgboost $as_me 1.7.0, which was
|
This file was extended by xgboost $as_me 1.7.3, which was
|
||||||
generated by GNU Autoconf 2.69. Invocation command line was
|
generated by GNU Autoconf 2.69. Invocation command line was
|
||||||
|
|
||||||
CONFIG_FILES = $CONFIG_FILES
|
CONFIG_FILES = $CONFIG_FILES
|
||||||
@@ -3347,7 +3347,7 @@ _ACEOF
|
|||||||
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
|
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
|
||||||
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
|
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
|
||||||
ac_cs_version="\\
|
ac_cs_version="\\
|
||||||
xgboost config.status 1.7.0
|
xgboost config.status 1.7.3
|
||||||
configured by $0, generated by GNU Autoconf 2.69,
|
configured by $0, generated by GNU Autoconf 2.69,
|
||||||
with options \\"\$ac_cs_config\\"
|
with options \\"\$ac_cs_config\\"
|
||||||
|
|
||||||
|
|||||||
@@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
AC_PREREQ(2.69)
|
AC_PREREQ(2.69)
|
||||||
|
|
||||||
AC_INIT([xgboost],[1.7.0],[],[xgboost],[])
|
AC_INIT([xgboost],[1.7.3],[],[xgboost],[])
|
||||||
|
|
||||||
# Use this line to set CC variable to a C compiler
|
# Use this line to set CC variable to a C compiler
|
||||||
AC_PROG_CC
|
AC_PROG_CC
|
||||||
|
|||||||
@@ -15,9 +15,11 @@ selected per iteration.}
|
|||||||
}
|
}
|
||||||
\value{
|
\value{
|
||||||
Results are stored in the \code{coefs} element of the closure.
|
Results are stored in the \code{coefs} element of the closure.
|
||||||
The \code{\link{xgb.gblinear.history}} convenience function provides an easy way to access it.
|
The \code{\link{xgb.gblinear.history}} convenience function provides an easy
|
||||||
|
way to access it.
|
||||||
With \code{xgb.train}, it is either a dense or a sparse matrix.
|
With \code{xgb.train}, it is either a dense or a sparse matrix.
|
||||||
While with \code{xgb.cv}, it is a list (an element per each fold) of such matrices.
|
While with \code{xgb.cv}, it is a list (an element per each fold) of such
|
||||||
|
matrices.
|
||||||
}
|
}
|
||||||
\description{
|
\description{
|
||||||
Callback closure for collecting the model coefficients history of a gblinear booster
|
Callback closure for collecting the model coefficients history of a gblinear booster
|
||||||
@@ -38,7 +40,7 @@ Callback function expects the following values to be set in its calling frame:
|
|||||||
# without considering the 2nd order interactions:
|
# without considering the 2nd order interactions:
|
||||||
x <- model.matrix(Species ~ .^2, iris)[,-1]
|
x <- model.matrix(Species ~ .^2, iris)[,-1]
|
||||||
colnames(x)
|
colnames(x)
|
||||||
dtrain <- xgb.DMatrix(scale(x), label = 1*(iris$Species == "versicolor"))
|
dtrain <- xgb.DMatrix(scale(x), label = 1*(iris$Species == "versicolor"), nthread = 2)
|
||||||
param <- list(booster = "gblinear", objective = "reg:logistic", eval_metric = "auc",
|
param <- list(booster = "gblinear", objective = "reg:logistic", eval_metric = "auc",
|
||||||
lambda = 0.0003, alpha = 0.0003, nthread = 2)
|
lambda = 0.0003, alpha = 0.0003, nthread = 2)
|
||||||
# For 'shotgun', which is a default linear updater, using high eta values may result in
|
# For 'shotgun', which is a default linear updater, using high eta values may result in
|
||||||
@@ -63,14 +65,14 @@ matplot(xgb.gblinear.history(bst), type = 'l')
|
|||||||
|
|
||||||
# For xgb.cv:
|
# For xgb.cv:
|
||||||
bst <- xgb.cv(param, dtrain, nfold = 5, nrounds = 100, eta = 0.8,
|
bst <- xgb.cv(param, dtrain, nfold = 5, nrounds = 100, eta = 0.8,
|
||||||
callbacks = list(cb.gblinear.history()))
|
callbacks = list(cb.gblinear.history()))
|
||||||
# coefficients in the CV fold #3
|
# coefficients in the CV fold #3
|
||||||
matplot(xgb.gblinear.history(bst)[[3]], type = 'l')
|
matplot(xgb.gblinear.history(bst)[[3]], type = 'l')
|
||||||
|
|
||||||
|
|
||||||
#### Multiclass classification:
|
#### Multiclass classification:
|
||||||
#
|
#
|
||||||
dtrain <- xgb.DMatrix(scale(x), label = as.numeric(iris$Species) - 1)
|
dtrain <- xgb.DMatrix(scale(x), label = as.numeric(iris$Species) - 1, nthread = 2)
|
||||||
param <- list(booster = "gblinear", objective = "multi:softprob", num_class = 3,
|
param <- list(booster = "gblinear", objective = "multi:softprob", num_class = 3,
|
||||||
lambda = 0.0003, alpha = 0.0003, nthread = 2)
|
lambda = 0.0003, alpha = 0.0003, nthread = 2)
|
||||||
# For the default linear updater 'shotgun' it sometimes is helpful
|
# For the default linear updater 'shotgun' it sometimes is helpful
|
||||||
|
|||||||
@@ -19,7 +19,7 @@ be directly used with an \code{xgb.DMatrix} object.
|
|||||||
\examples{
|
\examples{
|
||||||
data(agaricus.train, package='xgboost')
|
data(agaricus.train, package='xgboost')
|
||||||
train <- agaricus.train
|
train <- agaricus.train
|
||||||
dtrain <- xgb.DMatrix(train$data, label=train$label)
|
dtrain <- xgb.DMatrix(train$data, label=train$label, nthread = 2)
|
||||||
|
|
||||||
stopifnot(nrow(dtrain) == nrow(train$data))
|
stopifnot(nrow(dtrain) == nrow(train$data))
|
||||||
stopifnot(ncol(dtrain) == ncol(train$data))
|
stopifnot(ncol(dtrain) == ncol(train$data))
|
||||||
|
|||||||
@@ -26,7 +26,7 @@ Since row names are irrelevant, it is recommended to use \code{colnames} directl
|
|||||||
\examples{
|
\examples{
|
||||||
data(agaricus.train, package='xgboost')
|
data(agaricus.train, package='xgboost')
|
||||||
train <- agaricus.train
|
train <- agaricus.train
|
||||||
dtrain <- xgb.DMatrix(train$data, label=train$label)
|
dtrain <- xgb.DMatrix(train$data, label=train$label, nthread = 2)
|
||||||
dimnames(dtrain)
|
dimnames(dtrain)
|
||||||
colnames(dtrain)
|
colnames(dtrain)
|
||||||
colnames(dtrain) <- make.names(1:ncol(train$data))
|
colnames(dtrain) <- make.names(1:ncol(train$data))
|
||||||
|
|||||||
@@ -34,7 +34,7 @@ The \code{name} field can be one of the following:
|
|||||||
}
|
}
|
||||||
\examples{
|
\examples{
|
||||||
data(agaricus.train, package='xgboost')
|
data(agaricus.train, package='xgboost')
|
||||||
dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label))
|
dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))
|
||||||
|
|
||||||
labels <- getinfo(dtrain, 'label')
|
labels <- getinfo(dtrain, 'label')
|
||||||
setinfo(dtrain, 'label', 1-labels)
|
setinfo(dtrain, 'label', 1-labels)
|
||||||
|
|||||||
@@ -19,7 +19,7 @@ Currently it displays dimensions and presence of info-fields and colnames.
|
|||||||
}
|
}
|
||||||
\examples{
|
\examples{
|
||||||
data(agaricus.train, package='xgboost')
|
data(agaricus.train, package='xgboost')
|
||||||
dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label))
|
dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))
|
||||||
|
|
||||||
dtrain
|
dtrain
|
||||||
print(dtrain, verbose=TRUE)
|
print(dtrain, verbose=TRUE)
|
||||||
|
|||||||
@@ -33,7 +33,7 @@ The \code{name} field can be one of the following:
|
|||||||
}
|
}
|
||||||
\examples{
|
\examples{
|
||||||
data(agaricus.train, package='xgboost')
|
data(agaricus.train, package='xgboost')
|
||||||
dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label))
|
dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))
|
||||||
|
|
||||||
labels <- getinfo(dtrain, 'label')
|
labels <- getinfo(dtrain, 'label')
|
||||||
setinfo(dtrain, 'label', 1-labels)
|
setinfo(dtrain, 'label', 1-labels)
|
||||||
|
|||||||
@@ -28,7 +28,7 @@ original xgb.DMatrix object
|
|||||||
}
|
}
|
||||||
\examples{
|
\examples{
|
||||||
data(agaricus.train, package='xgboost')
|
data(agaricus.train, package='xgboost')
|
||||||
dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label))
|
dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))
|
||||||
|
|
||||||
dsub <- slice(dtrain, 1:42)
|
dsub <- slice(dtrain, 1:42)
|
||||||
labels1 <- getinfo(dsub, 'label')
|
labels1 <- getinfo(dsub, 'label')
|
||||||
|
|||||||
@@ -38,7 +38,7 @@ Supported input file formats are either a LIBSVM text file or a binary file that
|
|||||||
}
|
}
|
||||||
\examples{
|
\examples{
|
||||||
data(agaricus.train, package='xgboost')
|
data(agaricus.train, package='xgboost')
|
||||||
dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label))
|
dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))
|
||||||
xgb.DMatrix.save(dtrain, 'xgb.DMatrix.data')
|
xgb.DMatrix.save(dtrain, 'xgb.DMatrix.data')
|
||||||
dtrain <- xgb.DMatrix('xgb.DMatrix.data')
|
dtrain <- xgb.DMatrix('xgb.DMatrix.data')
|
||||||
if (file.exists('xgb.DMatrix.data')) file.remove('xgb.DMatrix.data')
|
if (file.exists('xgb.DMatrix.data')) file.remove('xgb.DMatrix.data')
|
||||||
|
|||||||
@@ -16,7 +16,7 @@ Save xgb.DMatrix object to binary file
|
|||||||
}
|
}
|
||||||
\examples{
|
\examples{
|
||||||
data(agaricus.train, package='xgboost')
|
data(agaricus.train, package='xgboost')
|
||||||
dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label))
|
dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))
|
||||||
xgb.DMatrix.save(dtrain, 'xgb.DMatrix.data')
|
xgb.DMatrix.save(dtrain, 'xgb.DMatrix.data')
|
||||||
dtrain <- xgb.DMatrix('xgb.DMatrix.data')
|
dtrain <- xgb.DMatrix('xgb.DMatrix.data')
|
||||||
if (file.exists('xgb.DMatrix.data')) file.remove('xgb.DMatrix.data')
|
if (file.exists('xgb.DMatrix.data')) file.remove('xgb.DMatrix.data')
|
||||||
|
|||||||
@@ -59,8 +59,8 @@ a rule on certain features."
|
|||||||
\examples{
|
\examples{
|
||||||
data(agaricus.train, package='xgboost')
|
data(agaricus.train, package='xgboost')
|
||||||
data(agaricus.test, package='xgboost')
|
data(agaricus.test, package='xgboost')
|
||||||
dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label))
|
dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))
|
||||||
dtest <- with(agaricus.test, xgb.DMatrix(data, label = label))
|
dtest <- with(agaricus.test, xgb.DMatrix(data, label = label, nthread = 2))
|
||||||
|
|
||||||
param <- list(max_depth=2, eta=1, silent=1, objective='binary:logistic')
|
param <- list(max_depth=2, eta=1, silent=1, objective='binary:logistic')
|
||||||
nrounds = 4
|
nrounds = 4
|
||||||
@@ -76,8 +76,12 @@ new.features.train <- xgb.create.features(model = bst, agaricus.train$data)
|
|||||||
new.features.test <- xgb.create.features(model = bst, agaricus.test$data)
|
new.features.test <- xgb.create.features(model = bst, agaricus.test$data)
|
||||||
|
|
||||||
# learning with new features
|
# learning with new features
|
||||||
new.dtrain <- xgb.DMatrix(data = new.features.train, label = agaricus.train$label)
|
new.dtrain <- xgb.DMatrix(
|
||||||
new.dtest <- xgb.DMatrix(data = new.features.test, label = agaricus.test$label)
|
data = new.features.train, label = agaricus.train$label, nthread = 2
|
||||||
|
)
|
||||||
|
new.dtest <- xgb.DMatrix(
|
||||||
|
data = new.features.test, label = agaricus.test$label, nthread = 2
|
||||||
|
)
|
||||||
watchlist <- list(train = new.dtrain)
|
watchlist <- list(train = new.dtrain)
|
||||||
bst <- xgb.train(params = param, data = new.dtrain, nrounds = nrounds, nthread = 2)
|
bst <- xgb.train(params = param, data = new.dtrain, nrounds = nrounds, nthread = 2)
|
||||||
|
|
||||||
|
|||||||
@@ -158,9 +158,9 @@ Adapted from \url{https://en.wikipedia.org/wiki/Cross-validation_\%28statistics\
|
|||||||
}
|
}
|
||||||
\examples{
|
\examples{
|
||||||
data(agaricus.train, package='xgboost')
|
data(agaricus.train, package='xgboost')
|
||||||
dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label))
|
dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))
|
||||||
cv <- xgb.cv(data = dtrain, nrounds = 3, nthread = 2, nfold = 5, metrics = list("rmse","auc"),
|
cv <- xgb.cv(data = dtrain, nrounds = 3, nthread = 2, nfold = 5, metrics = list("rmse","auc"),
|
||||||
max_depth = 3, eta = 1, objective = "binary:logistic")
|
max_depth = 3, eta = 1, objective = "binary:logistic")
|
||||||
print(cv)
|
print(cv)
|
||||||
print(cv, verbose=TRUE)
|
print(cv, verbose=TRUE)
|
||||||
|
|
||||||
|
|||||||
@@ -241,8 +241,8 @@ The following callbacks are automatically created when certain parameters are se
|
|||||||
data(agaricus.train, package='xgboost')
|
data(agaricus.train, package='xgboost')
|
||||||
data(agaricus.test, package='xgboost')
|
data(agaricus.test, package='xgboost')
|
||||||
|
|
||||||
dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label))
|
dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))
|
||||||
dtest <- with(agaricus.test, xgb.DMatrix(data, label = label))
|
dtest <- with(agaricus.test, xgb.DMatrix(data, label = label, nthread = 2))
|
||||||
watchlist <- list(train = dtrain, eval = dtest)
|
watchlist <- list(train = dtrain, eval = dtest)
|
||||||
|
|
||||||
## A simple xgb.train example:
|
## A simple xgb.train example:
|
||||||
|
|||||||
@@ -4,7 +4,7 @@ XGBoost Release Policy
|
|||||||
=======================
|
=======================
|
||||||
|
|
||||||
Versioning Policy
|
Versioning Policy
|
||||||
---------------------------
|
-----------------
|
||||||
|
|
||||||
Starting from XGBoost 1.0.0, each XGBoost release will be versioned as [MAJOR].[FEATURE].[MAINTENANCE]
|
Starting from XGBoost 1.0.0, each XGBoost release will be versioned as [MAJOR].[FEATURE].[MAINTENANCE]
|
||||||
|
|
||||||
@@ -34,6 +34,20 @@ Making a Release
|
|||||||
|
|
||||||
+ The CRAN package is maintained by `Tong He <https://github.com/hetong007>`_ and `Jiaming Yuan <https://github.com/trivialfis>`__.
|
+ The CRAN package is maintained by `Tong He <https://github.com/hetong007>`_ and `Jiaming Yuan <https://github.com/trivialfis>`__.
|
||||||
|
|
||||||
Before submitting a release, one should test the package on `R-hub <https://builder.r-hub.io/>`__ and `win-builder <https://win-builder.r-project.org/>`__ first. Please note that the R-hub Windows instance doesn't have the exact same environment as the one hosted on win-builder.
|
|
||||||
|
|
||||||
+ The Maven package is maintained by `Nan Zhu <https://github.com/CodingCat>`_ and `Hyunsu Cho <https://github.com/hcho3>`_.
|
+ The Maven package is maintained by `Nan Zhu <https://github.com/CodingCat>`_ and `Hyunsu Cho <https://github.com/hcho3>`_.
|
||||||
|
|
||||||
|
|
||||||
|
R CRAN Package
|
||||||
|
--------------
|
||||||
|
Before submitting a release, one should test the package on `R-hub <https://builder.r-hub.io/>`__ and `win-builder <https://win-builder.r-project.org/>`__ first. Please note that the R-hub Windows instance doesn't have the exact same environment as the one hosted on win-builder.
|
||||||
|
|
||||||
|
According to the `CRAN policy <https://cran.r-project.org/web/packages/policies.html>`__:
|
||||||
|
|
||||||
|
If running a package uses multiple threads/cores it must never use more than two simultaneously: the check farm is a shared resource and will typically be running many checks simultaneously.
|
||||||
|
|
||||||
|
We need to check the number of CPUs used in examples. Export ``_R_CHECK_EXAMPLE_TIMING_CPU_TO_ELAPSED_THRESHOLD_=2.5`` before running ``R CMD check --as-cran`` `[1] <#references>`__ and make sure the machine you are using has enough CPU cores to reveal any potential policy violation.
|
||||||
|
|
||||||
|
References
|
||||||
|
----------
|
||||||
|
|
||||||
|
[1] https://stat.ethz.ch/pipermail/r-package-devel/2022q4/008610.html
|
||||||
|
|||||||
@@ -44,8 +44,7 @@ General Parameters
|
|||||||
* ``validate_parameters`` [default to ``false``, except for Python, R and CLI interface]
|
* ``validate_parameters`` [default to ``false``, except for Python, R and CLI interface]
|
||||||
|
|
||||||
- When set to True, XGBoost will perform validation of input parameters to check whether
|
- When set to True, XGBoost will perform validation of input parameters to check whether
|
||||||
a parameter is used or not. The feature is still experimental. It's expected to have
|
a parameter is used or not.
|
||||||
some false positives.
|
|
||||||
|
|
||||||
* ``nthread`` [default to maximum number of threads available if not set]
|
* ``nthread`` [default to maximum number of threads available if not set]
|
||||||
|
|
||||||
@@ -233,24 +232,21 @@ Parameters for Categorical Feature
|
|||||||
These parameters are only used for training with categorical data. See
|
These parameters are only used for training with categorical data. See
|
||||||
:doc:`/tutorials/categorical` for more information.
|
:doc:`/tutorials/categorical` for more information.
|
||||||
|
|
||||||
|
.. note:: These parameters are experimental. ``exact`` tree method is not yet supported.
|
||||||
|
|
||||||
|
|
||||||
* ``max_cat_to_onehot``
|
* ``max_cat_to_onehot``
|
||||||
|
|
||||||
.. versionadded:: 1.6.0
|
.. versionadded:: 1.6.0
|
||||||
|
|
||||||
.. note:: This parameter is experimental. ``exact`` tree method is not yet supported.
|
|
||||||
|
|
||||||
- A threshold for deciding whether XGBoost should use one-hot encoding based split for
|
- A threshold for deciding whether XGBoost should use one-hot encoding based split for
|
||||||
categorical data. When number of categories is lesser than the threshold then one-hot
|
categorical data. When number of categories is lesser than the threshold then one-hot
|
||||||
encoding is chosen, otherwise the categories will be partitioned into children nodes.
|
encoding is chosen, otherwise the categories will be partitioned into children nodes.
|
||||||
Only relevant for regression and binary classification. Also, ``exact`` tree method is
|
|
||||||
not supported
|
|
||||||
|
|
||||||
* ``max_cat_threshold``
|
* ``max_cat_threshold``
|
||||||
|
|
||||||
.. versionadded:: 1.7.0
|
.. versionadded:: 1.7.0
|
||||||
|
|
||||||
.. note:: This parameter is experimental. ``exact`` tree method is not yet supported.
|
|
||||||
|
|
||||||
- Maximum number of categories considered for each split. Used only by partition-based
|
- Maximum number of categories considered for each split. Used only by partition-based
|
||||||
splits for preventing over-fitting.
|
splits for preventing over-fitting.
|
||||||
|
|
||||||
|
|||||||
@@ -25,9 +25,6 @@ Core Data Structure
|
|||||||
.. autoclass:: xgboost.QuantileDMatrix
|
.. autoclass:: xgboost.QuantileDMatrix
|
||||||
:show-inheritance:
|
:show-inheritance:
|
||||||
|
|
||||||
.. autoclass:: xgboost.DeviceQuantileDMatrix
|
|
||||||
:show-inheritance:
|
|
||||||
|
|
||||||
.. autoclass:: xgboost.Booster
|
.. autoclass:: xgboost.Booster
|
||||||
:members:
|
:members:
|
||||||
:show-inheritance:
|
:show-inheritance:
|
||||||
@@ -115,7 +112,7 @@ Dask API
|
|||||||
:inherited-members:
|
:inherited-members:
|
||||||
:show-inheritance:
|
:show-inheritance:
|
||||||
|
|
||||||
.. autoclass:: xgboost.dask.DaskDeviceQuantileDMatrix
|
.. autoclass:: xgboost.dask.DaskQuantileDMatrix
|
||||||
:members:
|
:members:
|
||||||
:inherited-members:
|
:inherited-members:
|
||||||
:show-inheritance:
|
:show-inheritance:
|
||||||
|
|||||||
@@ -138,11 +138,11 @@ Miscellaneous
|
|||||||
|
|
||||||
By default, XGBoost assumes input categories are integers starting from 0 till the number
|
By default, XGBoost assumes input categories are integers starting from 0 till the number
|
||||||
of categories :math:`[0, n\_categories)`. However, user might provide inputs with invalid
|
of categories :math:`[0, n\_categories)`. However, user might provide inputs with invalid
|
||||||
values due to mistakes or missing values. It can be negative value, integer values that
|
values due to mistakes or missing values in training dataset. It can be negative value,
|
||||||
can not be accurately represented by 32-bit floating point, or values that are larger than
|
integer values that can not be accurately represented by 32-bit floating point, or values
|
||||||
actual number of unique categories. During training this is validated but for prediction
|
that are larger than actual number of unique categories. During training this is
|
||||||
it's treated as the same as missing value for performance reasons. Lastly, missing values
|
validated but for prediction it's treated as the same as not-chosen category for
|
||||||
are treated as the same as numerical features (using the learned split direction).
|
performance reasons.
|
||||||
|
|
||||||
|
|
||||||
**********
|
**********
|
||||||
|
|||||||
@@ -564,7 +564,7 @@ Here are some pratices on reducing memory usage with dask and xgboost.
|
|||||||
nice summary.
|
nice summary.
|
||||||
|
|
||||||
- When using GPU input, like dataframe loaded by ``dask_cudf``, you can try
|
- When using GPU input, like dataframe loaded by ``dask_cudf``, you can try
|
||||||
:py:class:`xgboost.dask.DaskDeviceQuantileDMatrix` as a drop in replacement for ``DaskDMatrix``
|
:py:class:`xgboost.dask.DaskQuantileDMatrix` as a drop in replacement for ``DaskDMatrix``
|
||||||
to reduce overall memory usage. See
|
to reduce overall memory usage. See
|
||||||
:ref:`sphx_glr_python_dask-examples_gpu_training.py` for an example.
|
:ref:`sphx_glr_python_dask-examples_gpu_training.py` for an example.
|
||||||
|
|
||||||
|
|||||||
Submodule gputreeshap updated: acb5be3c17...787259b412
@@ -287,11 +287,22 @@ class TCPSocket {
|
|||||||
#elif defined(__APPLE__)
|
#elif defined(__APPLE__)
|
||||||
return domain_;
|
return domain_;
|
||||||
#elif defined(__unix__)
|
#elif defined(__unix__)
|
||||||
|
#ifndef __PASE__
|
||||||
std::int32_t domain;
|
std::int32_t domain;
|
||||||
socklen_t len = sizeof(domain);
|
socklen_t len = sizeof(domain);
|
||||||
xgboost_CHECK_SYS_CALL(
|
xgboost_CHECK_SYS_CALL(
|
||||||
getsockopt(handle_, SOL_SOCKET, SO_DOMAIN, reinterpret_cast<char *>(&domain), &len), 0);
|
getsockopt(handle_, SOL_SOCKET, SO_DOMAIN, reinterpret_cast<char *>(&domain), &len), 0);
|
||||||
return ret_iafamily(domain);
|
return ret_iafamily(domain);
|
||||||
|
#else
|
||||||
|
struct sockaddr sa;
|
||||||
|
socklen_t sizeofsa = sizeof(sa);
|
||||||
|
xgboost_CHECK_SYS_CALL(
|
||||||
|
getsockname(handle_, &sa, &sizeofsa), 0);
|
||||||
|
if (sizeofsa < sizeof(uchar_t)*2) {
|
||||||
|
return ret_iafamily(AF_INET);
|
||||||
|
}
|
||||||
|
return ret_iafamily(sa.sa_family);
|
||||||
|
#endif // __PASE__
|
||||||
#else
|
#else
|
||||||
LOG(FATAL) << "Unknown platform.";
|
LOG(FATAL) << "Unknown platform.";
|
||||||
return ret_iafamily(AF_INET);
|
return ret_iafamily(AF_INET);
|
||||||
|
|||||||
@@ -6,6 +6,6 @@
|
|||||||
|
|
||||||
#define XGBOOST_VER_MAJOR 1
|
#define XGBOOST_VER_MAJOR 1
|
||||||
#define XGBOOST_VER_MINOR 7
|
#define XGBOOST_VER_MINOR 7
|
||||||
#define XGBOOST_VER_PATCH 0
|
#define XGBOOST_VER_PATCH 3
|
||||||
|
|
||||||
#endif // XGBOOST_VERSION_CONFIG_H_
|
#endif // XGBOOST_VERSION_CONFIG_H_
|
||||||
|
|||||||
@@ -6,7 +6,7 @@
|
|||||||
|
|
||||||
<groupId>ml.dmlc</groupId>
|
<groupId>ml.dmlc</groupId>
|
||||||
<artifactId>xgboost-jvm_2.12</artifactId>
|
<artifactId>xgboost-jvm_2.12</artifactId>
|
||||||
<version>1.7.0</version>
|
<version>1.7.3</version>
|
||||||
<packaging>pom</packaging>
|
<packaging>pom</packaging>
|
||||||
<name>XGBoost JVM Package</name>
|
<name>XGBoost JVM Package</name>
|
||||||
<description>JVM Package for XGBoost</description>
|
<description>JVM Package for XGBoost</description>
|
||||||
|
|||||||
@@ -6,10 +6,10 @@
|
|||||||
<parent>
|
<parent>
|
||||||
<groupId>ml.dmlc</groupId>
|
<groupId>ml.dmlc</groupId>
|
||||||
<artifactId>xgboost-jvm_2.12</artifactId>
|
<artifactId>xgboost-jvm_2.12</artifactId>
|
||||||
<version>1.7.0</version>
|
<version>1.7.3</version>
|
||||||
</parent>
|
</parent>
|
||||||
<artifactId>xgboost4j-example_2.12</artifactId>
|
<artifactId>xgboost4j-example_2.12</artifactId>
|
||||||
<version>1.7.0</version>
|
<version>1.7.3</version>
|
||||||
<packaging>jar</packaging>
|
<packaging>jar</packaging>
|
||||||
<build>
|
<build>
|
||||||
<plugins>
|
<plugins>
|
||||||
@@ -26,7 +26,7 @@
|
|||||||
<dependency>
|
<dependency>
|
||||||
<groupId>ml.dmlc</groupId>
|
<groupId>ml.dmlc</groupId>
|
||||||
<artifactId>xgboost4j-spark_${scala.binary.version}</artifactId>
|
<artifactId>xgboost4j-spark_${scala.binary.version}</artifactId>
|
||||||
<version>1.7.0</version>
|
<version>1.7.3</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.spark</groupId>
|
<groupId>org.apache.spark</groupId>
|
||||||
@@ -37,7 +37,7 @@
|
|||||||
<dependency>
|
<dependency>
|
||||||
<groupId>ml.dmlc</groupId>
|
<groupId>ml.dmlc</groupId>
|
||||||
<artifactId>xgboost4j-flink_${scala.binary.version}</artifactId>
|
<artifactId>xgboost4j-flink_${scala.binary.version}</artifactId>
|
||||||
<version>1.7.0</version>
|
<version>1.7.3</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.commons</groupId>
|
<groupId>org.apache.commons</groupId>
|
||||||
|
|||||||
@@ -6,10 +6,10 @@
|
|||||||
<parent>
|
<parent>
|
||||||
<groupId>ml.dmlc</groupId>
|
<groupId>ml.dmlc</groupId>
|
||||||
<artifactId>xgboost-jvm_2.12</artifactId>
|
<artifactId>xgboost-jvm_2.12</artifactId>
|
||||||
<version>1.7.0</version>
|
<version>1.7.3</version>
|
||||||
</parent>
|
</parent>
|
||||||
<artifactId>xgboost4j-flink_2.12</artifactId>
|
<artifactId>xgboost4j-flink_2.12</artifactId>
|
||||||
<version>1.7.0</version>
|
<version>1.7.3</version>
|
||||||
<build>
|
<build>
|
||||||
<plugins>
|
<plugins>
|
||||||
<plugin>
|
<plugin>
|
||||||
@@ -26,7 +26,7 @@
|
|||||||
<dependency>
|
<dependency>
|
||||||
<groupId>ml.dmlc</groupId>
|
<groupId>ml.dmlc</groupId>
|
||||||
<artifactId>xgboost4j_${scala.binary.version}</artifactId>
|
<artifactId>xgboost4j_${scala.binary.version}</artifactId>
|
||||||
<version>1.7.0</version>
|
<version>1.7.3</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.commons</groupId>
|
<groupId>org.apache.commons</groupId>
|
||||||
|
|||||||
@@ -6,10 +6,10 @@
|
|||||||
<parent>
|
<parent>
|
||||||
<groupId>ml.dmlc</groupId>
|
<groupId>ml.dmlc</groupId>
|
||||||
<artifactId>xgboost-jvm_2.12</artifactId>
|
<artifactId>xgboost-jvm_2.12</artifactId>
|
||||||
<version>1.7.0</version>
|
<version>1.7.3</version>
|
||||||
</parent>
|
</parent>
|
||||||
<artifactId>xgboost4j-gpu_2.12</artifactId>
|
<artifactId>xgboost4j-gpu_2.12</artifactId>
|
||||||
<version>1.7.0</version>
|
<version>1.7.3</version>
|
||||||
<packaging>jar</packaging>
|
<packaging>jar</packaging>
|
||||||
|
|
||||||
<dependencies>
|
<dependencies>
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
#include <jni.h>
|
#include <jni.h>
|
||||||
#include <thrust/system/cuda/experimental/pinned_allocator.h>
|
|
||||||
|
|
||||||
#include "../../../../src/common/device_helpers.cuh"
|
#include "../../../../src/common/device_helpers.cuh"
|
||||||
|
#include "../../../../src/common/cuda_pinned_allocator.h"
|
||||||
#include "../../../../src/data/array_interface.h"
|
#include "../../../../src/data/array_interface.h"
|
||||||
#include "jvm_utils.h"
|
#include "jvm_utils.h"
|
||||||
#include <xgboost/c_api.h>
|
#include <xgboost/c_api.h>
|
||||||
@@ -131,7 +131,7 @@ class DataIteratorProxy {
|
|||||||
bool cache_on_host_{true}; // TODO(Bobby): Make this optional.
|
bool cache_on_host_{true}; // TODO(Bobby): Make this optional.
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
using Alloc = thrust::system::cuda::experimental::pinned_allocator<T>;
|
using Alloc = xgboost::common::cuda::pinned_allocator<T>;
|
||||||
template <typename U>
|
template <typename U>
|
||||||
using HostVector = std::vector<U, Alloc<U>>;
|
using HostVector = std::vector<U, Alloc<U>>;
|
||||||
|
|
||||||
|
|||||||
@@ -6,7 +6,7 @@
|
|||||||
<parent>
|
<parent>
|
||||||
<groupId>ml.dmlc</groupId>
|
<groupId>ml.dmlc</groupId>
|
||||||
<artifactId>xgboost-jvm_2.12</artifactId>
|
<artifactId>xgboost-jvm_2.12</artifactId>
|
||||||
<version>1.7.0</version>
|
<version>1.7.3</version>
|
||||||
</parent>
|
</parent>
|
||||||
<artifactId>xgboost4j-spark-gpu_2.12</artifactId>
|
<artifactId>xgboost4j-spark-gpu_2.12</artifactId>
|
||||||
<build>
|
<build>
|
||||||
@@ -24,7 +24,7 @@
|
|||||||
<dependency>
|
<dependency>
|
||||||
<groupId>ml.dmlc</groupId>
|
<groupId>ml.dmlc</groupId>
|
||||||
<artifactId>xgboost4j-gpu_${scala.binary.version}</artifactId>
|
<artifactId>xgboost4j-gpu_${scala.binary.version}</artifactId>
|
||||||
<version>1.7.0</version>
|
<version>1.7.3</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.spark</groupId>
|
<groupId>org.apache.spark</groupId>
|
||||||
|
|||||||
@@ -6,7 +6,7 @@
|
|||||||
<parent>
|
<parent>
|
||||||
<groupId>ml.dmlc</groupId>
|
<groupId>ml.dmlc</groupId>
|
||||||
<artifactId>xgboost-jvm_2.12</artifactId>
|
<artifactId>xgboost-jvm_2.12</artifactId>
|
||||||
<version>1.7.0</version>
|
<version>1.7.3</version>
|
||||||
</parent>
|
</parent>
|
||||||
<artifactId>xgboost4j-spark_2.12</artifactId>
|
<artifactId>xgboost4j-spark_2.12</artifactId>
|
||||||
<build>
|
<build>
|
||||||
@@ -24,7 +24,7 @@
|
|||||||
<dependency>
|
<dependency>
|
||||||
<groupId>ml.dmlc</groupId>
|
<groupId>ml.dmlc</groupId>
|
||||||
<artifactId>xgboost4j_${scala.binary.version}</artifactId>
|
<artifactId>xgboost4j_${scala.binary.version}</artifactId>
|
||||||
<version>1.7.0</version>
|
<version>1.7.3</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.spark</groupId>
|
<groupId>org.apache.spark</groupId>
|
||||||
|
|||||||
@@ -1,9 +1,9 @@
|
|||||||
from sklearn.datasets import load_iris
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import pandas
|
import pandas
|
||||||
|
from sklearn.datasets import load_iris
|
||||||
|
|
||||||
X, y = load_iris(return_X_y=True)
|
X, y = load_iris(return_X_y=True)
|
||||||
y = y.astype(np.int)
|
y = y.astype(np.int32)
|
||||||
df = pandas.DataFrame(data=X, columns=['sepal length', 'sepal width', 'petal length', 'petal width'])
|
df = pandas.DataFrame(data=X, columns=['sepal length', 'sepal width', 'petal length', 'petal width'])
|
||||||
class_id_to_name = {0:'Iris-setosa', 1:'Iris-versicolor', 2:'Iris-virginica'}
|
class_id_to_name = {0:'Iris-setosa', 1:'Iris-versicolor', 2:'Iris-virginica'}
|
||||||
df['class'] = np.vectorize(class_id_to_name.get)(y)
|
df['class'] = np.vectorize(class_id_to_name.get)(y)
|
||||||
|
|||||||
@@ -6,10 +6,10 @@
|
|||||||
<parent>
|
<parent>
|
||||||
<groupId>ml.dmlc</groupId>
|
<groupId>ml.dmlc</groupId>
|
||||||
<artifactId>xgboost-jvm_2.12</artifactId>
|
<artifactId>xgboost-jvm_2.12</artifactId>
|
||||||
<version>1.7.0</version>
|
<version>1.7.3</version>
|
||||||
</parent>
|
</parent>
|
||||||
<artifactId>xgboost4j_2.12</artifactId>
|
<artifactId>xgboost4j_2.12</artifactId>
|
||||||
<version>1.7.0</version>
|
<version>1.7.3</version>
|
||||||
<packaging>jar</packaging>
|
<packaging>jar</packaging>
|
||||||
|
|
||||||
<dependencies>
|
<dependencies>
|
||||||
|
|||||||
@@ -1 +1 @@
|
|||||||
1.7.0
|
1.7.3
|
||||||
|
|||||||
@@ -4,7 +4,7 @@ Contributors: https://github.com/dmlc/xgboost/blob/master/CONTRIBUTORS.md
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
from . import tracker # noqa
|
from . import tracker # noqa
|
||||||
from . import collective, dask
|
from . import collective, dask, rabit
|
||||||
from .core import (
|
from .core import (
|
||||||
Booster,
|
Booster,
|
||||||
DataIter,
|
DataIter,
|
||||||
|
|||||||
@@ -43,6 +43,7 @@ except ImportError:
|
|||||||
pandas_concat = None
|
pandas_concat = None
|
||||||
PANDAS_INSTALLED = False
|
PANDAS_INSTALLED = False
|
||||||
|
|
||||||
|
|
||||||
# sklearn
|
# sklearn
|
||||||
try:
|
try:
|
||||||
from sklearn.base import BaseEstimator as XGBModelBase
|
from sklearn.base import BaseEstimator as XGBModelBase
|
||||||
@@ -72,6 +73,22 @@ except ImportError:
|
|||||||
XGBStratifiedKFold = None
|
XGBStratifiedKFold = None
|
||||||
|
|
||||||
|
|
||||||
|
_logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
def is_cudf_available() -> bool:
|
||||||
|
"""Check cuDF package available or not"""
|
||||||
|
if importlib.util.find_spec("cudf") is None:
|
||||||
|
return False
|
||||||
|
try:
|
||||||
|
import cudf
|
||||||
|
|
||||||
|
return True
|
||||||
|
except ImportError:
|
||||||
|
_logger.exception("Importing cuDF failed, use DMatrix instead of QDM")
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
class XGBoostLabelEncoder(LabelEncoder):
|
class XGBoostLabelEncoder(LabelEncoder):
|
||||||
"""Label encoder with JSON serialization methods."""
|
"""Label encoder with JSON serialization methods."""
|
||||||
|
|
||||||
|
|||||||
@@ -853,7 +853,7 @@ async def _get_rabit_args(
|
|||||||
sched_addr = None
|
sched_addr = None
|
||||||
|
|
||||||
# make sure all workers are online so that we can obtain reliable scheduler_info
|
# make sure all workers are online so that we can obtain reliable scheduler_info
|
||||||
client.wait_for_workers(n_workers)
|
await client.wait_for_workers(n_workers) # type: ignore
|
||||||
env = await client.run_on_scheduler(
|
env = await client.run_on_scheduler(
|
||||||
_start_tracker, n_workers, sched_addr, user_addr
|
_start_tracker, n_workers, sched_addr, user_addr
|
||||||
)
|
)
|
||||||
|
|||||||
168
python-package/xgboost/rabit.py
Normal file
168
python-package/xgboost/rabit.py
Normal file
@@ -0,0 +1,168 @@
|
|||||||
|
"""Compatibility shim for xgboost.rabit; to be removed in 2.0"""
|
||||||
|
import logging
|
||||||
|
import warnings
|
||||||
|
from enum import IntEnum, unique
|
||||||
|
from typing import Any, TypeVar, Callable, Optional, List
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
from . import collective
|
||||||
|
|
||||||
|
LOGGER = logging.getLogger("[xgboost.rabit]")
|
||||||
|
|
||||||
|
|
||||||
|
def _deprecation_warning() -> str:
|
||||||
|
return (
|
||||||
|
"The xgboost.rabit submodule is marked as deprecated in 1.7 and will be removed "
|
||||||
|
"in 2.0. Please use xgboost.collective instead."
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def init(args: Optional[List[bytes]] = None) -> None:
|
||||||
|
"""Initialize the rabit library with arguments"""
|
||||||
|
warnings.warn(_deprecation_warning(), FutureWarning)
|
||||||
|
parsed = {}
|
||||||
|
if args:
|
||||||
|
for arg in args:
|
||||||
|
kv = arg.decode().split('=')
|
||||||
|
if len(kv) == 2:
|
||||||
|
parsed[kv[0]] = kv[1]
|
||||||
|
collective.init(**parsed)
|
||||||
|
|
||||||
|
|
||||||
|
def finalize() -> None:
|
||||||
|
"""Finalize the process, notify tracker everything is done."""
|
||||||
|
collective.finalize()
|
||||||
|
|
||||||
|
|
||||||
|
def get_rank() -> int:
|
||||||
|
"""Get rank of current process.
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
rank : int
|
||||||
|
Rank of current process.
|
||||||
|
"""
|
||||||
|
return collective.get_rank()
|
||||||
|
|
||||||
|
|
||||||
|
def get_world_size() -> int:
|
||||||
|
"""Get total number workers.
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
n : int
|
||||||
|
Total number of process.
|
||||||
|
"""
|
||||||
|
return collective.get_world_size()
|
||||||
|
|
||||||
|
|
||||||
|
def is_distributed() -> int:
|
||||||
|
"""If rabit is distributed."""
|
||||||
|
return collective.is_distributed()
|
||||||
|
|
||||||
|
|
||||||
|
def tracker_print(msg: Any) -> None:
|
||||||
|
"""Print message to the tracker.
|
||||||
|
This function can be used to communicate the information of
|
||||||
|
the progress to the tracker
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
msg : str
|
||||||
|
The message to be printed to tracker.
|
||||||
|
"""
|
||||||
|
collective.communicator_print(msg)
|
||||||
|
|
||||||
|
|
||||||
|
def get_processor_name() -> bytes:
|
||||||
|
"""Get the processor name.
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
name : str
|
||||||
|
the name of processor(host)
|
||||||
|
"""
|
||||||
|
return collective.get_processor_name().encode()
|
||||||
|
|
||||||
|
|
||||||
|
T = TypeVar("T") # pylint:disable=invalid-name
|
||||||
|
|
||||||
|
|
||||||
|
def broadcast(data: T, root: int) -> T:
|
||||||
|
"""Broadcast object from one node to all other nodes.
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
data : any type that can be pickled
|
||||||
|
Input data, if current rank does not equal root, this can be None
|
||||||
|
root : int
|
||||||
|
Rank of the node to broadcast data from.
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
object : int
|
||||||
|
the result of broadcast.
|
||||||
|
"""
|
||||||
|
return collective.broadcast(data, root)
|
||||||
|
|
||||||
|
|
||||||
|
@unique
|
||||||
|
class Op(IntEnum):
|
||||||
|
"""Supported operations for rabit."""
|
||||||
|
MAX = 0
|
||||||
|
MIN = 1
|
||||||
|
SUM = 2
|
||||||
|
OR = 3
|
||||||
|
|
||||||
|
|
||||||
|
def allreduce( # pylint:disable=invalid-name
|
||||||
|
data: np.ndarray, op: Op, prepare_fun: Optional[Callable[[np.ndarray], None]] = None
|
||||||
|
) -> np.ndarray:
|
||||||
|
"""Perform allreduce, return the result.
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
data :
|
||||||
|
Input data.
|
||||||
|
op :
|
||||||
|
Reduction operators, can be MIN, MAX, SUM, BITOR
|
||||||
|
prepare_fun :
|
||||||
|
Lazy preprocessing function, if it is not None, prepare_fun(data)
|
||||||
|
will be called by the function before performing allreduce, to initialize the data
|
||||||
|
If the result of Allreduce can be recovered directly,
|
||||||
|
then prepare_fun will NOT be called
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
result :
|
||||||
|
The result of allreduce, have same shape as data
|
||||||
|
Notes
|
||||||
|
-----
|
||||||
|
This function is not thread-safe.
|
||||||
|
"""
|
||||||
|
if prepare_fun is None:
|
||||||
|
return collective.allreduce(data, collective.Op(op))
|
||||||
|
raise Exception("preprocessing function is no longer supported")
|
||||||
|
|
||||||
|
|
||||||
|
def version_number() -> int:
|
||||||
|
"""Returns version number of current stored model.
|
||||||
|
This means how many calls to CheckPoint we made so far.
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
version : int
|
||||||
|
Version number of currently stored model
|
||||||
|
"""
|
||||||
|
return 0
|
||||||
|
|
||||||
|
|
||||||
|
class RabitContext:
|
||||||
|
"""A context controlling rabit initialization and finalization."""
|
||||||
|
|
||||||
|
def __init__(self, args: List[bytes] = None) -> None:
|
||||||
|
if args is None:
|
||||||
|
args = []
|
||||||
|
self.args = args
|
||||||
|
|
||||||
|
def __enter__(self) -> None:
|
||||||
|
init(self.args)
|
||||||
|
assert is_distributed()
|
||||||
|
LOGGER.warning(_deprecation_warning())
|
||||||
|
LOGGER.debug("-------------- rabit say hello ------------------")
|
||||||
|
|
||||||
|
def __exit__(self, *args: List) -> None:
|
||||||
|
finalize()
|
||||||
|
LOGGER.debug("--------------- rabit say bye ------------------")
|
||||||
@@ -674,7 +674,7 @@ class XGBModel(XGBModelBase):
|
|||||||
self.kwargs = {}
|
self.kwargs = {}
|
||||||
self.kwargs[key] = value
|
self.kwargs[key] = value
|
||||||
|
|
||||||
if hasattr(self, "_Booster"):
|
if self.__sklearn_is_fitted__():
|
||||||
parameters = self.get_xgb_params()
|
parameters = self.get_xgb_params()
|
||||||
self.get_booster().set_param(parameters)
|
self.get_booster().set_param(parameters)
|
||||||
|
|
||||||
@@ -701,39 +701,12 @@ class XGBModel(XGBModelBase):
|
|||||||
np.iinfo(np.int32).max
|
np.iinfo(np.int32).max
|
||||||
)
|
)
|
||||||
|
|
||||||
def parse_parameter(value: Any) -> Optional[Union[int, float, str]]:
|
|
||||||
for t in (int, float, str):
|
|
||||||
try:
|
|
||||||
ret = t(value)
|
|
||||||
return ret
|
|
||||||
except ValueError:
|
|
||||||
continue
|
|
||||||
return None
|
|
||||||
|
|
||||||
# Get internal parameter values
|
|
||||||
try:
|
|
||||||
config = json.loads(self.get_booster().save_config())
|
|
||||||
stack = [config]
|
|
||||||
internal = {}
|
|
||||||
while stack:
|
|
||||||
obj = stack.pop()
|
|
||||||
for k, v in obj.items():
|
|
||||||
if k.endswith("_param"):
|
|
||||||
for p_k, p_v in v.items():
|
|
||||||
internal[p_k] = p_v
|
|
||||||
elif isinstance(v, dict):
|
|
||||||
stack.append(v)
|
|
||||||
|
|
||||||
for k, v in internal.items():
|
|
||||||
if k in params and params[k] is None:
|
|
||||||
params[k] = parse_parameter(v)
|
|
||||||
except ValueError:
|
|
||||||
pass
|
|
||||||
return params
|
return params
|
||||||
|
|
||||||
def get_xgb_params(self) -> Dict[str, Any]:
|
def get_xgb_params(self) -> Dict[str, Any]:
|
||||||
"""Get xgboost specific parameters."""
|
"""Get xgboost specific parameters."""
|
||||||
params = self.get_params()
|
params: Dict[str, Any] = self.get_params()
|
||||||
|
|
||||||
# Parameters that should not go into native learner.
|
# Parameters that should not go into native learner.
|
||||||
wrapper_specific = {
|
wrapper_specific = {
|
||||||
"importance_type",
|
"importance_type",
|
||||||
@@ -750,6 +723,7 @@ class XGBModel(XGBModelBase):
|
|||||||
for k, v in params.items():
|
for k, v in params.items():
|
||||||
if k not in wrapper_specific and not callable(v):
|
if k not in wrapper_specific and not callable(v):
|
||||||
filtered[k] = v
|
filtered[k] = v
|
||||||
|
|
||||||
return filtered
|
return filtered
|
||||||
|
|
||||||
def get_num_boosting_rounds(self) -> int:
|
def get_num_boosting_rounds(self) -> int:
|
||||||
@@ -1070,7 +1044,7 @@ class XGBModel(XGBModelBase):
|
|||||||
# error with incompatible data type.
|
# error with incompatible data type.
|
||||||
# Inplace predict doesn't handle as many data types as DMatrix, but it's
|
# Inplace predict doesn't handle as many data types as DMatrix, but it's
|
||||||
# sufficient for dask interface where input is simpiler.
|
# sufficient for dask interface where input is simpiler.
|
||||||
predictor = self.get_params().get("predictor", None)
|
predictor = self.get_xgb_params().get("predictor", None)
|
||||||
if predictor in ("auto", None) and self.booster != "gblinear":
|
if predictor in ("auto", None) and self.booster != "gblinear":
|
||||||
return True
|
return True
|
||||||
return False
|
return False
|
||||||
@@ -1336,7 +1310,7 @@ class XGBModel(XGBModelBase):
|
|||||||
-------
|
-------
|
||||||
coef_ : array of shape ``[n_features]`` or ``[n_classes, n_features]``
|
coef_ : array of shape ``[n_features]`` or ``[n_classes, n_features]``
|
||||||
"""
|
"""
|
||||||
if self.get_params()["booster"] != "gblinear":
|
if self.get_xgb_params()["booster"] != "gblinear":
|
||||||
raise AttributeError(
|
raise AttributeError(
|
||||||
f"Coefficients are not defined for Booster type {self.booster}"
|
f"Coefficients are not defined for Booster type {self.booster}"
|
||||||
)
|
)
|
||||||
@@ -1366,7 +1340,7 @@ class XGBModel(XGBModelBase):
|
|||||||
-------
|
-------
|
||||||
intercept_ : array of shape ``(1,)`` or ``[n_classes]``
|
intercept_ : array of shape ``(1,)`` or ``[n_classes]``
|
||||||
"""
|
"""
|
||||||
if self.get_params()["booster"] != "gblinear":
|
if self.get_xgb_params()["booster"] != "gblinear":
|
||||||
raise AttributeError(
|
raise AttributeError(
|
||||||
f"Intercept (bias) is not defined for Booster type {self.booster}"
|
f"Intercept (bias) is not defined for Booster type {self.booster}"
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
# type: ignore
|
# type: ignore
|
||||||
"""Xgboost pyspark integration submodule for core code."""
|
"""Xgboost pyspark integration submodule for core code."""
|
||||||
# pylint: disable=fixme, too-many-ancestors, protected-access, no-member, invalid-name
|
# pylint: disable=fixme, too-many-ancestors, protected-access, no-member, invalid-name
|
||||||
# pylint: disable=too-few-public-methods, too-many-lines
|
# pylint: disable=too-few-public-methods, too-many-lines, too-many-branches
|
||||||
import json
|
import json
|
||||||
from typing import Iterator, Optional, Tuple
|
from typing import Iterator, Optional, Tuple
|
||||||
|
|
||||||
@@ -32,6 +32,7 @@ from pyspark.sql.types import (
|
|||||||
ShortType,
|
ShortType,
|
||||||
)
|
)
|
||||||
from scipy.special import expit, softmax # pylint: disable=no-name-in-module
|
from scipy.special import expit, softmax # pylint: disable=no-name-in-module
|
||||||
|
from xgboost.compat import is_cudf_available
|
||||||
from xgboost.core import Booster
|
from xgboost.core import Booster
|
||||||
from xgboost.training import train as worker_train
|
from xgboost.training import train as worker_train
|
||||||
|
|
||||||
@@ -728,6 +729,10 @@ class _SparkXGBEstimator(Estimator, _SparkXGBParams, MLReadable, MLWritable):
|
|||||||
else:
|
else:
|
||||||
dataset = dataset.repartition(num_workers)
|
dataset = dataset.repartition(num_workers)
|
||||||
|
|
||||||
|
if self.isDefined(self.qid_col) and self.getOrDefault(self.qid_col):
|
||||||
|
# XGBoost requires qid to be sorted for each partition
|
||||||
|
dataset = dataset.sortWithinPartitions(alias.qid, ascending=True)
|
||||||
|
|
||||||
train_params = self._get_distributed_train_params(dataset)
|
train_params = self._get_distributed_train_params(dataset)
|
||||||
booster_params, train_call_kwargs_params = self._get_xgb_train_call_args(
|
booster_params, train_call_kwargs_params = self._get_xgb_train_call_args(
|
||||||
train_params
|
train_params
|
||||||
@@ -755,7 +760,8 @@ class _SparkXGBEstimator(Estimator, _SparkXGBParams, MLReadable, MLWritable):
|
|||||||
k: v for k, v in train_call_kwargs_params.items() if v is not None
|
k: v for k, v in train_call_kwargs_params.items() if v is not None
|
||||||
}
|
}
|
||||||
dmatrix_kwargs = {k: v for k, v in dmatrix_kwargs.items() if v is not None}
|
dmatrix_kwargs = {k: v for k, v in dmatrix_kwargs.items() if v is not None}
|
||||||
use_qdm = booster_params.get("tree_method", None) in ("hist", "gpu_hist")
|
|
||||||
|
use_hist = booster_params.get("tree_method", None) in ("hist", "gpu_hist")
|
||||||
|
|
||||||
def _train_booster(pandas_df_iter):
|
def _train_booster(pandas_df_iter):
|
||||||
"""Takes in an RDD partition and outputs a booster for that partition after
|
"""Takes in an RDD partition and outputs a booster for that partition after
|
||||||
@@ -769,6 +775,15 @@ class _SparkXGBEstimator(Estimator, _SparkXGBParams, MLReadable, MLWritable):
|
|||||||
|
|
||||||
gpu_id = None
|
gpu_id = None
|
||||||
|
|
||||||
|
# If cuDF is not installed, then using DMatrix instead of QDM,
|
||||||
|
# because without cuDF, DMatrix performs better than QDM.
|
||||||
|
# Note: Checking `is_cudf_available` in spark worker side because
|
||||||
|
# spark worker might has different python environment with driver side.
|
||||||
|
if use_gpu:
|
||||||
|
use_qdm = use_hist and is_cudf_available()
|
||||||
|
else:
|
||||||
|
use_qdm = use_hist
|
||||||
|
|
||||||
if use_qdm and (booster_params.get("max_bin", None) is not None):
|
if use_qdm and (booster_params.get("max_bin", None) is not None):
|
||||||
dmatrix_kwargs["max_bin"] = booster_params["max_bin"]
|
dmatrix_kwargs["max_bin"] = booster_params["max_bin"]
|
||||||
|
|
||||||
|
|||||||
@@ -48,20 +48,21 @@ inline XGBOOST_DEVICE bool InvalidCat(float cat) {
|
|||||||
return cat < 0 || cat >= kMaxCat;
|
return cat < 0 || cat >= kMaxCat;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* \brief Whether should it traverse to left branch of a tree.
|
/**
|
||||||
|
* \brief Whether should it traverse to left branch of a tree.
|
||||||
*
|
*
|
||||||
* For one hot split, go to left if it's NOT the matching category.
|
* Go to left if it's NOT the matching category, which matches one-hot encoding.
|
||||||
*/
|
*/
|
||||||
template <bool validate = true>
|
inline XGBOOST_DEVICE bool Decision(common::Span<uint32_t const> cats, float cat) {
|
||||||
inline XGBOOST_DEVICE bool Decision(common::Span<uint32_t const> cats, float cat, bool dft_left) {
|
|
||||||
KCatBitField const s_cats(cats);
|
KCatBitField const s_cats(cats);
|
||||||
// FIXME: Size() is not accurate since it represents the size of bit set instead of
|
if (XGBOOST_EXPECT(InvalidCat(cat), false)) {
|
||||||
// actual number of categories.
|
return true;
|
||||||
if (XGBOOST_EXPECT(validate && (InvalidCat(cat) || cat >= s_cats.Size()), false)) {
|
|
||||||
return dft_left;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
auto pos = KCatBitField::ToBitPos(cat);
|
auto pos = KCatBitField::ToBitPos(cat);
|
||||||
|
// If the input category is larger than the size of the bit field, it implies that the
|
||||||
|
// category is not chosen. Otherwise the bit field would have the category instead of
|
||||||
|
// being smaller than the category value.
|
||||||
if (pos.int_pos >= cats.size()) {
|
if (pos.int_pos >= cats.size()) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|||||||
91
src/common/cuda_pinned_allocator.h
Normal file
91
src/common/cuda_pinned_allocator.h
Normal file
@@ -0,0 +1,91 @@
|
|||||||
|
/*!
|
||||||
|
* Copyright 2022 by XGBoost Contributors
|
||||||
|
* \file common.h
|
||||||
|
* \brief cuda pinned allocator for usage with thrust containers
|
||||||
|
*/
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <cstddef>
|
||||||
|
#include <limits>
|
||||||
|
|
||||||
|
#include "common.h"
|
||||||
|
|
||||||
|
namespace xgboost {
|
||||||
|
namespace common {
|
||||||
|
namespace cuda {
|
||||||
|
|
||||||
|
// \p pinned_allocator is a CUDA-specific host memory allocator
|
||||||
|
// that employs \c cudaMallocHost for allocation.
|
||||||
|
//
|
||||||
|
// This implementation is ported from the experimental/pinned_allocator
|
||||||
|
// that Thrust used to provide.
|
||||||
|
//
|
||||||
|
// \see https://en.cppreference.com/w/cpp/memory/allocator
|
||||||
|
template <typename T>
|
||||||
|
class pinned_allocator;
|
||||||
|
|
||||||
|
template <>
|
||||||
|
class pinned_allocator<void> {
|
||||||
|
public:
|
||||||
|
using value_type = void; // NOLINT: The type of the elements in the allocator
|
||||||
|
using pointer = void*; // NOLINT: The type returned by address() / allocate()
|
||||||
|
using const_pointer = const void*; // NOLINT: The type returned by address()
|
||||||
|
using size_type = std::size_t; // NOLINT: The type used for the size of the allocation
|
||||||
|
using difference_type = std::ptrdiff_t; // NOLINT: The type of the distance between two pointers
|
||||||
|
|
||||||
|
template <typename U>
|
||||||
|
struct rebind { // NOLINT
|
||||||
|
using other = pinned_allocator<U>; // NOLINT: The rebound type
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
class pinned_allocator {
|
||||||
|
public:
|
||||||
|
using value_type = T; // NOLINT: The type of the elements in the allocator
|
||||||
|
using pointer = T*; // NOLINT: The type returned by address() / allocate()
|
||||||
|
using const_pointer = const T*; // NOLINT: The type returned by address()
|
||||||
|
using reference = T&; // NOLINT: The parameter type for address()
|
||||||
|
using const_reference = const T&; // NOLINT: The parameter type for address()
|
||||||
|
using size_type = std::size_t; // NOLINT: The type used for the size of the allocation
|
||||||
|
using difference_type = std::ptrdiff_t; // NOLINT: The type of the distance between two pointers
|
||||||
|
|
||||||
|
template <typename U>
|
||||||
|
struct rebind { // NOLINT
|
||||||
|
using other = pinned_allocator<U>; // NOLINT: The rebound type
|
||||||
|
};
|
||||||
|
|
||||||
|
XGBOOST_DEVICE inline pinned_allocator() {}; // NOLINT: host/device markup ignored on defaulted functions
|
||||||
|
XGBOOST_DEVICE inline ~pinned_allocator() {} // NOLINT: host/device markup ignored on defaulted functions
|
||||||
|
XGBOOST_DEVICE inline pinned_allocator(pinned_allocator const&) {} // NOLINT: host/device markup ignored on defaulted functions
|
||||||
|
|
||||||
|
|
||||||
|
template <typename U>
|
||||||
|
XGBOOST_DEVICE inline pinned_allocator(pinned_allocator<U> const&) {} // NOLINT
|
||||||
|
|
||||||
|
XGBOOST_DEVICE inline pointer address(reference r) { return &r; } // NOLINT
|
||||||
|
XGBOOST_DEVICE inline const_pointer address(const_reference r) { return &r; } // NOLINT
|
||||||
|
|
||||||
|
inline pointer allocate(size_type cnt, const_pointer = nullptr) { // NOLINT
|
||||||
|
if (cnt > this->max_size()) { throw std::bad_alloc(); } // end if
|
||||||
|
|
||||||
|
pointer result(nullptr);
|
||||||
|
dh::safe_cuda(cudaMallocHost(reinterpret_cast<void**>(&result), cnt * sizeof(value_type)));
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
inline void deallocate(pointer p, size_type) { dh::safe_cuda(cudaFreeHost(p)); } // NOLINT
|
||||||
|
|
||||||
|
inline size_type max_size() const { return (std::numeric_limits<size_type>::max)() / sizeof(T); } // NOLINT
|
||||||
|
|
||||||
|
XGBOOST_DEVICE inline bool operator==(pinned_allocator const& x) const { return true; }
|
||||||
|
|
||||||
|
XGBOOST_DEVICE inline bool operator!=(pinned_allocator const& x) const {
|
||||||
|
return !operator==(x);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
} // namespace cuda
|
||||||
|
} // namespace common
|
||||||
|
} // namespace xgboost
|
||||||
@@ -62,7 +62,7 @@ void ElementWiseKernel(GenericParameter const* ctx, linalg::TensorView<T, D> t,
|
|||||||
#endif // !defined(XGBOOST_USE_CUDA)
|
#endif // !defined(XGBOOST_USE_CUDA)
|
||||||
|
|
||||||
template <typename T, std::int32_t kDim>
|
template <typename T, std::int32_t kDim>
|
||||||
auto cbegin(TensorView<T, kDim> v) { // NOLINT
|
auto cbegin(TensorView<T, kDim> const& v) { // NOLINT
|
||||||
auto it = common::MakeIndexTransformIter([&](size_t i) -> std::remove_cv_t<T> const& {
|
auto it = common::MakeIndexTransformIter([&](size_t i) -> std::remove_cv_t<T> const& {
|
||||||
return linalg::detail::Apply(v, linalg::UnravelIndex(i, v.Shape()));
|
return linalg::detail::Apply(v, linalg::UnravelIndex(i, v.Shape()));
|
||||||
});
|
});
|
||||||
@@ -70,19 +70,19 @@ auto cbegin(TensorView<T, kDim> v) { // NOLINT
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <typename T, std::int32_t kDim>
|
template <typename T, std::int32_t kDim>
|
||||||
auto cend(TensorView<T, kDim> v) { // NOLINT
|
auto cend(TensorView<T, kDim> const& v) { // NOLINT
|
||||||
return cbegin(v) + v.Size();
|
return cbegin(v) + v.Size();
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T, std::int32_t kDim>
|
template <typename T, std::int32_t kDim>
|
||||||
auto begin(TensorView<T, kDim> v) { // NOLINT
|
auto begin(TensorView<T, kDim>& v) { // NOLINT
|
||||||
auto it = common::MakeIndexTransformIter(
|
auto it = common::MakeIndexTransformIter(
|
||||||
[&](size_t i) -> T& { return linalg::detail::Apply(v, linalg::UnravelIndex(i, v.Shape())); });
|
[&](size_t i) -> T& { return linalg::detail::Apply(v, linalg::UnravelIndex(i, v.Shape())); });
|
||||||
return it;
|
return it;
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T, std::int32_t kDim>
|
template <typename T, std::int32_t kDim>
|
||||||
auto end(TensorView<T, kDim> v) { // NOLINT
|
auto end(TensorView<T, kDim>& v) { // NOLINT
|
||||||
return begin(v) + v.Size();
|
return begin(v) + v.Size();
|
||||||
}
|
}
|
||||||
} // namespace linalg
|
} // namespace linalg
|
||||||
|
|||||||
@@ -144,7 +144,7 @@ class PartitionBuilder {
|
|||||||
auto gidx = gidx_calc(ridx);
|
auto gidx = gidx_calc(ridx);
|
||||||
bool go_left = default_left;
|
bool go_left = default_left;
|
||||||
if (gidx > -1) {
|
if (gidx > -1) {
|
||||||
go_left = Decision(node_cats, cut_values[gidx], default_left);
|
go_left = Decision(node_cats, cut_values[gidx]);
|
||||||
}
|
}
|
||||||
return go_left;
|
return go_left;
|
||||||
} else {
|
} else {
|
||||||
@@ -157,7 +157,7 @@ class PartitionBuilder {
|
|||||||
bool go_left = default_left;
|
bool go_left = default_left;
|
||||||
if (gidx > -1) {
|
if (gidx > -1) {
|
||||||
if (is_cat) {
|
if (is_cat) {
|
||||||
go_left = Decision(node_cats, cut_values[gidx], default_left);
|
go_left = Decision(node_cats, cut_values[gidx]);
|
||||||
} else {
|
} else {
|
||||||
go_left = cut_values[gidx] <= nodes[node_in_set].split.split_value;
|
go_left = cut_values[gidx] <= nodes[node_in_set].split.split_value;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -101,7 +101,7 @@ class ArrayInterfaceHandler {
|
|||||||
template <typename PtrType>
|
template <typename PtrType>
|
||||||
static PtrType GetPtrFromArrayData(Object::Map const &obj) {
|
static PtrType GetPtrFromArrayData(Object::Map const &obj) {
|
||||||
auto data_it = obj.find("data");
|
auto data_it = obj.find("data");
|
||||||
if (data_it == obj.cend()) {
|
if (data_it == obj.cend() || IsA<Null>(data_it->second)) {
|
||||||
LOG(FATAL) << "Empty data passed in.";
|
LOG(FATAL) << "Empty data passed in.";
|
||||||
}
|
}
|
||||||
auto p_data = reinterpret_cast<PtrType>(
|
auto p_data = reinterpret_cast<PtrType>(
|
||||||
@@ -111,7 +111,7 @@ class ArrayInterfaceHandler {
|
|||||||
|
|
||||||
static void Validate(Object::Map const &array) {
|
static void Validate(Object::Map const &array) {
|
||||||
auto version_it = array.find("version");
|
auto version_it = array.find("version");
|
||||||
if (version_it == array.cend()) {
|
if (version_it == array.cend() || IsA<Null>(version_it->second)) {
|
||||||
LOG(FATAL) << "Missing `version' field for array interface";
|
LOG(FATAL) << "Missing `version' field for array interface";
|
||||||
}
|
}
|
||||||
if (get<Integer const>(version_it->second) > 3) {
|
if (get<Integer const>(version_it->second) > 3) {
|
||||||
@@ -119,17 +119,19 @@ class ArrayInterfaceHandler {
|
|||||||
}
|
}
|
||||||
|
|
||||||
auto typestr_it = array.find("typestr");
|
auto typestr_it = array.find("typestr");
|
||||||
if (typestr_it == array.cend()) {
|
if (typestr_it == array.cend() || IsA<Null>(typestr_it->second)) {
|
||||||
LOG(FATAL) << "Missing `typestr' field for array interface";
|
LOG(FATAL) << "Missing `typestr' field for array interface";
|
||||||
}
|
}
|
||||||
|
|
||||||
auto typestr = get<String const>(typestr_it->second);
|
auto typestr = get<String const>(typestr_it->second);
|
||||||
CHECK(typestr.size() == 3 || typestr.size() == 4) << ArrayInterfaceErrors::TypestrFormat();
|
CHECK(typestr.size() == 3 || typestr.size() == 4) << ArrayInterfaceErrors::TypestrFormat();
|
||||||
|
|
||||||
if (array.find("shape") == array.cend()) {
|
auto shape_it = array.find("shape");
|
||||||
|
if (shape_it == array.cend() || IsA<Null>(shape_it->second)) {
|
||||||
LOG(FATAL) << "Missing `shape' field for array interface";
|
LOG(FATAL) << "Missing `shape' field for array interface";
|
||||||
}
|
}
|
||||||
if (array.find("data") == array.cend()) {
|
auto data_it = array.find("data");
|
||||||
|
if (data_it == array.cend() || IsA<Null>(data_it->second)) {
|
||||||
LOG(FATAL) << "Missing `data' field for array interface";
|
LOG(FATAL) << "Missing `data' field for array interface";
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -139,8 +141,9 @@ class ArrayInterfaceHandler {
|
|||||||
static size_t ExtractMask(Object::Map const &column,
|
static size_t ExtractMask(Object::Map const &column,
|
||||||
common::Span<RBitField8::value_type> *p_out) {
|
common::Span<RBitField8::value_type> *p_out) {
|
||||||
auto &s_mask = *p_out;
|
auto &s_mask = *p_out;
|
||||||
if (column.find("mask") != column.cend()) {
|
auto const &mask_it = column.find("mask");
|
||||||
auto const &j_mask = get<Object const>(column.at("mask"));
|
if (mask_it != column.cend() && !IsA<Null>(mask_it->second)) {
|
||||||
|
auto const &j_mask = get<Object const>(mask_it->second);
|
||||||
Validate(j_mask);
|
Validate(j_mask);
|
||||||
|
|
||||||
auto p_mask = GetPtrFromArrayData<RBitField8::value_type *>(j_mask);
|
auto p_mask = GetPtrFromArrayData<RBitField8::value_type *>(j_mask);
|
||||||
@@ -173,8 +176,9 @@ class ArrayInterfaceHandler {
|
|||||||
// assume 1 byte alignment.
|
// assume 1 byte alignment.
|
||||||
size_t const span_size = RBitField8::ComputeStorageSize(n_bits);
|
size_t const span_size = RBitField8::ComputeStorageSize(n_bits);
|
||||||
|
|
||||||
if (j_mask.find("strides") != j_mask.cend()) {
|
auto strides_it = j_mask.find("strides");
|
||||||
auto strides = get<Array const>(column.at("strides"));
|
if (strides_it != j_mask.cend() && !IsA<Null>(strides_it->second)) {
|
||||||
|
auto strides = get<Array const>(strides_it->second);
|
||||||
CHECK_EQ(strides.size(), 1) << ArrayInterfaceErrors::Dimension(1);
|
CHECK_EQ(strides.size(), 1) << ArrayInterfaceErrors::Dimension(1);
|
||||||
CHECK_EQ(get<Integer>(strides.at(0)), type_length) << ArrayInterfaceErrors::Contiguous();
|
CHECK_EQ(get<Integer>(strides.at(0)), type_length) << ArrayInterfaceErrors::Contiguous();
|
||||||
}
|
}
|
||||||
@@ -401,7 +405,9 @@ class ArrayInterface {
|
|||||||
<< "XGBoost doesn't support internal broadcasting.";
|
<< "XGBoost doesn't support internal broadcasting.";
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
CHECK(array.find("mask") == array.cend()) << "Masked array is not yet supported.";
|
auto mask_it = array.find("mask");
|
||||||
|
CHECK(mask_it == array.cend() || IsA<Null>(mask_it->second))
|
||||||
|
<< "Masked array is not yet supported.";
|
||||||
}
|
}
|
||||||
|
|
||||||
auto stream_it = array.find("stream");
|
auto stream_it = array.find("stream");
|
||||||
|
|||||||
@@ -28,6 +28,7 @@
|
|||||||
#include "xgboost/logging.h"
|
#include "xgboost/logging.h"
|
||||||
#include "xgboost/objective.h"
|
#include "xgboost/objective.h"
|
||||||
#include "xgboost/predictor.h"
|
#include "xgboost/predictor.h"
|
||||||
|
#include "xgboost/string_view.h"
|
||||||
#include "xgboost/tree_updater.h"
|
#include "xgboost/tree_updater.h"
|
||||||
|
|
||||||
namespace xgboost {
|
namespace xgboost {
|
||||||
@@ -395,23 +396,36 @@ void GBTree::LoadConfig(Json const& in) {
|
|||||||
tparam_.process_type = TreeProcessType::kDefault;
|
tparam_.process_type = TreeProcessType::kDefault;
|
||||||
int32_t const n_gpus = xgboost::common::AllVisibleGPUs();
|
int32_t const n_gpus = xgboost::common::AllVisibleGPUs();
|
||||||
if (n_gpus == 0 && tparam_.predictor == PredictorType::kGPUPredictor) {
|
if (n_gpus == 0 && tparam_.predictor == PredictorType::kGPUPredictor) {
|
||||||
LOG(WARNING)
|
LOG(WARNING) << "Loading from a raw memory buffer on CPU only machine. "
|
||||||
<< "Loading from a raw memory buffer on CPU only machine. "
|
"Changing predictor to auto.";
|
||||||
"Changing predictor to auto.";
|
|
||||||
tparam_.UpdateAllowUnknown(Args{{"predictor", "auto"}});
|
tparam_.UpdateAllowUnknown(Args{{"predictor", "auto"}});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
auto msg = StringView{
|
||||||
|
R"(
|
||||||
|
Loading from a raw memory buffer (like pickle in Python, RDS in R) on a CPU-only
|
||||||
|
machine. Consider using `save_model/load_model` instead. See:
|
||||||
|
|
||||||
|
https://xgboost.readthedocs.io/en/latest/tutorials/saving_model.html
|
||||||
|
|
||||||
|
for more details about differences between saving model and serializing.)"};
|
||||||
|
|
||||||
if (n_gpus == 0 && tparam_.tree_method == TreeMethod::kGPUHist) {
|
if (n_gpus == 0 && tparam_.tree_method == TreeMethod::kGPUHist) {
|
||||||
tparam_.UpdateAllowUnknown(Args{{"tree_method", "hist"}});
|
tparam_.UpdateAllowUnknown(Args{{"tree_method", "hist"}});
|
||||||
LOG(WARNING)
|
LOG(WARNING) << msg << " Changing `tree_method` to `hist`.";
|
||||||
<< "Loading from a raw memory buffer on CPU only machine. "
|
|
||||||
"Changing tree_method to hist.";
|
|
||||||
}
|
}
|
||||||
|
|
||||||
auto const& j_updaters = get<Object const>(in["updater"]);
|
auto const& j_updaters = get<Object const>(in["updater"]);
|
||||||
updaters_.clear();
|
updaters_.clear();
|
||||||
|
|
||||||
for (auto const& kv : j_updaters) {
|
for (auto const& kv : j_updaters) {
|
||||||
std::unique_ptr<TreeUpdater> up(
|
auto name = kv.first;
|
||||||
TreeUpdater::Create(kv.first, ctx_, model_.learner_model_param->task));
|
if (n_gpus == 0 && name == "grow_gpu_hist") {
|
||||||
|
name = "grow_quantile_histmaker";
|
||||||
|
LOG(WARNING) << "Changing updater from `grow_gpu_hist` to `grow_quantile_histmaker`.";
|
||||||
|
}
|
||||||
|
std::unique_ptr<TreeUpdater> up{
|
||||||
|
TreeUpdater::Create(name, ctx_, model_.learner_model_param->task)};
|
||||||
up->LoadConfig(kv.second);
|
up->LoadConfig(kv.second);
|
||||||
updaters_.push_back(std::move(up));
|
updaters_.push_back(std::move(up));
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -18,9 +18,7 @@ inline XGBOOST_DEVICE bst_node_t GetNextNode(const RegTree::Node &node, const bs
|
|||||||
if (has_categorical && common::IsCat(cats.split_type, nid)) {
|
if (has_categorical && common::IsCat(cats.split_type, nid)) {
|
||||||
auto node_categories =
|
auto node_categories =
|
||||||
cats.categories.subspan(cats.node_ptr[nid].beg, cats.node_ptr[nid].size);
|
cats.categories.subspan(cats.node_ptr[nid].beg, cats.node_ptr[nid].size);
|
||||||
return common::Decision<true>(node_categories, fvalue, node.DefaultLeft())
|
return common::Decision(node_categories, fvalue) ? node.LeftChild() : node.RightChild();
|
||||||
? node.LeftChild()
|
|
||||||
: node.RightChild();
|
|
||||||
} else {
|
} else {
|
||||||
return node.LeftChild() + !(fvalue < node.SplitCond());
|
return node.LeftChild() + !(fvalue < node.SplitCond());
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -3,10 +3,10 @@
|
|||||||
*/
|
*/
|
||||||
#ifndef EVALUATE_SPLITS_CUH_
|
#ifndef EVALUATE_SPLITS_CUH_
|
||||||
#define EVALUATE_SPLITS_CUH_
|
#define EVALUATE_SPLITS_CUH_
|
||||||
#include <thrust/system/cuda/experimental/pinned_allocator.h>
|
|
||||||
#include <xgboost/span.h>
|
#include <xgboost/span.h>
|
||||||
|
|
||||||
#include "../../common/categorical.h"
|
#include "../../common/categorical.h"
|
||||||
|
#include "../../common/cuda_pinned_allocator.h"
|
||||||
#include "../split_evaluator.h"
|
#include "../split_evaluator.h"
|
||||||
#include "../updater_gpu_common.cuh"
|
#include "../updater_gpu_common.cuh"
|
||||||
#include "expand_entry.cuh"
|
#include "expand_entry.cuh"
|
||||||
@@ -57,7 +57,7 @@ struct CatAccessor {
|
|||||||
class GPUHistEvaluator {
|
class GPUHistEvaluator {
|
||||||
using CatST = common::CatBitField::value_type; // categorical storage type
|
using CatST = common::CatBitField::value_type; // categorical storage type
|
||||||
// use pinned memory to stage the categories, used for sort based splits.
|
// use pinned memory to stage the categories, used for sort based splits.
|
||||||
using Alloc = thrust::system::cuda::experimental::pinned_allocator<CatST>;
|
using Alloc = xgboost::common::cuda::pinned_allocator<CatST>;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
TreeEvaluator tree_evaluator_;
|
TreeEvaluator tree_evaluator_;
|
||||||
|
|||||||
@@ -403,8 +403,7 @@ struct GPUHistMakerDevice {
|
|||||||
go_left = data.split_node.DefaultLeft();
|
go_left = data.split_node.DefaultLeft();
|
||||||
} else {
|
} else {
|
||||||
if (data.split_type == FeatureType::kCategorical) {
|
if (data.split_type == FeatureType::kCategorical) {
|
||||||
go_left = common::Decision<false>(data.node_cats.Bits(), cut_value,
|
go_left = common::Decision(data.node_cats.Bits(), cut_value);
|
||||||
data.split_node.DefaultLeft());
|
|
||||||
} else {
|
} else {
|
||||||
go_left = cut_value <= data.split_node.SplitCond();
|
go_left = cut_value <= data.split_node.SplitCond();
|
||||||
}
|
}
|
||||||
@@ -481,7 +480,7 @@ struct GPUHistMakerDevice {
|
|||||||
if (common::IsCat(d_feature_types, position)) {
|
if (common::IsCat(d_feature_types, position)) {
|
||||||
auto node_cats = categories.subspan(categories_segments[position].beg,
|
auto node_cats = categories.subspan(categories_segments[position].beg,
|
||||||
categories_segments[position].size);
|
categories_segments[position].size);
|
||||||
go_left = common::Decision<false>(node_cats, element, node.DefaultLeft());
|
go_left = common::Decision(node_cats, element);
|
||||||
} else {
|
} else {
|
||||||
go_left = element <= node.SplitCond();
|
go_left = element <= node.SplitCond();
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -4,7 +4,7 @@ set -euo pipefail
|
|||||||
|
|
||||||
source tests/buildkite/conftest.sh
|
source tests/buildkite/conftest.sh
|
||||||
|
|
||||||
echo "--- Run Google Tests with CUDA, using 4 GPUs"
|
echo "--- Run Google Tests with CUDA, using a GPU"
|
||||||
buildkite-agent artifact download "build/testxgboost" . --step build-cuda
|
buildkite-agent artifact download "build/testxgboost" . --step build-cuda
|
||||||
chmod +x build/testxgboost
|
chmod +x build/testxgboost
|
||||||
tests/ci_build/ci_build.sh gpu nvidia-docker \
|
tests/ci_build/ci_build.sh gpu nvidia-docker \
|
||||||
@@ -12,11 +12,12 @@ tests/ci_build/ci_build.sh gpu nvidia-docker \
|
|||||||
--build-arg RAPIDS_VERSION_ARG=$RAPIDS_VERSION \
|
--build-arg RAPIDS_VERSION_ARG=$RAPIDS_VERSION \
|
||||||
build/testxgboost
|
build/testxgboost
|
||||||
|
|
||||||
echo "--- Run Google Tests with CUDA, using 4 GPUs, RMM enabled"
|
# Disabled until https://github.com/dmlc/xgboost/issues/8619 is resolved
|
||||||
rm -rfv build/
|
# echo "--- Run Google Tests with CUDA, using a GPU, RMM enabled"
|
||||||
buildkite-agent artifact download "build/testxgboost" . --step build-cuda-with-rmm
|
# rm -rfv build/
|
||||||
chmod +x build/testxgboost
|
# buildkite-agent artifact download "build/testxgboost" . --step build-cuda-with-rmm
|
||||||
tests/ci_build/ci_build.sh rmm nvidia-docker \
|
# chmod +x build/testxgboost
|
||||||
--build-arg CUDA_VERSION_ARG=$CUDA_VERSION \
|
# tests/ci_build/ci_build.sh rmm nvidia-docker \
|
||||||
--build-arg RAPIDS_VERSION_ARG=$RAPIDS_VERSION bash -c \
|
# --build-arg CUDA_VERSION_ARG=$CUDA_VERSION \
|
||||||
"source activate gpu_test && build/testxgboost --use-rmm-pool"
|
# --build-arg RAPIDS_VERSION_ARG=$RAPIDS_VERSION bash -c \
|
||||||
|
# "source activate gpu_test && build/testxgboost --use-rmm-pool"
|
||||||
|
|||||||
11
tests/ci_build/conda_env/cpp_test.yml
Normal file
11
tests/ci_build/conda_env/cpp_test.yml
Normal file
@@ -0,0 +1,11 @@
|
|||||||
|
# conda environment for CPP test on Linux distributions
|
||||||
|
name: cpp_test
|
||||||
|
channels:
|
||||||
|
- defaults
|
||||||
|
- conda-forge
|
||||||
|
dependencies:
|
||||||
|
- cmake
|
||||||
|
- ninja
|
||||||
|
- c-compiler
|
||||||
|
- cxx-compiler
|
||||||
|
- gtest
|
||||||
13
tests/ci_build/conda_env/sdist_test.yml
Normal file
13
tests/ci_build/conda_env/sdist_test.yml
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
# conda environment for source distribution test.
|
||||||
|
name: sdist_test
|
||||||
|
channels:
|
||||||
|
- defaults
|
||||||
|
- conda-forge
|
||||||
|
dependencies:
|
||||||
|
- python=3.8
|
||||||
|
- pip
|
||||||
|
- wheel
|
||||||
|
- cmake
|
||||||
|
- ninja
|
||||||
|
- c-compiler
|
||||||
|
- cxx-compiler
|
||||||
@@ -1,11 +1,14 @@
|
|||||||
/*!
|
/*!
|
||||||
* Copyright 2021 by XGBoost Contributors
|
* Copyright 2021-2022 by XGBoost Contributors
|
||||||
*/
|
*/
|
||||||
#include <gtest/gtest.h>
|
#include <gtest/gtest.h>
|
||||||
|
#include <xgboost/json.h>
|
||||||
|
#include <xgboost/learner.h>
|
||||||
|
|
||||||
#include <limits>
|
#include <limits>
|
||||||
|
|
||||||
#include "../../../src/common/categorical.h"
|
#include "../../../src/common/categorical.h"
|
||||||
|
#include "../helpers.h"
|
||||||
|
|
||||||
namespace xgboost {
|
namespace xgboost {
|
||||||
namespace common {
|
namespace common {
|
||||||
@@ -15,29 +18,76 @@ TEST(Categorical, Decision) {
|
|||||||
|
|
||||||
ASSERT_TRUE(common::InvalidCat(a));
|
ASSERT_TRUE(common::InvalidCat(a));
|
||||||
std::vector<uint32_t> cats(256, 0);
|
std::vector<uint32_t> cats(256, 0);
|
||||||
ASSERT_TRUE(Decision(cats, a, true));
|
ASSERT_TRUE(Decision(cats, a));
|
||||||
|
|
||||||
// larger than size
|
// larger than size
|
||||||
a = 256;
|
a = 256;
|
||||||
ASSERT_TRUE(Decision(cats, a, true));
|
ASSERT_TRUE(Decision(cats, a));
|
||||||
|
|
||||||
// negative
|
// negative
|
||||||
a = -1;
|
a = -1;
|
||||||
ASSERT_TRUE(Decision(cats, a, true));
|
ASSERT_TRUE(Decision(cats, a));
|
||||||
|
|
||||||
CatBitField bits{cats};
|
CatBitField bits{cats};
|
||||||
bits.Set(0);
|
bits.Set(0);
|
||||||
a = -0.5;
|
a = -0.5;
|
||||||
ASSERT_TRUE(Decision(cats, a, true));
|
ASSERT_TRUE(Decision(cats, a));
|
||||||
|
|
||||||
// round toward 0
|
// round toward 0
|
||||||
a = 0.5;
|
a = 0.5;
|
||||||
ASSERT_FALSE(Decision(cats, a, true));
|
ASSERT_FALSE(Decision(cats, a));
|
||||||
|
|
||||||
// valid
|
// valid
|
||||||
a = 13;
|
a = 13;
|
||||||
bits.Set(a);
|
bits.Set(a);
|
||||||
ASSERT_FALSE(Decision(bits.Bits(), a, true));
|
ASSERT_FALSE(Decision(bits.Bits(), a));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test for running inference with input category greater than the one stored in tree.
|
||||||
|
*/
|
||||||
|
TEST(Categorical, MinimalSet) {
|
||||||
|
std::size_t constexpr kRows = 256, kCols = 1, kCat = 3;
|
||||||
|
std::vector<FeatureType> types{FeatureType::kCategorical};
|
||||||
|
auto Xy =
|
||||||
|
RandomDataGenerator{kRows, kCols, 0.0}.Type(types).MaxCategory(kCat).GenerateDMatrix(true);
|
||||||
|
|
||||||
|
std::unique_ptr<Learner> learner{Learner::Create({Xy})};
|
||||||
|
learner->SetParam("max_depth", "1");
|
||||||
|
learner->SetParam("tree_method", "hist");
|
||||||
|
learner->Configure();
|
||||||
|
learner->UpdateOneIter(0, Xy);
|
||||||
|
|
||||||
|
Json model{Object{}};
|
||||||
|
learner->SaveModel(&model);
|
||||||
|
auto tree = model["learner"]["gradient_booster"]["model"]["trees"][0];
|
||||||
|
ASSERT_GE(get<I32Array const>(tree["categories"]).size(), 1);
|
||||||
|
auto v = get<I32Array const>(tree["categories"])[0];
|
||||||
|
|
||||||
|
HostDeviceVector<float> predt;
|
||||||
|
{
|
||||||
|
std::vector<float> data{static_cast<float>(kCat),
|
||||||
|
static_cast<float>(kCat + 1), 32.0f, 33.0f, 34.0f};
|
||||||
|
auto test = GetDMatrixFromData(data, data.size(), kCols);
|
||||||
|
learner->Predict(test, false, &predt, 0, 0, false, /*pred_leaf=*/true);
|
||||||
|
ASSERT_EQ(predt.Size(), data.size());
|
||||||
|
auto const& h_predt = predt.ConstHostSpan();
|
||||||
|
for (auto v : h_predt) {
|
||||||
|
ASSERT_EQ(v, 1); // left child of root node
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
std::unique_ptr<Learner> learner{Learner::Create({Xy})};
|
||||||
|
learner->LoadModel(model);
|
||||||
|
std::vector<float> data = {static_cast<float>(v)};
|
||||||
|
auto test = GetDMatrixFromData(data, data.size(), kCols);
|
||||||
|
learner->Predict(test, false, &predt, 0, 0, false, /*pred_leaf=*/true);
|
||||||
|
auto const& h_predt = predt.ConstHostSpan();
|
||||||
|
for (auto v : h_predt) {
|
||||||
|
ASSERT_EQ(v, 2); // right child of root node
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
} // namespace common
|
} // namespace common
|
||||||
} // namespace xgboost
|
} // namespace xgboost
|
||||||
|
|||||||
@@ -33,9 +33,8 @@ TEST(ArrayInterface, Error) {
|
|||||||
Json column { Object() };
|
Json column { Object() };
|
||||||
std::vector<Json> j_shape {Json(Integer(static_cast<Integer::Int>(kRows)))};
|
std::vector<Json> j_shape {Json(Integer(static_cast<Integer::Int>(kRows)))};
|
||||||
column["shape"] = Array(j_shape);
|
column["shape"] = Array(j_shape);
|
||||||
std::vector<Json> j_data {
|
std::vector<Json> j_data{Json(Integer(reinterpret_cast<Integer::Int>(nullptr))),
|
||||||
Json(Integer(reinterpret_cast<Integer::Int>(nullptr))),
|
Json(Boolean(false))};
|
||||||
Json(Boolean(false))};
|
|
||||||
|
|
||||||
auto const& column_obj = get<Object>(column);
|
auto const& column_obj = get<Object>(column);
|
||||||
std::string typestr{"<f4"};
|
std::string typestr{"<f4"};
|
||||||
@@ -45,6 +44,10 @@ TEST(ArrayInterface, Error) {
|
|||||||
EXPECT_THROW(ArrayInterfaceHandler::ExtractData(column_obj, n), dmlc::Error);
|
EXPECT_THROW(ArrayInterfaceHandler::ExtractData(column_obj, n), dmlc::Error);
|
||||||
column["version"] = 3;
|
column["version"] = 3;
|
||||||
// missing data
|
// missing data
|
||||||
|
EXPECT_THROW(ArrayInterfaceHandler::ExtractData(column_obj, n),
|
||||||
|
dmlc::Error);
|
||||||
|
// null data
|
||||||
|
column["data"] = Null{};
|
||||||
EXPECT_THROW(ArrayInterfaceHandler::ExtractData(column_obj, n),
|
EXPECT_THROW(ArrayInterfaceHandler::ExtractData(column_obj, n),
|
||||||
dmlc::Error);
|
dmlc::Error);
|
||||||
column["data"] = j_data;
|
column["data"] = j_data;
|
||||||
@@ -63,6 +66,11 @@ TEST(ArrayInterface, Error) {
|
|||||||
Json(Boolean(false))};
|
Json(Boolean(false))};
|
||||||
column["data"] = j_data;
|
column["data"] = j_data;
|
||||||
EXPECT_NO_THROW(ArrayInterfaceHandler::ExtractData(column_obj, n));
|
EXPECT_NO_THROW(ArrayInterfaceHandler::ExtractData(column_obj, n));
|
||||||
|
// null data in mask
|
||||||
|
column["mask"] = Object{};
|
||||||
|
column["mask"]["data"] = Null{};
|
||||||
|
common::Span<RBitField8::value_type> s_mask;
|
||||||
|
EXPECT_THROW(ArrayInterfaceHandler::ExtractMask(column_obj, &s_mask), dmlc::Error);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(ArrayInterface, GetElement) {
|
TEST(ArrayInterface, GetElement) {
|
||||||
|
|||||||
@@ -39,6 +39,37 @@ def test_rabit_communicator():
|
|||||||
assert worker.exitcode == 0
|
assert worker.exitcode == 0
|
||||||
|
|
||||||
|
|
||||||
|
# TODO(rongou): remove this once we remove the rabit api.
|
||||||
|
def run_rabit_api_worker(rabit_env, world_size):
|
||||||
|
with xgb.rabit.RabitContext(rabit_env):
|
||||||
|
assert xgb.rabit.get_world_size() == world_size
|
||||||
|
assert xgb.rabit.is_distributed()
|
||||||
|
assert xgb.rabit.get_processor_name().decode() == socket.gethostname()
|
||||||
|
ret = xgb.rabit.broadcast('test1234', 0)
|
||||||
|
assert str(ret) == 'test1234'
|
||||||
|
ret = xgb.rabit.allreduce(np.asarray([1, 2, 3]), xgb.rabit.Op.SUM)
|
||||||
|
assert np.array_equal(ret, np.asarray([2, 4, 6]))
|
||||||
|
|
||||||
|
|
||||||
|
# TODO(rongou): remove this once we remove the rabit api.
|
||||||
|
def test_rabit_api():
|
||||||
|
world_size = 2
|
||||||
|
tracker = RabitTracker(host_ip='127.0.0.1', n_workers=world_size)
|
||||||
|
tracker.start(world_size)
|
||||||
|
rabit_env = []
|
||||||
|
for k, v in tracker.worker_envs().items():
|
||||||
|
rabit_env.append(f"{k}={v}".encode())
|
||||||
|
workers = []
|
||||||
|
for _ in range(world_size):
|
||||||
|
worker = multiprocessing.Process(target=run_rabit_api_worker,
|
||||||
|
args=(rabit_env, world_size))
|
||||||
|
workers.append(worker)
|
||||||
|
worker.start()
|
||||||
|
for worker in workers:
|
||||||
|
worker.join()
|
||||||
|
assert worker.exitcode == 0
|
||||||
|
|
||||||
|
|
||||||
def run_federated_worker(port, world_size, rank):
|
def run_federated_worker(port, world_size, rank):
|
||||||
with xgb.collective.CommunicatorContext(xgboost_communicator='federated',
|
with xgb.collective.CommunicatorContext(xgboost_communicator='federated',
|
||||||
federated_server_address=f'localhost:{port}',
|
federated_server_address=f'localhost:{port}',
|
||||||
|
|||||||
@@ -390,28 +390,6 @@ class XgboostLocalTest(SparkTestCase):
|
|||||||
"expected_prediction_with_base_margin",
|
"expected_prediction_with_base_margin",
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
self.ranker_df_train = self.session.createDataFrame(
|
|
||||||
[
|
|
||||||
(Vectors.dense(1.0, 2.0, 3.0), 0, 0),
|
|
||||||
(Vectors.dense(4.0, 5.0, 6.0), 1, 0),
|
|
||||||
(Vectors.dense(9.0, 4.0, 8.0), 2, 0),
|
|
||||||
(Vectors.sparse(3, {1: 1.0, 2: 5.5}), 0, 1),
|
|
||||||
(Vectors.sparse(3, {1: 6.0, 2: 7.5}), 1, 1),
|
|
||||||
(Vectors.sparse(3, {1: 8.0, 2: 9.5}), 2, 1),
|
|
||||||
],
|
|
||||||
["features", "label", "qid"],
|
|
||||||
)
|
|
||||||
self.ranker_df_test = self.session.createDataFrame(
|
|
||||||
[
|
|
||||||
(Vectors.dense(1.5, 2.0, 3.0), 0, -1.87988),
|
|
||||||
(Vectors.dense(4.5, 5.0, 6.0), 0, 0.29556),
|
|
||||||
(Vectors.dense(9.0, 4.5, 8.0), 0, 2.36570),
|
|
||||||
(Vectors.sparse(3, {1: 1.0, 2: 6.0}), 1, -1.87988),
|
|
||||||
(Vectors.sparse(3, {1: 6.0, 2: 7.0}), 1, -0.30612),
|
|
||||||
(Vectors.sparse(3, {1: 8.0, 2: 10.5}), 1, 2.44826),
|
|
||||||
],
|
|
||||||
["features", "qid", "expected_prediction"],
|
|
||||||
)
|
|
||||||
|
|
||||||
self.reg_df_sparse_train = self.session.createDataFrame(
|
self.reg_df_sparse_train = self.session.createDataFrame(
|
||||||
[
|
[
|
||||||
@@ -1039,15 +1017,6 @@ class XgboostLocalTest(SparkTestCase):
|
|||||||
for row1, row2 in zip(pred_result, pred_result2):
|
for row1, row2 in zip(pred_result, pred_result2):
|
||||||
self.assertTrue(np.allclose(row1.probability, row2.probability, rtol=1e-3))
|
self.assertTrue(np.allclose(row1.probability, row2.probability, rtol=1e-3))
|
||||||
|
|
||||||
def test_ranker(self):
|
|
||||||
ranker = SparkXGBRanker(qid_col="qid")
|
|
||||||
assert ranker.getOrDefault(ranker.objective) == "rank:pairwise"
|
|
||||||
model = ranker.fit(self.ranker_df_train)
|
|
||||||
pred_result = model.transform(self.ranker_df_test).collect()
|
|
||||||
|
|
||||||
for row in pred_result:
|
|
||||||
assert np.isclose(row.prediction, row.expected_prediction, rtol=1e-3)
|
|
||||||
|
|
||||||
def test_empty_validation_data(self) -> None:
|
def test_empty_validation_data(self) -> None:
|
||||||
for tree_method in [
|
for tree_method in [
|
||||||
"hist",
|
"hist",
|
||||||
@@ -1130,3 +1099,63 @@ class XgboostLocalTest(SparkTestCase):
|
|||||||
def test_unsupported_params(self):
|
def test_unsupported_params(self):
|
||||||
with pytest.raises(ValueError, match="evals_result"):
|
with pytest.raises(ValueError, match="evals_result"):
|
||||||
SparkXGBClassifier(evals_result={})
|
SparkXGBClassifier(evals_result={})
|
||||||
|
|
||||||
|
|
||||||
|
class XgboostRankerLocalTest(SparkTestCase):
|
||||||
|
def setUp(self):
|
||||||
|
self.session.conf.set("spark.sql.execution.arrow.maxRecordsPerBatch", "8")
|
||||||
|
self.ranker_df_train = self.session.createDataFrame(
|
||||||
|
[
|
||||||
|
(Vectors.dense(1.0, 2.0, 3.0), 0, 0),
|
||||||
|
(Vectors.dense(4.0, 5.0, 6.0), 1, 0),
|
||||||
|
(Vectors.dense(9.0, 4.0, 8.0), 2, 0),
|
||||||
|
(Vectors.sparse(3, {1: 1.0, 2: 5.5}), 0, 1),
|
||||||
|
(Vectors.sparse(3, {1: 6.0, 2: 7.5}), 1, 1),
|
||||||
|
(Vectors.sparse(3, {1: 8.0, 2: 9.5}), 2, 1),
|
||||||
|
],
|
||||||
|
["features", "label", "qid"],
|
||||||
|
)
|
||||||
|
self.ranker_df_test = self.session.createDataFrame(
|
||||||
|
[
|
||||||
|
(Vectors.dense(1.5, 2.0, 3.0), 0, -1.87988),
|
||||||
|
(Vectors.dense(4.5, 5.0, 6.0), 0, 0.29556),
|
||||||
|
(Vectors.dense(9.0, 4.5, 8.0), 0, 2.36570),
|
||||||
|
(Vectors.sparse(3, {1: 1.0, 2: 6.0}), 1, -1.87988),
|
||||||
|
(Vectors.sparse(3, {1: 6.0, 2: 7.0}), 1, -0.30612),
|
||||||
|
(Vectors.sparse(3, {1: 8.0, 2: 10.5}), 1, 2.44826),
|
||||||
|
],
|
||||||
|
["features", "qid", "expected_prediction"],
|
||||||
|
)
|
||||||
|
self.ranker_df_train_1 = self.session.createDataFrame(
|
||||||
|
[
|
||||||
|
(Vectors.sparse(3, {1: 1.0, 2: 5.5}), 0, 9),
|
||||||
|
(Vectors.sparse(3, {1: 6.0, 2: 7.5}), 1, 9),
|
||||||
|
(Vectors.sparse(3, {1: 8.0, 2: 9.5}), 2, 9),
|
||||||
|
(Vectors.dense(1.0, 2.0, 3.0), 0, 8),
|
||||||
|
(Vectors.dense(4.0, 5.0, 6.0), 1, 8),
|
||||||
|
(Vectors.dense(9.0, 4.0, 8.0), 2, 8),
|
||||||
|
(Vectors.sparse(3, {1: 1.0, 2: 5.5}), 0, 7),
|
||||||
|
(Vectors.sparse(3, {1: 6.0, 2: 7.5}), 1, 7),
|
||||||
|
(Vectors.sparse(3, {1: 8.0, 2: 9.5}), 2, 7),
|
||||||
|
(Vectors.dense(1.0, 2.0, 3.0), 0, 6),
|
||||||
|
(Vectors.dense(4.0, 5.0, 6.0), 1, 6),
|
||||||
|
(Vectors.dense(9.0, 4.0, 8.0), 2, 6),
|
||||||
|
]
|
||||||
|
* 4,
|
||||||
|
["features", "label", "qid"],
|
||||||
|
)
|
||||||
|
|
||||||
|
def test_ranker(self):
|
||||||
|
ranker = SparkXGBRanker(qid_col="qid")
|
||||||
|
assert ranker.getOrDefault(ranker.objective) == "rank:pairwise"
|
||||||
|
model = ranker.fit(self.ranker_df_train)
|
||||||
|
pred_result = model.transform(self.ranker_df_test).collect()
|
||||||
|
|
||||||
|
for row in pred_result:
|
||||||
|
assert np.isclose(row.prediction, row.expected_prediction, rtol=1e-3)
|
||||||
|
|
||||||
|
def test_ranker_qid_sorted(self):
|
||||||
|
ranker = SparkXGBRanker(qid_col="qid", num_workers=4)
|
||||||
|
assert ranker.getOrDefault(ranker.objective) == "rank:pairwise"
|
||||||
|
model = ranker.fit(self.ranker_df_train_1)
|
||||||
|
model.transform(self.ranker_df_test).collect()
|
||||||
|
|||||||
@@ -112,7 +112,6 @@ class TestPandas:
|
|||||||
|
|
||||||
# test Index as columns
|
# test Index as columns
|
||||||
df = pd.DataFrame([[1, 1.1], [2, 2.2]], columns=pd.Index([1, 2]))
|
df = pd.DataFrame([[1, 1.1], [2, 2.2]], columns=pd.Index([1, 2]))
|
||||||
print(df.columns, isinstance(df.columns, pd.Index))
|
|
||||||
Xy = xgb.DMatrix(df)
|
Xy = xgb.DMatrix(df)
|
||||||
np.testing.assert_equal(np.array(Xy.feature_names), np.array(["1", "2"]))
|
np.testing.assert_equal(np.array(Xy.feature_names), np.array(["1", "2"]))
|
||||||
|
|
||||||
|
|||||||
@@ -4,7 +4,7 @@ import pytest
|
|||||||
|
|
||||||
try:
|
try:
|
||||||
import shap
|
import shap
|
||||||
except ImportError:
|
except Exception:
|
||||||
shap = None
|
shap = None
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|||||||
@@ -2,6 +2,7 @@ import collections
|
|||||||
import importlib.util
|
import importlib.util
|
||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
|
import pickle
|
||||||
import random
|
import random
|
||||||
import tempfile
|
import tempfile
|
||||||
from typing import Callable, Optional
|
from typing import Callable, Optional
|
||||||
@@ -636,26 +637,74 @@ def test_sklearn_n_jobs():
|
|||||||
|
|
||||||
def test_parameters_access():
|
def test_parameters_access():
|
||||||
from sklearn import datasets
|
from sklearn import datasets
|
||||||
params = {'updater': 'grow_gpu_hist', 'subsample': .5, 'n_jobs': -1}
|
|
||||||
|
params = {"updater": "grow_gpu_hist", "subsample": 0.5, "n_jobs": -1}
|
||||||
clf = xgb.XGBClassifier(n_estimators=1000, **params)
|
clf = xgb.XGBClassifier(n_estimators=1000, **params)
|
||||||
assert clf.get_params()['updater'] == 'grow_gpu_hist'
|
assert clf.get_params()["updater"] == "grow_gpu_hist"
|
||||||
assert clf.get_params()['subsample'] == .5
|
assert clf.get_params()["subsample"] == 0.5
|
||||||
assert clf.get_params()['n_estimators'] == 1000
|
assert clf.get_params()["n_estimators"] == 1000
|
||||||
|
|
||||||
clf = xgb.XGBClassifier(n_estimators=1, nthread=4)
|
clf = xgb.XGBClassifier(n_estimators=1, nthread=4)
|
||||||
X, y = datasets.load_iris(return_X_y=True)
|
X, y = datasets.load_iris(return_X_y=True)
|
||||||
clf.fit(X, y)
|
clf.fit(X, y)
|
||||||
|
|
||||||
config = json.loads(clf.get_booster().save_config())
|
config = json.loads(clf.get_booster().save_config())
|
||||||
assert int(config['learner']['generic_param']['nthread']) == 4
|
assert int(config["learner"]["generic_param"]["nthread"]) == 4
|
||||||
|
|
||||||
clf.set_params(nthread=16)
|
clf.set_params(nthread=16)
|
||||||
config = json.loads(clf.get_booster().save_config())
|
config = json.loads(clf.get_booster().save_config())
|
||||||
assert int(config['learner']['generic_param']['nthread']) == 16
|
assert int(config["learner"]["generic_param"]["nthread"]) == 16
|
||||||
|
|
||||||
clf.predict(X)
|
clf.predict(X)
|
||||||
config = json.loads(clf.get_booster().save_config())
|
config = json.loads(clf.get_booster().save_config())
|
||||||
assert int(config['learner']['generic_param']['nthread']) == 16
|
assert int(config["learner"]["generic_param"]["nthread"]) == 16
|
||||||
|
|
||||||
|
clf = xgb.XGBClassifier(n_estimators=2)
|
||||||
|
assert clf.tree_method is None
|
||||||
|
assert clf.get_params()["tree_method"] is None
|
||||||
|
clf.fit(X, y)
|
||||||
|
assert clf.get_params()["tree_method"] is None
|
||||||
|
|
||||||
|
def save_load(clf: xgb.XGBClassifier) -> xgb.XGBClassifier:
|
||||||
|
with tempfile.TemporaryDirectory() as tmpdir:
|
||||||
|
path = os.path.join(tmpdir, "model.json")
|
||||||
|
clf.save_model(path)
|
||||||
|
clf = xgb.XGBClassifier()
|
||||||
|
clf.load_model(path)
|
||||||
|
return clf
|
||||||
|
|
||||||
|
def get_tm(clf: xgb.XGBClassifier) -> str:
|
||||||
|
tm = json.loads(clf.get_booster().save_config())["learner"]["gradient_booster"][
|
||||||
|
"gbtree_train_param"
|
||||||
|
]["tree_method"]
|
||||||
|
return tm
|
||||||
|
|
||||||
|
assert get_tm(clf) == "exact"
|
||||||
|
|
||||||
|
clf = pickle.loads(pickle.dumps(clf))
|
||||||
|
|
||||||
|
assert clf.tree_method is None
|
||||||
|
assert clf.n_estimators == 2
|
||||||
|
assert clf.get_params()["tree_method"] is None
|
||||||
|
assert clf.get_params()["n_estimators"] == 2
|
||||||
|
assert get_tm(clf) == "exact" # preserved for pickle
|
||||||
|
|
||||||
|
clf = save_load(clf)
|
||||||
|
|
||||||
|
assert clf.tree_method is None
|
||||||
|
assert clf.n_estimators == 2
|
||||||
|
assert clf.get_params()["tree_method"] is None
|
||||||
|
assert clf.get_params()["n_estimators"] == 2
|
||||||
|
assert get_tm(clf) == "auto" # discarded for save/load_model
|
||||||
|
|
||||||
|
clf.set_params(tree_method="hist")
|
||||||
|
assert clf.get_params()["tree_method"] == "hist"
|
||||||
|
clf = pickle.loads(pickle.dumps(clf))
|
||||||
|
assert clf.get_params()["tree_method"] == "hist"
|
||||||
|
clf = save_load(clf)
|
||||||
|
# FIXME(jiamingy): We should remove this behavior once we remove parameters
|
||||||
|
# serialization for skl save/load_model.
|
||||||
|
assert clf.get_params()["tree_method"] == "hist"
|
||||||
|
|
||||||
|
|
||||||
def test_kwargs_error():
|
def test_kwargs_error():
|
||||||
@@ -695,13 +744,19 @@ def test_sklearn_clone():
|
|||||||
|
|
||||||
def test_sklearn_get_default_params():
|
def test_sklearn_get_default_params():
|
||||||
from sklearn.datasets import load_digits
|
from sklearn.datasets import load_digits
|
||||||
|
|
||||||
digits_2class = load_digits(n_class=2)
|
digits_2class = load_digits(n_class=2)
|
||||||
X = digits_2class['data']
|
X = digits_2class["data"]
|
||||||
y = digits_2class['target']
|
y = digits_2class["target"]
|
||||||
cls = xgb.XGBClassifier()
|
cls = xgb.XGBClassifier()
|
||||||
assert cls.get_params()['base_score'] is None
|
assert cls.get_params()["base_score"] is None
|
||||||
cls.fit(X[:4, ...], y[:4, ...])
|
cls.fit(X[:4, ...], y[:4, ...])
|
||||||
assert cls.get_params()['base_score'] is not None
|
base_score = float(
|
||||||
|
json.loads(cls.get_booster().save_config())["learner"]["learner_model_param"][
|
||||||
|
"base_score"
|
||||||
|
]
|
||||||
|
)
|
||||||
|
np.testing.assert_equal(base_score, 0.5)
|
||||||
|
|
||||||
|
|
||||||
def run_validation_weights(model):
|
def run_validation_weights(model):
|
||||||
@@ -1029,9 +1084,9 @@ def test_pandas_input():
|
|||||||
|
|
||||||
clf_isotonic = CalibratedClassifierCV(model, cv="prefit", method="isotonic")
|
clf_isotonic = CalibratedClassifierCV(model, cv="prefit", method="isotonic")
|
||||||
clf_isotonic.fit(train, target)
|
clf_isotonic.fit(train, target)
|
||||||
assert isinstance(
|
clf = clf_isotonic.calibrated_classifiers_[0]
|
||||||
clf_isotonic.calibrated_classifiers_[0].base_estimator, xgb.XGBClassifier
|
est = clf.estimator if hasattr(clf, "estimator") else clf.base_estimator
|
||||||
)
|
assert isinstance(est, xgb.XGBClassifier)
|
||||||
np.testing.assert_allclose(np.array(clf_isotonic.classes_), np.array([0, 1]))
|
np.testing.assert_allclose(np.array(clf_isotonic.classes_), np.array([0, 1]))
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user