Compare commits
52 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
36eb41c960 | ||
|
|
39ddf40a8d | ||
|
|
573f1c7db4 | ||
|
|
abc80d2a6d | ||
|
|
e882fb3262 | ||
|
|
3218f6cd3c | ||
|
|
a962611de7 | ||
|
|
14476e8868 | ||
|
|
03f3879b71 | ||
|
|
21d95f3d8f | ||
|
|
5cd4015d70 | ||
|
|
b8c6b86792 | ||
|
|
1baebe231b | ||
|
|
365da0b8f4 | ||
|
|
f5f03dfb61 | ||
|
|
a1c209182d | ||
|
|
4be75d852c | ||
|
|
ba50e6eb62 | ||
|
|
36ad160501 | ||
|
|
c22f6db4bf | ||
|
|
f15a6d2b19 | ||
|
|
08a547f5c2 | ||
|
|
60303db2ee | ||
|
|
df984f9c43 | ||
|
|
2f22f8d49b | ||
|
|
68d86336d7 | ||
|
|
76bdca072a | ||
|
|
021e6a842a | ||
|
|
e5bef4ffce | ||
|
|
10bb0a74ef | ||
|
|
e803d06d8c | ||
|
|
ccf43d4ba0 | ||
|
|
dd58c2ac47 | ||
|
|
899e4c8988 | ||
|
|
a2085bf223 | ||
|
|
067b704e58 | ||
|
|
1a834b2b85 | ||
|
|
162b48a1a4 | ||
|
|
83a078b7e5 | ||
|
|
575fba651b | ||
|
|
62ed8b5fef | ||
|
|
a980e10744 | ||
|
|
59c54e361b | ||
|
|
60a8c8ebba | ||
|
|
58bc225657 | ||
|
|
850b53100f | ||
|
|
67b657dad0 | ||
|
|
db14e3feb7 | ||
|
|
9372370dda | ||
|
|
1136a7e0c3 | ||
|
|
a347cd512b | ||
|
|
9ff0c0832a |
87
.github/workflows/main.yml
vendored
87
.github/workflows/main.yml
vendored
@@ -75,19 +75,18 @@ jobs:
|
||||
- uses: actions/checkout@v2
|
||||
with:
|
||||
submodules: 'true'
|
||||
- name: Install system packages
|
||||
run: |
|
||||
sudo apt-get install -y --no-install-recommends ninja-build
|
||||
- uses: conda-incubator/setup-miniconda@v2
|
||||
- uses: mamba-org/provision-with-micromamba@f347426e5745fe3dfc13ec5baf20496990d0281f # v14
|
||||
with:
|
||||
auto-update-conda: true
|
||||
python-version: ${{ matrix.python-version }}
|
||||
activate-environment: test
|
||||
cache-downloads: true
|
||||
cache-env: true
|
||||
environment-name: cpp_test
|
||||
environment-file: tests/ci_build/conda_env/cpp_test.yml
|
||||
- name: Display Conda env
|
||||
shell: bash -l {0}
|
||||
run: |
|
||||
conda info
|
||||
conda list
|
||||
|
||||
- name: Build and install XGBoost static library
|
||||
shell: bash -l {0}
|
||||
run: |
|
||||
@@ -109,6 +108,7 @@ jobs:
|
||||
cd ..
|
||||
rm -rf ./build
|
||||
popd
|
||||
|
||||
- name: Build and install XGBoost shared library
|
||||
shell: bash -l {0}
|
||||
run: |
|
||||
@@ -148,66 +148,13 @@ jobs:
|
||||
run: |
|
||||
LINT_LANG=cpp make lint
|
||||
|
||||
doxygen:
|
||||
runs-on: ubuntu-latest
|
||||
name: Generate C/C++ API doc using Doxygen
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
with:
|
||||
submodules: 'true'
|
||||
- uses: actions/setup-python@v2
|
||||
with:
|
||||
python-version: "3.8"
|
||||
architecture: 'x64'
|
||||
- name: Install system packages
|
||||
run: |
|
||||
sudo apt-get install -y --no-install-recommends doxygen graphviz ninja-build
|
||||
python -m pip install wheel setuptools
|
||||
python -m pip install awscli
|
||||
- name: Run Doxygen
|
||||
run: |
|
||||
mkdir build
|
||||
cd build
|
||||
cmake .. -DBUILD_C_DOC=ON -GNinja
|
||||
ninja -v doc_doxygen
|
||||
- name: Extract branch name
|
||||
shell: bash
|
||||
run: echo "##[set-output name=branch;]$(echo ${GITHUB_REF#refs/heads/})"
|
||||
id: extract_branch
|
||||
if: github.ref == 'refs/heads/master' || contains(github.ref, 'refs/heads/release_')
|
||||
- name: Publish
|
||||
run: |
|
||||
cd build/
|
||||
tar cvjf ${{ steps.extract_branch.outputs.branch }}.tar.bz2 doc_doxygen/
|
||||
python -m awscli s3 cp ./${{ steps.extract_branch.outputs.branch }}.tar.bz2 s3://xgboost-docs/doxygen/ --acl public-read
|
||||
if: github.ref == 'refs/heads/master' || contains(github.ref, 'refs/heads/release_')
|
||||
env:
|
||||
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID_IAM_S3_UPLOADER }}
|
||||
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY_IAM_S3_UPLOADER }}
|
||||
|
||||
sphinx:
|
||||
runs-on: ubuntu-latest
|
||||
name: Build docs using Sphinx
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
with:
|
||||
submodules: 'true'
|
||||
- uses: actions/setup-python@v2
|
||||
with:
|
||||
python-version: "3.8"
|
||||
architecture: 'x64'
|
||||
- name: Install system packages
|
||||
run: |
|
||||
sudo apt-get install -y --no-install-recommends graphviz
|
||||
python -m pip install wheel setuptools
|
||||
python -m pip install -r doc/requirements.txt
|
||||
- name: Extract branch name
|
||||
shell: bash
|
||||
run: echo "##[set-output name=branch;]$(echo ${GITHUB_REF#refs/heads/})"
|
||||
id: extract_branch
|
||||
if: github.ref == 'refs/heads/master' || contains(github.ref, 'refs/heads/release_')
|
||||
- name: Run Sphinx
|
||||
run: |
|
||||
make -C doc html
|
||||
env:
|
||||
SPHINX_GIT_BRANCH: ${{ steps.extract_branch.outputs.branch }}
|
||||
python3 dmlc-core/scripts/lint.py --exclude_path \
|
||||
python-package/xgboost/dmlc-core \
|
||||
python-package/xgboost/include \
|
||||
python-package/xgboost/lib \
|
||||
python-package/xgboost/rabit \
|
||||
python-package/xgboost/src \
|
||||
--pylint-rc python-package/.pylintrc \
|
||||
xgboost \
|
||||
cpp \
|
||||
include src python-package
|
||||
|
||||
150
.github/workflows/python_tests.yml
vendored
150
.github/workflows/python_tests.yml
vendored
@@ -41,12 +41,46 @@ jobs:
|
||||
run: |
|
||||
python tests/ci_build/lint_python.py --format=0 --type-check=0 --pylint=1
|
||||
|
||||
python-sdist-test:
|
||||
python-sdist-test-on-Linux:
|
||||
# Mismatched glibcxx version between system and conda forge.
|
||||
runs-on: ${{ matrix.os }}
|
||||
name: Test installing XGBoost Python source package on ${{ matrix.os }}
|
||||
strategy:
|
||||
matrix:
|
||||
os: [ubuntu-latest, macos-11, windows-latest]
|
||||
os: [ubuntu-latest]
|
||||
steps:
|
||||
- uses: actions/checkout@e2f20e631ae6d7dd3b768f56a5d2af784dd54791 # v2.5.0
|
||||
with:
|
||||
submodules: 'true'
|
||||
- uses: mamba-org/provision-with-micromamba@f347426e5745fe3dfc13ec5baf20496990d0281f # v14
|
||||
with:
|
||||
cache-downloads: true
|
||||
cache-env: false
|
||||
environment-name: sdist_test
|
||||
environment-file: tests/ci_build/conda_env/sdist_test.yml
|
||||
- name: Display Conda env
|
||||
shell: bash -l {0}
|
||||
run: |
|
||||
conda info
|
||||
conda list
|
||||
- name: Build and install XGBoost
|
||||
shell: bash -l {0}
|
||||
run: |
|
||||
cd python-package
|
||||
python --version
|
||||
python setup.py sdist
|
||||
pip install -v ./dist/xgboost-*.tar.gz
|
||||
cd ..
|
||||
python -c 'import xgboost'
|
||||
|
||||
python-sdist-test:
|
||||
# Use system toolchain instead of conda toolchain for macos and windows.
|
||||
# MacOS has linker error if clang++ from conda-forge is used
|
||||
runs-on: ${{ matrix.os }}
|
||||
name: Test installing XGBoost Python source package on ${{ matrix.os }}
|
||||
strategy:
|
||||
matrix:
|
||||
os: [macos-11, windows-latest]
|
||||
python-version: ["3.8"]
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
@@ -56,11 +90,7 @@ jobs:
|
||||
if: matrix.os == 'macos-11'
|
||||
run: |
|
||||
brew install ninja libomp
|
||||
- name: Install Ubuntu system dependencies
|
||||
if: matrix.os == 'ubuntu-latest'
|
||||
run: |
|
||||
sudo apt-get install -y --no-install-recommends ninja-build
|
||||
- uses: conda-incubator/setup-miniconda@v2
|
||||
- uses: conda-incubator/setup-miniconda@35d1405e78aa3f784fe3ce9a2eb378d5eeb62169 # v2.1.1
|
||||
with:
|
||||
auto-update-conda: true
|
||||
python-version: ${{ matrix.python-version }}
|
||||
@@ -80,6 +110,58 @@ jobs:
|
||||
cd ..
|
||||
python -c 'import xgboost'
|
||||
|
||||
python-tests-on-macos:
|
||||
name: Test XGBoost Python package on ${{ matrix.config.os }}
|
||||
runs-on: ${{ matrix.config.os }}
|
||||
timeout-minutes: 60
|
||||
strategy:
|
||||
matrix:
|
||||
config:
|
||||
- {os: macos-11}
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@e2f20e631ae6d7dd3b768f56a5d2af784dd54791 # v2.5.0
|
||||
with:
|
||||
submodules: 'true'
|
||||
|
||||
- uses: mamba-org/provision-with-micromamba@f347426e5745fe3dfc13ec5baf20496990d0281f # v14
|
||||
with:
|
||||
cache-downloads: true
|
||||
cache-env: false
|
||||
environment-name: macos_test
|
||||
environment-file: tests/ci_build/conda_env/macos_cpu_test.yml
|
||||
|
||||
- name: Display Conda env
|
||||
shell: bash -l {0}
|
||||
run: |
|
||||
conda info
|
||||
conda list
|
||||
|
||||
- name: Build XGBoost on macos
|
||||
shell: bash -l {0}
|
||||
run: |
|
||||
brew install ninja
|
||||
|
||||
mkdir build
|
||||
cd build
|
||||
# Set prefix, to use OpenMP library from Conda env
|
||||
# See https://github.com/dmlc/xgboost/issues/7039#issuecomment-1025038228
|
||||
# to learn why we don't use libomp from Homebrew.
|
||||
cmake .. -GNinja -DCMAKE_PREFIX_PATH=$CONDA_PREFIX
|
||||
ninja
|
||||
|
||||
- name: Install Python package
|
||||
shell: bash -l {0}
|
||||
run: |
|
||||
cd python-package
|
||||
python --version
|
||||
python setup.py install
|
||||
|
||||
- name: Test Python package
|
||||
shell: bash -l {0}
|
||||
run: |
|
||||
pytest -s -v -rxXs --durations=0 ./tests/python
|
||||
|
||||
python-tests-on-win:
|
||||
name: Test XGBoost Python package on ${{ matrix.config.os }}
|
||||
runs-on: ${{ matrix.config.os }}
|
||||
@@ -125,56 +207,4 @@ jobs:
|
||||
- name: Test Python package
|
||||
shell: bash -l {0}
|
||||
run: |
|
||||
pytest -s -v ./tests/python
|
||||
|
||||
python-tests-on-macos:
|
||||
name: Test XGBoost Python package on ${{ matrix.config.os }}
|
||||
runs-on: ${{ matrix.config.os }}
|
||||
timeout-minutes: 90
|
||||
strategy:
|
||||
matrix:
|
||||
config:
|
||||
- {os: macos-11, python-version "3.8" }
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
with:
|
||||
submodules: 'true'
|
||||
|
||||
- uses: conda-incubator/setup-miniconda@v2
|
||||
with:
|
||||
auto-update-conda: true
|
||||
python-version: ${{ matrix.config.python-version }}
|
||||
activate-environment: macos_test
|
||||
environment-file: tests/ci_build/conda_env/macos_cpu_test.yml
|
||||
|
||||
- name: Display Conda env
|
||||
shell: bash -l {0}
|
||||
run: |
|
||||
conda info
|
||||
conda list
|
||||
|
||||
- name: Build XGBoost on macos
|
||||
shell: bash -l {0}
|
||||
run: |
|
||||
brew install ninja
|
||||
|
||||
mkdir build
|
||||
cd build
|
||||
# Set prefix, to use OpenMP library from Conda env
|
||||
# See https://github.com/dmlc/xgboost/issues/7039#issuecomment-1025038228
|
||||
# to learn why we don't use libomp from Homebrew.
|
||||
cmake .. -GNinja -DGOOGLE_TEST=ON -DUSE_DMLC_GTEST=ON -DCMAKE_PREFIX_PATH=$CONDA_PREFIX
|
||||
ninja
|
||||
|
||||
- name: Install Python package
|
||||
shell: bash -l {0}
|
||||
run: |
|
||||
cd python-package
|
||||
python --version
|
||||
python setup.py install
|
||||
|
||||
- name: Test Python package
|
||||
shell: bash -l {0}
|
||||
run: |
|
||||
pytest -s -v ./tests/python
|
||||
pytest -s -v -rxXs --durations=0 ./tests/python
|
||||
|
||||
6
.github/workflows/r_tests.yml
vendored
6
.github/workflows/r_tests.yml
vendored
@@ -5,6 +5,7 @@ on: [push, pull_request]
|
||||
env:
|
||||
R_PACKAGES: c('XML', 'data.table', 'ggplot2', 'DiagrammeR', 'Ckmeans.1d.dp', 'vcd', 'testthat', 'lintr', 'knitr', 'rmarkdown', 'e1071', 'cplm', 'devtools', 'float', 'titanic')
|
||||
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
|
||||
_R_CHECK_EXAMPLE_TIMING_CPU_TO_ELAPSED_THRESHOLD_: 2.5
|
||||
|
||||
permissions:
|
||||
contents: read # to fetch code (actions/checkout)
|
||||
@@ -68,6 +69,7 @@ jobs:
|
||||
- {os: windows-latest, r: 'release', compiler: 'mingw', build: 'cmake'}
|
||||
env:
|
||||
R_REMOTES_NO_ERRORS_FROM_WARNINGS: true
|
||||
_R_CHECK_EXAMPLE_TIMING_CPU_TO_ELAPSED_THRESHOLD_: 2.5
|
||||
RSPM: ${{ matrix.config.rspm }}
|
||||
|
||||
steps:
|
||||
@@ -121,6 +123,10 @@ jobs:
|
||||
config:
|
||||
- {r: 'release'}
|
||||
|
||||
env:
|
||||
_R_CHECK_EXAMPLE_TIMING_CPU_TO_ELAPSED_THRESHOLD_: 2.5
|
||||
MAKE: "make -j$(nproc)"
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
with:
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
cmake_minimum_required(VERSION 3.14 FATAL_ERROR)
|
||||
project(xgboost LANGUAGES CXX C VERSION 1.7.1)
|
||||
cmake_minimum_required(VERSION 3.18 FATAL_ERROR)
|
||||
project(xgboost LANGUAGES CXX C VERSION 1.7.6)
|
||||
include(cmake/Utils.cmake)
|
||||
list(APPEND CMAKE_MODULE_PATH "${xgboost_SOURCE_DIR}/cmake/modules")
|
||||
cmake_policy(SET CMP0022 NEW)
|
||||
@@ -168,9 +168,6 @@ find_package(Threads REQUIRED)
|
||||
|
||||
if (USE_OPENMP)
|
||||
if (APPLE)
|
||||
# Require CMake 3.16+ on Mac OSX, as previous versions of CMake had trouble locating
|
||||
# OpenMP on Mac. See https://github.com/dmlc/xgboost/pull/5146#issuecomment-568312706
|
||||
cmake_minimum_required(VERSION 3.16)
|
||||
find_package(OpenMP)
|
||||
if (NOT OpenMP_FOUND)
|
||||
# Try again with extra path info; required for libomp 15+ from Homebrew
|
||||
|
||||
1
Makefile
1
Makefile
@@ -126,7 +126,6 @@ Rpack: clean_all
|
||||
cat R-package/src/Makevars.in|sed '2s/.*/PKGROOT=./' > xgboost/src/Makevars.in
|
||||
cat R-package/src/Makevars.win|sed '2s/.*/PKGROOT=./' > xgboost/src/Makevars.win
|
||||
rm -f xgboost/src/Makevars.win-e # OSX sed create this extra file; remove it
|
||||
rm -f xgboost/cleanup
|
||||
bash R-package/remove_warning_suppression_pragma.sh
|
||||
bash xgboost/remove_warning_suppression_pragma.sh
|
||||
rm xgboost/remove_warning_suppression_pragma.sh
|
||||
|
||||
@@ -31,7 +31,7 @@ if (USE_OPENMP)
|
||||
endif (USE_OPENMP)
|
||||
set_target_properties(
|
||||
xgboost-r PROPERTIES
|
||||
CXX_STANDARD 14
|
||||
CXX_STANDARD 17
|
||||
CXX_STANDARD_REQUIRED ON
|
||||
POSITION_INDEPENDENT_CODE ON)
|
||||
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
Package: xgboost
|
||||
Type: Package
|
||||
Title: Extreme Gradient Boosting
|
||||
Version: 1.7.1.1
|
||||
Date: 2022-11-03
|
||||
Version: 1.7.6.1
|
||||
Date: 2023-06-16
|
||||
Authors@R: c(
|
||||
person("Tianqi", "Chen", role = c("aut"),
|
||||
email = "tianqi.tchen@gmail.com"),
|
||||
@@ -66,5 +66,6 @@ Imports:
|
||||
methods,
|
||||
data.table (>= 1.9.6),
|
||||
jsonlite (>= 1.0),
|
||||
RoxygenNote: 7.1.1
|
||||
SystemRequirements: GNU make, C++14
|
||||
RoxygenNote: 7.2.3
|
||||
Encoding: UTF-8
|
||||
SystemRequirements: GNU make, C++17
|
||||
|
||||
@@ -1,9 +1,9 @@
|
||||
Copyright (c) 2014 by Tianqi Chen and Contributors
|
||||
Copyright (c) 2014-2023, Tianqi Chen and XBGoost Contributors
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
|
||||
@@ -544,9 +544,11 @@ cb.cv.predict <- function(save_models = FALSE) {
|
||||
#'
|
||||
#' @return
|
||||
#' Results are stored in the \code{coefs} element of the closure.
|
||||
#' The \code{\link{xgb.gblinear.history}} convenience function provides an easy way to access it.
|
||||
#' The \code{\link{xgb.gblinear.history}} convenience function provides an easy
|
||||
#' way to access it.
|
||||
#' With \code{xgb.train}, it is either a dense of a sparse matrix.
|
||||
#' While with \code{xgb.cv}, it is a list (an element per each fold) of such matrices.
|
||||
#' While with \code{xgb.cv}, it is a list (an element per each fold) of such
|
||||
#' matrices.
|
||||
#'
|
||||
#' @seealso
|
||||
#' \code{\link{callbacks}}, \code{\link{xgb.gblinear.history}}.
|
||||
@@ -558,7 +560,7 @@ cb.cv.predict <- function(save_models = FALSE) {
|
||||
#' # without considering the 2nd order interactions:
|
||||
#' x <- model.matrix(Species ~ .^2, iris)[,-1]
|
||||
#' colnames(x)
|
||||
#' dtrain <- xgb.DMatrix(scale(x), label = 1*(iris$Species == "versicolor"))
|
||||
#' dtrain <- xgb.DMatrix(scale(x), label = 1*(iris$Species == "versicolor"), nthread = 2)
|
||||
#' param <- list(booster = "gblinear", objective = "reg:logistic", eval_metric = "auc",
|
||||
#' lambda = 0.0003, alpha = 0.0003, nthread = 2)
|
||||
#' # For 'shotgun', which is a default linear updater, using high eta values may result in
|
||||
@@ -583,14 +585,14 @@ cb.cv.predict <- function(save_models = FALSE) {
|
||||
#'
|
||||
#' # For xgb.cv:
|
||||
#' bst <- xgb.cv(param, dtrain, nfold = 5, nrounds = 100, eta = 0.8,
|
||||
#' callbacks = list(cb.gblinear.history()))
|
||||
#' callbacks = list(cb.gblinear.history()))
|
||||
#' # coefficients in the CV fold #3
|
||||
#' matplot(xgb.gblinear.history(bst)[[3]], type = 'l')
|
||||
#'
|
||||
#'
|
||||
#' #### Multiclass classification:
|
||||
#' #
|
||||
#' dtrain <- xgb.DMatrix(scale(x), label = as.numeric(iris$Species) - 1)
|
||||
#' dtrain <- xgb.DMatrix(scale(x), label = as.numeric(iris$Species) - 1, nthread = 2)
|
||||
#' param <- list(booster = "gblinear", objective = "multi:softprob", num_class = 3,
|
||||
#' lambda = 0.0003, alpha = 0.0003, nthread = 2)
|
||||
#' # For the default linear updater 'shotgun' it sometimes is helpful
|
||||
|
||||
@@ -328,8 +328,9 @@ predict.xgb.Booster <- function(object, newdata, missing = NA, outputmargin = FA
|
||||
predleaf = FALSE, predcontrib = FALSE, approxcontrib = FALSE, predinteraction = FALSE,
|
||||
reshape = FALSE, training = FALSE, iterationrange = NULL, strict_shape = FALSE, ...) {
|
||||
object <- xgb.Booster.complete(object, saveraw = FALSE)
|
||||
|
||||
if (!inherits(newdata, "xgb.DMatrix"))
|
||||
newdata <- xgb.DMatrix(newdata, missing = missing)
|
||||
newdata <- xgb.DMatrix(newdata, missing = missing, nthread = NVL(object$params[["nthread"]], -1))
|
||||
if (!is.null(object[["feature_names"]]) &&
|
||||
!is.null(colnames(newdata)) &&
|
||||
!identical(object[["feature_names"]], colnames(newdata)))
|
||||
|
||||
@@ -18,7 +18,7 @@
|
||||
#'
|
||||
#' @examples
|
||||
#' data(agaricus.train, package='xgboost')
|
||||
#' dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label))
|
||||
#' dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))
|
||||
#' xgb.DMatrix.save(dtrain, 'xgb.DMatrix.data')
|
||||
#' dtrain <- xgb.DMatrix('xgb.DMatrix.data')
|
||||
#' if (file.exists('xgb.DMatrix.data')) file.remove('xgb.DMatrix.data')
|
||||
@@ -110,7 +110,7 @@ xgb.get.DMatrix <- function(data, label = NULL, missing = NA, weight = NULL, nth
|
||||
#' @examples
|
||||
#' data(agaricus.train, package='xgboost')
|
||||
#' train <- agaricus.train
|
||||
#' dtrain <- xgb.DMatrix(train$data, label=train$label)
|
||||
#' dtrain <- xgb.DMatrix(train$data, label=train$label, nthread = 2)
|
||||
#'
|
||||
#' stopifnot(nrow(dtrain) == nrow(train$data))
|
||||
#' stopifnot(ncol(dtrain) == ncol(train$data))
|
||||
@@ -138,7 +138,7 @@ dim.xgb.DMatrix <- function(x) {
|
||||
#' @examples
|
||||
#' data(agaricus.train, package='xgboost')
|
||||
#' train <- agaricus.train
|
||||
#' dtrain <- xgb.DMatrix(train$data, label=train$label)
|
||||
#' dtrain <- xgb.DMatrix(train$data, label=train$label, nthread = 2)
|
||||
#' dimnames(dtrain)
|
||||
#' colnames(dtrain)
|
||||
#' colnames(dtrain) <- make.names(1:ncol(train$data))
|
||||
@@ -193,7 +193,7 @@ dimnames.xgb.DMatrix <- function(x) {
|
||||
#'
|
||||
#' @examples
|
||||
#' data(agaricus.train, package='xgboost')
|
||||
#' dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label))
|
||||
#' dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))
|
||||
#'
|
||||
#' labels <- getinfo(dtrain, 'label')
|
||||
#' setinfo(dtrain, 'label', 1-labels)
|
||||
@@ -249,7 +249,7 @@ getinfo.xgb.DMatrix <- function(object, name, ...) {
|
||||
#'
|
||||
#' @examples
|
||||
#' data(agaricus.train, package='xgboost')
|
||||
#' dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label))
|
||||
#' dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))
|
||||
#'
|
||||
#' labels <- getinfo(dtrain, 'label')
|
||||
#' setinfo(dtrain, 'label', 1-labels)
|
||||
@@ -345,7 +345,7 @@ setinfo.xgb.DMatrix <- function(object, name, info, ...) {
|
||||
#'
|
||||
#' @examples
|
||||
#' data(agaricus.train, package='xgboost')
|
||||
#' dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label))
|
||||
#' dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))
|
||||
#'
|
||||
#' dsub <- slice(dtrain, 1:42)
|
||||
#' labels1 <- getinfo(dsub, 'label')
|
||||
@@ -401,7 +401,7 @@ slice.xgb.DMatrix <- function(object, idxset, ...) {
|
||||
#'
|
||||
#' @examples
|
||||
#' data(agaricus.train, package='xgboost')
|
||||
#' dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label))
|
||||
#' dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))
|
||||
#'
|
||||
#' dtrain
|
||||
#' print(dtrain, verbose=TRUE)
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
#'
|
||||
#' @examples
|
||||
#' data(agaricus.train, package='xgboost')
|
||||
#' dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label))
|
||||
#' dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))
|
||||
#' xgb.DMatrix.save(dtrain, 'xgb.DMatrix.data')
|
||||
#' dtrain <- xgb.DMatrix('xgb.DMatrix.data')
|
||||
#' if (file.exists('xgb.DMatrix.data')) file.remove('xgb.DMatrix.data')
|
||||
|
||||
@@ -48,8 +48,8 @@
|
||||
#' @examples
|
||||
#' data(agaricus.train, package='xgboost')
|
||||
#' data(agaricus.test, package='xgboost')
|
||||
#' dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label))
|
||||
#' dtest <- with(agaricus.test, xgb.DMatrix(data, label = label))
|
||||
#' dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))
|
||||
#' dtest <- with(agaricus.test, xgb.DMatrix(data, label = label, nthread = 2))
|
||||
#'
|
||||
#' param <- list(max_depth=2, eta=1, silent=1, objective='binary:logistic')
|
||||
#' nrounds = 4
|
||||
@@ -65,8 +65,12 @@
|
||||
#' new.features.test <- xgb.create.features(model = bst, agaricus.test$data)
|
||||
#'
|
||||
#' # learning with new features
|
||||
#' new.dtrain <- xgb.DMatrix(data = new.features.train, label = agaricus.train$label)
|
||||
#' new.dtest <- xgb.DMatrix(data = new.features.test, label = agaricus.test$label)
|
||||
#' new.dtrain <- xgb.DMatrix(
|
||||
#' data = new.features.train, label = agaricus.train$label, nthread = 2
|
||||
#' )
|
||||
#' new.dtest <- xgb.DMatrix(
|
||||
#' data = new.features.test, label = agaricus.test$label, nthread = 2
|
||||
#' )
|
||||
#' watchlist <- list(train = new.dtrain)
|
||||
#' bst <- xgb.train(params = param, data = new.dtrain, nrounds = nrounds, nthread = 2)
|
||||
#'
|
||||
@@ -79,7 +83,7 @@
|
||||
#' accuracy.after, "!\n"))
|
||||
#'
|
||||
#' @export
|
||||
xgb.create.features <- function(model, data, ...){
|
||||
xgb.create.features <- function(model, data, ...) {
|
||||
check.deprecation(...)
|
||||
pred_with_leaf <- predict(model, data, predleaf = TRUE)
|
||||
cols <- lapply(as.data.frame(pred_with_leaf), factor)
|
||||
|
||||
@@ -110,9 +110,9 @@
|
||||
#'
|
||||
#' @examples
|
||||
#' data(agaricus.train, package='xgboost')
|
||||
#' dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label))
|
||||
#' dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))
|
||||
#' cv <- xgb.cv(data = dtrain, nrounds = 3, nthread = 2, nfold = 5, metrics = list("rmse","auc"),
|
||||
#' max_depth = 3, eta = 1, objective = "binary:logistic")
|
||||
#' max_depth = 3, eta = 1, objective = "binary:logistic")
|
||||
#' print(cv)
|
||||
#' print(cv, verbose=TRUE)
|
||||
#'
|
||||
@@ -192,7 +192,7 @@ xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, missing =
|
||||
|
||||
# create the booster-folds
|
||||
# train_folds
|
||||
dall <- xgb.get.DMatrix(data, label, missing)
|
||||
dall <- xgb.get.DMatrix(data, label, missing, nthread = params$nthread)
|
||||
bst_folds <- lapply(seq_along(folds), function(k) {
|
||||
dtest <- slice(dall, folds[[k]])
|
||||
# code originally contributed by @RolandASc on stackoverflow
|
||||
|
||||
@@ -34,7 +34,7 @@
|
||||
#' The branches that also used for missing values are marked as bold
|
||||
#' (as in "carrying extra capacity").
|
||||
#'
|
||||
#' This function uses \href{http://www.graphviz.org/}{GraphViz} as a backend of DiagrammeR.
|
||||
#' This function uses \href{https://www.graphviz.org/}{GraphViz} as a backend of DiagrammeR.
|
||||
#'
|
||||
#' @return
|
||||
#'
|
||||
|
||||
@@ -192,8 +192,8 @@
|
||||
#' data(agaricus.train, package='xgboost')
|
||||
#' data(agaricus.test, package='xgboost')
|
||||
#'
|
||||
#' dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label))
|
||||
#' dtest <- with(agaricus.test, xgb.DMatrix(data, label = label))
|
||||
#' dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))
|
||||
#' dtest <- with(agaricus.test, xgb.DMatrix(data, label = label, nthread = 2))
|
||||
#' watchlist <- list(train = dtrain, eval = dtest)
|
||||
#'
|
||||
#' ## A simple xgb.train example:
|
||||
|
||||
1831
R-package/configure
vendored
1831
R-package/configure
vendored
File diff suppressed because it is too large
Load Diff
@@ -2,10 +2,25 @@
|
||||
|
||||
AC_PREREQ(2.69)
|
||||
|
||||
AC_INIT([xgboost],[1.7.1],[],[xgboost],[])
|
||||
AC_INIT([xgboost],[1.7.6],[],[xgboost],[])
|
||||
|
||||
# Use this line to set CC variable to a C compiler
|
||||
AC_PROG_CC
|
||||
: ${R_HOME=`R RHOME`}
|
||||
if test -z "${R_HOME}"; then
|
||||
echo "could not determine R_HOME"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
CXX17=`"${R_HOME}/bin/R" CMD config CXX17`
|
||||
CXX17STD=`"${R_HOME}/bin/R" CMD config CXX17STD`
|
||||
CXX="${CXX17} ${CXX17STD}"
|
||||
CXXFLAGS=`"${R_HOME}/bin/R" CMD config CXXFLAGS`
|
||||
|
||||
CC=`"${R_HOME}/bin/R" CMD config CC`
|
||||
CFLAGS=`"${R_HOME}/bin/R" CMD config CFLAGS`
|
||||
CPPFLAGS=`"${R_HOME}/bin/R" CMD config CPPFLAGS`
|
||||
|
||||
LDFLAGS=`"${R_HOME}/bin/R" CMD config LDFLAGS`
|
||||
AC_LANG(C++)
|
||||
|
||||
### Check whether backtrace() is part of libc or the external lib libexecinfo
|
||||
AC_MSG_CHECKING([Backtrace lib])
|
||||
@@ -40,7 +55,7 @@ then
|
||||
ac_pkg_openmp=no
|
||||
AC_MSG_CHECKING([whether OpenMP will work in a package])
|
||||
AC_LANG_CONFTEST([AC_LANG_PROGRAM([[#include <omp.h>]], [[ return (omp_get_max_threads() <= 1); ]])])
|
||||
${CC} -o conftest conftest.c ${CPPFLAGS} ${LDFLAGS} ${OPENMP_LIB} ${OPENMP_CXXFLAGS} 2>/dev/null && ./conftest && ac_pkg_openmp=yes
|
||||
${CXX} -o conftest conftest.cpp ${CPPFLAGS} ${LDFLAGS} ${OPENMP_LIB} ${OPENMP_CXXFLAGS} 2>/dev/null && ./conftest && ac_pkg_openmp=yes
|
||||
AC_MSG_RESULT([${ac_pkg_openmp}])
|
||||
if test "${ac_pkg_openmp}" = no; then
|
||||
OPENMP_CXXFLAGS=''
|
||||
|
||||
@@ -15,9 +15,11 @@ selected per iteration.}
|
||||
}
|
||||
\value{
|
||||
Results are stored in the \code{coefs} element of the closure.
|
||||
The \code{\link{xgb.gblinear.history}} convenience function provides an easy way to access it.
|
||||
The \code{\link{xgb.gblinear.history}} convenience function provides an easy
|
||||
way to access it.
|
||||
With \code{xgb.train}, it is either a dense of a sparse matrix.
|
||||
While with \code{xgb.cv}, it is a list (an element per each fold) of such matrices.
|
||||
While with \code{xgb.cv}, it is a list (an element per each fold) of such
|
||||
matrices.
|
||||
}
|
||||
\description{
|
||||
Callback closure for collecting the model coefficients history of a gblinear booster
|
||||
@@ -38,7 +40,7 @@ Callback function expects the following values to be set in its calling frame:
|
||||
# without considering the 2nd order interactions:
|
||||
x <- model.matrix(Species ~ .^2, iris)[,-1]
|
||||
colnames(x)
|
||||
dtrain <- xgb.DMatrix(scale(x), label = 1*(iris$Species == "versicolor"))
|
||||
dtrain <- xgb.DMatrix(scale(x), label = 1*(iris$Species == "versicolor"), nthread = 2)
|
||||
param <- list(booster = "gblinear", objective = "reg:logistic", eval_metric = "auc",
|
||||
lambda = 0.0003, alpha = 0.0003, nthread = 2)
|
||||
# For 'shotgun', which is a default linear updater, using high eta values may result in
|
||||
@@ -63,14 +65,14 @@ matplot(xgb.gblinear.history(bst), type = 'l')
|
||||
|
||||
# For xgb.cv:
|
||||
bst <- xgb.cv(param, dtrain, nfold = 5, nrounds = 100, eta = 0.8,
|
||||
callbacks = list(cb.gblinear.history()))
|
||||
callbacks = list(cb.gblinear.history()))
|
||||
# coefficients in the CV fold #3
|
||||
matplot(xgb.gblinear.history(bst)[[3]], type = 'l')
|
||||
|
||||
|
||||
#### Multiclass classification:
|
||||
#
|
||||
dtrain <- xgb.DMatrix(scale(x), label = as.numeric(iris$Species) - 1)
|
||||
dtrain <- xgb.DMatrix(scale(x), label = as.numeric(iris$Species) - 1, nthread = 2)
|
||||
param <- list(booster = "gblinear", objective = "multi:softprob", num_class = 3,
|
||||
lambda = 0.0003, alpha = 0.0003, nthread = 2)
|
||||
# For the default linear updater 'shotgun' it sometimes is helpful
|
||||
|
||||
@@ -19,7 +19,7 @@ be directly used with an \code{xgb.DMatrix} object.
|
||||
\examples{
|
||||
data(agaricus.train, package='xgboost')
|
||||
train <- agaricus.train
|
||||
dtrain <- xgb.DMatrix(train$data, label=train$label)
|
||||
dtrain <- xgb.DMatrix(train$data, label=train$label, nthread = 2)
|
||||
|
||||
stopifnot(nrow(dtrain) == nrow(train$data))
|
||||
stopifnot(ncol(dtrain) == ncol(train$data))
|
||||
|
||||
@@ -26,7 +26,7 @@ Since row names are irrelevant, it is recommended to use \code{colnames} directl
|
||||
\examples{
|
||||
data(agaricus.train, package='xgboost')
|
||||
train <- agaricus.train
|
||||
dtrain <- xgb.DMatrix(train$data, label=train$label)
|
||||
dtrain <- xgb.DMatrix(train$data, label=train$label, nthread = 2)
|
||||
dimnames(dtrain)
|
||||
colnames(dtrain)
|
||||
colnames(dtrain) <- make.names(1:ncol(train$data))
|
||||
|
||||
@@ -34,7 +34,7 @@ The \code{name} field can be one of the following:
|
||||
}
|
||||
\examples{
|
||||
data(agaricus.train, package='xgboost')
|
||||
dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label))
|
||||
dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))
|
||||
|
||||
labels <- getinfo(dtrain, 'label')
|
||||
setinfo(dtrain, 'label', 1-labels)
|
||||
|
||||
@@ -19,7 +19,7 @@ Currently it displays dimensions and presence of info-fields and colnames.
|
||||
}
|
||||
\examples{
|
||||
data(agaricus.train, package='xgboost')
|
||||
dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label))
|
||||
dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))
|
||||
|
||||
dtrain
|
||||
print(dtrain, verbose=TRUE)
|
||||
|
||||
@@ -33,7 +33,7 @@ The \code{name} field can be one of the following:
|
||||
}
|
||||
\examples{
|
||||
data(agaricus.train, package='xgboost')
|
||||
dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label))
|
||||
dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))
|
||||
|
||||
labels <- getinfo(dtrain, 'label')
|
||||
setinfo(dtrain, 'label', 1-labels)
|
||||
|
||||
@@ -28,7 +28,7 @@ original xgb.DMatrix object
|
||||
}
|
||||
\examples{
|
||||
data(agaricus.train, package='xgboost')
|
||||
dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label))
|
||||
dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))
|
||||
|
||||
dsub <- slice(dtrain, 1:42)
|
||||
labels1 <- getinfo(dsub, 'label')
|
||||
|
||||
@@ -38,7 +38,7 @@ Supported input file formats are either a LIBSVM text file or a binary file that
|
||||
}
|
||||
\examples{
|
||||
data(agaricus.train, package='xgboost')
|
||||
dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label))
|
||||
dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))
|
||||
xgb.DMatrix.save(dtrain, 'xgb.DMatrix.data')
|
||||
dtrain <- xgb.DMatrix('xgb.DMatrix.data')
|
||||
if (file.exists('xgb.DMatrix.data')) file.remove('xgb.DMatrix.data')
|
||||
|
||||
@@ -16,7 +16,7 @@ Save xgb.DMatrix object to binary file
|
||||
}
|
||||
\examples{
|
||||
data(agaricus.train, package='xgboost')
|
||||
dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label))
|
||||
dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))
|
||||
xgb.DMatrix.save(dtrain, 'xgb.DMatrix.data')
|
||||
dtrain <- xgb.DMatrix('xgb.DMatrix.data')
|
||||
if (file.exists('xgb.DMatrix.data')) file.remove('xgb.DMatrix.data')
|
||||
|
||||
@@ -59,8 +59,8 @@ a rule on certain features."
|
||||
\examples{
|
||||
data(agaricus.train, package='xgboost')
|
||||
data(agaricus.test, package='xgboost')
|
||||
dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label))
|
||||
dtest <- with(agaricus.test, xgb.DMatrix(data, label = label))
|
||||
dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))
|
||||
dtest <- with(agaricus.test, xgb.DMatrix(data, label = label, nthread = 2))
|
||||
|
||||
param <- list(max_depth=2, eta=1, silent=1, objective='binary:logistic')
|
||||
nrounds = 4
|
||||
@@ -76,8 +76,12 @@ new.features.train <- xgb.create.features(model = bst, agaricus.train$data)
|
||||
new.features.test <- xgb.create.features(model = bst, agaricus.test$data)
|
||||
|
||||
# learning with new features
|
||||
new.dtrain <- xgb.DMatrix(data = new.features.train, label = agaricus.train$label)
|
||||
new.dtest <- xgb.DMatrix(data = new.features.test, label = agaricus.test$label)
|
||||
new.dtrain <- xgb.DMatrix(
|
||||
data = new.features.train, label = agaricus.train$label, nthread = 2
|
||||
)
|
||||
new.dtest <- xgb.DMatrix(
|
||||
data = new.features.test, label = agaricus.test$label, nthread = 2
|
||||
)
|
||||
watchlist <- list(train = new.dtrain)
|
||||
bst <- xgb.train(params = param, data = new.dtrain, nrounds = nrounds, nthread = 2)
|
||||
|
||||
|
||||
@@ -158,9 +158,9 @@ Adapted from \url{https://en.wikipedia.org/wiki/Cross-validation_\%28statistics\
|
||||
}
|
||||
\examples{
|
||||
data(agaricus.train, package='xgboost')
|
||||
dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label))
|
||||
dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))
|
||||
cv <- xgb.cv(data = dtrain, nrounds = 3, nthread = 2, nfold = 5, metrics = list("rmse","auc"),
|
||||
max_depth = 3, eta = 1, objective = "binary:logistic")
|
||||
max_depth = 3, eta = 1, objective = "binary:logistic")
|
||||
print(cv)
|
||||
print(cv, verbose=TRUE)
|
||||
|
||||
|
||||
@@ -67,7 +67,7 @@ The "Yes" branches are marked by the "< split_value" label.
|
||||
The branches that are also used for missing values are marked as bold
|
||||
(as in "carrying extra capacity").
|
||||
|
||||
This function uses \href{http://www.graphviz.org/}{GraphViz} as a backend of DiagrammeR.
|
||||
This function uses \href{https://www.graphviz.org/}{GraphViz} as a backend of DiagrammeR.
|
||||
}
|
||||
\examples{
|
||||
data(agaricus.train, package='xgboost')
|
||||
|
||||
@@ -241,8 +241,8 @@ The following callbacks are automatically created when certain parameters are se
|
||||
data(agaricus.train, package='xgboost')
|
||||
data(agaricus.test, package='xgboost')
|
||||
|
||||
dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label))
|
||||
dtest <- with(agaricus.test, xgb.DMatrix(data, label = label))
|
||||
dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))
|
||||
dtest <- with(agaricus.test, xgb.DMatrix(data, label = label, nthread = 2))
|
||||
watchlist <- list(train = dtrain, eval = dtest)
|
||||
|
||||
## A simple xgb.train example:
|
||||
|
||||
@@ -3,7 +3,7 @@ PKGROOT=../../
|
||||
ENABLE_STD_THREAD=1
|
||||
# _*_ mode: Makefile; _*_
|
||||
|
||||
CXX_STD = CXX14
|
||||
CXX_STD = CXX17
|
||||
|
||||
XGB_RFLAGS = -DXGBOOST_STRICT_R_MODE=1 -DDMLC_LOG_BEFORE_THROW=0\
|
||||
-DDMLC_ENABLE_STD_THREAD=$(ENABLE_STD_THREAD) -DDMLC_DISABLE_STDIN=1\
|
||||
@@ -23,7 +23,6 @@ PKG_LIBS = @OPENMP_CXXFLAGS@ @OPENMP_LIB@ @ENDIAN_FLAG@ @BACKTRACE_LIB@ -pthread
|
||||
OBJECTS= \
|
||||
./xgboost_R.o \
|
||||
./xgboost_custom.o \
|
||||
./xgboost_assert.o \
|
||||
./init.o \
|
||||
$(PKGROOT)/src/metric/metric.o \
|
||||
$(PKGROOT)/src/metric/elementwise_metric.o \
|
||||
|
||||
@@ -3,7 +3,7 @@ PKGROOT=../../
|
||||
ENABLE_STD_THREAD=0
|
||||
# _*_ mode: Makefile; _*_
|
||||
|
||||
CXX_STD = CXX14
|
||||
CXX_STD = CXX17
|
||||
|
||||
XGB_RFLAGS = -DXGBOOST_STRICT_R_MODE=1 -DDMLC_LOG_BEFORE_THROW=0\
|
||||
-DDMLC_ENABLE_STD_THREAD=$(ENABLE_STD_THREAD) -DDMLC_DISABLE_STDIN=1\
|
||||
@@ -23,7 +23,6 @@ PKG_LIBS = $(SHLIB_OPENMP_CXXFLAGS) -DDMLC_CMAKE_LITTLE_ENDIAN=1 $(SHLIB_PTHRE
|
||||
OBJECTS= \
|
||||
./xgboost_R.o \
|
||||
./xgboost_custom.o \
|
||||
./xgboost_assert.o \
|
||||
./init.o \
|
||||
$(PKGROOT)/src/metric/metric.o \
|
||||
$(PKGROOT)/src/metric/elementwise_metric.o \
|
||||
|
||||
@@ -1,26 +0,0 @@
|
||||
// Copyright (c) 2014 by Contributors
|
||||
#include <stdio.h>
|
||||
#include <stdarg.h>
|
||||
#include <Rinternals.h>
|
||||
|
||||
// implements error handling
|
||||
void XGBoostAssert_R(int exp, const char *fmt, ...) {
|
||||
char buf[1024];
|
||||
if (exp == 0) {
|
||||
va_list args;
|
||||
va_start(args, fmt);
|
||||
vsprintf(buf, fmt, args);
|
||||
va_end(args);
|
||||
error("AssertError:%s\n", buf);
|
||||
}
|
||||
}
|
||||
void XGBoostCheck_R(int exp, const char *fmt, ...) {
|
||||
char buf[1024];
|
||||
if (exp == 0) {
|
||||
va_list args;
|
||||
va_start(args, fmt);
|
||||
vsprintf(buf, fmt, args);
|
||||
va_end(args);
|
||||
error("%s\n", buf);
|
||||
}
|
||||
}
|
||||
@@ -178,17 +178,10 @@ function(xgboost_set_cuda_flags target)
|
||||
$<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler=/utf-8>)
|
||||
endif (MSVC)
|
||||
|
||||
if (PLUGIN_RMM)
|
||||
set_target_properties(${target} PROPERTIES
|
||||
CUDA_STANDARD 17
|
||||
CUDA_STANDARD_REQUIRED ON
|
||||
CUDA_SEPARABLE_COMPILATION OFF)
|
||||
else ()
|
||||
set_target_properties(${target} PROPERTIES
|
||||
CUDA_STANDARD 14
|
||||
CUDA_STANDARD_REQUIRED ON
|
||||
CUDA_SEPARABLE_COMPILATION OFF)
|
||||
endif (PLUGIN_RMM)
|
||||
set_target_properties(${target} PROPERTIES
|
||||
CUDA_STANDARD 17
|
||||
CUDA_STANDARD_REQUIRED ON
|
||||
CUDA_SEPARABLE_COMPILATION OFF)
|
||||
endfunction(xgboost_set_cuda_flags)
|
||||
|
||||
macro(xgboost_link_nccl target)
|
||||
@@ -205,17 +198,10 @@ endmacro(xgboost_link_nccl)
|
||||
|
||||
# compile options
|
||||
macro(xgboost_target_properties target)
|
||||
if (PLUGIN_RMM)
|
||||
set_target_properties(${target} PROPERTIES
|
||||
CXX_STANDARD 17
|
||||
CXX_STANDARD_REQUIRED ON
|
||||
POSITION_INDEPENDENT_CODE ON)
|
||||
else ()
|
||||
set_target_properties(${target} PROPERTIES
|
||||
CXX_STANDARD 14
|
||||
CXX_STANDARD_REQUIRED ON
|
||||
POSITION_INDEPENDENT_CODE ON)
|
||||
endif (PLUGIN_RMM)
|
||||
set_target_properties(${target} PROPERTIES
|
||||
CXX_STANDARD 17
|
||||
CXX_STANDARD_REQUIRED ON
|
||||
POSITION_INDEPENDENT_CODE ON)
|
||||
|
||||
if (HIDE_CXX_SYMBOLS)
|
||||
#-- Hide all C++ symbols
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
cmake_minimum_required(VERSION 3.13)
|
||||
cmake_minimum_required(VERSION 3.18)
|
||||
project(xgboost-c-examples)
|
||||
|
||||
add_subdirectory(basic)
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
cmake_minimum_required(VERSION 3.13)
|
||||
cmake_minimum_required(VERSION 3.18)
|
||||
project(external-memory-demo LANGUAGES C VERSION 0.0.1)
|
||||
|
||||
find_package(xgboost REQUIRED)
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
cmake_minimum_required(VERSION 3.13)
|
||||
cmake_minimum_required(VERSION 3.18)
|
||||
project(inference-demo LANGUAGES C VERSION 0.0.1)
|
||||
find_package(xgboost REQUIRED)
|
||||
|
||||
|
||||
Submodule dmlc-core updated: dfd9365264...81db539486
@@ -8,5 +8,5 @@ As a result it's changing quite often and we don't maintain its stability. Alon
|
||||
plugin system (see ``plugin/example`` in XGBoost's source tree), users can utilize some
|
||||
existing C++ headers to gain more access to the internals of XGBoost.
|
||||
|
||||
* `C++ interface documentation (latest master branch) <https://xgboost.readthedocs.io/en/latest/dev/files.html>`_
|
||||
* `C++ interface documentation (latest master branch) <./dev/files.html>`_
|
||||
* `C++ interface documentation (last stable release) <https://xgboost.readthedocs.io/en/stable/dev/files.html>`_
|
||||
|
||||
@@ -10,7 +10,7 @@ simply look at function comments in ``include/xgboost/c_api.h``. The reference i
|
||||
to sphinx with the help of breathe, which doesn't contain links to examples but might be
|
||||
easier to read. For the original doxygen pages please visit:
|
||||
|
||||
* `C API documentation (latest master branch) <https://xgboost.readthedocs.io/en/latest/dev/c__api_8h.html>`_
|
||||
* `C API documentation (latest master branch) <./dev/c__api_8h.html>`_
|
||||
* `C API documentation (last stable release) <https://xgboost.readthedocs.io/en/stable/dev/c__api_8h.html>`_
|
||||
|
||||
***************
|
||||
|
||||
203
doc/conf.py
203
doc/conf.py
@@ -11,54 +11,107 @@
|
||||
#
|
||||
# All configuration values have a default; values that are commented out
|
||||
# serve to show the default.
|
||||
from subprocess import call
|
||||
from sh.contrib import git
|
||||
import urllib.request
|
||||
from urllib.error import HTTPError
|
||||
import sys
|
||||
import re
|
||||
import os
|
||||
import re
|
||||
import shutil
|
||||
import subprocess
|
||||
import sys
|
||||
import tarfile
|
||||
import urllib.request
|
||||
import warnings
|
||||
from urllib.error import HTTPError
|
||||
|
||||
git_branch = os.getenv('SPHINX_GIT_BRANCH', default=None)
|
||||
from sh.contrib import git
|
||||
|
||||
CURR_PATH = os.path.dirname(os.path.abspath(os.path.expanduser(__file__)))
|
||||
PROJECT_ROOT = os.path.normpath(os.path.join(CURR_PATH, os.path.pardir))
|
||||
TMP_DIR = os.path.join(CURR_PATH, "tmp")
|
||||
DOX_DIR = "doxygen"
|
||||
|
||||
|
||||
def run_doxygen():
|
||||
"""Run the doxygen make command in the designated folder."""
|
||||
curdir = os.path.normpath(os.path.abspath(os.path.curdir))
|
||||
if os.path.exists(TMP_DIR):
|
||||
print(f"Delete directory {TMP_DIR}")
|
||||
shutil.rmtree(TMP_DIR)
|
||||
else:
|
||||
print(f"Create directory {TMP_DIR}")
|
||||
os.mkdir(TMP_DIR)
|
||||
try:
|
||||
os.chdir(PROJECT_ROOT)
|
||||
if not os.path.exists(DOX_DIR):
|
||||
os.mkdir(DOX_DIR)
|
||||
os.chdir(os.path.join(PROJECT_ROOT, DOX_DIR))
|
||||
print(
|
||||
"Build doxygen at {}".format(
|
||||
os.path.join(PROJECT_ROOT, DOX_DIR, "doc_doxygen")
|
||||
)
|
||||
)
|
||||
subprocess.check_call(["cmake", "..", "-DBUILD_C_DOC=ON", "-GNinja"])
|
||||
subprocess.check_call(["ninja", "doc_doxygen"])
|
||||
|
||||
src = os.path.join(PROJECT_ROOT, DOX_DIR, "doc_doxygen", "html")
|
||||
dest = os.path.join(TMP_DIR, "dev")
|
||||
print(f"Copy directory {src} -> {dest}")
|
||||
shutil.copytree(src, dest)
|
||||
except OSError as e:
|
||||
sys.stderr.write("doxygen execution failed: %s" % e)
|
||||
finally:
|
||||
os.chdir(curdir)
|
||||
|
||||
|
||||
def is_readthedocs_build():
|
||||
if os.environ.get("READTHEDOCS", None) == "True":
|
||||
return True
|
||||
warnings.warn(
|
||||
"Skipping Doxygen build... You won't have documentation for C/C++ functions. "
|
||||
"Set environment variable READTHEDOCS=True if you want to build Doxygen. "
|
||||
"(If you do opt in, make sure to install Doxygen, Graphviz, CMake, and C++ compiler "
|
||||
"on your system.)"
|
||||
)
|
||||
return False
|
||||
|
||||
|
||||
if is_readthedocs_build():
|
||||
run_doxygen()
|
||||
|
||||
|
||||
git_branch = os.getenv("SPHINX_GIT_BRANCH", default=None)
|
||||
if not git_branch:
|
||||
# If SPHINX_GIT_BRANCH environment variable is not given, run git
|
||||
# to determine branch name
|
||||
git_branch = [
|
||||
re.sub(r'origin/', '', x.lstrip(' ')) for x in str(
|
||||
git.branch('-r', '--contains', 'HEAD')).rstrip('\n').split('\n')
|
||||
re.sub(r"origin/", "", x.lstrip(" "))
|
||||
for x in str(git.branch("-r", "--contains", "HEAD")).rstrip("\n").split("\n")
|
||||
]
|
||||
git_branch = [x for x in git_branch if 'HEAD' not in x]
|
||||
git_branch = [x for x in git_branch if "HEAD" not in x]
|
||||
else:
|
||||
git_branch = [git_branch]
|
||||
print('git_branch = {}'.format(git_branch[0]))
|
||||
print("git_branch = {}".format(git_branch[0]))
|
||||
|
||||
try:
|
||||
filename, _ = urllib.request.urlretrieve(
|
||||
'https://s3-us-west-2.amazonaws.com/xgboost-docs/{}.tar.bz2'.format(
|
||||
git_branch[0]))
|
||||
call(
|
||||
'if [ -d tmp ]; then rm -rf tmp; fi; mkdir -p tmp/jvm; cd tmp/jvm; tar xvf {}'
|
||||
.format(filename),
|
||||
shell=True)
|
||||
f"https://s3-us-west-2.amazonaws.com/xgboost-docs/{git_branch[0]}.tar.bz2"
|
||||
)
|
||||
if not os.path.exists(TMP_DIR):
|
||||
print(f"Create directory {TMP_DIR}")
|
||||
os.mkdir(TMP_DIR)
|
||||
jvm_doc_dir = os.path.join(TMP_DIR, "jvm")
|
||||
if os.path.exists(jvm_doc_dir):
|
||||
print(f"Delete directory {jvm_doc_dir}")
|
||||
shutil.rmtree(jvm_doc_dir)
|
||||
print(f"Create directory {jvm_doc_dir}")
|
||||
os.mkdir(jvm_doc_dir)
|
||||
|
||||
with tarfile.open(filename, "r:bz2") as t:
|
||||
t.extractall(jvm_doc_dir)
|
||||
except HTTPError:
|
||||
print('JVM doc not found. Skipping...')
|
||||
try:
|
||||
filename, _ = urllib.request.urlretrieve(
|
||||
'https://s3-us-west-2.amazonaws.com/xgboost-docs/doxygen/{}.tar.bz2'.
|
||||
format(git_branch[0]))
|
||||
call(
|
||||
'mkdir -p tmp/dev; cd tmp/dev; tar xvf {}; mv doc_doxygen/html/* .; rm -rf doc_doxygen'
|
||||
.format(filename),
|
||||
shell=True)
|
||||
except HTTPError:
|
||||
print('C API doc not found. Skipping...')
|
||||
print("JVM doc not found. Skipping...")
|
||||
|
||||
# If extensions (or modules to document with autodoc) are in another directory,
|
||||
# add these directories to sys.path here. If the directory is relative to the
|
||||
# documentation root, use os.path.abspath to make it absolute, like shown here.
|
||||
CURR_PATH = os.path.dirname(os.path.abspath(os.path.expanduser(__file__)))
|
||||
PROJECT_ROOT = os.path.normpath(os.path.join(CURR_PATH, os.path.pardir))
|
||||
libpath = os.path.join(PROJECT_ROOT, "python-package/")
|
||||
sys.path.insert(0, libpath)
|
||||
sys.path.insert(0, CURR_PATH)
|
||||
@@ -81,50 +134,56 @@ release = xgboost.__version__
|
||||
# Add any Sphinx extension module names here, as strings. They can be
|
||||
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom ones
|
||||
extensions = [
|
||||
'matplotlib.sphinxext.plot_directive',
|
||||
'sphinx.ext.autodoc',
|
||||
'sphinx.ext.napoleon',
|
||||
'sphinx.ext.mathjax',
|
||||
'sphinx.ext.intersphinx',
|
||||
"matplotlib.sphinxext.plot_directive",
|
||||
"sphinxcontrib.jquery",
|
||||
"sphinx.ext.autodoc",
|
||||
"sphinx.ext.napoleon",
|
||||
"sphinx.ext.mathjax",
|
||||
"sphinx.ext.intersphinx",
|
||||
"sphinx_gallery.gen_gallery",
|
||||
'breathe',
|
||||
'recommonmark'
|
||||
"breathe",
|
||||
"recommonmark",
|
||||
]
|
||||
|
||||
sphinx_gallery_conf = {
|
||||
# path to your example scripts
|
||||
"examples_dirs": ["../demo/guide-python", "../demo/dask", "../demo/aft_survival"],
|
||||
# path to where to save gallery generated output
|
||||
"gallery_dirs": ["python/examples", "python/dask-examples", "python/survival-examples"],
|
||||
"gallery_dirs": [
|
||||
"python/examples",
|
||||
"python/dask-examples",
|
||||
"python/survival-examples",
|
||||
],
|
||||
"matplotlib_animations": True,
|
||||
}
|
||||
|
||||
autodoc_typehints = "description"
|
||||
|
||||
graphviz_output_format = 'png'
|
||||
plot_formats = [('svg', 300), ('png', 100), ('hires.png', 300)]
|
||||
graphviz_output_format = "png"
|
||||
plot_formats = [("svg", 300), ("png", 100), ("hires.png", 300)]
|
||||
plot_html_show_source_link = False
|
||||
plot_html_show_formats = False
|
||||
|
||||
# Breathe extension variables
|
||||
DOX_DIR = "doxygen"
|
||||
breathe_projects = {
|
||||
"xgboost": os.path.join(PROJECT_ROOT, DOX_DIR, "doc_doxygen/xml")
|
||||
}
|
||||
breathe_projects = {}
|
||||
if is_readthedocs_build():
|
||||
breathe_projects = {
|
||||
"xgboost": os.path.join(PROJECT_ROOT, DOX_DIR, "doc_doxygen/xml")
|
||||
}
|
||||
breathe_default_project = "xgboost"
|
||||
|
||||
# Add any paths that contain templates here, relative to this directory.
|
||||
templates_path = ['_templates']
|
||||
templates_path = ["_templates"]
|
||||
|
||||
# The suffix(es) of source filenames.
|
||||
# You can specify multiple suffix as a list of string:
|
||||
source_suffix = ['.rst', '.md']
|
||||
source_suffix = [".rst", ".md"]
|
||||
|
||||
# The encoding of source files.
|
||||
# source_encoding = 'utf-8-sig'
|
||||
|
||||
# The master toctree document.
|
||||
master_doc = 'index'
|
||||
master_doc = "index"
|
||||
|
||||
# The language for content autogenerated by Sphinx. Refer to documentation
|
||||
# for a list of supported languages.
|
||||
@@ -133,7 +192,7 @@ master_doc = 'index'
|
||||
# Usually you set "language" from the command line for these cases.
|
||||
language = "en"
|
||||
|
||||
autoclass_content = 'both'
|
||||
autoclass_content = "both"
|
||||
|
||||
# There are two options for replacing |today|: either, you set today to some
|
||||
# non-false value, then it is used:
|
||||
@@ -143,8 +202,10 @@ autoclass_content = 'both'
|
||||
|
||||
# List of patterns, relative to source directory, that match files and
|
||||
# directories to ignore when looking for source files.
|
||||
exclude_patterns = ['_build']
|
||||
html_extra_path = ['./tmp']
|
||||
exclude_patterns = ["_build"]
|
||||
html_extra_path = []
|
||||
if is_readthedocs_build():
|
||||
html_extra_path = [TMP_DIR]
|
||||
|
||||
# The reST default role (used for this markup: `text`) to use for all
|
||||
# documents.
|
||||
@@ -162,7 +223,7 @@ html_extra_path = ['./tmp']
|
||||
# show_authors = False
|
||||
|
||||
# The name of the Pygments (syntax highlighting) style to use.
|
||||
pygments_style = 'sphinx'
|
||||
pygments_style = "sphinx"
|
||||
|
||||
# A list of ignored prefixes for module index sorting.
|
||||
# modindex_common_prefix = []
|
||||
@@ -185,27 +246,24 @@ html_logo = "https://raw.githubusercontent.com/dmlc/dmlc.github.io/master/img/lo
|
||||
|
||||
html_css_files = ["css/custom.css"]
|
||||
|
||||
html_sidebars = {
|
||||
'**': ['logo-text.html', 'globaltoc.html', 'searchbox.html']
|
||||
}
|
||||
html_sidebars = {"**": ["logo-text.html", "globaltoc.html", "searchbox.html"]}
|
||||
|
||||
# Add any paths that contain custom static files (such as style sheets) here,
|
||||
# relative to this directory. They are copied after the builtin static files,
|
||||
# so a file named "default.css" will overwrite the builtin "default.css".
|
||||
html_static_path = ['_static']
|
||||
html_static_path = ["_static"]
|
||||
|
||||
# Output file base name for HTML help builder.
|
||||
htmlhelp_basename = project + 'doc'
|
||||
htmlhelp_basename = project + "doc"
|
||||
|
||||
# -- Options for LaTeX output ---------------------------------------------
|
||||
latex_elements = {
|
||||
}
|
||||
latex_elements = {}
|
||||
|
||||
# Grouping the document tree into LaTeX files. List of tuples
|
||||
# (source start file, target name, title,
|
||||
# author, documentclass [howto, manual, or own class]).
|
||||
latex_documents = [
|
||||
(master_doc, '%s.tex' % project, project, author, 'manual'),
|
||||
(master_doc, "%s.tex" % project, project, author, "manual"),
|
||||
]
|
||||
|
||||
intersphinx_mapping = {
|
||||
@@ -220,30 +278,5 @@ intersphinx_mapping = {
|
||||
}
|
||||
|
||||
|
||||
# hook for doxygen
|
||||
def run_doxygen():
|
||||
"""Run the doxygen make command in the designated folder."""
|
||||
curdir = os.path.normpath(os.path.abspath(os.path.curdir))
|
||||
try:
|
||||
os.chdir(PROJECT_ROOT)
|
||||
if not os.path.exists(DOX_DIR):
|
||||
os.mkdir(DOX_DIR)
|
||||
os.chdir(os.path.join(PROJECT_ROOT, DOX_DIR))
|
||||
subprocess.check_call(["cmake", "..", "-DBUILD_C_DOC=ON", "-GNinja"])
|
||||
subprocess.check_call(["ninja", "doc_doxygen"])
|
||||
except OSError as e:
|
||||
sys.stderr.write("doxygen execution failed: %s" % e)
|
||||
finally:
|
||||
os.chdir(curdir)
|
||||
|
||||
|
||||
def generate_doxygen_xml(app):
|
||||
"""Run the doxygen make commands if we're on the ReadTheDocs server"""
|
||||
read_the_docs_build = os.environ.get('READTHEDOCS', None) == 'True'
|
||||
if read_the_docs_build:
|
||||
run_doxygen()
|
||||
|
||||
|
||||
def setup(app):
|
||||
app.add_css_file('custom.css')
|
||||
app.connect("builder-inited", generate_doxygen_xml)
|
||||
app.add_css_file("custom.css")
|
||||
|
||||
@@ -4,7 +4,7 @@ XGBoost Release Policy
|
||||
=======================
|
||||
|
||||
Versioning Policy
|
||||
---------------------------
|
||||
-----------------
|
||||
|
||||
Starting from XGBoost 1.0.0, each XGBoost release will be versioned as [MAJOR].[FEATURE].[MAINTENANCE]
|
||||
|
||||
@@ -34,6 +34,20 @@ Making a Release
|
||||
|
||||
+ The CRAN package is maintained by `Tong He <https://github.com/hetong007>`_ and `Jiaming Yuan <https://github.com/trivialfis>`__.
|
||||
|
||||
Before submitting a release, one should test the package on `R-hub <https://builder.r-hub.io/>`__ and `win-builder <https://win-builder.r-project.org/>`__ first. Please note that the R-hub Windows instance doesn't have the exact same environment as the one hosted on win-builder.
|
||||
|
||||
+ The Maven package is maintained by `Nan Zhu <https://github.com/CodingCat>`_ and `Hyunsu Cho <https://github.com/hcho3>`_.
|
||||
|
||||
|
||||
R CRAN Package
|
||||
--------------
|
||||
Before submitting a release, one should test the package on `R-hub <https://builder.r-hub.io/>`__ and `win-builder <https://win-builder.r-project.org/>`__ first. Please note that the R-hub Windows instance doesn't have the exact same environment as the one hosted on win-builder.
|
||||
|
||||
According to the `CRAN policy <https://cran.r-project.org/web/packages/policies.html>`__:
|
||||
|
||||
If running a package uses multiple threads/cores it must never use more than two simultaneously: the check farm is a shared resource and will typically be running many checks simultaneously.
|
||||
|
||||
We need to check the number of CPUs used in examples. Export ``_R_CHECK_EXAMPLE_TIMING_CPU_TO_ELAPSED_THRESHOLD_=2.5`` before running ``R CMD check --as-cran`` `[1] <#references>`__ and make sure the machine you are using has enough CPU cores to reveal any potential policy violation.
|
||||
|
||||
References
|
||||
----------
|
||||
|
||||
[1] https://stat.ethz.ch/pipermail/r-package-devel/2022q4/008610.html
|
||||
|
||||
@@ -44,8 +44,7 @@ General Parameters
|
||||
* ``validate_parameters`` [default to ``false``, except for Python, R and CLI interface]
|
||||
|
||||
- When set to True, XGBoost will perform validation of input parameters to check whether
|
||||
a parameter is used or not. The feature is still experimental. It's expected to have
|
||||
some false positives.
|
||||
a parameter is used or not.
|
||||
|
||||
* ``nthread`` [default to maximum number of threads available if not set]
|
||||
|
||||
@@ -233,24 +232,21 @@ Parameters for Categorical Feature
|
||||
These parameters are only used for training with categorical data. See
|
||||
:doc:`/tutorials/categorical` for more information.
|
||||
|
||||
.. note:: These parameters are experimental. ``exact`` tree method is not yet supported.
|
||||
|
||||
|
||||
* ``max_cat_to_onehot``
|
||||
|
||||
.. versionadded:: 1.6.0
|
||||
|
||||
.. note:: This parameter is experimental. ``exact`` tree method is not yet supported.
|
||||
|
||||
- A threshold for deciding whether XGBoost should use one-hot encoding based split for
|
||||
categorical data. When the number of categories is less than the threshold, one-hot
|
||||
encoding is chosen, otherwise the categories will be partitioned into children nodes.
|
||||
Only relevant for regression and binary classification. Also, ``exact`` tree method is
|
||||
not supported
|
||||
|
||||
* ``max_cat_threshold``
|
||||
|
||||
.. versionadded:: 1.7.0
|
||||
|
||||
.. note:: This parameter is experimental. ``exact`` tree method is not yet supported.
|
||||
|
||||
- Maximum number of categories considered for each split. Used only by partition-based
|
||||
splits for preventing over-fitting.
|
||||
|
||||
|
||||
@@ -25,9 +25,6 @@ Core Data Structure
|
||||
.. autoclass:: xgboost.QuantileDMatrix
|
||||
:show-inheritance:
|
||||
|
||||
.. autoclass:: xgboost.DeviceQuantileDMatrix
|
||||
:show-inheritance:
|
||||
|
||||
.. autoclass:: xgboost.Booster
|
||||
:members:
|
||||
:show-inheritance:
|
||||
@@ -115,7 +112,7 @@ Dask API
|
||||
:inherited-members:
|
||||
:show-inheritance:
|
||||
|
||||
.. autoclass:: xgboost.dask.DaskDeviceQuantileDMatrix
|
||||
.. autoclass:: xgboost.dask.DaskQuantileDMatrix
|
||||
:members:
|
||||
:inherited-members:
|
||||
:show-inheritance:
|
||||
@@ -176,3 +173,13 @@ PySpark API
|
||||
:members:
|
||||
:inherited-members:
|
||||
:show-inheritance:
|
||||
|
||||
.. autoclass:: xgboost.spark.SparkXGBRanker
|
||||
:members:
|
||||
:inherited-members:
|
||||
:show-inheritance:
|
||||
|
||||
.. autoclass:: xgboost.spark.SparkXGBRankerModel
|
||||
:members:
|
||||
:inherited-members:
|
||||
:show-inheritance:
|
||||
|
||||
@@ -45,7 +45,7 @@ Use ``find_package()`` and ``target_link_libraries()`` in your application's CMa
|
||||
|
||||
.. code-block:: cmake
|
||||
|
||||
cmake_minimum_required(VERSION 3.13)
|
||||
cmake_minimum_required(VERSION 3.18)
|
||||
project(your_project_name LANGUAGES C CXX VERSION your_project_version)
|
||||
find_package(xgboost REQUIRED)
|
||||
add_executable(your_project_name /path/to/project_file.c)
|
||||
|
||||
@@ -138,11 +138,11 @@ Miscellaneous
|
||||
|
||||
By default, XGBoost assumes input categories are integers starting from 0 till the number
|
||||
of categories :math:`[0, n\_categories)`. However, user might provide inputs with invalid
|
||||
values due to mistakes or missing values. It can be negative value, integer values that
|
||||
can not be accurately represented by 32-bit floating point, or values that are larger than
|
||||
actual number of unique categories. During training this is validated but for prediction
|
||||
it's treated as the same as missing value for performance reasons. Lastly, missing values
|
||||
are treated as the same as numerical features (using the learned split direction).
|
||||
values due to mistakes or missing values in the training dataset. These can be negative values,
|
||||
integer values that cannot be accurately represented by 32-bit floating point, or values
|
||||
that are larger than actual number of unique categories. During training this is
|
||||
validated, but for prediction it's treated the same as a not-chosen category for
|
||||
performance reasons.
|
||||
|
||||
|
||||
**********
|
||||
|
||||
@@ -564,7 +564,7 @@ Here are some pratices on reducing memory usage with dask and xgboost.
|
||||
nice summary.
|
||||
|
||||
- When using GPU input, like dataframe loaded by ``dask_cudf``, you can try
|
||||
:py:class:`xgboost.dask.DaskDeviceQuantileDMatrix` as a drop in replacement for ``DaskDMatrix``
|
||||
:py:class:`xgboost.dask.DaskQuantileDMatrix` as a drop-in replacement for ``DaskDMatrix``
|
||||
to reduce overall memory usage. See
|
||||
:ref:`sphx_glr_python_dask-examples_gpu_training.py` for an example.
|
||||
|
||||
|
||||
@@ -43,10 +43,10 @@ in spark estimator, and some parameters are replaced with pyspark specific param
|
||||
such as `weight_col`, `validation_indicator_col`, `use_gpu`, for details please see
|
||||
`SparkXGBRegressor` doc.
|
||||
|
||||
The following code snippet shows how to train a spark xgboost regressor model,
|
||||
first we need to prepare a training dataset as a spark dataframe contains
|
||||
"label" column and "features" column(s), the "features" column(s) must be `pyspark.ml.linalg.Vector`
|
||||
type or spark array type or a list of feature column names.
|
||||
The following code snippet shows how to train a spark xgboost regressor model. First, we
|
||||
need to prepare a training dataset as a spark dataframe that contains a "label" column and
|
||||
"features" column(s), the "features" column(s) must be ``pyspark.ml.linalg.Vector`` type
|
||||
or spark array type or a list of feature column names.
|
||||
|
||||
|
||||
.. code-block:: python
|
||||
@@ -54,10 +54,10 @@ type or spark array type or a list of feature column names.
|
||||
xgb_regressor_model = xgb_regressor.fit(train_spark_dataframe)
|
||||
|
||||
|
||||
The following code snippet shows how to predict test data using a spark xgboost regressor model,
|
||||
first we need to prepare a test dataset as a spark dataframe contains
|
||||
"features" and "label" column, the "features" column must be `pyspark.ml.linalg.Vector`
|
||||
type or spark array type.
|
||||
The following code snippet shows how to predict test data using a spark xgboost regressor
|
||||
model. First, we need to prepare a test dataset as a spark dataframe that contains "features"
|
||||
and "label" column, the "features" column must be ``pyspark.ml.linalg.Vector`` type or
|
||||
spark array type.
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
@@ -107,8 +107,8 @@ virtualenv and pip:
|
||||
python -m venv xgboost_env
|
||||
source xgboost_env/bin/activate
|
||||
pip install pyarrow pandas venv-pack xgboost
|
||||
# https://rapids.ai/pip.html#install
|
||||
pip install cudf-cu11 --extra-index-url=https://pypi.ngc.nvidia.com
|
||||
# https://docs.rapids.ai/install#pip-install
|
||||
pip install cudf-cu11 --extra-index-url=https://pypi.nvidia.com
|
||||
venv-pack -o xgboost_env.tar.gz
|
||||
|
||||
With Conda:
|
||||
@@ -240,7 +240,7 @@ additional spark configurations and dependencies:
|
||||
--master spark://<master-ip>:7077 \
|
||||
--conf spark.executor.resource.gpu.amount=1 \
|
||||
--conf spark.task.resource.gpu.amount=1 \
|
||||
--packages com.nvidia:rapids-4-spark_2.12:22.08.0 \
|
||||
--packages com.nvidia:rapids-4-spark_2.12:23.04.0 \
|
||||
--conf spark.plugins=com.nvidia.spark.SQLPlugin \
|
||||
--conf spark.sql.execution.arrow.maxRecordsPerBatch=1000000 \
|
||||
--archives xgboost_env.tar.gz#environment \
|
||||
|
||||
Submodule gputreeshap updated: acb5be3c17...787259b412
@@ -287,11 +287,22 @@ class TCPSocket {
|
||||
#elif defined(__APPLE__)
|
||||
return domain_;
|
||||
#elif defined(__unix__)
|
||||
#ifndef __PASE__
|
||||
std::int32_t domain;
|
||||
socklen_t len = sizeof(domain);
|
||||
xgboost_CHECK_SYS_CALL(
|
||||
getsockopt(handle_, SOL_SOCKET, SO_DOMAIN, reinterpret_cast<char *>(&domain), &len), 0);
|
||||
return ret_iafamily(domain);
|
||||
#else
|
||||
struct sockaddr sa;
|
||||
socklen_t sizeofsa = sizeof(sa);
|
||||
xgboost_CHECK_SYS_CALL(
|
||||
getsockname(handle_, &sa, &sizeofsa), 0);
|
||||
if (sizeofsa < sizeof(uchar_t)*2) {
|
||||
return ret_iafamily(AF_INET);
|
||||
}
|
||||
return ret_iafamily(sa.sa_family);
|
||||
#endif // __PASE__
|
||||
#else
|
||||
LOG(FATAL) << "Unknown platform.";
|
||||
return ret_iafamily(AF_INET);
|
||||
|
||||
@@ -508,7 +508,7 @@ class RegTree : public Model {
|
||||
* \brief drop the trace after fill, must be called after fill.
|
||||
* \param inst The sparse instance to drop.
|
||||
*/
|
||||
void Drop(const SparsePage::Inst& inst);
|
||||
void Drop();
|
||||
/*!
|
||||
* \brief returns the size of the feature vector
|
||||
* \return the size of the feature vector
|
||||
@@ -709,13 +709,10 @@ inline void RegTree::FVec::Fill(const SparsePage::Inst& inst) {
|
||||
has_missing_ = data_.size() != feature_count;
|
||||
}
|
||||
|
||||
inline void RegTree::FVec::Drop(const SparsePage::Inst& inst) {
|
||||
for (auto const& entry : inst) {
|
||||
if (entry.index >= data_.size()) {
|
||||
continue;
|
||||
}
|
||||
data_[entry.index].flag = -1;
|
||||
}
|
||||
inline void RegTree::FVec::Drop() {
|
||||
Entry e{};
|
||||
e.flag = -1;
|
||||
std::fill_n(data_.data(), data_.size(), e);
|
||||
has_missing_ = true;
|
||||
}
|
||||
|
||||
|
||||
@@ -6,6 +6,6 @@
|
||||
|
||||
#define XGBOOST_VER_MAJOR 1
|
||||
#define XGBOOST_VER_MINOR 7
|
||||
#define XGBOOST_VER_PATCH 0
|
||||
#define XGBOOST_VER_PATCH 6
|
||||
|
||||
#endif // XGBOOST_VERSION_CONFIG_H_
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
|
||||
<groupId>ml.dmlc</groupId>
|
||||
<artifactId>xgboost-jvm_2.12</artifactId>
|
||||
<version>1.7.1</version>
|
||||
<version>1.7.6</version>
|
||||
<packaging>pom</packaging>
|
||||
<name>XGBoost JVM Package</name>
|
||||
<description>JVM Package for XGBoost</description>
|
||||
|
||||
@@ -6,10 +6,10 @@
|
||||
<parent>
|
||||
<groupId>ml.dmlc</groupId>
|
||||
<artifactId>xgboost-jvm_2.12</artifactId>
|
||||
<version>1.7.1</version>
|
||||
<version>1.7.6</version>
|
||||
</parent>
|
||||
<artifactId>xgboost4j-example_2.12</artifactId>
|
||||
<version>1.7.1</version>
|
||||
<version>1.7.6</version>
|
||||
<packaging>jar</packaging>
|
||||
<build>
|
||||
<plugins>
|
||||
@@ -26,7 +26,7 @@
|
||||
<dependency>
|
||||
<groupId>ml.dmlc</groupId>
|
||||
<artifactId>xgboost4j-spark_${scala.binary.version}</artifactId>
|
||||
<version>1.7.1</version>
|
||||
<version>1.7.6</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.spark</groupId>
|
||||
@@ -37,7 +37,7 @@
|
||||
<dependency>
|
||||
<groupId>ml.dmlc</groupId>
|
||||
<artifactId>xgboost4j-flink_${scala.binary.version}</artifactId>
|
||||
<version>1.7.1</version>
|
||||
<version>1.7.6</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.commons</groupId>
|
||||
|
||||
@@ -6,10 +6,10 @@
|
||||
<parent>
|
||||
<groupId>ml.dmlc</groupId>
|
||||
<artifactId>xgboost-jvm_2.12</artifactId>
|
||||
<version>1.7.1</version>
|
||||
<version>1.7.6</version>
|
||||
</parent>
|
||||
<artifactId>xgboost4j-flink_2.12</artifactId>
|
||||
<version>1.7.1</version>
|
||||
<version>1.7.6</version>
|
||||
<build>
|
||||
<plugins>
|
||||
<plugin>
|
||||
@@ -26,7 +26,7 @@
|
||||
<dependency>
|
||||
<groupId>ml.dmlc</groupId>
|
||||
<artifactId>xgboost4j_${scala.binary.version}</artifactId>
|
||||
<version>1.7.1</version>
|
||||
<version>1.7.6</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.commons</groupId>
|
||||
|
||||
@@ -6,10 +6,10 @@
|
||||
<parent>
|
||||
<groupId>ml.dmlc</groupId>
|
||||
<artifactId>xgboost-jvm_2.12</artifactId>
|
||||
<version>1.7.1</version>
|
||||
<version>1.7.6</version>
|
||||
</parent>
|
||||
<artifactId>xgboost4j-gpu_2.12</artifactId>
|
||||
<version>1.7.1</version>
|
||||
<version>1.7.6</version>
|
||||
<packaging>jar</packaging>
|
||||
|
||||
<dependencies>
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
#include <jni.h>
|
||||
#include <thrust/system/cuda/experimental/pinned_allocator.h>
|
||||
|
||||
#include "../../../../src/common/device_helpers.cuh"
|
||||
#include "../../../../src/common/cuda_pinned_allocator.h"
|
||||
#include "../../../../src/data/array_interface.h"
|
||||
#include "jvm_utils.h"
|
||||
#include <xgboost/c_api.h>
|
||||
@@ -131,7 +131,7 @@ class DataIteratorProxy {
|
||||
bool cache_on_host_{true}; // TODO(Bobby): Make this optional.
|
||||
|
||||
template <typename T>
|
||||
using Alloc = thrust::system::cuda::experimental::pinned_allocator<T>;
|
||||
using Alloc = xgboost::common::cuda::pinned_allocator<T>;
|
||||
template <typename U>
|
||||
using HostVector = std::vector<U, Alloc<U>>;
|
||||
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
<parent>
|
||||
<groupId>ml.dmlc</groupId>
|
||||
<artifactId>xgboost-jvm_2.12</artifactId>
|
||||
<version>1.7.1</version>
|
||||
<version>1.7.6</version>
|
||||
</parent>
|
||||
<artifactId>xgboost4j-spark-gpu_2.12</artifactId>
|
||||
<build>
|
||||
@@ -24,7 +24,7 @@
|
||||
<dependency>
|
||||
<groupId>ml.dmlc</groupId>
|
||||
<artifactId>xgboost4j-gpu_${scala.binary.version}</artifactId>
|
||||
<version>1.7.1</version>
|
||||
<version>1.7.6</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.spark</groupId>
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
<parent>
|
||||
<groupId>ml.dmlc</groupId>
|
||||
<artifactId>xgboost-jvm_2.12</artifactId>
|
||||
<version>1.7.1</version>
|
||||
<version>1.7.6</version>
|
||||
</parent>
|
||||
<artifactId>xgboost4j-spark_2.12</artifactId>
|
||||
<build>
|
||||
@@ -24,7 +24,7 @@
|
||||
<dependency>
|
||||
<groupId>ml.dmlc</groupId>
|
||||
<artifactId>xgboost4j_${scala.binary.version}</artifactId>
|
||||
<version>1.7.1</version>
|
||||
<version>1.7.6</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.spark</groupId>
|
||||
|
||||
@@ -1,9 +1,9 @@
|
||||
from sklearn.datasets import load_iris
|
||||
import numpy as np
|
||||
import pandas
|
||||
from sklearn.datasets import load_iris
|
||||
|
||||
X, y = load_iris(return_X_y=True)
|
||||
y = y.astype(np.int)
|
||||
y = y.astype(np.int32)
|
||||
df = pandas.DataFrame(data=X, columns=['sepal length', 'sepal width', 'petal length', 'petal width'])
|
||||
class_id_to_name = {0:'Iris-setosa', 1:'Iris-versicolor', 2:'Iris-virginica'}
|
||||
df['class'] = np.vectorize(class_id_to_name.get)(y)
|
||||
|
||||
@@ -6,10 +6,10 @@
|
||||
<parent>
|
||||
<groupId>ml.dmlc</groupId>
|
||||
<artifactId>xgboost-jvm_2.12</artifactId>
|
||||
<version>1.7.1</version>
|
||||
<version>1.7.6</version>
|
||||
</parent>
|
||||
<artifactId>xgboost4j_2.12</artifactId>
|
||||
<version>1.7.1</version>
|
||||
<version>1.7.6</version>
|
||||
<packaging>jar</packaging>
|
||||
|
||||
<dependencies>
|
||||
|
||||
@@ -15,7 +15,7 @@ if (PLUGIN_UPDATER_ONEAPI)
|
||||
target_link_libraries(oneapi_plugin PUBLIC -fsycl)
|
||||
set_target_properties(oneapi_plugin PROPERTIES
|
||||
COMPILE_FLAGS -fsycl
|
||||
CXX_STANDARD 14
|
||||
CXX_STANDARD 17
|
||||
CXX_STANDARD_REQUIRED ON
|
||||
POSITION_INDEPENDENT_CODE ON)
|
||||
if (USE_OPENMP)
|
||||
|
||||
@@ -1 +1 @@
|
||||
1.7.1
|
||||
1.7.6
|
||||
|
||||
@@ -36,13 +36,13 @@ try:
|
||||
|
||||
PANDAS_INSTALLED = True
|
||||
except ImportError:
|
||||
|
||||
MultiIndex = object
|
||||
DataFrame = object
|
||||
Series = object
|
||||
pandas_concat = None
|
||||
PANDAS_INSTALLED = False
|
||||
|
||||
|
||||
# sklearn
|
||||
try:
|
||||
from sklearn.base import BaseEstimator as XGBModelBase
|
||||
@@ -72,6 +72,22 @@ except ImportError:
|
||||
XGBStratifiedKFold = None
|
||||
|
||||
|
||||
_logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def is_cudf_available() -> bool:
|
||||
"""Check cuDF package available or not"""
|
||||
if importlib.util.find_spec("cudf") is None:
|
||||
return False
|
||||
try:
|
||||
import cudf
|
||||
|
||||
return True
|
||||
except ImportError:
|
||||
_logger.exception("Importing cuDF failed, use DMatrix instead of QDM")
|
||||
return False
|
||||
|
||||
|
||||
class XGBoostLabelEncoder(LabelEncoder):
|
||||
"""Label encoder with JSON serialization methods."""
|
||||
|
||||
@@ -144,6 +160,7 @@ def concat(value: Sequence[_T]) -> _T: # pylint: disable=too-many-return-statem
|
||||
# `importlib.utils`, except it's unclear from its document on how to use it. This one
|
||||
# seems to be easy to understand and works out of box.
|
||||
|
||||
|
||||
# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this
|
||||
|
||||
@@ -2172,6 +2172,7 @@ class Booster:
|
||||
)
|
||||
return _prediction_output(shape, dims, preds, False)
|
||||
|
||||
# pylint: disable=too-many-statements
|
||||
def inplace_predict(
|
||||
self,
|
||||
data: DataType,
|
||||
@@ -2192,10 +2193,10 @@ class Booster:
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
booster.set_param({'predictor': 'gpu_predictor'})
|
||||
booster.set_param({"predictor": "gpu_predictor"})
|
||||
booster.inplace_predict(cupy_array)
|
||||
|
||||
booster.set_param({'predictor': 'cpu_predictor})
|
||||
booster.set_param({"predictor": "cpu_predictor"})
|
||||
booster.inplace_predict(numpy_array)
|
||||
|
||||
.. versionadded:: 1.1.0
|
||||
@@ -2301,14 +2302,16 @@ class Booster:
|
||||
)
|
||||
return _prediction_output(shape, dims, preds, False)
|
||||
if isinstance(data, scipy.sparse.csr_matrix):
|
||||
csr = data
|
||||
from .data import _transform_scipy_csr
|
||||
|
||||
data = _transform_scipy_csr(data)
|
||||
_check_call(
|
||||
_LIB.XGBoosterPredictFromCSR(
|
||||
self.handle,
|
||||
_array_interface(csr.indptr),
|
||||
_array_interface(csr.indices),
|
||||
_array_interface(csr.data),
|
||||
c_bst_ulong(csr.shape[1]),
|
||||
_array_interface(data.indptr),
|
||||
_array_interface(data.indices),
|
||||
_array_interface(data.data),
|
||||
c_bst_ulong(data.shape[1]),
|
||||
from_pystr_to_cstr(json.dumps(args)),
|
||||
p_handle,
|
||||
ctypes.byref(shape),
|
||||
|
||||
@@ -853,7 +853,7 @@ async def _get_rabit_args(
|
||||
sched_addr = None
|
||||
|
||||
# make sure all workers are online so that we can obtain reliable scheduler_info
|
||||
client.wait_for_workers(n_workers)
|
||||
await client.wait_for_workers(n_workers) # type: ignore
|
||||
env = await client.run_on_scheduler(
|
||||
_start_tracker, n_workers, sched_addr, user_addr
|
||||
)
|
||||
|
||||
@@ -30,6 +30,7 @@ from .core import (
|
||||
c_array,
|
||||
c_str,
|
||||
from_pystr_to_cstr,
|
||||
make_jcargs,
|
||||
)
|
||||
|
||||
DispatchedDataBackendReturnType = Tuple[
|
||||
@@ -80,6 +81,21 @@ def _array_interface(data: np.ndarray) -> bytes:
|
||||
return interface_str
|
||||
|
||||
|
||||
def _transform_scipy_csr(data: DataType) -> DataType:
|
||||
from scipy.sparse import csr_matrix
|
||||
|
||||
indptr, _ = _ensure_np_dtype(data.indptr, data.indptr.dtype)
|
||||
indices, _ = _ensure_np_dtype(data.indices, data.indices.dtype)
|
||||
values, _ = _ensure_np_dtype(data.data, data.data.dtype)
|
||||
if (
|
||||
indptr is not data.indptr
|
||||
or indices is not data.indices
|
||||
or values is not data.data
|
||||
):
|
||||
data = csr_matrix((values, indices, indptr), shape=data.shape)
|
||||
return data
|
||||
|
||||
|
||||
def _from_scipy_csr(
|
||||
data: DataType,
|
||||
missing: FloatCompatible,
|
||||
@@ -93,18 +109,14 @@ def _from_scipy_csr(
|
||||
f"length mismatch: {len(data.indices)} vs {len(data.data)}"
|
||||
)
|
||||
handle = ctypes.c_void_p()
|
||||
args = {
|
||||
"missing": float(missing),
|
||||
"nthread": int(nthread),
|
||||
}
|
||||
config = bytes(json.dumps(args), "utf-8")
|
||||
data = _transform_scipy_csr(data)
|
||||
_check_call(
|
||||
_LIB.XGDMatrixCreateFromCSR(
|
||||
_array_interface(data.indptr),
|
||||
_array_interface(data.indices),
|
||||
_array_interface(data.data),
|
||||
c_bst_ulong(data.shape[1]),
|
||||
config,
|
||||
make_jcargs(missing=float(missing), nthread=int(nthread)),
|
||||
ctypes.byref(handle),
|
||||
)
|
||||
)
|
||||
@@ -153,12 +165,13 @@ def _is_numpy_array(data: DataType) -> bool:
|
||||
|
||||
|
||||
def _ensure_np_dtype(
|
||||
data: DataType,
|
||||
dtype: Optional[NumpyDType]
|
||||
data: DataType, dtype: Optional[NumpyDType]
|
||||
) -> Tuple[np.ndarray, Optional[NumpyDType]]:
|
||||
if data.dtype.hasobject or data.dtype in [np.float16, np.bool_]:
|
||||
data = data.astype(np.float32, copy=False)
|
||||
dtype = np.float32
|
||||
data = data.astype(dtype, copy=False)
|
||||
if not data.flags.aligned:
|
||||
data = np.require(data, requirements="A")
|
||||
return data, dtype
|
||||
|
||||
|
||||
@@ -1197,11 +1210,13 @@ def _proxy_transform(
|
||||
data, _ = _ensure_np_dtype(data, data.dtype)
|
||||
return data, None, feature_names, feature_types
|
||||
if _is_scipy_csr(data):
|
||||
data = _transform_scipy_csr(data)
|
||||
return data, None, feature_names, feature_types
|
||||
if _is_pandas_df(data):
|
||||
arr, feature_names, feature_types = _transform_pandas_df(
|
||||
data, enable_categorical, feature_names, feature_types
|
||||
)
|
||||
arr, _ = _ensure_np_dtype(arr, arr.dtype)
|
||||
return arr, None, feature_names, feature_types
|
||||
raise TypeError("Value type is not supported for data iterator:" + str(type(data)))
|
||||
|
||||
|
||||
@@ -674,7 +674,7 @@ class XGBModel(XGBModelBase):
|
||||
self.kwargs = {}
|
||||
self.kwargs[key] = value
|
||||
|
||||
if hasattr(self, "_Booster"):
|
||||
if self.__sklearn_is_fitted__():
|
||||
parameters = self.get_xgb_params()
|
||||
self.get_booster().set_param(parameters)
|
||||
|
||||
@@ -701,39 +701,12 @@ class XGBModel(XGBModelBase):
|
||||
np.iinfo(np.int32).max
|
||||
)
|
||||
|
||||
def parse_parameter(value: Any) -> Optional[Union[int, float, str]]:
|
||||
for t in (int, float, str):
|
||||
try:
|
||||
ret = t(value)
|
||||
return ret
|
||||
except ValueError:
|
||||
continue
|
||||
return None
|
||||
|
||||
# Get internal parameter values
|
||||
try:
|
||||
config = json.loads(self.get_booster().save_config())
|
||||
stack = [config]
|
||||
internal = {}
|
||||
while stack:
|
||||
obj = stack.pop()
|
||||
for k, v in obj.items():
|
||||
if k.endswith("_param"):
|
||||
for p_k, p_v in v.items():
|
||||
internal[p_k] = p_v
|
||||
elif isinstance(v, dict):
|
||||
stack.append(v)
|
||||
|
||||
for k, v in internal.items():
|
||||
if k in params and params[k] is None:
|
||||
params[k] = parse_parameter(v)
|
||||
except ValueError:
|
||||
pass
|
||||
return params
|
||||
|
||||
def get_xgb_params(self) -> Dict[str, Any]:
|
||||
"""Get xgboost specific parameters."""
|
||||
params = self.get_params()
|
||||
params: Dict[str, Any] = self.get_params()
|
||||
|
||||
# Parameters that should not go into native learner.
|
||||
wrapper_specific = {
|
||||
"importance_type",
|
||||
@@ -750,6 +723,7 @@ class XGBModel(XGBModelBase):
|
||||
for k, v in params.items():
|
||||
if k not in wrapper_specific and not callable(v):
|
||||
filtered[k] = v
|
||||
|
||||
return filtered
|
||||
|
||||
def get_num_boosting_rounds(self) -> int:
|
||||
@@ -1070,7 +1044,7 @@ class XGBModel(XGBModelBase):
|
||||
# error with incompatible data type.
|
||||
# Inplace predict doesn't handle as many data types as DMatrix, but it's
|
||||
# sufficient for dask interface where input is simpler.
|
||||
predictor = self.get_params().get("predictor", None)
|
||||
predictor = self.get_xgb_params().get("predictor", None)
|
||||
if predictor in ("auto", None) and self.booster != "gblinear":
|
||||
return True
|
||||
return False
|
||||
@@ -1336,7 +1310,7 @@ class XGBModel(XGBModelBase):
|
||||
-------
|
||||
coef_ : array of shape ``[n_features]`` or ``[n_classes, n_features]``
|
||||
"""
|
||||
if self.get_params()["booster"] != "gblinear":
|
||||
if self.get_xgb_params()["booster"] != "gblinear":
|
||||
raise AttributeError(
|
||||
f"Coefficients are not defined for Booster type {self.booster}"
|
||||
)
|
||||
@@ -1366,7 +1340,7 @@ class XGBModel(XGBModelBase):
|
||||
-------
|
||||
intercept_ : array of shape ``(1,)`` or ``[n_classes]``
|
||||
"""
|
||||
if self.get_params()["booster"] != "gblinear":
|
||||
if self.get_xgb_params()["booster"] != "gblinear":
|
||||
raise AttributeError(
|
||||
f"Intercept (bias) is not defined for Booster type {self.booster}"
|
||||
)
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
# type: ignore
|
||||
"""PySpark XGBoost integration interface
|
||||
"""
|
||||
"""PySpark XGBoost integration interface"""
|
||||
|
||||
try:
|
||||
import pyspark
|
||||
@@ -11,6 +10,7 @@ from .estimator import (
|
||||
SparkXGBClassifier,
|
||||
SparkXGBClassifierModel,
|
||||
SparkXGBRanker,
|
||||
SparkXGBRankerModel,
|
||||
SparkXGBRegressor,
|
||||
SparkXGBRegressorModel,
|
||||
)
|
||||
@@ -21,4 +21,5 @@ __all__ = [
|
||||
"SparkXGBRegressor",
|
||||
"SparkXGBRegressorModel",
|
||||
"SparkXGBRanker",
|
||||
"SparkXGBRankerModel",
|
||||
]
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
# type: ignore
|
||||
"""Xgboost pyspark integration submodule for core code."""
|
||||
# pylint: disable=fixme, too-many-ancestors, protected-access, no-member, invalid-name
|
||||
# pylint: disable=too-few-public-methods, too-many-lines
|
||||
# pylint: disable=too-few-public-methods, too-many-lines, too-many-branches
|
||||
import json
|
||||
from typing import Iterator, Optional, Tuple
|
||||
|
||||
@@ -32,6 +32,7 @@ from pyspark.sql.types import (
|
||||
ShortType,
|
||||
)
|
||||
from scipy.special import expit, softmax # pylint: disable=no-name-in-module
|
||||
from xgboost.compat import is_cudf_available
|
||||
from xgboost.core import Booster
|
||||
from xgboost.training import train as worker_train
|
||||
|
||||
@@ -139,6 +140,13 @@ _unsupported_predict_params = {
|
||||
}
|
||||
|
||||
|
||||
# TODO: supply hint message for all other unsupported params.
|
||||
_unsupported_params_hint_message = {
|
||||
"enable_categorical": "`xgboost.spark` estimators do not have 'enable_categorical' param, "
|
||||
"but you can set `feature_types` param and mark categorical features with 'c' string."
|
||||
}
|
||||
|
||||
|
||||
class _SparkXGBParams(
|
||||
HasFeaturesCol,
|
||||
HasLabelCol,
|
||||
@@ -522,7 +530,10 @@ class _SparkXGBEstimator(Estimator, _SparkXGBParams, MLReadable, MLWritable):
|
||||
or k in _unsupported_predict_params
|
||||
or k in _unsupported_train_params
|
||||
):
|
||||
raise ValueError(f"Unsupported param '{k}'.")
|
||||
err_msg = _unsupported_params_hint_message.get(
|
||||
k, f"Unsupported param '{k}'."
|
||||
)
|
||||
raise ValueError(err_msg)
|
||||
_extra_params[k] = v
|
||||
_existing_extra_params = self.getOrDefault(self.arbitrary_params_dict)
|
||||
self._set(arbitrary_params_dict={**_existing_extra_params, **_extra_params})
|
||||
@@ -728,6 +739,10 @@ class _SparkXGBEstimator(Estimator, _SparkXGBParams, MLReadable, MLWritable):
|
||||
else:
|
||||
dataset = dataset.repartition(num_workers)
|
||||
|
||||
if self.isDefined(self.qid_col) and self.getOrDefault(self.qid_col):
|
||||
# XGBoost requires qid to be sorted for each partition
|
||||
dataset = dataset.sortWithinPartitions(alias.qid, ascending=True)
|
||||
|
||||
train_params = self._get_distributed_train_params(dataset)
|
||||
booster_params, train_call_kwargs_params = self._get_xgb_train_call_args(
|
||||
train_params
|
||||
@@ -744,6 +759,8 @@ class _SparkXGBEstimator(Estimator, _SparkXGBParams, MLReadable, MLWritable):
|
||||
"feature_weights": self.getOrDefault(self.feature_weights),
|
||||
"missing": float(self.getOrDefault(self.missing)),
|
||||
}
|
||||
if dmatrix_kwargs["feature_types"] is not None:
|
||||
dmatrix_kwargs["enable_categorical"] = True
|
||||
booster_params["nthread"] = cpu_per_task
|
||||
use_gpu = self.getOrDefault(self.use_gpu)
|
||||
|
||||
@@ -755,7 +772,8 @@ class _SparkXGBEstimator(Estimator, _SparkXGBParams, MLReadable, MLWritable):
|
||||
k: v for k, v in train_call_kwargs_params.items() if v is not None
|
||||
}
|
||||
dmatrix_kwargs = {k: v for k, v in dmatrix_kwargs.items() if v is not None}
|
||||
use_qdm = booster_params.get("tree_method", None) in ("hist", "gpu_hist")
|
||||
|
||||
use_hist = booster_params.get("tree_method", None) in ("hist", "gpu_hist")
|
||||
|
||||
def _train_booster(pandas_df_iter):
|
||||
"""Takes in an RDD partition and outputs a booster for that partition after
|
||||
@@ -769,6 +787,15 @@ class _SparkXGBEstimator(Estimator, _SparkXGBParams, MLReadable, MLWritable):
|
||||
|
||||
gpu_id = None
|
||||
|
||||
# If cuDF is not installed, then use DMatrix instead of QDM,
|
||||
# because without cuDF, DMatrix performs better than QDM.
|
||||
# Note: Checking `is_cudf_available` in spark worker side because
|
||||
# spark worker might have a different python environment from the driver side.
|
||||
if use_gpu:
|
||||
use_qdm = use_hist and is_cudf_available()
|
||||
else:
|
||||
use_qdm = use_hist
|
||||
|
||||
if use_qdm and (booster_params.get("max_bin", None) is not None):
|
||||
dmatrix_kwargs["max_bin"] = booster_params["max_bin"]
|
||||
|
||||
@@ -839,7 +866,11 @@ class _SparkXGBEstimator(Estimator, _SparkXGBParams, MLReadable, MLWritable):
|
||||
result_xgb_model = self._convert_to_sklearn_model(
|
||||
bytearray(booster, "utf-8"), config
|
||||
)
|
||||
return self._copyValues(self._create_pyspark_model(result_xgb_model))
|
||||
spark_model = self._create_pyspark_model(result_xgb_model)
|
||||
# According to pyspark ML convention, the model uid should be the same
|
||||
# as the estimator uid.
|
||||
spark_model._resetUid(self.uid)
|
||||
return self._copyValues(spark_model)
|
||||
|
||||
def write(self):
|
||||
"""
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
cmake_minimum_required(VERSION 3.3)
|
||||
cmake_minimum_required(VERSION 3.18)
|
||||
|
||||
find_package(Threads REQUIRED)
|
||||
|
||||
|
||||
@@ -48,20 +48,21 @@ inline XGBOOST_DEVICE bool InvalidCat(float cat) {
|
||||
return cat < 0 || cat >= kMaxCat;
|
||||
}
|
||||
|
||||
/* \brief Whether should it traverse to left branch of a tree.
|
||||
/**
|
||||
* \brief Whether it should traverse to the left branch of a tree.
|
||||
*
|
||||
* For one hot split, go to left if it's NOT the matching category.
|
||||
* Go to left if it's NOT the matching category, which matches one-hot encoding.
|
||||
*/
|
||||
template <bool validate = true>
|
||||
inline XGBOOST_DEVICE bool Decision(common::Span<uint32_t const> cats, float cat, bool dft_left) {
|
||||
inline XGBOOST_DEVICE bool Decision(common::Span<uint32_t const> cats, float cat) {
|
||||
KCatBitField const s_cats(cats);
|
||||
// FIXME: Size() is not accurate since it represents the size of bit set instead of
|
||||
// actual number of categories.
|
||||
if (XGBOOST_EXPECT(validate && (InvalidCat(cat) || cat >= s_cats.Size()), false)) {
|
||||
return dft_left;
|
||||
if (XGBOOST_EXPECT(InvalidCat(cat), false)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
auto pos = KCatBitField::ToBitPos(cat);
|
||||
// If the input category is larger than the size of the bit field, it implies that the
|
||||
// category is not chosen. Otherwise the bit field would have the category instead of
|
||||
// being smaller than the category value.
|
||||
if (pos.int_pos >= cats.size()) {
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -46,7 +46,7 @@ void ColumnMatrix::InitStorage(GHistIndexMatrix const& gmat, double sparse_thres
|
||||
feature_offsets_[fid] = accum_index;
|
||||
}
|
||||
|
||||
SetTypeSize(gmat.max_num_bins);
|
||||
SetTypeSize(gmat.MaxNumBinPerFeat());
|
||||
auto storage_size =
|
||||
feature_offsets_.back() * static_cast<std::underlying_type_t<BinTypeSize>>(bins_type_size_);
|
||||
index_.resize(storage_size, 0);
|
||||
|
||||
91
src/common/cuda_pinned_allocator.h
Normal file
91
src/common/cuda_pinned_allocator.h
Normal file
@@ -0,0 +1,91 @@
|
||||
/*!
|
||||
* Copyright 2022 by XGBoost Contributors
|
||||
* \file cuda_pinned_allocator.h
|
||||
* \brief cuda pinned allocator for usage with thrust containers
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstddef>
|
||||
#include <limits>
|
||||
|
||||
#include "common.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace common {
|
||||
namespace cuda {
|
||||
|
||||
// \p pinned_allocator is a CUDA-specific host memory allocator
|
||||
// that employs \c cudaMallocHost for allocation.
|
||||
//
|
||||
// This implementation is ported from the experimental/pinned_allocator
|
||||
// that Thrust used to provide.
|
||||
//
|
||||
// \see https://en.cppreference.com/w/cpp/memory/allocator
|
||||
template <typename T>
|
||||
class pinned_allocator;
|
||||
|
||||
template <>
|
||||
class pinned_allocator<void> {
|
||||
public:
|
||||
using value_type = void; // NOLINT: The type of the elements in the allocator
|
||||
using pointer = void*; // NOLINT: The type returned by address() / allocate()
|
||||
using const_pointer = const void*; // NOLINT: The type returned by address()
|
||||
using size_type = std::size_t; // NOLINT: The type used for the size of the allocation
|
||||
using difference_type = std::ptrdiff_t; // NOLINT: The type of the distance between two pointers
|
||||
|
||||
template <typename U>
|
||||
struct rebind { // NOLINT
|
||||
using other = pinned_allocator<U>; // NOLINT: The rebound type
|
||||
};
|
||||
};
|
||||
|
||||
|
||||
template <typename T>
|
||||
class pinned_allocator {
|
||||
public:
|
||||
using value_type = T; // NOLINT: The type of the elements in the allocator
|
||||
using pointer = T*; // NOLINT: The type returned by address() / allocate()
|
||||
using const_pointer = const T*; // NOLINT: The type returned by address()
|
||||
using reference = T&; // NOLINT: The parameter type for address()
|
||||
using const_reference = const T&; // NOLINT: The parameter type for address()
|
||||
using size_type = std::size_t; // NOLINT: The type used for the size of the allocation
|
||||
using difference_type = std::ptrdiff_t; // NOLINT: The type of the distance between two pointers
|
||||
|
||||
template <typename U>
|
||||
struct rebind { // NOLINT
|
||||
using other = pinned_allocator<U>; // NOLINT: The rebound type
|
||||
};
|
||||
|
||||
XGBOOST_DEVICE inline pinned_allocator() {}; // NOLINT: host/device markup ignored on defaulted functions
|
||||
XGBOOST_DEVICE inline ~pinned_allocator() {} // NOLINT: host/device markup ignored on defaulted functions
|
||||
XGBOOST_DEVICE inline pinned_allocator(pinned_allocator const&) {} // NOLINT: host/device markup ignored on defaulted functions
|
||||
|
||||
|
||||
template <typename U>
|
||||
XGBOOST_DEVICE inline pinned_allocator(pinned_allocator<U> const&) {} // NOLINT
|
||||
|
||||
XGBOOST_DEVICE inline pointer address(reference r) { return &r; } // NOLINT
|
||||
XGBOOST_DEVICE inline const_pointer address(const_reference r) { return &r; } // NOLINT
|
||||
|
||||
inline pointer allocate(size_type cnt, const_pointer = nullptr) { // NOLINT
|
||||
if (cnt > this->max_size()) { throw std::bad_alloc(); } // end if
|
||||
|
||||
pointer result(nullptr);
|
||||
dh::safe_cuda(cudaMallocHost(reinterpret_cast<void**>(&result), cnt * sizeof(value_type)));
|
||||
return result;
|
||||
}
|
||||
|
||||
inline void deallocate(pointer p, size_type) { dh::safe_cuda(cudaFreeHost(p)); } // NOLINT
|
||||
|
||||
inline size_type max_size() const { return (std::numeric_limits<size_type>::max)() / sizeof(T); } // NOLINT
|
||||
|
||||
XGBOOST_DEVICE inline bool operator==(pinned_allocator const& x) const { return true; }
|
||||
|
||||
XGBOOST_DEVICE inline bool operator!=(pinned_allocator const& x) const {
|
||||
return !operator==(x);
|
||||
}
|
||||
};
|
||||
} // namespace cuda
|
||||
} // namespace common
|
||||
} // namespace xgboost
|
||||
@@ -62,7 +62,7 @@ void ElementWiseKernel(GenericParameter const* ctx, linalg::TensorView<T, D> t,
|
||||
#endif // !defined(XGBOOST_USE_CUDA)
|
||||
|
||||
template <typename T, std::int32_t kDim>
|
||||
auto cbegin(TensorView<T, kDim> v) { // NOLINT
|
||||
auto cbegin(TensorView<T, kDim> const& v) { // NOLINT
|
||||
auto it = common::MakeIndexTransformIter([&](size_t i) -> std::remove_cv_t<T> const& {
|
||||
return linalg::detail::Apply(v, linalg::UnravelIndex(i, v.Shape()));
|
||||
});
|
||||
@@ -70,19 +70,19 @@ auto cbegin(TensorView<T, kDim> v) { // NOLINT
|
||||
}
|
||||
|
||||
template <typename T, std::int32_t kDim>
|
||||
auto cend(TensorView<T, kDim> v) { // NOLINT
|
||||
auto cend(TensorView<T, kDim> const& v) { // NOLINT
|
||||
return cbegin(v) + v.Size();
|
||||
}
|
||||
|
||||
template <typename T, std::int32_t kDim>
|
||||
auto begin(TensorView<T, kDim> v) { // NOLINT
|
||||
auto begin(TensorView<T, kDim>& v) { // NOLINT
|
||||
auto it = common::MakeIndexTransformIter(
|
||||
[&](size_t i) -> T& { return linalg::detail::Apply(v, linalg::UnravelIndex(i, v.Shape())); });
|
||||
return it;
|
||||
}
|
||||
|
||||
template <typename T, std::int32_t kDim>
|
||||
auto end(TensorView<T, kDim> v) { // NOLINT
|
||||
auto end(TensorView<T, kDim>& v) { // NOLINT
|
||||
return begin(v) + v.Size();
|
||||
}
|
||||
} // namespace linalg
|
||||
|
||||
@@ -144,7 +144,7 @@ class PartitionBuilder {
|
||||
auto gidx = gidx_calc(ridx);
|
||||
bool go_left = default_left;
|
||||
if (gidx > -1) {
|
||||
go_left = Decision(node_cats, cut_values[gidx], default_left);
|
||||
go_left = Decision(node_cats, cut_values[gidx]);
|
||||
}
|
||||
return go_left;
|
||||
} else {
|
||||
@@ -157,7 +157,7 @@ class PartitionBuilder {
|
||||
bool go_left = default_left;
|
||||
if (gidx > -1) {
|
||||
if (is_cat) {
|
||||
go_left = Decision(node_cats, cut_values[gidx], default_left);
|
||||
go_left = Decision(node_cats, cut_values[gidx]);
|
||||
} else {
|
||||
go_left = cut_values[gidx] <= nodes[node_in_set].split.split_value;
|
||||
}
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*!
|
||||
* Copyright 2019-2021 by Contributors
|
||||
/**
|
||||
* Copyright 2019-2023 by XGBoost Contributors
|
||||
* \file array_interface.h
|
||||
* \brief View of __array_interface__
|
||||
*/
|
||||
@@ -7,9 +7,11 @@
|
||||
#define XGBOOST_DATA_ARRAY_INTERFACE_H_
|
||||
|
||||
#include <algorithm>
|
||||
#include <cinttypes>
|
||||
#include <cstddef> // std::size_t
|
||||
#include <cstdint>
|
||||
#include <map>
|
||||
#include <string>
|
||||
#include <type_traits> // std::alignment_of,std::remove_pointer_t
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
@@ -101,7 +103,7 @@ class ArrayInterfaceHandler {
|
||||
template <typename PtrType>
|
||||
static PtrType GetPtrFromArrayData(Object::Map const &obj) {
|
||||
auto data_it = obj.find("data");
|
||||
if (data_it == obj.cend()) {
|
||||
if (data_it == obj.cend() || IsA<Null>(data_it->second)) {
|
||||
LOG(FATAL) << "Empty data passed in.";
|
||||
}
|
||||
auto p_data = reinterpret_cast<PtrType>(
|
||||
@@ -111,7 +113,7 @@ class ArrayInterfaceHandler {
|
||||
|
||||
static void Validate(Object::Map const &array) {
|
||||
auto version_it = array.find("version");
|
||||
if (version_it == array.cend()) {
|
||||
if (version_it == array.cend() || IsA<Null>(version_it->second)) {
|
||||
LOG(FATAL) << "Missing `version' field for array interface";
|
||||
}
|
||||
if (get<Integer const>(version_it->second) > 3) {
|
||||
@@ -119,17 +121,19 @@ class ArrayInterfaceHandler {
|
||||
}
|
||||
|
||||
auto typestr_it = array.find("typestr");
|
||||
if (typestr_it == array.cend()) {
|
||||
if (typestr_it == array.cend() || IsA<Null>(typestr_it->second)) {
|
||||
LOG(FATAL) << "Missing `typestr' field for array interface";
|
||||
}
|
||||
|
||||
auto typestr = get<String const>(typestr_it->second);
|
||||
CHECK(typestr.size() == 3 || typestr.size() == 4) << ArrayInterfaceErrors::TypestrFormat();
|
||||
|
||||
if (array.find("shape") == array.cend()) {
|
||||
auto shape_it = array.find("shape");
|
||||
if (shape_it == array.cend() || IsA<Null>(shape_it->second)) {
|
||||
LOG(FATAL) << "Missing `shape' field for array interface";
|
||||
}
|
||||
if (array.find("data") == array.cend()) {
|
||||
auto data_it = array.find("data");
|
||||
if (data_it == array.cend() || IsA<Null>(data_it->second)) {
|
||||
LOG(FATAL) << "Missing `data' field for array interface";
|
||||
}
|
||||
}
|
||||
@@ -139,8 +143,9 @@ class ArrayInterfaceHandler {
|
||||
static size_t ExtractMask(Object::Map const &column,
|
||||
common::Span<RBitField8::value_type> *p_out) {
|
||||
auto &s_mask = *p_out;
|
||||
if (column.find("mask") != column.cend()) {
|
||||
auto const &j_mask = get<Object const>(column.at("mask"));
|
||||
auto const &mask_it = column.find("mask");
|
||||
if (mask_it != column.cend() && !IsA<Null>(mask_it->second)) {
|
||||
auto const &j_mask = get<Object const>(mask_it->second);
|
||||
Validate(j_mask);
|
||||
|
||||
auto p_mask = GetPtrFromArrayData<RBitField8::value_type *>(j_mask);
|
||||
@@ -173,8 +178,9 @@ class ArrayInterfaceHandler {
|
||||
// assume 1 byte alignment.
|
||||
size_t const span_size = RBitField8::ComputeStorageSize(n_bits);
|
||||
|
||||
if (j_mask.find("strides") != j_mask.cend()) {
|
||||
auto strides = get<Array const>(column.at("strides"));
|
||||
auto strides_it = j_mask.find("strides");
|
||||
if (strides_it != j_mask.cend() && !IsA<Null>(strides_it->second)) {
|
||||
auto strides = get<Array const>(strides_it->second);
|
||||
CHECK_EQ(strides.size(), 1) << ArrayInterfaceErrors::Dimension(1);
|
||||
CHECK_EQ(get<Integer>(strides.at(0)), type_length) << ArrayInterfaceErrors::Contiguous();
|
||||
}
|
||||
@@ -390,6 +396,11 @@ class ArrayInterface {
|
||||
|
||||
data = ArrayInterfaceHandler::ExtractData(array, n);
|
||||
static_assert(allow_mask ? D == 1 : D >= 1, "Masked ndarray is not supported.");
|
||||
|
||||
auto alignment = this->ElementAlignment();
|
||||
auto ptr = reinterpret_cast<uintptr_t>(this->data);
|
||||
CHECK_EQ(ptr % alignment, 0) << "Input pointer misalignment.";
|
||||
|
||||
if (allow_mask) {
|
||||
common::Span<RBitField8::value_type> s_mask;
|
||||
size_t n_bits = ArrayInterfaceHandler::ExtractMask(array, &s_mask);
|
||||
@@ -401,7 +412,9 @@ class ArrayInterface {
|
||||
<< "XGBoost doesn't support internal broadcasting.";
|
||||
}
|
||||
} else {
|
||||
CHECK(array.find("mask") == array.cend()) << "Masked array is not yet supported.";
|
||||
auto mask_it = array.find("mask");
|
||||
CHECK(mask_it == array.cend() || IsA<Null>(mask_it->second))
|
||||
<< "Masked array is not yet supported.";
|
||||
}
|
||||
|
||||
auto stream_it = array.find("stream");
|
||||
@@ -506,9 +519,15 @@ class ArrayInterface {
|
||||
return func(reinterpret_cast<uint64_t const *>(data));
|
||||
}
|
||||
|
||||
XGBOOST_DEVICE size_t ElementSize() {
|
||||
return this->DispatchCall(
|
||||
[](auto *p_values) { return sizeof(std::remove_pointer_t<decltype(p_values)>); });
|
||||
/**
 * \brief Size in bytes of one element of the underlying array.
 *
 * Goes through DispatchCall, which invokes the lambda with `data` cast to a pointer of
 * the concrete element type; the lambda only inspects the pointee type and never
 * dereferences the pointer.
 *
 * \return sizeof the pointee type selected by DispatchCall.
 */
XGBOOST_DEVICE std::size_t ElementSize() const {
|
||||
return this->DispatchCall([](auto *typed_data_ptr) {
|
||||
return sizeof(std::remove_pointer_t<decltype(typed_data_ptr)>);
|
||||
});
|
||||
}
|
||||
/**
 * \brief Alignment requirement in bytes of the underlying element type.
 *
 * Uses the same DispatchCall mechanism as ElementSize: the lambda receives a typed
 * pointer and reports std::alignment_of for the pointee type without reading memory.
 *
 * \return std::alignment_of<T>::value for the element type selected by DispatchCall.
 */
XGBOOST_DEVICE std::size_t ElementAlignment() const {
|
||||
return this->DispatchCall([](auto *typed_data_ptr) {
|
||||
return std::alignment_of<std::remove_pointer_t<decltype(typed_data_ptr)>>::value;
|
||||
});
|
||||
}
|
||||
|
||||
template <typename T = float, typename... Index>
|
||||
|
||||
@@ -20,13 +20,13 @@ GHistIndexMatrix::GHistIndexMatrix() : columns_{std::make_unique<common::ColumnM
|
||||
|
||||
GHistIndexMatrix::GHistIndexMatrix(DMatrix *p_fmat, bst_bin_t max_bins_per_feat,
|
||||
double sparse_thresh, bool sorted_sketch, int32_t n_threads,
|
||||
common::Span<float> hess) {
|
||||
common::Span<float> hess)
|
||||
: max_numeric_bins_per_feat{max_bins_per_feat} {
|
||||
CHECK(p_fmat->SingleColBlock());
|
||||
// We use sorted sketching for approx tree method since it's more efficient in
|
||||
// computation time (but higher memory usage).
|
||||
cut = common::SketchOnDMatrix(p_fmat, max_bins_per_feat, n_threads, sorted_sketch, hess);
|
||||
|
||||
max_num_bins = max_bins_per_feat;
|
||||
const uint32_t nbins = cut.Ptrs().back();
|
||||
hit_count.resize(nbins, 0);
|
||||
hit_count_tloc_.resize(n_threads * nbins, 0);
|
||||
@@ -63,7 +63,7 @@ GHistIndexMatrix::GHistIndexMatrix(MetaInfo const &info, common::HistogramCuts &
|
||||
: row_ptr(info.num_row_ + 1, 0),
|
||||
hit_count(cuts.TotalBins(), 0),
|
||||
cut{std::forward<common::HistogramCuts>(cuts)},
|
||||
max_num_bins(max_bin_per_feat),
|
||||
max_numeric_bins_per_feat(max_bin_per_feat),
|
||||
isDense_{info.num_col_ * info.num_row_ == info.num_nonzero_} {}
|
||||
|
||||
#if !defined(XGBOOST_USE_CUDA)
|
||||
@@ -86,13 +86,13 @@ void GHistIndexMatrix::PushBatch(SparsePage const &batch, common::Span<FeatureTy
|
||||
}
|
||||
|
||||
GHistIndexMatrix::GHistIndexMatrix(SparsePage const &batch, common::Span<FeatureType const> ft,
|
||||
common::HistogramCuts const &cuts, int32_t max_bins_per_feat,
|
||||
bool isDense, double sparse_thresh, int32_t n_threads) {
|
||||
common::HistogramCuts cuts, int32_t max_bins_per_feat,
|
||||
bool isDense, double sparse_thresh, int32_t n_threads)
|
||||
: cut{std::move(cuts)},
|
||||
max_numeric_bins_per_feat{max_bins_per_feat},
|
||||
base_rowid{batch.base_rowid},
|
||||
isDense_{isDense} {
|
||||
CHECK_GE(n_threads, 1);
|
||||
base_rowid = batch.base_rowid;
|
||||
isDense_ = isDense;
|
||||
cut = cuts;
|
||||
max_num_bins = max_bins_per_feat;
|
||||
CHECK_EQ(row_ptr.size(), 0);
|
||||
// The number of threads is pegged to the batch size. If the OMP
|
||||
// block is parallelized on anything other than the batch/block size,
|
||||
@@ -127,12 +127,13 @@ INSTANTIATION_PUSH(data::SparsePageAdapterBatch)
|
||||
#undef INSTANTIATION_PUSH
|
||||
|
||||
void GHistIndexMatrix::ResizeIndex(const size_t n_index, const bool isDense) {
|
||||
if ((max_num_bins - 1 <= static_cast<int>(std::numeric_limits<uint8_t>::max())) && isDense) {
|
||||
if ((MaxNumBinPerFeat() - 1 <= static_cast<int>(std::numeric_limits<uint8_t>::max())) &&
|
||||
isDense) {
|
||||
// compress dense index to uint8
|
||||
index.SetBinTypeSize(common::kUint8BinsTypeSize);
|
||||
index.Resize((sizeof(uint8_t)) * n_index);
|
||||
} else if ((max_num_bins - 1 > static_cast<int>(std::numeric_limits<uint8_t>::max()) &&
|
||||
max_num_bins - 1 <= static_cast<int>(std::numeric_limits<uint16_t>::max())) &&
|
||||
} else if ((MaxNumBinPerFeat() - 1 > static_cast<int>(std::numeric_limits<uint8_t>::max()) &&
|
||||
MaxNumBinPerFeat() - 1 <= static_cast<int>(std::numeric_limits<uint16_t>::max())) &&
|
||||
isDense) {
|
||||
// compress dense index to uint16
|
||||
index.SetBinTypeSize(common::kUint16BinsTypeSize);
|
||||
@@ -148,10 +149,28 @@ common::ColumnMatrix const &GHistIndexMatrix::Transpose() const {
|
||||
return *columns_;
|
||||
}
|
||||
|
||||
/**
 * \brief Look up the bin index stored for row `ridx` and feature `fidx`.
 *
 * \param ridx Row index, passed to RowIdx() to obtain the row's range in `index`.
 * \param fidx Feature index.
 *
 * \return For a dense page, the entry at `begin + fidx`. Otherwise the result of
 *         BinarySearchBin over the row's entries, restricted to the feature's bin range
 *         taken from the cut pointers.
 *         NOTE(review): what BinarySearchBin returns when the row has no entry for this
 *         feature is not visible here -- confirm against its definition.
 */
bst_bin_t GHistIndexMatrix::GetGindex(size_t ridx, size_t fidx) const {
|
||||
auto begin = RowIdx(ridx);
|
||||
// Dense fast path: the entry is read directly at offset `fidx` from the row start.
if (IsDense()) {
|
||||
return static_cast<bst_bin_t>(index[begin + fidx]);
|
||||
}
|
||||
auto end = RowIdx(ridx + 1);
|
||||
auto const& cut_ptrs = cut.Ptrs();
|
||||
// [f_begin, f_end) is the bin range of feature `fidx` according to the cut pointers.
auto f_begin = cut_ptrs[fidx];
|
||||
auto f_end = cut_ptrs[fidx + 1];
|
||||
return BinarySearchBin(begin, end, index, f_begin, f_end);
|
||||
}
|
||||
|
||||
/**
 * \brief Convenience overload of GetFvalue that reads the cut data from this page.
 *
 * Fetches the cut values, minimum values and cut pointers from `cut` and forwards them,
 * together with the arguments, to the overload that takes those vectors explicitly.
 *
 * \param ridx   Row index.
 * \param fidx   Feature index.
 * \param is_cat Whether the feature is treated as categorical.
 */
float GHistIndexMatrix::GetFvalue(size_t ridx, size_t fidx, bool is_cat) const {
|
||||
auto const &values = cut.Values();
|
||||
auto const &mins = cut.MinValues();
|
||||
auto const &ptrs = cut.Ptrs();
|
||||
return this->GetFvalue(ptrs, values, mins, ridx, fidx, is_cat);
|
||||
}
|
||||
|
||||
float GHistIndexMatrix::GetFvalue(std::vector<std::uint32_t> const &ptrs,
|
||||
std::vector<float> const &values, std::vector<float> const &mins,
|
||||
bst_row_t ridx, bst_feature_t fidx, bool is_cat) const {
|
||||
if (is_cat) {
|
||||
auto f_begin = ptrs[fidx];
|
||||
auto f_end = ptrs[fidx + 1];
|
||||
@@ -171,24 +190,27 @@ float GHistIndexMatrix::GetFvalue(size_t ridx, size_t fidx, bool is_cat) const {
|
||||
}
|
||||
return common::HistogramCuts::NumericBinValue(ptrs, values, mins, fidx, bin_idx);
|
||||
};
|
||||
|
||||
if (columns_->GetColumnType(fidx) == common::kDenseColumn) {
|
||||
if (columns_->AnyMissing()) {
|
||||
switch (columns_->GetColumnType(fidx)) {
|
||||
case common::kDenseColumn: {
|
||||
if (columns_->AnyMissing()) {
|
||||
return common::DispatchBinType(columns_->GetTypeSize(), [&](auto dtype) {
|
||||
auto column = columns_->DenseColumn<decltype(dtype), true>(fidx);
|
||||
return get_bin_val(column);
|
||||
});
|
||||
} else {
|
||||
return common::DispatchBinType(columns_->GetTypeSize(), [&](auto dtype) {
|
||||
auto column = columns_->DenseColumn<decltype(dtype), false>(fidx);
|
||||
auto bin_idx = column[ridx];
|
||||
return common::HistogramCuts::NumericBinValue(ptrs, values, mins, fidx, bin_idx);
|
||||
});
|
||||
}
|
||||
}
|
||||
case common::kSparseColumn: {
|
||||
return common::DispatchBinType(columns_->GetTypeSize(), [&](auto dtype) {
|
||||
auto column = columns_->DenseColumn<decltype(dtype), true>(fidx);
|
||||
return get_bin_val(column);
|
||||
});
|
||||
} else {
|
||||
return common::DispatchBinType(columns_->GetTypeSize(), [&](auto dtype) {
|
||||
auto column = columns_->DenseColumn<decltype(dtype), false>(fidx);
|
||||
auto column = columns_->SparseColumn<decltype(dtype)>(fidx, 0);
|
||||
return get_bin_val(column);
|
||||
});
|
||||
}
|
||||
} else {
|
||||
return common::DispatchBinType(columns_->GetTypeSize(), [&](auto dtype) {
|
||||
auto column = columns_->SparseColumn<decltype(dtype)>(fidx, 0);
|
||||
return get_bin_val(column);
|
||||
});
|
||||
}
|
||||
|
||||
SPAN_CHECK(false);
|
||||
|
||||
@@ -65,7 +65,7 @@ void GetRowPtrFromEllpack(Context const* ctx, EllpackPageImpl const* page,
|
||||
|
||||
GHistIndexMatrix::GHistIndexMatrix(Context const* ctx, MetaInfo const& info,
|
||||
EllpackPage const& in_page, BatchParam const& p)
|
||||
: max_num_bins{p.max_bin} {
|
||||
: max_numeric_bins_per_feat{p.max_bin} {
|
||||
auto page = in_page.Impl();
|
||||
isDense_ = page->is_dense;
|
||||
|
||||
|
||||
@@ -133,11 +133,15 @@ class GHistIndexMatrix {
|
||||
std::vector<size_t> hit_count;
|
||||
/*! \brief The corresponding cuts */
|
||||
common::HistogramCuts cut;
|
||||
/*! \brief max_bin for each feature. */
|
||||
bst_bin_t max_num_bins;
|
||||
/** \brief max_bin for each feature. */
|
||||
bst_bin_t max_numeric_bins_per_feat;
|
||||
/*! \brief base row index for current page (used by external memory) */
|
||||
size_t base_rowid{0};
|
||||
|
||||
/**
 * \brief Upper bound on the number of bins of any single feature.
 *
 * \return The larger of `cut.MaxCategory() + 1` and `max_numeric_bins_per_feat`.
 *         NOTE(review): presumably the `+ 1` turns the largest category value into a
 *         category count so categorical features can exceed the numeric limit -- confirm
 *         against HistogramCuts::MaxCategory.
 */
bst_bin_t MaxNumBinPerFeat() const {
|
||||
return std::max(static_cast<bst_bin_t>(cut.MaxCategory() + 1), max_numeric_bins_per_feat);
|
||||
}
|
||||
|
||||
~GHistIndexMatrix();
|
||||
/**
|
||||
* \brief Constructor for SimpleDMatrix.
|
||||
@@ -160,7 +164,7 @@ class GHistIndexMatrix {
|
||||
* \brief Constructor for external memory.
|
||||
*/
|
||||
GHistIndexMatrix(SparsePage const& page, common::Span<FeatureType const> ft,
|
||||
common::HistogramCuts const& cuts, int32_t max_bins_per_feat, bool is_dense,
|
||||
common::HistogramCuts cuts, int32_t max_bins_per_feat, bool is_dense,
|
||||
double sparse_thresh, int32_t n_threads);
|
||||
GHistIndexMatrix(); // also for ext mem, empty ctor so that we can read the cache back.
|
||||
|
||||
@@ -223,7 +227,12 @@ class GHistIndexMatrix {
|
||||
|
||||
common::ColumnMatrix const& Transpose() const;
|
||||
|
||||
bst_bin_t GetGindex(size_t ridx, size_t fidx) const;
|
||||
|
||||
float GetFvalue(size_t ridx, size_t fidx, bool is_cat) const;
|
||||
float GetFvalue(std::vector<std::uint32_t> const& ptrs, std::vector<float> const& values,
|
||||
std::vector<float> const& mins, bst_row_t ridx, bst_feature_t fidx,
|
||||
bool is_cat) const;
|
||||
|
||||
private:
|
||||
std::unique_ptr<common::ColumnMatrix> columns_;
|
||||
|
||||
@@ -35,7 +35,7 @@ class GHistIndexRawFormat : public SparsePageFormat<GHistIndexMatrix> {
|
||||
if (!fi->Read(&page->hit_count)) {
|
||||
return false;
|
||||
}
|
||||
if (!fi->Read(&page->max_num_bins)) {
|
||||
if (!fi->Read(&page->max_numeric_bins_per_feat)) {
|
||||
return false;
|
||||
}
|
||||
if (!fi->Read(&page->base_rowid)) {
|
||||
@@ -76,8 +76,8 @@ class GHistIndexRawFormat : public SparsePageFormat<GHistIndexMatrix> {
|
||||
page.hit_count.size() * sizeof(decltype(page.hit_count)::value_type) +
|
||||
sizeof(uint64_t);
|
||||
// max_bins, base row, is_dense
|
||||
fo->Write(page.max_num_bins);
|
||||
bytes += sizeof(page.max_num_bins);
|
||||
fo->Write(page.max_numeric_bins_per_feat);
|
||||
bytes += sizeof(page.max_numeric_bins_per_feat);
|
||||
fo->Write(page.base_rowid);
|
||||
bytes += sizeof(page.base_rowid);
|
||||
fo->Write(page.IsDense());
|
||||
|
||||
@@ -58,6 +58,13 @@ void GetCutsFromRef(std::shared_ptr<DMatrix> ref_, bst_feature_t n_features, Bat
|
||||
}
|
||||
};
|
||||
auto ellpack = [&]() {
|
||||
// workaround ellpack being initialized from CPU.
|
||||
if (p.gpu_id == Context::kCpuId) {
|
||||
p.gpu_id = ref_->Ctx()->gpu_id;
|
||||
}
|
||||
if (p.gpu_id == Context::kCpuId) {
|
||||
p.gpu_id = 0;
|
||||
}
|
||||
for (auto const& page : ref_->GetBatches<EllpackPage>(p)) {
|
||||
GetCutsFromEllpack(page, p_cuts);
|
||||
break;
|
||||
@@ -103,6 +110,7 @@ void IterativeDMatrix::InitFromCPU(DataIterHandle iter_handle, float missing,
|
||||
size_t n_threads = ctx_.Threads();
|
||||
size_t n_features = column_sizes.size();
|
||||
linalg::Tensor<size_t, 2> column_sizes_tloc({n_threads, n_features}, Context::kCpuId);
|
||||
column_sizes_tloc.Data()->Fill(0);
|
||||
auto view = column_sizes_tloc.HostView();
|
||||
common::ParallelFor(value.Size(), n_threads, common::Sched::Static(256), [&](auto i) {
|
||||
auto const& line = value.GetLine(i);
|
||||
@@ -172,9 +180,9 @@ void IterativeDMatrix::InitFromCPU(DataIterHandle iter_handle, float missing,
|
||||
size_t i = 0;
|
||||
while (iter.Next()) {
|
||||
if (!p_sketch) {
|
||||
p_sketch.reset(new common::HostSketchContainer{batch_param_.max_bin,
|
||||
proxy->Info().feature_types.ConstHostSpan(),
|
||||
column_sizes, false, ctx_.Threads()});
|
||||
p_sketch.reset(new common::HostSketchContainer{
|
||||
batch_param_.max_bin, proxy->Info().feature_types.ConstHostSpan(), column_sizes,
|
||||
!proxy->Info().group_ptr_.empty(), ctx_.Threads()});
|
||||
}
|
||||
HostAdapterDispatch(proxy, [&](auto const& batch) {
|
||||
proxy->Info().num_nonzero_ = batch_nnz[i];
|
||||
|
||||
@@ -42,6 +42,7 @@ DMatrix* SimpleDMatrix::Slice(common::Span<int32_t const> ridxs) {
|
||||
out->Info() = this->Info().Slice(ridxs);
|
||||
out->Info().num_nonzero_ = h_offset.back();
|
||||
}
|
||||
out->ctx_ = this->ctx_;
|
||||
return out;
|
||||
}
|
||||
|
||||
|
||||
@@ -28,6 +28,7 @@
|
||||
#include "xgboost/logging.h"
|
||||
#include "xgboost/objective.h"
|
||||
#include "xgboost/predictor.h"
|
||||
#include "xgboost/string_view.h"
|
||||
#include "xgboost/tree_updater.h"
|
||||
|
||||
namespace xgboost {
|
||||
@@ -395,23 +396,36 @@ void GBTree::LoadConfig(Json const& in) {
|
||||
tparam_.process_type = TreeProcessType::kDefault;
|
||||
int32_t const n_gpus = xgboost::common::AllVisibleGPUs();
|
||||
if (n_gpus == 0 && tparam_.predictor == PredictorType::kGPUPredictor) {
|
||||
LOG(WARNING)
|
||||
<< "Loading from a raw memory buffer on CPU only machine. "
|
||||
"Changing predictor to auto.";
|
||||
LOG(WARNING) << "Loading from a raw memory buffer on CPU only machine. "
|
||||
"Changing predictor to auto.";
|
||||
tparam_.UpdateAllowUnknown(Args{{"predictor", "auto"}});
|
||||
}
|
||||
|
||||
auto msg = StringView{
|
||||
R"(
|
||||
Loading from a raw memory buffer (like pickle in Python, RDS in R) on a CPU-only
|
||||
machine. Consider using `save_model/load_model` instead. See:
|
||||
|
||||
https://xgboost.readthedocs.io/en/latest/tutorials/saving_model.html
|
||||
|
||||
for more details about differences between saving model and serializing.)"};
|
||||
|
||||
if (n_gpus == 0 && tparam_.tree_method == TreeMethod::kGPUHist) {
|
||||
tparam_.UpdateAllowUnknown(Args{{"tree_method", "hist"}});
|
||||
LOG(WARNING)
|
||||
<< "Loading from a raw memory buffer on CPU only machine. "
|
||||
"Changing tree_method to hist.";
|
||||
LOG(WARNING) << msg << " Changing `tree_method` to `hist`.";
|
||||
}
|
||||
|
||||
auto const& j_updaters = get<Object const>(in["updater"]);
|
||||
updaters_.clear();
|
||||
|
||||
for (auto const& kv : j_updaters) {
|
||||
std::unique_ptr<TreeUpdater> up(
|
||||
TreeUpdater::Create(kv.first, ctx_, model_.learner_model_param->task));
|
||||
auto name = kv.first;
|
||||
if (n_gpus == 0 && name == "grow_gpu_hist") {
|
||||
name = "grow_quantile_histmaker";
|
||||
LOG(WARNING) << "Changing updater from `grow_gpu_hist` to `grow_quantile_histmaker`.";
|
||||
}
|
||||
std::unique_ptr<TreeUpdater> up{
|
||||
TreeUpdater::Create(name, ctx_, model_.learner_model_param->task)};
|
||||
up->LoadConfig(kv.second);
|
||||
updaters_.push_back(std::move(up));
|
||||
}
|
||||
|
||||
@@ -63,7 +63,7 @@ bst_float PredValue(const SparsePage::Inst &inst,
|
||||
psum += (*trees[i])[nidx].LeafValue();
|
||||
}
|
||||
}
|
||||
p_feats->Drop(inst);
|
||||
p_feats->Drop();
|
||||
return psum;
|
||||
}
|
||||
|
||||
@@ -116,13 +116,11 @@ void FVecFill(const size_t block_size, const size_t batch_offset, const int num_
|
||||
}
|
||||
}
|
||||
|
||||
// Calls Drop on `block_size` consecutive feature vectors in `*p_feats`, starting at
// index `fvec_offset`.
template <typename DataView>
|
||||
void FVecDrop(const size_t block_size, const size_t batch_offset, DataView* batch,
|
||||
const size_t fvec_offset, std::vector<RegTree::FVec>* p_feats) {
|
||||
void FVecDrop(std::size_t const block_size, std::size_t const fvec_offset,
|
||||
std::vector<RegTree::FVec> *p_feats) {
|
||||
for (size_t i = 0; i < block_size; ++i) {
|
||||
RegTree::FVec &feats = (*p_feats)[fvec_offset + i];
|
||||
const SparsePage::Inst inst = (*batch)[batch_offset + i];
|
||||
feats.Drop(inst);
|
||||
feats.Drop();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -142,11 +140,15 @@ struct SparsePageView {
|
||||
struct GHistIndexMatrixView {
|
||||
private:
|
||||
GHistIndexMatrix const &page_;
|
||||
uint64_t n_features_;
|
||||
std::uint64_t const n_features_;
|
||||
common::Span<FeatureType const> ft_;
|
||||
common::Span<Entry> workspace_;
|
||||
std::vector<size_t> current_unroll_;
|
||||
|
||||
std::vector<std::uint32_t> const& ptrs_;
|
||||
std::vector<float> const& mins_;
|
||||
std::vector<float> const& values_;
|
||||
|
||||
public:
|
||||
size_t base_rowid;
|
||||
|
||||
@@ -159,6 +161,9 @@ struct GHistIndexMatrixView {
|
||||
ft_{ft},
|
||||
workspace_{workplace},
|
||||
current_unroll_(n_threads > 0 ? n_threads : 1, 0),
|
||||
ptrs_{_page.cut.Ptrs()},
|
||||
mins_{_page.cut.MinValues()},
|
||||
values_{_page.cut.Values()},
|
||||
base_rowid{_page.base_rowid} {}
|
||||
|
||||
SparsePage::Inst operator[](size_t r) {
|
||||
@@ -167,7 +172,7 @@ struct GHistIndexMatrixView {
|
||||
size_t non_missing{beg};
|
||||
|
||||
for (bst_feature_t c = 0; c < n_features_; ++c) {
|
||||
float f = page_.GetFvalue(r, c, common::IsCat(ft_, c));
|
||||
float f = page_.GetFvalue(ptrs_, values_, mins_, r, c, common::IsCat(ft_, c));
|
||||
if (!common::CheckNAN(f)) {
|
||||
workspace_[non_missing] = Entry{c, f};
|
||||
++non_missing;
|
||||
@@ -250,10 +255,9 @@ void PredictBatchByBlockOfRowsKernel(
|
||||
FVecFill(block_size, batch_offset, num_feature, &batch, fvec_offset,
|
||||
p_thread_temp);
|
||||
// process block of rows through all trees to keep cache locality
|
||||
PredictByAllTrees(model, tree_begin, tree_end, out_preds,
|
||||
batch_offset + batch.base_rowid, num_group, thread_temp,
|
||||
fvec_offset, block_size);
|
||||
FVecDrop(block_size, batch_offset, &batch, fvec_offset, p_thread_temp);
|
||||
PredictByAllTrees(model, tree_begin, tree_end, out_preds, batch_offset + batch.base_rowid,
|
||||
num_group, thread_temp, fvec_offset, block_size);
|
||||
FVecDrop(block_size, fvec_offset, p_thread_temp);
|
||||
});
|
||||
}
|
||||
|
||||
@@ -470,7 +474,7 @@ class CPUPredictor : public Predictor {
|
||||
bst_node_t tid = GetLeafIndex<true, true>(tree, feats, cats);
|
||||
preds[ridx * ntree_limit + j] = static_cast<bst_float>(tid);
|
||||
}
|
||||
feats.Drop(page[i]);
|
||||
feats.Drop();
|
||||
});
|
||||
}
|
||||
}
|
||||
@@ -544,7 +548,7 @@ class CPUPredictor : public Predictor {
|
||||
(tree_weights == nullptr ? 1 : (*tree_weights)[j]);
|
||||
}
|
||||
}
|
||||
feats.Drop(page[i]);
|
||||
feats.Drop();
|
||||
// add base margin to BIAS
|
||||
if (base_margin.Size() != 0) {
|
||||
CHECK_EQ(base_margin.Shape(1), ngroup);
|
||||
|
||||
@@ -18,9 +18,7 @@ inline XGBOOST_DEVICE bst_node_t GetNextNode(const RegTree::Node &node, const bs
|
||||
if (has_categorical && common::IsCat(cats.split_type, nid)) {
|
||||
auto node_categories =
|
||||
cats.categories.subspan(cats.node_ptr[nid].beg, cats.node_ptr[nid].size);
|
||||
return common::Decision<true>(node_categories, fvalue, node.DefaultLeft())
|
||||
? node.LeftChild()
|
||||
: node.RightChild();
|
||||
return common::Decision(node_categories, fvalue) ? node.LeftChild() : node.RightChild();
|
||||
} else {
|
||||
return node.LeftChild() + !(fvalue < node.SplitCond());
|
||||
}
|
||||
|
||||
@@ -248,8 +248,10 @@ class EvaluateSplitAgent {
|
||||
|
||||
template <int kBlockSize>
|
||||
__global__ __launch_bounds__(kBlockSize) void EvaluateSplitsKernel(
|
||||
bst_feature_t number_active_features, common::Span<const EvaluateSplitInputs> d_inputs,
|
||||
const EvaluateSplitSharedInputs shared_inputs, common::Span<bst_feature_t> sorted_idx,
|
||||
bst_feature_t max_active_features,
|
||||
common::Span<const EvaluateSplitInputs> d_inputs,
|
||||
const EvaluateSplitSharedInputs shared_inputs,
|
||||
common::Span<bst_feature_t> sorted_idx,
|
||||
const TreeEvaluator::SplitEvaluator<GPUTrainingParam> evaluator,
|
||||
common::Span<DeviceSplitCandidate> out_candidates) {
|
||||
// Aligned && shared storage for best_split
|
||||
@@ -263,11 +265,15 @@ __global__ __launch_bounds__(kBlockSize) void EvaluateSplitsKernel(
|
||||
__syncthreads();
|
||||
|
||||
// Allocate blocks to one feature of one node
|
||||
const auto input_idx = blockIdx.x / number_active_features;
|
||||
const auto input_idx = blockIdx.x / max_active_features;
|
||||
const EvaluateSplitInputs &inputs = d_inputs[input_idx];
|
||||
// One block for each feature. Features are sampled, so fidx != blockIdx.x
|
||||
|
||||
int fidx = inputs.feature_set[blockIdx.x % number_active_features];
|
||||
// Some blocks may not have any feature to work on, simply return
|
||||
int feature_offset = blockIdx.x % max_active_features;
|
||||
if (feature_offset >= inputs.feature_set.size()) {
|
||||
return;
|
||||
}
|
||||
int fidx = inputs.feature_set[feature_offset];
|
||||
|
||||
using AgentT = EvaluateSplitAgent<kBlockSize>;
|
||||
__shared__ typename AgentT::TempStorage temp_storage;
|
||||
@@ -338,7 +344,8 @@ __device__ void SetCategoricalSplit(const EvaluateSplitSharedInputs &shared_inpu
|
||||
}
|
||||
|
||||
void GPUHistEvaluator::LaunchEvaluateSplits(
|
||||
bst_feature_t number_active_features, common::Span<const EvaluateSplitInputs> d_inputs,
|
||||
bst_feature_t max_active_features,
|
||||
common::Span<const EvaluateSplitInputs> d_inputs,
|
||||
EvaluateSplitSharedInputs shared_inputs,
|
||||
TreeEvaluator::SplitEvaluator<GPUTrainingParam> evaluator,
|
||||
common::Span<DeviceSplitCandidate> out_splits) {
|
||||
@@ -346,20 +353,25 @@ void GPUHistEvaluator::LaunchEvaluateSplits(
|
||||
this->SortHistogram(d_inputs, shared_inputs, evaluator);
|
||||
}
|
||||
|
||||
size_t combined_num_features = number_active_features * d_inputs.size();
|
||||
dh::TemporaryArray<DeviceSplitCandidate> feature_best_splits(combined_num_features);
|
||||
size_t combined_num_features = max_active_features * d_inputs.size();
|
||||
dh::TemporaryArray<DeviceSplitCandidate> feature_best_splits(
|
||||
combined_num_features, DeviceSplitCandidate());
|
||||
|
||||
// One block for each feature
|
||||
uint32_t constexpr kBlockThreads = 32;
|
||||
dh::LaunchKernel {static_cast<uint32_t>(combined_num_features), kBlockThreads, 0}(
|
||||
EvaluateSplitsKernel<kBlockThreads>, number_active_features, d_inputs,
|
||||
shared_inputs, this->SortedIdx(d_inputs.size(), shared_inputs.feature_values.size()),
|
||||
dh::LaunchKernel{static_cast<uint32_t>(combined_num_features), kBlockThreads,
|
||||
0}(
|
||||
EvaluateSplitsKernel<kBlockThreads>, max_active_features, d_inputs,
|
||||
shared_inputs,
|
||||
this->SortedIdx(d_inputs.size(), shared_inputs.feature_values.size()),
|
||||
evaluator, dh::ToSpan(feature_best_splits));
|
||||
|
||||
// Reduce to get best candidate for left and right child over all features
|
||||
auto reduce_offset = dh::MakeTransformIterator<size_t>(
|
||||
thrust::make_counting_iterator(0llu),
|
||||
[=] __device__(size_t idx) -> size_t { return idx * number_active_features; });
|
||||
auto reduce_offset =
|
||||
dh::MakeTransformIterator<size_t>(thrust::make_counting_iterator(0llu),
|
||||
[=] __device__(size_t idx) -> size_t {
|
||||
return idx * max_active_features;
|
||||
});
|
||||
size_t temp_storage_bytes = 0;
|
||||
auto num_segments = out_splits.size();
|
||||
cub::DeviceSegmentedReduce::Sum(nullptr, temp_storage_bytes, feature_best_splits.data(),
|
||||
@@ -386,15 +398,16 @@ void GPUHistEvaluator::CopyToHost(const std::vector<bst_node_t> &nidx) {
|
||||
}
|
||||
|
||||
void GPUHistEvaluator::EvaluateSplits(
|
||||
const std::vector<bst_node_t> &nidx, bst_feature_t number_active_features,
|
||||
common::Span<const EvaluateSplitInputs> d_inputs, EvaluateSplitSharedInputs shared_inputs,
|
||||
const std::vector<bst_node_t> &nidx, bst_feature_t max_active_features,
|
||||
common::Span<const EvaluateSplitInputs> d_inputs,
|
||||
EvaluateSplitSharedInputs shared_inputs,
|
||||
common::Span<GPUExpandEntry> out_entries) {
|
||||
auto evaluator = this->tree_evaluator_.template GetEvaluator<GPUTrainingParam>();
|
||||
|
||||
dh::TemporaryArray<DeviceSplitCandidate> splits_out_storage(d_inputs.size());
|
||||
auto out_splits = dh::ToSpan(splits_out_storage);
|
||||
this->LaunchEvaluateSplits(number_active_features, d_inputs, shared_inputs, evaluator,
|
||||
out_splits);
|
||||
this->LaunchEvaluateSplits(max_active_features, d_inputs, shared_inputs,
|
||||
evaluator, out_splits);
|
||||
|
||||
auto d_sorted_idx = this->SortedIdx(d_inputs.size(), shared_inputs.feature_values.size());
|
||||
auto d_entries = out_entries;
|
||||
|
||||
@@ -3,10 +3,10 @@
|
||||
*/
|
||||
#ifndef EVALUATE_SPLITS_CUH_
|
||||
#define EVALUATE_SPLITS_CUH_
|
||||
#include <thrust/system/cuda/experimental/pinned_allocator.h>
|
||||
#include <xgboost/span.h>
|
||||
|
||||
#include "../../common/categorical.h"
|
||||
#include "../../common/cuda_pinned_allocator.h"
|
||||
#include "../split_evaluator.h"
|
||||
#include "../updater_gpu_common.cuh"
|
||||
#include "expand_entry.cuh"
|
||||
@@ -57,7 +57,7 @@ struct CatAccessor {
|
||||
class GPUHistEvaluator {
|
||||
using CatST = common::CatBitField::value_type; // categorical storage type
|
||||
// use pinned memory to stage the categories, used for sort based splits.
|
||||
using Alloc = thrust::system::cuda::experimental::pinned_allocator<CatST>;
|
||||
using Alloc = xgboost::common::cuda::pinned_allocator<CatST>;
|
||||
|
||||
private:
|
||||
TreeEvaluator tree_evaluator_;
|
||||
@@ -170,13 +170,18 @@ class GPUHistEvaluator {
|
||||
TreeEvaluator::SplitEvaluator<GPUTrainingParam> evaluator);
|
||||
|
||||
// impl of evaluate splits, contains CUDA kernels so it's public
|
||||
void LaunchEvaluateSplits(bst_feature_t number_active_features,common::Span<const EvaluateSplitInputs> d_inputs,EvaluateSplitSharedInputs shared_inputs,
|
||||
TreeEvaluator::SplitEvaluator<GPUTrainingParam> evaluator,
|
||||
common::Span<DeviceSplitCandidate> out_splits);
|
||||
void LaunchEvaluateSplits(
|
||||
bst_feature_t max_active_features,
|
||||
common::Span<const EvaluateSplitInputs> d_inputs,
|
||||
EvaluateSplitSharedInputs shared_inputs,
|
||||
TreeEvaluator::SplitEvaluator<GPUTrainingParam> evaluator,
|
||||
common::Span<DeviceSplitCandidate> out_splits);
|
||||
/**
|
||||
* \brief Evaluate splits for left and right nodes.
|
||||
*/
|
||||
void EvaluateSplits(const std::vector<bst_node_t> &nidx,bst_feature_t number_active_features,common::Span<const EvaluateSplitInputs> d_inputs,
|
||||
void EvaluateSplits(const std::vector<bst_node_t> &nidx,
|
||||
bst_feature_t max_active_features,
|
||||
common::Span<const EvaluateSplitInputs> d_inputs,
|
||||
EvaluateSplitSharedInputs shared_inputs,
|
||||
common::Span<GPUExpandEntry> out_splits);
|
||||
/**
|
||||
|
||||
@@ -389,6 +389,7 @@ class HistEvaluator {
|
||||
tree_evaluator_.AddSplit(candidate.nid, left_child, right_child,
|
||||
tree[candidate.nid].SplitIndex(), left_weight,
|
||||
right_weight);
|
||||
evaluator = tree_evaluator_.GetEvaluator();
|
||||
|
||||
auto max_node = std::max(left_child, tree[candidate.nid].RightChild());
|
||||
max_node = std::max(candidate.nid, max_node);
|
||||
|
||||
@@ -48,6 +48,8 @@ class TreeEvaluator {
|
||||
monotone_.HostVector().resize(n_features, 0);
|
||||
has_constraint_ = false;
|
||||
} else {
|
||||
CHECK_LE(p.monotone_constraints.size(), n_features)
|
||||
<< "The size of monotone constraint should be less or equal to the number of features.";
|
||||
monotone_.HostVector() = p.monotone_constraints;
|
||||
monotone_.HostVector().resize(n_features, 0);
|
||||
// Initialised to some small size, can grow if needed
|
||||
|
||||
@@ -188,7 +188,8 @@ struct GPUHistMakerDevice {
|
||||
common::Span<GradientPair> gpair;
|
||||
|
||||
dh::device_vector<int> monotone_constraints;
|
||||
dh::device_vector<float> update_predictions;
|
||||
// node idx for each sample
|
||||
dh::device_vector<bst_node_t> positions;
|
||||
|
||||
TrainParam param;
|
||||
|
||||
@@ -285,7 +286,7 @@ struct GPUHistMakerDevice {
|
||||
matrix.feature_segments,
|
||||
matrix.gidx_fvalue_map,
|
||||
matrix.min_fvalue,
|
||||
matrix.is_dense
|
||||
matrix.is_dense && !collective::IsDistributed()
|
||||
};
|
||||
auto split = this->evaluator_.EvaluateSingleSplit(inputs, shared_inputs);
|
||||
return split;
|
||||
@@ -299,12 +300,14 @@ struct GPUHistMakerDevice {
|
||||
std::vector<bst_node_t> nidx(2 * candidates.size());
|
||||
auto h_node_inputs = pinned2.GetSpan<EvaluateSplitInputs>(2 * candidates.size());
|
||||
auto matrix = page->GetDeviceAccessor(ctx_->gpu_id);
|
||||
EvaluateSplitSharedInputs shared_inputs{
|
||||
GPUTrainingParam{param}, *quantiser, feature_types, matrix.feature_segments,
|
||||
matrix.gidx_fvalue_map, matrix.min_fvalue,
|
||||
matrix.is_dense
|
||||
};
|
||||
EvaluateSplitSharedInputs shared_inputs{GPUTrainingParam{param}, *quantiser, feature_types,
|
||||
matrix.feature_segments, matrix.gidx_fvalue_map,
|
||||
matrix.min_fvalue,
|
||||
// is_dense represents the local data
|
||||
matrix.is_dense && !collective::IsDistributed()};
|
||||
dh::TemporaryArray<GPUExpandEntry> entries(2 * candidates.size());
|
||||
// Store the feature set ptrs so they don't go out of scope before the kernel is called
|
||||
std::vector<std::shared_ptr<HostDeviceVector<bst_feature_t>>> feature_sets;
|
||||
for (size_t i = 0; i < candidates.size(); i++) {
|
||||
auto candidate = candidates.at(i);
|
||||
int left_nidx = tree[candidate.nid].LeftChild();
|
||||
@@ -313,29 +316,34 @@ struct GPUHistMakerDevice {
|
||||
nidx[i * 2 + 1] = right_nidx;
|
||||
auto left_sampled_features = column_sampler.GetFeatureSet(tree.GetDepth(left_nidx));
|
||||
left_sampled_features->SetDevice(ctx_->gpu_id);
|
||||
feature_sets.emplace_back(left_sampled_features);
|
||||
common::Span<bst_feature_t> left_feature_set =
|
||||
interaction_constraints.Query(left_sampled_features->DeviceSpan(), left_nidx);
|
||||
auto right_sampled_features = column_sampler.GetFeatureSet(tree.GetDepth(right_nidx));
|
||||
right_sampled_features->SetDevice(ctx_->gpu_id);
|
||||
feature_sets.emplace_back(right_sampled_features);
|
||||
common::Span<bst_feature_t> right_feature_set =
|
||||
interaction_constraints.Query(right_sampled_features->DeviceSpan(), left_nidx);
|
||||
h_node_inputs[i * 2] = {left_nidx, candidate.depth + 1, candidate.split.left_sum,
|
||||
left_feature_set, hist.GetNodeHistogram(left_nidx)};
|
||||
h_node_inputs[i * 2 + 1] = {right_nidx, candidate.depth + 1, candidate.split.right_sum,
|
||||
right_feature_set, hist.GetNodeHistogram(right_nidx)};
|
||||
interaction_constraints.Query(right_sampled_features->DeviceSpan(),
|
||||
right_nidx);
|
||||
h_node_inputs[i * 2] = {left_nidx, candidate.depth + 1,
|
||||
candidate.split.left_sum, left_feature_set,
|
||||
hist.GetNodeHistogram(left_nidx)};
|
||||
h_node_inputs[i * 2 + 1] = {right_nidx, candidate.depth + 1,
|
||||
candidate.split.right_sum, right_feature_set,
|
||||
hist.GetNodeHistogram(right_nidx)};
|
||||
}
|
||||
bst_feature_t number_active_features = h_node_inputs[0].feature_set.size();
|
||||
bst_feature_t max_active_features = 0;
|
||||
for (auto input : h_node_inputs) {
|
||||
CHECK_EQ(input.feature_set.size(), number_active_features)
|
||||
<< "Current implementation assumes that the number of active features "
|
||||
"(after sampling) in any node is the same";
|
||||
max_active_features = std::max(max_active_features,
|
||||
bst_feature_t(input.feature_set.size()));
|
||||
}
|
||||
dh::safe_cuda(cudaMemcpyAsync(d_node_inputs.data().get(), h_node_inputs.data(),
|
||||
h_node_inputs.size() * sizeof(EvaluateSplitInputs),
|
||||
cudaMemcpyDefault));
|
||||
dh::safe_cuda(cudaMemcpyAsync(
|
||||
d_node_inputs.data().get(), h_node_inputs.data(),
|
||||
h_node_inputs.size() * sizeof(EvaluateSplitInputs), cudaMemcpyDefault));
|
||||
|
||||
this->evaluator_.EvaluateSplits(nidx, number_active_features, dh::ToSpan(d_node_inputs),
|
||||
shared_inputs, dh::ToSpan(entries));
|
||||
this->evaluator_.EvaluateSplits(nidx, max_active_features,
|
||||
dh::ToSpan(d_node_inputs), shared_inputs,
|
||||
dh::ToSpan(entries));
|
||||
dh::safe_cuda(cudaMemcpyAsync(pinned_candidates_out.data(),
|
||||
entries.data().get(), sizeof(GPUExpandEntry) * entries.size(),
|
||||
cudaMemcpyDeviceToHost));
|
||||
@@ -403,8 +411,7 @@ struct GPUHistMakerDevice {
|
||||
go_left = data.split_node.DefaultLeft();
|
||||
} else {
|
||||
if (data.split_type == FeatureType::kCategorical) {
|
||||
go_left = common::Decision<false>(data.node_cats.Bits(), cut_value,
|
||||
data.split_node.DefaultLeft());
|
||||
go_left = common::Decision(data.node_cats.Bits(), cut_value);
|
||||
} else {
|
||||
go_left = cut_value <= data.split_node.SplitCond();
|
||||
}
|
||||
@@ -424,7 +431,7 @@ struct GPUHistMakerDevice {
|
||||
LOG(FATAL) << "Current objective function can not be used with external memory.";
|
||||
}
|
||||
p_out_position->Resize(0);
|
||||
update_predictions.clear();
|
||||
positions.clear();
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -459,8 +466,6 @@ struct GPUHistMakerDevice {
|
||||
HostDeviceVector<bst_node_t>* p_out_position) {
|
||||
auto d_matrix = page->GetDeviceAccessor(ctx_->gpu_id);
|
||||
auto d_gpair = this->gpair;
|
||||
update_predictions.resize(row_partitioner->GetRows().size());
|
||||
auto d_update_predictions = dh::ToSpan(update_predictions);
|
||||
p_out_position->SetDevice(ctx_->gpu_id);
|
||||
p_out_position->Resize(row_partitioner->GetRows().size());
|
||||
|
||||
@@ -481,7 +486,7 @@ struct GPUHistMakerDevice {
|
||||
if (common::IsCat(d_feature_types, position)) {
|
||||
auto node_cats = categories.subspan(categories_segments[position].beg,
|
||||
categories_segments[position].size);
|
||||
go_left = common::Decision<false>(node_cats, element, node.DefaultLeft());
|
||||
go_left = common::Decision(node_cats, element);
|
||||
} else {
|
||||
go_left = element <= node.SplitCond();
|
||||
}
|
||||
@@ -495,32 +500,45 @@ struct GPUHistMakerDevice {
|
||||
node = d_nodes[position];
|
||||
}
|
||||
|
||||
d_update_predictions[row_id] = node.LeafValue();
|
||||
return position;
|
||||
}; // NOLINT
|
||||
|
||||
auto d_out_position = p_out_position->DeviceSpan();
|
||||
row_partitioner->FinalisePosition(d_out_position, new_position_op);
|
||||
|
||||
auto s_position = p_out_position->ConstDeviceSpan();
|
||||
positions.resize(s_position.size());
|
||||
dh::safe_cuda(cudaMemcpyAsync(positions.data().get(), s_position.data(),
|
||||
s_position.size_bytes(), cudaMemcpyDeviceToDevice));
|
||||
|
||||
dh::LaunchN(row_partitioner->GetRows().size(), [=] __device__(size_t idx) {
|
||||
bst_node_t position = d_out_position[idx];
|
||||
d_update_predictions[idx] = d_nodes[position].LeafValue();
|
||||
bool is_row_sampled = d_gpair[idx].GetHess() - .0f == 0.f;
|
||||
d_out_position[idx] = is_row_sampled ? ~position : position;
|
||||
});
|
||||
}
|
||||
|
||||
bool UpdatePredictionCache(linalg::VectorView<float> out_preds_d, RegTree const* p_tree) {
|
||||
if (update_predictions.empty()) {
|
||||
if (positions.empty()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
CHECK(p_tree);
|
||||
dh::safe_cuda(cudaSetDevice(ctx_->gpu_id));
|
||||
CHECK_EQ(out_preds_d.DeviceIdx(), ctx_->gpu_id);
|
||||
auto d_update_predictions = dh::ToSpan(update_predictions);
|
||||
CHECK_EQ(out_preds_d.Size(), d_update_predictions.size());
|
||||
dh::LaunchN(out_preds_d.Size(), [=] XGBOOST_DEVICE(size_t idx) mutable {
|
||||
out_preds_d(idx) += d_update_predictions[idx];
|
||||
|
||||
auto d_position = dh::ToSpan(positions);
|
||||
CHECK_EQ(out_preds_d.Size(), d_position.size());
|
||||
|
||||
auto const& h_nodes = p_tree->GetNodes();
|
||||
dh::caching_device_vector<RegTree::Node> nodes(h_nodes.size());
|
||||
dh::safe_cuda(cudaMemcpyAsync(nodes.data().get(), h_nodes.data(),
|
||||
h_nodes.size() * sizeof(RegTree::Node), cudaMemcpyHostToDevice));
|
||||
auto d_nodes = dh::ToSpan(nodes);
|
||||
dh::LaunchN(d_position.size(), [=] XGBOOST_DEVICE(std::size_t idx) mutable {
|
||||
bst_node_t nidx = d_position[idx];
|
||||
auto weight = d_nodes[nidx].LeafValue();
|
||||
out_preds_d(idx) += weight;
|
||||
});
|
||||
return true;
|
||||
}
|
||||
@@ -863,6 +881,7 @@ class GPUHistMaker : public TreeUpdater {
|
||||
std::unique_ptr<GPUHistMakerDevice<GradientSumT>> maker; // NOLINT
|
||||
|
||||
char const* Name() const override { return "grow_gpu_hist"; }
|
||||
bool HasNodePosition() const override { return true; }
|
||||
|
||||
private:
|
||||
bool initialised_{false};
|
||||
|
||||
@@ -78,7 +78,7 @@ CPUExpandEntry QuantileHistMaker::Builder::InitRoot(
|
||||
|
||||
{
|
||||
GradientPairPrecise grad_stat;
|
||||
if (p_fmat->IsDense()) {
|
||||
if (p_fmat->IsDense() && !collective::IsDistributed()) {
|
||||
/**
|
||||
* Specialized code for dense data: For dense data (with no missing value), the sum
|
||||
* of gradient histogram is equal to snode[nid]
|
||||
|
||||
@@ -89,7 +89,7 @@ class TreeRefresher : public TreeUpdater {
|
||||
dmlc::BeginPtr(stemp[tid]) + offset);
|
||||
offset += tree->param.num_nodes;
|
||||
}
|
||||
feats.Drop(inst);
|
||||
feats.Drop();
|
||||
});
|
||||
}
|
||||
// aggregate the statistics
|
||||
|
||||
@@ -23,10 +23,15 @@ case "${container}" in
|
||||
gpu|rmm)
|
||||
BUILD_ARGS="$BUILD_ARGS --build-arg CUDA_VERSION_ARG=$CUDA_VERSION"
|
||||
BUILD_ARGS="$BUILD_ARGS --build-arg RAPIDS_VERSION_ARG=$RAPIDS_VERSION"
|
||||
if [[ $container == "rmm" ]]
|
||||
then
|
||||
BUILD_ARGS="$BUILD_ARGS --build-arg NCCL_VERSION_ARG=$NCCL_VERSION"
|
||||
fi
|
||||
;;
|
||||
|
||||
gpu_build_centos7|jvm_gpu_build)
|
||||
BUILD_ARGS="$BUILD_ARGS --build-arg CUDA_VERSION_ARG=$CUDA_VERSION"
|
||||
BUILD_ARGS="$BUILD_ARGS --build-arg NCCL_VERSION_ARG=$NCCL_VERSION"
|
||||
;;
|
||||
|
||||
*)
|
||||
|
||||
@@ -15,7 +15,8 @@ fi
|
||||
|
||||
command_wrapper="tests/ci_build/ci_build.sh rmm docker --build-arg "`
|
||||
`"CUDA_VERSION_ARG=$CUDA_VERSION --build-arg "`
|
||||
`"RAPIDS_VERSION_ARG=$RAPIDS_VERSION"
|
||||
`"RAPIDS_VERSION_ARG=$RAPIDS_VERSION --build-arg "`
|
||||
`"NCCL_VERSION_ARG=$NCCL_VERSION"
|
||||
|
||||
echo "--- Build libxgboost from the source"
|
||||
$command_wrapper tests/ci_build/build_via_cmake.sh --conda-env=gpu_test -DUSE_CUDA=ON \
|
||||
|
||||
@@ -16,7 +16,8 @@ else
|
||||
fi
|
||||
|
||||
command_wrapper="tests/ci_build/ci_build.sh gpu_build_centos7 docker --build-arg "`
|
||||
`"CUDA_VERSION_ARG=$CUDA_VERSION"
|
||||
`"CUDA_VERSION_ARG=$CUDA_VERSION --build-arg "`
|
||||
`"NCCL_VERSION_ARG=$NCCL_VERSION"
|
||||
|
||||
echo "--- Build libxgboost from the source"
|
||||
$command_wrapper tests/ci_build/prune_libnccl.sh
|
||||
|
||||
@@ -14,5 +14,7 @@ else
|
||||
fi
|
||||
|
||||
tests/ci_build/ci_build.sh jvm_gpu_build nvidia-docker \
|
||||
--build-arg CUDA_VERSION_ARG=${CUDA_VERSION} tests/ci_build/build_jvm_packages.sh \
|
||||
--build-arg CUDA_VERSION_ARG=${CUDA_VERSION} \
|
||||
--build-arg NCCL_VERSION_ARG=${NCCL_VERSION} \
|
||||
tests/ci_build/build_jvm_packages.sh \
|
||||
${SPARK_VERSION} -Duse.cuda=ON ${arch_flag}
|
||||
|
||||
@@ -12,10 +12,10 @@ if ( $is_release_branch -eq 0 ) {
|
||||
}
|
||||
mkdir build
|
||||
cd build
|
||||
cmake .. -G"Visual Studio 15 2017 Win64" -DUSE_CUDA=ON -DCMAKE_VERBOSE_MAKEFILE=ON `
|
||||
-DGOOGLE_TEST=ON -DUSE_DMLC_GTEST=ON -DCMAKE_UNITY_BUILD=ON ${arch_flag}
|
||||
cmake .. -G"Visual Studio 17 2022" -A x64 -DUSE_CUDA=ON -DCMAKE_VERBOSE_MAKEFILE=ON `
|
||||
-DGOOGLE_TEST=ON -DUSE_DMLC_GTEST=ON ${arch_flag}
|
||||
$msbuild = -join @(
|
||||
"C:\\Program Files (x86)\\Microsoft Visual Studio\\2017\\Community\\MSBuild\\15.0"
|
||||
"C:\\Program Files\\Microsoft Visual Studio\\2022\\Community\\MSBuild\\Current"
|
||||
"\\Bin\\MSBuild.exe"
|
||||
)
|
||||
& $msbuild xgboost.sln /m /p:Configuration=Release /nodeReuse:false
|
||||
|
||||
@@ -22,9 +22,10 @@ function set_buildkite_env_vars_in_container {
|
||||
|
||||
set -x
|
||||
|
||||
CUDA_VERSION=11.0.3
|
||||
RAPIDS_VERSION=22.10
|
||||
SPARK_VERSION=3.0.1
|
||||
CUDA_VERSION=11.8.0
|
||||
NCCL_VERSION=2.16.5-1
|
||||
RAPIDS_VERSION=23.02
|
||||
SPARK_VERSION=3.1.1
|
||||
JDK_VERSION=8
|
||||
|
||||
if [[ -z ${BUILDKITE:-} ]]
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user