Compare commits
14 Commits
master-roc
...
release_1.
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
e7decb9775 | ||
|
|
1920118bcb | ||
|
|
2032547426 | ||
|
|
e7ac2486eb | ||
|
|
a3d195e73e | ||
|
|
fab3c05ced | ||
|
|
584b45a9cc | ||
|
|
30c1b5c54c | ||
|
|
36e247aca4 | ||
|
|
c4aff733bb | ||
|
|
cdbfd21d31 | ||
|
|
508a0b0dbd | ||
|
|
e04e773f9f | ||
|
|
1debabb321 |
2
.github/workflows/jvm_tests.yml
vendored
2
.github/workflows/jvm_tests.yml
vendored
@ -9,7 +9,7 @@ jobs:
|
|||||||
strategy:
|
strategy:
|
||||||
fail-fast: false
|
fail-fast: false
|
||||||
matrix:
|
matrix:
|
||||||
os: [windows-latest, ubuntu-latest]
|
os: [windows-latest, ubuntu-latest, macos-10.15]
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v2
|
- uses: actions/checkout@v2
|
||||||
|
|||||||
37
.github/workflows/python_tests.yml
vendored
37
.github/workflows/python_tests.yml
vendored
@ -51,7 +51,8 @@ jobs:
|
|||||||
strategy:
|
strategy:
|
||||||
matrix:
|
matrix:
|
||||||
config:
|
config:
|
||||||
- {os: windows-2016, compiler: 'msvc', python-version: '3.8'}
|
- {os: windows-2016, python-version: '3.8'}
|
||||||
|
- {os: macos-10.15, python-version "3.8" }
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v2
|
- uses: actions/checkout@v2
|
||||||
@ -71,15 +72,27 @@ jobs:
|
|||||||
conda info
|
conda info
|
||||||
conda list
|
conda list
|
||||||
|
|
||||||
- name: Build XGBoost with msvc
|
- name: Build XGBoost on Windows
|
||||||
shell: bash -l {0}
|
shell: bash -l {0}
|
||||||
if: matrix.config.compiler == 'msvc'
|
if: matrix.config.os == 'windows-2016'
|
||||||
run: |
|
run: |
|
||||||
mkdir build_msvc
|
mkdir build_msvc
|
||||||
cd build_msvc
|
cd build_msvc
|
||||||
cmake .. -G"Visual Studio 15 2017" -DCMAKE_CONFIGURATION_TYPES="Release" -A x64 -DGOOGLE_TEST=ON -DUSE_DMLC_GTEST=ON
|
cmake .. -G"Visual Studio 15 2017" -DCMAKE_CONFIGURATION_TYPES="Release" -A x64 -DGOOGLE_TEST=ON -DUSE_DMLC_GTEST=ON
|
||||||
cmake --build . --config Release --parallel $(nproc)
|
cmake --build . --config Release --parallel $(nproc)
|
||||||
|
|
||||||
|
- name: Build XGBoost on macos
|
||||||
|
if: matrix.config.os == 'macos-10.15'
|
||||||
|
run: |
|
||||||
|
wget https://raw.githubusercontent.com/Homebrew/homebrew-core/679923b4eb48a8dc7ecc1f05d06063cd79b3fc00/Formula/libomp.rb -O $(find $(brew --repository) -name libomp.rb)
|
||||||
|
brew install ninja libomp
|
||||||
|
brew pin libomp
|
||||||
|
|
||||||
|
mkdir build
|
||||||
|
cd build
|
||||||
|
cmake .. -GNinja -DGOOGLE_TEST=ON -DUSE_DMLC_GTEST=ON
|
||||||
|
ninja
|
||||||
|
|
||||||
- name: Install Python package
|
- name: Install Python package
|
||||||
shell: bash -l {0}
|
shell: bash -l {0}
|
||||||
run: |
|
run: |
|
||||||
@ -92,3 +105,21 @@ jobs:
|
|||||||
shell: bash -l {0}
|
shell: bash -l {0}
|
||||||
run: |
|
run: |
|
||||||
pytest -s -v ./tests/python
|
pytest -s -v ./tests/python
|
||||||
|
|
||||||
|
- name: Rename Python wheel
|
||||||
|
shell: bash -l {0}
|
||||||
|
if: matrix.config.os == 'macos-10.15'
|
||||||
|
run: |
|
||||||
|
TAG=macosx_10_15_x86_64.macosx_11_0_x86_64.macosx_12_0_x86_64
|
||||||
|
python tests/ci_build/rename_whl.py python-package/dist/*.whl ${{ github.sha }} ${TAG}
|
||||||
|
|
||||||
|
- name: Upload Python wheel
|
||||||
|
shell: bash -l {0}
|
||||||
|
if: |
|
||||||
|
(github.ref == 'refs/heads/master' || contains(github.ref, 'refs/heads/release_')) &&
|
||||||
|
matrix.os == 'macos-latest'
|
||||||
|
run: |
|
||||||
|
python -m awscli s3 cp python-package/dist/*.whl s3://xgboost-nightly-builds/${{ steps.extract_branch.outputs.branch }}/ --acl public-read
|
||||||
|
env:
|
||||||
|
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID_IAM_S3_UPLOADER }}
|
||||||
|
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY_IAM_S3_UPLOADER }}
|
||||||
|
|||||||
17
.github/workflows/r_tests.yml
vendored
17
.github/workflows/r_tests.yml
vendored
@ -3,7 +3,7 @@ name: XGBoost-R-Tests
|
|||||||
on: [push, pull_request]
|
on: [push, pull_request]
|
||||||
|
|
||||||
env:
|
env:
|
||||||
R_PACKAGES: c('XML', 'igraph', 'data.table', 'ggplot2', 'DiagrammeR', 'Ckmeans.1d.dp', 'vcd', 'testthat', 'lintr', 'knitr', 'rmarkdown', 'e1071', 'cplm', 'devtools', 'float', 'titanic')
|
R_PACKAGES: c('XML', 'data.table', 'ggplot2', 'DiagrammeR', 'Ckmeans.1d.dp', 'vcd', 'testthat', 'lintr', 'knitr', 'rmarkdown', 'e1071', 'cplm', 'devtools', 'float', 'titanic')
|
||||||
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
|
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
@ -40,6 +40,11 @@ jobs:
|
|||||||
install.packages(${{ env.R_PACKAGES }},
|
install.packages(${{ env.R_PACKAGES }},
|
||||||
repos = 'http://cloud.r-project.org',
|
repos = 'http://cloud.r-project.org',
|
||||||
dependencies = c('Depends', 'Imports', 'LinkingTo'))
|
dependencies = c('Depends', 'Imports', 'LinkingTo'))
|
||||||
|
- name: Install igraph on Windows
|
||||||
|
shell: Rscript {0}
|
||||||
|
if: matrix.config.os == 'windows-latest'
|
||||||
|
run: |
|
||||||
|
install.packages('igraph', type='binary')
|
||||||
|
|
||||||
- name: Run lintr
|
- name: Run lintr
|
||||||
run: |
|
run: |
|
||||||
@ -83,6 +88,11 @@ jobs:
|
|||||||
install.packages(${{ env.R_PACKAGES }},
|
install.packages(${{ env.R_PACKAGES }},
|
||||||
repos = 'http://cloud.r-project.org',
|
repos = 'http://cloud.r-project.org',
|
||||||
dependencies = c('Depends', 'Imports', 'LinkingTo'))
|
dependencies = c('Depends', 'Imports', 'LinkingTo'))
|
||||||
|
- name: Install igraph on Windows
|
||||||
|
shell: Rscript {0}
|
||||||
|
if: matrix.config.os == 'windows-2016'
|
||||||
|
run: |
|
||||||
|
install.packages('igraph', type='binary', dependencies = c('Depends', 'Imports', 'LinkingTo'))
|
||||||
|
|
||||||
- uses: actions/setup-python@v2
|
- uses: actions/setup-python@v2
|
||||||
with:
|
with:
|
||||||
@ -91,7 +101,7 @@ jobs:
|
|||||||
|
|
||||||
- name: Test R
|
- name: Test R
|
||||||
run: |
|
run: |
|
||||||
python tests/ci_build/test_r_package.py --compiler="${{ matrix.config.compiler }}" --build-tool="${{ matrix.config.build }}"
|
python tests/ci_build/test_r_package.py --compiler='${{ matrix.config.compiler }}' --build-tool='${{ matrix.config.build }}'
|
||||||
|
|
||||||
test-R-CRAN:
|
test-R-CRAN:
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
@ -115,7 +125,7 @@ jobs:
|
|||||||
|
|
||||||
- name: Install system packages
|
- name: Install system packages
|
||||||
run: |
|
run: |
|
||||||
sudo apt-get update && sudo apt-get install libcurl4-openssl-dev libssl-dev libssh2-1-dev libgit2-dev pandoc pandoc-citeproc
|
sudo apt-get update && sudo apt-get install libcurl4-openssl-dev libssl-dev libssh2-1-dev libgit2-dev pandoc pandoc-citeproc libglpk-dev
|
||||||
|
|
||||||
- name: Cache R packages
|
- name: Cache R packages
|
||||||
uses: actions/cache@v2
|
uses: actions/cache@v2
|
||||||
@ -130,6 +140,7 @@ jobs:
|
|||||||
install.packages(${{ env.R_PACKAGES }},
|
install.packages(${{ env.R_PACKAGES }},
|
||||||
repos = 'http://cloud.r-project.org',
|
repos = 'http://cloud.r-project.org',
|
||||||
dependencies = c('Depends', 'Imports', 'LinkingTo'))
|
dependencies = c('Depends', 'Imports', 'LinkingTo'))
|
||||||
|
install.packages('igraph', repos = 'http://cloud.r-project.org', dependencies = c('Depends', 'Imports', 'LinkingTo'))
|
||||||
|
|
||||||
- name: Check R Package
|
- name: Check R Package
|
||||||
run: |
|
run: |
|
||||||
|
|||||||
10
.travis.yml
10
.travis.yml
@ -10,14 +10,6 @@ env:
|
|||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
include:
|
include:
|
||||||
- os: osx
|
|
||||||
arch: amd64
|
|
||||||
osx_image: xcode10.2
|
|
||||||
env: TASK=python_test
|
|
||||||
- os: osx
|
|
||||||
arch: amd64
|
|
||||||
osx_image: xcode10.2
|
|
||||||
env: TASK=java_test
|
|
||||||
- os: linux
|
- os: linux
|
||||||
arch: s390x
|
arch: s390x
|
||||||
env: TASK=s390x_test
|
env: TASK=s390x_test
|
||||||
@ -33,8 +25,6 @@ addons:
|
|||||||
|
|
||||||
before_install:
|
before_install:
|
||||||
- source tests/travis/travis_setup_env.sh
|
- source tests/travis/travis_setup_env.sh
|
||||||
- if [ "${TASK}" != "python_sdist_test" ]; then export PYTHONPATH=${PYTHONPATH}:${PWD}/python-package; fi
|
|
||||||
- echo "MAVEN_OPTS='-Xmx2g -XX:MaxPermSize=1024m -XX:ReservedCodeCacheSize=512m -Dorg.slf4j.simpleLogger.defaultLogLevel=error'" > ~/.mavenrc
|
|
||||||
|
|
||||||
install:
|
install:
|
||||||
- source tests/travis/setup.sh
|
- source tests/travis/setup.sh
|
||||||
|
|||||||
@ -4,3 +4,4 @@
|
|||||||
^.*\.Rproj$
|
^.*\.Rproj$
|
||||||
^\.Rproj\.user$
|
^\.Rproj\.user$
|
||||||
README.md
|
README.md
|
||||||
|
CMakeLists.txt
|
||||||
|
|||||||
@ -1,8 +1,8 @@
|
|||||||
Package: xgboost
|
Package: xgboost
|
||||||
Type: Package
|
Type: Package
|
||||||
Title: Extreme Gradient Boosting
|
Title: Extreme Gradient Boosting
|
||||||
Version: 1.5.0.1
|
Version: 1.5.0.2
|
||||||
Date: 2020-08-28
|
Date: 2021-11-19
|
||||||
Authors@R: c(
|
Authors@R: c(
|
||||||
person("Tianqi", "Chen", role = c("aut"),
|
person("Tianqi", "Chen", role = c("aut"),
|
||||||
email = "tianqi.tchen@gmail.com"),
|
email = "tianqi.tchen@gmail.com"),
|
||||||
|
|||||||
@ -397,6 +397,7 @@ predict.xgb.Booster <- function(object, newdata, missing = NA, outputmargin = FA
|
|||||||
shape <- predts$shape
|
shape <- predts$shape
|
||||||
ret <- predts$results
|
ret <- predts$results
|
||||||
|
|
||||||
|
n_ret <- length(ret)
|
||||||
n_row <- nrow(newdata)
|
n_row <- nrow(newdata)
|
||||||
if (n_row != shape[1]) {
|
if (n_row != shape[1]) {
|
||||||
stop("Incorrect predict shape.")
|
stop("Incorrect predict shape.")
|
||||||
@ -405,36 +406,55 @@ predict.xgb.Booster <- function(object, newdata, missing = NA, outputmargin = FA
|
|||||||
arr <- array(data = ret, dim = rev(shape))
|
arr <- array(data = ret, dim = rev(shape))
|
||||||
|
|
||||||
cnames <- if (!is.null(colnames(newdata))) c(colnames(newdata), "BIAS") else NULL
|
cnames <- if (!is.null(colnames(newdata))) c(colnames(newdata), "BIAS") else NULL
|
||||||
|
n_groups <- shape[2]
|
||||||
|
|
||||||
|
## Needed regardless of whether strict shape is being used.
|
||||||
if (predcontrib) {
|
if (predcontrib) {
|
||||||
dimnames(arr) <- list(cnames, NULL, NULL)
|
dimnames(arr) <- list(cnames, NULL, NULL)
|
||||||
if (!strict_shape) {
|
|
||||||
arr <- aperm(a = arr, perm = c(2, 3, 1)) # [group, row, col]
|
|
||||||
}
|
|
||||||
} else if (predinteraction) {
|
} else if (predinteraction) {
|
||||||
dimnames(arr) <- list(cnames, cnames, NULL, NULL)
|
dimnames(arr) <- list(cnames, cnames, NULL, NULL)
|
||||||
if (!strict_shape) {
|
|
||||||
arr <- aperm(a = arr, perm = c(3, 4, 1, 2)) # [group, row, col, col]
|
|
||||||
}
|
}
|
||||||
|
if (strict_shape) {
|
||||||
|
return(arr) # strict shape is calculated by libxgboost uniformly.
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!strict_shape) {
|
|
||||||
n_groups <- shape[2]
|
|
||||||
if (predleaf) {
|
if (predleaf) {
|
||||||
arr <- matrix(arr, nrow = n_row, byrow = TRUE)
|
## Predict leaf
|
||||||
} else if (predcontrib && n_groups != 1) {
|
arr <- if (n_ret == n_row) {
|
||||||
arr <- lapply(seq_len(n_groups), function(g) arr[g, , ])
|
matrix(arr, ncol = 1)
|
||||||
} else if (predinteraction && n_groups != 1) {
|
} else {
|
||||||
arr <- lapply(seq_len(n_groups), function(g) arr[g, , , ])
|
matrix(arr, nrow = n_row, byrow = TRUE)
|
||||||
} else if (!reshape && n_groups != 1) {
|
|
||||||
arr <- ret
|
|
||||||
} else if (reshape && n_groups != 1) {
|
|
||||||
arr <- matrix(arr, ncol = n_groups, byrow = TRUE)
|
|
||||||
}
|
}
|
||||||
arr <- drop(arr)
|
} else if (predcontrib) {
|
||||||
if (length(dim(arr)) == 1) {
|
## Predict contribution
|
||||||
arr <- as.vector(arr)
|
arr <- aperm(a = arr, perm = c(2, 3, 1)) # [group, row, col]
|
||||||
} else if (length(dim(arr)) == 2) {
|
arr <- if (n_ret == n_row) {
|
||||||
arr <- as.matrix(arr)
|
matrix(arr, ncol = 1, dimnames = list(NULL, cnames))
|
||||||
|
} else if (n_groups != 1) {
|
||||||
|
## turns array into list of matrices
|
||||||
|
lapply(seq_len(n_groups), function(g) arr[g, , ])
|
||||||
|
} else {
|
||||||
|
## remove the first axis (group)
|
||||||
|
as.matrix(arr[1, , ])
|
||||||
|
}
|
||||||
|
} else if (predinteraction) {
|
||||||
|
## Predict interaction
|
||||||
|
arr <- aperm(a = arr, perm = c(3, 4, 1, 2)) # [group, row, col, col]
|
||||||
|
arr <- if (n_ret == n_row) {
|
||||||
|
matrix(arr, ncol = 1, dimnames = list(NULL, cnames))
|
||||||
|
} else if (n_groups != 1) {
|
||||||
|
## turns array into list of matrices
|
||||||
|
lapply(seq_len(n_groups), function(g) arr[g, , , ])
|
||||||
|
} else {
|
||||||
|
## remove the first axis (group)
|
||||||
|
arr[1, , , ]
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
## Normal prediction
|
||||||
|
arr <- if (reshape && n_groups != 1) {
|
||||||
|
matrix(arr, ncol = n_groups, byrow = TRUE)
|
||||||
|
} else {
|
||||||
|
as.vector(ret)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return(arr)
|
return(arr)
|
||||||
|
|||||||
@ -11,6 +11,7 @@
|
|||||||
#' @param missing a float value to represent missing values in data (used only when input is a dense matrix).
|
#' @param missing a float value to represent missing values in data (used only when input is a dense matrix).
|
||||||
#' It is useful when a 0 or some other extreme value represents missing values in data.
|
#' It is useful when a 0 or some other extreme value represents missing values in data.
|
||||||
#' @param silent whether to suppress printing an informational message after loading from a file.
|
#' @param silent whether to suppress printing an informational message after loading from a file.
|
||||||
|
#' @param nthread Number of threads used for creating DMatrix.
|
||||||
#' @param ... the \code{info} data could be passed directly as parameters, without creating an \code{info} list.
|
#' @param ... the \code{info} data could be passed directly as parameters, without creating an \code{info} list.
|
||||||
#'
|
#'
|
||||||
#' @examples
|
#' @examples
|
||||||
|
|||||||
@ -115,14 +115,14 @@ xgb.importance <- function(feature_names = NULL, model = NULL, trees = NULL,
|
|||||||
} else {
|
} else {
|
||||||
concatenated <- list()
|
concatenated <- list()
|
||||||
output_names <- vector()
|
output_names <- vector()
|
||||||
for (importance_type in c("weight", "gain", "cover")) {
|
for (importance_type in c("weight", "total_gain", "total_cover")) {
|
||||||
args <- list(importance_type = importance_type, feature_names = feature_names)
|
args <- list(importance_type = importance_type, feature_names = feature_names, tree_idx = trees)
|
||||||
results <- .Call(
|
results <- .Call(
|
||||||
XGBoosterFeatureScore_R, model$handle, jsonlite::toJSON(args, auto_unbox = TRUE, null = "null")
|
XGBoosterFeatureScore_R, model$handle, jsonlite::toJSON(args, auto_unbox = TRUE, null = "null")
|
||||||
)
|
)
|
||||||
names(results) <- c("features", "shape", importance_type)
|
names(results) <- c("features", "shape", importance_type)
|
||||||
concatenated[
|
concatenated[
|
||||||
switch(importance_type, "weight" = "Frequency", "gain" = "Gain", "cover" = "Cover")
|
switch(importance_type, "weight" = "Frequency", "total_gain" = "Gain", "total_cover" = "Cover")
|
||||||
] <- results[importance_type]
|
] <- results[importance_type]
|
||||||
output_names <- results$features
|
output_names <- results$features
|
||||||
}
|
}
|
||||||
|
|||||||
@ -9,8 +9,8 @@ xgboost <- function(data = NULL, label = NULL, missing = NA, weight = NULL,
|
|||||||
early_stopping_rounds = NULL, maximize = NULL,
|
early_stopping_rounds = NULL, maximize = NULL,
|
||||||
save_period = NULL, save_name = "xgboost.model",
|
save_period = NULL, save_name = "xgboost.model",
|
||||||
xgb_model = NULL, callbacks = list(), ...) {
|
xgb_model = NULL, callbacks = list(), ...) {
|
||||||
|
merged <- check.booster.params(params, ...)
|
||||||
dtrain <- xgb.get.DMatrix(data, label, missing, weight, nthread = params$nthread)
|
dtrain <- xgb.get.DMatrix(data, label, missing, weight, nthread = merged$nthread)
|
||||||
|
|
||||||
watchlist <- list(train = dtrain)
|
watchlist <- list(train = dtrain)
|
||||||
|
|
||||||
|
|||||||
@ -4,7 +4,14 @@
|
|||||||
\alias{xgb.DMatrix}
|
\alias{xgb.DMatrix}
|
||||||
\title{Construct xgb.DMatrix object}
|
\title{Construct xgb.DMatrix object}
|
||||||
\usage{
|
\usage{
|
||||||
xgb.DMatrix(data, info = list(), missing = NA, silent = FALSE, ...)
|
xgb.DMatrix(
|
||||||
|
data,
|
||||||
|
info = list(),
|
||||||
|
missing = NA,
|
||||||
|
silent = FALSE,
|
||||||
|
nthread = NULL,
|
||||||
|
...
|
||||||
|
)
|
||||||
}
|
}
|
||||||
\arguments{
|
\arguments{
|
||||||
\item{data}{a \code{matrix} object (either numeric or integer), a \code{dgCMatrix} object, or a character
|
\item{data}{a \code{matrix} object (either numeric or integer), a \code{dgCMatrix} object, or a character
|
||||||
@ -18,6 +25,8 @@ It is useful when a 0 or some other extreme value represents missing values in d
|
|||||||
|
|
||||||
\item{silent}{whether to suppress printing an informational message after loading from a file.}
|
\item{silent}{whether to suppress printing an informational message after loading from a file.}
|
||||||
|
|
||||||
|
\item{nthread}{Number of threads used for creating DMatrix.}
|
||||||
|
|
||||||
\item{...}{the \code{info} data could be passed directly as parameters, without creating an \code{info} list.}
|
\item{...}{the \code{info} data could be passed directly as parameters, without creating an \code{info} list.}
|
||||||
}
|
}
|
||||||
\description{
|
\description{
|
||||||
|
|||||||
@ -1,3 +1,4 @@
|
|||||||
|
library(testthat)
|
||||||
context('Test helper functions')
|
context('Test helper functions')
|
||||||
|
|
||||||
require(xgboost)
|
require(xgboost)
|
||||||
@ -227,7 +228,7 @@ if (grepl('Windows', Sys.info()[['sysname']]) ||
|
|||||||
X <- 10^runif(100, -20, 20)
|
X <- 10^runif(100, -20, 20)
|
||||||
if (capabilities('long.double')) {
|
if (capabilities('long.double')) {
|
||||||
X2X <- as.numeric(format(X, digits = 17))
|
X2X <- as.numeric(format(X, digits = 17))
|
||||||
expect_identical(X, X2X)
|
expect_equal(X, X2X, tolerance = float_tolerance)
|
||||||
}
|
}
|
||||||
# retrieved attributes to be the same as written
|
# retrieved attributes to be the same as written
|
||||||
for (x in X) {
|
for (x in X) {
|
||||||
@ -310,7 +311,35 @@ test_that("xgb.importance works with and without feature names", {
|
|||||||
# for multiclass
|
# for multiclass
|
||||||
imp.Tree <- xgb.importance(model = mbst.Tree)
|
imp.Tree <- xgb.importance(model = mbst.Tree)
|
||||||
expect_equal(dim(imp.Tree), c(4, 4))
|
expect_equal(dim(imp.Tree), c(4, 4))
|
||||||
xgb.importance(model = mbst.Tree, trees = seq(from = 0, by = nclass, length.out = nrounds))
|
|
||||||
|
trees <- seq(from = 0, by = 2, length.out = 2)
|
||||||
|
importance <- xgb.importance(feature_names = feature.names, model = bst.Tree, trees = trees)
|
||||||
|
|
||||||
|
importance_from_dump <- function() {
|
||||||
|
model_text_dump <- xgb.dump(model = bst.Tree, with_stats = TRUE, trees = trees)
|
||||||
|
imp <- xgb.model.dt.tree(
|
||||||
|
feature_names = feature.names,
|
||||||
|
text = model_text_dump,
|
||||||
|
trees = trees
|
||||||
|
)[
|
||||||
|
Feature != "Leaf", .(
|
||||||
|
Gain = sum(Quality),
|
||||||
|
Cover = sum(Cover),
|
||||||
|
Frequency = .N
|
||||||
|
),
|
||||||
|
by = Feature
|
||||||
|
][
|
||||||
|
, `:=`(
|
||||||
|
Gain = Gain / sum(Gain),
|
||||||
|
Cover = Cover / sum(Cover),
|
||||||
|
Frequency = Frequency / sum(Frequency)
|
||||||
|
)
|
||||||
|
][
|
||||||
|
order(Gain, decreasing = TRUE)
|
||||||
|
]
|
||||||
|
imp
|
||||||
|
}
|
||||||
|
expect_equal(importance_from_dump(), importance, tolerance = 1e-6)
|
||||||
})
|
})
|
||||||
|
|
||||||
test_that("xgb.importance works with GLM model", {
|
test_that("xgb.importance works with GLM model", {
|
||||||
|
|||||||
@ -1 +1 @@
|
|||||||
@xgboost_VERSION_MAJOR@.@xgboost_VERSION_MINOR@.@xgboost_VERSION_PATCH@-dev
|
@xgboost_VERSION_MAJOR@.@xgboost_VERSION_MINOR@.@xgboost_VERSION_PATCH@
|
||||||
@ -18,7 +18,7 @@ Making a Release
|
|||||||
|
|
||||||
1. Create an issue for the release, noting the estimated date and expected features or major fixes, pin that issue.
|
1. Create an issue for the release, noting the estimated date and expected features or major fixes, pin that issue.
|
||||||
2. Bump release version.
|
2. Bump release version.
|
||||||
1. Modify ``CMakeLists.txt`` source tree, run CMake.
|
1. Modify ``CMakeLists.txt`` in source tree and ``cmake/Python_version.in`` if needed, run CMake.
|
||||||
2. Modify ``DESCRIPTION`` in R-package.
|
2. Modify ``DESCRIPTION`` in R-package.
|
||||||
3. Run ``change_version.sh`` in ``jvm-packages/dev``
|
3. Run ``change_version.sh`` in ``jvm-packages/dev``
|
||||||
3. Commit the change, create a PR on GitHub on release branch. Port the bumped version to default branch, optionally with the postfix ``SNAPSHOT``.
|
3. Commit the change, create a PR on GitHub on release branch. Port the bumped version to default branch, optionally with the postfix ``SNAPSHOT``.
|
||||||
|
|||||||
@ -32,8 +32,8 @@ After 1.4 release, we added a new parameter called ``strict_shape``, one can set
|
|||||||
- When using ``output_margin`` to avoid transformation and ``strict_shape`` is set to ``True``:
|
- When using ``output_margin`` to avoid transformation and ``strict_shape`` is set to ``True``:
|
||||||
|
|
||||||
Similar to the previous case, output is a 2-dim array, except that ``multi:softmax``
|
Similar to the previous case, output is a 2-dim array, except that ``multi:softmax``
|
||||||
has equivalent output of ``multi:softprob`` due to dropped transformation. If strict
|
has equivalent output shape of ``multi:softprob`` due to dropped transformation. If
|
||||||
shape is set to False then output can have 1 or 2 dim depending on used model.
|
strict shape is set to False then output can have 1 or 2 dim depending on used model.
|
||||||
|
|
||||||
- When using ``preds_contribs`` with ``strict_shape`` set to ``True``:
|
- When using ``preds_contribs`` with ``strict_shape`` set to ``True``:
|
||||||
|
|
||||||
|
|||||||
@ -211,7 +211,7 @@ struct Entry {
|
|||||||
*/
|
*/
|
||||||
struct BatchParam {
|
struct BatchParam {
|
||||||
/*! \brief The GPU device to use. */
|
/*! \brief The GPU device to use. */
|
||||||
int gpu_id;
|
int gpu_id {-1};
|
||||||
/*! \brief Maximum number of bins per feature for histograms. */
|
/*! \brief Maximum number of bins per feature for histograms. */
|
||||||
int max_bin{0};
|
int max_bin{0};
|
||||||
/*! \brief Hessian, used for sketching with future approx implementation. */
|
/*! \brief Hessian, used for sketching with future approx implementation. */
|
||||||
|
|||||||
@ -182,9 +182,10 @@ class GradientBooster : public Model, public Configurable {
|
|||||||
bool with_stats,
|
bool with_stats,
|
||||||
std::string format) const = 0;
|
std::string format) const = 0;
|
||||||
|
|
||||||
virtual void FeatureScore(std::string const &importance_type,
|
virtual void FeatureScore(std::string const& importance_type,
|
||||||
std::vector<bst_feature_t> *features,
|
common::Span<int32_t const> trees,
|
||||||
std::vector<float> *scores) const = 0;
|
std::vector<bst_feature_t>* features,
|
||||||
|
std::vector<float>* scores) const = 0;
|
||||||
/*!
|
/*!
|
||||||
* \brief Whether the current booster uses GPU.
|
* \brief Whether the current booster uses GPU.
|
||||||
*/
|
*/
|
||||||
|
|||||||
@ -155,9 +155,10 @@ class Learner : public Model, public Configurable, public dmlc::Serializable {
|
|||||||
/*!
|
/*!
|
||||||
* \brief Calculate feature score. See doc in C API for outputs.
|
* \brief Calculate feature score. See doc in C API for outputs.
|
||||||
*/
|
*/
|
||||||
virtual void CalcFeatureScore(std::string const &importance_type,
|
virtual void CalcFeatureScore(std::string const& importance_type,
|
||||||
std::vector<bst_feature_t> *features,
|
common::Span<int32_t const> trees,
|
||||||
std::vector<float> *scores) = 0;
|
std::vector<bst_feature_t>* features,
|
||||||
|
std::vector<float>* scores) = 0;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* \brief Get number of boosted rounds from gradient booster.
|
* \brief Get number of boosted rounds from gradient booster.
|
||||||
|
|||||||
@ -6,7 +6,7 @@
|
|||||||
|
|
||||||
<groupId>ml.dmlc</groupId>
|
<groupId>ml.dmlc</groupId>
|
||||||
<artifactId>xgboost-jvm_2.12</artifactId>
|
<artifactId>xgboost-jvm_2.12</artifactId>
|
||||||
<version>1.5.0-SNAPSHOT</version>
|
<version>1.5.0</version>
|
||||||
<packaging>pom</packaging>
|
<packaging>pom</packaging>
|
||||||
<name>XGBoost JVM Package</name>
|
<name>XGBoost JVM Package</name>
|
||||||
<description>JVM Package for XGBoost</description>
|
<description>JVM Package for XGBoost</description>
|
||||||
|
|||||||
@ -6,10 +6,10 @@
|
|||||||
<parent>
|
<parent>
|
||||||
<groupId>ml.dmlc</groupId>
|
<groupId>ml.dmlc</groupId>
|
||||||
<artifactId>xgboost-jvm_2.12</artifactId>
|
<artifactId>xgboost-jvm_2.12</artifactId>
|
||||||
<version>1.5.0-SNAPSHOT</version>
|
<version>1.5.0</version>
|
||||||
</parent>
|
</parent>
|
||||||
<artifactId>xgboost4j-example_2.12</artifactId>
|
<artifactId>xgboost4j-example_2.12</artifactId>
|
||||||
<version>1.5.0-SNAPSHOT</version>
|
<version>1.5.0</version>
|
||||||
<packaging>jar</packaging>
|
<packaging>jar</packaging>
|
||||||
<build>
|
<build>
|
||||||
<plugins>
|
<plugins>
|
||||||
@ -26,7 +26,7 @@
|
|||||||
<dependency>
|
<dependency>
|
||||||
<groupId>ml.dmlc</groupId>
|
<groupId>ml.dmlc</groupId>
|
||||||
<artifactId>xgboost4j-spark_${scala.binary.version}</artifactId>
|
<artifactId>xgboost4j-spark_${scala.binary.version}</artifactId>
|
||||||
<version>1.5.0-SNAPSHOT</version>
|
<version>1.5.0</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.spark</groupId>
|
<groupId>org.apache.spark</groupId>
|
||||||
@ -37,7 +37,7 @@
|
|||||||
<dependency>
|
<dependency>
|
||||||
<groupId>ml.dmlc</groupId>
|
<groupId>ml.dmlc</groupId>
|
||||||
<artifactId>xgboost4j-flink_${scala.binary.version}</artifactId>
|
<artifactId>xgboost4j-flink_${scala.binary.version}</artifactId>
|
||||||
<version>1.5.0-SNAPSHOT</version>
|
<version>1.5.0</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.commons</groupId>
|
<groupId>org.apache.commons</groupId>
|
||||||
|
|||||||
@ -6,10 +6,10 @@
|
|||||||
<parent>
|
<parent>
|
||||||
<groupId>ml.dmlc</groupId>
|
<groupId>ml.dmlc</groupId>
|
||||||
<artifactId>xgboost-jvm_2.12</artifactId>
|
<artifactId>xgboost-jvm_2.12</artifactId>
|
||||||
<version>1.5.0-SNAPSHOT</version>
|
<version>1.5.0</version>
|
||||||
</parent>
|
</parent>
|
||||||
<artifactId>xgboost4j-flink_2.12</artifactId>
|
<artifactId>xgboost4j-flink_2.12</artifactId>
|
||||||
<version>1.5.0-SNAPSHOT</version>
|
<version>1.5.0</version>
|
||||||
<build>
|
<build>
|
||||||
<plugins>
|
<plugins>
|
||||||
<plugin>
|
<plugin>
|
||||||
@ -26,7 +26,7 @@
|
|||||||
<dependency>
|
<dependency>
|
||||||
<groupId>ml.dmlc</groupId>
|
<groupId>ml.dmlc</groupId>
|
||||||
<artifactId>xgboost4j_${scala.binary.version}</artifactId>
|
<artifactId>xgboost4j_${scala.binary.version}</artifactId>
|
||||||
<version>1.5.0-SNAPSHOT</version>
|
<version>1.5.0</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.commons</groupId>
|
<groupId>org.apache.commons</groupId>
|
||||||
|
|||||||
@ -6,10 +6,10 @@
|
|||||||
<parent>
|
<parent>
|
||||||
<groupId>ml.dmlc</groupId>
|
<groupId>ml.dmlc</groupId>
|
||||||
<artifactId>xgboost-jvm_2.12</artifactId>
|
<artifactId>xgboost-jvm_2.12</artifactId>
|
||||||
<version>1.5.0-SNAPSHOT</version>
|
<version>1.5.0</version>
|
||||||
</parent>
|
</parent>
|
||||||
<artifactId>xgboost4j-gpu_2.12</artifactId>
|
<artifactId>xgboost4j-gpu_2.12</artifactId>
|
||||||
<version>1.5.0-SNAPSHOT</version>
|
<version>1.5.0</version>
|
||||||
<packaging>jar</packaging>
|
<packaging>jar</packaging>
|
||||||
|
|
||||||
<properties>
|
<properties>
|
||||||
|
|||||||
@ -6,7 +6,7 @@
|
|||||||
<parent>
|
<parent>
|
||||||
<groupId>ml.dmlc</groupId>
|
<groupId>ml.dmlc</groupId>
|
||||||
<artifactId>xgboost-jvm_2.12</artifactId>
|
<artifactId>xgboost-jvm_2.12</artifactId>
|
||||||
<version>1.5.0-SNAPSHOT</version>
|
<version>1.5.0</version>
|
||||||
</parent>
|
</parent>
|
||||||
<artifactId>xgboost4j-spark-gpu_2.12</artifactId>
|
<artifactId>xgboost4j-spark-gpu_2.12</artifactId>
|
||||||
<build>
|
<build>
|
||||||
@ -24,7 +24,7 @@
|
|||||||
<dependency>
|
<dependency>
|
||||||
<groupId>ml.dmlc</groupId>
|
<groupId>ml.dmlc</groupId>
|
||||||
<artifactId>xgboost4j-gpu_${scala.binary.version}</artifactId>
|
<artifactId>xgboost4j-gpu_${scala.binary.version}</artifactId>
|
||||||
<version>1.5.0-SNAPSHOT</version>
|
<version>1.5.0</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.spark</groupId>
|
<groupId>org.apache.spark</groupId>
|
||||||
|
|||||||
@ -6,7 +6,7 @@
|
|||||||
<parent>
|
<parent>
|
||||||
<groupId>ml.dmlc</groupId>
|
<groupId>ml.dmlc</groupId>
|
||||||
<artifactId>xgboost-jvm_2.12</artifactId>
|
<artifactId>xgboost-jvm_2.12</artifactId>
|
||||||
<version>1.5.0-SNAPSHOT</version>
|
<version>1.5.0</version>
|
||||||
</parent>
|
</parent>
|
||||||
<artifactId>xgboost4j-spark_2.12</artifactId>
|
<artifactId>xgboost4j-spark_2.12</artifactId>
|
||||||
<build>
|
<build>
|
||||||
@ -24,7 +24,7 @@
|
|||||||
<dependency>
|
<dependency>
|
||||||
<groupId>ml.dmlc</groupId>
|
<groupId>ml.dmlc</groupId>
|
||||||
<artifactId>xgboost4j_${scala.binary.version}</artifactId>
|
<artifactId>xgboost4j_${scala.binary.version}</artifactId>
|
||||||
<version>1.5.0-SNAPSHOT</version>
|
<version>1.5.0</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.spark</groupId>
|
<groupId>org.apache.spark</groupId>
|
||||||
|
|||||||
@ -6,10 +6,10 @@
|
|||||||
<parent>
|
<parent>
|
||||||
<groupId>ml.dmlc</groupId>
|
<groupId>ml.dmlc</groupId>
|
||||||
<artifactId>xgboost-jvm_2.12</artifactId>
|
<artifactId>xgboost-jvm_2.12</artifactId>
|
||||||
<version>1.5.0-SNAPSHOT</version>
|
<version>1.5.0</version>
|
||||||
</parent>
|
</parent>
|
||||||
<artifactId>xgboost4j_2.12</artifactId>
|
<artifactId>xgboost4j_2.12</artifactId>
|
||||||
<version>1.5.0-SNAPSHOT</version>
|
<version>1.5.0</version>
|
||||||
<packaging>jar</packaging>
|
<packaging>jar</packaging>
|
||||||
|
|
||||||
<dependencies>
|
<dependencies>
|
||||||
|
|||||||
@ -1 +1 @@
|
|||||||
1.5.0-dev
|
1.5.0
|
||||||
|
|||||||
@ -174,7 +174,9 @@ __model_doc = f'''
|
|||||||
Device ordinal.
|
Device ordinal.
|
||||||
validate_parameters : Optional[bool]
|
validate_parameters : Optional[bool]
|
||||||
Give warnings for unknown parameter.
|
Give warnings for unknown parameter.
|
||||||
|
predictor : Optional[str]
|
||||||
|
Force XGBoost to use specific predictor, available choices are [cpu_predictor,
|
||||||
|
gpu_predictor].
|
||||||
enable_categorical : bool
|
enable_categorical : bool
|
||||||
|
|
||||||
.. versionadded:: 1.5.0
|
.. versionadded:: 1.5.0
|
||||||
@ -807,7 +809,11 @@ class XGBModel(XGBModelBase):
|
|||||||
# Inplace predict doesn't handle as many data types as DMatrix, but it's
|
# Inplace predict doesn't handle as many data types as DMatrix, but it's
|
||||||
# sufficient for dask interface where input is simpler.
|
# sufficient for dask interface where input is simpler.
|
||||||
predictor = self.get_params().get("predictor", None)
|
predictor = self.get_params().get("predictor", None)
|
||||||
if predictor in ("auto", None) and self.booster != "gblinear":
|
if (
|
||||||
|
not self.enable_categorical
|
||||||
|
and predictor in ("auto", None)
|
||||||
|
and self.booster != "gblinear"
|
||||||
|
):
|
||||||
return True
|
return True
|
||||||
return False
|
return False
|
||||||
|
|
||||||
@ -834,7 +840,9 @@ class XGBModel(XGBModelBase):
|
|||||||
iteration_range: Optional[Tuple[int, int]] = None,
|
iteration_range: Optional[Tuple[int, int]] = None,
|
||||||
) -> np.ndarray:
|
) -> np.ndarray:
|
||||||
"""Predict with `X`. If the model is trained with early stopping, then `best_iteration`
|
"""Predict with `X`. If the model is trained with early stopping, then `best_iteration`
|
||||||
is used automatically.
|
is used automatically. For tree models, when data is on GPU, like cupy array or
|
||||||
|
cuDF dataframe and `predictor` is not specified, the prediction is run on GPU
|
||||||
|
automatically, otherwise it will run on CPU.
|
||||||
|
|
||||||
.. note:: This function is only thread safe for `gbtree` and `dart`.
|
.. note:: This function is only thread safe for `gbtree` and `dart`.
|
||||||
|
|
||||||
@ -862,6 +870,7 @@ class XGBModel(XGBModelBase):
|
|||||||
Returns
|
Returns
|
||||||
-------
|
-------
|
||||||
prediction
|
prediction
|
||||||
|
|
||||||
"""
|
"""
|
||||||
iteration_range = _convert_ntree_limit(
|
iteration_range = _convert_ntree_limit(
|
||||||
self.get_booster(), ntree_limit, iteration_range
|
self.get_booster(), ntree_limit, iteration_range
|
||||||
@ -886,7 +895,10 @@ class XGBModel(XGBModelBase):
|
|||||||
pass
|
pass
|
||||||
|
|
||||||
test = DMatrix(
|
test = DMatrix(
|
||||||
X, base_margin=base_margin, missing=self.missing, nthread=self.n_jobs
|
X, base_margin=base_margin,
|
||||||
|
missing=self.missing,
|
||||||
|
nthread=self.n_jobs,
|
||||||
|
enable_categorical=self.enable_categorical
|
||||||
)
|
)
|
||||||
return self.get_booster().predict(
|
return self.get_booster().predict(
|
||||||
data=test,
|
data=test,
|
||||||
|
|||||||
@ -472,13 +472,15 @@ def cv(params, dtrain, num_boost_round=10, nfold=3, stratified=False, folds=None
|
|||||||
if is_new_callback:
|
if is_new_callback:
|
||||||
assert all(isinstance(c, callback.TrainingCallback)
|
assert all(isinstance(c, callback.TrainingCallback)
|
||||||
for c in callbacks), "You can't mix new and old callback styles."
|
for c in callbacks), "You can't mix new and old callback styles."
|
||||||
if isinstance(verbose_eval, bool) and verbose_eval:
|
if verbose_eval:
|
||||||
verbose_eval = 1 if verbose_eval is True else verbose_eval
|
verbose_eval = 1 if verbose_eval is True else verbose_eval
|
||||||
callbacks.append(callback.EvaluationMonitor(period=verbose_eval,
|
callbacks.append(
|
||||||
show_stdv=show_stdv))
|
callback.EvaluationMonitor(period=verbose_eval, show_stdv=show_stdv)
|
||||||
|
)
|
||||||
if early_stopping_rounds:
|
if early_stopping_rounds:
|
||||||
callbacks.append(callback.EarlyStopping(
|
callbacks.append(
|
||||||
rounds=early_stopping_rounds, maximize=maximize))
|
callback.EarlyStopping(rounds=early_stopping_rounds, maximize=maximize)
|
||||||
|
)
|
||||||
callbacks = callback.CallbackContainer(callbacks, metric=feval, is_cv=True)
|
callbacks = callback.CallbackContainer(callbacks, metric=feval, is_cv=True)
|
||||||
else:
|
else:
|
||||||
callbacks = _configure_deprecated_callbacks(
|
callbacks = _configure_deprecated_callbacks(
|
||||||
|
|||||||
@ -1159,9 +1159,17 @@ XGB_DLL int XGBoosterFeatureScore(BoosterHandle handle, char const *json_config,
|
|||||||
custom_feature_names = get<Array const>(config["feature_names"]);
|
custom_feature_names = get<Array const>(config["feature_names"]);
|
||||||
}
|
}
|
||||||
|
|
||||||
auto& scores = learner->GetThreadLocal().ret_vec_float;
|
std::vector<int32_t> tree_idx;
|
||||||
|
if (!IsA<Null>(config["tree_idx"])) {
|
||||||
|
auto j_tree_idx = get<Array const>(config["tree_idx"]);
|
||||||
|
for (auto const &idx : j_tree_idx) {
|
||||||
|
tree_idx.push_back(get<Integer const>(idx));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
auto &scores = learner->GetThreadLocal().ret_vec_float;
|
||||||
std::vector<bst_feature_t> features;
|
std::vector<bst_feature_t> features;
|
||||||
learner->CalcFeatureScore(importance, &features, &scores);
|
learner->CalcFeatureScore(importance, common::Span<int32_t const>(tree_idx), &features, &scores);
|
||||||
|
|
||||||
auto n_features = learner->GetNumFeature();
|
auto n_features = learner->GetNumFeature();
|
||||||
GenerateFeatureMap(learner, custom_feature_names, n_features, &feature_map);
|
GenerateFeatureMap(learner, custom_feature_names, n_features, &feature_map);
|
||||||
|
|||||||
@ -7,11 +7,28 @@
|
|||||||
#define XGBOOST_COMMON_THREADING_UTILS_H_
|
#define XGBOOST_COMMON_THREADING_UTILS_H_
|
||||||
|
|
||||||
#include <dmlc/common.h>
|
#include <dmlc/common.h>
|
||||||
#include <vector>
|
#include <dmlc/omp.h>
|
||||||
|
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
|
#include <limits>
|
||||||
#include <type_traits> // std::is_signed
|
#include <type_traits> // std::is_signed
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
#include "xgboost/logging.h"
|
#include "xgboost/logging.h"
|
||||||
|
|
||||||
|
#if !defined(_OPENMP)
|
||||||
|
extern "C" {
|
||||||
|
inline int32_t omp_get_thread_limit() __GOMP_NOTHROW { return 1; } // NOLINT
|
||||||
|
}
|
||||||
|
#endif // !defined(_OPENMP)
|
||||||
|
|
||||||
|
// MSVC doesn't implement the thread limit.
|
||||||
|
#if defined(_OPENMP) && defined(_MSC_VER)
|
||||||
|
extern "C" {
|
||||||
|
inline int32_t omp_get_thread_limit() { return std::numeric_limits<int32_t>::max(); } // NOLINT
|
||||||
|
}
|
||||||
|
#endif // defined(_MSC_VER)
|
||||||
|
|
||||||
namespace xgboost {
|
namespace xgboost {
|
||||||
namespace common {
|
namespace common {
|
||||||
|
|
||||||
@ -153,7 +170,7 @@ struct Sched {
|
|||||||
};
|
};
|
||||||
|
|
||||||
template <typename Index, typename Func>
|
template <typename Index, typename Func>
|
||||||
void ParallelFor(Index size, size_t n_threads, Sched sched, Func fn) {
|
void ParallelFor(Index size, int32_t n_threads, Sched sched, Func fn) {
|
||||||
#if defined(_MSC_VER)
|
#if defined(_MSC_VER)
|
||||||
// msvc doesn't support unsigned integer as openmp index.
|
// msvc doesn't support unsigned integer as openmp index.
|
||||||
using OmpInd = std::conditional_t<std::is_signed<Index>::value, Index, omp_ulong>;
|
using OmpInd = std::conditional_t<std::is_signed<Index>::value, Index, omp_ulong>;
|
||||||
@ -220,6 +237,13 @@ void ParallelFor(Index size, size_t n_threads, Func fn) {
|
|||||||
template <typename Index, typename Func>
|
template <typename Index, typename Func>
|
||||||
void ParallelFor(Index size, Func fn) {
|
void ParallelFor(Index size, Func fn) {
|
||||||
ParallelFor(size, omp_get_max_threads(), Sched::Static(), fn);
|
ParallelFor(size, omp_get_max_threads(), Sched::Static(), fn);
|
||||||
|
} // !defined(_OPENMP)
|
||||||
|
|
||||||
|
|
||||||
|
inline int32_t OmpGetThreadLimit() {
|
||||||
|
int32_t limit = omp_get_thread_limit();
|
||||||
|
CHECK_GE(limit, 1) << "Invalid thread limit for OpenMP.";
|
||||||
|
return limit;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* \brief Configure parallel threads.
|
/* \brief Configure parallel threads.
|
||||||
@ -235,15 +259,18 @@ inline int32_t OmpSetNumThreads(int32_t* p_threads) {
|
|||||||
if (threads <= 0) {
|
if (threads <= 0) {
|
||||||
threads = omp_get_num_procs();
|
threads = omp_get_num_procs();
|
||||||
}
|
}
|
||||||
|
threads = std::min(threads, OmpGetThreadLimit());
|
||||||
omp_set_num_threads(threads);
|
omp_set_num_threads(threads);
|
||||||
return nthread_original;
|
return nthread_original;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline int32_t OmpSetNumThreadsWithoutHT(int32_t* p_threads) {
|
inline int32_t OmpSetNumThreadsWithoutHT(int32_t* p_threads) {
|
||||||
auto& threads = *p_threads;
|
auto& threads = *p_threads;
|
||||||
int32_t nthread_original = omp_get_max_threads();
|
int32_t nthread_original = omp_get_max_threads();
|
||||||
if (threads <= 0) {
|
if (threads <= 0) {
|
||||||
threads = nthread_original;
|
threads = nthread_original;
|
||||||
}
|
}
|
||||||
|
threads = std::min(threads, OmpGetThreadLimit());
|
||||||
omp_set_num_threads(threads);
|
omp_set_num_threads(threads);
|
||||||
return nthread_original;
|
return nthread_original;
|
||||||
}
|
}
|
||||||
@ -252,6 +279,7 @@ inline int32_t OmpGetNumThreads(int32_t n_threads) {
|
|||||||
if (n_threads <= 0) {
|
if (n_threads <= 0) {
|
||||||
n_threads = omp_get_num_procs();
|
n_threads = omp_get_num_procs();
|
||||||
}
|
}
|
||||||
|
n_threads = std::min(n_threads, OmpGetThreadLimit());
|
||||||
return n_threads;
|
return n_threads;
|
||||||
}
|
}
|
||||||
} // namespace common
|
} // namespace common
|
||||||
|
|||||||
@ -49,10 +49,10 @@ class SimpleDMatrix : public DMatrix {
|
|||||||
MetaInfo info_;
|
MetaInfo info_;
|
||||||
// Primary storage type
|
// Primary storage type
|
||||||
std::shared_ptr<SparsePage> sparse_page_ = std::make_shared<SparsePage>();
|
std::shared_ptr<SparsePage> sparse_page_ = std::make_shared<SparsePage>();
|
||||||
std::shared_ptr<CSCPage> column_page_;
|
std::shared_ptr<CSCPage> column_page_{nullptr};
|
||||||
std::shared_ptr<SortedCSCPage> sorted_column_page_;
|
std::shared_ptr<SortedCSCPage> sorted_column_page_{nullptr};
|
||||||
std::shared_ptr<EllpackPage> ellpack_page_;
|
std::shared_ptr<EllpackPage> ellpack_page_{nullptr};
|
||||||
std::shared_ptr<GHistIndexMatrix> gradient_index_;
|
std::shared_ptr<GHistIndexMatrix> gradient_index_{nullptr};
|
||||||
BatchParam batch_param_;
|
BatchParam batch_param_;
|
||||||
|
|
||||||
bool EllpackExists() const override {
|
bool EllpackExists() const override {
|
||||||
|
|||||||
@ -232,9 +232,11 @@ class GBLinear : public GradientBooster {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void FeatureScore(std::string const &importance_type,
|
void FeatureScore(std::string const &importance_type,
|
||||||
|
common::Span<int32_t const> trees,
|
||||||
std::vector<bst_feature_t> *out_features,
|
std::vector<bst_feature_t> *out_features,
|
||||||
std::vector<float> *out_scores) const override {
|
std::vector<float> *out_scores) const override {
|
||||||
CHECK(!model_.weight.empty()) << "Model is not initialized";
|
CHECK(!model_.weight.empty()) << "Model is not initialized";
|
||||||
|
CHECK(trees.empty()) << "gblinear doesn't support number of trees for feature importance.";
|
||||||
CHECK_EQ(importance_type, "weight")
|
CHECK_EQ(importance_type, "weight")
|
||||||
<< "gblinear only has `weight` defined for feature importance.";
|
<< "gblinear only has `weight` defined for feature importance.";
|
||||||
out_features->resize(this->learner_model_param_->num_feature, 0);
|
out_features->resize(this->learner_model_param_->num_feature, 0);
|
||||||
|
|||||||
@ -300,18 +300,28 @@ class GBTree : public GradientBooster {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void FeatureScore(std::string const &importance_type,
|
void FeatureScore(std::string const& importance_type, common::Span<int32_t const> trees,
|
||||||
std::vector<bst_feature_t> *features,
|
std::vector<bst_feature_t>* features,
|
||||||
std::vector<float> *scores) const override {
|
std::vector<float>* scores) const override {
|
||||||
// Because feature with no importance doesn't appear in the return value so
|
// Because feature with no importance doesn't appear in the return value so
|
||||||
// we need to set up another pair of vectors to store the values during
|
// we need to set up another pair of vectors to store the values during
|
||||||
// computation.
|
// computation.
|
||||||
std::vector<size_t> split_counts(this->model_.learner_model_param->num_feature, 0);
|
std::vector<size_t> split_counts(this->model_.learner_model_param->num_feature, 0);
|
||||||
std::vector<float> gain_map(this->model_.learner_model_param->num_feature, 0);
|
std::vector<float> gain_map(this->model_.learner_model_param->num_feature, 0);
|
||||||
|
std::vector<int32_t> tree_idx;
|
||||||
|
if (trees.empty()) {
|
||||||
|
tree_idx.resize(this->model_.trees.size());
|
||||||
|
std::iota(tree_idx.begin(), tree_idx.end(), 0);
|
||||||
|
trees = common::Span<int32_t const>(tree_idx);
|
||||||
|
}
|
||||||
|
|
||||||
|
auto total_n_trees = model_.trees.size();
|
||||||
auto add_score = [&](auto fn) {
|
auto add_score = [&](auto fn) {
|
||||||
for (auto const &p_tree : model_.trees) {
|
for (auto idx : trees) {
|
||||||
|
CHECK_LE(idx, total_n_trees) << "Invalid tree index.";
|
||||||
|
auto const& p_tree = model_.trees[idx];
|
||||||
p_tree->WalkTree([&](bst_node_t nidx) {
|
p_tree->WalkTree([&](bst_node_t nidx) {
|
||||||
auto const &node = (*p_tree)[nidx];
|
auto const& node = (*p_tree)[nidx];
|
||||||
if (!node.IsLeaf()) {
|
if (!node.IsLeaf()) {
|
||||||
split_counts[node.SplitIndex()]++;
|
split_counts[node.SplitIndex()]++;
|
||||||
fn(p_tree, nidx, node.SplitIndex());
|
fn(p_tree, nidx, node.SplitIndex());
|
||||||
|
|||||||
@ -1214,11 +1214,10 @@ class LearnerImpl : public LearnerIO {
|
|||||||
*out_preds = &out_predictions.predictions;
|
*out_preds = &out_predictions.predictions;
|
||||||
}
|
}
|
||||||
|
|
||||||
void CalcFeatureScore(std::string const &importance_type,
|
void CalcFeatureScore(std::string const& importance_type, common::Span<int32_t const> trees,
|
||||||
std::vector<bst_feature_t> *features,
|
std::vector<bst_feature_t>* features, std::vector<float>* scores) override {
|
||||||
std::vector<float> *scores) override {
|
|
||||||
this->Configure();
|
this->Configure();
|
||||||
gbm_->FeatureScore(importance_type, features, scores);
|
gbm_->FeatureScore(importance_type, trees, features, scores);
|
||||||
}
|
}
|
||||||
|
|
||||||
const std::map<std::string, std::string>& GetConfigurationArguments() const override {
|
const std::map<std::string, std::string>& GetConfigurationArguments() const override {
|
||||||
|
|||||||
@ -291,7 +291,7 @@ float GPUMultiClassAUCOVR(common::Span<float const> predts, MetaInfo const &info
|
|||||||
// labels is a vector of size n_samples.
|
// labels is a vector of size n_samples.
|
||||||
float label = labels[idx % n_samples] == class_id;
|
float label = labels[idx % n_samples] == class_id;
|
||||||
|
|
||||||
float w = get_weight(i % n_samples);
|
float w = weights.empty() ? 1.0f : weights[d_sorted_idx[i] % n_samples];
|
||||||
float fp = (1.0 - label) * w;
|
float fp = (1.0 - label) * w;
|
||||||
float tp = label * w;
|
float tp = label * w;
|
||||||
return thrust::make_pair(fp, tp);
|
return thrust::make_pair(fp, tp);
|
||||||
|
|||||||
@ -309,9 +309,8 @@ struct EvalGammaNLogLik {
|
|||||||
float constexpr kPsi = 1.0;
|
float constexpr kPsi = 1.0;
|
||||||
bst_float theta = -1. / py;
|
bst_float theta = -1. / py;
|
||||||
bst_float a = kPsi;
|
bst_float a = kPsi;
|
||||||
// b = -std::log(-theta);
|
float b = -std::log(-theta);
|
||||||
float b = 1.0f;
|
// c = 1. / kPsi^2 * std::log(y/kPsi) - std::log(y) - common::LogGamma(1. / kPsi);
|
||||||
// c = 1. / kPsi * std::log(y/kPsi) - std::log(y) - common::LogGamma(1. / kPsi);
|
|
||||||
// = 1.0f * std::log(y) - std::log(y) - 0 = 0
|
// = 1.0f * std::log(y) - std::log(y) - 0 = 0
|
||||||
float c = 0;
|
float c = 0;
|
||||||
// general form for exponential family.
|
// general form for exponential family.
|
||||||
|
|||||||
@ -109,10 +109,9 @@ class ColMaker: public TreeUpdater {
|
|||||||
interaction_constraints_.Configure(param_, dmat->Info().num_row_);
|
interaction_constraints_.Configure(param_, dmat->Info().num_row_);
|
||||||
// build tree
|
// build tree
|
||||||
for (auto tree : trees) {
|
for (auto tree : trees) {
|
||||||
Builder builder(
|
CHECK(tparam_);
|
||||||
param_,
|
Builder builder(param_, colmaker_param_, interaction_constraints_, tparam_,
|
||||||
colmaker_param_,
|
column_densities_);
|
||||||
interaction_constraints_, column_densities_);
|
|
||||||
builder.Update(gpair->ConstHostVector(), dmat, tree);
|
builder.Update(gpair->ConstHostVector(), dmat, tree);
|
||||||
}
|
}
|
||||||
param_.learning_rate = lr;
|
param_.learning_rate = lr;
|
||||||
@ -154,12 +153,12 @@ class ColMaker: public TreeUpdater {
|
|||||||
class Builder {
|
class Builder {
|
||||||
public:
|
public:
|
||||||
// constructor
|
// constructor
|
||||||
explicit Builder(const TrainParam& param,
|
explicit Builder(const TrainParam ¶m, const ColMakerTrainParam &colmaker_train_param,
|
||||||
const ColMakerTrainParam& colmaker_train_param,
|
|
||||||
FeatureInteractionConstraintHost _interaction_constraints,
|
FeatureInteractionConstraintHost _interaction_constraints,
|
||||||
const std::vector<float> &column_densities)
|
GenericParameter const *ctx, const std::vector<float> &column_densities)
|
||||||
: param_(param), colmaker_train_param_{colmaker_train_param},
|
: param_(param),
|
||||||
nthread_(omp_get_max_threads()),
|
colmaker_train_param_{colmaker_train_param},
|
||||||
|
ctx_{ctx},
|
||||||
tree_evaluator_(param_, column_densities.size(), GenericParameter::kCpuId),
|
tree_evaluator_(param_, column_densities.size(), GenericParameter::kCpuId),
|
||||||
interaction_constraints_{std::move(_interaction_constraints)},
|
interaction_constraints_{std::move(_interaction_constraints)},
|
||||||
column_densities_(column_densities) {}
|
column_densities_(column_densities) {}
|
||||||
@ -238,7 +237,7 @@ class ColMaker: public TreeUpdater {
|
|||||||
// setup temp space for each thread
|
// setup temp space for each thread
|
||||||
// reserve a small space
|
// reserve a small space
|
||||||
stemp_.clear();
|
stemp_.clear();
|
||||||
stemp_.resize(this->nthread_, std::vector<ThreadEntry>());
|
stemp_.resize(this->ctx_->Threads(), std::vector<ThreadEntry>());
|
||||||
for (auto& i : stemp_) {
|
for (auto& i : stemp_) {
|
||||||
i.clear(); i.reserve(256);
|
i.clear(); i.reserve(256);
|
||||||
}
|
}
|
||||||
@ -451,8 +450,9 @@ class ColMaker: public TreeUpdater {
|
|||||||
// start enumeration
|
// start enumeration
|
||||||
const auto num_features = static_cast<bst_omp_uint>(feat_set.size());
|
const auto num_features = static_cast<bst_omp_uint>(feat_set.size());
|
||||||
#if defined(_OPENMP)
|
#if defined(_OPENMP)
|
||||||
|
CHECK(this->ctx_);
|
||||||
const int batch_size = // NOLINT
|
const int batch_size = // NOLINT
|
||||||
std::max(static_cast<int>(num_features / this->nthread_ / 32), 1);
|
std::max(static_cast<int>(num_features / this->ctx_->Threads() / 32), 1);
|
||||||
#endif // defined(_OPENMP)
|
#endif // defined(_OPENMP)
|
||||||
{
|
{
|
||||||
auto page = batch.GetView();
|
auto page = batch.GetView();
|
||||||
@ -553,7 +553,8 @@ class ColMaker: public TreeUpdater {
|
|||||||
virtual void SyncBestSolution(const std::vector<int> &qexpand) {
|
virtual void SyncBestSolution(const std::vector<int> &qexpand) {
|
||||||
for (int nid : qexpand) {
|
for (int nid : qexpand) {
|
||||||
NodeEntry &e = snode_[nid];
|
NodeEntry &e = snode_[nid];
|
||||||
for (int tid = 0; tid < this->nthread_; ++tid) {
|
CHECK(this->ctx_);
|
||||||
|
for (int tid = 0; tid < this->ctx_->Threads(); ++tid) {
|
||||||
e.best.Update(stemp_[tid][nid].best);
|
e.best.Update(stemp_[tid][nid].best);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -609,7 +610,7 @@ class ColMaker: public TreeUpdater {
|
|||||||
const TrainParam& param_;
|
const TrainParam& param_;
|
||||||
const ColMakerTrainParam& colmaker_train_param_;
|
const ColMakerTrainParam& colmaker_train_param_;
|
||||||
// number of omp thread used during training
|
// number of omp thread used during training
|
||||||
const int nthread_;
|
GenericParameter const* ctx_;
|
||||||
common::ColumnSampler column_sampler_;
|
common::ColumnSampler column_sampler_;
|
||||||
// Instance Data: current node position in the tree of each instance
|
// Instance Data: current node position in the tree of each instance
|
||||||
std::vector<int> position_;
|
std::vector<int> position_;
|
||||||
|
|||||||
@ -115,9 +115,6 @@ bool QuantileHistMaker::UpdatePredictionCache(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename GradientSumT>
|
|
||||||
QuantileHistMaker::Builder<GradientSumT>::~Builder() = default;
|
|
||||||
|
|
||||||
|
|
||||||
template <typename GradientSumT>
|
template <typename GradientSumT>
|
||||||
template <bool any_missing>
|
template <bool any_missing>
|
||||||
|
|||||||
@ -204,7 +204,6 @@ class QuantileHistMaker: public TreeUpdater {
|
|||||||
new HistogramBuilder<GradientSumT, CPUExpandEntry>} {
|
new HistogramBuilder<GradientSumT, CPUExpandEntry>} {
|
||||||
builder_monitor_.Init("Quantile::Builder");
|
builder_monitor_.Init("Quantile::Builder");
|
||||||
}
|
}
|
||||||
~Builder();
|
|
||||||
// update one tree, growing
|
// update one tree, growing
|
||||||
virtual void Update(const GHistIndexMatrix& gmat,
|
virtual void Update(const GHistIndexMatrix& gmat,
|
||||||
const ColumnMatrix& column_matrix,
|
const ColumnMatrix& column_matrix,
|
||||||
|
|||||||
@ -430,7 +430,7 @@ TEST(GBTree, FeatureScore) {
|
|||||||
|
|
||||||
std::vector<bst_feature_t> features_weight;
|
std::vector<bst_feature_t> features_weight;
|
||||||
std::vector<float> scores_weight;
|
std::vector<float> scores_weight;
|
||||||
learner->CalcFeatureScore("weight", &features_weight, &scores_weight);
|
learner->CalcFeatureScore("weight", {}, &features_weight, &scores_weight);
|
||||||
ASSERT_EQ(features_weight.size(), scores_weight.size());
|
ASSERT_EQ(features_weight.size(), scores_weight.size());
|
||||||
ASSERT_LE(features_weight.size(), learner->GetNumFeature());
|
ASSERT_LE(features_weight.size(), learner->GetNumFeature());
|
||||||
ASSERT_TRUE(std::is_sorted(features_weight.begin(), features_weight.end()));
|
ASSERT_TRUE(std::is_sorted(features_weight.begin(), features_weight.end()));
|
||||||
@ -438,11 +438,11 @@ TEST(GBTree, FeatureScore) {
|
|||||||
auto test_eq = [&learner, &scores_weight](std::string type) {
|
auto test_eq = [&learner, &scores_weight](std::string type) {
|
||||||
std::vector<bst_feature_t> features;
|
std::vector<bst_feature_t> features;
|
||||||
std::vector<float> scores;
|
std::vector<float> scores;
|
||||||
learner->CalcFeatureScore(type, &features, &scores);
|
learner->CalcFeatureScore(type, {}, &features, &scores);
|
||||||
|
|
||||||
std::vector<bst_feature_t> features_total;
|
std::vector<bst_feature_t> features_total;
|
||||||
std::vector<float> scores_total;
|
std::vector<float> scores_total;
|
||||||
learner->CalcFeatureScore("total_" + type, &features_total, &scores_total);
|
learner->CalcFeatureScore("total_" + type, {}, &features_total, &scores_total);
|
||||||
|
|
||||||
for (size_t i = 0; i < scores_weight.size(); ++i) {
|
for (size_t i = 0; i < scores_weight.size(); ++i) {
|
||||||
ASSERT_LE(RelError(scores_total[i] / scores[i], scores_weight[i]), kRtEps);
|
ASSERT_LE(RelError(scores_total[i] / scores[i], scores_weight[i]), kRtEps);
|
||||||
|
|||||||
@ -143,7 +143,7 @@ void CheckRankingObjFunction(std::unique_ptr<xgboost::ObjFunction> const& obj,
|
|||||||
}
|
}
|
||||||
|
|
||||||
xgboost::bst_float GetMetricEval(xgboost::Metric * metric,
|
xgboost::bst_float GetMetricEval(xgboost::Metric * metric,
|
||||||
xgboost::HostDeviceVector<xgboost::bst_float> preds,
|
xgboost::HostDeviceVector<xgboost::bst_float> const& preds,
|
||||||
std::vector<xgboost::bst_float> labels,
|
std::vector<xgboost::bst_float> labels,
|
||||||
std::vector<xgboost::bst_float> weights,
|
std::vector<xgboost::bst_float> weights,
|
||||||
std::vector<xgboost::bst_uint> groups) {
|
std::vector<xgboost::bst_uint> groups) {
|
||||||
|
|||||||
@ -86,7 +86,7 @@ void CheckRankingObjFunction(std::unique_ptr<xgboost::ObjFunction> const& obj,
|
|||||||
|
|
||||||
xgboost::bst_float GetMetricEval(
|
xgboost::bst_float GetMetricEval(
|
||||||
xgboost::Metric * metric,
|
xgboost::Metric * metric,
|
||||||
xgboost::HostDeviceVector<xgboost::bst_float> preds,
|
xgboost::HostDeviceVector<xgboost::bst_float> const& preds,
|
||||||
std::vector<xgboost::bst_float> labels,
|
std::vector<xgboost::bst_float> labels,
|
||||||
std::vector<xgboost::bst_float> weights = std::vector<xgboost::bst_float>(),
|
std::vector<xgboost::bst_float> weights = std::vector<xgboost::bst_float>(),
|
||||||
std::vector<xgboost::bst_uint> groups = std::vector<xgboost::bst_uint>());
|
std::vector<xgboost::bst_uint> groups = std::vector<xgboost::bst_uint>());
|
||||||
|
|||||||
@ -90,6 +90,16 @@ TEST(Metric, DeclareUnifiedTest(MultiAUC)) {
|
|||||||
},
|
},
|
||||||
{0, 1, 1}); // no class 2.
|
{0, 1, 1}); // no class 2.
|
||||||
EXPECT_TRUE(std::isnan(auc)) << auc;
|
EXPECT_TRUE(std::isnan(auc)) << auc;
|
||||||
|
|
||||||
|
HostDeviceVector<float> predts{
|
||||||
|
0.0f, 1.0f, 0.0f,
|
||||||
|
1.0f, 0.0f, 0.0f,
|
||||||
|
0.0f, 0.0f, 1.0f,
|
||||||
|
0.0f, 0.0f, 1.0f,
|
||||||
|
};
|
||||||
|
std::vector<float> labels {1.0f, 0.0f, 2.0f, 1.0f};
|
||||||
|
auc = GetMetricEval(metric, predts, labels, {1.0f, 2.0f, 3.0f, 4.0f});
|
||||||
|
ASSERT_GT(auc, 0.714);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(Metric, DeclareUnifiedTest(RankingAUC)) {
|
TEST(Metric, DeclareUnifiedTest(RankingAUC)) {
|
||||||
|
|||||||
@ -13,9 +13,11 @@ class TestGPUEvalMetrics:
|
|||||||
def test_roc_auc_binary(self, n_samples):
|
def test_roc_auc_binary(self, n_samples):
|
||||||
self.cpu_test.run_roc_auc_binary("gpu_hist", n_samples)
|
self.cpu_test.run_roc_auc_binary("gpu_hist", n_samples)
|
||||||
|
|
||||||
@pytest.mark.parametrize("n_samples", [4, 100, 1000])
|
@pytest.mark.parametrize(
|
||||||
def test_roc_auc_multi(self, n_samples):
|
"n_samples,weighted", [(4, False), (100, False), (1000, False), (1000, True)]
|
||||||
self.cpu_test.run_roc_auc_multi("gpu_hist", n_samples)
|
)
|
||||||
|
def test_roc_auc_multi(self, n_samples, weighted):
|
||||||
|
self.cpu_test.run_roc_auc_multi("gpu_hist", n_samples, weighted)
|
||||||
|
|
||||||
@pytest.mark.parametrize("n_samples", [4, 100, 1000])
|
@pytest.mark.parametrize("n_samples", [4, 100, 1000])
|
||||||
def test_roc_auc_ltr(self, n_samples):
|
def test_roc_auc_ltr(self, n_samples):
|
||||||
|
|||||||
@ -59,6 +59,7 @@ def test_categorical():
|
|||||||
)
|
)
|
||||||
X = pd.DataFrame(X.todense()).astype("category")
|
X = pd.DataFrame(X.todense()).astype("category")
|
||||||
clf.fit(X, y)
|
clf.fit(X, y)
|
||||||
|
assert not clf._can_use_inplace_predict()
|
||||||
|
|
||||||
with tempfile.TemporaryDirectory() as tempdir:
|
with tempfile.TemporaryDirectory() as tempdir:
|
||||||
model = os.path.join(tempdir, "categorial.json")
|
model = os.path.join(tempdir, "categorial.json")
|
||||||
|
|||||||
@ -1,3 +1,4 @@
|
|||||||
|
from typing import Union
|
||||||
import xgboost as xgb
|
import xgboost as xgb
|
||||||
import pytest
|
import pytest
|
||||||
import os
|
import os
|
||||||
@ -22,17 +23,14 @@ class TestCallbacks:
|
|||||||
cls.X_valid = X[split:, ...]
|
cls.X_valid = X[split:, ...]
|
||||||
cls.y_valid = y[split:, ...]
|
cls.y_valid = y[split:, ...]
|
||||||
|
|
||||||
def run_evaluation_monitor(self, D_train, D_valid, rounds, verbose_eval):
|
def run_evaluation_monitor(
|
||||||
evals_result = {}
|
self,
|
||||||
with tm.captured_output() as (out, err):
|
D_train: xgb.DMatrix,
|
||||||
xgb.train({'objective': 'binary:logistic',
|
D_valid: xgb.DMatrix,
|
||||||
'eval_metric': 'error'}, D_train,
|
rounds: int,
|
||||||
evals=[(D_train, 'Train'), (D_valid, 'Valid')],
|
verbose_eval: Union[bool, int]
|
||||||
num_boost_round=rounds,
|
):
|
||||||
evals_result=evals_result,
|
def check_output(output: str) -> None:
|
||||||
verbose_eval=verbose_eval)
|
|
||||||
output: str = out.getvalue().strip()
|
|
||||||
|
|
||||||
if int(verbose_eval) == 1:
|
if int(verbose_eval) == 1:
|
||||||
# Should print each iteration info
|
# Should print each iteration info
|
||||||
assert len(output.split('\n')) == rounds
|
assert len(output.split('\n')) == rounds
|
||||||
@ -40,11 +38,32 @@ class TestCallbacks:
|
|||||||
# Should print first and latest iteration info
|
# Should print first and latest iteration info
|
||||||
assert len(output.split('\n')) == 2
|
assert len(output.split('\n')) == 2
|
||||||
else:
|
else:
|
||||||
# Should print info by each period additionaly to first and latest iteration
|
# Should print info by each period additionaly to first and latest
|
||||||
|
# iteration
|
||||||
num_periods = rounds // int(verbose_eval)
|
num_periods = rounds // int(verbose_eval)
|
||||||
# Extra information is required for latest iteration
|
# Extra information is required for latest iteration
|
||||||
is_extra_info_required = num_periods * int(verbose_eval) < (rounds - 1)
|
is_extra_info_required = num_periods * int(verbose_eval) < (rounds - 1)
|
||||||
assert len(output.split('\n')) == 1 + num_periods + int(is_extra_info_required)
|
assert len(output.split('\n')) == (
|
||||||
|
1 + num_periods + int(is_extra_info_required)
|
||||||
|
)
|
||||||
|
|
||||||
|
evals_result: xgb.callback.TrainingCallback.EvalsLog = {}
|
||||||
|
params = {'objective': 'binary:logistic', 'eval_metric': 'error'}
|
||||||
|
with tm.captured_output() as (out, err):
|
||||||
|
xgb.train(
|
||||||
|
params, D_train,
|
||||||
|
evals=[(D_train, 'Train'), (D_valid, 'Valid')],
|
||||||
|
num_boost_round=rounds,
|
||||||
|
evals_result=evals_result,
|
||||||
|
verbose_eval=verbose_eval,
|
||||||
|
)
|
||||||
|
output: str = out.getvalue().strip()
|
||||||
|
check_output(output)
|
||||||
|
|
||||||
|
with tm.captured_output() as (out, err):
|
||||||
|
xgb.cv(params, D_train, num_boost_round=rounds, verbose_eval=verbose_eval)
|
||||||
|
output = out.getvalue().strip()
|
||||||
|
check_output(output)
|
||||||
|
|
||||||
def test_evaluation_monitor(self):
|
def test_evaluation_monitor(self):
|
||||||
D_train = xgb.DMatrix(self.X_train, self.y_train)
|
D_train = xgb.DMatrix(self.X_train, self.y_train)
|
||||||
|
|||||||
@ -124,6 +124,35 @@ class TestEvalMetrics:
|
|||||||
skl_gamma_dev = mean_gamma_deviance(y, score)
|
skl_gamma_dev = mean_gamma_deviance(y, score)
|
||||||
np.testing.assert_allclose(gamma_dev, skl_gamma_dev, rtol=1e-6)
|
np.testing.assert_allclose(gamma_dev, skl_gamma_dev, rtol=1e-6)
|
||||||
|
|
||||||
|
@pytest.mark.skipif(**tm.no_sklearn())
|
||||||
|
def test_gamma_lik(self) -> None:
|
||||||
|
import scipy.stats as stats
|
||||||
|
rng = np.random.default_rng(1994)
|
||||||
|
n_samples = 32
|
||||||
|
n_features = 10
|
||||||
|
|
||||||
|
X = rng.normal(0, 1, size=n_samples * n_features).reshape((n_samples, n_features))
|
||||||
|
|
||||||
|
alpha, loc, beta = 5.0, 11.1, 22
|
||||||
|
y = stats.gamma.rvs(alpha, loc=loc, scale=beta, size=n_samples, random_state=rng)
|
||||||
|
reg = xgb.XGBRegressor(tree_method="hist", objective="reg:gamma", n_estimators=64)
|
||||||
|
reg.fit(X, y, eval_metric="gamma-nloglik", eval_set=[(X, y)])
|
||||||
|
|
||||||
|
score = reg.predict(X)
|
||||||
|
|
||||||
|
booster = reg.get_booster()
|
||||||
|
nloglik = float(booster.eval(xgb.DMatrix(X, y)).split(":")[1].split(":")[0])
|
||||||
|
|
||||||
|
# \beta_i = - (1 / \theta_i a)
|
||||||
|
# where \theta_i is the canonical parameter
|
||||||
|
# XGBoost uses the canonical link function of gamma in evaluation function.
|
||||||
|
# so \theta = - (1.0 / y)
|
||||||
|
# dispersion is hardcoded as 1.0, so shape (a in scipy parameter) is also 1.0
|
||||||
|
beta = - (1.0 / (- (1.0 / y))) # == y
|
||||||
|
nloglik_stats = -stats.gamma.logpdf(score, a=1.0, scale=beta)
|
||||||
|
|
||||||
|
np.testing.assert_allclose(nloglik, np.mean(nloglik_stats), rtol=1e-3)
|
||||||
|
|
||||||
def run_roc_auc_binary(self, tree_method, n_samples):
|
def run_roc_auc_binary(self, tree_method, n_samples):
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from sklearn.datasets import make_classification
|
from sklearn.datasets import make_classification
|
||||||
@ -162,11 +191,11 @@ class TestEvalMetrics:
|
|||||||
np.testing.assert_allclose(skl_auc, auc, rtol=1e-6)
|
np.testing.assert_allclose(skl_auc, auc, rtol=1e-6)
|
||||||
|
|
||||||
@pytest.mark.skipif(**tm.no_sklearn())
|
@pytest.mark.skipif(**tm.no_sklearn())
|
||||||
@pytest.mark.parametrize("n_samples", [4, 100, 1000])
|
@pytest.mark.parametrize("n_samples", [100, 1000])
|
||||||
def test_roc_auc(self, n_samples):
|
def test_roc_auc(self, n_samples):
|
||||||
self.run_roc_auc_binary("hist", n_samples)
|
self.run_roc_auc_binary("hist", n_samples)
|
||||||
|
|
||||||
def run_roc_auc_multi(self, tree_method, n_samples):
|
def run_roc_auc_multi(self, tree_method, n_samples, weighted):
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from sklearn.datasets import make_classification
|
from sklearn.datasets import make_classification
|
||||||
from sklearn.metrics import roc_auc_score
|
from sklearn.metrics import roc_auc_score
|
||||||
@ -184,8 +213,14 @@ class TestEvalMetrics:
|
|||||||
n_classes=n_classes,
|
n_classes=n_classes,
|
||||||
random_state=rng
|
random_state=rng
|
||||||
)
|
)
|
||||||
|
if weighted:
|
||||||
|
weights = rng.randn(n_samples)
|
||||||
|
weights -= weights.min()
|
||||||
|
weights /= weights.max()
|
||||||
|
else:
|
||||||
|
weights = None
|
||||||
|
|
||||||
Xy = xgb.DMatrix(X, y)
|
Xy = xgb.DMatrix(X, y, weight=weights)
|
||||||
booster = xgb.train(
|
booster = xgb.train(
|
||||||
{
|
{
|
||||||
"tree_method": tree_method,
|
"tree_method": tree_method,
|
||||||
@ -197,16 +232,22 @@ class TestEvalMetrics:
|
|||||||
num_boost_round=8,
|
num_boost_round=8,
|
||||||
)
|
)
|
||||||
score = booster.predict(Xy)
|
score = booster.predict(Xy)
|
||||||
skl_auc = roc_auc_score(y, score, average="weighted", multi_class="ovr")
|
skl_auc = roc_auc_score(
|
||||||
|
y, score, average="weighted", sample_weight=weights, multi_class="ovr"
|
||||||
|
)
|
||||||
auc = float(booster.eval(Xy).split(":")[1])
|
auc = float(booster.eval(Xy).split(":")[1])
|
||||||
np.testing.assert_allclose(skl_auc, auc, rtol=1e-6)
|
np.testing.assert_allclose(skl_auc, auc, rtol=1e-6)
|
||||||
|
|
||||||
X = rng.randn(*X.shape)
|
X = rng.randn(*X.shape)
|
||||||
score = booster.predict(xgb.DMatrix(X))
|
score = booster.predict(xgb.DMatrix(X, weight=weights))
|
||||||
skl_auc = roc_auc_score(y, score, average="weighted", multi_class="ovr")
|
skl_auc = roc_auc_score(
|
||||||
auc = float(booster.eval(xgb.DMatrix(X, y)).split(":")[1])
|
y, score, average="weighted", sample_weight=weights, multi_class="ovr"
|
||||||
np.testing.assert_allclose(skl_auc, auc, rtol=1e-6)
|
)
|
||||||
|
auc = float(booster.eval(xgb.DMatrix(X, y, weight=weights)).split(":")[1])
|
||||||
|
np.testing.assert_allclose(skl_auc, auc, rtol=1e-5)
|
||||||
|
|
||||||
@pytest.mark.parametrize("n_samples", [4, 100, 1000])
|
@pytest.mark.parametrize(
|
||||||
def test_roc_auc_multi(self, n_samples):
|
"n_samples,weighted", [(4, False), (100, False), (1000, False), (1000, True)]
|
||||||
self.run_roc_auc_multi("hist", n_samples)
|
)
|
||||||
|
def test_roc_auc_multi(self, n_samples, weighted):
|
||||||
|
self.run_roc_auc_multi("hist", n_samples, weighted)
|
||||||
|
|||||||
@ -1,6 +1,12 @@
|
|||||||
# -*- coding: utf-8 -*-
|
import os
|
||||||
|
import tempfile
|
||||||
|
import subprocess
|
||||||
|
|
||||||
import xgboost as xgb
|
import xgboost as xgb
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
import testing as tm
|
||||||
|
|
||||||
|
|
||||||
class TestOMP:
|
class TestOMP:
|
||||||
@ -71,3 +77,31 @@ class TestOMP:
|
|||||||
assert auc_1 == auc_2 == auc_3
|
assert auc_1 == auc_2 == auc_3
|
||||||
assert np.array_equal(auc_1, auc_2)
|
assert np.array_equal(auc_1, auc_2)
|
||||||
assert np.array_equal(auc_1, auc_3)
|
assert np.array_equal(auc_1, auc_3)
|
||||||
|
|
||||||
|
@pytest.mark.skipif(**tm.no_sklearn())
|
||||||
|
def test_with_omp_thread_limit(self):
|
||||||
|
args = [
|
||||||
|
"python", os.path.join(
|
||||||
|
tm.PROJECT_ROOT, "tests", "python", "with_omp_limit.py"
|
||||||
|
)
|
||||||
|
]
|
||||||
|
results = []
|
||||||
|
with tempfile.TemporaryDirectory() as tmpdir:
|
||||||
|
for i in (1, 2, 16):
|
||||||
|
path = os.path.join(tmpdir, str(i))
|
||||||
|
with open(path, "w") as fd:
|
||||||
|
fd.write("\n")
|
||||||
|
cp = args.copy()
|
||||||
|
cp.append(path)
|
||||||
|
|
||||||
|
env = os.environ.copy()
|
||||||
|
env["OMP_THREAD_LIMIT"] = str(i)
|
||||||
|
|
||||||
|
status = subprocess.call(cp, env=env)
|
||||||
|
assert status == 0
|
||||||
|
|
||||||
|
with open(path, "r") as fd:
|
||||||
|
results.append(float(fd.read()))
|
||||||
|
|
||||||
|
for auc in results:
|
||||||
|
np.testing.assert_allclose(auc, results[0])
|
||||||
|
|||||||
26
tests/python/with_omp_limit.py
Normal file
26
tests/python/with_omp_limit.py
Normal file
@ -0,0 +1,26 @@
|
|||||||
|
import os
|
||||||
|
import xgboost as xgb
|
||||||
|
from sklearn.datasets import make_classification
|
||||||
|
from sklearn.metrics import roc_auc_score
|
||||||
|
import sys
|
||||||
|
|
||||||
|
|
||||||
|
def run_omp(output_path: str):
|
||||||
|
X, y = make_classification(
|
||||||
|
n_samples=200, n_features=32, n_classes=3, n_informative=8
|
||||||
|
)
|
||||||
|
Xy = xgb.DMatrix(X, y, nthread=16)
|
||||||
|
booster = xgb.train(
|
||||||
|
{"num_class": 3, "objective": "multi:softprob", "n_jobs": 16},
|
||||||
|
Xy,
|
||||||
|
num_boost_round=8,
|
||||||
|
)
|
||||||
|
score = booster.predict(Xy)
|
||||||
|
auc = roc_auc_score(y, score, average="weighted", multi_class="ovr")
|
||||||
|
with open(output_path, "w") as fd:
|
||||||
|
fd.write(str(auc))
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
out = sys.argv[1]
|
||||||
|
run_omp(out)
|
||||||
@ -1,16 +1,5 @@
|
|||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
|
|
||||||
if [ ${TRAVIS_OS_NAME} == "osx" ]; then
|
|
||||||
# https://travis-ci.community/t/macos-build-fails-because-of-homebrew-bundle-unknown-command/7296/27
|
|
||||||
# Use libomp 11.1.0: https://github.com/dmlc/xgboost/issues/7039
|
|
||||||
brew update # Force update, so that update doesn't overwrite our version of libomp.rb
|
|
||||||
wget https://raw.githubusercontent.com/Homebrew/homebrew-core/679923b4eb48a8dc7ecc1f05d06063cd79b3fc00/Formula/libomp.rb -O $(find $(brew --repository) -name libomp.rb)
|
|
||||||
brew install cmake libomp
|
|
||||||
brew pin libomp
|
|
||||||
fi
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
if [ ${TASK} == "python_test" ] || [ ${TASK} == "python_sdist_test" ]; then
|
if [ ${TASK} == "python_test" ] || [ ${TASK} == "python_sdist_test" ]; then
|
||||||
if [ ${TRAVIS_OS_NAME} == "osx" ]; then
|
if [ ${TRAVIS_OS_NAME} == "osx" ]; then
|
||||||
wget --no-verbose -O conda.sh https://repo.anaconda.com/miniconda/Miniconda3-latest-MacOSX-x86_64.sh
|
wget --no-verbose -O conda.sh https://repo.anaconda.com/miniconda/Miniconda3-latest-MacOSX-x86_64.sh
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user