Compare commits

...

14 Commits

Author SHA1 Message Date
Jiaming Yuan
e7decb9775
[R] release 1.5.0.2 (#7452)
* [R] release 1.5.0.2

* Add cmake list to r build ignore.
2021-11-19 21:39:38 +08:00
Jiaming Yuan
1920118bcb
[backport] [CI] Install igraph as binary. (#7417) (#7447) 2021-11-18 16:35:04 +08:00
Jiaming Yuan
2032547426
Fix R CRAN failures. (#7404) (#7441)
* Remove hist builder dtor.

* Initialize values.

* Tolerance.

* Remove the use of nthread in col maker.
2021-11-17 18:34:53 +08:00
Jiaming Yuan
e7ac2486eb
[backport] [R] Fix global feature importance and predict with 1 sample. (#7394) (#7397)
* [R] Fix global feature importance.

* Add implementation for tree index.  The parameter is not documented in C API since we
should work on porting the model slicing to R instead of supporting more use of tree
index.

* Fix the difference between "gain" and "total_gain".

* debug.

* Fix prediction.
2021-11-06 00:07:36 +08:00
Jiaming Yuan
a3d195e73e
Handle OMP_THREAD_LIMIT. (#7390) (#7391) 2021-11-03 20:25:51 +08:00
Jiaming Yuan
fab3c05ced
Move macos test to github action. (#7382) (#7392)
Co-authored-by: Hyunsu Cho <chohyu01@cs.washington.edu>
2021-11-03 18:39:47 +08:00
Jiaming Yuan
584b45a9cc
Release 1.5.0. (#7317) 2021-10-15 12:21:04 +08:00
Jiaming Yuan
30c1b5c54c
[backport] Fix prediction with cat data in sklearn interface. (#7306) (#7312)
* Specify DMatrix parameter for pre-processing dataframe.
* Add document about the behaviour of prediction.
2021-10-12 18:49:57 +08:00
Jiaming Yuan
36e247aca4
Fix weighted samples in multi-class AUC. (#7300) (#7305) 2021-10-11 18:00:36 +08:00
Jiaming Yuan
c4aff733bb
[backport] Fix cv verbose_eval (#7291) (#7296) 2021-10-08 14:24:27 +08:00
Jiaming Yuan
cdbfd21d31
[backport] Fix gamma neg log likelihood. (#7275) (#7285) 2021-10-05 23:01:11 +08:00
Jiaming Yuan
508a0b0dbd
[backport] [R] Fix document for nthread. (#7263) (#7269) 2021-09-28 14:41:32 +08:00
Jiaming Yuan
e04e773f9f
Add RC1 tag for building packages. (#7261) 2021-09-28 11:50:18 +08:00
Jiaming Yuan
1debabb321
Change version to 1.5.0. (#7258) 2021-09-26 13:27:54 +08:00
50 changed files with 452 additions and 179 deletions

View File

@@ -9,7 +9,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        os: [windows-latest, ubuntu-latest]
+        os: [windows-latest, ubuntu-latest, macos-10.15]
     steps:
     - uses: actions/checkout@v2

View File

@@ -51,7 +51,8 @@ jobs:
     strategy:
       matrix:
         config:
-        - {os: windows-2016, compiler: 'msvc', python-version: '3.8'}
+        - {os: windows-2016, python-version: '3.8'}
+        - {os: macos-10.15, python-version: '3.8'}
     steps:
     - uses: actions/checkout@v2
@@ -71,15 +72,27 @@ jobs:
         conda info
         conda list
-    - name: Build XGBoost with msvc
+    - name: Build XGBoost on Windows
       shell: bash -l {0}
-      if: matrix.config.compiler == 'msvc'
+      if: matrix.config.os == 'windows-2016'
       run: |
         mkdir build_msvc
         cd build_msvc
         cmake .. -G"Visual Studio 15 2017" -DCMAKE_CONFIGURATION_TYPES="Release" -A x64 -DGOOGLE_TEST=ON -DUSE_DMLC_GTEST=ON
         cmake --build . --config Release --parallel $(nproc)
+    - name: Build XGBoost on macos
+      if: matrix.config.os == 'macos-10.15'
+      run: |
+        wget https://raw.githubusercontent.com/Homebrew/homebrew-core/679923b4eb48a8dc7ecc1f05d06063cd79b3fc00/Formula/libomp.rb -O $(find $(brew --repository) -name libomp.rb)
+        brew install ninja libomp
+        brew pin libomp
+        mkdir build
+        cd build
+        cmake .. -GNinja -DGOOGLE_TEST=ON -DUSE_DMLC_GTEST=ON
+        ninja
     - name: Install Python package
       shell: bash -l {0}
       run: |
@@ -92,3 +105,21 @@ jobs:
       shell: bash -l {0}
       run: |
         pytest -s -v ./tests/python
+    - name: Rename Python wheel
+      shell: bash -l {0}
+      if: matrix.config.os == 'macos-10.15'
+      run: |
+        TAG=macosx_10_15_x86_64.macosx_11_0_x86_64.macosx_12_0_x86_64
+        python tests/ci_build/rename_whl.py python-package/dist/*.whl ${{ github.sha }} ${TAG}
+    - name: Upload Python wheel
+      shell: bash -l {0}
+      if: |
+        (github.ref == 'refs/heads/master' || contains(github.ref, 'refs/heads/release_')) &&
+        matrix.config.os == 'macos-10.15'
+      run: |
+        python -m awscli s3 cp python-package/dist/*.whl s3://xgboost-nightly-builds/${{ steps.extract_branch.outputs.branch }}/ --acl public-read
+      env:
+        AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID_IAM_S3_UPLOADER }}
+        AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY_IAM_S3_UPLOADER }}

View File

@@ -3,7 +3,7 @@ name: XGBoost-R-Tests
 on: [push, pull_request]

 env:
-  R_PACKAGES: c('XML', 'igraph', 'data.table', 'ggplot2', 'DiagrammeR', 'Ckmeans.1d.dp', 'vcd', 'testthat', 'lintr', 'knitr', 'rmarkdown', 'e1071', 'cplm', 'devtools', 'float', 'titanic')
+  R_PACKAGES: c('XML', 'data.table', 'ggplot2', 'DiagrammeR', 'Ckmeans.1d.dp', 'vcd', 'testthat', 'lintr', 'knitr', 'rmarkdown', 'e1071', 'cplm', 'devtools', 'float', 'titanic')
   GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}

 jobs:
@@ -40,6 +40,11 @@ jobs:
           install.packages(${{ env.R_PACKAGES }},
                            repos = 'http://cloud.r-project.org',
                            dependencies = c('Depends', 'Imports', 'LinkingTo'))
+    - name: Install igraph on Windows
+      shell: Rscript {0}
+      if: matrix.config.os == 'windows-latest'
+      run: |
+        install.packages('igraph', type='binary')
     - name: Run lintr
       run: |
@@ -83,6 +88,11 @@ jobs:
           install.packages(${{ env.R_PACKAGES }},
                            repos = 'http://cloud.r-project.org',
                            dependencies = c('Depends', 'Imports', 'LinkingTo'))
+    - name: Install igraph on Windows
+      shell: Rscript {0}
+      if: matrix.config.os == 'windows-2016'
+      run: |
+        install.packages('igraph', type='binary', dependencies = c('Depends', 'Imports', 'LinkingTo'))
    - uses: actions/setup-python@v2
      with:
@@ -91,7 +101,7 @@ jobs:
     - name: Test R
       run: |
-        python tests/ci_build/test_r_package.py --compiler="${{ matrix.config.compiler }}" --build-tool="${{ matrix.config.build }}"
+        python tests/ci_build/test_r_package.py --compiler='${{ matrix.config.compiler }}' --build-tool='${{ matrix.config.build }}'

   test-R-CRAN:
     runs-on: ubuntu-latest
@@ -115,7 +125,7 @@ jobs:
     - name: Install system packages
       run: |
-        sudo apt-get update && sudo apt-get install libcurl4-openssl-dev libssl-dev libssh2-1-dev libgit2-dev pandoc pandoc-citeproc
+        sudo apt-get update && sudo apt-get install libcurl4-openssl-dev libssl-dev libssh2-1-dev libgit2-dev pandoc pandoc-citeproc libglpk-dev
     - name: Cache R packages
       uses: actions/cache@v2
@@ -130,6 +140,7 @@ jobs:
           install.packages(${{ env.R_PACKAGES }},
                            repos = 'http://cloud.r-project.org',
                            dependencies = c('Depends', 'Imports', 'LinkingTo'))
+          install.packages('igraph', repos = 'http://cloud.r-project.org', dependencies = c('Depends', 'Imports', 'LinkingTo'))
     - name: Check R Package
       run: |

View File

@@ -10,14 +10,6 @@ env:
 jobs:
   include:
-    - os: osx
-      arch: amd64
-      osx_image: xcode10.2
-      env: TASK=python_test
-    - os: osx
-      arch: amd64
-      osx_image: xcode10.2
-      env: TASK=java_test
     - os: linux
       arch: s390x
       env: TASK=s390x_test
@@ -33,8 +25,6 @@ addons:
 before_install:
   - source tests/travis/travis_setup_env.sh
-  - if [ "${TASK}" != "python_sdist_test" ]; then export PYTHONPATH=${PYTHONPATH}:${PWD}/python-package; fi
-  - echo "MAVEN_OPTS='-Xmx2g -XX:MaxPermSize=1024m -XX:ReservedCodeCacheSize=512m -Dorg.slf4j.simpleLogger.defaultLogLevel=error'" > ~/.mavenrc

 install:
   - source tests/travis/setup.sh

View File

@@ -4,3 +4,4 @@
 ^.*\.Rproj$
 ^\.Rproj\.user$
 README.md
+CMakeLists.txt

View File

@@ -1,8 +1,8 @@
 Package: xgboost
 Type: Package
 Title: Extreme Gradient Boosting
-Version: 1.5.0.1
-Date: 2020-08-28
+Version: 1.5.0.2
+Date: 2021-11-19
 Authors@R: c(
   person("Tianqi", "Chen", role = c("aut"),
          email = "tianqi.tchen@gmail.com"),

View File

@@ -397,6 +397,7 @@ predict.xgb.Booster <- function(object, newdata, missing = NA, outputmargin = FA
   shape <- predts$shape
   ret <- predts$results
+  n_ret <- length(ret)
   n_row <- nrow(newdata)
   if (n_row != shape[1]) {
     stop("Incorrect predict shape.")
@@ -405,36 +406,55 @@ predict.xgb.Booster <- function(object, newdata, missing = NA, outputmargin = FA
   arr <- array(data = ret, dim = rev(shape))
   cnames <- if (!is.null(colnames(newdata))) c(colnames(newdata), "BIAS") else NULL
+  n_groups <- shape[2]
+
+  ## Needed regardless of whether strict shape is being used.
   if (predcontrib) {
     dimnames(arr) <- list(cnames, NULL, NULL)
-    if (!strict_shape) {
-      arr <- aperm(a = arr, perm = c(2, 3, 1)) # [group, row, col]
-    }
   } else if (predinteraction) {
     dimnames(arr) <- list(cnames, cnames, NULL, NULL)
-    if (!strict_shape) {
-      arr <- aperm(a = arr, perm = c(3, 4, 1, 2)) # [group, row, col, col]
-    }
   }
+  if (strict_shape) {
+    return(arr) # strict shape is calculated by libxgboost uniformly.
+  }
-  if (!strict_shape) {
-    n_groups <- shape[2]
-    if (predleaf) {
-      arr <- matrix(arr, nrow = n_row, byrow = TRUE)
-    } else if (predcontrib && n_groups != 1) {
-      arr <- lapply(seq_len(n_groups), function(g) arr[g, , ])
-    } else if (predinteraction && n_groups != 1) {
-      arr <- lapply(seq_len(n_groups), function(g) arr[g, , , ])
-    } else if (!reshape && n_groups != 1) {
-      arr <- ret
-    } else if (reshape && n_groups != 1) {
-      arr <- matrix(arr, ncol = n_groups, byrow = TRUE)
-    }
-    arr <- drop(arr)
-    if (length(dim(arr)) == 1) {
-      arr <- as.vector(arr)
-    } else if (length(dim(arr)) == 2) {
-      arr <- as.matrix(arr)
-    }
+
+  if (predleaf) {
+    ## Predict leaf
+    arr <- if (n_ret == n_row) {
+      matrix(arr, ncol = 1)
+    } else {
+      matrix(arr, nrow = n_row, byrow = TRUE)
+    }
+  } else if (predcontrib) {
+    ## Predict contribution
+    arr <- aperm(a = arr, perm = c(2, 3, 1)) # [group, row, col]
+    arr <- if (n_ret == n_row) {
+      matrix(arr, ncol = 1, dimnames = list(NULL, cnames))
+    } else if (n_groups != 1) {
+      ## turns array into list of matrices
+      lapply(seq_len(n_groups), function(g) arr[g, , ])
+    } else {
+      ## remove the first axis (group)
+      as.matrix(arr[1, , ])
+    }
+  } else if (predinteraction) {
+    ## Predict interaction
+    arr <- aperm(a = arr, perm = c(3, 4, 1, 2)) # [group, row, col, col]
+    arr <- if (n_ret == n_row) {
+      matrix(arr, ncol = 1, dimnames = list(NULL, cnames))
+    } else if (n_groups != 1) {
+      ## turns array into list of matrices
+      lapply(seq_len(n_groups), function(g) arr[g, , , ])
+    } else {
+      ## remove the first axis (group)
+      arr[1, , , ]
+    }
+  } else {
+    ## Normal prediction
+    arr <- if (reshape && n_groups != 1) {
+      matrix(arr, ncol = n_groups, byrow = TRUE)
+    } else {
+      as.vector(ret)
+    }
   }
   return(arr)

View File

@@ -11,6 +11,7 @@
 #' @param missing a float value to represents missing values in data (used only when input is a dense matrix).
 #'        It is useful when a 0 or some other extreme value represents missing values in data.
 #' @param silent whether to suppress printing an informational message after loading from a file.
+#' @param nthread Number of threads used for creating DMatrix.
 #' @param ... the \code{info} data could be passed directly as parameters, without creating an \code{info} list.
 #'
 #' @examples

View File

@@ -115,14 +115,14 @@ xgb.importance <- function(feature_names = NULL, model = NULL, trees = NULL,
   } else {
     concatenated <- list()
     output_names <- vector()
-    for (importance_type in c("weight", "gain", "cover")) {
-      args <- list(importance_type = importance_type, feature_names = feature_names)
+    for (importance_type in c("weight", "total_gain", "total_cover")) {
+      args <- list(importance_type = importance_type, feature_names = feature_names, tree_idx = trees)
       results <- .Call(
         XGBoosterFeatureScore_R, model$handle, jsonlite::toJSON(args, auto_unbox = TRUE, null = "null")
       )
       names(results) <- c("features", "shape", importance_type)
       concatenated[
-        switch(importance_type, "weight" = "Frequency", "gain" = "Gain", "cover" = "Cover")
+        switch(importance_type, "weight" = "Frequency", "total_gain" = "Gain", "total_cover" = "Cover")
       ] <- results[importance_type]
       output_names <- results$features
     }
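
For reference, the Python binding exposes the same scores through `Booster.get_score`; the change above makes the R `Gain`/`Cover` columns correspond to `total_gain`/`total_cover` rather than per-split averages. A minimal sketch on synthetic data (all names illustrative):

import numpy as np
import xgboost as xgb

rng = np.random.default_rng(0)
X = rng.normal(size=(256, 4))
y = (X[:, 0] > 0).astype(float)
booster = xgb.train({"objective": "binary:logistic"}, xgb.DMatrix(X, label=y), num_boost_round=8)

# "gain" averages split gain per use of a feature; "total_gain" sums it over all splits.
print(booster.get_score(importance_type="gain"))
print(booster.get_score(importance_type="total_gain"))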

View File

@@ -9,8 +9,8 @@ xgboost <- function(data = NULL, label = NULL, missing = NA, weight = NULL,
                     early_stopping_rounds = NULL, maximize = NULL,
                     save_period = NULL, save_name = "xgboost.model",
                     xgb_model = NULL, callbacks = list(), ...) {
-  dtrain <- xgb.get.DMatrix(data, label, missing, weight, nthread = params$nthread)
+  merged <- check.booster.params(params, ...)
+  dtrain <- xgb.get.DMatrix(data, label, missing, weight, nthread = merged$nthread)

   watchlist <- list(train = dtrain)

View File

@@ -4,7 +4,14 @@
 \alias{xgb.DMatrix}
 \title{Construct xgb.DMatrix object}
 \usage{
-xgb.DMatrix(data, info = list(), missing = NA, silent = FALSE, ...)
+xgb.DMatrix(
+  data,
+  info = list(),
+  missing = NA,
+  silent = FALSE,
+  nthread = NULL,
+  ...
+)
 }
 \arguments{
 \item{data}{a \code{matrix} object (either numeric or integer), a \code{dgCMatrix} object, or a character
@@ -18,6 +25,8 @@ It is useful when a 0 or some other extreme value represents missing values in d
 \item{silent}{whether to suppress printing an informational message after loading from a file.}

+\item{nthread}{Number of threads used for creating DMatrix.}
+
 \item{...}{the \code{info} data could be passed directly as parameters, without creating an \code{info} list.}
 }
 \description{
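
The Python `DMatrix` constructor takes the analogous `nthread` argument; a minimal sketch:

import numpy as np
import xgboost as xgb

X = np.random.default_rng(0).normal(size=(1000, 16))
y = np.random.default_rng(1).normal(size=1000)

# nthread here only bounds the threads used while constructing the DMatrix.
dtrain = xgb.DMatrix(X, label=y, nthread=4)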

View File

@@ -1,3 +1,4 @@
+library(testthat)
 context('Test helper functions')

 require(xgboost)
@@ -227,7 +228,7 @@ if (grepl('Windows', Sys.info()[['sysname']]) ||
   X <- 10^runif(100, -20, 20)
   if (capabilities('long.double')) {
     X2X <- as.numeric(format(X, digits = 17))
-    expect_identical(X, X2X)
+    expect_equal(X, X2X, tolerance = float_tolerance)
   }
   # retrieved attributes to be the same as written
   for (x in X) {
@@ -310,7 +311,35 @@ test_that("xgb.importance works with and without feature names", {
   # for multiclass
   imp.Tree <- xgb.importance(model = mbst.Tree)
   expect_equal(dim(imp.Tree), c(4, 4))
-  xgb.importance(model = mbst.Tree, trees = seq(from = 0, by = nclass, length.out = nrounds))
+
+  trees <- seq(from = 0, by = 2, length.out = 2)
+  importance <- xgb.importance(feature_names = feature.names, model = bst.Tree, trees = trees)
+
+  importance_from_dump <- function() {
+    model_text_dump <- xgb.dump(model = bst.Tree, with_stats = TRUE, trees = trees)
+    imp <- xgb.model.dt.tree(
+      feature_names = feature.names,
+      text = model_text_dump,
+      trees = trees
+    )[
+      Feature != "Leaf", .(
+        Gain = sum(Quality),
+        Cover = sum(Cover),
+        Frequency = .N
+      ),
+      by = Feature
+    ][
+      , `:=`(
+        Gain = Gain / sum(Gain),
+        Cover = Cover / sum(Cover),
+        Frequency = Frequency / sum(Frequency)
+      )
+    ][
+      order(Gain, decreasing = TRUE)
+    ]
+    imp
+  }
+  expect_equal(importance_from_dump(), importance, tolerance = 1e-6)
 })

 test_that("xgb.importance works with GLM model", {

View File

@@ -1 +1 @@
-@xgboost_VERSION_MAJOR@.@xgboost_VERSION_MINOR@.@xgboost_VERSION_PATCH@-dev
+@xgboost_VERSION_MAJOR@.@xgboost_VERSION_MINOR@.@xgboost_VERSION_PATCH@

View File

@@ -18,7 +18,7 @@ Making a Release
 1. Create an issue for the release, noting the estimated date and expected features or major fixes, pin that issue.
 2. Bump release version.
-   1. Modify ``CMakeLists.txt`` source tree, run CMake.
+   1. Modify ``CMakeLists.txt`` in source tree and ``cmake/Python_version.in`` if needed, run CMake.
    2. Modify ``DESCRIPTION`` in R-package.
    3. Run ``change_version.sh`` in ``jvm-packages/dev``
 3. Commit the change, create a PR on GitHub on release branch. Port the bumped version to default branch, optionally with the postfix ``SNAPSHOT``.

View File

@@ -32,8 +32,8 @@ After 1.4 release, we added a new parameter called ``strict_shape``, one can set
 - When using ``output_margin`` to avoid transformation and ``strict_shape`` is set to ``True``:

   Similar to the previous case, output is a 2-dim array, except for that ``multi:softmax``
-  has equivalent output of ``multi:softprob`` due to dropped transformation. If strict
-  shape is set to False then output can have 1 or 2 dim depending on used model.
+  has equivalent output shape of ``multi:softprob`` due to dropped transformation. If
+  strict shape is set to False then output can have 1 or 2 dim depending on used model.

 - When using ``preds_contribs`` with ``strict_shape`` set to ``True``:
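
A minimal Python sketch of the shapes described above, using a small multi-class model (numbers are illustrative):

import numpy as np
import xgboost as xgb

rng = np.random.default_rng(0)
X = rng.normal(size=(32, 8))
y = rng.integers(0, 3, size=32)
Xy = xgb.DMatrix(X, label=y)
booster = xgb.train({"objective": "multi:softprob", "num_class": 3}, Xy, num_boost_round=2)

# With strict_shape=True the group dimension is always kept:
print(booster.predict(Xy, strict_shape=True).shape)                      # (32, 3)
print(booster.predict(Xy, output_margin=True, strict_shape=True).shape)  # (32, 3)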

View File

@@ -211,7 +211,7 @@ struct Entry {
  */
 struct BatchParam {
   /*! \brief The GPU device to use. */
-  int gpu_id;
+  int gpu_id {-1};
   /*! \brief Maximum number of bins per feature for histograms. */
   int max_bin{0};
   /*! \brief Hessian, used for sketching with future approx implementation. */

View File

@@ -182,9 +182,10 @@ class GradientBooster : public Model, public Configurable {
                          bool with_stats,
                          std::string format) const = 0;

-  virtual void FeatureScore(std::string const &importance_type,
-                            std::vector<bst_feature_t> *features,
-                            std::vector<float> *scores) const = 0;
+  virtual void FeatureScore(std::string const& importance_type,
+                            common::Span<int32_t const> trees,
+                            std::vector<bst_feature_t>* features,
+                            std::vector<float>* scores) const = 0;
   /*!
    * \brief Whether the current booster uses GPU.
    */

View File

@@ -155,9 +155,10 @@ class Learner : public Model, public Configurable, public dmlc::Serializable {
   /*!
    * \brief Calculate feature score.  See doc in C API for outputs.
    */
-  virtual void CalcFeatureScore(std::string const &importance_type,
-                                std::vector<bst_feature_t> *features,
-                                std::vector<float> *scores) = 0;
+  virtual void CalcFeatureScore(std::string const& importance_type,
+                                common::Span<int32_t const> trees,
+                                std::vector<bst_feature_t>* features,
+                                std::vector<float>* scores) = 0;

   /*
    * \brief Get number of boosted rounds from gradient booster.

View File

@@ -6,7 +6,7 @@
     <groupId>ml.dmlc</groupId>
     <artifactId>xgboost-jvm_2.12</artifactId>
-    <version>1.5.0-SNAPSHOT</version>
+    <version>1.5.0</version>
     <packaging>pom</packaging>
     <name>XGBoost JVM Package</name>
     <description>JVM Package for XGBoost</description>

View File

@@ -6,10 +6,10 @@
     <parent>
         <groupId>ml.dmlc</groupId>
         <artifactId>xgboost-jvm_2.12</artifactId>
-        <version>1.5.0-SNAPSHOT</version>
+        <version>1.5.0</version>
     </parent>
     <artifactId>xgboost4j-example_2.12</artifactId>
-    <version>1.5.0-SNAPSHOT</version>
+    <version>1.5.0</version>
     <packaging>jar</packaging>
     <build>
         <plugins>
@@ -26,7 +26,7 @@
         <dependency>
             <groupId>ml.dmlc</groupId>
             <artifactId>xgboost4j-spark_${scala.binary.version}</artifactId>
-            <version>1.5.0-SNAPSHOT</version>
+            <version>1.5.0</version>
         </dependency>
         <dependency>
             <groupId>org.apache.spark</groupId>
@@ -37,7 +37,7 @@
         <dependency>
             <groupId>ml.dmlc</groupId>
             <artifactId>xgboost4j-flink_${scala.binary.version}</artifactId>
-            <version>1.5.0-SNAPSHOT</version>
+            <version>1.5.0</version>
         </dependency>
         <dependency>
             <groupId>org.apache.commons</groupId>

View File

@@ -6,10 +6,10 @@
     <parent>
         <groupId>ml.dmlc</groupId>
         <artifactId>xgboost-jvm_2.12</artifactId>
-        <version>1.5.0-SNAPSHOT</version>
+        <version>1.5.0</version>
     </parent>
     <artifactId>xgboost4j-flink_2.12</artifactId>
-    <version>1.5.0-SNAPSHOT</version>
+    <version>1.5.0</version>
     <build>
         <plugins>
             <plugin>
@@ -26,7 +26,7 @@
         <dependency>
             <groupId>ml.dmlc</groupId>
             <artifactId>xgboost4j_${scala.binary.version}</artifactId>
-            <version>1.5.0-SNAPSHOT</version>
+            <version>1.5.0</version>
         </dependency>
         <dependency>
             <groupId>org.apache.commons</groupId>

View File

@@ -6,10 +6,10 @@
     <parent>
         <groupId>ml.dmlc</groupId>
         <artifactId>xgboost-jvm_2.12</artifactId>
-        <version>1.5.0-SNAPSHOT</version>
+        <version>1.5.0</version>
     </parent>
     <artifactId>xgboost4j-gpu_2.12</artifactId>
-    <version>1.5.0-SNAPSHOT</version>
+    <version>1.5.0</version>
     <packaging>jar</packaging>
     <properties>

View File

@@ -6,7 +6,7 @@
     <parent>
         <groupId>ml.dmlc</groupId>
         <artifactId>xgboost-jvm_2.12</artifactId>
-        <version>1.5.0-SNAPSHOT</version>
+        <version>1.5.0</version>
     </parent>
     <artifactId>xgboost4j-spark-gpu_2.12</artifactId>
     <build>
@@ -24,7 +24,7 @@
         <dependency>
             <groupId>ml.dmlc</groupId>
             <artifactId>xgboost4j-gpu_${scala.binary.version}</artifactId>
-            <version>1.5.0-SNAPSHOT</version>
+            <version>1.5.0</version>
         </dependency>
         <dependency>
             <groupId>org.apache.spark</groupId>

View File

@@ -6,7 +6,7 @@
     <parent>
         <groupId>ml.dmlc</groupId>
         <artifactId>xgboost-jvm_2.12</artifactId>
-        <version>1.5.0-SNAPSHOT</version>
+        <version>1.5.0</version>
     </parent>
     <artifactId>xgboost4j-spark_2.12</artifactId>
     <build>
@@ -24,7 +24,7 @@
         <dependency>
             <groupId>ml.dmlc</groupId>
             <artifactId>xgboost4j_${scala.binary.version}</artifactId>
-            <version>1.5.0-SNAPSHOT</version>
+            <version>1.5.0</version>
         </dependency>
         <dependency>
             <groupId>org.apache.spark</groupId>

View File

@@ -6,10 +6,10 @@
     <parent>
         <groupId>ml.dmlc</groupId>
         <artifactId>xgboost-jvm_2.12</artifactId>
-        <version>1.5.0-SNAPSHOT</version>
+        <version>1.5.0</version>
     </parent>
     <artifactId>xgboost4j_2.12</artifactId>
-    <version>1.5.0-SNAPSHOT</version>
+    <version>1.5.0</version>
     <packaging>jar</packaging>
     <dependencies>

View File

@@ -1 +1 @@
-1.5.0-dev
+1.5.0

View File

@@ -174,7 +174,9 @@ __model_doc = f'''
         Device ordinal.
     validate_parameters : Optional[bool]
         Give warnings for unknown parameter.
+    predictor : Optional[str]
+        Force XGBoost to use specific predictor, available choices are [cpu_predictor,
+        gpu_predictor].

     enable_categorical : bool

         .. versionadded:: 1.5.0
@@ -807,7 +809,11 @@ class XGBModel(XGBModelBase):
         # Inplace predict doesn't handle as many data types as DMatrix, but it's
         # sufficient for dask interface where input is simpiler.
         predictor = self.get_params().get("predictor", None)
-        if predictor in ("auto", None) and self.booster != "gblinear":
+        if (
+            not self.enable_categorical
+            and predictor in ("auto", None)
+            and self.booster != "gblinear"
+        ):
             return True
         return False
@@ -834,7 +840,9 @@ class XGBModel(XGBModelBase):
         iteration_range: Optional[Tuple[int, int]] = None,
     ) -> np.ndarray:
         """Predict with `X`.  If the model is trained with early stopping, then `best_iteration`
-        is used automatically.
+        is used automatically.  For tree models, when data is on GPU, like cupy array or
+        cuDF dataframe and `predictor` is not specified, the prediction is run on GPU
+        automatically, otherwise it will run on CPU.

         .. note:: This function is only thread safe for `gbtree` and `dart`.
@@ -862,6 +870,7 @@ class XGBModel(XGBModelBase):
         Returns
         -------
         prediction
+
         """
         iteration_range = _convert_ntree_limit(
             self.get_booster(), ntree_limit, iteration_range
@@ -886,7 +895,10 @@ class XGBModel(XGBModelBase):
                 pass

         test = DMatrix(
-            X, base_margin=base_margin, missing=self.missing, nthread=self.n_jobs
+            X, base_margin=base_margin,
+            missing=self.missing,
+            nthread=self.n_jobs,
+            enable_categorical=self.enable_categorical
         )
         return self.get_booster().predict(
             data=test,
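
A minimal sketch of the behaviour being fixed: with `enable_categorical=True` the sklearn wrapper now skips inplace predict and routes prediction through a `DMatrix` built with the same flag (in the 1.5 series categorical support requires a CUDA device and `tree_method="gpu_hist"`):

import pandas as pd
import xgboost as xgb

df = pd.DataFrame({"c": pd.Categorical(["a", "b", "a", "c"])})
y = [0, 1, 0, 1]

clf = xgb.XGBClassifier(tree_method="gpu_hist", enable_categorical=True, n_estimators=4)
clf.fit(df, y)

assert not clf._can_use_inplace_predict()  # falls back to the DMatrix path
print(clf.predict(df))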

View File

@@ -472,13 +472,15 @@ def cv(params, dtrain, num_boost_round=10, nfold=3, stratified=False, folds=None
     if is_new_callback:
         assert all(isinstance(c, callback.TrainingCallback)
                    for c in callbacks), "You can't mix new and old callback styles."
-        if isinstance(verbose_eval, bool) and verbose_eval:
+        if verbose_eval:
             verbose_eval = 1 if verbose_eval is True else verbose_eval
-            callbacks.append(callback.EvaluationMonitor(period=verbose_eval,
-                                                        show_stdv=show_stdv))
+            callbacks.append(
+                callback.EvaluationMonitor(period=verbose_eval, show_stdv=show_stdv)
+            )
         if early_stopping_rounds:
-            callbacks.append(callback.EarlyStopping(
-                rounds=early_stopping_rounds, maximize=maximize))
+            callbacks.append(
+                callback.EarlyStopping(rounds=early_stopping_rounds, maximize=maximize)
+            )
         callbacks = callback.CallbackContainer(callbacks, metric=feval, is_cv=True)
     else:
         callbacks = _configure_deprecated_callbacks(
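
With the fix, `xgb.cv` honours an integer `verbose_eval` period the same way `xgb.train` does; a minimal sketch:

import numpy as np
import xgboost as xgb

rng = np.random.default_rng(0)
X = rng.normal(size=(200, 8))
y = (X[:, 0] > 0).astype(float)
dtrain = xgb.DMatrix(X, label=y)

# verbose_eval=2 prints every second round (plus first and last) instead of being ignored.
xgb.cv({"objective": "binary:logistic"}, dtrain, num_boost_round=10, nfold=3, verbose_eval=2)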

View File

@@ -1159,9 +1159,17 @@ XGB_DLL int XGBoosterFeatureScore(BoosterHandle handle, char const *json_config,
     custom_feature_names = get<Array const>(config["feature_names"]);
   }

-  auto& scores = learner->GetThreadLocal().ret_vec_float;
+  std::vector<int32_t> tree_idx;
+  if (!IsA<Null>(config["tree_idx"])) {
+    auto j_tree_idx = get<Array const>(config["tree_idx"]);
+    for (auto const &idx : j_tree_idx) {
+      tree_idx.push_back(get<Integer const>(idx));
+    }
+  }
+
+  auto &scores = learner->GetThreadLocal().ret_vec_float;
   std::vector<bst_feature_t> features;
-  learner->CalcFeatureScore(importance, &features, &scores);
+  learner->CalcFeatureScore(importance, common::Span<int32_t const>(tree_idx), &features, &scores);

   auto n_features = learner->GetNumFeature();
   GenerateFeatureMap(learner, custom_feature_names, n_features, &feature_map);

View File

@@ -7,11 +7,28 @@
 #define XGBOOST_COMMON_THREADING_UTILS_H_

 #include <dmlc/common.h>
-#include <vector>
+#include <dmlc/omp.h>

 #include <algorithm>
+#include <limits>
 #include <type_traits>  // std::is_signed
+#include <vector>

 #include "xgboost/logging.h"

+#if !defined(_OPENMP)
+extern "C" {
+inline int32_t omp_get_thread_limit() __GOMP_NOTHROW { return 1; }  // NOLINT
+}
+#endif  // !defined(_OPENMP)
+
+// MSVC doesn't implement the thread limit.
+#if defined(_OPENMP) && defined(_MSC_VER)
+extern "C" {
+inline int32_t omp_get_thread_limit() { return std::numeric_limits<int32_t>::max(); }  // NOLINT
+}
+#endif  // defined(_MSC_VER)
+
 namespace xgboost {
 namespace common {
@@ -153,7 +170,7 @@ struct Sched {
 };

 template <typename Index, typename Func>
-void ParallelFor(Index size, size_t n_threads, Sched sched, Func fn) {
+void ParallelFor(Index size, int32_t n_threads, Sched sched, Func fn) {
 #if defined(_MSC_VER)
   // msvc doesn't support unsigned integer as openmp index.
   using OmpInd = std::conditional_t<std::is_signed<Index>::value, Index, omp_ulong>;
@@ -220,6 +237,13 @@ void ParallelFor(Index size, size_t n_threads, Func fn) {
 template <typename Index, typename Func>
 void ParallelFor(Index size, Func fn) {
   ParallelFor(size, omp_get_max_threads(), Sched::Static(), fn);
 }

+inline int32_t OmpGetThreadLimit() {
+  int32_t limit = omp_get_thread_limit();
+  CHECK_GE(limit, 1) << "Invalid thread limit for OpenMP.";
+  return limit;
+}
+
 /* \brief Configure parallel threads.
@@ -235,15 +259,18 @@ inline int32_t OmpSetNumThreads(int32_t* p_threads) {
   if (threads <= 0) {
     threads = omp_get_num_procs();
   }
+  threads = std::min(threads, OmpGetThreadLimit());
   omp_set_num_threads(threads);
   return nthread_original;
 }
+
 inline int32_t OmpSetNumThreadsWithoutHT(int32_t* p_threads) {
   auto& threads = *p_threads;
   int32_t nthread_original = omp_get_max_threads();
   if (threads <= 0) {
     threads = nthread_original;
   }
+  threads = std::min(threads, OmpGetThreadLimit());
   omp_set_num_threads(threads);
   return nthread_original;
 }
@@ -252,6 +279,7 @@ inline int32_t OmpGetNumThreads(int32_t n_threads) {
   if (n_threads <= 0) {
     n_threads = omp_get_num_procs();
   }
+  n_threads = std::min(n_threads, OmpGetThreadLimit());
   return n_threads;
 }
 }  // namespace common
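
Because OMP_THREAD_LIMIT is read when the OpenMP runtime starts, the cap is easiest to exercise from a parent process, which is how the new test later in this changeset does it. A minimal sketch (`train_small.py` is a hypothetical placeholder for any small XGBoost training script):

import os
import subprocess
import sys

env = os.environ.copy()
env["OMP_THREAD_LIMIT"] = "2"  # XGBoost now clamps its thread count to this limit.
subprocess.check_call([sys.executable, "train_small.py"], env=env)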

View File

@@ -49,10 +49,10 @@ class SimpleDMatrix : public DMatrix {
   MetaInfo info_;
   // Primary storage type
   std::shared_ptr<SparsePage> sparse_page_ = std::make_shared<SparsePage>();
-  std::shared_ptr<CSCPage> column_page_;
-  std::shared_ptr<SortedCSCPage> sorted_column_page_;
-  std::shared_ptr<EllpackPage> ellpack_page_;
-  std::shared_ptr<GHistIndexMatrix> gradient_index_;
+  std::shared_ptr<CSCPage> column_page_{nullptr};
+  std::shared_ptr<SortedCSCPage> sorted_column_page_{nullptr};
+  std::shared_ptr<EllpackPage> ellpack_page_{nullptr};
+  std::shared_ptr<GHistIndexMatrix> gradient_index_{nullptr};
   BatchParam batch_param_;

   bool EllpackExists() const override {

View File

@@ -232,9 +232,11 @@ class GBLinear : public GradientBooster {
   }

   void FeatureScore(std::string const &importance_type,
+                    common::Span<int32_t const> trees,
                     std::vector<bst_feature_t> *out_features,
                     std::vector<float> *out_scores) const override {
     CHECK(!model_.weight.empty()) << "Model is not initialized";
+    CHECK(trees.empty()) << "gblinear doesn't support number of trees for feature importance.";
     CHECK_EQ(importance_type, "weight")
         << "gblinear only has `weight` defined for feature importance.";
     out_features->resize(this->learner_model_param_->num_feature, 0);

View File

@@ -300,18 +300,28 @@ class GBTree : public GradientBooster {
     }
   }

-  void FeatureScore(std::string const &importance_type,
-                    std::vector<bst_feature_t> *features,
-                    std::vector<float> *scores) const override {
+  void FeatureScore(std::string const& importance_type, common::Span<int32_t const> trees,
+                    std::vector<bst_feature_t>* features,
+                    std::vector<float>* scores) const override {
     // Because feature with no importance doesn't appear in the return value so
     // we need to set up another pair of vectors to store the values during
     // computation.
     std::vector<size_t> split_counts(this->model_.learner_model_param->num_feature, 0);
     std::vector<float> gain_map(this->model_.learner_model_param->num_feature, 0);
+    std::vector<int32_t> tree_idx;
+    if (trees.empty()) {
+      tree_idx.resize(this->model_.trees.size());
+      std::iota(tree_idx.begin(), tree_idx.end(), 0);
+      trees = common::Span<int32_t const>(tree_idx);
+    }
+
+    auto total_n_trees = model_.trees.size();
     auto add_score = [&](auto fn) {
-      for (auto const &p_tree : model_.trees) {
+      for (auto idx : trees) {
+        CHECK_LE(idx, total_n_trees) << "Invalid tree index.";
+        auto const& p_tree = model_.trees[idx];
         p_tree->WalkTree([&](bst_node_t nidx) {
-          auto const &node = (*p_tree)[nidx];
+          auto const& node = (*p_tree)[nidx];
           if (!node.IsLeaf()) {
             split_counts[node.SplitIndex()]++;
             fn(p_tree, nidx, node.SplitIndex());

View File

@@ -1214,11 +1214,10 @@ class LearnerImpl : public LearnerIO {
     *out_preds = &out_predictions.predictions;
   }

-  void CalcFeatureScore(std::string const &importance_type,
-                        std::vector<bst_feature_t> *features,
-                        std::vector<float> *scores) override {
+  void CalcFeatureScore(std::string const& importance_type, common::Span<int32_t const> trees,
+                        std::vector<bst_feature_t>* features, std::vector<float>* scores) override {
     this->Configure();
-    gbm_->FeatureScore(importance_type, features, scores);
+    gbm_->FeatureScore(importance_type, trees, features, scores);
   }

   const std::map<std::string, std::string>& GetConfigurationArguments() const override {

View File

@@ -291,7 +291,7 @@ float GPUMultiClassAUCOVR(common::Span<float const> predts, MetaInfo const &info
     // labels is a vector of size n_samples.
     float label = labels[idx % n_samples] == class_id;

-    float w = get_weight(i % n_samples);
+    float w = weights.empty() ? 1.0f : weights[d_sorted_idx[i] % n_samples];
     float fp = (1.0 - label) * w;
     float tp = label * w;
     return thrust::make_pair(fp, tp);
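
For context, the fix makes the GPU implementation pick each sample's weight through the sorted index, so the result matches what scikit-learn computes; a minimal sketch mirroring the tests added later in this changeset:

import numpy as np
import xgboost as xgb
from sklearn.datasets import make_classification
from sklearn.metrics import roc_auc_score

X, y = make_classification(n_samples=500, n_features=10, n_informative=6,
                           n_classes=4, random_state=0)
w = np.random.default_rng(0).uniform(0.1, 1.0, size=y.shape[0])
Xy = xgb.DMatrix(X, label=y, weight=w)
booster = xgb.train(
    {"objective": "multi:softprob", "num_class": 4, "eval_metric": "auc"},
    Xy, num_boost_round=8,
)

score = booster.predict(Xy)
skl_auc = roc_auc_score(y, score, average="weighted", sample_weight=w, multi_class="ovr")
xgb_auc = float(booster.eval(Xy).split(":")[1])
print(skl_auc, xgb_auc)  # should agree closely after the fix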

View File

@@ -309,9 +309,8 @@ struct EvalGammaNLogLik {
     float constexpr kPsi = 1.0;
     bst_float theta = -1. / py;
     bst_float a = kPsi;
-    // b = -std::log(-theta);
-    float b = 1.0f;
-    // c = 1. / kPsi * std::log(y/kPsi) - std::log(y) - common::LogGamma(1. / kPsi);
+    float b = -std::log(-theta);
+    // c = 1. / kPsi^2 * std::log(y/kPsi) - std::log(y) - common::LogGamma(1. / kPsi);
     //   = 1.0f * std::log(y) - std::log(y) - 0 = 0
     float c = 0;
     // general form for exponential family.
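
Reading the comments above in exponential-family notation (with the dispersion \psi hardcoded to 1 via kPsi), the fix restores the b(\theta) term that had been stubbed out as a constant:

-\log L(y; \hat{y}) = -\frac{y\theta - b(\theta)}{a(\psi)} - c(y, \psi),
\qquad \theta = -\frac{1}{\hat{y}}, \quad b(\theta) = -\log(-\theta),
\quad a(\psi) = \psi = 1, \quad c(y, \psi) = 0 .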

View File

@@ -109,10 +109,9 @@ class ColMaker: public TreeUpdater {
     interaction_constraints_.Configure(param_, dmat->Info().num_row_);
     // build tree
     for (auto tree : trees) {
-      Builder builder(
-          param_,
-          colmaker_param_,
-          interaction_constraints_, column_densities_);
+      CHECK(tparam_);
+      Builder builder(param_, colmaker_param_, interaction_constraints_, tparam_,
+                      column_densities_);
       builder.Update(gpair->ConstHostVector(), dmat, tree);
     }
     param_.learning_rate = lr;
@@ -154,12 +153,12 @@ class ColMaker: public TreeUpdater {
   class Builder {
    public:
     // constructor
-    explicit Builder(const TrainParam& param,
-                     const ColMakerTrainParam& colmaker_train_param,
+    explicit Builder(const TrainParam &param, const ColMakerTrainParam &colmaker_train_param,
                      FeatureInteractionConstraintHost _interaction_constraints,
-                     const std::vector<float> &column_densities)
-        : param_(param), colmaker_train_param_{colmaker_train_param},
-          nthread_(omp_get_max_threads()),
+                     GenericParameter const *ctx, const std::vector<float> &column_densities)
+        : param_(param),
+          colmaker_train_param_{colmaker_train_param},
+          ctx_{ctx},
           tree_evaluator_(param_, column_densities.size(), GenericParameter::kCpuId),
           interaction_constraints_{std::move(_interaction_constraints)},
           column_densities_(column_densities) {}
@@ -238,7 +237,7 @@ class ColMaker: public TreeUpdater {
       // setup temp space for each thread
       // reserve a small space
       stemp_.clear();
-      stemp_.resize(this->nthread_, std::vector<ThreadEntry>());
+      stemp_.resize(this->ctx_->Threads(), std::vector<ThreadEntry>());
       for (auto& i : stemp_) {
         i.clear(); i.reserve(256);
       }
@@ -451,8 +450,9 @@ class ColMaker: public TreeUpdater {
       // start enumeration
       const auto num_features = static_cast<bst_omp_uint>(feat_set.size());
 #if defined(_OPENMP)
+      CHECK(this->ctx_);
       const int batch_size =  // NOLINT
-          std::max(static_cast<int>(num_features / this->nthread_ / 32), 1);
+          std::max(static_cast<int>(num_features / this->ctx_->Threads() / 32), 1);
 #endif  // defined(_OPENMP)
       {
         auto page = batch.GetView();
@@ -553,7 +553,8 @@ class ColMaker: public TreeUpdater {
     virtual void SyncBestSolution(const std::vector<int> &qexpand) {
       for (int nid : qexpand) {
         NodeEntry &e = snode_[nid];
-        for (int tid = 0; tid < this->nthread_; ++tid) {
+        CHECK(this->ctx_);
+        for (int tid = 0; tid < this->ctx_->Threads(); ++tid) {
           e.best.Update(stemp_[tid][nid].best);
         }
       }
@@ -609,7 +610,7 @@ class ColMaker: public TreeUpdater {
     const TrainParam& param_;
     const ColMakerTrainParam& colmaker_train_param_;
     // number of omp thread used during training
-    const int nthread_;
+    GenericParameter const* ctx_;
     common::ColumnSampler column_sampler_;
     // Instance Data: current node position in the tree of each instance
     std::vector<int> position_;

View File

@@ -115,9 +115,6 @@ bool QuantileHistMaker::UpdatePredictionCache(
     }
   }
 }

-template <typename GradientSumT>
-QuantileHistMaker::Builder<GradientSumT>::~Builder() = default;
-
 template <typename GradientSumT>
 template <bool any_missing>

View File

@@ -204,7 +204,6 @@ class QuantileHistMaker: public TreeUpdater {
           new HistogramBuilder<GradientSumT, CPUExpandEntry>} {
       builder_monitor_.Init("Quantile::Builder");
     }
-    ~Builder();
     // update one tree, growing
     virtual void Update(const GHistIndexMatrix& gmat,
                         const ColumnMatrix& column_matrix,

View File

@@ -430,7 +430,7 @@ TEST(GBTree, FeatureScore) {

   std::vector<bst_feature_t> features_weight;
   std::vector<float> scores_weight;
-  learner->CalcFeatureScore("weight", &features_weight, &scores_weight);
+  learner->CalcFeatureScore("weight", {}, &features_weight, &scores_weight);
   ASSERT_EQ(features_weight.size(), scores_weight.size());
   ASSERT_LE(features_weight.size(), learner->GetNumFeature());
   ASSERT_TRUE(std::is_sorted(features_weight.begin(), features_weight.end()));
@@ -438,11 +438,11 @@ TEST(GBTree, FeatureScore) {
   auto test_eq = [&learner, &scores_weight](std::string type) {
     std::vector<bst_feature_t> features;
     std::vector<float> scores;
-    learner->CalcFeatureScore(type, &features, &scores);
+    learner->CalcFeatureScore(type, {}, &features, &scores);

     std::vector<bst_feature_t> features_total;
     std::vector<float> scores_total;
-    learner->CalcFeatureScore("total_" + type, &features_total, &scores_total);
+    learner->CalcFeatureScore("total_" + type, {}, &features_total, &scores_total);

     for (size_t i = 0; i < scores_weight.size(); ++i) {
       ASSERT_LE(RelError(scores_total[i] / scores[i], scores_weight[i]), kRtEps);

View File

@@ -143,7 +143,7 @@ void CheckRankingObjFunction(std::unique_ptr<xgboost::ObjFunction> const& obj,
 }

 xgboost::bst_float GetMetricEval(xgboost::Metric * metric,
-                                 xgboost::HostDeviceVector<xgboost::bst_float> preds,
+                                 xgboost::HostDeviceVector<xgboost::bst_float> const& preds,
                                  std::vector<xgboost::bst_float> labels,
                                  std::vector<xgboost::bst_float> weights,
                                  std::vector<xgboost::bst_uint> groups) {

View File

@@ -86,7 +86,7 @@ void CheckRankingObjFunction(std::unique_ptr<xgboost::ObjFunction> const& obj,

 xgboost::bst_float GetMetricEval(
     xgboost::Metric * metric,
-    xgboost::HostDeviceVector<xgboost::bst_float> preds,
+    xgboost::HostDeviceVector<xgboost::bst_float> const& preds,
     std::vector<xgboost::bst_float> labels,
     std::vector<xgboost::bst_float> weights = std::vector<xgboost::bst_float>(),
     std::vector<xgboost::bst_uint> groups = std::vector<xgboost::bst_uint>());

View File

@@ -90,6 +90,16 @@ TEST(Metric, DeclareUnifiedTest(MultiAUC)) {
       },
       {0, 1, 1});  // no class 2.
   EXPECT_TRUE(std::isnan(auc)) << auc;
+
+  HostDeviceVector<float> predts{
+      0.0f, 1.0f, 0.0f,
+      1.0f, 0.0f, 0.0f,
+      0.0f, 0.0f, 1.0f,
+      0.0f, 0.0f, 1.0f,
+  };
+  std::vector<float> labels {1.0f, 0.0f, 2.0f, 1.0f};
+  auc = GetMetricEval(metric, predts, labels, {1.0f, 2.0f, 3.0f, 4.0f});
+  ASSERT_GT(auc, 0.714);
 }

 TEST(Metric, DeclareUnifiedTest(RankingAUC)) {

View File

@@ -13,9 +13,11 @@ class TestGPUEvalMetrics:
     def test_roc_auc_binary(self, n_samples):
         self.cpu_test.run_roc_auc_binary("gpu_hist", n_samples)

-    @pytest.mark.parametrize("n_samples", [4, 100, 1000])
-    def test_roc_auc_multi(self, n_samples):
-        self.cpu_test.run_roc_auc_multi("gpu_hist", n_samples)
+    @pytest.mark.parametrize(
+        "n_samples,weighted", [(4, False), (100, False), (1000, False), (1000, True)]
+    )
+    def test_roc_auc_multi(self, n_samples, weighted):
+        self.cpu_test.run_roc_auc_multi("gpu_hist", n_samples, weighted)

     @pytest.mark.parametrize("n_samples", [4, 100, 1000])
     def test_roc_auc_ltr(self, n_samples):

View File

@@ -59,6 +59,7 @@ def test_categorical():
     )
     X = pd.DataFrame(X.todense()).astype("category")
     clf.fit(X, y)
+    assert not clf._can_use_inplace_predict()

     with tempfile.TemporaryDirectory() as tempdir:
         model = os.path.join(tempdir, "categorial.json")

View File

@@ -1,3 +1,4 @@
+from typing import Union
 import xgboost as xgb
 import pytest
 import os
@@ -22,17 +23,14 @@ class TestCallbacks:
         cls.X_valid = X[split:, ...]
         cls.y_valid = y[split:, ...]

-    def run_evaluation_monitor(self, D_train, D_valid, rounds, verbose_eval):
-        evals_result = {}
-        with tm.captured_output() as (out, err):
-            xgb.train({'objective': 'binary:logistic',
-                       'eval_metric': 'error'}, D_train,
-                      evals=[(D_train, 'Train'), (D_valid, 'Valid')],
-                      num_boost_round=rounds,
-                      evals_result=evals_result,
-                      verbose_eval=verbose_eval)
-            output: str = out.getvalue().strip()
-
+    def run_evaluation_monitor(
+        self,
+        D_train: xgb.DMatrix,
+        D_valid: xgb.DMatrix,
+        rounds: int,
+        verbose_eval: Union[bool, int]
+    ):
+        def check_output(output: str) -> None:
             if int(verbose_eval) == 1:
                 # Should print each iteration info
                 assert len(output.split('\n')) == rounds
@@ -40,11 +38,32 @@ class TestCallbacks:
                 # Should print first and latest iteration info
                 assert len(output.split('\n')) == 2
             else:
-                # Should print info by each period additionaly to first and latest iteration
+                # Should print info by each period additionaly to first and latest
+                # iteration
                 num_periods = rounds // int(verbose_eval)
                 # Extra information is required for latest iteration
                 is_extra_info_required = num_periods * int(verbose_eval) < (rounds - 1)
-                assert len(output.split('\n')) == 1 + num_periods + int(is_extra_info_required)
+                assert len(output.split('\n')) == (
+                    1 + num_periods + int(is_extra_info_required)
+                )
+
+        evals_result: xgb.callback.TrainingCallback.EvalsLog = {}
+        params = {'objective': 'binary:logistic', 'eval_metric': 'error'}
+        with tm.captured_output() as (out, err):
+            xgb.train(
+                params, D_train,
+                evals=[(D_train, 'Train'), (D_valid, 'Valid')],
+                num_boost_round=rounds,
+                evals_result=evals_result,
+                verbose_eval=verbose_eval,
+            )
+            output: str = out.getvalue().strip()
+            check_output(output)
+
+        with tm.captured_output() as (out, err):
+            xgb.cv(params, D_train, num_boost_round=rounds, verbose_eval=verbose_eval)
+            output = out.getvalue().strip()
+            check_output(output)

     def test_evaluation_monitor(self):
         D_train = xgb.DMatrix(self.X_train, self.y_train)

View File

@@ -124,6 +124,35 @@ class TestEvalMetrics:
             skl_gamma_dev = mean_gamma_deviance(y, score)
             np.testing.assert_allclose(gamma_dev, skl_gamma_dev, rtol=1e-6)

+    @pytest.mark.skipif(**tm.no_sklearn())
+    def test_gamma_lik(self) -> None:
+        import scipy.stats as stats
+        rng = np.random.default_rng(1994)
+        n_samples = 32
+        n_features = 10
+
+        X = rng.normal(0, 1, size=n_samples * n_features).reshape((n_samples, n_features))
+
+        alpha, loc, beta = 5.0, 11.1, 22
+        y = stats.gamma.rvs(alpha, loc=loc, scale=beta, size=n_samples, random_state=rng)
+
+        reg = xgb.XGBRegressor(tree_method="hist", objective="reg:gamma", n_estimators=64)
+        reg.fit(X, y, eval_metric="gamma-nloglik", eval_set=[(X, y)])
+
+        score = reg.predict(X)
+
+        booster = reg.get_booster()
+        nloglik = float(booster.eval(xgb.DMatrix(X, y)).split(":")[1].split(":")[0])
+
+        # \beta_i = - (1 / \theta_i a)
+        # where \theta_i is the canonical parameter
+        # XGBoost uses the canonical link function of gamma in evaluation function.
+        # so \theta = - (1.0 / y)
+        # dispersion is hardcoded as 1.0, so shape (a in scipy parameter) is also 1.0
+        beta = - (1.0 / (- (1.0 / y)))  # == y
+        nloglik_stats = -stats.gamma.logpdf(score, a=1.0, scale=beta)
+
+        np.testing.assert_allclose(nloglik, np.mean(nloglik_stats), rtol=1e-3)
+
     def run_roc_auc_binary(self, tree_method, n_samples):
         import numpy as np
         from sklearn.datasets import make_classification
@@ -162,11 +191,11 @@ class TestEvalMetrics:
         np.testing.assert_allclose(skl_auc, auc, rtol=1e-6)

     @pytest.mark.skipif(**tm.no_sklearn())
-    @pytest.mark.parametrize("n_samples", [4, 100, 1000])
+    @pytest.mark.parametrize("n_samples", [100, 1000])
     def test_roc_auc(self, n_samples):
         self.run_roc_auc_binary("hist", n_samples)

-    def run_roc_auc_multi(self, tree_method, n_samples):
+    def run_roc_auc_multi(self, tree_method, n_samples, weighted):
         import numpy as np
         from sklearn.datasets import make_classification
         from sklearn.metrics import roc_auc_score
@@ -184,8 +213,14 @@ class TestEvalMetrics:
             n_classes=n_classes,
             random_state=rng
         )
+        if weighted:
+            weights = rng.randn(n_samples)
+            weights -= weights.min()
+            weights /= weights.max()
+        else:
+            weights = None

-        Xy = xgb.DMatrix(X, y)
+        Xy = xgb.DMatrix(X, y, weight=weights)
         booster = xgb.train(
             {
                 "tree_method": tree_method,
@@ -197,16 +232,22 @@ class TestEvalMetrics:
             num_boost_round=8,
         )
         score = booster.predict(Xy)
-        skl_auc = roc_auc_score(y, score, average="weighted", multi_class="ovr")
+        skl_auc = roc_auc_score(
+            y, score, average="weighted", sample_weight=weights, multi_class="ovr"
+        )
         auc = float(booster.eval(Xy).split(":")[1])
         np.testing.assert_allclose(skl_auc, auc, rtol=1e-6)

         X = rng.randn(*X.shape)
-        score = booster.predict(xgb.DMatrix(X))
-        skl_auc = roc_auc_score(y, score, average="weighted", multi_class="ovr")
-        auc = float(booster.eval(xgb.DMatrix(X, y)).split(":")[1])
-        np.testing.assert_allclose(skl_auc, auc, rtol=1e-6)
+        score = booster.predict(xgb.DMatrix(X, weight=weights))
+        skl_auc = roc_auc_score(
+            y, score, average="weighted", sample_weight=weights, multi_class="ovr"
+        )
+        auc = float(booster.eval(xgb.DMatrix(X, y, weight=weights)).split(":")[1])
+        np.testing.assert_allclose(skl_auc, auc, rtol=1e-5)

-    @pytest.mark.parametrize("n_samples", [4, 100, 1000])
-    def test_roc_auc_multi(self, n_samples):
-        self.run_roc_auc_multi("hist", n_samples)
+    @pytest.mark.parametrize(
+        "n_samples,weighted", [(4, False), (100, False), (1000, False), (1000, True)]
+    )
+    def test_roc_auc_multi(self, n_samples, weighted):
+        self.run_roc_auc_multi("hist", n_samples, weighted)

View File

@@ -1,6 +1,12 @@
-# -*- coding: utf-8 -*-
+import os
+import tempfile
+import subprocess
 import xgboost as xgb
 import numpy as np
+import pytest
+import testing as tm


 class TestOMP:
@@ -71,3 +77,31 @@ class TestOMP:
         assert auc_1 == auc_2 == auc_3
         assert np.array_equal(auc_1, auc_2)
         assert np.array_equal(auc_1, auc_3)
+
+    @pytest.mark.skipif(**tm.no_sklearn())
+    def test_with_omp_thread_limit(self):
+        args = [
+            "python", os.path.join(
+                tm.PROJECT_ROOT, "tests", "python", "with_omp_limit.py"
+            )
+        ]
+        results = []
+        with tempfile.TemporaryDirectory() as tmpdir:
+            for i in (1, 2, 16):
+                path = os.path.join(tmpdir, str(i))
+                with open(path, "w") as fd:
+                    fd.write("\n")
+                cp = args.copy()
+                cp.append(path)
+                env = os.environ.copy()
+                env["OMP_THREAD_LIMIT"] = str(i)
+                status = subprocess.call(cp, env=env)
+                assert status == 0
+                with open(path, "r") as fd:
+                    results.append(float(fd.read()))
+
+        for auc in results:
+            np.testing.assert_allclose(auc, results[0])

View File

@@ -0,0 +1,26 @@
+import os
+import xgboost as xgb
+from sklearn.datasets import make_classification
+from sklearn.metrics import roc_auc_score
+import sys
+
+
+def run_omp(output_path: str):
+    X, y = make_classification(
+        n_samples=200, n_features=32, n_classes=3, n_informative=8
+    )
+    Xy = xgb.DMatrix(X, y, nthread=16)
+    booster = xgb.train(
+        {"num_class": 3, "objective": "multi:softprob", "n_jobs": 16},
+        Xy,
+        num_boost_round=8,
+    )
+    score = booster.predict(Xy)
+    auc = roc_auc_score(y, score, average="weighted", multi_class="ovr")
+    with open(output_path, "w") as fd:
+        fd.write(str(auc))
+
+
+if __name__ == "__main__":
+    out = sys.argv[1]
+    run_omp(out)

View File

@@ -1,16 +1,5 @@
 #!/bin/bash

-if [ ${TRAVIS_OS_NAME} == "osx" ]; then
-    # https://travis-ci.community/t/macos-build-fails-because-of-homebrew-bundle-unknown-command/7296/27
-    # Use libomp 11.1.0: https://github.com/dmlc/xgboost/issues/7039
-    brew update  # Force update, so that update doesn't overwrite our version of libomp.rb
-    wget https://raw.githubusercontent.com/Homebrew/homebrew-core/679923b4eb48a8dc7ecc1f05d06063cd79b3fc00/Formula/libomp.rb -O $(find $(brew --repository) -name libomp.rb)
-    brew install cmake libomp
-    brew pin libomp
-fi
-
 if [ ${TASK} == "python_test" ] || [ ${TASK} == "python_sdist_test" ]; then
     if [ ${TRAVIS_OS_NAME} == "osx" ]; then
         wget --no-verbose -O conda.sh https://repo.anaconda.com/miniconda/Miniconda3-latest-MacOSX-x86_64.sh