Compare commits

..

9 Commits

Author SHA1 Message Date
Jiaming Yuan
742c19f3ec Bump release version to 1.5.2. (#7567) 2022-01-17 16:52:31 +08:00
Jiaming Yuan
2245a6e9ac Update setup.py. (#7360) (#7568)
* Add new classifiers.
* Typehint.
2022-01-15 20:39:34 +08:00
Jiaming Yuan
ed8ba2150b [backport] Fix pylint and mypy. (#7563)
* Fix Python typehint with upgraded mypy. (#7513)

* Fix pylint. (#7498)
2022-01-14 14:23:09 +08:00
Jiaming Yuan
87ddcf308e [BP] Fix early stopping with linear model. (#7554) (#7562) 2022-01-14 00:22:08 +08:00
Jiaming Yuan
35dac8af1d [BP] Fix index type for bitfield. (#7541) (#7560) 2022-01-14 00:21:34 +08:00
Jiaming Yuan
1311a20f49 [BP] Fix num_boosted_rounds for linear model. (#7538) (#7559)
* Add note.

* Fix n boosted rounds.
2022-01-14 00:20:57 +08:00
Jiaming Yuan
328d1e18db [backport] [R] Fix single sample prediction. (#7524) (#7558) 2022-01-14 00:20:17 +08:00
Jiaming Yuan
3e2d7519a6 [dask] Fix asyncio. (#7508) (#7561) 2022-01-13 21:49:11 +08:00
Jiaming Yuan
afb9dfd421 [backport] CI fixes for macos (#7482)
* [CI] Fix continuous delivery pipeline for MacOS (#7472)

* Fix github macos package upload. (#7474)

* Fix macos package upload. (#7475)


* Split up the tests.

* [CI] Add missing step extract_branch (#7479)

Co-authored-by: Philip Hyunsu Cho <chohyu01@cs.washington.edu>
2021-11-25 01:57:55 +08:00
28 changed files with 220 additions and 92 deletions

View File

@@ -45,13 +45,59 @@ jobs:
cd .. cd ..
python -c 'import xgboost' python -c 'import xgboost'
python-tests: python-tests-on-win:
name: Test XGBoost Python package on ${{ matrix.config.os }} name: Test XGBoost Python package on ${{ matrix.config.os }}
runs-on: ${{ matrix.config.os }} runs-on: ${{ matrix.config.os }}
strategy: strategy:
matrix: matrix:
config: config:
- {os: windows-2016, python-version: '3.8'} - {os: windows-2016, python-version: '3.8'}
steps:
- uses: actions/checkout@v2
with:
submodules: 'true'
- uses: conda-incubator/setup-miniconda@v2
with:
auto-update-conda: true
python-version: ${{ matrix.config.python-version }}
activate-environment: win64_env
environment-file: tests/ci_build/conda_env/win64_cpu_test.yml
- name: Display Conda env
shell: bash -l {0}
run: |
conda info
conda list
- name: Build XGBoost on Windows
shell: bash -l {0}
run: |
mkdir build_msvc
cd build_msvc
cmake .. -G"Visual Studio 15 2017" -DCMAKE_CONFIGURATION_TYPES="Release" -A x64 -DGOOGLE_TEST=ON -DUSE_DMLC_GTEST=ON
cmake --build . --config Release --parallel $(nproc)
- name: Install Python package
shell: bash -l {0}
run: |
cd python-package
python --version
python setup.py bdist_wheel --universal
pip install ./dist/*.whl
- name: Test Python package
shell: bash -l {0}
run: |
pytest -s -v ./tests/python
python-tests-on-macos:
name: Test XGBoost Python package on ${{ matrix.config.os }}
runs-on: ${{ matrix.config.os }}
strategy:
matrix:
config:
- {os: macos-10.15, python-version: "3.8" } - {os: macos-10.15, python-version: "3.8" }
steps: steps:
@@ -63,8 +109,8 @@ jobs:
with: with:
auto-update-conda: true auto-update-conda: true
python-version: ${{ matrix.config.python-version }} python-version: ${{ matrix.config.python-version }}
activate-environment: win64_test activate-environment: macos_test
environment-file: tests/ci_build/conda_env/win64_cpu_test.yml environment-file: tests/ci_build/conda_env/macos_cpu_test.yml
- name: Display Conda env - name: Display Conda env
shell: bash -l {0} shell: bash -l {0}
@@ -72,17 +118,7 @@ jobs:
conda info conda info
conda list conda list
- name: Build XGBoost on Windows
shell: bash -l {0}
if: matrix.config.os == 'windows-2016'
run: |
mkdir build_msvc
cd build_msvc
cmake .. -G"Visual Studio 15 2017" -DCMAKE_CONFIGURATION_TYPES="Release" -A x64 -DGOOGLE_TEST=ON -DUSE_DMLC_GTEST=ON
cmake --build . --config Release --parallel $(nproc)
- name: Build XGBoost on macos - name: Build XGBoost on macos
if: matrix.config.os == 'macos-10.15'
run: | run: |
wget https://raw.githubusercontent.com/Homebrew/homebrew-core/679923b4eb48a8dc7ecc1f05d06063cd79b3fc00/Formula/libomp.rb -O $(find $(brew --repository) -name libomp.rb) wget https://raw.githubusercontent.com/Homebrew/homebrew-core/679923b4eb48a8dc7ecc1f05d06063cd79b3fc00/Formula/libomp.rb -O $(find $(brew --repository) -name libomp.rb)
brew install ninja libomp brew install ninja libomp
@@ -108,16 +144,19 @@ jobs:
- name: Rename Python wheel - name: Rename Python wheel
shell: bash -l {0} shell: bash -l {0}
if: matrix.config.os == 'macos-10.15'
run: | run: |
TAG=macosx_10_15_x86_64.macosx_11_0_x86_64.macosx_12_0_x86_64 TAG=macosx_10_15_x86_64.macosx_11_0_x86_64.macosx_12_0_x86_64
python tests/ci_build/rename_whl.py python-package/dist/*.whl ${{ github.sha }} ${TAG} python tests/ci_build/rename_whl.py python-package/dist/*.whl ${{ github.sha }} ${TAG}
- name: Extract branch name
shell: bash
run: echo "##[set-output name=branch;]$(echo ${GITHUB_REF#refs/heads/})"
id: extract_branch
if: github.ref == 'refs/heads/master' || contains(github.ref, 'refs/heads/release_')
- name: Upload Python wheel - name: Upload Python wheel
shell: bash -l {0} shell: bash -l {0}
if: | if: github.ref == 'refs/heads/master' || contains(github.ref, 'refs/heads/release_')
(github.ref == 'refs/heads/master' || contains(github.ref, 'refs/heads/release_')) &&
matrix.os == 'macos-latest'
run: | run: |
python -m awscli s3 cp python-package/dist/*.whl s3://xgboost-nightly-builds/${{ steps.extract_branch.outputs.branch }}/ --acl public-read python -m awscli s3 cp python-package/dist/*.whl s3://xgboost-nightly-builds/${{ steps.extract_branch.outputs.branch }}/ --acl public-read
env: env:

View File

@@ -1,5 +1,5 @@
cmake_minimum_required(VERSION 3.14 FATAL_ERROR) cmake_minimum_required(VERSION 3.14 FATAL_ERROR)
project(xgboost LANGUAGES CXX C VERSION 1.5.1) project(xgboost LANGUAGES CXX C VERSION 1.5.2)
include(cmake/Utils.cmake) include(cmake/Utils.cmake)
list(APPEND CMAKE_MODULE_PATH "${xgboost_SOURCE_DIR}/cmake/modules") list(APPEND CMAKE_MODULE_PATH "${xgboost_SOURCE_DIR}/cmake/modules")
cmake_policy(SET CMP0022 NEW) cmake_policy(SET CMP0022 NEW)

View File

@@ -1,8 +1,8 @@
Package: xgboost Package: xgboost
Type: Package Type: Package
Title: Extreme Gradient Boosting Title: Extreme Gradient Boosting
Version: 1.5.1.1 Version: 1.5.2.1
Date: 2021-10-13 Date: 2022-01-17
Authors@R: c( Authors@R: c(
person("Tianqi", "Chen", role = c("aut"), person("Tianqi", "Chen", role = c("aut"),
email = "tianqi.tchen@gmail.com"), email = "tianqi.tchen@gmail.com"),

View File

@@ -435,7 +435,8 @@ predict.xgb.Booster <- function(object, newdata, missing = NA, outputmargin = FA
lapply(seq_len(n_groups), function(g) arr[g, , ]) lapply(seq_len(n_groups), function(g) arr[g, , ])
} else { } else {
## remove the first axis (group) ## remove the first axis (group)
as.matrix(arr[1, , ]) dn <- dimnames(arr)
matrix(arr[1, , ], nrow = dim(arr)[2], ncol = dim(arr)[3], dimnames = c(dn[2], dn[3]))
} }
} else if (predinteraction) { } else if (predinteraction) {
## Predict interaction ## Predict interaction
@@ -447,7 +448,8 @@ predict.xgb.Booster <- function(object, newdata, missing = NA, outputmargin = FA
lapply(seq_len(n_groups), function(g) arr[g, , , ]) lapply(seq_len(n_groups), function(g) arr[g, , , ])
} else { } else {
## remove the first axis (group) ## remove the first axis (group)
arr[1, , , ] arr <- arr[1, , , , drop = FALSE]
array(arr, dim = dim(arr)[2:4], dimnames(arr)[2:4])
} }
} else { } else {
## Normal prediction ## Normal prediction

View File

@@ -46,3 +46,31 @@ test_that("gblinear works", {
expect_equal(dim(h), c(n, ncol(dtrain) + 1)) expect_equal(dim(h), c(n, ncol(dtrain) + 1))
expect_s4_class(h, "dgCMatrix") expect_s4_class(h, "dgCMatrix")
}) })
test_that("gblinear early stopping works", {
data(agaricus.train, package = 'xgboost')
data(agaricus.test, package = 'xgboost')
dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label)
param <- list(
objective = "binary:logistic", eval_metric = "error", booster = "gblinear",
nthread = 2, eta = 0.8, alpha = 0.0001, lambda = 0.0001,
updater = "coord_descent"
)
es_round <- 1
n <- 10
booster <- xgb.train(
param, dtrain, n, list(eval = dtest, train = dtrain), early_stopping_rounds = es_round
)
expect_equal(booster$best_iteration, 5)
predt_es <- predict(booster, dtrain)
n <- booster$best_iteration + es_round
booster <- xgb.train(
param, dtrain, n, list(eval = dtest, train = dtrain), early_stopping_rounds = es_round
)
predt <- predict(booster, dtrain)
expect_equal(predt_es, predt)
})

View File

@@ -157,3 +157,28 @@ test_that("multiclass feature interactions work", {
# sums WRT columns must be close to feature contributions # sums WRT columns must be close to feature contributions
expect_lt(max(abs(apply(intr, c(1, 2, 3), sum) - aperm(cont, c(3, 1, 2)))), 0.00001) expect_lt(max(abs(apply(intr, c(1, 2, 3), sum) - aperm(cont, c(3, 1, 2)))), 0.00001)
}) })
test_that("SHAP single sample works", {
train <- agaricus.train
test <- agaricus.test
booster <- xgboost(
data = train$data,
label = train$label,
max_depth = 2,
nrounds = 4,
objective = "binary:logistic",
)
predt <- predict(
booster,
newdata = train$data[1, , drop = FALSE], predcontrib = TRUE
)
expect_equal(dim(predt), c(1, dim(train$data)[2] + 1))
predt <- predict(
booster,
newdata = train$data[1, , drop = FALSE], predinteraction = TRUE
)
expect_equal(dim(predt), c(1, dim(train$data)[2] + 1, dim(train$data)[2] + 1))
})

View File

@@ -6,6 +6,6 @@
#define XGBOOST_VER_MAJOR 1 #define XGBOOST_VER_MAJOR 1
#define XGBOOST_VER_MINOR 5 #define XGBOOST_VER_MINOR 5
#define XGBOOST_VER_PATCH 0 #define XGBOOST_VER_PATCH 2
#endif // XGBOOST_VERSION_CONFIG_H_ #endif // XGBOOST_VERSION_CONFIG_H_

View File

@@ -6,7 +6,7 @@
<groupId>ml.dmlc</groupId> <groupId>ml.dmlc</groupId>
<artifactId>xgboost-jvm_2.12</artifactId> <artifactId>xgboost-jvm_2.12</artifactId>
<version>1.5.1</version> <version>1.5.2</version>
<packaging>pom</packaging> <packaging>pom</packaging>
<name>XGBoost JVM Package</name> <name>XGBoost JVM Package</name>
<description>JVM Package for XGBoost</description> <description>JVM Package for XGBoost</description>

View File

@@ -6,10 +6,10 @@
<parent> <parent>
<groupId>ml.dmlc</groupId> <groupId>ml.dmlc</groupId>
<artifactId>xgboost-jvm_2.12</artifactId> <artifactId>xgboost-jvm_2.12</artifactId>
<version>1.5.1</version> <version>1.5.2</version>
</parent> </parent>
<artifactId>xgboost4j-example_2.12</artifactId> <artifactId>xgboost4j-example_2.12</artifactId>
<version>1.5.1</version> <version>1.5.2</version>
<packaging>jar</packaging> <packaging>jar</packaging>
<build> <build>
<plugins> <plugins>
@@ -26,7 +26,7 @@
<dependency> <dependency>
<groupId>ml.dmlc</groupId> <groupId>ml.dmlc</groupId>
<artifactId>xgboost4j-spark_${scala.binary.version}</artifactId> <artifactId>xgboost4j-spark_${scala.binary.version}</artifactId>
<version>1.5.1</version> <version>1.5.2</version>
</dependency> </dependency>
<dependency> <dependency>
<groupId>org.apache.spark</groupId> <groupId>org.apache.spark</groupId>
@@ -37,7 +37,7 @@
<dependency> <dependency>
<groupId>ml.dmlc</groupId> <groupId>ml.dmlc</groupId>
<artifactId>xgboost4j-flink_${scala.binary.version}</artifactId> <artifactId>xgboost4j-flink_${scala.binary.version}</artifactId>
<version>1.5.1</version> <version>1.5.2</version>
</dependency> </dependency>
<dependency> <dependency>
<groupId>org.apache.commons</groupId> <groupId>org.apache.commons</groupId>

View File

@@ -6,10 +6,10 @@
<parent> <parent>
<groupId>ml.dmlc</groupId> <groupId>ml.dmlc</groupId>
<artifactId>xgboost-jvm_2.12</artifactId> <artifactId>xgboost-jvm_2.12</artifactId>
<version>1.5.1</version> <version>1.5.2</version>
</parent> </parent>
<artifactId>xgboost4j-flink_2.12</artifactId> <artifactId>xgboost4j-flink_2.12</artifactId>
<version>1.5.1</version> <version>1.5.2</version>
<build> <build>
<plugins> <plugins>
<plugin> <plugin>
@@ -26,7 +26,7 @@
<dependency> <dependency>
<groupId>ml.dmlc</groupId> <groupId>ml.dmlc</groupId>
<artifactId>xgboost4j_${scala.binary.version}</artifactId> <artifactId>xgboost4j_${scala.binary.version}</artifactId>
<version>1.5.1</version> <version>1.5.2</version>
</dependency> </dependency>
<dependency> <dependency>
<groupId>org.apache.commons</groupId> <groupId>org.apache.commons</groupId>

View File

@@ -6,10 +6,10 @@
<parent> <parent>
<groupId>ml.dmlc</groupId> <groupId>ml.dmlc</groupId>
<artifactId>xgboost-jvm_2.12</artifactId> <artifactId>xgboost-jvm_2.12</artifactId>
<version>1.5.1</version> <version>1.5.2</version>
</parent> </parent>
<artifactId>xgboost4j-gpu_2.12</artifactId> <artifactId>xgboost4j-gpu_2.12</artifactId>
<version>1.5.1</version> <version>1.5.2</version>
<packaging>jar</packaging> <packaging>jar</packaging>
<properties> <properties>

View File

@@ -6,7 +6,7 @@
<parent> <parent>
<groupId>ml.dmlc</groupId> <groupId>ml.dmlc</groupId>
<artifactId>xgboost-jvm_2.12</artifactId> <artifactId>xgboost-jvm_2.12</artifactId>
<version>1.5.1</version> <version>1.5.2</version>
</parent> </parent>
<artifactId>xgboost4j-spark-gpu_2.12</artifactId> <artifactId>xgboost4j-spark-gpu_2.12</artifactId>
<build> <build>
@@ -24,7 +24,7 @@
<dependency> <dependency>
<groupId>ml.dmlc</groupId> <groupId>ml.dmlc</groupId>
<artifactId>xgboost4j-gpu_${scala.binary.version}</artifactId> <artifactId>xgboost4j-gpu_${scala.binary.version}</artifactId>
<version>1.5.1</version> <version>1.5.2</version>
</dependency> </dependency>
<dependency> <dependency>
<groupId>org.apache.spark</groupId> <groupId>org.apache.spark</groupId>

View File

@@ -6,7 +6,7 @@
<parent> <parent>
<groupId>ml.dmlc</groupId> <groupId>ml.dmlc</groupId>
<artifactId>xgboost-jvm_2.12</artifactId> <artifactId>xgboost-jvm_2.12</artifactId>
<version>1.5.1</version> <version>1.5.2</version>
</parent> </parent>
<artifactId>xgboost4j-spark_2.12</artifactId> <artifactId>xgboost4j-spark_2.12</artifactId>
<build> <build>
@@ -24,7 +24,7 @@
<dependency> <dependency>
<groupId>ml.dmlc</groupId> <groupId>ml.dmlc</groupId>
<artifactId>xgboost4j_${scala.binary.version}</artifactId> <artifactId>xgboost4j_${scala.binary.version}</artifactId>
<version>1.5.1</version> <version>1.5.2</version>
</dependency> </dependency>
<dependency> <dependency>
<groupId>org.apache.spark</groupId> <groupId>org.apache.spark</groupId>

View File

@@ -6,10 +6,10 @@
<parent> <parent>
<groupId>ml.dmlc</groupId> <groupId>ml.dmlc</groupId>
<artifactId>xgboost-jvm_2.12</artifactId> <artifactId>xgboost-jvm_2.12</artifactId>
<version>1.5.1</version> <version>1.5.2</version>
</parent> </parent>
<artifactId>xgboost4j_2.12</artifactId> <artifactId>xgboost4j_2.12</artifactId>
<version>1.5.1</version> <version>1.5.2</version>
<packaging>jar</packaging> <packaging>jar</packaging>
<dependencies> <dependencies>

View File

@@ -4,6 +4,7 @@ import shutil
import subprocess import subprocess
import logging import logging
import distutils import distutils
from typing import Optional, List
import sys import sys
from platform import system from platform import system
from setuptools import setup, find_packages, Extension from setuptools import setup, find_packages, Extension
@@ -36,7 +37,7 @@ NEED_CLEAN_FILE = set()
BUILD_TEMP_DIR = None BUILD_TEMP_DIR = None
def lib_name(): def lib_name() -> str:
'''Return platform dependent shared object name.''' '''Return platform dependent shared object name.'''
if system() == 'Linux' or system().upper().endswith('BSD'): if system() == 'Linux' or system().upper().endswith('BSD'):
name = 'libxgboost.so' name = 'libxgboost.so'
@@ -47,13 +48,13 @@ def lib_name():
return name return name
def copy_tree(src_dir, target_dir): def copy_tree(src_dir: str, target_dir: str) -> None:
'''Copy source tree into build directory.''' '''Copy source tree into build directory.'''
def clean_copy_tree(src, dst): def clean_copy_tree(src: str, dst: str) -> None:
distutils.dir_util.copy_tree(src, dst) distutils.dir_util.copy_tree(src, dst)
NEED_CLEAN_TREE.add(os.path.abspath(dst)) NEED_CLEAN_TREE.add(os.path.abspath(dst))
def clean_copy_file(src, dst): def clean_copy_file(src: str, dst: str) -> None:
distutils.file_util.copy_file(src, dst) distutils.file_util.copy_file(src, dst)
NEED_CLEAN_FILE.add(os.path.abspath(dst)) NEED_CLEAN_FILE.add(os.path.abspath(dst))
@@ -77,7 +78,7 @@ def copy_tree(src_dir, target_dir):
clean_copy_file(lic, os.path.join(target_dir, 'LICENSE')) clean_copy_file(lic, os.path.join(target_dir, 'LICENSE'))
def clean_up(): def clean_up() -> None:
'''Removed copied files.''' '''Removed copied files.'''
for path in NEED_CLEAN_TREE: for path in NEED_CLEAN_TREE:
shutil.rmtree(path) shutil.rmtree(path)
@@ -87,7 +88,7 @@ def clean_up():
class CMakeExtension(Extension): # pylint: disable=too-few-public-methods class CMakeExtension(Extension): # pylint: disable=too-few-public-methods
'''Wrapper for extension''' '''Wrapper for extension'''
def __init__(self, name): def __init__(self, name: str) -> None:
super().__init__(name=name, sources=[]) super().__init__(name=name, sources=[])
@@ -97,7 +98,14 @@ class BuildExt(build_ext.build_ext): # pylint: disable=too-many-ancestors
logger = logging.getLogger('XGBoost build_ext') logger = logging.getLogger('XGBoost build_ext')
# pylint: disable=too-many-arguments,no-self-use # pylint: disable=too-many-arguments,no-self-use
def build(self, src_dir, build_dir, generator, build_tool=None, use_omp=1): def build(
self,
src_dir: str,
build_dir: str,
generator: str,
build_tool: Optional[str] = None,
use_omp: int = 1,
) -> None:
'''Build the core library with CMake.''' '''Build the core library with CMake.'''
cmake_cmd = ['cmake', src_dir, generator] cmake_cmd = ['cmake', src_dir, generator]
@@ -116,13 +124,14 @@ class BuildExt(build_ext.build_ext): # pylint: disable=too-many-ancestors
if system() != 'Windows': if system() != 'Windows':
nproc = os.cpu_count() nproc = os.cpu_count()
assert build_tool is not None
subprocess.check_call([build_tool, '-j' + str(nproc)], subprocess.check_call([build_tool, '-j' + str(nproc)],
cwd=build_dir) cwd=build_dir)
else: else:
subprocess.check_call(['cmake', '--build', '.', subprocess.check_call(['cmake', '--build', '.',
'--config', 'Release'], cwd=build_dir) '--config', 'Release'], cwd=build_dir)
def build_cmake_extension(self): def build_cmake_extension(self) -> None:
'''Configure and build using CMake''' '''Configure and build using CMake'''
if USER_OPTIONS['use-system-libxgboost'][2]: if USER_OPTIONS['use-system-libxgboost'][2]:
self.logger.info('Using system libxgboost.') self.logger.info('Using system libxgboost.')
@@ -174,14 +183,14 @@ class BuildExt(build_ext.build_ext): # pylint: disable=too-many-ancestors
self.logger.warning('Disabling OpenMP support.') self.logger.warning('Disabling OpenMP support.')
self.build(src_dir, build_dir, gen, build_tool, use_omp=0) self.build(src_dir, build_dir, gen, build_tool, use_omp=0)
def build_extension(self, ext): def build_extension(self, ext: Extension) -> None:
'''Override the method for dispatching.''' '''Override the method for dispatching.'''
if isinstance(ext, CMakeExtension): if isinstance(ext, CMakeExtension):
self.build_cmake_extension() self.build_cmake_extension()
else: else:
super().build_extension(ext) super().build_extension(ext)
def copy_extensions_to_source(self): def copy_extensions_to_source(self) -> None:
'''Dummy override. Invoked during editable installation. Our binary '''Dummy override. Invoked during editable installation. Our binary
should be available in `lib`. should be available in `lib`.
@@ -196,7 +205,7 @@ class Sdist(sdist.sdist): # pylint: disable=too-many-ancestors
'''Copy c++ source into Python directory.''' '''Copy c++ source into Python directory.'''
logger = logging.getLogger('xgboost sdist') logger = logging.getLogger('xgboost sdist')
def run(self): def run(self) -> None:
copy_tree(os.path.join(CURRENT_DIR, os.path.pardir), copy_tree(os.path.join(CURRENT_DIR, os.path.pardir),
os.path.join(CURRENT_DIR, 'xgboost')) os.path.join(CURRENT_DIR, 'xgboost'))
libxgboost = os.path.join( libxgboost = os.path.join(
@@ -213,7 +222,7 @@ class InstallLib(install_lib.install_lib):
'''Copy shared object into installation directory.''' '''Copy shared object into installation directory.'''
logger = logging.getLogger('xgboost install_lib') logger = logging.getLogger('xgboost install_lib')
def install(self): def install(self) -> List[str]:
outfiles = super().install() outfiles = super().install()
if USER_OPTIONS['use-system-libxgboost'][2] != 0: if USER_OPTIONS['use-system-libxgboost'][2] != 0:
@@ -255,7 +264,7 @@ class Install(install.install): # pylint: disable=too-many-instance-attributes
user_options = install.install.user_options + list( user_options = install.install.user_options + list(
(k, v[0], v[1]) for k, v in USER_OPTIONS.items()) (k, v[0], v[1]) for k, v in USER_OPTIONS.items())
def initialize_options(self): def initialize_options(self) -> None:
super().initialize_options() super().initialize_options()
self.use_openmp = 1 self.use_openmp = 1
self.use_cuda = 0 self.use_cuda = 0
@@ -271,7 +280,7 @@ class Install(install.install): # pylint: disable=too-many-instance-attributes
self.use_system_libxgboost = 0 self.use_system_libxgboost = 0
def run(self): def run(self) -> None:
# setuptools will configure the options according to user supplied command line # setuptools will configure the options according to user supplied command line
# arguments, then here we propagate them into `USER_OPTIONS` for visibility to # arguments, then here we propagate them into `USER_OPTIONS` for visibility to
# other sub-commands like `build_ext`. # other sub-commands like `build_ext`.
@@ -341,7 +350,9 @@ if __name__ == '__main__':
'Programming Language :: Python :: 3', 'Programming Language :: Python :: 3',
'Programming Language :: Python :: 3.6', 'Programming Language :: Python :: 3.6',
'Programming Language :: Python :: 3.7', 'Programming Language :: Python :: 3.7',
'Programming Language :: Python :: 3.8'], 'Programming Language :: Python :: 3.8',
'Programming Language :: Python :: 3.9',
'Programming Language :: Python :: 3.10'],
python_requires='>=3.6', python_requires='>=3.6',
url='https://github.com/dmlc/xgboost') url='https://github.com/dmlc/xgboost')

View File

@@ -1 +1 @@
1.5.1 1.5.2

View File

@@ -229,7 +229,7 @@ def _numpy2ctypes_type(dtype):
} }
if np.intc is not np.int32: # Windows if np.intc is not np.int32: # Windows
_NUMPY_TO_CTYPES_MAPPING[np.intc] = _NUMPY_TO_CTYPES_MAPPING[np.int32] _NUMPY_TO_CTYPES_MAPPING[np.intc] = _NUMPY_TO_CTYPES_MAPPING[np.int32]
if dtype not in _NUMPY_TO_CTYPES_MAPPING.keys(): if dtype not in _NUMPY_TO_CTYPES_MAPPING:
raise TypeError( raise TypeError(
f"Supported types: {_NUMPY_TO_CTYPES_MAPPING.keys()}, got: {dtype}" f"Supported types: {_NUMPY_TO_CTYPES_MAPPING.keys()}, got: {dtype}"
) )
@@ -266,7 +266,7 @@ def ctypes2cupy(cptr, length, dtype):
from cupy.cuda.memory import UnownedMemory from cupy.cuda.memory import UnownedMemory
CUPY_TO_CTYPES_MAPPING = {cupy.float32: ctypes.c_float, cupy.uint32: ctypes.c_uint} CUPY_TO_CTYPES_MAPPING = {cupy.float32: ctypes.c_float, cupy.uint32: ctypes.c_uint}
if dtype not in CUPY_TO_CTYPES_MAPPING.keys(): if dtype not in CUPY_TO_CTYPES_MAPPING:
raise RuntimeError(f"Supported types: {CUPY_TO_CTYPES_MAPPING.keys()}") raise RuntimeError(f"Supported types: {CUPY_TO_CTYPES_MAPPING.keys()}")
addr = ctypes.cast(cptr, ctypes.c_void_p).value addr = ctypes.cast(cptr, ctypes.c_void_p).value
# pylint: disable=c-extension-no-member,no-member # pylint: disable=c-extension-no-member,no-member

View File

@@ -1606,8 +1606,9 @@ class DaskScikitLearnBase(XGBModel):
should use `worker_client' instead of default client. should use `worker_client' instead of default client.
""" """
asynchronous = getattr(self, "_asynchronous", False)
if self._client is None: if self._client is None:
asynchronous = getattr(self, "_asynchronous", False)
try: try:
distributed.get_worker() distributed.get_worker()
in_worker = True in_worker = True
@@ -1620,7 +1621,7 @@ class DaskScikitLearnBase(XGBModel):
return ret return ret
return ret return ret
return self.client.sync(func, **kwargs, asynchronous=asynchronous) return self.client.sync(func, **kwargs, asynchronous=self.client.asynchronous)
@xgboost_model_doc( @xgboost_model_doc(
@@ -1833,7 +1834,7 @@ class DaskXGBClassifier(DaskScikitLearnBase, XGBClassifierBase):
vstack = update_wrapper( vstack = update_wrapper(
partial(da.vstack, allow_unknown_chunksizes=True), da.vstack partial(da.vstack, allow_unknown_chunksizes=True), da.vstack
) )
return _cls_predict_proba(getattr(self, "n_classes_", None), predts, vstack) return _cls_predict_proba(getattr(self, "n_classes_", 0), predts, vstack)
# pylint: disable=missing-function-docstring # pylint: disable=missing-function-docstring
def predict_proba( def predict_proba(

View File

@@ -814,7 +814,7 @@ def dispatch_data_backend(
def _to_data_type(dtype: str, name: str): def _to_data_type(dtype: str, name: str):
dtype_map = {'float32': 1, 'float64': 2, 'uint32': 3, 'uint64': 4} dtype_map = {'float32': 1, 'float64': 2, 'uint32': 3, 'uint64': 4}
if dtype not in dtype_map.keys(): if dtype not in dtype_map:
raise TypeError( raise TypeError(
f'Expecting float32, float64, uint32, uint64, got {dtype} ' + f'Expecting float32, float64, uint32, uint64, got {dtype} ' +
f'for {name}.') f'for {name}.')

View File

@@ -1354,9 +1354,7 @@ class XGBClassifier(XGBModel, XGBClassifierBase):
iteration_range=iteration_range iteration_range=iteration_range
) )
# If model is loaded from a raw booster there's no `n_classes_` # If model is loaded from a raw booster there's no `n_classes_`
return _cls_predict_proba( return _cls_predict_proba(getattr(self, "n_classes_", 0), class_probs, np.vstack)
getattr(self, "n_classes_", None), class_probs, np.vstack
)
def evals_result(self) -> TrainingCallback.EvalsLog: def evals_result(self) -> TrainingCallback.EvalsLog:
"""Return the evaluation results. """Return the evaluation results.

View File

@@ -58,14 +58,15 @@ __forceinline__ __device__ BitFieldAtomicType AtomicAnd(BitFieldAtomicType* addr
template <typename VT, typename Direction, bool IsConst = false> template <typename VT, typename Direction, bool IsConst = false>
struct BitFieldContainer { struct BitFieldContainer {
using value_type = std::conditional_t<IsConst, VT const, VT>; // NOLINT using value_type = std::conditional_t<IsConst, VT const, VT>; // NOLINT
using pointer = value_type*; // NOLINT using index_type = size_t; // NOLINT
using pointer = value_type*; // NOLINT
static value_type constexpr kValueSize = sizeof(value_type) * 8; static index_type constexpr kValueSize = sizeof(value_type) * 8;
static value_type constexpr kOne = 1; // force correct type. static index_type constexpr kOne = 1; // force correct type.
struct Pos { struct Pos {
std::remove_const_t<value_type> int_pos {0}; index_type int_pos{0};
std::remove_const_t<value_type> bit_pos {0}; index_type bit_pos{0};
}; };
private: private:
@@ -73,13 +74,13 @@ struct BitFieldContainer {
static_assert(!std::is_signed<VT>::value, "Must use unsigned type as underlying storage."); static_assert(!std::is_signed<VT>::value, "Must use unsigned type as underlying storage.");
public: public:
XGBOOST_DEVICE static Pos ToBitPos(value_type pos) { XGBOOST_DEVICE static Pos ToBitPos(index_type pos) {
Pos pos_v; Pos pos_v;
if (pos == 0) { if (pos == 0) {
return pos_v; return pos_v;
} }
pos_v.int_pos = pos / kValueSize; pos_v.int_pos = pos / kValueSize;
pos_v.bit_pos = pos % kValueSize; pos_v.bit_pos = pos % kValueSize;
return pos_v; return pos_v;
} }
@@ -96,7 +97,7 @@ struct BitFieldContainer {
/*\brief Compute the size of needed memory allocation. The returned value is in terms /*\brief Compute the size of needed memory allocation. The returned value is in terms
* of number of elements with `BitFieldContainer::value_type'. * of number of elements with `BitFieldContainer::value_type'.
*/ */
XGBOOST_DEVICE static size_t ComputeStorageSize(size_t size) { XGBOOST_DEVICE static size_t ComputeStorageSize(index_type size) {
return common::DivRoundUp(size, kValueSize); return common::DivRoundUp(size, kValueSize);
} }
#if defined(__CUDA_ARCH__) #if defined(__CUDA_ARCH__)
@@ -138,14 +139,14 @@ struct BitFieldContainer {
#endif // defined(__CUDA_ARCH__) #endif // defined(__CUDA_ARCH__)
#if defined(__CUDA_ARCH__) #if defined(__CUDA_ARCH__)
__device__ auto Set(value_type pos) { __device__ auto Set(index_type pos) {
Pos pos_v = Direction::Shift(ToBitPos(pos)); Pos pos_v = Direction::Shift(ToBitPos(pos));
value_type& value = bits_[pos_v.int_pos]; value_type& value = bits_[pos_v.int_pos];
value_type set_bit = kOne << pos_v.bit_pos; value_type set_bit = kOne << pos_v.bit_pos;
using Type = typename dh::detail::AtomicDispatcher<sizeof(value_type)>::Type; using Type = typename dh::detail::AtomicDispatcher<sizeof(value_type)>::Type;
atomicOr(reinterpret_cast<Type *>(&value), set_bit); atomicOr(reinterpret_cast<Type *>(&value), set_bit);
} }
__device__ void Clear(value_type pos) { __device__ void Clear(index_type pos) {
Pos pos_v = Direction::Shift(ToBitPos(pos)); Pos pos_v = Direction::Shift(ToBitPos(pos));
value_type& value = bits_[pos_v.int_pos]; value_type& value = bits_[pos_v.int_pos];
value_type clear_bit = ~(kOne << pos_v.bit_pos); value_type clear_bit = ~(kOne << pos_v.bit_pos);
@@ -153,13 +154,13 @@ struct BitFieldContainer {
atomicAnd(reinterpret_cast<Type *>(&value), clear_bit); atomicAnd(reinterpret_cast<Type *>(&value), clear_bit);
} }
#else #else
void Set(value_type pos) { void Set(index_type pos) {
Pos pos_v = Direction::Shift(ToBitPos(pos)); Pos pos_v = Direction::Shift(ToBitPos(pos));
value_type& value = bits_[pos_v.int_pos]; value_type& value = bits_[pos_v.int_pos];
value_type set_bit = kOne << pos_v.bit_pos; value_type set_bit = kOne << pos_v.bit_pos;
value |= set_bit; value |= set_bit;
} }
void Clear(value_type pos) { void Clear(index_type pos) {
Pos pos_v = Direction::Shift(ToBitPos(pos)); Pos pos_v = Direction::Shift(ToBitPos(pos));
value_type& value = bits_[pos_v.int_pos]; value_type& value = bits_[pos_v.int_pos];
value_type clear_bit = ~(kOne << pos_v.bit_pos); value_type clear_bit = ~(kOne << pos_v.bit_pos);
@@ -175,7 +176,7 @@ struct BitFieldContainer {
value_type result = test_bit & value; value_type result = test_bit & value;
return static_cast<bool>(result); return static_cast<bool>(result);
} }
XGBOOST_DEVICE bool Check(value_type pos) const { XGBOOST_DEVICE bool Check(index_type pos) const {
Pos pos_v = ToBitPos(pos); Pos pos_v = ToBitPos(pos);
return Check(pos_v); return Check(pos_v);
} }

View File

@@ -62,9 +62,8 @@ struct GBLinearTrainParam : public XGBoostParameter<GBLinearTrainParam> {
} }
}; };
void LinearCheckLayer(unsigned layer_begin, unsigned layer_end) { void LinearCheckLayer(unsigned layer_begin) {
CHECK_EQ(layer_begin, 0) << "Linear booster does not support prediction range."; CHECK_EQ(layer_begin, 0) << "Linear booster does not support prediction range.";
CHECK_EQ(layer_end, 0) << "Linear booster does not support prediction range.";
} }
/*! /*!
@@ -152,7 +151,7 @@ class GBLinear : public GradientBooster {
void PredictBatch(DMatrix *p_fmat, PredictionCacheEntry *predts, void PredictBatch(DMatrix *p_fmat, PredictionCacheEntry *predts,
bool training, unsigned layer_begin, unsigned layer_end) override { bool training, unsigned layer_begin, unsigned layer_end) override {
monitor_.Start("PredictBatch"); monitor_.Start("PredictBatch");
LinearCheckLayer(layer_begin, layer_end); LinearCheckLayer(layer_begin);
auto* out_preds = &predts->predictions; auto* out_preds = &predts->predictions;
this->PredictBatchInternal(p_fmat, &out_preds->HostVector()); this->PredictBatchInternal(p_fmat, &out_preds->HostVector());
monitor_.Stop("PredictBatch"); monitor_.Stop("PredictBatch");
@@ -161,7 +160,7 @@ class GBLinear : public GradientBooster {
void PredictInstance(const SparsePage::Inst &inst, void PredictInstance(const SparsePage::Inst &inst,
std::vector<bst_float> *out_preds, std::vector<bst_float> *out_preds,
unsigned layer_begin, unsigned layer_end) override { unsigned layer_begin, unsigned layer_end) override {
LinearCheckLayer(layer_begin, layer_end); LinearCheckLayer(layer_begin);
const int ngroup = model_.learner_model_param->num_output_group; const int ngroup = model_.learner_model_param->num_output_group;
for (int gid = 0; gid < ngroup; ++gid) { for (int gid = 0; gid < ngroup; ++gid) {
this->Pred(inst, dmlc::BeginPtr(*out_preds), gid, this->Pred(inst, dmlc::BeginPtr(*out_preds), gid,
@@ -177,8 +176,8 @@ class GBLinear : public GradientBooster {
HostDeviceVector<bst_float>* out_contribs, HostDeviceVector<bst_float>* out_contribs,
unsigned layer_begin, unsigned layer_end, bool, int, unsigned) override { unsigned layer_begin, unsigned layer_end, bool, int, unsigned) override {
model_.LazyInitModel(); model_.LazyInitModel();
LinearCheckLayer(layer_begin, layer_end); LinearCheckLayer(layer_begin);
const auto& base_margin = p_fmat->Info().base_margin_.ConstHostVector(); const auto &base_margin = p_fmat->Info().base_margin_.ConstHostVector();
const int ngroup = model_.learner_model_param->num_output_group; const int ngroup = model_.learner_model_param->num_output_group;
const size_t ncolumns = model_.learner_model_param->num_feature + 1; const size_t ncolumns = model_.learner_model_param->num_feature + 1;
// allocate space for (#features + bias) times #groups times #rows // allocate space for (#features + bias) times #groups times #rows
@@ -214,7 +213,7 @@ class GBLinear : public GradientBooster {
void PredictInteractionContributions(DMatrix* p_fmat, void PredictInteractionContributions(DMatrix* p_fmat,
HostDeviceVector<bst_float>* out_contribs, HostDeviceVector<bst_float>* out_contribs,
unsigned layer_begin, unsigned layer_end, bool) override { unsigned layer_begin, unsigned layer_end, bool) override {
LinearCheckLayer(layer_begin, layer_end); LinearCheckLayer(layer_begin);
std::vector<bst_float>& contribs = out_contribs->HostVector(); std::vector<bst_float>& contribs = out_contribs->HostVector();
// linear models have no interaction effects // linear models have no interaction effects

View File

@@ -18,6 +18,7 @@ void GBLinearModel::SaveModel(Json* p_out) const {
j_weights[i] = weight[i]; j_weights[i] = weight[i];
} }
out["weights"] = std::move(j_weights); out["weights"] = std::move(j_weights);
out["boosted_rounds"] = Json{this->num_boosted_rounds};
} }
void GBLinearModel::LoadModel(Json const& in) { void GBLinearModel::LoadModel(Json const& in) {
@@ -27,6 +28,13 @@ void GBLinearModel::LoadModel(Json const& in) {
for (size_t i = 0; i < n_weights; ++i) { for (size_t i = 0; i < n_weights; ++i) {
weight[i] = get<Number const>(j_weights[i]); weight[i] = get<Number const>(j_weights[i]);
} }
auto const& obj = get<Object const>(in);
auto boosted_rounds = obj.find("boosted_rounds");
if (boosted_rounds != obj.cend()) {
this->num_boosted_rounds = get<Integer const>(boosted_rounds->second);
} else {
this->num_boosted_rounds = 0;
}
} }
DMLC_REGISTER_PARAMETER(DeprecatedGBLinearModelParam); DMLC_REGISTER_PARAMETER(DeprecatedGBLinearModelParam);

View File

@@ -1,4 +1,4 @@
name: cpu_test name: macos_test
channels: channels:
- conda-forge - conda-forge
dependencies: dependencies:

View File

@@ -38,6 +38,14 @@ TEST(BitField, Check) {
ASSERT_FALSE(bits.Check(i)); ASSERT_FALSE(bits.Check(i));
} }
} }
{
// regression test for correct index type.
std::vector<RBitField8::value_type> storage(33, 0);
storage[32] = static_cast<uint8_t>(1);
auto bits = RBitField8({storage.data(), storage.size()});
ASSERT_TRUE(bits.Check(256));
}
} }
template <typename BitFieldT, typename VT = typename BitFieldT::value_type> template <typename BitFieldT, typename VT = typename BitFieldT::value_type>

View File

@@ -1,7 +1,6 @@
import sys import sys
from hypothesis import strategies, given, settings, assume from hypothesis import strategies, given, settings, assume, note
import pytest import pytest
import numpy
import xgboost as xgb import xgboost as xgb
sys.path.append("tests/python") sys.path.append("tests/python")
import testing as tm import testing as tm
@@ -17,10 +16,14 @@ parameter_strategy = strategies.fixed_dictionaries({
'top_k': strategies.integers(1, 10), 'top_k': strategies.integers(1, 10),
}) })
def train_result(param, dmat, num_rounds): def train_result(param, dmat, num_rounds):
result = {} result = {}
xgb.train(param, dmat, num_rounds, [(dmat, 'train')], verbose_eval=False, booster = xgb.train(
evals_result=result) param, dmat, num_rounds, [(dmat, 'train')], verbose_eval=False,
evals_result=result
)
assert booster.num_boosted_rounds() == num_rounds
return result return result
@@ -33,6 +36,7 @@ class TestGPULinear:
param['updater'] = 'gpu_coord_descent' param['updater'] = 'gpu_coord_descent'
param = dataset.set_params(param) param = dataset.set_params(param)
result = train_result(param, dataset.get_dmat(), num_rounds)['train'][dataset.metric] result = train_result(param, dataset.get_dmat(), num_rounds)['train'][dataset.metric]
note(result)
assert tm.non_increasing(result) assert tm.non_increasing(result)
# Loss is not guaranteed to always decrease because of regularisation parameters # Loss is not guaranteed to always decrease because of regularisation parameters
@@ -49,6 +53,7 @@ class TestGPULinear:
param['lambda'] = lambd param['lambda'] = lambd
param = dataset.set_params(param) param = dataset.set_params(param)
result = train_result(param, dataset.get_dmat(), num_rounds)['train'][dataset.metric] result = train_result(param, dataset.get_dmat(), num_rounds)['train'][dataset.metric]
note(result)
assert tm.non_increasing([result[0], result[-1]]) assert tm.non_increasing([result[0], result[-1]])
@pytest.mark.skipif(**tm.no_cupy()) @pytest.mark.skipif(**tm.no_cupy())

View File

@@ -32,6 +32,7 @@ class TestLinear:
param.update(coord_param) param.update(coord_param)
param = dataset.set_params(param) param = dataset.set_params(param)
result = train_result(param, dataset.get_dmat(), num_rounds)['train'][dataset.metric] result = train_result(param, dataset.get_dmat(), num_rounds)['train'][dataset.metric]
note(result)
assert tm.non_increasing(result, 5e-4) assert tm.non_increasing(result, 5e-4)
# Loss is not guaranteed to always decrease because of regularisation parameters # Loss is not guaranteed to always decrease because of regularisation parameters
@@ -48,6 +49,7 @@ class TestLinear:
param.update(coord_param) param.update(coord_param)
param = dataset.set_params(param) param = dataset.set_params(param)
result = train_result(param, dataset.get_dmat(), num_rounds)['train'][dataset.metric] result = train_result(param, dataset.get_dmat(), num_rounds)['train'][dataset.metric]
note(result)
assert tm.non_increasing([result[0], result[-1]]) assert tm.non_increasing([result[0], result[-1]])
@given(parameter_strategy, strategies.integers(10, 50), @given(parameter_strategy, strategies.integers(10, 50),
@@ -57,6 +59,7 @@ class TestLinear:
param['updater'] = 'shotgun' param['updater'] = 'shotgun'
param = dataset.set_params(param) param = dataset.set_params(param)
result = train_result(param, dataset.get_dmat(), num_rounds)['train'][dataset.metric] result = train_result(param, dataset.get_dmat(), num_rounds)['train'][dataset.metric]
note(result)
# shotgun is non-deterministic, so we relax the test by only using first and last # shotgun is non-deterministic, so we relax the test by only using first and last
# iteration. # iteration.
if len(result) > 2: if len(result) > 2:
@@ -75,4 +78,5 @@ class TestLinear:
param['lambda'] = lambd param['lambda'] = lambd
param = dataset.set_params(param) param = dataset.set_params(param)
result = train_result(param, dataset.get_dmat(), num_rounds)['train'][dataset.metric] result = train_result(param, dataset.get_dmat(), num_rounds)['train'][dataset.metric]
note(result)
assert tm.non_increasing([result[0], result[-1]]) assert tm.non_increasing([result[0], result[-1]])

View File

@@ -705,8 +705,7 @@ async def run_from_dask_array_asyncio(scheduler_address: str) -> xgb.dask.TrainR
async def run_dask_regressor_asyncio(scheduler_address: str) -> None: async def run_dask_regressor_asyncio(scheduler_address: str) -> None:
async with Client(scheduler_address, asynchronous=True) as client: async with Client(scheduler_address, asynchronous=True) as client:
X, y, _ = generate_array() X, y, _ = generate_array()
regressor = await xgb.dask.DaskXGBRegressor(verbosity=1, regressor = await xgb.dask.DaskXGBRegressor(verbosity=1, n_estimators=2)
n_estimators=2)
regressor.set_params(tree_method='hist') regressor.set_params(tree_method='hist')
regressor.client = client regressor.client = client
await regressor.fit(X, y, eval_set=[(X, y)]) await regressor.fit(X, y, eval_set=[(X, y)])