Compare commits
34 Commits
master-roc ... release_2.
| Author | SHA1 | Date |
|---|---|---|
|  | 82d846bbeb |  |
|  | 71d330afdc |  |
|  | 3acbd8692b |  |
|  | ad524f76ab |  |
|  | d2d1751c03 |  |
|  | e4ee4e79dc |  |
|  | 41ce8f28b2 |  |
|  | 0ffc52e05c |  |
|  | a408254c2f |  |
|  | 22e891dafa |  |
|  | 89530c80a7 |  |
|  | 946ab53b57 |  |
|  | afd03a6934 |  |
|  | f7da938458 |  |
|  | 6ab6577511 |  |
|  | 8c57558d74 |  |
|  | 58aa98a796 |  |
|  | 92273b39d8 |  |
|  | e824b18bf6 |  |
|  | 66ee89d8b4 |  |
|  | 54d1d72d01 |  |
|  | 032bcc57f9 |  |
|  | ace7713201 |  |
|  | 096047c547 |  |
|  | e75dd75bb2 |  |
|  | 4d387cbfbf |  |
|  | 3fde9361d7 |  |
|  | b67c2ed96d |  |
|  | 177fd79864 |  |
|  | 06487d3896 |  |
|  | e50ccc4d3c |  |
|  | add57f8880 |  |
|  | a0d3573c74 |  |
|  | 4301558a57 |  |
.github/workflows/jvm_tests.yml (vendored, 17 changes)

@@ -51,14 +51,14 @@ jobs:
       id: extract_branch
       if: |
         (github.ref == 'refs/heads/master' || contains(github.ref, 'refs/heads/release_')) &&
-        matrix.os == 'windows-latest'
+        (matrix.os == 'windows-latest' || matrix.os == 'macos-11')
 
     - name: Publish artifact xgboost4j.dll to S3
       run: |
         cd lib/
         Rename-Item -Path xgboost4j.dll -NewName xgboost4j_${{ github.sha }}.dll
         dir
-        python -m awscli s3 cp xgboost4j_${{ github.sha }}.dll s3://xgboost-nightly-builds/${{ steps.extract_branch.outputs.branch }}/ --acl public-read
+        python -m awscli s3 cp xgboost4j_${{ github.sha }}.dll s3://xgboost-nightly-builds/${{ steps.extract_branch.outputs.branch }}/libxgboost4j/ --acl public-read
       if: |
         (github.ref == 'refs/heads/master' || contains(github.ref, 'refs/heads/release_')) &&
         matrix.os == 'windows-latest'
@@ -66,6 +66,19 @@ jobs:
         AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID_IAM_S3_UPLOADER }}
         AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY_IAM_S3_UPLOADER }}
 
+    - name: Publish artifact libxgboost4j.dylib to S3
+      run: |
+        cd lib/
+        mv -v libxgboost4j.dylib libxgboost4j_${{ github.sha }}.dylib
+        ls
+        python -m awscli s3 cp libxgboost4j_${{ github.sha }}.dylib s3://xgboost-nightly-builds/${{ steps.extract_branch.outputs.branch }}/libxgboost4j/ --acl public-read
+      if: |
+        (github.ref == 'refs/heads/master' || contains(github.ref, 'refs/heads/release_')) &&
+        matrix.os == 'macos-11'
+      env:
+        AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID_IAM_S3_UPLOADER }}
+        AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY_IAM_S3_UPLOADER }}
+
     - name: Test XGBoost4J (Core, Spark, Examples)
       run: |
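Note: both hunks above change the S3 destination so that nightly JVM artifacts are grouped under a `libxgboost4j/` prefix, and the first hunk extends the publish condition to `macos-11`. A minimal sketch of fetching one of these nightlies from the new layout (branch and commit hash are placeholder values):

```python
from urllib.request import urlretrieve

# Placeholder branch/commit for illustration; real values come from CI.
branch, sha = "master", "0123456789abcdef0123456789abcdef01234567"
base = "https://s3-us-west-2.amazonaws.com/xgboost-nightly-builds"
# New layout: per-commit binaries live under the libxgboost4j/ prefix.
urlretrieve(f"{base}/{branch}/libxgboost4j/xgboost4j_{sha}.dll", "xgboost4j.dll")
```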
.github/workflows/r_tests.yml (vendored, 4 changes)

@@ -25,7 +25,7 @@ jobs:
       with:
         submodules: 'true'
 
-    - uses: r-lib/actions/setup-r@50d1eae9b8da0bb3f8582c59a5b82225fa2fe7f2 # v2.3.1
+    - uses: r-lib/actions/setup-r@11a22a908006c25fe054c4ef0ac0436b1de3edbe # v2.6.4
       with:
         r-version: ${{ matrix.config.r }}
 
@@ -64,7 +64,7 @@ jobs:
       with:
         submodules: 'true'
 
-    - uses: r-lib/actions/setup-r@50d1eae9b8da0bb3f8582c59a5b82225fa2fe7f2 # v2.3.1
+    - uses: r-lib/actions/setup-r@11a22a908006c25fe054c4ef0ac0436b1de3edbe # v2.6.4
       with:
         r-version: ${{ matrix.config.r }}
 
@@ -32,4 +32,3 @@ formats:
 python:
   install:
     - requirements: doc/requirements.txt
-  system_packages: true
@@ -1,5 +1,5 @@
 cmake_minimum_required(VERSION 3.18 FATAL_ERROR)
-project(xgboost LANGUAGES CXX C VERSION 2.0.0)
+project(xgboost LANGUAGES CXX C VERSION 2.0.3)
 include(cmake/Utils.cmake)
 list(APPEND CMAKE_MODULE_PATH "${xgboost_SOURCE_DIR}/cmake/modules")
 cmake_policy(SET CMP0022 NEW)
@@ -233,6 +233,11 @@ endif (RABIT_BUILD_MPI)
 add_subdirectory(${xgboost_SOURCE_DIR}/src)
 target_link_libraries(objxgboost PUBLIC dmlc)
 
+# Link -lstdc++fs for GCC 8.x
+if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS "9.0")
+  target_link_libraries(objxgboost PUBLIC stdc++fs)
+endif()
+
 # Exports some R specific definitions and objects
 if (R_LIB)
   add_subdirectory(${xgboost_SOURCE_DIR}/R-package)
@@ -1,8 +1,8 @@
 Package: xgboost
 Type: Package
 Title: Extreme Gradient Boosting
-Version: 2.0.0.1
-Date: 2022-10-18
+Version: 2.0.3.1
+Date: 2023-12-14
 Authors@R: c(
   person("Tianqi", "Chen", role = c("aut"),
          email = "tianqi.tchen@gmail.com"),
@@ -70,7 +70,7 @@ cb.print.evaluation <- function(period = 1, showsd = TRUE) {
         i == env$begin_iteration ||
         i == env$end_iteration) {
       stdev <- if (showsd) env$bst_evaluation_err else NULL
-      msg <- format.eval.string(i, env$bst_evaluation, stdev)
+      msg <- .format_eval_string(i, env$bst_evaluation, stdev)
       cat(msg, '\n')
     }
   }
@@ -380,7 +380,9 @@ cb.early.stop <- function(stopping_rounds, maximize = FALSE,
     if ((maximize && score > best_score) ||
         (!maximize && score < best_score)) {
 
-      best_msg <<- format.eval.string(i, env$bst_evaluation, env$bst_evaluation_err)
+      best_msg <<- .format_eval_string(
+        i, env$bst_evaluation, env$bst_evaluation_err
+      )
       best_score <<- score
       best_iteration <<- i
       best_ntreelimit <<- best_iteration * env$num_parallel_tree
@@ -754,7 +756,7 @@ xgb.gblinear.history <- function(model, class_index = NULL) {
 #
 
 # Format the evaluation metric string
-format.eval.string <- function(iter, eval_res, eval_err = NULL) {
+.format_eval_string <- function(iter, eval_res, eval_err = NULL) {
   if (length(eval_res) == 0)
     stop('no evaluation results')
   enames <- names(eval_res)
R-package/configure (vendored, 18 changes)

@@ -1,6 +1,6 @@
 #! /bin/sh
 # Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.71 for xgboost 2.0.0.
+# Generated by GNU Autoconf 2.71 for xgboost 2.0.3.
 #
 #
 # Copyright (C) 1992-1996, 1998-2017, 2020-2021 Free Software Foundation,
@@ -607,8 +607,8 @@ MAKEFLAGS=
 # Identity of this package.
 PACKAGE_NAME='xgboost'
 PACKAGE_TARNAME='xgboost'
-PACKAGE_VERSION='2.0.0'
-PACKAGE_STRING='xgboost 2.0.0'
+PACKAGE_VERSION='2.0.3'
+PACKAGE_STRING='xgboost 2.0.3'
 PACKAGE_BUGREPORT=''
 PACKAGE_URL=''
 
@@ -1225,7 +1225,7 @@ if test "$ac_init_help" = "long"; then
 # Omit some internal or obsolete options to make the list less imposing.
 # This message is too long to be a string in the A/UX 3.1 sh.
 cat <<_ACEOF
-\`configure' configures xgboost 2.0.0 to adapt to many kinds of systems.
+\`configure' configures xgboost 2.0.3 to adapt to many kinds of systems.
 
 Usage: $0 [OPTION]... [VAR=VALUE]...
 
@@ -1287,7 +1287,7 @@ fi
 
 if test -n "$ac_init_help"; then
   case $ac_init_help in
-    short | recursive ) echo "Configuration of xgboost 2.0.0:";;
+    short | recursive ) echo "Configuration of xgboost 2.0.3:";;
   esac
   cat <<\_ACEOF
 
@@ -1367,7 +1367,7 @@ fi
 test -n "$ac_init_help" && exit $ac_status
 if $ac_init_version; then
   cat <<\_ACEOF
-xgboost configure 2.0.0
+xgboost configure 2.0.3
 generated by GNU Autoconf 2.71
 
 Copyright (C) 2021 Free Software Foundation, Inc.
@@ -1533,7 +1533,7 @@ cat >config.log <<_ACEOF
 This file contains any messages produced by compilers while
 running configure, to aid debugging if configure makes a mistake.
 
-It was created by xgboost $as_me 2.0.0, which was
+It was created by xgboost $as_me 2.0.3, which was
 generated by GNU Autoconf 2.71. Invocation command line was
 
   $ $0$ac_configure_args_raw
@@ -3412,7 +3412,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
 # report actual input values of CONFIG_FILES etc. instead of their
 # values after options handling.
 ac_log="
-This file was extended by xgboost $as_me 2.0.0, which was
+This file was extended by xgboost $as_me 2.0.3, which was
 generated by GNU Autoconf 2.71. Invocation command line was
 
   CONFIG_FILES = $CONFIG_FILES
@@ -3467,7 +3467,7 @@ ac_cs_config_escaped=`printf "%s\n" "$ac_cs_config" | sed "s/^ //; s/'/'\\\\\\\\
 cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
 ac_cs_config='$ac_cs_config_escaped'
 ac_cs_version="\\
-xgboost config.status 2.0.0
+xgboost config.status 2.0.3
 configured by $0, generated by GNU Autoconf 2.71,
 with options \\"\$ac_cs_config\\"
 
@@ -2,7 +2,7 @@
 
 AC_PREREQ(2.69)
 
-AC_INIT([xgboost],[2.0.0],[],[xgboost],[])
+AC_INIT([xgboost],[2.0.3],[],[xgboost],[])
 
 : ${R_HOME=`R RHOME`}
 if test -z "${R_HOME}"; then
@@ -120,11 +120,25 @@ XGB_DLL SEXP XGDMatrixCreateFromMat_R(SEXP mat, SEXP missing, SEXP n_threads) {
   ctx.nthread = asInteger(n_threads);
   std::int32_t threads = ctx.Threads();
 
+  if (is_int) {
     xgboost::common::ParallelFor(nrow, threads, [&](xgboost::omp_ulong i) {
       for (size_t j = 0; j < ncol; ++j) {
-        data[i * ncol + j] = is_int ? static_cast<float>(iin[i + nrow * j]) : din[i + nrow * j];
+        auto v = iin[i + nrow * j];
+        if (v == NA_INTEGER) {
+          data[i * ncol + j] = std::numeric_limits<float>::quiet_NaN();
+        } else {
+          data[i * ncol + j] = static_cast<float>(v);
+        }
       }
     });
+  } else {
+    xgboost::common::ParallelFor(nrow, threads, [&](xgboost::omp_ulong i) {
+      for (size_t j = 0; j < ncol; ++j) {
+        data[i * ncol + j] = din[i + nrow * j];
+      }
+    });
+  }
 
   DMatrixHandle handle;
   CHECK_CALL(XGDMatrixCreateFromMat_omp(BeginPtr(data), nrow, ncol,
                                         asReal(missing), &handle, threads));
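Note: the hunk above splits the integer and double paths so that R's `NA_INTEGER` sentinel is translated to a float NaN rather than being cast numerically. From the Python API the same convention is visible directly, since NaN marks missing cells in dense input (toy data, standard `xgboost` API):

```python
import numpy as np
import xgboost as xgb

# NaN is the conventional marker for missing values in a dense matrix.
x = np.array([[np.nan, 2.0],
              [3.0, 4.0],
              [1.0, np.nan]])
dtrain = xgb.DMatrix(x, missing=np.nan)
print(dtrain.num_row(), dtrain.num_col())  # 3 2
```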
@@ -56,6 +56,42 @@ test_that("xgb.DMatrix: basic construction", {
   expect_equal(raw_fd, raw_dgc)
 })
 
+test_that("xgb.DMatrix: NA", {
+  n_samples <- 3
+  x <- cbind(
+    x1 = sample(x = 4, size = n_samples, replace = TRUE),
+    x2 = sample(x = 4, size = n_samples, replace = TRUE)
+  )
+  x[1, "x1"] <- NA
+
+  m <- xgb.DMatrix(x)
+  xgb.DMatrix.save(m, "int.dmatrix")
+
+  x <- matrix(as.numeric(x), nrow = n_samples, ncol = 2)
+  colnames(x) <- c("x1", "x2")
+  m <- xgb.DMatrix(x)
+
+  xgb.DMatrix.save(m, "float.dmatrix")
+
+  iconn <- file("int.dmatrix", "rb")
+  fconn <- file("float.dmatrix", "rb")
+
+  expect_equal(file.size("int.dmatrix"), file.size("float.dmatrix"))
+
+  bytes <- file.size("int.dmatrix")
+  idmatrix <- readBin(iconn, "raw", n = bytes)
+  fdmatrix <- readBin(fconn, "raw", n = bytes)
+
+  expect_equal(length(idmatrix), length(fdmatrix))
+  expect_equal(idmatrix, fdmatrix)
+
+  close(iconn)
+  close(fconn)
+
+  file.remove("int.dmatrix")
+  file.remove("float.dmatrix")
+})
+
 test_that("xgb.DMatrix: saving, loading", {
   # save to a local file
   dtest1 <- xgb.DMatrix(test_data, label = test_label)
dev/change_scala_version.py (new file, 79 lines)

@@ -0,0 +1,79 @@
+import argparse
+import pathlib
+import re
+import shutil
+
+
+def main(args):
+    if args.scala_version == "2.12":
+        scala_ver = "2.12"
+        scala_patchver = "2.12.18"
+    elif args.scala_version == "2.13":
+        scala_ver = "2.13"
+        scala_patchver = "2.13.11"
+    else:
+        raise ValueError(f"Unsupported Scala version: {args.scala_version}")
+
+    # Clean artifacts
+    if args.purge_artifacts:
+        for target in pathlib.Path("jvm-packages/").glob("**/target"):
+            if target.is_dir():
+                print(f"Removing {target}...")
+                shutil.rmtree(target)
+
+    # Update pom.xml
+    for pom in pathlib.Path("jvm-packages/").glob("**/pom.xml"):
+        print(f"Updating {pom}...")
+        with open(pom, "r", encoding="utf-8") as f:
+            lines = f.readlines()
+        with open(pom, "w", encoding="utf-8") as f:
+            replaced_scalaver = False
+            replaced_scala_binver = False
+            for line in lines:
+                for artifact in [
+                    "xgboost-jvm",
+                    "xgboost4j",
+                    "xgboost4j-gpu",
+                    "xgboost4j-spark",
+                    "xgboost4j-spark-gpu",
+                    "xgboost4j-flink",
+                    "xgboost4j-example",
+                ]:
+                    line = re.sub(
+                        f"<artifactId>{artifact}_[0-9\\.]*",
+                        f"<artifactId>{artifact}_{scala_ver}",
+                        line,
+                    )
+                # Only replace the first occurrence of scala.version
+                if not replaced_scalaver:
+                    line, nsubs = re.subn(
+                        r"<scala.version>[0-9\.]*",
+                        f"<scala.version>{scala_patchver}",
+                        line,
+                    )
+                    if nsubs > 0:
+                        replaced_scalaver = True
+                # Only replace the first occurrence of scala.binary.version
+                if not replaced_scala_binver:
+                    line, nsubs = re.subn(
+                        r"<scala.binary.version>[0-9\.]*",
+                        f"<scala.binary.version>{scala_ver}",
+                        line,
+                    )
+                    if nsubs > 0:
+                        replaced_scala_binver = True
+                f.write(line)
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--purge-artifacts", action="store_true")
+    parser.add_argument(
+        "--scala-version",
+        type=str,
+        required=True,
+        help="Version of Scala to use in the JVM packages",
+        choices=["2.12", "2.13"],
+    )
+    parsed_args = parser.parse_args()
+    main(parsed_args)
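Note: the heart of this new helper is a pair of anchored regex substitutions over each `pom.xml`. A quick standalone check of the `artifactId` rewrite, on an invented sample line:

```python
import re

line = "  <artifactId>xgboost4j-spark_2.12</artifactId>"
new_line = re.sub(
    r"<artifactId>xgboost4j-spark_[0-9\.]*",
    "<artifactId>xgboost4j-spark_2.13",
    line,
)
print(new_line)  # "  <artifactId>xgboost4j-spark_2.13</artifactId>"
```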
@@ -2,7 +2,6 @@ import argparse
 import errno
 import glob
 import os
-import platform
 import re
 import shutil
 import subprocess
@@ -21,12 +20,14 @@ def normpath(path):
     else:
         return normalized
 
+
 def cp(source, target):
     source = normpath(source)
     target = normpath(target)
     print("cp {0} {1}".format(source, target))
     shutil.copy(source, target)
 
+
 def maybe_makedirs(path):
     path = normpath(path)
     print("mkdir -p " + path)
@@ -36,6 +37,7 @@ def maybe_makedirs(path):
         if e.errno != errno.EEXIST:
             raise
 
+
 @contextmanager
 def cd(path):
     path = normpath(path)
@@ -47,18 +49,22 @@ def cd(path):
     finally:
         os.chdir(cwd)
 
+
 def run(command, **kwargs):
     print(command)
     subprocess.check_call(command, shell=True, **kwargs)
 
+
 def get_current_git_tag():
     out = subprocess.check_output(["git", "tag", "--points-at", "HEAD"])
     return out.decode().split("\n")[0]
 
+
 def get_current_commit_hash():
     out = subprocess.check_output(["git", "rev-parse", "HEAD"])
     return out.decode().split("\n")[0]
 
+
 def get_current_git_branch():
     out = subprocess.check_output(["git", "log", "-n", "1", "--pretty=%d", "HEAD"])
     m = re.search(r"release_[0-9\.]+", out.decode())
@@ -66,38 +72,49 @@ def get_current_git_branch():
         raise ValueError("Expected branch name of form release_xxx")
     return m.group(0)
 
+
 def retrieve(url, filename=None):
     print(f"{url} -> {filename}")
     return urlretrieve(url, filename)
 
+
 def main():
     parser = argparse.ArgumentParser()
-    parser.add_argument("--release-version", type=str, required=True,
-                        help="Version of the release being prepared")
+    parser.add_argument(
+        "--release-version",
+        type=str,
+        required=True,
+        help="Version of the release being prepared",
+    )
     args = parser.parse_args()
 
-    if sys.platform != "darwin" or platform.machine() != "x86_64":
-        raise NotImplementedError("Please run this script using an Intel Mac")
-
     version = args.release_version
     expected_git_tag = "v" + version
     current_git_tag = get_current_git_tag()
     if current_git_tag != expected_git_tag:
         if not current_git_tag:
-            raise ValueError(f"Expected git tag {expected_git_tag} but current HEAD has no tag. "
-                             f"Run: git checkout {expected_git_tag}")
-        raise ValueError(f"Expected git tag {expected_git_tag} but current HEAD is at tag "
-                         f"{current_git_tag}. Run: git checkout {expected_git_tag}")
+            raise ValueError(
+                f"Expected git tag {expected_git_tag} but current HEAD has no tag. "
+                f"Run: git checkout {expected_git_tag}"
+            )
+        raise ValueError(
+            f"Expected git tag {expected_git_tag} but current HEAD is at tag "
+            f"{current_git_tag}. Run: git checkout {expected_git_tag}"
+        )
 
     commit_hash = get_current_commit_hash()
     git_branch = get_current_git_branch()
-    print(f"Using commit {commit_hash} of branch {git_branch}, git tag {current_git_tag}")
+    print(
+        f"Using commit {commit_hash} of branch {git_branch}, git tag {current_git_tag}"
    )
 
     with cd("jvm-packages/"):
         print("====copying pure-Python tracker====")
         for use_cuda in [True, False]:
             xgboost4j = "xgboost4j-gpu" if use_cuda else "xgboost4j"
-            cp("../python-package/xgboost/tracker.py", f"{xgboost4j}/src/main/resources")
+            cp(
+                "../python-package/xgboost/tracker.py",
+                f"{xgboost4j}/src/main/resources",
+            )
 
         print("====copying resources for testing====")
         with cd("../demo/CLI/regression"):
@@ -115,7 +132,12 @@ def main():
                 cp(file, f"{xgboost4j_spark}/src/test/resources")
 
         print("====Creating directories to hold native binaries====")
-        for os_ident, arch in [("linux", "x86_64"), ("windows", "x86_64"), ("macos", "x86_64")]:
+        for os_ident, arch in [
+            ("linux", "x86_64"),
+            ("windows", "x86_64"),
+            ("macos", "x86_64"),
+            ("macos", "aarch64"),
+        ]:
             output_dir = f"xgboost4j/src/main/resources/lib/{os_ident}/{arch}"
             maybe_makedirs(output_dir)
         for os_ident, arch in [("linux", "x86_64")]:
@@ -123,52 +145,98 @@ def main():
             maybe_makedirs(output_dir)
 
         print("====Downloading native binaries from CI====")
-        nightly_bucket_prefix = "https://s3-us-west-2.amazonaws.com/xgboost-nightly-builds"
-        maven_repo_prefix = "https://s3-us-west-2.amazonaws.com/xgboost-maven-repo/release/ml/dmlc"
+        nightly_bucket_prefix = (
+            "https://s3-us-west-2.amazonaws.com/xgboost-nightly-builds"
+        )
+        maven_repo_prefix = (
+            "https://s3-us-west-2.amazonaws.com/xgboost-maven-repo/release/ml/dmlc"
+        )
 
-        retrieve(url=f"{nightly_bucket_prefix}/{git_branch}/xgboost4j_{commit_hash}.dll",
-                 filename="xgboost4j/src/main/resources/lib/windows/x86_64/xgboost4j.dll")
+        retrieve(
+            url=f"{nightly_bucket_prefix}/{git_branch}/libxgboost4j/xgboost4j_{commit_hash}.dll",
+            filename="xgboost4j/src/main/resources/lib/windows/x86_64/xgboost4j.dll",
+        )
+        retrieve(
+            url=f"{nightly_bucket_prefix}/{git_branch}/libxgboost4j/libxgboost4j_{commit_hash}.dylib",
+            filename="xgboost4j/src/main/resources/lib/macos/x86_64/libxgboost4j.dylib",
+        )
+        retrieve(
+            url=f"{nightly_bucket_prefix}/{git_branch}/libxgboost4j/libxgboost4j_m1_{commit_hash}.dylib",
+            filename="xgboost4j/src/main/resources/lib/macos/aarch64/libxgboost4j.dylib",
+        )
 
         with tempfile.TemporaryDirectory() as tempdir:
             # libxgboost4j.so for Linux x86_64, CPU only
             zip_path = os.path.join(tempdir, "xgboost4j_2.12.jar")
             extract_dir = os.path.join(tempdir, "xgboost4j")
-            retrieve(url=f"{maven_repo_prefix}/xgboost4j_2.12/{version}/"
-                         f"xgboost4j_2.12-{version}.jar",
-                     filename=zip_path)
+            retrieve(
+                url=f"{maven_repo_prefix}/xgboost4j_2.12/{version}/"
+                f"xgboost4j_2.12-{version}.jar",
+                filename=zip_path,
+            )
             os.mkdir(extract_dir)
             with zipfile.ZipFile(zip_path, "r") as t:
                 t.extractall(extract_dir)
-            cp(os.path.join(extract_dir, "lib", "linux", "x86_64", "libxgboost4j.so"),
-               "xgboost4j/src/main/resources/lib/linux/x86_64/libxgboost4j.so")
+            cp(
+                os.path.join(extract_dir, "lib", "linux", "x86_64", "libxgboost4j.so"),
+                "xgboost4j/src/main/resources/lib/linux/x86_64/libxgboost4j.so",
+            )
 
             # libxgboost4j.so for Linux x86_64, GPU support
             zip_path = os.path.join(tempdir, "xgboost4j-gpu_2.12.jar")
             extract_dir = os.path.join(tempdir, "xgboost4j-gpu")
-            retrieve(url=f"{maven_repo_prefix}/xgboost4j-gpu_2.12/{version}/"
-                         f"xgboost4j-gpu_2.12-{version}.jar",
-                     filename=zip_path)
+            retrieve(
+                url=f"{maven_repo_prefix}/xgboost4j-gpu_2.12/{version}/"
+                f"xgboost4j-gpu_2.12-{version}.jar",
+                filename=zip_path,
+            )
             os.mkdir(extract_dir)
             with zipfile.ZipFile(zip_path, "r") as t:
                 t.extractall(extract_dir)
-            cp(os.path.join(extract_dir, "lib", "linux", "x86_64", "libxgboost4j.so"),
-               "xgboost4j-gpu/src/main/resources/lib/linux/x86_64/libxgboost4j.so")
+            cp(
+                os.path.join(extract_dir, "lib", "linux", "x86_64", "libxgboost4j.so"),
+                "xgboost4j-gpu/src/main/resources/lib/linux/x86_64/libxgboost4j.so",
+            )
 
         print("====Next Steps====")
         print("1. Gain upload right to Maven Central repo.")
         print("1-1. Sign up for a JIRA account at Sonatype: ")
-        print("1-2. File a JIRA ticket: "
-              "https://issues.sonatype.org/secure/CreateIssue.jspa?issuetype=21&pid=10134. Example: "
-              "https://issues.sonatype.org/browse/OSSRH-67724")
-        print("2. Store the Sonatype credentials in .m2/settings.xml. See insturctions in "
-              "https://central.sonatype.org/publish/publish-maven/")
-        print("3. Now on a Mac machine, run:")
-        print("   GPG_TTY=$(tty) mvn deploy -Prelease -DskipTests")
-        print("4. Log into https://oss.sonatype.org/. On the left menu panel, click Staging "
-              "Repositories. Visit the URL https://oss.sonatype.org/content/repositories/mldmlc-1085 "
-              "to inspect the staged JAR files. Finally, press Release button to publish the "
-              "artifacts to the Maven Central repository.")
+        print(
+            "1-2. File a JIRA ticket: "
+            "https://issues.sonatype.org/secure/CreateIssue.jspa?issuetype=21&pid=10134. Example: "
+            "https://issues.sonatype.org/browse/OSSRH-67724"
+        )
+        print(
+            "2. Store the Sonatype credentials in .m2/settings.xml. See insturctions in "
+            "https://central.sonatype.org/publish/publish-maven/"
+        )
+        print(
+            "3. Now on a Linux machine, run the following to build Scala 2.12 artifacts. "
+            "Make sure to use an Internet connection with fast upload speed:"
+        )
+        print(
+            "   # Skip native build, since we have all needed native binaries from CI\n"
+            "   export MAVEN_SKIP_NATIVE_BUILD=1\n"
+            "   GPG_TTY=$(tty) mvn deploy -Prelease -DskipTests"
+        )
+        print(
+            "4. Log into https://oss.sonatype.org/. On the left menu panel, click Staging "
+            "Repositories. Visit the URL https://oss.sonatype.org/content/repositories/mldmlc-xxxx "
+            "to inspect the staged JAR files. Finally, press Release button to publish the "
+            "artifacts to the Maven Central repository. The top-level metapackage should be "
+            "named xgboost-jvm_2.12."
+        )
+        print(
+            "5. Remove the Scala 2.12 artifacts and build Scala 2.13 artifacts:\n"
+            "   export MAVEN_SKIP_NATIVE_BUILD=1\n"
+            "   python dev/change_scala_version.py --scala-version 2.13 --purge-artifacts\n"
+            "   GPG_TTY=$(tty) mvn deploy -Prelease-cpu-only,scala-2.13 -DskipTests"
+        )
+        print(
+            "6. Go to https://oss.sonatype.org/ to release the Scala 2.13 artifacts. "
+            "The top-level metapackage should be named xgboost-jvm_2.13."
        )
 
 
 if __name__ == "__main__":
     main()
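Note: the printed "Next Steps" describe a two-pass deploy, first for Scala 2.12 and then for Scala 2.13. A condensed sketch of that sequence as the steps state it, assuming it runs from the repository root with Maven and GPG already configured (an illustration of the checklist, not part of the script):

```python
import os
import subprocess

env = {**os.environ, "MAVEN_SKIP_NATIVE_BUILD": "1"}  # reuse CI-built binaries

# Pass 1: deploy Scala 2.12 artifacts.
subprocess.check_call("GPG_TTY=$(tty) mvn deploy -Prelease -DskipTests",
                      shell=True, env=env, cwd="jvm-packages")
# Rewrite the poms for Scala 2.13, then deploy again.
subprocess.check_call(
    "python dev/change_scala_version.py --scala-version 2.13 --purge-artifacts",
    shell=True, env=env,
)
subprocess.check_call(
    "GPG_TTY=$(tty) mvn deploy -Prelease-cpu-only,scala-2.13 -DskipTests",
    shell=True, env=env, cwd="jvm-packages",
)
```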
@@ -329,7 +329,7 @@ Parameters for Linear Booster (``booster=gblinear``)
   - Choice of algorithm to fit linear model
 
     - ``shotgun``: Parallel coordinate descent algorithm based on shotgun algorithm. Uses 'hogwild' parallelism and therefore produces a nondeterministic solution on each run.
-    - ``coord_descent``: Ordinary coordinate descent algorithm. Also multithreaded but still produces a deterministic solution.
+    - ``coord_descent``: Ordinary coordinate descent algorithm. Also multithreaded but still produces a deterministic solution. When the ``device`` parameter is set to ``cuda`` or ``gpu``, a GPU variant would be used.
 
 * ``feature_selector`` [default= ``cyclic``]
 
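Note: per the documented change, ``coord_descent`` gains a GPU variant selected through the ``device`` parameter. A hedged sketch of requesting it via the standard training API (assumes a CUDA-enabled XGBoost 2.0 build; data is synthetic):

```python
import numpy as np
import xgboost as xgb

rng = np.random.default_rng(0)
X, y = rng.random((256, 8)), rng.random(256)
booster = xgb.train(
    {
        "booster": "gblinear",
        "updater": "coord_descent",  # deterministic coordinate descent
        "device": "cuda",            # selects the GPU variant per the doc change
    },
    xgb.DMatrix(X, label=y),
    num_boost_round=10,
)
```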
@@ -1,5 +1,5 @@
-/*!
- * Copyright 2020 by Contributors
+/**
+ * Copyright 2020-2023, XGBoost Contributors
  * \file global_config.h
  * \brief Global configuration for XGBoost
  * \author Hyunsu Cho
@@ -7,24 +7,22 @@
 #ifndef XGBOOST_GLOBAL_CONFIG_H_
 #define XGBOOST_GLOBAL_CONFIG_H_
 
-#include <xgboost/parameter.h>
-#include <vector>
-#include <string>
+#include <dmlc/thread_local.h>  // for ThreadLocalStore
+#include <xgboost/parameter.h>  // for XGBoostParameter
+
+#include <cstdint>  // for int32_t
 
 namespace xgboost {
-class Json;
-
 struct GlobalConfiguration : public XGBoostParameter<GlobalConfiguration> {
-  int verbosity { 1 };
-  bool use_rmm { false };
+  std::int32_t verbosity{1};
+  bool use_rmm{false};
   DMLC_DECLARE_PARAMETER(GlobalConfiguration) {
     DMLC_DECLARE_FIELD(verbosity)
         .set_range(0, 3)
         .set_default(1)  // shows only warning
         .describe("Flag to print out detailed breakdown of runtime.");
-    DMLC_DECLARE_FIELD(use_rmm)
-        .set_default(false)
-        .describe("Whether to use RAPIDS Memory Manager to allocate GPU memory in XGBoost");
+    DMLC_DECLARE_FIELD(use_rmm).set_default(false).describe(
+        "Whether to use RAPIDS Memory Manager to allocate GPU memory in XGBoost");
   }
 };
 
@@ -6,6 +6,6 @@
 
 #define XGBOOST_VER_MAJOR 2 /* NOLINT */
 #define XGBOOST_VER_MINOR 0 /* NOLINT */
-#define XGBOOST_VER_PATCH 0 /* NOLINT */
+#define XGBOOST_VER_PATCH 3 /* NOLINT */
 
 #endif  // XGBOOST_VERSION_CONFIG_H_
@@ -25,4 +25,3 @@ target_include_directories(xgboost4j
   ${PROJECT_SOURCE_DIR}/rabit/include)
 
 set_output_directory(xgboost4j ${PROJECT_SOURCE_DIR}/lib)
-target_link_libraries(xgboost4j PRIVATE ${JAVA_JVM_LIBRARY})
@@ -1,6 +1,6 @@
 #!/usr/bin/env python
-import errno
 import argparse
+import errno
 import glob
 import os
 import platform
@@ -19,11 +19,10 @@ CONFIG = {
     "USE_HDFS": "OFF",
     "USE_AZURE": "OFF",
     "USE_S3": "OFF",
-
     "USE_CUDA": "OFF",
     "USE_NCCL": "OFF",
     "JVM_BINDINGS": "ON",
-    "LOG_CAPI_INVOCATION": "OFF"
+    "LOG_CAPI_INVOCATION": "OFF",
 }
 
 
@@ -70,26 +69,22 @@ def normpath(path):
     return normalized
 
 
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser()
-    parser.add_argument('--log-capi-invocation', type=str, choices=['ON', 'OFF'], default='OFF')
-    parser.add_argument('--use-cuda', type=str, choices=['ON', 'OFF'], default='OFF')
-    cli_args = parser.parse_args()
-
+def native_build(args):
     if sys.platform == "darwin":
         # Enable of your compiler supports OpenMP.
         CONFIG["USE_OPENMP"] = "OFF"
-        os.environ["JAVA_HOME"] = subprocess.check_output(
-            "/usr/libexec/java_home").strip().decode()
+        os.environ["JAVA_HOME"] = (
+            subprocess.check_output("/usr/libexec/java_home").strip().decode()
+        )
 
     print("building Java wrapper")
     with cd(".."):
-        build_dir = 'build-gpu' if cli_args.use_cuda == 'ON' else 'build'
+        build_dir = "build-gpu" if cli_args.use_cuda == "ON" else "build"
         maybe_makedirs(build_dir)
         with cd(build_dir):
             if sys.platform == "win32":
                 # Force x64 build on Windows.
-                maybe_generator = ' -A x64'
+                maybe_generator = " -A x64"
             else:
                 maybe_generator = ""
             if sys.platform == "linux":
@@ -97,12 +92,12 @@ if __name__ == "__main__":
             else:
                 maybe_parallel_build = ""
 
-            if cli_args.log_capi_invocation == 'ON':
-                CONFIG['LOG_CAPI_INVOCATION'] = 'ON'
+            if cli_args.log_capi_invocation == "ON":
+                CONFIG["LOG_CAPI_INVOCATION"] = "ON"
 
-            if cli_args.use_cuda == 'ON':
-                CONFIG['USE_CUDA'] = 'ON'
-                CONFIG['USE_NCCL'] = 'ON'
+            if cli_args.use_cuda == "ON":
+                CONFIG["USE_CUDA"] = "ON"
+                CONFIG["USE_NCCL"] = "ON"
 
             args = ["-D{0}:BOOL={1}".format(k, v) for k, v in CONFIG.items()]
 
@@ -115,7 +110,7 @@ if __name__ == "__main__":
             if gpu_arch_flag is not None:
                 args.append("%s" % gpu_arch_flag)
 
-            lib_dir = os.path.join(os.pardir, 'lib')
+            lib_dir = os.path.join(os.pardir, "lib")
             if os.path.exists(lib_dir):
                 shutil.rmtree(lib_dir)
             run("cmake .. " + " ".join(args) + maybe_generator)
@@ -125,8 +120,10 @@ if __name__ == "__main__":
         run(f'"{sys.executable}" mapfeat.py')
         run(f'"{sys.executable}" mknfold.py machine.txt 1')
 
-    xgboost4j = 'xgboost4j-gpu' if cli_args.use_cuda == 'ON' else 'xgboost4j'
-    xgboost4j_spark = 'xgboost4j-spark-gpu' if cli_args.use_cuda == 'ON' else 'xgboost4j-spark'
+    xgboost4j = "xgboost4j-gpu" if cli_args.use_cuda == "ON" else "xgboost4j"
+    xgboost4j_spark = (
+        "xgboost4j-spark-gpu" if cli_args.use_cuda == "ON" else "xgboost4j-spark"
+    )
 
     print("copying native library")
     library_name, os_folder = {
@@ -141,14 +138,19 @@ if __name__ == "__main__":
         "i86pc": "x86_64",  # on Solaris x86_64
         "sun4v": "sparc",  # on Solaris sparc
         "arm64": "aarch64",  # on macOS & Windows ARM 64-bit
-        "aarch64": "aarch64"
+        "aarch64": "aarch64",
     }[platform.machine().lower()]
-    output_folder = "{}/src/main/resources/lib/{}/{}".format(xgboost4j, os_folder, arch_folder)
+    output_folder = "{}/src/main/resources/lib/{}/{}".format(
+        xgboost4j, os_folder, arch_folder
+    )
     maybe_makedirs(output_folder)
     cp("../lib/" + library_name, output_folder)
 
     print("copying pure-Python tracker")
-    cp("../python-package/xgboost/tracker.py", "{}/src/main/resources".format(xgboost4j))
+    cp(
+        "../python-package/xgboost/tracker.py",
+        "{}/src/main/resources".format(xgboost4j),
+    )
 
     print("copying train/test files")
     maybe_makedirs("{}/src/test/resources".format(xgboost4j_spark))
@@ -164,3 +166,18 @@ if __name__ == "__main__":
     maybe_makedirs("{}/src/test/resources".format(xgboost4j))
     for file in glob.glob("../demo/data/agaricus.*"):
         cp(file, "{}/src/test/resources".format(xgboost4j))
+
+
+if __name__ == "__main__":
+    if "MAVEN_SKIP_NATIVE_BUILD" in os.environ:
+        print("MAVEN_SKIP_NATIVE_BUILD is set. Skipping native build...")
+    else:
+        parser = argparse.ArgumentParser()
+        parser.add_argument(
+            "--log-capi-invocation", type=str, choices=["ON", "OFF"], default="OFF"
+        )
+        parser.add_argument(
+            "--use-cuda", type=str, choices=["ON", "OFF"], default="OFF"
+        )
+        cli_args = parser.parse_args()
+        native_build(cli_args)
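Note: moving the script body into `native_build()` and gating it on `MAVEN_SKIP_NATIVE_BUILD` lets Maven invoke the script unconditionally while a release host opts out of recompiling. The guard pattern in isolation (stub body, for illustration only):

```python
import os


def native_build() -> None:
    print("pretend this configures CMake and compiles the native library")


if __name__ == "__main__":
    # Presence of the variable is enough; its value is not inspected.
    if "MAVEN_SKIP_NATIVE_BUILD" in os.environ:
        print("MAVEN_SKIP_NATIVE_BUILD is set. Skipping native build...")
    else:
        native_build()
```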
@@ -5,8 +5,8 @@
   <modelVersion>4.0.0</modelVersion>
 
   <groupId>ml.dmlc</groupId>
-  <artifactId>xgboost-jvm</artifactId>
-  <version>2.0.0-SNAPSHOT</version>
+  <artifactId>xgboost-jvm_2.12</artifactId>
+  <version>2.0.3</version>
   <packaging>pom</packaging>
   <name>XGBoost JVM Package</name>
   <description>JVM Package for XGBoost</description>
@@ -189,6 +189,93 @@
       </plugins>
     </build>
   </profile>
+  <profile>
+    <id>release-cpu-only</id>
+    <modules>
+      <module>xgboost4j</module>
+      <module>xgboost4j-example</module>
+      <module>xgboost4j-spark</module>
+      <module>xgboost4j-flink</module>
+    </modules>
+    <build>
+      <plugins>
+        <plugin>
+          <groupId>org.apache.maven.plugins</groupId>
+          <artifactId>maven-jar-plugin</artifactId>
+          <version>3.3.0</version>
+          <executions>
+            <execution>
+              <id>empty-javadoc-jar</id>
+              <phase>package</phase>
+              <goals>
+                <goal>jar</goal>
+              </goals>
+              <configuration>
+                <classifier>javadoc</classifier>
+                <classesDirectory>${basedir}/javadoc</classesDirectory>
+              </configuration>
+            </execution>
+          </executions>
+        </plugin>
+        <plugin>
+          <groupId>org.apache.maven.plugins</groupId>
+          <artifactId>maven-release-plugin</artifactId>
+          <version>3.0.1</version>
+          <configuration>
+            <autoVersionSubmodules>true</autoVersionSubmodules>
+            <useReleaseProfile>false</useReleaseProfile>
+            <releaseProfiles>release</releaseProfiles>
+            <goals>deploy</goals>
+          </configuration>
+        </plugin>
+        <plugin>
+          <groupId>org.apache.maven.plugins</groupId>
+          <artifactId>maven-gpg-plugin</artifactId>
+          <version>3.1.0</version>
+          <executions>
+            <execution>
+              <id>sign-artifacts</id>
+              <phase>verify</phase>
+              <goals>
+                <goal>sign</goal>
+              </goals>
+            </execution>
+          </executions>
+        </plugin>
+        <plugin>
+          <groupId>org.apache.maven.plugins</groupId>
+          <artifactId>maven-source-plugin</artifactId>
+          <version>3.3.0</version>
+          <executions>
+            <execution>
+              <id>attach-sources</id>
+              <goals>
+                <goal>jar-no-fork</goal>
+              </goals>
+            </execution>
+          </executions>
+        </plugin>
+        <plugin>
+          <groupId>org.sonatype.plugins</groupId>
+          <artifactId>nexus-staging-maven-plugin</artifactId>
+          <version>1.6.13</version>
+          <extensions>true</extensions>
+          <configuration>
+            <serverId>ossrh</serverId>
+            <nexusUrl>https://oss.sonatype.org/</nexusUrl>
+            <autoReleaseAfterClose>false</autoReleaseAfterClose>
+          </configuration>
+        </plugin>
+        <plugin>
+          <groupId>org.apache.maven.plugins</groupId>
+          <artifactId>maven-surefire-plugin</artifactId>
+          <configuration>
+            <skipTests>true</skipTests>
+          </configuration>
+        </plugin>
+      </plugins>
+    </build>
+  </profile>
   <profile>
     <id>assembly</id>
     <build>
@@ -5,12 +5,12 @@
   <modelVersion>4.0.0</modelVersion>
   <parent>
     <groupId>ml.dmlc</groupId>
-    <artifactId>xgboost-jvm</artifactId>
-    <version>2.0.0-SNAPSHOT</version>
+    <artifactId>xgboost-jvm_2.12</artifactId>
+    <version>2.0.3</version>
   </parent>
   <name>xgboost4j-example</name>
-  <artifactId>xgboost4j-example_${scala.binary.version}</artifactId>
-  <version>2.0.0-SNAPSHOT</version>
+  <artifactId>xgboost4j-example_2.12</artifactId>
+  <version>2.0.3</version>
   <packaging>jar</packaging>
   <build>
     <plugins>
@@ -26,7 +26,7 @@
   <dependencies>
     <dependency>
       <groupId>ml.dmlc</groupId>
-      <artifactId>xgboost4j-spark_${scala.binary.version}</artifactId>
+      <artifactId>xgboost4j-spark_2.12</artifactId>
       <version>${project.version}</version>
     </dependency>
     <dependency>
@@ -37,7 +37,7 @@
     </dependency>
     <dependency>
       <groupId>ml.dmlc</groupId>
-      <artifactId>xgboost4j-flink_${scala.binary.version}</artifactId>
+      <artifactId>xgboost4j-flink_2.12</artifactId>
       <version>${project.version}</version>
     </dependency>
   </dependencies>
|||||||
@ -5,13 +5,13 @@
|
|||||||
<modelVersion>4.0.0</modelVersion>
|
<modelVersion>4.0.0</modelVersion>
|
||||||
<parent>
|
<parent>
|
||||||
<groupId>ml.dmlc</groupId>
|
<groupId>ml.dmlc</groupId>
|
||||||
<artifactId>xgboost-jvm</artifactId>
|
<artifactId>xgboost-jvm_2.12</artifactId>
|
||||||
<version>2.0.0-SNAPSHOT</version>
|
<version>2.0.3</version>
|
||||||
</parent>
|
</parent>
|
||||||
|
|
||||||
<name>xgboost4j-flink</name>
|
<name>xgboost4j-flink</name>
|
||||||
<artifactId>xgboost4j-flink_${scala.binary.version}</artifactId>
|
<artifactId>xgboost4j-flink_2.12</artifactId>
|
||||||
<version>2.0.0-SNAPSHOT</version>
|
<version>2.0.3</version>
|
||||||
<properties>
|
<properties>
|
||||||
<flink-ml.version>2.2.0</flink-ml.version>
|
<flink-ml.version>2.2.0</flink-ml.version>
|
||||||
</properties>
|
</properties>
|
||||||
@ -30,7 +30,7 @@
|
|||||||
<dependencies>
|
<dependencies>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>ml.dmlc</groupId>
|
<groupId>ml.dmlc</groupId>
|
||||||
<artifactId>xgboost4j_${scala.binary.version}</artifactId>
|
<artifactId>xgboost4j_2.12</artifactId>
|
||||||
<version>${project.version}</version>
|
<version>${project.version}</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
|
|||||||
@ -5,12 +5,12 @@
|
|||||||
<modelVersion>4.0.0</modelVersion>
|
<modelVersion>4.0.0</modelVersion>
|
||||||
<parent>
|
<parent>
|
||||||
<groupId>ml.dmlc</groupId>
|
<groupId>ml.dmlc</groupId>
|
||||||
<artifactId>xgboost-jvm</artifactId>
|
<artifactId>xgboost-jvm_2.12</artifactId>
|
||||||
<version>2.0.0-SNAPSHOT</version>
|
<version>2.0.3</version>
|
||||||
</parent>
|
</parent>
|
||||||
<artifactId>xgboost4j-gpu_${scala.binary.version}</artifactId>
|
<artifactId>xgboost4j-gpu_2.12</artifactId>
|
||||||
<name>xgboost4j-gpu</name>
|
<name>xgboost4j-gpu</name>
|
||||||
<version>2.0.0-SNAPSHOT</version>
|
<version>2.0.3</version>
|
||||||
<packaging>jar</packaging>
|
<packaging>jar</packaging>
|
||||||
|
|
||||||
<dependencies>
|
<dependencies>
|
||||||
|
|||||||
@@ -5,11 +5,11 @@
   <modelVersion>4.0.0</modelVersion>
   <parent>
     <groupId>ml.dmlc</groupId>
-    <artifactId>xgboost-jvm</artifactId>
-    <version>2.0.0-SNAPSHOT</version>
+    <artifactId>xgboost-jvm_2.12</artifactId>
+    <version>2.0.3</version>
   </parent>
   <name>xgboost4j-spark-gpu</name>
-  <artifactId>xgboost4j-spark-gpu_${scala.binary.version}</artifactId>
+  <artifactId>xgboost4j-spark-gpu_2.12</artifactId>
   <build>
     <plugins>
       <plugin>
@@ -24,7 +24,7 @@
   <dependencies>
     <dependency>
       <groupId>ml.dmlc</groupId>
-      <artifactId>xgboost4j-gpu_${scala.binary.version}</artifactId>
+      <artifactId>xgboost4j-gpu_2.12</artifactId>
       <version>${project.version}</version>
     </dependency>
     <dependency>
@@ -5,11 +5,11 @@
   <modelVersion>4.0.0</modelVersion>
   <parent>
     <groupId>ml.dmlc</groupId>
-    <artifactId>xgboost-jvm</artifactId>
-    <version>2.0.0-SNAPSHOT</version>
+    <artifactId>xgboost-jvm_2.12</artifactId>
+    <version>2.0.3</version>
   </parent>
   <name>xgboost4j-spark</name>
-  <artifactId>xgboost4j-spark_${scala.binary.version}</artifactId>
+  <artifactId>xgboost4j-spark_2.12</artifactId>
   <build>
     <plugins>
       <plugin>
@@ -24,7 +24,7 @@
   <dependencies>
     <dependency>
       <groupId>ml.dmlc</groupId>
-      <artifactId>xgboost4j_${scala.binary.version}</artifactId>
+      <artifactId>xgboost4j_2.12</artifactId>
       <version>${project.version}</version>
     </dependency>
     <dependency>
@@ -5,12 +5,12 @@
   <modelVersion>4.0.0</modelVersion>
   <parent>
     <groupId>ml.dmlc</groupId>
-    <artifactId>xgboost-jvm</artifactId>
-    <version>2.0.0-SNAPSHOT</version>
+    <artifactId>xgboost-jvm_2.12</artifactId>
+    <version>2.0.3</version>
   </parent>
   <name>xgboost4j</name>
-  <artifactId>xgboost4j_${scala.binary.version}</artifactId>
-  <version>2.0.0-SNAPSHOT</version>
+  <artifactId>xgboost4j_2.12</artifactId>
+  <version>2.0.3</version>
   <packaging>jar</packaging>
 
   <dependencies>
@@ -132,17 +132,29 @@ def locate_or_build_libxgboost(
 
     if build_config.use_system_libxgboost:
         # Find libxgboost from system prefix
-        sys_base_prefix = pathlib.Path(sys.base_prefix).absolute().resolve()
-        libxgboost_sys = sys_base_prefix / "lib" / _lib_name()
-        if not libxgboost_sys.exists():
-            raise RuntimeError(
-                f"use_system_libxgboost was specified but {_lib_name()} is "
-                f"not found in {libxgboost_sys.parent}"
-            )
-
-        logger.info("Using system XGBoost: %s", str(libxgboost_sys))
-        return libxgboost_sys
+        sys_prefix = pathlib.Path(sys.base_prefix)
+        sys_prefix_candidates = [
+            sys_prefix / "lib",
+            # Paths possibly used on Windows
+            sys_prefix / "bin",
+            sys_prefix / "Library",
+            sys_prefix / "Library" / "bin",
+            sys_prefix / "Library" / "lib",
+        ]
+        sys_prefix_candidates = [
+            p.expanduser().resolve() for p in sys_prefix_candidates
+        ]
+        for candidate_dir in sys_prefix_candidates:
+            libtreelite_sys = candidate_dir / _lib_name()
+            if libtreelite_sys.exists():
+                logger.info("Using system XGBoost: %s", str(libtreelite_sys))
+                return libtreelite_sys
+        raise RuntimeError(
+            f"use_system_libxgboost was specified but {_lib_name()} is "
+            f"not found. Paths searched (in order): \n"
+            + "\n".join([f"* {str(p)}" for p in sys_prefix_candidates])
+        )
 
     libxgboost = locate_local_libxgboost(toplevel_dir, logger=logger)
     if libxgboost is not None:
         return libxgboost
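Note: with `use_system_libxgboost`, the packager now probes several prefix subdirectories instead of only `<prefix>/lib`, which covers Windows and conda-style layouts. The same first-match search in isolation (the library file name here is a placeholder; the real one comes from `_lib_name()`):

```python
import pathlib
import sys

lib_name = "libxgboost.so"  # placeholder; platform-dependent in practice
prefix = pathlib.Path(sys.base_prefix)
candidates = [
    prefix / "lib",
    prefix / "bin",              # Windows
    prefix / "Library",          # conda on Windows
    prefix / "Library" / "bin",
    prefix / "Library" / "lib",
]
found = next((d / lib_name for d in candidates if (d / lib_name).exists()), None)
print(found or "not found in any candidate directory")
```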
@@ -7,7 +7,7 @@ build-backend = "packager.pep517"
 
 [project]
 name = "xgboost"
-version = "2.0.0-dev"
+version = "2.0.3"
 authors = [
     { name = "Hyunsu Cho", email = "chohyu01@cs.washington.edu" },
     { name = "Jiaming Yuan", email = "jm.yuan@outlook.com" }
@@ -1 +1 @@
-2.0.0-dev
+2.0.3
@@ -88,6 +88,18 @@ def is_cudf_available() -> bool:
         return False


+def is_cupy_available() -> bool:
+    """Check cupy package available or not"""
+    if importlib.util.find_spec("cupy") is None:
+        return False
+    try:
+        import cupy
+
+        return True
+    except ImportError:
+        return False
+
+
 try:
     import scipy.sparse as scipy_sparse
     from scipy.sparse import csr_matrix as scipy_csr
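Note on the helper added above: it mirrors `is_cudf_available` earlier in the same module, probing for the package before importing it so the check never raises. A minimal sketch of the intended call pattern on the consumer side (the `maybe_to_gpu` wrapper is hypothetical, shown for illustration only):

    import numpy as np

    from xgboost.compat import is_cupy_available

    def maybe_to_gpu(values: np.ndarray):
        """Move an array to the GPU when cupy is present, else return it unchanged."""
        if is_cupy_available():
            import cupy as cp  # safe: guarded by the availability check

            return cp.asarray(values)
        return values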
@@ -206,6 +206,7 @@ def _load_lib() -> ctypes.CDLL:
             lib = ctypes.cdll.LoadLibrary(lib_path)
             setattr(lib, "path", os.path.normpath(lib_path))
             lib_success = True
+            break
         except OSError as e:
             os_error_list.append(str(e))
             continue
@@ -2399,6 +2400,7 @@ class Booster:
             _is_cudf_df,
             _is_cupy_array,
             _is_list,
+            _is_np_array_like,
             _is_pandas_df,
             _is_pandas_series,
             _is_tuple,
@@ -2428,7 +2430,7 @@ class Booster:
                 f"got {data.shape[1]}"
             )

-        if isinstance(data, np.ndarray):
+        if _is_np_array_like(data):
             from .data import _ensure_np_dtype

             data, _ = _ensure_np_dtype(data, data.dtype)
@@ -78,7 +78,6 @@ from .data import _is_cudf_ser, _is_cupy_array
 from .sklearn import (
     XGBClassifier,
     XGBClassifierBase,
-    XGBClassifierMixIn,
     XGBModel,
     XGBRanker,
     XGBRankerMixIn,
@@ -1854,7 +1853,7 @@ class DaskXGBRegressor(DaskScikitLearnBase, XGBRegressorBase):
     "Implementation of the scikit-learn API for XGBoost classification.",
     ["estimators", "model"],
 )
-class DaskXGBClassifier(DaskScikitLearnBase, XGBClassifierMixIn, XGBClassifierBase):
+class DaskXGBClassifier(DaskScikitLearnBase, XGBClassifierBase):
     # pylint: disable=missing-class-docstring
     async def _fit_async(
         self,
@@ -2036,10 +2035,6 @@ class DaskXGBClassifier(DaskScikitLearnBase, XGBClassifierMixIn, XGBClassifierBa
         preds = da.map_blocks(_argmax, pred_probs, drop_axis=1)
         return preds

-    def load_model(self, fname: ModelIn) -> None:
-        super().load_model(fname)
-        self._load_model_attributes(self.get_booster())
-

 @xgboost_model_doc(
     """Implementation of the Scikit-Learn API for XGBoost Ranking.
@@ -164,8 +164,8 @@ def _is_scipy_coo(data: DataType) -> bool:
     return isinstance(data, scipy.sparse.coo_matrix)


-def _is_numpy_array(data: DataType) -> bool:
-    return isinstance(data, (np.ndarray, np.matrix))
+def _is_np_array_like(data: DataType) -> bool:
+    return hasattr(data, "__array_interface__")


 def _ensure_np_dtype(
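The rename above is more than cosmetic: the old `isinstance` test admitted only `np.ndarray` and `np.matrix`, while the duck-typed check accepts any object exposing the NumPy array interface protocol. A small sketch, assuming a hypothetical zero-copy wrapper class:

    import numpy as np

    class ArrayLike:
        """Hypothetical wrapper that exposes the NumPy array interface."""

        def __init__(self, arr: np.ndarray) -> None:
            self._arr = arr

        @property
        def __array_interface__(self) -> dict:
            return self._arr.__array_interface__

    wrapped = ArrayLike(np.arange(4, dtype=np.float32))
    # isinstance(wrapped, np.ndarray) is False, yet the duck-typed check passes
    # and NumPy can consume the object without copying through a list.
    assert hasattr(wrapped, "__array_interface__")
    assert np.asarray(wrapped).sum() == 6.0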
@@ -317,7 +317,6 @@ def pandas_feature_info(
 ) -> Tuple[Optional[FeatureNames], Optional[FeatureTypes]]:
     """Handle feature info for pandas dataframe."""
     import pandas as pd
-    from pandas.api.types import is_categorical_dtype, is_sparse

     # handle feature names
     if feature_names is None and meta is None:
@@ -332,10 +331,10 @@ def pandas_feature_info(
     if feature_types is None and meta is None:
         feature_types = []
         for dtype in data.dtypes:
-            if is_sparse(dtype):
+            if is_pd_sparse_dtype(dtype):
                 feature_types.append(_pandas_dtype_mapper[dtype.subtype.name])
             elif (
-                is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
+                is_pd_cat_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
             ) and enable_categorical:
                 feature_types.append(CAT_T)
             else:
@@ -345,18 +344,13 @@ def pandas_feature_info(

 def is_nullable_dtype(dtype: PandasDType) -> bool:
     """Whether dtype is a pandas nullable type."""
-    from pandas.api.types import (
-        is_bool_dtype,
-        is_categorical_dtype,
-        is_float_dtype,
-        is_integer_dtype,
-    )
+    from pandas.api.types import is_bool_dtype, is_float_dtype, is_integer_dtype

     is_int = is_integer_dtype(dtype) and dtype.name in pandas_nullable_mapper
     # np.bool has alias `bool`, while pd.BooleanDtype has `boolean`.
     is_bool = is_bool_dtype(dtype) and dtype.name == "boolean"
     is_float = is_float_dtype(dtype) and dtype.name in pandas_nullable_mapper
-    return is_int or is_bool or is_float or is_categorical_dtype(dtype)
+    return is_int or is_bool or is_float or is_pd_cat_dtype(dtype)


 def is_pa_ext_dtype(dtype: Any) -> bool:
@@ -371,17 +365,48 @@ def is_pa_ext_categorical_dtype(dtype: Any) -> bool:
     )


+def is_pd_cat_dtype(dtype: PandasDType) -> bool:
+    """Wrapper for testing pandas category type."""
+    import pandas as pd
+
+    if hasattr(pd.util, "version") and hasattr(pd.util.version, "Version"):
+        Version = pd.util.version.Version
+        if Version(pd.__version__) >= Version("2.1.0"):
+            from pandas import CategoricalDtype
+
+            return isinstance(dtype, CategoricalDtype)
+
+    from pandas.api.types import is_categorical_dtype
+
+    return is_categorical_dtype(dtype)
+
+
+def is_pd_sparse_dtype(dtype: PandasDType) -> bool:
+    """Wrapper for testing pandas sparse type."""
+    import pandas as pd
+
+    if hasattr(pd.util, "version") and hasattr(pd.util.version, "Version"):
+        Version = pd.util.version.Version
+        if Version(pd.__version__) >= Version("2.1.0"):
+            from pandas import SparseDtype
+
+            return isinstance(dtype, SparseDtype)
+
+    from pandas.api.types import is_sparse
+
+    return is_sparse(dtype)
+
+
 def pandas_cat_null(data: DataFrame) -> DataFrame:
     """Handle categorical dtype and nullable extension types from pandas."""
     import pandas as pd
-    from pandas.api.types import is_categorical_dtype

     # handle category codes and nullable.
     cat_columns = []
     nul_columns = []
     # avoid an unnecessary conversion if possible
     for col, dtype in zip(data.columns, data.dtypes):
-        if is_categorical_dtype(dtype):
+        if is_pd_cat_dtype(dtype):
             cat_columns.append(col)
         elif is_pa_ext_categorical_dtype(dtype):
             raise ValueError(
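Background for the two wrappers above: `pandas.api.types.is_categorical_dtype` and `is_sparse` are deprecated from pandas 2.1.0, where `isinstance` checks against the dtype classes are the supported spelling; the version gate keeps older pandas working. A hedged usage sketch (the import path assumes the private `xgboost.data` module, which may change):

    import pandas as pd

    from xgboost.data import is_pd_cat_dtype, is_pd_sparse_dtype

    df = pd.DataFrame(
        {
            "c": pd.Series(["a", "b", "a"], dtype="category"),
            "s": pd.arrays.SparseArray([0.0, 0.0, 1.0]),
        }
    )
    # Works both before and after pandas 2.1.0, without deprecation warnings.
    assert is_pd_cat_dtype(df["c"].dtype)
    assert is_pd_sparse_dtype(df["s"].dtype)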
@@ -398,7 +423,7 @@ def pandas_cat_null(data: DataFrame) -> DataFrame:
     transformed = data

     def cat_codes(ser: pd.Series) -> pd.Series:
-        if is_categorical_dtype(ser.dtype):
+        if is_pd_cat_dtype(ser.dtype):
             return ser.cat.codes
         assert is_pa_ext_categorical_dtype(ser.dtype)
         # Not yet supported, the index is not ordered for some reason. Alternately:
@@ -454,14 +479,12 @@ def _transform_pandas_df(
     meta: Optional[str] = None,
     meta_type: Optional[NumpyDType] = None,
 ) -> Tuple[np.ndarray, Optional[FeatureNames], Optional[FeatureTypes]]:
-    from pandas.api.types import is_categorical_dtype, is_sparse
-
     pyarrow_extension = False
     for dtype in data.dtypes:
         if not (
             (dtype.name in _pandas_dtype_mapper)
-            or is_sparse(dtype)
-            or (is_categorical_dtype(dtype) and enable_categorical)
+            or is_pd_sparse_dtype(dtype)
+            or (is_pd_cat_dtype(dtype) and enable_categorical)
             or is_pa_ext_dtype(dtype)
         ):
             _invalid_dataframe_dtype(data)
@@ -515,9 +538,8 @@ def _meta_from_pandas_series(
 ) -> None:
     """Help transform pandas series for meta data like labels"""
     data = data.values.astype("float")
-    from pandas.api.types import is_sparse

-    if is_sparse(data):
+    if is_pd_sparse_dtype(getattr(data, "dtype", data)):
         data = data.to_dense()  # type: ignore
     assert len(data.shape) == 1 or data.shape[1] == 0 or data.shape[1] == 1
     _meta_from_numpy(data, name, dtype, handle)
@@ -539,13 +561,11 @@ def _from_pandas_series(
     feature_names: Optional[FeatureNames],
     feature_types: Optional[FeatureTypes],
 ) -> DispatchedDataBackendReturnType:
-    from pandas.api.types import is_categorical_dtype
-
     if (data.dtype.name not in _pandas_dtype_mapper) and not (
-        is_categorical_dtype(data.dtype) and enable_categorical
+        is_pd_cat_dtype(data.dtype) and enable_categorical
     ):
         _invalid_dataframe_dtype(data)
-    if enable_categorical and is_categorical_dtype(data.dtype):
+    if enable_categorical and is_pd_cat_dtype(data.dtype):
         data = data.cat.codes
     return _from_numpy_array(
         data.values.reshape(data.shape[0], 1).astype("float"),
@@ -1051,7 +1071,7 @@ def dispatch_data_backend(
         return _from_scipy_csr(
             data.tocsr(), missing, threads, feature_names, feature_types
         )
-    if _is_numpy_array(data):
+    if _is_np_array_like(data):
         return _from_numpy_array(
             data, missing, threads, feature_names, feature_types, data_split_mode
         )
@@ -1194,7 +1214,7 @@ def dispatch_meta_backend(
     if _is_tuple(data):
         _meta_from_tuple(data, name, dtype, handle)
         return
-    if _is_numpy_array(data):
+    if _is_np_array_like(data):
         _meta_from_numpy(data, name, dtype, handle)
         return
     if _is_pandas_df(data):
@@ -1281,7 +1301,7 @@ def _proxy_transform(
         return _transform_dlpack(data), None, feature_names, feature_types
     if _is_list(data) or _is_tuple(data):
         data = np.array(data)
-    if _is_numpy_array(data):
+    if _is_np_array_like(data):
         data, _ = _ensure_np_dtype(data, data.dtype)
         return data, None, feature_names, feature_types
     if _is_scipy_csr(data):
@@ -1331,7 +1351,7 @@ def dispatch_proxy_set_data(
     if not allow_host:
         raise err

-    if _is_numpy_array(data):
+    if _is_np_array_like(data):
         _check_data_shape(data)
         proxy._set_data_from_array(data)  # pylint: disable=W0212
         return
@@ -31,16 +31,15 @@ def find_lib_path() -> List[str]:
     ]

     if sys.platform == "win32":
-        if platform.architecture()[0] == "64bit":
-            dll_path.append(os.path.join(curr_path, "../../windows/x64/Release/"))
-            # hack for pip installation when copy all parent source
-            # directory here
-            dll_path.append(os.path.join(curr_path, "./windows/x64/Release/"))
-        else:
-            dll_path.append(os.path.join(curr_path, "../../windows/Release/"))
-            # hack for pip installation when copy all parent source
-            # directory here
-            dll_path.append(os.path.join(curr_path, "./windows/Release/"))
+        # On Windows, Conda may install libs in different paths
+        dll_path.extend(
+            [
+                os.path.join(sys.base_prefix, "bin"),
+                os.path.join(sys.base_prefix, "Library"),
+                os.path.join(sys.base_prefix, "Library", "bin"),
+                os.path.join(sys.base_prefix, "Library", "lib"),
+            ]
+        )
+
         dll_path = [os.path.join(p, "xgboost.dll") for p in dll_path]
     elif sys.platform.startswith(("linux", "freebsd", "emscripten")):
         dll_path = [os.path.join(p, "libxgboost.so") for p in dll_path]
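Since the loader records the resolved location on the handle (see the `setattr(lib, "path", ...)` hunk earlier), one can verify which of the candidate directories actually supplied the library. A sketch relying on the private `_LIB` handle, whose name and attribute are internal details and may change:

    import xgboost

    # "path" is attached in _load_lib() after a successful LoadLibrary call.
    print(xgboost.core._LIB.path)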
@@ -43,19 +43,6 @@ from .data import _is_cudf_df, _is_cudf_ser, _is_cupy_array, _is_pandas_df
 from .training import train


-class XGBClassifierMixIn:  # pylint: disable=too-few-public-methods
-    """MixIn for classification."""
-
-    def __init__(self, *args: Any, **kwargs: Any) -> None:
-        super().__init__(*args, **kwargs)
-
-    def _load_model_attributes(self, booster: Booster) -> None:
-        config = json.loads(booster.save_config())
-        self.n_classes_ = int(config["learner"]["learner_model_param"]["num_class"])
-        # binary classification is treated as regression in XGBoost.
-        self.n_classes_ = 2 if self.n_classes_ < 2 else self.n_classes_
-
-
 class XGBRankerMixIn:  # pylint: disable=too-few-public-methods
     """MixIn for ranking, defines the _estimator_type usually defined in scikit-learn
     base classes.
@@ -845,9 +832,7 @@ class XGBModel(XGBModelBase):
         self.get_booster().load_model(fname)

         meta_str = self.get_booster().attr("scikit_learn")
-        if meta_str is None:
-            return
-
+        if meta_str is not None:
             meta = json.loads(meta_str)
             t = meta.get("_estimator_type", None)
             if t is not None and t != self._get_type():
@@ -855,11 +840,30 @@ class XGBModel(XGBModelBase):
                     "Loading an estimator with different type. Expecting: "
                     f"{self._get_type()}, got: {t}"
                 )

         self.feature_types = self.get_booster().feature_types
         self.get_booster().set_attr(scikit_learn=None)
+        config = json.loads(self.get_booster().save_config())
+        self._load_model_attributes(config)

     load_model.__doc__ = f"""{Booster.load_model.__doc__}"""

+    def _load_model_attributes(self, config: dict) -> None:
+        """Load model attributes without hyper-parameters."""
+        from sklearn.base import is_classifier
+
+        booster = self.get_booster()
+
+        self.objective = config["learner"]["objective"]["name"]
+        self.booster = config["learner"]["gradient_booster"]["name"]
+        self.base_score = config["learner"]["learner_model_param"]["base_score"]
+        self.feature_types = booster.feature_types
+
+        if is_classifier(self):
+            self.n_classes_ = int(config["learner"]["learner_model_param"]["num_class"])
+            # binary classification is treated as regression in XGBoost.
+            self.n_classes_ = 2 if self.n_classes_ < 2 else self.n_classes_
+
     # pylint: disable=too-many-branches
     def _configure_fit(
         self,
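The practical effect of `_load_model_attributes` is that estimator attributes formerly rebuilt only for `XGBClassifier` through the removed mix-in are now restored from the booster's saved config for every estimator type. A hedged round-trip sketch (file name chosen arbitrarily):

    import numpy as np
    from xgboost import XGBClassifier

    X = np.random.default_rng(0).normal(size=(32, 4))
    y = np.array([0, 1, 2, 0] * 8)

    clf = XGBClassifier(n_estimators=2).fit(X, y)
    clf.save_model("clf.json")

    fresh = XGBClassifier()
    fresh.load_model("clf.json")
    # n_classes_, objective, booster and base_score come back from the saved
    # config rather than from the scikit-learn metadata attribute.
    assert fresh.n_classes_ == 3
    assert fresh.objective.startswith("multi:")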
@@ -1409,7 +1413,7 @@ def _cls_predict_proba(n_classes: int, prediction: PredtT, vstack: Callable) ->
     Number of boosting rounds.
 """,
 )
-class XGBClassifier(XGBModel, XGBClassifierMixIn, XGBClassifierBase):
+class XGBClassifier(XGBModel, XGBClassifierBase):
     # pylint: disable=missing-docstring,invalid-name,too-many-instance-attributes
     @_deprecate_positional_args
     def __init__(
@@ -1637,10 +1641,6 @@ class XGBClassifier(XGBModel, XGBClassifierMixIn, XGBClassifierBase):
     def classes_(self) -> np.ndarray:
         return np.arange(self.n_classes_)

-    def load_model(self, fname: ModelIn) -> None:
-        super().load_model(fname)
-        self._load_model_attributes(self.get_booster())
-

 @xgboost_model_doc(
     "scikit-learn API for XGBoost random forest classification.",
@@ -2093,7 +2093,17 @@ class XGBRanker(XGBModel, XGBRankerMixIn):

         """
         X, qid = _get_qid(X, None)
-        Xyq = DMatrix(X, y, qid=qid)
+        # fixme(jiamingy): base margin and group weight is not yet supported. We might
+        # need to make extra special fields in the dataframe.
+        Xyq = DMatrix(
+            X,
+            y,
+            qid=qid,
+            missing=self.missing,
+            enable_categorical=self.enable_categorical,
+            nthread=self.n_jobs,
+            feature_types=self.feature_types,
+        )
         if callable(self.eval_metric):
             metric = ltr_metric_decorator(self.eval_metric, self.n_jobs)
             result_str = self.get_booster().eval_set([(Xyq, "eval")], feval=metric)
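With the extra arguments threaded through, `XGBRanker.score` now honors the estimator's `missing`, `enable_categorical`, `n_jobs` and `feature_types` settings instead of falling back to `DMatrix` defaults. A hedged usage sketch, following the `qid`-column convention used by the ranker tests later in this diff:

    import numpy as np
    import pandas as pd
    from xgboost import XGBRanker

    rng = np.random.default_rng(7)
    X = pd.DataFrame(
        {"f0": rng.normal(size=64), "qid": np.sort(rng.choice(4, size=64))}
    )
    y = rng.integers(0, 3, size=64)

    ranker = XGBRanker(n_estimators=4, enable_categorical=True)
    ranker.fit(X, y)
    # score() now builds its DMatrix with the estimator's own settings, so
    # inputs relying on those settings no longer error out at evaluation time.
    print(ranker.score(X, y))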
@@ -22,7 +22,7 @@ from typing import (

 import numpy as np
 import pandas as pd
-from pyspark import SparkContext, cloudpickle
+from pyspark import RDD, SparkContext, cloudpickle
 from pyspark.ml import Estimator, Model
 from pyspark.ml.functions import array_to_vector, vector_to_array
 from pyspark.ml.linalg import VectorUDT
@@ -44,6 +44,7 @@ from pyspark.ml.util import (
     MLWritable,
     MLWriter,
 )
+from pyspark.resource import ResourceProfileBuilder, TaskResourceRequests
 from pyspark.sql import Column, DataFrame
 from pyspark.sql.functions import col, countDistinct, pandas_udf, rand, struct
 from pyspark.sql.types import (
@@ -59,11 +60,12 @@ from scipy.special import expit, softmax  # pylint: disable=no-name-in-module

 import xgboost
 from xgboost import XGBClassifier
-from xgboost.compat import is_cudf_available
+from xgboost.compat import is_cudf_available, is_cupy_available
 from xgboost.core import Booster, _check_distributed_params
 from xgboost.sklearn import DEFAULT_N_ESTIMATORS, XGBModel, _can_use_qdm
 from xgboost.training import train as worker_train

+from .._typing import ArrayLike
 from .data import (
     _read_csr_matrix_from_unwrapped_spark_vec,
     alias,
@@ -87,6 +89,7 @@ from .utils import (
     _get_rabit_args,
     _get_spark_session,
     _is_local,
+    _is_standalone_or_localcluster,
     deserialize_booster,
     deserialize_xgb_model,
     get_class_name,
@@ -241,6 +244,13 @@ class _SparkXGBParams(
         TypeConverters.toList,
     )

+    def set_device(self, value: str) -> "_SparkXGBParams":
+        """Set device, optional value: cpu, cuda, gpu"""
+        _check_distributed_params({"device": value})
+        assert value in ("cpu", "cuda", "gpu")
+        self.set(self.device, value)
+        return self
+
     @classmethod
     def _xgb_cls(cls) -> Type[XGBModel]:
         """
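The new `set_device` is a fluent setter in the style of the other Spark param setters, validating through `_check_distributed_params` before storing the value. A short usage sketch (column names are illustrative):

    from xgboost.spark import SparkXGBClassifier

    clf = SparkXGBClassifier(features_col="features", label_col="label")
    # Equivalent to passing device="cuda" to the constructor; "cpu" and
    # "gpu" are the other accepted values.
    clf.set_device("cuda")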
@@ -334,6 +344,54 @@ class _SparkXGBParams(
             predict_params[param.name] = self.getOrDefault(param)
         return predict_params

+    def _validate_gpu_params(self) -> None:
+        """Validate the gpu parameters and gpu configurations"""
+
+        if use_cuda(self.getOrDefault(self.device)) or self.getOrDefault(self.use_gpu):
+            ss = _get_spark_session()
+            sc = ss.sparkContext
+
+            if _is_local(sc):
+                # Support GPU training in Spark local mode is just for debugging
+                # purposes, so it's okay for printing the below warning instead of
+                # checking the real gpu numbers and raising the exception.
+                get_logger(self.__class__.__name__).warning(
+                    "You have enabled GPU in spark local mode. Please make sure your"
+                    " local node has at least %d GPUs",
+                    self.getOrDefault(self.num_workers),
+                )
+            else:
+                executor_gpus = sc.getConf().get("spark.executor.resource.gpu.amount")
+                if executor_gpus is None:
+                    raise ValueError(
+                        "The `spark.executor.resource.gpu.amount` is required for training"
+                        " on GPU."
+                    )
+
+                if not (ss.version >= "3.4.0" and _is_standalone_or_localcluster(sc)):
+                    # We will enable stage-level scheduling in spark 3.4.0+ which doesn't
+                    # require spark.task.resource.gpu.amount to be set explicitly
+                    gpu_per_task = sc.getConf().get("spark.task.resource.gpu.amount")
+                    if gpu_per_task is not None:
+                        if float(gpu_per_task) < 1.0:
+                            raise ValueError(
+                                "XGBoost doesn't support GPU fractional configurations. "
+                                "Please set `spark.task.resource.gpu.amount=spark.executor"
+                                ".resource.gpu.amount`"
+                            )
+
+                        if float(gpu_per_task) > 1.0:
+                            get_logger(self.__class__.__name__).warning(
+                                "%s GPUs for each Spark task is configured, but each "
+                                "XGBoost training task uses only 1 GPU.",
+                                gpu_per_task,
+                            )
+                    else:
+                        raise ValueError(
+                            "The `spark.task.resource.gpu.amount` is required for training"
+                            " on GPU."
+                        )
+
     def _validate_params(self) -> None:
         # pylint: disable=too-many-branches
         init_model = self.getOrDefault("xgb_model")
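The checks above translate into two configurations a GPU job must carry on a non-local cluster; the task-level amount becomes optional on Spark 3.4+ standalone or local-cluster deployments, where stage-level scheduling takes over. A hedged session sketch:

    from pyspark.sql import SparkSession

    spark = (
        SparkSession.builder
        # Required whenever training on GPU outside local mode:
        .config("spark.executor.resource.gpu.amount", "1")
        # Only required before Spark 3.4 or outside standalone and
        # local-cluster modes; fractional values are rejected above.
        .config("spark.task.resource.gpu.amount", "1")
        .getOrCreate()
    )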
@@ -413,53 +471,7 @@ class _SparkXGBParams(
                 "`pyspark.ml.linalg.Vector` type."
             )

-        if use_cuda(self.getOrDefault(self.device)) or self.getOrDefault(self.use_gpu):
-            gpu_per_task = (
-                _get_spark_session()
-                .sparkContext.getConf()
-                .get("spark.task.resource.gpu.amount")
-            )
-
-            is_local = _is_local(_get_spark_session().sparkContext)
-
-            if is_local:
-                # checking spark local mode.
-                if gpu_per_task is not None:
-                    raise RuntimeError(
-                        "The spark local mode does not support gpu configuration."
-                        "Please remove spark.executor.resource.gpu.amount and "
-                        "spark.task.resource.gpu.amount"
-                    )
-
-                # Support GPU training in Spark local mode is just for debugging
-                # purposes, so it's okay for printing the below warning instead of
-                # checking the real gpu numbers and raising the exception.
-                get_logger(self.__class__.__name__).warning(
-                    "You have enabled GPU in spark local mode. Please make sure your"
-                    " local node has at least %d GPUs",
-                    self.getOrDefault(self.num_workers),
-                )
-            else:
-                # checking spark non-local mode.
-                if gpu_per_task is not None:
-                    if float(gpu_per_task) < 1.0:
-                        raise ValueError(
-                            "XGBoost doesn't support GPU fractional configurations. "
-                            "Please set `spark.task.resource.gpu.amount=spark.executor"
-                            ".resource.gpu.amount`"
-                        )
-
-                    if float(gpu_per_task) > 1.0:
-                        get_logger(self.__class__.__name__).warning(
-                            "%s GPUs for each Spark task is configured, but each "
-                            "XGBoost training task uses only 1 GPU.",
-                            gpu_per_task,
-                        )
-                else:
-                    raise ValueError(
-                        "The `spark.task.resource.gpu.amount` is required for training"
-                        " on GPU."
-                    )
+        self._validate_gpu_params()

     def _validate_and_convert_feature_col_as_float_col_list(
@@ -584,6 +596,8 @@ class _SparkXGBEstimator(Estimator, _SparkXGBParams, MLReadable, MLWritable):
             arbitrary_params_dict={},
         )

+        self.logger = get_logger(self.__class__.__name__)
+
     def setParams(self, **kwargs: Any) -> None:  # pylint: disable=invalid-name
         """
         Set params for the estimator.
@@ -886,6 +900,116 @@ class _SparkXGBEstimator(Estimator, _SparkXGBParams, MLReadable, MLWritable):

         return booster_params, train_call_kwargs_params, dmatrix_kwargs

+    def _skip_stage_level_scheduling(self) -> bool:
+        # pylint: disable=too-many-return-statements
+        """Check if stage-level scheduling is not needed,
+        return true to skip stage-level scheduling"""
+
+        if use_cuda(self.getOrDefault(self.device)) or self.getOrDefault(self.use_gpu):
+            ss = _get_spark_session()
+            sc = ss.sparkContext
+
+            if ss.version < "3.4.0":
+                self.logger.info(
+                    "Stage-level scheduling in xgboost requires spark version 3.4.0+"
+                )
+                return True
+
+            if not _is_standalone_or_localcluster(sc):
+                self.logger.info(
+                    "Stage-level scheduling in xgboost requires spark standalone or "
+                    "local-cluster mode"
+                )
+                return True
+
+            executor_cores = sc.getConf().get("spark.executor.cores")
+            executor_gpus = sc.getConf().get("spark.executor.resource.gpu.amount")
+            if executor_cores is None or executor_gpus is None:
+                self.logger.info(
+                    "Stage-level scheduling in xgboost requires spark.executor.cores, "
+                    "spark.executor.resource.gpu.amount to be set."
+                )
+                return True
+
+            if int(executor_cores) == 1:
+                # there will be only 1 task running at any time.
+                self.logger.info(
+                    "Stage-level scheduling in xgboost requires spark.executor.cores > 1 "
+                )
+                return True
+
+            if int(executor_gpus) > 1:
+                # For spark.executor.resource.gpu.amount > 1, we suppose user knows how
+                # to configure to make xgboost run successfully.
+                self.logger.info(
+                    "Stage-level scheduling in xgboost will not work "
+                    "when spark.executor.resource.gpu.amount>1"
+                )
+                return True
+
+            task_gpu_amount = sc.getConf().get("spark.task.resource.gpu.amount")
+
+            if task_gpu_amount is None:
+                # The ETL tasks will not grab a gpu when spark.task.resource.gpu.amount
+                # is not set, but with stage-level scheduling, we can make training task
+                # grab the gpu.
+                return False
+
+            if float(task_gpu_amount) == float(executor_gpus):
+                # spark.executor.resource.gpu.amount=spark.task.resource.gpu.amount
+                # results in only 1 task running at a time, which may cause perf issue.
+                return True
+
+            # We can enable stage-level scheduling
+            return False
+
+        # CPU training doesn't require stage-level scheduling
+        return True
+
+    def _try_stage_level_scheduling(self, rdd: RDD) -> RDD:
+        """Try to enable stage-level scheduling"""
+
+        if self._skip_stage_level_scheduling():
+            return rdd
+
+        ss = _get_spark_session()
+
+        # executor_cores will not be None
+        executor_cores = ss.sparkContext.getConf().get("spark.executor.cores")
+        assert executor_cores is not None
+
+        # Spark-rapids is a project to leverage GPUs to accelerate spark SQL.
+        # If spark-rapids is enabled, to avoid GPU OOM, we don't allow other
+        # ETL gpu tasks running alongside training tasks.
+        spark_plugins = ss.conf.get("spark.plugins", " ")
+        assert spark_plugins is not None
+        spark_rapids_sql_enabled = ss.conf.get("spark.rapids.sql.enabled", "true")
+        assert spark_rapids_sql_enabled is not None
+
+        task_cores = (
+            int(executor_cores)
+            if "com.nvidia.spark.SQLPlugin" in spark_plugins
+            and "true" == spark_rapids_sql_enabled.lower()
+            else (int(executor_cores) // 2) + 1
+        )
+
+        # Each training task requires cpu cores > total executor cores//2 + 1 which can
+        # make sure the tasks be sent to different executors.
+        #
+        # Please note that we can't use GPU to limit the concurrent tasks because of
+        # https://issues.apache.org/jira/browse/SPARK-45527.
+
+        task_gpus = 1.0
+        treqs = TaskResourceRequests().cpus(task_cores).resource("gpu", task_gpus)
+        rp = ResourceProfileBuilder().require(treqs).build
+
+        self.logger.info(
+            "XGBoost training tasks require the resource(cores=%s, gpu=%s).",
+            task_cores,
+            task_gpus,
+        )
+        return rdd.withResources(rp)
+
     def _fit(self, dataset: DataFrame) -> "_SparkXGBModel":
         # pylint: disable=too-many-statements, too-many-locals
         self._validate_params()
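Under the hood, `_try_stage_level_scheduling` builds a ResourceProfile so that only the barrier training stage requests a GPU plus more than half of the executor cores, which keeps two training tasks from landing on one executor. A sketch of the same PySpark primitives, with an assumed executor core count:

    from pyspark.resource import ResourceProfileBuilder, TaskResourceRequests

    executor_cores = 8  # assumed value of spark.executor.cores
    treqs = (
        TaskResourceRequests()
        .cpus(executor_cores // 2 + 1)  # matches the non-rapids branch above
        .resource("gpu", 1.0)
    )
    rp = ResourceProfileBuilder().require(treqs).build  # build is a property

    # rdd is assumed to be the barrier-mode training RDD built in _fit:
    # rdd = rdd.withResources(rp)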
@@ -986,14 +1110,16 @@ class _SparkXGBEstimator(Estimator, _SparkXGBParams, MLReadable, MLWritable):
         )

         def _run_job() -> Tuple[str, str]:
-            ret = (
+            rdd = (
                 dataset.mapInPandas(
-                    _train_booster, schema="config string, booster string"  # type: ignore
+                    _train_booster,  # type: ignore
+                    schema="config string, booster string",
                 )
                 .rdd.barrier()
                 .mapPartitions(lambda x: x)
-                .collect()[0]
             )
+            rdd_with_resource = self._try_stage_level_scheduling(rdd)
+            ret = rdd_with_resource.collect()[0]
             return ret[0], ret[1]

         get_logger("XGBoost-PySpark").info(
@@ -1117,12 +1243,111 @@ class _SparkXGBModel(Model, _SparkXGBParams, MLReadable, MLWritable):
         )
         return features_col, feature_col_names

+    def _get_pred_contrib_col_name(self) -> Optional[str]:
+        """Return the pred_contrib_col col name"""
+        pred_contrib_col_name = None
+        if (
+            self.isDefined(self.pred_contrib_col)
+            and self.getOrDefault(self.pred_contrib_col) != ""
+        ):
+            pred_contrib_col_name = self.getOrDefault(self.pred_contrib_col)
+
+        return pred_contrib_col_name
+
+    def _out_schema(self) -> Tuple[bool, str]:
+        """Return the bool to indicate if it's a single prediction, true is single prediction,
+        and the returned type of the user-defined function. The value must
+        be a DDL-formatted type string."""
+
+        if self._get_pred_contrib_col_name() is not None:
+            return False, f"{pred.prediction} double, {pred.pred_contrib} array<double>"
+
+        return True, "double"
+
+    def _get_predict_func(self) -> Callable:
+        """Return the true prediction function which will be running on the executor side"""
+
+        predict_params = self._gen_predict_params_dict()
+        pred_contrib_col_name = self._get_pred_contrib_col_name()
+
+        def _predict(
+            model: XGBModel, X: ArrayLike, base_margin: Optional[ArrayLike]
+        ) -> Union[pd.DataFrame, pd.Series]:
+            data = {}
+            preds = model.predict(
+                X,
+                base_margin=base_margin,
+                validate_features=False,
+                **predict_params,
+            )
+            data[pred.prediction] = pd.Series(preds)
+
+            if pred_contrib_col_name is not None:
+                contribs = pred_contribs(model, X, base_margin)
+                data[pred.pred_contrib] = pd.Series(list(contribs))
+                return pd.DataFrame(data=data)
+
+            return data[pred.prediction]
+
+        return _predict
+
+    def _post_transform(self, dataset: DataFrame, pred_col: Column) -> DataFrame:
+        """Post process of transform"""
+        prediction_col_name = self.getOrDefault(self.predictionCol)
+        single_pred, _ = self._out_schema()
+
+        if single_pred:
+            if prediction_col_name:
+                dataset = dataset.withColumn(prediction_col_name, pred_col)
+        else:
+            pred_struct_col = "_prediction_struct"
+            dataset = dataset.withColumn(pred_struct_col, pred_col)
+
+            if prediction_col_name:
+                dataset = dataset.withColumn(
+                    prediction_col_name, getattr(col(pred_struct_col), pred.prediction)
+                )
+
+            pred_contrib_col_name = self._get_pred_contrib_col_name()
+            if pred_contrib_col_name is not None:
+                dataset = dataset.withColumn(
+                    pred_contrib_col_name,
+                    array_to_vector(getattr(col(pred_struct_col), pred.pred_contrib)),
+                )
+
+            dataset = dataset.drop(pred_struct_col)
+        return dataset
+
+    def _gpu_transform(self) -> bool:
+        """If gpu is used to do the prediction, true to gpu prediction"""
+
+        if _is_local(_get_spark_session().sparkContext):
+            # if it's local model, we just use the internal "device"
+            return use_cuda(self.getOrDefault(self.device))
+
+        gpu_per_task = (
+            _get_spark_session()
+            .sparkContext.getConf()
+            .get("spark.task.resource.gpu.amount")
+        )
+
+        # User don't set gpu configurations, just use cpu
+        if gpu_per_task is None:
+            if use_cuda(self.getOrDefault(self.device)):
+                get_logger("XGBoost-PySpark").warning(
+                    "Do the prediction on the CPUs since "
+                    "no gpu configurations are set"
+                )
+            return False
+
+        # User already sets the gpu configurations, we just use the internal "device".
+        return use_cuda(self.getOrDefault(self.device))
+
     def _transform(self, dataset: DataFrame) -> DataFrame:
         # pylint: disable=too-many-statements, too-many-locals
         # Save xgb_sklearn_model and predict_params to be local variable
         # to avoid the `self` object to be pickled to remote.
         xgb_sklearn_model = self._xgb_sklearn_model
-        predict_params = self._gen_predict_params_dict()

         has_base_margin = False
         if (
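With `_out_schema` and `_post_transform` factored out, the columns produced by `model.transform` follow directly from which optional output columns are configured. A hedged sketch requesting per-feature contributions next to the plain prediction (`train_df` and `test_df` are assumed to exist):

    from xgboost.spark import SparkXGBRegressor

    reg = SparkXGBRegressor(
        features_col="features",
        label_col="label",
        pred_contrib_col="contribs",
    )
    model = reg.fit(train_df)
    # _post_transform unpacks the prediction struct into one double column
    # plus a vector column of per-feature contributions.
    model.transform(test_df).select("prediction", "contribs").show(3)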
@@ -1137,79 +1362,92 @@ class _SparkXGBModel(Model, _SparkXGBParams, MLReadable, MLWritable):
         features_col, feature_col_names = self._get_feature_col(dataset)
         enable_sparse_data_optim = self.getOrDefault(self.enable_sparse_data_optim)

-        pred_contrib_col_name = None
-        if (
-            self.isDefined(self.pred_contrib_col)
-            and self.getOrDefault(self.pred_contrib_col) != ""
-        ):
-            pred_contrib_col_name = self.getOrDefault(self.pred_contrib_col)
-
-        single_pred = True
-        schema = "double"
-        if pred_contrib_col_name:
-            single_pred = False
-            schema = f"{pred.prediction} double, {pred.pred_contrib} array<double>"
+        predict_func = self._get_predict_func()
+
+        _, schema = self._out_schema()
+        is_local = _is_local(_get_spark_session().sparkContext)
+        run_on_gpu = self._gpu_transform()

         @pandas_udf(schema)  # type: ignore
         def predict_udf(iterator: Iterator[pd.DataFrame]) -> Iterator[pd.Series]:
             assert xgb_sklearn_model is not None
             model = xgb_sklearn_model
+
+            from pyspark import TaskContext
+
+            context = TaskContext.get()
+            assert context is not None
+
+            dev_ordinal = -1
+
+            if is_cudf_available():
+                if is_local:
+                    if run_on_gpu and is_cupy_available():
+                        import cupy as cp  # pylint: disable=import-error
+
+                        total_gpus = cp.cuda.runtime.getDeviceCount()
+                        if total_gpus > 0:
+                            partition_id = context.partitionId()
+                            # For transform local mode, default the dev_ordinal to
+                            # (partition id) % gpus.
+                            dev_ordinal = partition_id % total_gpus
+                elif run_on_gpu:
+                    dev_ordinal = _get_gpu_id(context)
+
+                if dev_ordinal >= 0:
+                    device = "cuda:" + str(dev_ordinal)
+                    get_logger("XGBoost-PySpark").info(
+                        "Do the inference with device: %s", device
+                    )
+                    model.set_params(device=device)
+                else:
+                    get_logger("XGBoost-PySpark").info("Do the inference on the CPUs")
+            else:
+                msg = (
+                    "CUDF is unavailable, fallback the inference on the CPUs"
+                    if run_on_gpu
+                    else "Do the inference on the CPUs"
+                )
+                get_logger("XGBoost-PySpark").info(msg)
+
+            def to_gpu_if_possible(data: ArrayLike) -> ArrayLike:
+                """Move the data to gpu if possible"""
+                if dev_ordinal >= 0:
+                    import cudf  # pylint: disable=import-error
+                    import cupy as cp  # pylint: disable=import-error
+
+                    # We must set the device after import cudf, which will change
+                    # the device id to 0.
+                    # See https://github.com/rapidsai/cudf/issues/11386
+                    cp.cuda.runtime.setDevice(dev_ordinal)  # pylint: disable=I1101
+                    df = cudf.DataFrame(data)
+                    del data
+                    return df
+                return data
+
             for data in iterator:
                 if enable_sparse_data_optim:
                     X = _read_csr_matrix_from_unwrapped_spark_vec(data)
                 else:
                     if feature_col_names is not None:
-                        X = data[feature_col_names]
+                        tmp = data[feature_col_names]
                     else:
-                        X = stack_series(data[alias.data])
+                        tmp = stack_series(data[alias.data])
+                    X = to_gpu_if_possible(tmp)

                 if has_base_margin:
-                    base_margin = data[alias.margin].to_numpy()
+                    base_margin = to_gpu_if_possible(data[alias.margin])
                 else:
                     base_margin = None

-                data = {}
-                preds = model.predict(
-                    X,
-                    base_margin=base_margin,
-                    validate_features=False,
-                    **predict_params,
-                )
-                data[pred.prediction] = pd.Series(preds)
-
-                if pred_contrib_col_name:
-                    contribs = pred_contribs(model, X, base_margin)
-                    data[pred.pred_contrib] = pd.Series(list(contribs))
-                    yield pd.DataFrame(data=data)
-                else:
-                    yield data[pred.prediction]
+                yield predict_func(model, X, base_margin)

         if has_base_margin:
             pred_col = predict_udf(struct(*features_col, base_margin_col))
         else:
             pred_col = predict_udf(struct(*features_col))

-        prediction_col_name = self.getOrDefault(self.predictionCol)
-
-        if single_pred:
-            dataset = dataset.withColumn(prediction_col_name, pred_col)
-        else:
-            pred_struct_col = "_prediction_struct"
-            dataset = dataset.withColumn(pred_struct_col, pred_col)
-
-            dataset = dataset.withColumn(
-                prediction_col_name, getattr(col(pred_struct_col), pred.prediction)
-            )
-
-            if pred_contrib_col_name:
-                dataset = dataset.withColumn(
-                    pred_contrib_col_name,
-                    array_to_vector(getattr(col(pred_struct_col), pred.pred_contrib)),
-                )
-
-            dataset = dataset.drop(pred_struct_col)
-
-        return dataset
+        return self._post_transform(dataset, pred_col)


 class _ClassificationModel(  # pylint: disable=abstract-method
@@ -1221,22 +1459,21 @@ class _ClassificationModel(  # pylint: disable=abstract-method
     .. Note:: This API is experimental.
     """

-    def _transform(self, dataset: DataFrame) -> DataFrame:
-        # pylint: disable=too-many-statements, too-many-locals
-        # Save xgb_sklearn_model and predict_params to be local variable
-        # to avoid the `self` object to be pickled to remote.
-        xgb_sklearn_model = self._xgb_sklearn_model
-        predict_params = self._gen_predict_params_dict()
-
-        has_base_margin = False
-        if (
-            self.isDefined(self.base_margin_col)
-            and self.getOrDefault(self.base_margin_col) != ""
-        ):
-            has_base_margin = True
-            base_margin_col = col(self.getOrDefault(self.base_margin_col)).alias(
-                alias.margin
-            )
+    def _out_schema(self) -> Tuple[bool, str]:
+        schema = (
+            f"{pred.raw_prediction} array<double>, {pred.prediction} double,"
+            f" {pred.probability} array<double>"
+        )
+        if self._get_pred_contrib_col_name() is not None:
+            # We will force setting strict_shape to True when predicting contribs,
+            # So, it will also output 3-D shape result.
+            schema = f"{schema}, {pred.pred_contrib} array<array<double>>"
+
+        return False, schema
+
+    def _get_predict_func(self) -> Callable:
+        predict_params = self._gen_predict_params_dict()
+        pred_contrib_col_name = self._get_pred_contrib_col_name()

         def transform_margin(margins: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
             if margins.ndim == 1:
@@ -1251,45 +1488,9 @@ class _ClassificationModel(  # pylint: disable=abstract-method
                 class_probs = softmax(raw_preds, axis=1)
             return raw_preds, class_probs

-        features_col, feature_col_names = self._get_feature_col(dataset)
-        enable_sparse_data_optim = self.getOrDefault(self.enable_sparse_data_optim)
-
-        pred_contrib_col_name = None
-        if (
-            self.isDefined(self.pred_contrib_col)
-            and self.getOrDefault(self.pred_contrib_col) != ""
-        ):
-            pred_contrib_col_name = self.getOrDefault(self.pred_contrib_col)
-
-        schema = (
-            f"{pred.raw_prediction} array<double>, {pred.prediction} double,"
-            f" {pred.probability} array<double>"
-        )
-        if pred_contrib_col_name:
-            # We will force setting strict_shape to True when predicting contribs,
-            # So, it will also output 3-D shape result.
-            schema = f"{schema}, {pred.pred_contrib} array<array<double>>"
-
-        @pandas_udf(schema)  # type: ignore
-        def predict_udf(
-            iterator: Iterator[Tuple[pd.Series, ...]]
-        ) -> Iterator[pd.DataFrame]:
-            assert xgb_sklearn_model is not None
-            model = xgb_sklearn_model
-            for data in iterator:
-                if enable_sparse_data_optim:
-                    X = _read_csr_matrix_from_unwrapped_spark_vec(data)
-                else:
-                    if feature_col_names is not None:
-                        X = data[feature_col_names]  # type: ignore
-                    else:
-                        X = stack_series(data[alias.data])
-
-                if has_base_margin:
-                    base_margin = stack_series(data[alias.margin])
-                else:
-                    base_margin = None
-
+        def _predict(
+            model: XGBModel, X: ArrayLike, base_margin: Optional[np.ndarray]
+        ) -> Union[pd.DataFrame, pd.Series]:
             margins = model.predict(
                 X,
                 base_margin=base_margin,
@@ -1308,19 +1509,17 @@ class _ClassificationModel(  # pylint: disable=abstract-method
                 pred.probability: pd.Series(list(class_probs)),
             }

-            if pred_contrib_col_name:
+            if pred_contrib_col_name is not None:
                 contribs = pred_contribs(model, X, base_margin, strict_shape=True)
                 result[pred.pred_contrib] = pd.Series(list(contribs.tolist()))

-            yield pd.DataFrame(data=result)
+            return pd.DataFrame(data=result)

-        if has_base_margin:
-            pred_struct = predict_udf(struct(*features_col, base_margin_col))
-        else:
-            pred_struct = predict_udf(struct(*features_col))
+        return _predict

+    def _post_transform(self, dataset: DataFrame, pred_col: Column) -> DataFrame:
         pred_struct_col = "_prediction_struct"
-        dataset = dataset.withColumn(pred_struct_col, pred_struct)
+        dataset = dataset.withColumn(pred_struct_col, pred_col)

         raw_prediction_col_name = self.getOrDefault(self.rawPredictionCol)
         if raw_prediction_col_name:
@@ -1342,7 +1541,8 @@ class _ClassificationModel(  # pylint: disable=abstract-method
                 array_to_vector(getattr(col(pred_struct_col), pred.probability)),
             )

-        if pred_contrib_col_name:
+        pred_contrib_col_name = self._get_pred_contrib_col_name()
+        if pred_contrib_col_name is not None:
             dataset = dataset.withColumn(
                 pred_contrib_col_name,
                 getattr(col(pred_struct_col), pred.pred_contrib),
@@ -10,7 +10,7 @@ from threading import Thread
 from typing import Any, Callable, Dict, Optional, Set, Type

 import pyspark
-from pyspark import BarrierTaskContext, SparkContext, SparkFiles
+from pyspark import BarrierTaskContext, SparkContext, SparkFiles, TaskContext
 from pyspark.sql.session import SparkSession

 from xgboost import Booster, XGBModel, collective
@@ -129,7 +129,14 @@ def _is_local(spark_context: SparkContext) -> bool:
     return spark_context._jsc.sc().isLocal()


-def _get_gpu_id(task_context: BarrierTaskContext) -> int:
+def _is_standalone_or_localcluster(spark_context: SparkContext) -> bool:
+    master = spark_context.getConf().get("spark.master")
+    return master is not None and (
+        master.startswith("spark://") or master.startswith("local-cluster")
+    )
+
+
+def _get_gpu_id(task_context: TaskContext) -> int:
     """Get the gpu id from the task resources"""
     if task_context is None:
         # This is a safety check.
@@ -75,3 +75,28 @@ def run_ranking_qid_df(impl: ModuleType, tree_method: str) -> None:

     with pytest.raises(ValueError, match="Either `group` or `qid`."):
         ranker.fit(df, y, eval_set=[(X, y)])
+
+
+def run_ranking_categorical(device: str) -> None:
+    """Test LTR with categorical features."""
+    from sklearn.model_selection import cross_val_score
+
+    X, y = tm.make_categorical(
+        n_samples=512, n_features=10, n_categories=3, onehot=False
+    )
+    rng = np.random.default_rng(1994)
+    qid = rng.choice(3, size=y.shape[0])
+    qid = np.sort(qid)
+    X["qid"] = qid
+
+    ltr = xgb.XGBRanker(enable_categorical=True, device=device)
+    ltr.fit(X, y)
+    score = ltr.score(X, y)
+    assert score > 0.9
+
+    ltr = xgb.XGBRanker(enable_categorical=True, device=device)
+
+    # test using the score function inside sklearn.
+    scores = cross_val_score(ltr, X, y)
+    for s in scores:
+        assert s > 0.7
@@ -52,7 +52,7 @@ inline XGBOOST_DEVICE bool InvalidCat(float cat) {
  *
  * Go to left if it's NOT the matching category, which matches one-hot encoding.
  */
-inline XGBOOST_DEVICE bool Decision(common::Span<uint32_t const> cats, float cat) {
+inline XGBOOST_DEVICE bool Decision(common::Span<CatBitField::value_type const> cats, float cat) {
   KCatBitField const s_cats(cats);
   if (XGBOOST_EXPECT(InvalidCat(cat), false)) {
     return true;
@@ -3,9 +3,11 @@
  */
 #include "error_msg.h"

+#include <mutex>    // for call_once, once_flag
 #include <sstream>  // for stringstream

 #include "../collective/communicator-inl.h"  // for GetRank
+#include "xgboost/context.h"                 // for Context
 #include "xgboost/logging.h"

 namespace xgboost::error {
@@ -26,34 +28,43 @@ void WarnDeprecatedGPUHist() {
 }

 void WarnManualUpdater() {
-  bool static thread_local logged{false};
-  if (logged) {
-    return;
-  }
-  LOG(WARNING)
-      << "You have manually specified the `updater` parameter. The `tree_method` parameter "
-         "will be ignored. Incorrect sequence of updaters will produce undefined "
-         "behavior. For common uses, we recommend using `tree_method` parameter instead.";
-  logged = true;
+  static std::once_flag flag;
+  std::call_once(flag, [] {
+    LOG(WARNING)
+        << "You have manually specified the `updater` parameter. The `tree_method` parameter "
+           "will be ignored. Incorrect sequence of updaters will produce undefined "
+           "behavior. For common uses, we recommend using `tree_method` parameter instead.";
+  });
 }

 void WarnDeprecatedGPUId() {
-  static thread_local bool logged{false};
-  if (logged) {
-    return;
-  }
-  auto msg = DeprecatedFunc("gpu_id", "2.0.0", "device");
-  msg += " E.g. device=cpu/cuda/cuda:0";
-  LOG(WARNING) << msg;
-  logged = true;
+  static std::once_flag flag;
+  std::call_once(flag, [] {
+    auto msg = DeprecatedFunc("gpu_id", "2.0.0", "device");
+    msg += " E.g. device=cpu/cuda/cuda:0";
+    LOG(WARNING) << msg;
+  });
 }

 void WarnEmptyDataset() {
-  static thread_local bool logged{false};
-  if (logged) {
-    return;
-  }
-  LOG(WARNING) << "Empty dataset at worker: " << collective::GetRank();
-  logged = true;
+  static std::once_flag flag;
+  std::call_once(flag,
+                 [] { LOG(WARNING) << "Empty dataset at worker: " << collective::GetRank(); });
+}
+
+void MismatchedDevices(Context const* booster, Context const* data) {
+  static std::once_flag flag;
+  std::call_once(flag, [&] {
+    LOG(WARNING)
+        << "Falling back to prediction using DMatrix due to mismatched devices. This might "
+           "lead to higher memory usage and slower performance. XGBoost is running on: "
+        << booster->DeviceName() << ", while the input data is on: " << data->DeviceName() << ".\n"
+        << R"(Potential solutions:
+- Use a data structure that matches the device ordinal in the booster.
+- Set the device for booster before call to inplace_predict.
+
+This warning will only be shown once.
+)";
+  });
 }
 }  // namespace xgboost::error
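The rewrite above swaps per-thread `thread_local` flags for `std::once_flag`, so each warning fires once per process rather than once per thread. A minimal sketch of the pattern (standard C++ only, not XGBoost code):

    #include <iostream>
    #include <mutex>
    #include <thread>

    void WarnOnce() {
      static std::once_flag flag;
      std::call_once(flag, [] { std::cerr << "warning: shown exactly once\n"; });
    }

    int main() {
      std::thread a{WarnOnce}, b{WarnOnce};
      a.join();
      b.join();  // one line on stderr, no matter which thread ran first
    }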
@@ -11,6 +11,7 @@
 #include <string>  // for string

 #include "xgboost/base.h"         // for bst_feature_t
+#include "xgboost/context.h"      // for Context
 #include "xgboost/logging.h"
 #include "xgboost/string_view.h"  // for StringView

@@ -94,5 +95,7 @@ constexpr StringView InvalidCUDAOrdinal() {
   return "Invalid device. `device` is required to be CUDA and there must be at least one GPU "
          "available for using GPU.";
 }
+
+void MismatchedDevices(Context const* booster, Context const* data);
 }  // namespace xgboost::error
 #endif  // XGBOOST_COMMON_ERROR_MSG_H_
@@ -384,7 +384,8 @@ class PrivateMmapConstStream : public AlignedResourceReadStream {
   * @param length See the `length` parameter of `mmap` for details.
   */
  explicit PrivateMmapConstStream(std::string path, std::size_t offset, std::size_t length)
-     : AlignedResourceReadStream{std::make_shared<MmapResource>(path, offset, length)} {}
+     : AlignedResourceReadStream{std::shared_ptr<MmapResource>{  // NOLINT
+           new MmapResource{std::move(path), offset, length}}} {}
  ~PrivateMmapConstStream() noexcept(false) override;
 };
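A plausible reason for preferring `std::shared_ptr{new ...}` over `std::make_shared` here: `make_shared` fuses the object and its control block into one allocation, so the storage cannot be released while any weak reference survives; a separate allocation frees the resource as soon as the strong count reaches zero, which matters for a type owning an `mmap` region. A sketch with a stand-in type, not the XGBoost class:

    #include <memory>

    struct Resource { /* imagine this owns an mmap'ed region */ };

    int main() {
      auto fused = std::make_shared<Resource>();               // one allocation
      auto split = std::shared_ptr<Resource>{new Resource{}};  // object and control block separate
      (void)fused;
      (void)split;
    }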
@@ -76,7 +76,7 @@ class RefResourceView {

   [[nodiscard]] size_type size() const { return size_; }  // NOLINT
   [[nodiscard]] size_type size_bytes() const {            // NOLINT
-    return Span{data(), size()}.size_bytes();
+    return Span<const value_type>{data(), size()}.size_bytes();
   }
   [[nodiscard]] value_type* data() { return ptr_; };              // NOLINT
   [[nodiscard]] value_type const* data() const { return ptr_; };  // NOLINT
@@ -3,14 +3,23 @@
  */
 #include "threading_utils.h"

-#include <fstream>
-#include <string>
+#include <algorithm>   // for max
+#include <exception>   // for exception
+#include <filesystem>  // for path, exists
+#include <fstream>     // for ifstream
+#include <string>      // for string

-#include "xgboost/logging.h"
+#include "common.h"  // for DivRoundUp

-namespace xgboost {
-namespace common {
-int32_t GetCfsCPUCount() noexcept {
+namespace xgboost::common {
+/**
+ * Modified from
+ * github.com/psiha/sweater/blob/master/include/boost/sweater/hardware_concurrency.hpp
+ *
+ * MIT License: Copyright (c) 2016 Domagoj Šarić
+ */
+std::int32_t GetCGroupV1Count(std::filesystem::path const& quota_path,
+                              std::filesystem::path const& peroid_path) {
 #if defined(__linux__)
   // https://bugs.openjdk.java.net/browse/JDK-8146115
   // http://hg.openjdk.java.net/jdk/hs/rev/7f22774a5f42
@@ -31,8 +40,8 @@ int32_t GetCfsCPUCount() noexcept {
     }
   };
   // complete fair scheduler from Linux
-  auto const cfs_quota(read_int("/sys/fs/cgroup/cpu/cpu.cfs_quota_us"));
-  auto const cfs_period(read_int("/sys/fs/cgroup/cpu/cpu.cfs_period_us"));
+  auto const cfs_quota(read_int(quota_path.c_str()));
+  auto const cfs_period(read_int(peroid_path.c_str()));
   if ((cfs_quota > 0) && (cfs_period > 0)) {
     return std::max(cfs_quota / cfs_period, 1);
   }
@@ -40,6 +49,47 @@ int32_t GetCfsCPUCount() noexcept {
   return -1;
 }

+std::int32_t GetCGroupV2Count(std::filesystem::path const& bandwidth_path) noexcept(true) {
+  std::int32_t cnt{-1};
+#if defined(__linux__)
+  namespace fs = std::filesystem;
+
+  std::int32_t a{0}, b{0};
+
+  auto warn = [] { LOG(WARNING) << "Invalid cgroupv2 file."; };
+  try {
+    std::ifstream fin{bandwidth_path, std::ios::in};
+    fin >> a;
+    fin >> b;
+  } catch (std::exception const&) {
+    warn();
+    return cnt;
+  }
+  if (a > 0 && b > 0) {
+    cnt = std::max(common::DivRoundUp(a, b), 1);
+  }
+#endif  // defined(__linux__)
+  return cnt;
+}
+
+std::int32_t GetCfsCPUCount() noexcept {
+  namespace fs = std::filesystem;
+  fs::path const bandwidth_path{"/sys/fs/cgroup/cpu.max"};
+  auto has_v2 = fs::exists(bandwidth_path);
+  if (has_v2) {
+    return GetCGroupV2Count(bandwidth_path);
+  }
+
+  fs::path const quota_path{"/sys/fs/cgroup/cpu/cpu.cfs_quota_us"};
+  fs::path const peroid_path{"/sys/fs/cgroup/cpu/cpu.cfs_period_us"};
+  auto has_v1 = fs::exists(quota_path) && fs::exists(peroid_path);
+  if (has_v1) {
+    return GetCGroupV1Count(quota_path, peroid_path);
+  }
+
+  return -1;
+}
+
 std::int32_t OmpGetNumThreads(std::int32_t n_threads) {
   // Don't use parallel if we are in a parallel region.
   if (omp_in_parallel()) {
@@ -54,5 +104,4 @@ std::int32_t OmpGetNumThreads(std::int32_t n_threads) {
   n_threads = std::max(n_threads, 1);
   return n_threads;
 }
-}  // namespace common
-}  // namespace xgboost
+}  // namespace xgboost::common
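For reference, cgroup v2 exposes the CPU quota as a single file, `cpu.max`, holding "&lt;quota&gt; &lt;period&gt;" (or "max &lt;period&gt;" when unlimited), while cgroup v1 splits quota and period across two files; the new `GetCfsCPUCount` probes v2 first and falls back to v1. A hedged sketch of the v2 arithmetic:

    #include <algorithm>
    #include <cassert>
    #include <cstdint>
    #include <sstream>
    #include <string>

    std::int32_t CountFromCpuMax(std::string const& content) {
      std::int32_t quota{0}, period{0};
      std::istringstream sin{content};
      sin >> quota >> period;  // "max <period>" fails the integer parse, leaving 0
      if (quota > 0 && period > 0) {
        return std::max((quota + period - 1) / period, 1);  // DivRoundUp
      }
      return -1;  // unlimited or unreadable
    }

    int main() {
      assert(CountFromCpuMax("150000 100000") == 2);  // 1.5 CPUs rounds up to 2
      assert(CountFromCpuMax("max 100000") == -1);
    }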
@@ -253,11 +253,6 @@ inline std::int32_t OmpGetThreadLimit() {
  * \brief Get thread limit from CFS.
  *
  * This function has non-trivial overhead and should not be called repeatly.
- *
- * Modified from
- * github.com/psiha/sweater/blob/master/include/boost/sweater/hardware_concurrency.hpp
- *
- * MIT License: Copyright (c) 2016 Domagoj Šarić
  */
 std::int32_t GetCfsCPUCount() noexcept;
@@ -55,6 +55,7 @@ std::shared_ptr<DMatrix> CreateDMatrixFromProxy(Context const *ctx,
   }

   CHECK(p_fmat) << "Failed to fallback.";
+  p_fmat->Info() = proxy->Info().Copy();
   return p_fmat;
 }
 }  // namespace xgboost::data
@@ -1,5 +1,5 @@
-/*!
- * Copyright 2014-2022 by XGBoost Contributors
+/**
+ * Copyright 2014-2023, XGBoost Contributors
  * \file gblinear.cc
  * \brief Implementation of Linear booster, with L1/L2 regularization: Elastic Net
  *   the update rule is parallel coordinate descent (shotgun)
@@ -26,9 +26,9 @@
 #include "../common/timer.h"
 #include "../common/common.h"
 #include "../common/threading_utils.h"
+#include "../common/error_msg.h"

-namespace xgboost {
-namespace gbm {
+namespace xgboost::gbm {

 DMLC_REGISTRY_FILE_TAG(gblinear);

@@ -83,7 +83,16 @@ class GBLinear : public GradientBooster {
     }
     param_.UpdateAllowUnknown(cfg);
     param_.CheckGPUSupport();
-    updater_.reset(LinearUpdater::Create(param_.updater, ctx_));
+    if (param_.updater == "gpu_coord_descent") {
+      LOG(WARNING) << error::DeprecatedFunc("gpu_coord_descent", "2.0.0",
+                                            R"(device="cuda", updater="coord_descent")");
+    }
+
+    if (param_.updater == "coord_descent" && ctx_->IsCUDA()) {
+      updater_.reset(LinearUpdater::Create("gpu_coord_descent", ctx_));
+    } else {
+      updater_.reset(LinearUpdater::Create(param_.updater, ctx_));
+    }
     updater_->Configure(cfg);
     monitor_.Init("GBLinear");
   }
@@ -354,5 +363,4 @@ XGBOOST_REGISTER_GBM(GBLinear, "gblinear")
     .set_body([](LearnerModelParam const* booster_config, Context const* ctx) {
       return new GBLinear(booster_config, ctx);
     });
-}  // namespace gbm
-}  // namespace xgboost
+}  // namespace xgboost::gbm
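The configuration change above keeps a single public updater name and resolves the device-specific implementation at configure time; the old `gpu_coord_descent` name still works but warns. A generic sketch of that dispatch shape (illustrative only; the real factory is `LinearUpdater::Create`):

    #include <iostream>
    #include <string>

    std::string ResolveUpdater(std::string const& updater, bool is_cuda) {
      if (updater == "gpu_coord_descent") {
        std::cerr << "warning: deprecated; use device=\"cuda\" with updater=\"coord_descent\"\n";
        return updater;  // honored for backward compatibility
      }
      return (updater == "coord_descent" && is_cuda) ? "gpu_coord_descent" : updater;
    }

    int main() {
      std::cout << ResolveUpdater("coord_descent", true) << '\n';   // gpu_coord_descent
      std::cout << ResolveUpdater("coord_descent", false) << '\n';  // coord_descent
    }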
@@ -85,25 +85,6 @@ bool UpdatersMatched(std::vector<std::string> updater_seq,
     return name == up->Name();
   });
 }
-
-void MismatchedDevices(Context const* booster, Context const* data) {
-  bool thread_local static logged{false};
-  if (logged) {
-    return;
-  }
-  LOG(WARNING) << "Falling back to prediction using DMatrix due to mismatched devices. This might "
-                  "lead to higher memory usage and slower performance. XGBoost is running on: "
-               << booster->DeviceName() << ", while the input data is on: " << data->DeviceName()
-               << ".\n"
-               << R"(Potential solutions:
-- Use a data structure that matches the device ordinal in the booster.
-- Set the device for booster before call to inplace_predict.
-
-This warning will only be shown once for each thread. Subsequent warnings made by the
-current thread will be suppressed.
-)";
-  logged = true;
-}
 }  // namespace

 void GBTree::Configure(Args const& cfg) {
@@ -146,14 +127,6 @@ void GBTree::Configure(Args const& cfg) {
   if (specified_updater_) {
     error::WarnManualUpdater();
   }
-
-  if (model_.learner_model_param->IsVectorLeaf()) {
-    CHECK(tparam_.tree_method == TreeMethod::kHist || tparam_.tree_method == TreeMethod::kAuto)
-        << "Only the hist tree method is supported for building multi-target trees with vector "
-           "leaf.";
-    CHECK(ctx_->IsCPU()) << "GPU is not yet supported for vector leaf.";
-  }

   LOG(DEBUG) << "Using tree method: " << static_cast<int>(tparam_.tree_method);

   if (!specified_updater_) {
@@ -225,6 +198,13 @@ void GBTree::UpdateTreeLeaf(DMatrix const* p_fmat, HostDeviceVector<float> const

 void GBTree::DoBoost(DMatrix* p_fmat, HostDeviceVector<GradientPair>* in_gpair,
                      PredictionCacheEntry* predt, ObjFunction const* obj) {
+  if (model_.learner_model_param->IsVectorLeaf()) {
+    CHECK(tparam_.tree_method == TreeMethod::kHist || tparam_.tree_method == TreeMethod::kAuto)
+        << "Only the hist tree method is supported for building multi-target trees with vector "
+           "leaf.";
+    CHECK(ctx_->IsCPU()) << "GPU is not yet supported for vector leaf.";
+  }
+
   TreesOneIter new_trees;
   bst_target_t const n_groups = model_.learner_model_param->OutputLength();
   monitor_.Start("BoostNewTrees");
@@ -555,7 +535,7 @@ void GBTree::InplacePredict(std::shared_ptr<DMatrix> p_m, float missing,
   auto [tree_begin, tree_end] = detail::LayerToTree(model_, layer_begin, layer_end);
   CHECK_LE(tree_end, model_.trees.size()) << "Invalid number of trees.";
   if (p_m->Ctx()->Device() != this->ctx_->Device()) {
-    MismatchedDevices(this->ctx_, p_m->Ctx());
+    error::MismatchedDevices(this->ctx_, p_m->Ctx());
     CHECK_EQ(out_preds->version, 0);
     auto proxy = std::dynamic_pointer_cast<data::DMatrixProxy>(p_m);
     CHECK(proxy) << error::InplacePredictProxy();
@@ -808,7 +788,7 @@ class Dart : public GBTree {
   auto n_groups = model_.learner_model_param->num_output_group;

   if (ctx_->Device() != p_fmat->Ctx()->Device()) {
-    MismatchedDevices(ctx_, p_fmat->Ctx());
+    error::MismatchedDevices(ctx_, p_fmat->Ctx());
     auto proxy = std::dynamic_pointer_cast<data::DMatrixProxy>(p_fmat);
     CHECK(proxy) << error::InplacePredictProxy();
     auto p_fmat = data::CreateDMatrixFromProxy(ctx_, proxy, missing);
@@ -1317,7 +1317,9 @@ class LearnerImpl : public LearnerIO {
     if (metrics_.empty() && tparam_.disable_default_eval_metric <= 0) {
       metrics_.emplace_back(Metric::Create(obj_->DefaultEvalMetric(), &ctx_));
       auto config = obj_->DefaultMetricConfig();
-      metrics_.back()->LoadConfig(config);
+      if (!IsA<Null>(config)) {
+        metrics_.back()->LoadConfig(config);
+      }
       metrics_.back()->Configure({cfg_.begin(), cfg_.end()});
     }
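The guard added above only loads a metric configuration when the objective actually supplies one; objectives without parameters return a JSON `Null`. The same shape, with `std::optional` standing in for `Json`:

    #include <iostream>
    #include <optional>
    #include <string>

    using MetricConfig = std::optional<std::string>;  // stand-in for Json / Null

    void ApplyDefault(MetricConfig const& config) {
      if (config) {  // mirrors !IsA<Null>(config)
        std::cout << "load: " << *config << '\n';
      }              // otherwise keep the metric's own defaults
    }

    int main() {
      ApplyDefault(std::nullopt);                        // objective has no default config
      ApplyDefault(MetricConfig{R"({"name":"mphe"})"});  // objective forwarded one
    }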
@@ -9,8 +9,7 @@
 #include "coordinate_common.h"
 #include "xgboost/json.h"

-namespace xgboost {
-namespace linear {
+namespace xgboost::linear {

 DMLC_REGISTER_PARAMETER(CoordinateParam);
 DMLC_REGISTRY_FILE_TAG(updater_coordinate);
@@ -39,8 +38,9 @@ class CoordinateUpdater : public LinearUpdater {
     FromJson(config.at("linear_train_param"), &tparam_);
     FromJson(config.at("coordinate_param"), &cparam_);
   }
-  void SaveConfig(Json* p_out) const override {
-    auto& out = *p_out;
+  void SaveConfig(Json *p_out) const override {
+    LOG(DEBUG) << "Save config for CPU updater.";
+    auto &out = *p_out;
     out["linear_train_param"] = ToJson(tparam_);
     out["coordinate_param"] = ToJson(cparam_);
   }
@@ -99,5 +99,4 @@ class CoordinateUpdater : public LinearUpdater {
 XGBOOST_REGISTER_LINEAR_UPDATER(CoordinateUpdater, "coord_descent")
     .describe("Update linear model according to coordinate descent algorithm.")
     .set_body([]() { return new CoordinateUpdater(); });
-}  // namespace linear
-}  // namespace xgboost
+}  // namespace xgboost::linear
@@ -15,8 +15,7 @@
 #include "../common/timer.h"
 #include "./param.h"

-namespace xgboost {
-namespace linear {
+namespace xgboost::linear {

 DMLC_REGISTRY_FILE_TAG(updater_gpu_coordinate);

@@ -29,7 +28,7 @@ DMLC_REGISTRY_FILE_TAG(updater_gpu_coordinate);
 class GPUCoordinateUpdater : public LinearUpdater {  // NOLINT
  public:
   // set training parameter
-  void Configure(Args const& args) override {
+  void Configure(Args const &args) override {
     tparam_.UpdateAllowUnknown(args);
     coord_param_.UpdateAllowUnknown(args);
     selector_.reset(FeatureSelector::Create(tparam_.feature_selector));
@@ -41,8 +40,9 @@ class GPUCoordinateUpdater : public LinearUpdater {  // NOLINT
     FromJson(config.at("linear_train_param"), &tparam_);
     FromJson(config.at("coordinate_param"), &coord_param_);
   }
-  void SaveConfig(Json* p_out) const override {
-    auto& out = *p_out;
+  void SaveConfig(Json *p_out) const override {
+    LOG(DEBUG) << "Save config for GPU updater.";
+    auto &out = *p_out;
     out["linear_train_param"] = ToJson(tparam_);
     out["coordinate_param"] = ToJson(coord_param_);
   }
@@ -101,10 +101,9 @@ class GPUCoordinateUpdater : public LinearUpdater {  // NOLINT
     monitor_.Stop("LazyInitDevice");

     monitor_.Start("UpdateGpair");
-    auto &in_gpair_host = in_gpair->ConstHostVector();
     // Update gpair
     if (ctx_->gpu_id >= 0) {
-      this->UpdateGpair(in_gpair_host);
+      this->UpdateGpair(in_gpair->ConstHostVector());
     }
     monitor_.Stop("UpdateGpair");

@@ -249,5 +248,4 @@ XGBOOST_REGISTER_LINEAR_UPDATER(GPUCoordinateUpdater, "gpu_coord_descent")
               "Update linear model according to coordinate descent algorithm. GPU "
               "accelerated.")
     .set_body([]() { return new GPUCoordinateUpdater(); });
-}  // namespace linear
-}  // namespace xgboost
+}  // namespace xgboost::linear
@@ -268,6 +268,13 @@ class PseudoHuberRegression : public FitIntercept {
     }
     FromJson(in["pseudo_huber_param"], &param_);
   }
+  [[nodiscard]] Json DefaultMetricConfig() const override {
+    CHECK(param_.GetInitialised());
+    Json config{Object{}};
+    config["name"] = String{this->DefaultEvalMetric()};
+    config["pseudo_huber_param"] = ToJson(param_);
+    return config;
+  }
 };

 XGBOOST_REGISTER_OBJECTIVE(PseudoHuberRegression, "reg:pseudohubererror")
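The new `DefaultMetricConfig` lets the objective hand its parameter block to the default metric, so evaluation plausibly runs with the same pseudo-Huber parameter the objective trained with. A stand-in illustration of that handshake (the real types are `Json`/`Object`, and the key names here are assumed for illustration):

    #include <cassert>
    #include <map>
    #include <string>

    using Config = std::map<std::string, std::string>;  // illustrative stand-in for Json

    Config DefaultMetricConfig(std::string const& slope) {
      // "huber_slope" is a hypothetical key name for this sketch
      return Config{{"name", "mphe"}, {"huber_slope", slope}};
    }

    int main() {
      auto config = DefaultMetricConfig("2.0");
      assert(config.at("huber_slope") == "2.0");  // metric scores with the trained parameter
    }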
@@ -1,5 +1,5 @@
-/*!
- * Copyright 2020-2022 by XGBoost Contributors
+/**
+ * Copyright 2020-2023, XGBoost Contributors
  */
 #include <algorithm>  // std::max
 #include <vector>
@@ -11,9 +11,7 @@
 #include "evaluate_splits.cuh"
 #include "expand_entry.cuh"

-namespace xgboost {
-namespace tree {
+namespace xgboost::tree {

 // With constraints
 XGBOOST_DEVICE float LossChangeMissing(const GradientPairInt64 &scan,
                                        const GradientPairInt64 &missing,
@@ -315,11 +313,11 @@ __device__ void SetCategoricalSplit(const EvaluateSplitSharedInputs &shared_inpu
                                     common::Span<common::CatBitField::value_type> out,
                                     DeviceSplitCandidate *p_out_split) {
   auto &out_split = *p_out_split;
-  out_split.split_cats = common::CatBitField{out};
+  auto out_cats = common::CatBitField{out};

   // Simple case for one hot split
   if (common::UseOneHot(shared_inputs.FeatureBins(fidx), shared_inputs.param.max_cat_to_onehot)) {
-    out_split.split_cats.Set(common::AsCat(out_split.thresh));
+    out_cats.Set(common::AsCat(out_split.thresh));
     return;
   }

@@ -339,7 +337,7 @@ __device__ void SetCategoricalSplit(const EvaluateSplitSharedInputs &shared_inpu
   assert(partition > 0 && "Invalid partition.");
   thrust::for_each(thrust::seq, beg, beg + partition, [&](size_t c) {
     auto cat = shared_inputs.feature_values[c - node_offset];
-    out_split.SetCat(cat);
+    out_cats.Set(common::AsCat(cat));
   });
 }

@@ -427,8 +425,7 @@ void GPUHistEvaluator::EvaluateSplits(

   if (split.is_cat) {
     SetCategoricalSplit(shared_inputs, d_sorted_idx, fidx, i,
-                        device_cats_accessor.GetNodeCatStorage(input.nidx),
-                        &out_splits[i]);
+                        device_cats_accessor.GetNodeCatStorage(input.nidx), &out_splits[i]);
   }

   float base_weight =
@@ -460,6 +457,4 @@ GPUExpandEntry GPUHistEvaluator::EvaluateSingleSplit(
                               cudaMemcpyDeviceToHost));
   return root_entry;
 }
-}  // namespace tree
-}  // namespace xgboost
+}  // namespace xgboost::tree
@@ -37,8 +37,8 @@ struct EvaluateSplitSharedInputs {
   common::Span<const float> feature_values;
   common::Span<const float> min_fvalue;
   bool is_dense;
-  XGBOOST_DEVICE auto Features() const { return feature_segments.size() - 1; }
-  __device__ auto FeatureBins(bst_feature_t fidx) const {
+  [[nodiscard]] XGBOOST_DEVICE auto Features() const { return feature_segments.size() - 1; }
+  [[nodiscard]] __device__ std::uint32_t FeatureBins(bst_feature_t fidx) const {
     return feature_segments[fidx + 1] - feature_segments[fidx];
   }
 };
@@ -102,7 +102,7 @@ class GPUHistEvaluator {
   }

   /**
-   * \brief Get device category storage of nidx for internal calculation.
+   * @brief Get device category storage of nidx for internal calculation.
    */
   auto DeviceCatStorage(const std::vector<bst_node_t> &nidx) {
     if (!has_categoricals_) return CatAccessor{};
@@ -117,8 +117,8 @@ class GPUHistEvaluator {
   /**
    * \brief Get sorted index storage based on the left node of inputs.
    */
-  auto SortedIdx(int num_nodes, bst_feature_t total_bins) {
-    if(!need_sort_histogram_) return common::Span<bst_feature_t>();
+  auto SortedIdx(int num_nodes, bst_bin_t total_bins) {
+    if (!need_sort_histogram_) return common::Span<bst_feature_t>{};
     cat_sorted_idx_.resize(num_nodes * total_bins);
     return dh::ToSpan(cat_sorted_idx_);
   }
@@ -142,12 +142,22 @@ class GPUHistEvaluator {
    * \brief Get host category storage for nidx. Different from the internal version, this
    * returns strictly 1 node.
    */
-  common::Span<CatST const> GetHostNodeCats(bst_node_t nidx) const {
+  [[nodiscard]] common::Span<CatST const> GetHostNodeCats(bst_node_t nidx) const {
     copy_stream_.View().Sync();
     auto cats_out = common::Span<CatST const>{h_split_cats_}.subspan(
         nidx * node_categorical_storage_size_, node_categorical_storage_size_);
     return cats_out;
   }
+
+  [[nodiscard]] auto GetDeviceNodeCats(bst_node_t nidx) {
+    copy_stream_.View().Sync();
+    if (has_categoricals_) {
+      CatAccessor accessor = {dh::ToSpan(split_cats_), node_categorical_storage_size_};
+      return common::KCatBitField{accessor.GetNodeCatStorage(nidx)};
+    } else {
+      return common::KCatBitField{};
+    }
+  }
   /**
    * \brief Add a split to the internal tree evaluator.
    */
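`GetDeviceNodeCats` above hands out a per-node slice of one flat category buffer owned by the evaluator, so callers receive a cheap read-only view rather than a copy. A host-side analogue of that accessor pattern (names illustrative, not the XGBoost API):

    #include <cassert>
    #include <cstddef>
    #include <cstdint>
    #include <vector>

    // One flat buffer, fixed stride per node, handed out as read-only views so
    // split candidates never own category storage themselves.
    struct NodeCatAccessor {
      std::vector<std::uint32_t> storage;  // all nodes, concatenated
      std::size_t stride;                  // storage words per node

      std::uint32_t const* NodeCats(std::size_t nidx) const {
        return storage.data() + nidx * stride;
      }
    };

    int main() {
      NodeCatAccessor acc{std::vector<std::uint32_t>(8, 0u), 2};
      assert(acc.NodeCats(3) == acc.storage.data() + 6);  // node 3 starts at word 6
    }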
@@ -64,7 +64,6 @@ struct DeviceSplitCandidate {
   // split.
   bst_cat_t thresh{-1};

-  common::CatBitField split_cats;
   bool is_cat { false };

   GradientPairInt64 left_sum;
@@ -72,12 +71,6 @@ struct DeviceSplitCandidate {

   XGBOOST_DEVICE DeviceSplitCandidate() {}  // NOLINT

-  template <typename T>
-  XGBOOST_DEVICE void SetCat(T c) {
-    this->split_cats.Set(common::AsCat(c));
-    fvalue = std::max(this->fvalue, static_cast<float>(c));
-  }
-
   XGBOOST_DEVICE void Update(float loss_chg_in, DefaultDirection dir_in, float fvalue_in,
                              int findex_in, GradientPairInt64 left_sum_in,
                              GradientPairInt64 right_sum_in, bool cat,
@@ -100,7 +93,8 @@ struct DeviceSplitCandidate {
    */
   XGBOOST_DEVICE void UpdateCat(float loss_chg_in, DefaultDirection dir_in, bst_cat_t thresh_in,
                                 bst_feature_t findex_in, GradientPairInt64 left_sum_in,
-                                GradientPairInt64 right_sum_in, GPUTrainingParam const& param, const GradientQuantiser& quantiser) {
+                                GradientPairInt64 right_sum_in, GPUTrainingParam const& param,
+                                const GradientQuantiser& quantiser) {
     if (loss_chg_in > loss_chg &&
         quantiser.ToFloatingPoint(left_sum_in).GetHess() >= param.min_child_weight &&
         quantiser.ToFloatingPoint(right_sum_in).GetHess() >= param.min_child_weight) {
@@ -115,7 +109,7 @@ struct DeviceSplitCandidate {
     }
   }

-  XGBOOST_DEVICE bool IsValid() const { return loss_chg > 0.0f; }
+  [[nodiscard]] XGBOOST_DEVICE bool IsValid() const { return loss_chg > 0.0f; }

   friend std::ostream& operator<<(std::ostream& os, DeviceSplitCandidate const& c) {
     os << "loss_chg:" << c.loss_chg << ", "
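Dropping `split_cats` (and `SetCat`) above turns `DeviceSplitCandidate` into a plain value type that can be copied between host and device without dragging a bitset along; category sets are now looked up from evaluator storage by node id. A stand-in for the resulting split of responsibilities:

    #include <cstdint>
    #include <vector>

    struct Candidate {           // plain value type: cheap to copy to/from device
      std::int32_t thresh{-1};
      bool is_cat{false};
    };

    struct CatStore {            // owns the variable-length category bitsets
      std::vector<std::uint32_t> words;
      std::vector<std::uint32_t> const& Lookup() const { return words; }
    };

    int main() {
      Candidate c{3, true};       // passed by value into kernels
      CatStore store{{0b1000u}};  // fetched separately, keyed by node id
      return (c.is_cat && !store.Lookup().empty()) ? 0 : 1;
    }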
@@ -7,9 +7,9 @@

 #include <algorithm>
 #include <cmath>
-#include <limits>
-#include <memory>
-#include <utility>
+#include <cstddef>  // for size_t
+#include <memory>   // for unique_ptr, make_unique
+#include <utility>  // for move
 #include <vector>

 #include "../collective/communicator-inl.cuh"
@@ -216,9 +216,9 @@ struct GPUHistMakerDevice {
   void InitFeatureGroupsOnce() {
     if (!feature_groups) {
       CHECK(page);
-      feature_groups.reset(new FeatureGroups(page->Cuts(), page->is_dense,
-                                             dh::MaxSharedMemoryOptin(ctx_->gpu_id),
-                                             sizeof(GradientPairPrecise)));
+      feature_groups = std::make_unique<FeatureGroups>(page->Cuts(), page->is_dense,
+                                                       dh::MaxSharedMemoryOptin(ctx_->gpu_id),
+                                                       sizeof(GradientPairPrecise));
     }
   }
@@ -244,10 +244,10 @@ struct GPUHistMakerDevice {

     this->evaluator_.Reset(page->Cuts(), feature_types, dmat->Info().num_col_, param, ctx_->gpu_id);

-    quantiser.reset(new GradientQuantiser(this->gpair));
+    quantiser = std::make_unique<GradientQuantiser>(this->gpair);

     row_partitioner.reset();  // Release the device memory first before reallocating
-    row_partitioner.reset(new RowPartitioner(ctx_->gpu_id, sample.sample_rows));
+    row_partitioner = std::make_unique<RowPartitioner>(ctx_->gpu_id, sample.sample_rows);

     // Init histogram
     hist.Init(ctx_->gpu_id, page->Cuts().TotalBins());
@@ -294,7 +294,7 @@ struct GPUHistMakerDevice {
     dh::TemporaryArray<GPUExpandEntry> entries(2 * candidates.size());
     // Store the feature set ptrs so they dont go out of scope before the kernel is called
     std::vector<std::shared_ptr<HostDeviceVector<bst_feature_t>>> feature_sets;
-    for (size_t i = 0; i < candidates.size(); i++) {
+    for (std::size_t i = 0; i < candidates.size(); i++) {
       auto candidate = candidates.at(i);
       int left_nidx = tree[candidate.nid].LeftChild();
       int right_nidx = tree[candidate.nid].RightChild();
@@ -327,9 +327,8 @@ struct GPUHistMakerDevice {
                                   d_node_inputs.data().get(), h_node_inputs.data(),
                                   h_node_inputs.size() * sizeof(EvaluateSplitInputs), cudaMemcpyDefault));

-    this->evaluator_.EvaluateSplits(nidx, max_active_features,
-                                    dh::ToSpan(d_node_inputs), shared_inputs,
-                                    dh::ToSpan(entries));
+    this->evaluator_.EvaluateSplits(nidx, max_active_features, dh::ToSpan(d_node_inputs),
+                                    shared_inputs, dh::ToSpan(entries));
     dh::safe_cuda(cudaMemcpyAsync(pinned_candidates_out.data(),
                                   entries.data().get(), sizeof(GPUExpandEntry) * entries.size(),
                                   cudaMemcpyDeviceToHost));
@@ -366,23 +365,29 @@ struct GPUHistMakerDevice {
   struct NodeSplitData {
     RegTree::Node split_node;
     FeatureType split_type;
-    common::CatBitField node_cats;
+    common::KCatBitField node_cats;
   };

-  void UpdatePosition(const std::vector<GPUExpandEntry>& candidates, RegTree* p_tree) {
-    if (candidates.empty()) return;
-    std::vector<int> nidx(candidates.size());
-    std::vector<int> left_nidx(candidates.size());
-    std::vector<int> right_nidx(candidates.size());
+  void UpdatePosition(std::vector<GPUExpandEntry> const& candidates, RegTree* p_tree) {
+    if (candidates.empty()) {
+      return;
+    }
+
+    std::vector<bst_node_t> nidx(candidates.size());
+    std::vector<bst_node_t> left_nidx(candidates.size());
+    std::vector<bst_node_t> right_nidx(candidates.size());
     std::vector<NodeSplitData> split_data(candidates.size());

     for (size_t i = 0; i < candidates.size(); i++) {
-      auto& e = candidates[i];
+      auto const& e = candidates[i];
       RegTree::Node split_node = (*p_tree)[e.nid];
       auto split_type = p_tree->NodeSplitType(e.nid);
       nidx.at(i) = e.nid;
       left_nidx.at(i) = split_node.LeftChild();
       right_nidx.at(i) = split_node.RightChild();
-      split_data.at(i) = NodeSplitData{split_node, split_type, e.split.split_cats};
+      split_data.at(i) = NodeSplitData{split_node, split_type, evaluator_.GetDeviceNodeCats(e.nid)};
+
+      CHECK_EQ(split_type == FeatureType::kCategorical, e.split.is_cat);
     }

     auto d_matrix = page->GetDeviceAccessor(ctx_->gpu_id);
@@ -390,7 +395,7 @@ struct GPUHistMakerDevice {
         nidx, left_nidx, right_nidx, split_data,
         [=] __device__(bst_uint ridx, const NodeSplitData& data) {
           // given a row index, returns the node id it belongs to
-          bst_float cut_value = d_matrix.GetFvalue(ridx, data.split_node.SplitIndex());
+          float cut_value = d_matrix.GetFvalue(ridx, data.split_node.SplitIndex());
           // Missing value
           bool go_left = true;
           if (isnan(cut_value)) {
@@ -620,7 +625,6 @@ struct GPUHistMakerDevice {
     CHECK(common::CheckNAN(candidate.split.fvalue));
     std::vector<common::CatBitField::value_type> split_cats;

-    CHECK_GT(candidate.split.split_cats.Bits().size(), 0);
     auto h_cats = this->evaluator_.GetHostNodeCats(candidate.nid);
     auto n_bins_feature = page->Cuts().FeatureBins(candidate.split.findex);
     split_cats.resize(common::CatBitField::ComputeStorageSize(n_bins_feature), 0);
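A CPU sketch of the per-row routing performed by the `UpdatePosition` callback above: missing values take the node's default direction; otherwise the threshold (or, for categorical splits, the bitset) decides. Simplified here to the numerical case, with illustrative names:

    #include <cmath>

    struct Node {
      float cut;          // split threshold
      bool default_left;  // direction taken by missing values
    };

    int Route(Node const& node, float fvalue, int left, int right) {
      bool go_left = node.default_left;  // missing-value path
      if (!std::isnan(fvalue)) {
        go_left = fvalue < node.cut;     // numerical split
      }
      return go_left ? left : right;
    }

    int main() { return Route({0.5f, true}, NAN, 1, 2) == 1 ? 0 : 1; }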
@@ -8,13 +8,18 @@ echo "--- Build XGBoost JVM packages scala 2.12"
 tests/ci_build/ci_build.sh jvm docker tests/ci_build/build_jvm_packages.sh \
   ${SPARK_VERSION}

+echo "--- Stash XGBoost4J JARs (Scala 2.12)"
+buildkite-agent artifact upload "jvm-packages/xgboost4j/target/*.jar"
+buildkite-agent artifact upload "jvm-packages/xgboost4j-spark/target/*.jar"
+buildkite-agent artifact upload "jvm-packages/xgboost4j-flink/target/*.jar"
+buildkite-agent artifact upload "jvm-packages/xgboost4j-example/target/*.jar"
+
 echo "--- Build XGBoost JVM packages scala 2.13"

 tests/ci_build/ci_build.sh jvm docker tests/ci_build/build_jvm_packages.sh \
   ${SPARK_VERSION} "" "" "true"

-echo "--- Stash XGBoost4J JARs"
+echo "--- Stash XGBoost4J JARs (Scala 2.13)"
 buildkite-agent artifact upload "jvm-packages/xgboost4j/target/*.jar"
 buildkite-agent artifact upload "jvm-packages/xgboost4j-spark/target/*.jar"
 buildkite-agent artifact upload "jvm-packages/xgboost4j-flink/target/*.jar"
8  tests/buildkite/pipeline-mac-m1.yml  Normal file
@@ -0,0 +1,8 @@
+steps:
+  - block: ":rocket: Run this test job"
+    if: build.pull_request.id != null || build.branch =~ /^dependabot\//
+  - label: ":macos: Build and Test XGBoost for MacOS M1 with Clang 11"
+    command: "tests/buildkite/test-macos-m1-clang11.sh"
+    key: mac-m1-appleclang11
+    agents:
+      queue: mac-mini-m1
50  tests/buildkite/test-macos-m1-clang11.sh  Executable file
@@ -0,0 +1,50 @@
+#!/bin/bash
+
+set -euo pipefail
+
+source tests/buildkite/conftest.sh
+
+# Display system info
+echo "--- Display system information"
+set -x
+system_profiler SPSoftwareDataType
+sysctl -n machdep.cpu.brand_string
+uname -m
+set +x
+
+# Build XGBoost4J binary
+echo "--- Build libxgboost4j.dylib"
+set -x
+mkdir build
+pushd build
+export JAVA_HOME=$(/usr/libexec/java_home)
+cmake .. -GNinja -DJVM_BINDINGS=ON -DUSE_OPENMP=OFF -DCMAKE_OSX_DEPLOYMENT_TARGET=10.15
+ninja -v
+popd
+rm -rf build
+set +x
+
+echo "--- Upload Python wheel"
+set -x
+pushd lib
+mv -v libxgboost4j.dylib libxgboost4j_m1_${BUILDKITE_COMMIT}.dylib
+buildkite-agent artifact upload libxgboost4j_m1_${BUILDKITE_COMMIT}.dylib
+if [[ ($is_pull_request == 0) && ($is_release_branch == 1) ]]
+then
+  aws s3 cp libxgboost4j_m1_${BUILDKITE_COMMIT}.dylib \
+    s3://xgboost-nightly-builds/${BRANCH_NAME}/libxgboost4j/ \
+    --acl public-read --no-progress
+fi
+popd
+set +x
+
+# Ensure that XGBoost can be built with Clang 11
+echo "--- Build and Test XGBoost with MacOS M1, Clang 11"
+set -x
+LLVM11_PATH=$(brew --prefix llvm\@11)
+mkdir build
+pushd build
+cmake .. -GNinja -DCMAKE_C_COMPILER=${LLVM11_PATH}/bin/clang \
+  -DCMAKE_CXX_COMPILER=${LLVM11_PATH}/bin/clang++ -DGOOGLE_TEST=ON \
+  -DUSE_DMLC_GTEST=ON
+ninja -v
@@ -1,5 +1,5 @@
 ARG CUDA_VERSION_ARG
-FROM nvidia/cuda:$CUDA_VERSION_ARG-devel-centos7
+FROM nvcr.io/nvidia/cuda:$CUDA_VERSION_ARG-devel-centos7
 ARG CUDA_VERSION_ARG
 ARG NCCL_VERSION_ARG
 ARG RAPIDS_VERSION_ARG
@@ -1,5 +1,5 @@
 ARG CUDA_VERSION_ARG
-FROM nvidia/cuda:$CUDA_VERSION_ARG-devel-centos7
+FROM nvcr.io/nvidia/cuda:$CUDA_VERSION_ARG-devel-centos7
 ARG CUDA_VERSION_ARG

 # Install all basic requirements
@@ -1,5 +1,5 @@
 ARG CUDA_VERSION_ARG
-FROM nvidia/cuda:$CUDA_VERSION_ARG-devel-centos7
+FROM nvcr.io/nvidia/cuda:$CUDA_VERSION_ARG-devel-centos7
 ARG CUDA_VERSION_ARG
 ARG NCCL_VERSION_ARG
@@ -27,6 +27,9 @@ fi
 mvn_profile_string=""
 if [ "x$use_scala213" != "x" ]; then
   export mvn_profile_string="-Pdefault,scala-2.13"
+  cd ..
+  python dev/change_scala_version.py --scala-version 2.13 --purge-artifacts
+  cd jvm-packages
 fi

 mvn --no-transfer-progress package $mvn_profile_string -Dspark.version=${spark_version} $gpu_options
@@ -32,11 +32,10 @@ dependencies:
 - jsonschema
 - boto3
 - awscli
-- py-ubjson
 - cffi
 - pyarrow
 - pyspark>=3.4.0
 - cloudpickle
 - pip:
   - sphinx_rtd_theme
-  - datatable
+  - py-ubjson
@@ -27,6 +27,9 @@ rm -rf ../build/
 # Deploy to S3 bucket xgboost-maven-repo
 mvn --no-transfer-progress package deploy -P default,gpu,release-to-s3 -Dspark.version=${spark_version} -DskipTests
 # Deploy scala 2.13 to S3 bucket xgboost-maven-repo
+cd ..
+python dev/change_scala_version.py --scala-version 2.13 --purge-artifacts
+cd jvm-packages/
 mvn --no-transfer-progress package deploy -P release-to-s3,default,scala-2.13 -Dspark.version=${spark_version} -DskipTests
@@ -21,9 +21,18 @@ if [ ! -z "$RUN_INTEGRATION_TEST" ]; then
 fi

 # including maven profiles for different scala versions: 2.12 is the default at the moment.
-for _maven_profile_string in "" "-Pdefault,scala-2.13"; do
+for scala_binary_version in "2.12" "2.13"; do
+  cd ..
+  python dev/change_scala_version.py --scala-version ${scala_binary_version}
+  cd jvm-packages
   scala_version=$(mvn help:evaluate $_maven_profile_string -Dexpression=scala.version -q -DforceStdout)
-  scala_binary_version=$(mvn help:evaluate $_maven_profile_string -Dexpression=scala.binary.version -q -DforceStdout)
+  if [[ "$scala_binary_version" == "2.12" ]]; then
+    _maven_profile_string=""
+  elif [[ "$scala_binary_version" == "2.13" ]]; then
+    _maven_profile_string="-Pdefault,scala-2.13"
+  else
+    echo "Unexpected scala version: $scala_version ($scala_binary_version)."
+  fi

   # Install XGBoost4J JAR into local Maven repository
   mvn --no-transfer-progress install:install-file -Dfile=./xgboost4j/target/xgboost4j_${scala_binary_version}-${xgboost4j_version}.jar -DgroupId=ml.dmlc -DartifactId=xgboost4j_${scala_binary_version} -Dversion=${xgboost4j_version} -Dpackaging=jar
@@ -148,7 +148,8 @@ TEST(IO, Resource) {
   fout << 1.0 << std::endl;
   fout.close();

-  auto resource = std::make_shared<MmapResource>(path, 0, sizeof(double));
+  auto resource = std::shared_ptr<MmapResource>{
+      new MmapResource{path, 0, sizeof(double)}};
   ASSERT_EQ(resource->Size(), sizeof(double));
   ASSERT_EQ(resource->Type(), ResourceHandler::kMmap);
   ASSERT_EQ(resource->DataAs<double>()[0], val);
42  tests/cpp/gbm/test_gblinear.cu  Normal file
@@ -0,0 +1,42 @@
+/**
+ * Copyright 2023, XGBoost Contributors
+ */
+#include <gtest/gtest.h>
+#include <xgboost/global_config.h>  // for GlobalConfigThreadLocalStore
+#include <xgboost/json.h>           // for Json, Object
+#include <xgboost/learner.h>        // for Learner
+
+#include <algorithm>  // for transform
+#include <string>     // for string
+#include <utility>    // for swap
+
+#include "../helpers.h"  // for RandomDataGenerator
+
+namespace xgboost {
+TEST(GBlinear, DispatchUpdater) {
+  auto verbosity = 3;
+  std::swap(GlobalConfigThreadLocalStore::Get()->verbosity, verbosity);
+
+  auto test = [](std::string device) {
+    auto p_fmat = RandomDataGenerator{10, 10, 0.0f}.GenerateDMatrix(true);
+    std::unique_ptr<Learner> learner{Learner::Create({p_fmat})};
+    learner->SetParams(
+        Args{{"booster", "gblinear"}, {"updater", "coord_descent"}, {"device", device}});
+    learner->Configure();
+    for (std::int32_t iter = 0; iter < 3; ++iter) {
+      learner->UpdateOneIter(iter, p_fmat);
+    }
+    Json config{Object{}};
+    ::testing::internal::CaptureStderr();
+    learner->SaveConfig(&config);
+    auto str = ::testing::internal::GetCapturedStderr();
+    std::transform(device.cbegin(), device.cend(), device.begin(),
+                   [](char c) { return std::toupper(c); });
+    ASSERT_NE(str.find(device), std::string::npos);
+  };
+  test("cpu");
+  test("gpu");
+
+  std::swap(GlobalConfigThreadLocalStore::Get()->verbosity, verbosity);
+}
+}  // namespace xgboost
@@ -58,21 +58,6 @@ void TestInplaceFallback(Context const* ctx) {
   HostDeviceVector<float>* out_predt{nullptr};
   ConsoleLogger::Configure(Args{{"verbosity", "1"}});
   std::string output;
-  // test whether the warning is raised
-#if !defined(_WIN32)
-  // Windows has issue with CUDA and thread local storage. For some reason, on Windows a
-  // cudaInitializationError is raised during destruction of `HostDeviceVector`. This
-  // might be related to https://github.com/dmlc/xgboost/issues/5793
-  ::testing::internal::CaptureStderr();
-  std::thread{[&] {
-    // Launch a new thread to ensure a warning is raised as we prevent over-verbose
-    // warning by using thread-local flags.
-    learner->InplacePredict(p_m, PredictionType::kValue, std::numeric_limits<float>::quiet_NaN(),
-                            &out_predt, 0, 0);
-  }}.join();
-  output = testing::internal::GetCapturedStderr();
-  ASSERT_NE(output.find("Falling back"), std::string::npos);
-#endif

   learner->InplacePredict(p_m, PredictionType::kValue, std::numeric_limits<float>::quiet_NaN(),
                           &out_predt, 0, 0);
@ -6,6 +6,7 @@
|
|||||||
#include <xgboost/objective.h>
|
#include <xgboost/objective.h>
|
||||||
|
|
||||||
#include "../helpers.h"
|
#include "../helpers.h"
|
||||||
|
#include "../objective_helpers.h"
|
||||||
|
|
||||||
TEST(Objective, UnknownFunction) {
|
TEST(Objective, UnknownFunction) {
|
||||||
xgboost::ObjFunction* obj = nullptr;
|
xgboost::ObjFunction* obj = nullptr;
|
||||||
@ -43,4 +44,61 @@ TEST(Objective, PredTransform) {
|
|||||||
ASSERT_TRUE(predts.HostCanWrite());
|
ASSERT_TRUE(predts.HostCanWrite());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
class TestDefaultObjConfig : public ::testing::TestWithParam<std::string> {
|
||||||
|
Context ctx_;
|
||||||
|
|
||||||
|
public:
|
||||||
|
void Run(std::string objective) {
|
||||||
|
auto Xy = MakeFmatForObjTest(objective);
|
||||||
|
std::unique_ptr<Learner> learner{Learner::Create({Xy})};
|
||||||
|
std::unique_ptr<ObjFunction> objfn{ObjFunction::Create(objective, &ctx_)};
|
||||||
|
|
||||||
|
learner->SetParam("objective", objective);
|
||||||
|
if (objective.find("multi") != std::string::npos) {
|
||||||
|
learner->SetParam("num_class", "3");
|
||||||
|
objfn->Configure(Args{{"num_class", "3"}});
|
||||||
|
} else if (objective.find("quantile") != std::string::npos) {
|
||||||
|
learner->SetParam("quantile_alpha", "0.5");
|
||||||
|
objfn->Configure(Args{{"quantile_alpha", "0.5"}});
|
||||||
|
} else {
|
||||||
|
objfn->Configure(Args{});
|
||||||
|
}
|
||||||
|
learner->Configure();
|
||||||
|
learner->UpdateOneIter(0, Xy);
|
||||||
|
learner->EvalOneIter(0, {Xy}, {"train"});
|
||||||
|
Json config{Object{}};
|
||||||
|
learner->SaveConfig(&config);
|
||||||
|
auto jobj = get<Object const>(config["learner"]["objective"]);
|
||||||
|
|
||||||
|
ASSERT_TRUE(jobj.find("name") != jobj.cend());
|
||||||
|
// FIXME(jiamingy): We should have the following check, but some legacy parameter like
|
||||||
|
// "pos_weight", "delta_step" in objectives are not in metrics.
|
||||||
|
|
||||||
|
// if (jobj.size() > 1) {
|
||||||
|
// ASSERT_FALSE(IsA<Null>(objfn->DefaultMetricConfig()));
|
||||||
|
// }
|
||||||
|
auto mconfig = objfn->DefaultMetricConfig();
|
||||||
|
if (!IsA<Null>(mconfig)) {
|
||||||
|
// make sure metric can handle it
|
||||||
|
std::unique_ptr<Metric> metricfn{Metric::Create(get<String const>(mconfig["name"]), &ctx_)};
|
||||||
|
metricfn->LoadConfig(mconfig);
|
||||||
|
Json loaded(Object{});
|
||||||
|
metricfn->SaveConfig(&loaded);
|
||||||
|
metricfn->Configure(Args{});
|
||||||
|
ASSERT_EQ(mconfig, loaded);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
TEST_P(TestDefaultObjConfig, Objective) {
|
||||||
|
std::string objective = GetParam();
|
||||||
|
this->Run(objective);
|
||||||
|
}
|
||||||
|
|
||||||
|
INSTANTIATE_TEST_SUITE_P(Objective, TestDefaultObjConfig,
|
||||||
|
::testing::ValuesIn(MakeObjNamesForTest()),
|
||||||
|
[](const ::testing::TestParamInfo<TestDefaultObjConfig::ParamType>& info) {
|
||||||
|
return ObjTestNameGenerator(info);
|
||||||
|
});
|
||||||
} // namespace xgboost
|
} // namespace xgboost
|
||||||
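
The new TestDefaultObjConfig runs every registered objective once and checks that the objective's default metric configuration loads into the corresponding Metric and round-trips through SaveConfig. The user-facing contract, sketched in Python (binary:logistic and its documented default metric logloss serve as the example):

import numpy as np
import xgboost as xgb

X = np.random.randn(128, 4)
y = (X[:, 0] > 0).astype(np.float64)
dtrain = xgb.DMatrix(X, y)
evals_result: dict = {}
xgb.train(
    {"objective": "binary:logistic"},  # no eval_metric given
    dtrain,
    num_boost_round=2,
    evals=[(dtrain, "train")],
    evals_result=evals_result,
)
# the objective supplied its default metric
print(list(evals_result["train"].keys()))  # -> ['logloss']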
31 tests/cpp/objective_helpers.cc Normal file
@@ -0,0 +1,31 @@
+/**
+ * Copyright (c) 2023, XGBoost contributors
+ */
+#include "objective_helpers.h"
+
+#include "../../src/common/linalg_op.h"  // for begin, end
+#include "helpers.h"                     // for RandomDataGenerator
+
+namespace xgboost {
+std::shared_ptr<DMatrix> MakeFmatForObjTest(std::string const& obj) {
+  auto constexpr kRows = 10, kCols = 10;
+  auto p_fmat = RandomDataGenerator{kRows, kCols, 0}.GenerateDMatrix(true);
+  auto& h_upper = p_fmat->Info().labels_upper_bound_.HostVector();
+  auto& h_lower = p_fmat->Info().labels_lower_bound_.HostVector();
+  h_lower.resize(kRows);
+  h_upper.resize(kRows);
+  for (size_t i = 0; i < kRows; ++i) {
+    h_lower[i] = 1;
+    h_upper[i] = 10;
+  }
+  if (obj.find("rank:") != std::string::npos) {
+    auto h_label = p_fmat->Info().labels.HostView();
+    std::size_t k = 0;
+    for (auto& v : h_label) {
+      v = k % 2 == 0;
+      ++k;
+    }
+  }
+  return p_fmat;
+};
+}  // namespace xgboost
@@ -1,6 +1,8 @@
 /**
  * Copyright (c) 2023, XGBoost contributors
  */
+#pragma once
+
 #include <dmlc/registry.h>  // for Registry
 #include <gtest/gtest.h>
 #include <xgboost/objective.h>  // for ObjFunctionReg
@@ -29,4 +31,6 @@ inline std::string ObjTestNameGenerator(const ::testing::TestParamInfo<ParamType
   }
   return name;
 };
+
+std::shared_ptr<DMatrix> MakeFmatForObjTest(std::string const& obj);
 }  // namespace xgboost
@@ -655,33 +655,11 @@ TEST_F(InitBaseScore, InitWithPredict) { this->TestInitWithPredt(); }
 TEST_F(InitBaseScore, UpdateProcess) { this->TestUpdateProcess(); }
 
 class TestColumnSplit : public ::testing::TestWithParam<std::string> {
-  static auto MakeFmat(std::string const& obj) {
-    auto constexpr kRows = 10, kCols = 10;
-    auto p_fmat = RandomDataGenerator{kRows, kCols, 0}.GenerateDMatrix(true);
-    auto& h_upper = p_fmat->Info().labels_upper_bound_.HostVector();
-    auto& h_lower = p_fmat->Info().labels_lower_bound_.HostVector();
-    h_lower.resize(kRows);
-    h_upper.resize(kRows);
-    for (size_t i = 0; i < kRows; ++i) {
-      h_lower[i] = 1;
-      h_upper[i] = 10;
-    }
-    if (obj.find("rank:") != std::string::npos) {
-      auto h_label = p_fmat->Info().labels.HostView();
-      std::size_t k = 0;
-      for (auto& v : h_label) {
-        v = k % 2 == 0;
-        ++k;
-      }
-    }
-    return p_fmat;
-  };
-
   void TestBaseScore(std::string objective, float expected_base_score, Json expected_model) {
     auto const world_size = collective::GetWorldSize();
     auto const rank = collective::GetRank();
 
-    auto p_fmat = MakeFmat(objective);
+    auto p_fmat = MakeFmatForObjTest(objective);
     std::shared_ptr<DMatrix> sliced{p_fmat->SliceCol(world_size, rank)};
     std::unique_ptr<Learner> learner{Learner::Create({sliced})};
     learner->SetParam("tree_method", "approx");
@@ -705,7 +683,7 @@ class TestColumnSplit : public ::testing::TestWithParam<std::string> {
 
  public:
   void Run(std::string objective) {
-    auto p_fmat = MakeFmat(objective);
+    auto p_fmat = MakeFmatForObjTest(objective);
     std::unique_ptr<Learner> learner{Learner::Create({p_fmat})};
     learner->SetParam("tree_method", "approx");
    learner->SetParam("objective", objective);
@@ -191,14 +191,32 @@ class TestGPUPredict:
         np.testing.assert_allclose(predt_0, predt_3)
         np.testing.assert_allclose(predt_0, predt_4)
 
-    def run_inplace_base_margin(self, booster, dtrain, X, base_margin):
+    def run_inplace_base_margin(
+        self, device: int, booster: xgb.Booster, dtrain: xgb.DMatrix, X, base_margin
+    ) -> None:
         import cupy as cp
 
+        booster.set_param({"device": f"cuda:{device}"})
         dtrain.set_info(base_margin=base_margin)
         from_inplace = booster.inplace_predict(data=X, base_margin=base_margin)
         from_dmatrix = booster.predict(dtrain)
         cp.testing.assert_allclose(from_inplace, from_dmatrix)
+
+        booster = booster.copy()  # clear prediction cache.
+        booster.set_param({"device": "cpu"})
+        from_inplace = booster.inplace_predict(data=X, base_margin=base_margin)
+        from_dmatrix = booster.predict(dtrain)
+        cp.testing.assert_allclose(from_inplace, from_dmatrix)
+
+        booster = booster.copy()  # clear prediction cache.
+        base_margin = cp.asnumpy(base_margin)
+        if hasattr(X, "values"):
+            X = cp.asnumpy(X.values)
+        booster.set_param({"device": f"cuda:{device}"})
+        from_inplace = booster.inplace_predict(data=X, base_margin=base_margin)
+        from_dmatrix = booster.predict(dtrain)
+        cp.testing.assert_allclose(from_inplace, from_dmatrix, rtol=1e-6)
 
     def run_inplace_predict_cupy(self, device: int) -> None:
         import cupy as cp
 
@@ -244,7 +262,7 @@ class TestGPUPredict:
         run_threaded_predict(X, rows, predict_dense)
 
         base_margin = cp_rng.randn(rows)
-        self.run_inplace_base_margin(booster, dtrain, X, base_margin)
+        self.run_inplace_base_margin(device, booster, dtrain, X, base_margin)
 
         # Create a wide dataset
         X = cp_rng.randn(100, 10000)
@@ -318,7 +336,7 @@ class TestGPUPredict:
         run_threaded_predict(X, rows, predict_df)
 
         base_margin = cudf.Series(rng.randn(rows))
-        self.run_inplace_base_margin(booster, dtrain, X, base_margin)
+        self.run_inplace_base_margin(0, booster, dtrain, X, base_margin)
 
     @given(
         strategies.integers(1, 10), tm.make_dataset_strategy(), shap_parameter_strategy
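
run_inplace_base_margin now verifies that base_margin is honoured on the training device, after switching the booster to CPU, and again on CUDA with host inputs. A condensed stand-alone sketch of the first two legs (assumes cupy and a CUDA device; shapes and parameters are made up):

import cupy as cp
import xgboost as xgb

X, y = cp.random.randn(256, 8), cp.random.randn(256)
margin = cp.random.randn(256)
dtrain = xgb.DMatrix(X, y, base_margin=margin)
booster = xgb.train({"device": "cuda:0"}, dtrain, num_boost_round=4)

# in-place prediction must match DMatrix prediction given the same margin
cp.testing.assert_allclose(
    booster.inplace_predict(data=X, base_margin=margin), booster.predict(dtrain)
)

# and the same must hold after moving the booster to the CPU
booster.set_param({"device": "cpu"})
cp.testing.assert_allclose(
    booster.inplace_predict(data=X, base_margin=margin), booster.predict(dtrain)
)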
@@ -9,7 +9,7 @@ import pytest
 
 import xgboost as xgb
 from xgboost import testing as tm
-from xgboost.testing.ranking import run_ranking_qid_df
+from xgboost.testing.ranking import run_ranking_categorical, run_ranking_qid_df
 
 sys.path.append("tests/python")
 import test_with_sklearn as twskl  # noqa
@@ -165,6 +165,11 @@ def test_ranking_qid_df():
     run_ranking_qid_df(cudf, "gpu_hist")
 
 
+@pytest.mark.skipif(**tm.no_pandas())
+def test_ranking_categorical() -> None:
+    run_ranking_categorical(device="cuda")
+
+
 @pytest.mark.skipif(**tm.no_cupy())
 @pytest.mark.mgpu
 def test_device_ordinal() -> None:
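
run_ranking_categorical is defined in xgboost.testing.ranking and is not shown in this diff. A minimal stand-alone sketch of the scenario it covers, ranking with categorical features (all data here is made up):

import numpy as np
import pandas as pd
import xgboost as xgb

X = pd.DataFrame({"f0": pd.Categorical(["a", "b", "a", "c"] * 8)})
y = np.tile([0, 1, 0, 1], 8)
qid = np.repeat([0, 1, 2, 3], 8)  # query groups must be contiguous

ranker = xgb.XGBRanker(tree_method="hist", enable_categorical=True)
ranker.fit(X, y, qid=qid)
scores = ranker.predict(X)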
@@ -211,7 +211,7 @@ class TestPandas:
         y = np.random.randn(kRows)
         w = np.random.uniform(size=kRows).astype(np.float32)
         w_pd = pd.DataFrame(w)
-        data = xgb.DMatrix(X, y, w_pd)
+        data = xgb.DMatrix(X, y, weight=w_pd)
 
         assert data.num_row() == kRows
         assert data.num_col() == kCols
@@ -301,14 +301,14 @@ class TestPandas:
 
     @pytest.mark.parametrize("DMatrixT", [xgb.DMatrix, xgb.QuantileDMatrix])
     def test_nullable_type(self, DMatrixT) -> None:
-        from pandas.api.types import is_categorical_dtype
+        from xgboost.data import is_pd_cat_dtype
 
         for orig, df in pd_dtypes():
             if hasattr(df.dtypes, "__iter__"):
-                enable_categorical = any(is_categorical_dtype for dtype in df.dtypes)
+                enable_categorical = any(is_pd_cat_dtype(dtype) for dtype in df.dtypes)
             else:
                 # series
-                enable_categorical = is_categorical_dtype(df.dtype)
+                enable_categorical = is_pd_cat_dtype(df.dtype)
 
             f0_orig = orig[orig.columns[0]] if isinstance(orig, pd.DataFrame) else orig
             f0 = df[df.columns[0]] if isinstance(df, pd.DataFrame) else df
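
Besides moving to the is_pd_cat_dtype helper, this hunk fixes a genuine bug: the old generator expression never called the predicate, so any(...) was always true. A small illustration:

from pandas.api.types import is_categorical_dtype  # deprecated in newer pandas

dtypes = [int, float]
# the generator yields the function object itself for every dtype, and a
# function object is truthy, so this is always True:
assert any(is_categorical_dtype for dtype in dtypes)
# calling the predicate per dtype gives the intended answer:
assert not any(is_categorical_dtype(dtype) for dtype in dtypes)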
@@ -12,7 +12,7 @@ from sklearn.utils.estimator_checks import parametrize_with_checks
 
 import xgboost as xgb
 from xgboost import testing as tm
-from xgboost.testing.ranking import run_ranking_qid_df
+from xgboost.testing.ranking import run_ranking_categorical, run_ranking_qid_df
 from xgboost.testing.shared import get_feature_weights, validate_data_initialization
 from xgboost.testing.updater import get_basescore
 
@@ -173,6 +173,11 @@ def test_ranking():
     np.testing.assert_almost_equal(pred, pred_orig)
 
 
+@pytest.mark.skipif(**tm.no_pandas())
+def test_ranking_categorical() -> None:
+    run_ranking_categorical(device="cpu")
+
+
 def test_ranking_metric() -> None:
     from sklearn.metrics import roc_auc_score
 
@@ -935,6 +940,7 @@ def save_load_model(model_path):
     predt_0 = clf.predict(X)
     clf.save_model(model_path)
     clf.load_model(model_path)
+    assert clf.booster == "gblinear"
     predt_1 = clf.predict(X)
     np.testing.assert_allclose(predt_0, predt_1)
     assert clf.best_iteration == best_iteration
@@ -950,25 +956,26 @@ def save_load_model(model_path):
 
 def test_save_load_model():
     with tempfile.TemporaryDirectory() as tempdir:
-        model_path = os.path.join(tempdir, 'digits.model')
+        model_path = os.path.join(tempdir, "digits.model")
         save_load_model(model_path)
 
     with tempfile.TemporaryDirectory() as tempdir:
-        model_path = os.path.join(tempdir, 'digits.model.json')
+        model_path = os.path.join(tempdir, "digits.model.json")
         save_load_model(model_path)
 
     from sklearn.datasets import load_digits
     from sklearn.model_selection import train_test_split
 
     with tempfile.TemporaryDirectory() as tempdir:
-        model_path = os.path.join(tempdir, 'digits.model.ubj')
+        model_path = os.path.join(tempdir, "digits.model.ubj")
         digits = load_digits(n_class=2)
-        y = digits['target']
-        X = digits['data']
-        booster = xgb.train({'tree_method': 'hist',
-                             'objective': 'binary:logistic'},
-                            dtrain=xgb.DMatrix(X, y),
-                            num_boost_round=4)
+        y = digits["target"]
+        X = digits["data"]
+        booster = xgb.train(
+            {"tree_method": "hist", "objective": "binary:logistic"},
+            dtrain=xgb.DMatrix(X, y),
+            num_boost_round=4,
+        )
         predt_0 = booster.predict(xgb.DMatrix(X))
         booster.save_model(model_path)
         cls = xgb.XGBClassifier()
@@ -1002,6 +1009,8 @@ def test_save_load_model():
     clf = xgb.XGBClassifier()
     clf.load_model(model_path)
     assert clf.classes_.size == 10
+    assert clf.objective == "multi:softprob"
+
     np.testing.assert_equal(clf.classes_, np.arange(10))
     assert clf.n_classes_ == 10
 
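
The added assertions pin down that loading a saved model restores estimator attributes such as booster and objective on the sklearn wrapper. A short sketch of the round-trip being exercised (the path is illustrative):

import numpy as np
import xgboost as xgb

X = np.random.randn(64, 4)
y = (X[:, 0] > 0).astype(np.int64)
booster = xgb.train(
    {"objective": "binary:logistic"}, xgb.DMatrix(X, y), num_boost_round=2
)
booster.save_model("clf.ubj")  # illustrative path

clf = xgb.XGBClassifier()
clf.load_model("clf.ubj")  # a raw Booster loads into the sklearn wrapper
assert clf.objective == "binary:logistic"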
@@ -2,6 +2,7 @@ import json
 import logging
 import subprocess
 
+import numpy as np
 import pytest
 import sklearn
 
@@ -13,7 +14,7 @@ from pyspark.ml.linalg import Vectors
 from pyspark.ml.tuning import CrossValidator, ParamGridBuilder
 from pyspark.sql import SparkSession
 
-from xgboost.spark import SparkXGBClassifier, SparkXGBRegressor
+from xgboost.spark import SparkXGBClassifier, SparkXGBRegressor, SparkXGBRegressorModel
 
 gpu_discovery_script_path = "tests/test_distributed/test_gpu_with_spark/discover_gpu.sh"
 
@@ -242,3 +243,33 @@ def test_sparkxgb_regressor_feature_cols_with_gpu(spark_diabetes_dataset_feature
     evaluator = RegressionEvaluator(metricName="rmse")
     rmse = evaluator.evaluate(pred_result_df)
     assert rmse <= 65.0
+
+
+def test_gpu_transform(spark_diabetes_dataset) -> None:
+    regressor = SparkXGBRegressor(device="cuda", num_workers=num_workers)
+    train_df, test_df = spark_diabetes_dataset
+    model: SparkXGBRegressorModel = regressor.fit(train_df)
+
+    # The model trained with GPUs, and transform with GPU configurations.
+    assert model._gpu_transform()
+
+    model.set_device("cpu")
+    assert not model._gpu_transform()
+    # without error
+    cpu_rows = model.transform(test_df).select("prediction").collect()
+
+    regressor = SparkXGBRegressor(device="cpu", num_workers=num_workers)
+    model = regressor.fit(train_df)
+
+    # The model trained with CPUs. Even with GPU configurations,
+    # still prefer transforming with CPUs
+    assert not model._gpu_transform()
+
+    # Set gpu transform explicitly.
+    model.set_device("cuda")
+    assert model._gpu_transform()
+    # without error
+    gpu_rows = model.transform(test_df).select("prediction").collect()
+
+    for cpu, gpu in zip(cpu_rows, gpu_rows):
+        np.testing.assert_allclose(cpu.prediction, gpu.prediction, atol=1e-3)
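
The new test shows that set_device on a fitted model controls the transform device independently of the training device, and that only CUDA-trained models default to GPU transform. A minimal usage sketch (assumes a SparkSession and a train_df with features/label columns):

from xgboost.spark import SparkXGBRegressor

model = SparkXGBRegressor(device="cuda", num_workers=2).fit(train_df)
model.set_device("cpu")  # predict on CPU even though training ran on GPUs
cpu_preds = model.transform(train_df).select("prediction")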
@@ -1932,6 +1932,7 @@ class TestWithDask:
         cls.client = client
         cls.fit(X, y)
         predt_0 = cls.predict(X)
+        proba_0 = cls.predict_proba(X)
 
         with tempfile.TemporaryDirectory() as tmpdir:
             path = os.path.join(tmpdir, "model.pkl")
@@ -1941,7 +1942,9 @@ class TestWithDask:
             with open(path, "rb") as fd:
                 cls = pickle.load(fd)
             predt_1 = cls.predict(X)
+            proba_1 = cls.predict_proba(X)
             np.testing.assert_allclose(predt_0.compute(), predt_1.compute())
+            np.testing.assert_allclose(proba_0.compute(), proba_1.compute())
 
             path = os.path.join(tmpdir, "cls.json")
             cls.save_model(path)
@@ -1950,16 +1953,20 @@ class TestWithDask:
             cls.load_model(path)
             assert cls.n_classes_ == 10
             predt_2 = cls.predict(X)
+            proba_2 = cls.predict_proba(X)
+
             np.testing.assert_allclose(predt_0.compute(), predt_2.compute())
+            np.testing.assert_allclose(proba_0.compute(), proba_2.compute())
 
             # Use single node to load
             cls = xgb.XGBClassifier()
             cls.load_model(path)
             assert cls.n_classes_ == 10
             predt_3 = cls.predict(X_)
+            proba_3 = cls.predict_proba(X_)
+
             np.testing.assert_allclose(predt_0.compute(), predt_3)
+            np.testing.assert_allclose(proba_0.compute(), proba_3)
 
 
 def test_dask_unsupported_features(client: "Client") -> None:
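
The predict_proba coverage added here relies on the Dask estimator returning lazy dask arrays, hence the .compute() calls. A minimal sketch (assumes a running dask.distributed client and dask arrays X, y):

from xgboost.dask import DaskXGBClassifier

clf = DaskXGBClassifier(n_estimators=8)
clf.client = client           # attach the dask.distributed client
clf.fit(X, y)
proba = clf.predict_proba(X)  # lazy dask array, shape (n_samples, n_classes)
print(proba.compute()[:3])    # materialises only when asked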
@@ -888,6 +888,34 @@ class TestPySparkLocal:
         clf = SparkXGBClassifier(device="cuda")
         clf._validate_params()
 
+    def test_gpu_transform(self, clf_data: ClfData) -> None:
+        """local mode"""
+        classifier = SparkXGBClassifier(device="cpu")
+        model: SparkXGBClassifierModel = classifier.fit(clf_data.cls_df_train)
+
+        with tempfile.TemporaryDirectory() as tmpdir:
+            path = "file:" + tmpdir
+            model.write().overwrite().save(path)
+
+            # The model trained with CPU, transform defaults to cpu
+            assert not model._gpu_transform()
+
+            # without error
+            model.transform(clf_data.cls_df_test).collect()
+
+            model.set_device("cuda")
+            assert model._gpu_transform()
+
+            model_loaded = SparkXGBClassifierModel.load(path)
+
+            # The model trained with CPU, transform defaults to cpu
+            assert not model_loaded._gpu_transform()
+            # without error
+            model_loaded.transform(clf_data.cls_df_test).collect()
+
+            model_loaded.set_device("cuda")
+            assert model_loaded._gpu_transform()
+
+
 class XgboostLocalTest(SparkTestCase):
     def setUp(self):