Compare commits: master-roc ... release_2. (211 commits)
Commit SHA1s:

11afdc957e, 56d2821bb9, dc7ee041cc, 7dc152450e, 41ce8f28b2, 0ffc52e05c,
82d81bca94, 6ec5cf26fc, 1ec57fd1a3, d0774a78e4, 8d160a206e, a41bc0975c,
782b73f2bb, a408254c2f, 22e891dafa, 89530c80a7, 946ab53b57, afd03a6934,
f7da938458, 6ab6577511, 8c57558d74, 58aa98a796, 92273b39d8, e824b18bf6,
66ee89d8b4, 54d1d72d01, 032bcc57f9, ace7713201, 096047c547, e75dd75bb2,
4d387cbfbf, 3fde9361d7, b67c2ed96d, 177fd79864, 06487d3896, e50ccc4d3c,
add57f8880, a0d3573c74, 4301558a57, 2e7e9d3b2d, 3e0c7d1dee, 2f47a1ebe6,
5ca7daaa13, 5f78360949, 35cde3b1b2, ce345c30a8, af8845405a, 9ee1852d4e,
6ecd7903f2, 42867a4805, c5b575e00e, 1354138b7d, b994a38b28, 3a834c4992,
b22644fc10, 7663d47383, 88fc8badfa, 8cad8c693c, b066accad6, b324d51f14,
65097212b3, 4a24ca2f95, 83e6fceb5c, e4538cb13c, 5446c501af, 313a74b582,
65d83e288f, f645cf51c1, db8420225b, 843fdde61b, 08bc4b0c0f, 6825d986fd,
d155ec77f9, 991738690f, aeb3fd1c95, 141a062e00, acad01afc9, f289e5001d,
06d9b998ce, c50cc424bc, 8c77e936d1, 18034a4291, 7ee4734d3a, ee582f03c3,
f3286bac04, 3ee3bea683, 5098735698, e74b3bbf3c, 22525c002a, 80961039d7,
1474789787, 1dc138404a, e1d050f64e, 7fbc561e17, d97be6f396, f1211cffca,
e0716afabf, 595cd81251, 0325ce0bed, a79a35c22c, 4484c7f073, 8207015e48,
364df7db0f, a2bab03205, b71c1b50de, fa2336fcfd, 7d96758382, b0dacc5a80,
f64152bf97, b4dbe7a649, e5b6219a84, 3a07b1edf8, 9bf16a2ca6, 332f6a89a9,
204d0c9a53, e961016e71, f0b8c02f15, 5e8b1842b9, 9f072b50ba, e1ddb5ae58,
643e2a7b39, bde3107c3e, 5edfc1e2e9, c073417d0c, 9bbbeb3f03, 4bde2e3412,
58a9fe07b6, 41407850d5, 968a1db4c0, ad710e4888, 4e3c699814, 757de84398,
d27f9dfdce, 14cc438a64, 911a5d8a60, 54b076b40f, 91a5ef762e, 8fd2af1c8b,
bb6adda8a3, a76ccff390, 61c0b19331, fa9f69dd85, 080fc35c4b, ccce4cf7e1,
713ab9e1a0, 134cbfddbe, 6e2c5be83e, 185dbce21f, 49732359ef, ec9f500a49,
53244bef6f, f0febfbcac, 1c58ff61d1, 1530c03f7d, 309268de02, 500428cc0f,
495816f694, df42dd2c53, f55243fda0, 1e09c21456, 0ed5d3c849, f67e7de7ef,
5044713388, c875f0425f, 4fd08b6c32, b9d86d44d6, a56055225a, 6eba0a56ec,
00c24a58b1, 6fa248b75f, 946f9e9802, 4c4e5af29c, 7e1b06417b, cdd7794641,
cd743a1ae9, a45005863b, bdcb036592, 7a3a9b682a, 0a711662c3, 312e58ec99,
ca8f4e7993, 60795f22de, 05fdca893f, d8cc93f3f2, 62c4efac51, ba9e00d911,
d3be67ad8e, 2eb0b6aae4, 327f1494f1, fa92aa56ee, 427f6c2a1a, 270c7b4802,
0fc1f640a9, 762fd9028d, f2009533e1, 53b5cd73f2, 52b05d934e, 840f15209c,
1e1c7fd8d5, f5f800c80d, 6b7be96373, 75712b9c3c, ed45aa2816, f286ae5bfa,
f13a7f8d91, c51a1c9aae, 30de728631, 75fa15b36d, eb30cb6293, cafbfce51f,
6039a71e6c
.github/workflows/jvm_tests.yml (17 changed lines)

@@ -51,14 +51,14 @@ jobs:
         id: extract_branch
       if: |
         (github.ref == 'refs/heads/master' || contains(github.ref, 'refs/heads/release_')) &&
-        matrix.os == 'windows-latest'
+        (matrix.os == 'windows-latest' || matrix.os == 'macos-11')

     - name: Publish artifact xgboost4j.dll to S3
       run: |
         cd lib/
         Rename-Item -Path xgboost4j.dll -NewName xgboost4j_${{ github.sha }}.dll
         dir
-        python -m awscli s3 cp xgboost4j_${{ github.sha }}.dll s3://xgboost-nightly-builds/${{ steps.extract_branch.outputs.branch }}/ --acl public-read
+        python -m awscli s3 cp xgboost4j_${{ github.sha }}.dll s3://xgboost-nightly-builds/${{ steps.extract_branch.outputs.branch }}/libxgboost4j/ --acl public-read
       if: |
         (github.ref == 'refs/heads/master' || contains(github.ref, 'refs/heads/release_')) &&
         matrix.os == 'windows-latest'
@@ -66,6 +66,19 @@ jobs:
         AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID_IAM_S3_UPLOADER }}
         AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY_IAM_S3_UPLOADER }}

+    - name: Publish artifact libxgboost4j.dylib to S3
+      run: |
+        cd lib/
+        mv -v libxgboost4j.dylib libxgboost4j_${{ github.sha }}.dylib
+        ls
+        python -m awscli s3 cp libxgboost4j_${{ github.sha }}.dylib s3://xgboost-nightly-builds/${{ steps.extract_branch.outputs.branch }}/libxgboost4j/ --acl public-read
+      if: |
+        (github.ref == 'refs/heads/master' || contains(github.ref, 'refs/heads/release_')) &&
+        matrix.os == 'macos-11'
+      env:
+        AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID_IAM_S3_UPLOADER }}
+        AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY_IAM_S3_UPLOADER }}
+
     - name: Test XGBoost4J (Core, Spark, Examples)
       run: |

.github/workflows/r_tests.yml (4 changed lines)

@@ -25,7 +25,7 @@ jobs:
       with:
         submodules: 'true'

-    - uses: r-lib/actions/setup-r@50d1eae9b8da0bb3f8582c59a5b82225fa2fe7f2 # v2.3.1
+    - uses: r-lib/actions/setup-r@11a22a908006c25fe054c4ef0ac0436b1de3edbe # v2.6.4
       with:
         r-version: ${{ matrix.config.r }}

@@ -64,7 +64,7 @@ jobs:
       with:
         submodules: 'true'

-    - uses: r-lib/actions/setup-r@50d1eae9b8da0bb3f8582c59a5b82225fa2fe7f2 # v2.3.1
+    - uses: r-lib/actions/setup-r@11a22a908006c25fe054c4ef0ac0436b1de3edbe # v2.6.4
       with:
         r-version: ${{ matrix.config.r }}

.gitmodules (3 changed lines)

@@ -5,3 +5,6 @@
 [submodule "gputreeshap"]
   path = gputreeshap
   url = https://github.com/rapidsai/gputreeshap.git
+[submodule "rocgputreeshap"]
+  path = rocgputreeshap
+  url = https://github.com/ROCmSoftwarePlatform/rocgputreeshap

@@ -32,4 +32,3 @@ formats:
 python:
   install:
     - requirements: doc/requirements.txt
-  system_packages: true

@@ -1,5 +1,5 @@
 cmake_minimum_required(VERSION 3.18 FATAL_ERROR)
-project(xgboost LANGUAGES CXX C VERSION 2.0.0)
+project(xgboost LANGUAGES CXX C VERSION 2.0.2)
 include(cmake/Utils.cmake)
 list(APPEND CMAKE_MODULE_PATH "${xgboost_SOURCE_DIR}/cmake/modules")
 cmake_policy(SET CMP0022 NEW)
@@ -58,7 +58,7 @@ option(ENABLE_ALL_WARNINGS "Enable all compiler warnings. Only effective for GCC
 option(LOG_CAPI_INVOCATION "Log all C API invocations for debugging" OFF)
 option(GOOGLE_TEST "Build google tests" OFF)
 option(USE_DMLC_GTEST "Use google tests bundled with dmlc-core submodule" OFF)
-option(USE_DEVICE_DEBUG "Generate CUDA device debug info." OFF)
+option(USE_DEVICE_DEBUG "Generate CUDA/HIP device debug info." OFF)
 option(USE_NVTX "Build with cuda profiling annotations. Developers only." OFF)
 set(NVTX_HEADER_DIR "" CACHE PATH "Path to the stand-alone nvtx header")
 option(RABIT_MOCK "Build rabit with mock" OFF)
@@ -71,6 +71,10 @@ option(USE_NCCL "Build with NCCL to enable distributed GPU support." OFF)
 option(BUILD_WITH_SHARED_NCCL "Build with shared NCCL library." OFF)
 set(GPU_COMPUTE_VER "" CACHE STRING
   "Semicolon separated list of compute versions to be built against, e.g. '35;61'")
+## HIP
+option(USE_HIP "Build with GPU acceleration" OFF)
+option(USE_RCCL "Build with RCCL to enable distributed GPU support." OFF)
+option(BUILD_WITH_SHARED_RCCL "Build with shared RCCL library." OFF)
 ## Copied From dmlc
 option(USE_HDFS "Build with HDFS support" OFF)
 option(USE_AZURE "Build with AZURE support" OFF)
@@ -93,6 +97,7 @@ option(ADD_PKGCONFIG "Add xgboost.pc into system." ON)
 if (USE_DEBUG_OUTPUT AND (NOT (CMAKE_BUILD_TYPE MATCHES Debug)))
   message(SEND_ERROR "Do not enable `USE_DEBUG_OUTPUT' with release build.")
 endif (USE_DEBUG_OUTPUT AND (NOT (CMAKE_BUILD_TYPE MATCHES Debug)))
+
 if (USE_NCCL AND NOT (USE_CUDA))
   message(SEND_ERROR "`USE_NCCL` must be enabled with `USE_CUDA` flag.")
 endif (USE_NCCL AND NOT (USE_CUDA))
@@ -102,6 +107,17 @@ endif (USE_DEVICE_DEBUG AND NOT (USE_CUDA))
 if (BUILD_WITH_SHARED_NCCL AND (NOT USE_NCCL))
   message(SEND_ERROR "Build XGBoost with -DUSE_NCCL=ON to enable BUILD_WITH_SHARED_NCCL.")
 endif (BUILD_WITH_SHARED_NCCL AND (NOT USE_NCCL))
+
+if (USE_RCCL AND NOT (USE_HIP))
+  message(SEND_ERROR "`USE_RCCL` must be enabled with `USE_HIP` flag.")
+endif (USE_RCCL AND NOT (USE_HIP))
+if (USE_DEVICE_DEBUG AND NOT (USE_HIP))
+  message(SEND_ERROR "`USE_DEVICE_DEBUG` must be enabled with `USE_HIP` flag.")
+endif (USE_DEVICE_DEBUG AND NOT (USE_HIP))
+if (BUILD_WITH_SHARED_RCCL AND (NOT USE_RCCL))
+  message(SEND_ERROR "Build XGBoost with -DUSE_RCCL=ON to enable BUILD_WITH_SHARED_RCCL.")
+endif (BUILD_WITH_SHARED_RCCL AND (NOT USE_RCCL))
+
 if (JVM_BINDINGS AND R_LIB)
   message(SEND_ERROR "`R_LIB' is not compatible with `JVM_BINDINGS' as they both have customized configurations.")
 endif (JVM_BINDINGS AND R_LIB)
@@ -115,9 +131,15 @@ endif (USE_AVX)
 if (PLUGIN_LZ4)
   message(SEND_ERROR "The option 'PLUGIN_LZ4' is removed from XGBoost.")
 endif (PLUGIN_LZ4)

 if (PLUGIN_RMM AND NOT (USE_CUDA))
   message(SEND_ERROR "`PLUGIN_RMM` must be enabled with `USE_CUDA` flag.")
 endif (PLUGIN_RMM AND NOT (USE_CUDA))

+if (PLUGIN_RMM AND NOT (USE_HIP))
+  message(SEND_ERROR "`PLUGIN_RMM` must be enabled with `USE_HIP` flag.")
+endif (PLUGIN_RMM AND NOT (USE_HIP))
+
 if (PLUGIN_RMM AND NOT ((CMAKE_CXX_COMPILER_ID STREQUAL "Clang") OR (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")))
   message(SEND_ERROR "`PLUGIN_RMM` must be used with GCC or Clang compiler.")
 endif (PLUGIN_RMM AND NOT ((CMAKE_CXX_COMPILER_ID STREQUAL "Clang") OR (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")))
@@ -170,6 +192,24 @@ if (USE_CUDA)
   find_package(CUDAToolkit REQUIRED)
 endif (USE_CUDA)

+if (USE_HIP)
+  set(USE_OPENMP ON CACHE BOOL "HIP requires OpenMP" FORCE)
+  # `export CXX=' is ignored by CMake HIP.
+  set(CMAKE_HIP_HOST_COMPILER ${CMAKE_CXX_COMPILER})
+  message(STATUS "Configured HIP host compiler: ${CMAKE_HIP_HOST_COMPILER}")
+
+  enable_language(HIP)
+  find_package(hip REQUIRED)
+  find_package(rocthrust REQUIRED)
+  find_package(hipcub REQUIRED)
+
+  set(CMAKE_HIP_FLAGS "${CMAKE_HIP_FLAGS} -I${HIP_INCLUDE_DIRS} -I${HIP_INCLUDE_DIRS}/hip")
+  set(CMAKE_HIP_FLAGS "${CMAKE_HIP_FLAGS} -Wunused-result -w")
+  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D__HIP_PLATFORM_AMD__")
+  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -I${HIP_INCLUDE_DIRS}")
+  add_subdirectory(${PROJECT_SOURCE_DIR}/rocgputreeshap)
+endif (USE_HIP)
+
 if (FORCE_COLORED_OUTPUT AND (CMAKE_GENERATOR STREQUAL "Ninja") AND
     ((CMAKE_CXX_COMPILER_ID STREQUAL "GNU") OR
      (CMAKE_CXX_COMPILER_ID STREQUAL "Clang")))
@@ -209,6 +249,10 @@ if (USE_NCCL)
   find_package(Nccl REQUIRED)
 endif (USE_NCCL)

+if (USE_RCCL)
+  find_package(rccl REQUIRED)
+endif (USE_RCCL)
+
 # dmlc-core
 msvc_use_static_runtime()
 if (FORCE_SHARED_CRT)
@@ -233,6 +277,11 @@ endif (RABIT_BUILD_MPI)
 add_subdirectory(${xgboost_SOURCE_DIR}/src)
 target_link_libraries(objxgboost PUBLIC dmlc)

+# Link -lstdc++fs for GCC 8.x
+if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS "9.0")
+  target_link_libraries(objxgboost PUBLIC stdc++fs)
+endif()
+
 # Exports some R specific definitions and objects
 if (R_LIB)
   add_subdirectory(${xgboost_SOURCE_DIR}/R-package)

@@ -1,8 +1,8 @@
 Package: xgboost
 Type: Package
 Title: Extreme Gradient Boosting
-Version: 2.0.0.1
-Date: 2022-10-18
+Version: 2.0.2.1
+Date: 2023-10-12
 Authors@R: c(
   person("Tianqi", "Chen", role = c("aut"),
          email = "tianqi.tchen@gmail.com"),

@@ -70,7 +70,7 @@ cb.print.evaluation <- function(period = 1, showsd = TRUE) {
         i == env$begin_iteration ||
         i == env$end_iteration) {
       stdev <- if (showsd) env$bst_evaluation_err else NULL
-      msg <- format.eval.string(i, env$bst_evaluation, stdev)
+      msg <- .format_eval_string(i, env$bst_evaluation, stdev)
       cat(msg, '\n')
     }
   }
@@ -380,7 +380,9 @@ cb.early.stop <- function(stopping_rounds, maximize = FALSE,
     if ((maximize && score > best_score) ||
         (!maximize && score < best_score)) {

-      best_msg <<- format.eval.string(i, env$bst_evaluation, env$bst_evaluation_err)
+      best_msg <<- .format_eval_string(
+        i, env$bst_evaluation, env$bst_evaluation_err
+      )
       best_score <<- score
       best_iteration <<- i
       best_ntreelimit <<- best_iteration * env$num_parallel_tree
@@ -754,7 +756,7 @@ xgb.gblinear.history <- function(model, class_index = NULL) {
 #

 # Format the evaluation metric string
-format.eval.string <- function(iter, eval_res, eval_err = NULL) {
+.format_eval_string <- function(iter, eval_res, eval_err = NULL) {
   if (length(eval_res) == 0)
     stop('no evaluation results')
   enames <- names(eval_res)

R-package/configure (18 changed lines)

@@ -1,6 +1,6 @@
 #! /bin/sh
 # Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.71 for xgboost 2.0.0.
+# Generated by GNU Autoconf 2.71 for xgboost 2.0.2.
 #
 #
 # Copyright (C) 1992-1996, 1998-2017, 2020-2021 Free Software Foundation,
@@ -607,8 +607,8 @@ MAKEFLAGS=
 # Identity of this package.
 PACKAGE_NAME='xgboost'
 PACKAGE_TARNAME='xgboost'
-PACKAGE_VERSION='2.0.0'
-PACKAGE_STRING='xgboost 2.0.0'
+PACKAGE_VERSION='2.0.2'
+PACKAGE_STRING='xgboost 2.0.2'
 PACKAGE_BUGREPORT=''
 PACKAGE_URL=''

@@ -1225,7 +1225,7 @@ if test "$ac_init_help" = "long"; then
 # Omit some internal or obsolete options to make the list less imposing.
 # This message is too long to be a string in the A/UX 3.1 sh.
 cat <<_ACEOF
-\`configure' configures xgboost 2.0.0 to adapt to many kinds of systems.
+\`configure' configures xgboost 2.0.2 to adapt to many kinds of systems.

 Usage: $0 [OPTION]... [VAR=VALUE]...

@@ -1287,7 +1287,7 @@ fi

 if test -n "$ac_init_help"; then
   case $ac_init_help in
-    short | recursive ) echo "Configuration of xgboost 2.0.0:";;
+    short | recursive ) echo "Configuration of xgboost 2.0.2:";;
   esac
   cat <<\_ACEOF

@@ -1367,7 +1367,7 @@ fi
 test -n "$ac_init_help" && exit $ac_status
 if $ac_init_version; then
   cat <<\_ACEOF
-xgboost configure 2.0.0
+xgboost configure 2.0.2
 generated by GNU Autoconf 2.71

 Copyright (C) 2021 Free Software Foundation, Inc.
@@ -1533,7 +1533,7 @@ cat >config.log <<_ACEOF
 This file contains any messages produced by compilers while
 running configure, to aid debugging if configure makes a mistake.

-It was created by xgboost $as_me 2.0.0, which was
+It was created by xgboost $as_me 2.0.2, which was
 generated by GNU Autoconf 2.71. Invocation command line was

   $ $0$ac_configure_args_raw
@@ -3412,7 +3412,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
 # report actual input values of CONFIG_FILES etc. instead of their
 # values after options handling.
 ac_log="
-This file was extended by xgboost $as_me 2.0.0, which was
+This file was extended by xgboost $as_me 2.0.2, which was
 generated by GNU Autoconf 2.71. Invocation command line was

   CONFIG_FILES = $CONFIG_FILES
@@ -3467,7 +3467,7 @@ ac_cs_config_escaped=`printf "%s\n" "$ac_cs_config" | sed "s/^ //; s/'/'\\\\\\\\
 cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
 ac_cs_config='$ac_cs_config_escaped'
 ac_cs_version="\\
-xgboost config.status 2.0.0
+xgboost config.status 2.0.2
 configured by $0, generated by GNU Autoconf 2.71,
 with options \\"\$ac_cs_config\\"

@@ -2,7 +2,7 @@

 AC_PREREQ(2.69)

-AC_INIT([xgboost],[2.0.0],[],[xgboost],[])
+AC_INIT([xgboost],[2.0.2],[],[xgboost],[])

 : ${R_HOME=`R RHOME`}
 if test -z "${R_HOME}"; then

@@ -120,11 +120,25 @@ XGB_DLL SEXP XGDMatrixCreateFromMat_R(SEXP mat, SEXP missing, SEXP n_threads) {
   ctx.nthread = asInteger(n_threads);
   std::int32_t threads = ctx.Threads();

+  if (is_int) {
     xgboost::common::ParallelFor(nrow, threads, [&](xgboost::omp_ulong i) {
       for (size_t j = 0; j < ncol; ++j) {
-        data[i * ncol + j] = is_int ? static_cast<float>(iin[i + nrow * j]) : din[i + nrow * j];
+        auto v = iin[i + nrow * j];
+        if (v == NA_INTEGER) {
+          data[i * ncol + j] = std::numeric_limits<float>::quiet_NaN();
+        } else {
+          data[i * ncol + j] = static_cast<float>(v);
+        }
       }
     });
+  } else {
+    xgboost::common::ParallelFor(nrow, threads, [&](xgboost::omp_ulong i) {
+      for (size_t j = 0; j < ncol; ++j) {
+        data[i * ncol + j] = din[i + nrow * j];
+      }
+    });
+  }

   DMatrixHandle handle;
   CHECK_CALL(XGDMatrixCreateFromMat_omp(BeginPtr(data), nrow, ncol,
                                         asReal(missing), &handle, threads));

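The hunk above changes the R binding so that integer `NA` entries are written out as NaN before `XGDMatrixCreateFromMat_omp` is called, instead of casting the NA sentinel directly to `float`. For comparison, here is a minimal Python sketch (illustrative only, not part of the diff) showing that NaN is the missing-value marker `DMatrix` already expects for dense input:

```python
import numpy as np
import xgboost as xgb

# Dense float matrix with one missing entry encoded as NaN, the same
# representation the patched R binding now produces for integer NA.
x = np.array(
    [[1.0, 2.0],
     [np.nan, 4.0],
     [5.0, 6.0]],
    dtype=np.float32,
)

dmat = xgb.DMatrix(x, missing=np.nan)  # NaN is also the default for `missing`
print(dmat.num_row(), dmat.num_col())  # 3 2
```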
@@ -32,7 +32,7 @@ namespace common {
 bool CheckNAN(double v) {
   return ISNAN(v);
 }
-#if !defined(XGBOOST_USE_CUDA)
+#if !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP)
 double LogGamma(double v) {
   return lgammafn(v);
 }

@@ -56,6 +56,42 @@ test_that("xgb.DMatrix: basic construction", {
   expect_equal(raw_fd, raw_dgc)
 })

+test_that("xgb.DMatrix: NA", {
+  n_samples <- 3
+  x <- cbind(
+    x1 = sample(x = 4, size = n_samples, replace = TRUE),
+    x2 = sample(x = 4, size = n_samples, replace = TRUE)
+  )
+  x[1, "x1"] <- NA
+
+  m <- xgb.DMatrix(x)
+  xgb.DMatrix.save(m, "int.dmatrix")
+
+  x <- matrix(as.numeric(x), nrow = n_samples, ncol = 2)
+  colnames(x) <- c("x1", "x2")
+  m <- xgb.DMatrix(x)
+
+  xgb.DMatrix.save(m, "float.dmatrix")
+
+  iconn <- file("int.dmatrix", "rb")
+  fconn <- file("float.dmatrix", "rb")
+
+  expect_equal(file.size("int.dmatrix"), file.size("float.dmatrix"))
+
+  bytes <- file.size("int.dmatrix")
+  idmatrix <- readBin(iconn, "raw", n = bytes)
+  fdmatrix <- readBin(fconn, "raw", n = bytes)
+
+  expect_equal(length(idmatrix), length(fdmatrix))
+  expect_equal(idmatrix, fdmatrix)
+
+  close(iconn)
+  close(fconn)
+
+  file.remove("int.dmatrix")
+  file.remove("float.dmatrix")
+})
+
 test_that("xgb.DMatrix: saving, loading", {
   # save to a local file
   dtest1 <- xgb.DMatrix(test_data, label = test_label)

@@ -181,6 +181,23 @@ function(xgboost_set_cuda_flags target)
     CUDA_RUNTIME_LIBRARY Static)
 endfunction(xgboost_set_cuda_flags)

+# Set HIP related flags to target.
+function(xgboost_set_hip_flags target)
+  if (USE_DEVICE_DEBUG)
+    target_compile_options(${target} PRIVATE
+      $<$<AND:$<CONFIG:DEBUG>,$<COMPILE_LANGUAGE:HIP>>:-G>)
+  endif (USE_DEVICE_DEBUG)
+
+  target_compile_definitions(${target} PRIVATE -DXGBOOST_USE_HIP=1)
+  target_include_directories(${target} PRIVATE ${xgboost_SOURCE_DIR}/rocgputreeshap)
+  target_include_directories(${target} PRIVATE ${xgboost_SOURCE_DIR}/warp-primitives/include)
+
+  set_target_properties(${target} PROPERTIES
+    HIP_STANDARD 17
+    HIP_STANDARD_REQUIRED ON
+    HIP_SEPARABLE_COMPILATION OFF)
+endfunction(xgboost_set_hip_flags)
+
 macro(xgboost_link_nccl target)
   if (BUILD_STATIC_LIB)
     target_include_directories(${target} PUBLIC ${NCCL_INCLUDE_DIR})
@@ -193,6 +210,20 @@ macro(xgboost_link_nccl target)
   endif (BUILD_STATIC_LIB)
 endmacro(xgboost_link_nccl)

+macro(xgboost_link_rccl target)
+  if(BUILD_STATIC_LIB)
+    target_include_directories(${target} PUBLIC ${RCCL_INCLUDE_DIR}/rccl)
+    target_compile_definitions(${target} PUBLIC -DXGBOOST_USE_RCCL=1)
+    target_link_directories(${target} PUBLIC ${HIP_LIB_INSTALL_DIR})
+    target_link_libraries(${target} PUBLIC ${RCCL_LIBRARY})
+  else()
+    target_include_directories(${target} PRIVATE ${RCCL_INCLUDE_DIR}/rccl)
+    target_compile_definitions(${target} PRIVATE -DXGBOOST_USE_RCCL=1)
+    target_link_directories(${target} PUBLIC ${HIP_LIB_INSTALL_DIR})
+    target_link_libraries(${target} PRIVATE ${RCCL_LIBRARY})
+  endif()
+endmacro()
+
 # compile options
 macro(xgboost_target_properties target)
   set_target_properties(${target} PROPERTIES
@@ -215,6 +246,10 @@ macro(xgboost_target_properties target)
       -Xcompiler=-Wall -Xcompiler=-Wextra -Xcompiler=-Wno-expansion-to-defined,
       -Wall -Wextra -Wno-expansion-to-defined>
     )
+    target_compile_options(${target} PUBLIC
+      $<IF:$<COMPILE_LANGUAGE:HIP>,
+      -Wall -Wextra >
+    )
   endif(ENABLE_ALL_WARNINGS)

   target_compile_options(${target}
@@ -283,6 +318,10 @@ macro(xgboost_target_link_libraries target)
     target_link_libraries(${target} PUBLIC CUDA::cudart_static)
   endif (USE_CUDA)

+  if (USE_HIP)
+    xgboost_set_hip_flags(${target})
+  endif (USE_HIP)
+
   if (PLUGIN_RMM)
     target_link_libraries(${target} PRIVATE rmm::rmm)
   endif (PLUGIN_RMM)
@@ -291,6 +330,10 @@ macro(xgboost_target_link_libraries target)
     xgboost_link_nccl(${target})
   endif (USE_NCCL)

+  if(USE_RCCL)
+    xgboost_link_rccl(${target})
+  endif()
+
   if (USE_NVTX)
     target_link_libraries(${target} PRIVATE CUDA::nvToolsExt)
   endif (USE_NVTX)

@@ -3,6 +3,8 @@
 set(USE_OPENMP @USE_OPENMP@)
 set(USE_CUDA @USE_CUDA@)
 set(USE_NCCL @USE_NCCL@)
+set(USE_HIP @USE_HIP@)
+set(USE_RCCL @USE_RCCL@)
 set(XGBOOST_BUILD_STATIC_LIB @BUILD_STATIC_LIB@)

 include(CMakeFindDependencyMacro)
@@ -15,6 +17,9 @@ if (XGBOOST_BUILD_STATIC_LIB)
   if(USE_CUDA)
     find_dependency(CUDA)
   endif()
+  if(USE_HIP)
+    find_dependency(HIP)
+  endif()
   # nccl should be linked statically if xgboost is built as static library.
 endif (XGBOOST_BUILD_STATIC_LIB)

@@ -4,13 +4,13 @@ python mapfeat.py
 # split train and test
 python mknfold.py machine.txt 1
 # training and output the models
-../../xgboost machine.conf
+../../../xgboost machine.conf
 # output predictions of test data
-../../xgboost machine.conf task=pred model_in=0002.model
+../../../xgboost machine.conf task=pred model_in=0002.model
 # print the boosters of 0002.model in dump.raw.txt
-../../xgboost machine.conf task=dump model_in=0002.model name_dump=dump.raw.txt
+../../../xgboost machine.conf task=dump model_in=0002.model name_dump=dump.raw.txt
 # print the boosters of 0002.model in dump.nice.txt with feature map
-../../xgboost machine.conf task=dump model_in=0002.model fmap=featmap.txt name_dump=dump.nice.txt
+../../../xgboost machine.conf task=dump model_in=0002.model fmap=featmap.txt name_dump=dump.nice.txt

 # cat the result
 cat dump.nice.txt

@@ -21,12 +21,14 @@ def normpath(path):
     else:
         return normalized

+
 def cp(source, target):
     source = normpath(source)
     target = normpath(target)
     print("cp {0} {1}".format(source, target))
     shutil.copy(source, target)

+
 def maybe_makedirs(path):
     path = normpath(path)
     print("mkdir -p " + path)
@@ -36,6 +38,7 @@ def maybe_makedirs(path):
         if e.errno != errno.EEXIST:
             raise

+
 @contextmanager
 def cd(path):
     path = normpath(path)
@@ -47,18 +50,22 @@ def cd(path):
     finally:
         os.chdir(cwd)

+
 def run(command, **kwargs):
     print(command)
     subprocess.check_call(command, shell=True, **kwargs)

+
 def get_current_git_tag():
     out = subprocess.check_output(["git", "tag", "--points-at", "HEAD"])
     return out.decode().split("\n")[0]

+
 def get_current_commit_hash():
     out = subprocess.check_output(["git", "rev-parse", "HEAD"])
     return out.decode().split("\n")[0]

+
 def get_current_git_branch():
     out = subprocess.check_output(["git", "log", "-n", "1", "--pretty=%d", "HEAD"])
     m = re.search(r"release_[0-9\.]+", out.decode())
@@ -66,38 +73,53 @@ def get_current_git_branch():
         raise ValueError("Expected branch name of form release_xxx")
     return m.group(0)

+
 def retrieve(url, filename=None):
     print(f"{url} -> {filename}")
     return urlretrieve(url, filename)

+
 def main():
     parser = argparse.ArgumentParser()
-    parser.add_argument("--release-version", type=str, required=True,
-                        help="Version of the release being prepared")
+    parser.add_argument(
+        "--release-version",
+        type=str,
+        required=True,
+        help="Version of the release being prepared",
+    )
     args = parser.parse_args()

-    if sys.platform != "darwin" or platform.machine() != "x86_64":
-        raise NotImplementedError("Please run this script using an Intel Mac")
+    if sys.platform != "darwin" or platform.machine() != "arm64":
+        raise NotImplementedError("Please run this script using an M1 Mac")

     version = args.release_version
     expected_git_tag = "v" + version
     current_git_tag = get_current_git_tag()
     if current_git_tag != expected_git_tag:
         if not current_git_tag:
-            raise ValueError(f"Expected git tag {expected_git_tag} but current HEAD has no tag. "
-                             f"Run: git checkout {expected_git_tag}")
-        raise ValueError(f"Expected git tag {expected_git_tag} but current HEAD is at tag "
-                         f"{current_git_tag}. Run: git checkout {expected_git_tag}")
+            raise ValueError(
+                f"Expected git tag {expected_git_tag} but current HEAD has no tag. "
+                f"Run: git checkout {expected_git_tag}"
+            )
+        raise ValueError(
+            f"Expected git tag {expected_git_tag} but current HEAD is at tag "
+            f"{current_git_tag}. Run: git checkout {expected_git_tag}"
+        )

     commit_hash = get_current_commit_hash()
     git_branch = get_current_git_branch()
-    print(f"Using commit {commit_hash} of branch {git_branch}, git tag {current_git_tag}")
+    print(
+        f"Using commit {commit_hash} of branch {git_branch}, git tag {current_git_tag}"
+    )

     with cd("jvm-packages/"):
         print("====copying pure-Python tracker====")
         for use_cuda in [True, False]:
             xgboost4j = "xgboost4j-gpu" if use_cuda else "xgboost4j"
-            cp("../python-package/xgboost/tracker.py", f"{xgboost4j}/src/main/resources")
+            cp(
+                "../python-package/xgboost/tracker.py",
+                f"{xgboost4j}/src/main/resources",
+            )

         print("====copying resources for testing====")
         with cd("../demo/CLI/regression"):
@@ -115,7 +137,11 @@ def main():
             cp(file, f"{xgboost4j_spark}/src/test/resources")

     print("====Creating directories to hold native binaries====")
-    for os_ident, arch in [("linux", "x86_64"), ("windows", "x86_64"), ("macos", "x86_64")]:
+    for os_ident, arch in [
+        ("linux", "x86_64"),
+        ("windows", "x86_64"),
+        ("macos", "x86_64"),
+    ]:
         output_dir = f"xgboost4j/src/main/resources/lib/{os_ident}/{arch}"
         maybe_makedirs(output_dir)
     for os_ident, arch in [("linux", "x86_64")]:
@@ -123,52 +149,86 @@ def main():
         maybe_makedirs(output_dir)

     print("====Downloading native binaries from CI====")
-    nightly_bucket_prefix = "https://s3-us-west-2.amazonaws.com/xgboost-nightly-builds"
-    maven_repo_prefix = "https://s3-us-west-2.amazonaws.com/xgboost-maven-repo/release/ml/dmlc"
+    nightly_bucket_prefix = (
+        "https://s3-us-west-2.amazonaws.com/xgboost-nightly-builds"
+    )
+    maven_repo_prefix = (
+        "https://s3-us-west-2.amazonaws.com/xgboost-maven-repo/release/ml/dmlc"
+    )

-    retrieve(url=f"{nightly_bucket_prefix}/{git_branch}/xgboost4j_{commit_hash}.dll",
-             filename="xgboost4j/src/main/resources/lib/windows/x86_64/xgboost4j.dll")
+    retrieve(
+        url=f"{nightly_bucket_prefix}/{git_branch}/libxgboost4j/xgboost4j_{commit_hash}.dll",
+        filename="xgboost4j/src/main/resources/lib/windows/x86_64/xgboost4j.dll",
+    )
+    retrieve(
+        url=f"{nightly_bucket_prefix}/{git_branch}/libxgboost4j/libxgboost4j_{commit_hash}.dylib",
+        filename="xgboost4j/src/main/resources/lib/macos/x86_64/libxgboost4j.dylib",
+    )

     with tempfile.TemporaryDirectory() as tempdir:
         # libxgboost4j.so for Linux x86_64, CPU only
         zip_path = os.path.join(tempdir, "xgboost4j_2.12.jar")
         extract_dir = os.path.join(tempdir, "xgboost4j")
-        retrieve(url=f"{maven_repo_prefix}/xgboost4j_2.12/{version}/"
-                 f"xgboost4j_2.12-{version}.jar",
-                 filename=zip_path)
+        retrieve(
+            url=f"{maven_repo_prefix}/xgboost4j_2.12/{version}/"
+            f"xgboost4j_2.12-{version}.jar",
+            filename=zip_path,
+        )
         os.mkdir(extract_dir)
         with zipfile.ZipFile(zip_path, "r") as t:
             t.extractall(extract_dir)
-        cp(os.path.join(extract_dir, "lib", "linux", "x86_64", "libxgboost4j.so"),
-           "xgboost4j/src/main/resources/lib/linux/x86_64/libxgboost4j.so")
+        cp(
+            os.path.join(extract_dir, "lib", "linux", "x86_64", "libxgboost4j.so"),
+            "xgboost4j/src/main/resources/lib/linux/x86_64/libxgboost4j.so",
+        )

         # libxgboost4j.so for Linux x86_64, GPU support
         zip_path = os.path.join(tempdir, "xgboost4j-gpu_2.12.jar")
         extract_dir = os.path.join(tempdir, "xgboost4j-gpu")
-        retrieve(url=f"{maven_repo_prefix}/xgboost4j-gpu_2.12/{version}/"
-                 f"xgboost4j-gpu_2.12-{version}.jar",
-                 filename=zip_path)
+        retrieve(
+            url=f"{maven_repo_prefix}/xgboost4j-gpu_2.12/{version}/"
+            f"xgboost4j-gpu_2.12-{version}.jar",
+            filename=zip_path,
+        )
         os.mkdir(extract_dir)
         with zipfile.ZipFile(zip_path, "r") as t:
             t.extractall(extract_dir)
-        cp(os.path.join(extract_dir, "lib", "linux", "x86_64", "libxgboost4j.so"),
-           "xgboost4j-gpu/src/main/resources/lib/linux/x86_64/libxgboost4j.so")
+        cp(
+            os.path.join(extract_dir, "lib", "linux", "x86_64", "libxgboost4j.so"),
+            "xgboost4j-gpu/src/main/resources/lib/linux/x86_64/libxgboost4j.so",
+        )

     print("====Next Steps====")
     print("1. Gain upload right to Maven Central repo.")
     print("1-1. Sign up for a JIRA account at Sonatype: ")
-    print("1-2. File a JIRA ticket: "
-          "https://issues.sonatype.org/secure/CreateIssue.jspa?issuetype=21&pid=10134. Example: "
-          "https://issues.sonatype.org/browse/OSSRH-67724")
-    print("2. Store the Sonatype credentials in .m2/settings.xml. See insturctions in "
-          "https://central.sonatype.org/publish/publish-maven/")
-    print("3. Now on a Mac machine, run:")
+    print(
+        "1-2. File a JIRA ticket: "
+        "https://issues.sonatype.org/secure/CreateIssue.jspa?issuetype=21&pid=10134. Example: "
+        "https://issues.sonatype.org/browse/OSSRH-67724"
+    )
+    print(
+        "2. Store the Sonatype credentials in .m2/settings.xml. See insturctions in "
+        "https://central.sonatype.org/publish/publish-maven/"
+    )
+    print(
+        "3. Now on a M1 Mac machine, run the following to build Scala 2.12 artifacts:"
+    )
     print(" GPG_TTY=$(tty) mvn deploy -Prelease -DskipTests")
-    print("4. Log into https://oss.sonatype.org/. On the left menu panel, click Staging "
-          "Repositories. Visit the URL https://oss.sonatype.org/content/repositories/mldmlc-1085 "
-          "to inspect the staged JAR files. Finally, press Release button to publish the "
-          "artifacts to the Maven Central repository.")
+    print(
+        "4. Log into https://oss.sonatype.org/. On the left menu panel, click Staging "
+        "Repositories. Visit the URL https://oss.sonatype.org/content/repositories/mldmlc-xxxx "
+        "to inspect the staged JAR files. Finally, press Release button to publish the "
+        "artifacts to the Maven Central repository. The top-level metapackage should be "
+        "named xgboost-jvm_2.12."
+    )
+    print("5. Remove the Scala 2.12 artifacts and build Scala 2.13 artifacts:")
+    print(" rm -rf targets/")
+    print(" GPG_TTY=$(tty) mvn deploy -Prelease-cpu-only,scala-2.13 -DskipTests")
+    print(
+        "6. Go to https://oss.sonatype.org/ to release the Scala 2.13 artifacts."
+        "The top-level metapackage should be named xgboost-jvm_2.13."
+    )


 if __name__ == "__main__":
     main()

@@ -329,7 +329,7 @@ Parameters for Linear Booster (``booster=gblinear``)
   - Choice of algorithm to fit linear model

     - ``shotgun``: Parallel coordinate descent algorithm based on shotgun algorithm. Uses 'hogwild' parallelism and therefore produces a nondeterministic solution on each run.
-    - ``coord_descent``: Ordinary coordinate descent algorithm. Also multithreaded but still produces a deterministic solution.
+    - ``coord_descent``: Ordinary coordinate descent algorithm. Also multithreaded but still produces a deterministic solution. When the ``device`` parameter is set to ``cuda`` or ``gpu``, a GPU variant would be used.

 * ``feature_selector`` [default= ``cyclic``]

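The documentation change above notes that the `coord_descent` updater for the linear booster gains a GPU variant when `device` is set to `cuda` or `gpu`. A minimal sketch of how those parameters combine through the Python interface follows; the synthetic data and parameter values are assumptions for illustration, and `device: "cuda"` requires a GPU-enabled build (drop it to stay on the CPU path).

```python
import numpy as np
import xgboost as xgb

# Synthetic regression data (illustrative only).
rng = np.random.default_rng(0)
X = rng.normal(size=(256, 8))
y = X @ rng.normal(size=8) + rng.normal(scale=0.1, size=256)
dtrain = xgb.DMatrix(X, label=y)

params = {
    "booster": "gblinear",
    "updater": "coord_descent",   # deterministic coordinate descent
    "feature_selector": "cyclic",
    "objective": "reg:squarederror",
    "device": "cuda",             # per the doc change, selects the GPU variant
}
booster = xgb.train(params, dtrain, num_boost_round=20)
print(booster.eval(dtrain))
```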
@@ -58,19 +58,19 @@
 /*!
  * \brief Tag function as usable by device
  */
-#if defined (__CUDA__) || defined(__NVCC__)
+#if defined (__CUDA__) || defined(__NVCC__) || defined(__HIPCC__)
 #define XGBOOST_DEVICE __host__ __device__
 #else
 #define XGBOOST_DEVICE
-#endif  // defined (__CUDA__) || defined(__NVCC__)
+#endif  // defined (__CUDA__) || defined(__NVCC__) || defined(__HIPCC__)

-#if defined(__CUDA__) || defined(__CUDACC__)
+#if defined(__CUDA__) || defined(__CUDACC__) || defined(__HIPCC__)
 #define XGBOOST_HOST_DEV_INLINE XGBOOST_DEVICE __forceinline__
 #define XGBOOST_DEV_INLINE __device__ __forceinline__
 #else
 #define XGBOOST_HOST_DEV_INLINE
 #define XGBOOST_DEV_INLINE
-#endif  // defined(__CUDA__) || defined(__CUDACC__)
+#endif  // defined(__CUDA__) || defined(__CUDACC__) || defined(__HIPCC__)

 // These check are for Makefile.
 #if !defined(XGBOOST_MM_PREFETCH_PRESENT) && !defined(XGBOOST_BUILTIN_PREFETCH_PRESENT)
@@ -274,8 +274,8 @@ class GradientPairInt64 {
   GradientPairInt64(GradientPairInt64 const &g) = default;
   GradientPairInt64 &operator=(GradientPairInt64 const &g) = default;

-  XGBOOST_DEVICE [[nodiscard]] T GetQuantisedGrad() const { return grad_; }
-  XGBOOST_DEVICE [[nodiscard]] T GetQuantisedHess() const { return hess_; }
+  [[nodiscard]] XGBOOST_DEVICE T GetQuantisedGrad() const { return grad_; }
+  [[nodiscard]] XGBOOST_DEVICE T GetQuantisedHess() const { return hess_; }

   XGBOOST_DEVICE GradientPairInt64 &operator+=(const GradientPairInt64 &rhs) {
     grad_ += rhs.grad_;

@@ -1,5 +1,5 @@
-/*!
- * Copyright 2020 by Contributors
+/**
+ * Copyright 2020-2023, XGBoost Contributors
  * \file global_config.h
  * \brief Global configuration for XGBoost
  * \author Hyunsu Cho
@@ -7,24 +7,22 @@
 #ifndef XGBOOST_GLOBAL_CONFIG_H_
 #define XGBOOST_GLOBAL_CONFIG_H_

-#include <xgboost/parameter.h>
-#include <vector>
-#include <string>
+#include <dmlc/thread_local.h>  // for ThreadLocalStore
+#include <xgboost/parameter.h>  // for XGBoostParameter
+
+#include <cstdint>  // for int32_t

 namespace xgboost {
-class Json;
-
 struct GlobalConfiguration : public XGBoostParameter<GlobalConfiguration> {
-  int verbosity { 1 };
-  bool use_rmm { false };
+  std::int32_t verbosity{1};
+  bool use_rmm{false};
   DMLC_DECLARE_PARAMETER(GlobalConfiguration) {
     DMLC_DECLARE_FIELD(verbosity)
         .set_range(0, 3)
         .set_default(1)  // shows only warning
         .describe("Flag to print out detailed breakdown of runtime.");
-    DMLC_DECLARE_FIELD(use_rmm)
-        .set_default(false)
-        .describe("Whether to use RAPIDS Memory Manager to allocate GPU memory in XGBoost");
+    DMLC_DECLARE_FIELD(use_rmm).set_default(false).describe(
+        "Whether to use RAPIDS Memory Manager to allocate GPU memory in XGBoost");
   }
 };

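The `GlobalConfiguration` parameter shown above (`verbosity`, `use_rmm`) is the same process-wide state that the Python package exposes through its configuration helpers. A small sketch of reading and temporarily overriding it; the exact values printed depend on the local installation.

```python
import xgboost as xgb

# Inspect the current global configuration; it includes "verbosity" and "use_rmm".
print(xgb.get_config())

# Temporarily raise verbosity; previous settings are restored when the block exits.
with xgb.config_context(verbosity=3):
    print(xgb.get_config()["verbosity"])  # 3

# Or change it for the rest of the process.
xgb.set_config(verbosity=1)
```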
@@ -58,11 +58,11 @@

 namespace xgboost {

-#ifdef __CUDACC__
+#if defined(__CUDACC__) || defined(__HIPCC__)
 // Sets a function to call instead of cudaSetDevice();
 // only added for testing
 void SetCudaSetDeviceHandler(void (*handler)(int));
-#endif  // __CUDACC__
+#endif  // __CUDACC__ || __HIPCC__

 template <typename T> struct HostDeviceVectorImpl;

@@ -30,11 +30,11 @@

 // decouple it from xgboost.
 #ifndef LINALG_HD
-#if defined(__CUDA__) || defined(__NVCC__)
+#if defined(__CUDA__) || defined(__NVCC__) || defined(__HIPCC__)
 #define LINALG_HD __host__ __device__
 #else
 #define LINALG_HD
-#endif  // defined (__CUDA__) || defined(__NVCC__)
+#endif  // defined (__CUDA__) || defined(__NVCC__) || defined(__HIPCC__)
 #endif  // LINALG_HD

 namespace xgboost::linalg {
@@ -118,7 +118,7 @@ using IndexToTag = std::conditional_t<std::is_integral<RemoveCRType<S>>::value,

 template <int32_t n, typename Fn>
 LINALG_HD constexpr auto UnrollLoop(Fn fn) {
-#if defined __CUDA_ARCH__
+#if defined(__CUDA_ARCH__) || defined(__HIPCC__)
 #pragma unroll n
 #endif  // defined __CUDA_ARCH__
   for (int32_t i = 0; i < n; ++i) {
@@ -136,7 +136,7 @@ int32_t NativePopc(T v) {
 inline LINALG_HD int Popc(uint32_t v) {
 #if defined(__CUDA_ARCH__)
   return __popc(v);
-#elif defined(__GNUC__) || defined(__clang__)
+#elif defined(__GNUC__) || defined(__clang__) || defined(__HIPCC__)
   return __builtin_popcount(v);
 #elif defined(_MSC_VER)
   return __popcnt(v);
@@ -148,7 +148,7 @@ inline LINALG_HD int Popc(uint32_t v) {
 inline LINALG_HD int Popc(uint64_t v) {
 #if defined(__CUDA_ARCH__)
   return __popcll(v);
-#elif defined(__GNUC__) || defined(__clang__)
+#elif defined(__GNUC__) || defined(__clang__) || defined(__HIPCC__)
   return __builtin_popcountll(v);
 #elif defined(_MSC_VER) && defined(_M_X64)
   return __popcnt64(v);

@@ -41,7 +41,9 @@

 #if defined(__CUDACC__)
 #include <cuda_runtime.h>
-#endif  // defined(__CUDACC__)
+#elif defined(__HIPCC__)
+#include <hip/hip_runtime.h>
+#endif

 /*!
  * The version number 1910 is picked up from GSL.
@@ -104,7 +106,42 @@ namespace common {

 #define SPAN_CHECK KERNEL_CHECK

-#else  // ------------------------------ not CUDA ----------------------------
+#elif defined(__HIPCC__)
+// Usual logging facility is not available inside device code.
+
+#if defined(_MSC_VER)
+
+// Windows HIP doesn't have __assert_fail.
+#define HIP_KERNEL_CHECK(cond) \
+  do { \
+    if (XGBOOST_EXPECT(!(cond), false)) { \
+      __builtin_trap(); \
+    } \
+  } while (0)
+
+#else  // defined(_MSC_VER)
+
+#define __ASSERT_STR_HELPER(x) #x
+
+#if 0
+#define HIP_KERNEL_CHECK(cond) \
+  (XGBOOST_EXPECT((cond), true) \
+       ? static_cast<void>(0) \
+       : __assert_fail(__ASSERT_STR_HELPER((cond)), __FILE__, __LINE__, __PRETTY_FUNCTION__))
+#else
+#define HIP_KERNEL_CHECK(cond) \
+  (XGBOOST_EXPECT((cond), true) \
+       ? static_cast<void>(0) \
+       : __builtin_trap())
+#endif
+
+#endif  // defined(_MSC_VER)
+
+#define KERNEL_CHECK HIP_KERNEL_CHECK
+
+#define SPAN_CHECK KERNEL_CHECK
+
+#else  // ------------------------------ not CUDA or HIP ----------------------------

 #if defined(XGBOOST_STRICT_R_MODE) && XGBOOST_STRICT_R_MODE == 1

@@ -120,7 +157,7 @@ namespace common {

 #endif  // defined(XGBOOST_STRICT_R_MODE)

-#endif  // __CUDA_ARCH__
+#endif  // __CUDA_ARCH__ || __HIPCC__

 #define SPAN_LT(lhs, rhs) SPAN_CHECK((lhs) < (rhs))

@@ -317,7 +354,7 @@ struct IsSpanOracle<Span<T, Extent>> : std::true_type {};
 template <class T>
 struct IsSpan : public IsSpanOracle<typename std::remove_cv<T>::type> {};

-// Re-implement std algorithms here to adopt CUDA.
+// Re-implement std algorithms here to adopt CUDA/HIP
 template <typename T>
 struct Less {
   XGBOOST_DEVICE constexpr bool operator()(const T& _x, const T& _y) const {

@@ -6,6 +6,6 @@

 #define XGBOOST_VER_MAJOR 2 /* NOLINT */
 #define XGBOOST_VER_MINOR 0 /* NOLINT */
-#define XGBOOST_VER_PATCH 0 /* NOLINT */
+#define XGBOOST_VER_PATCH 2 /* NOLINT */

 #endif  // XGBOOST_VERSION_CONFIG_H_
@@ -9,6 +9,11 @@ if (USE_CUDA)
     ${PROJECT_SOURCE_DIR}/jvm-packages/xgboost4j-gpu/src/native/xgboost4j-gpu.cu)
 endif (USE_CUDA)

+if (USE_HIP)
+  list(APPEND JVM_SOURCES
+    ${PROJECT_SOURCE_DIR}/jvm-packages/xgboost4j-gpu/src/native/xgboost4j-gpu.hip)
+endif (USE_HIP)
+
 add_library(xgboost4j SHARED ${JVM_SOURCES} ${XGBOOST_OBJ_SOURCES})

 if (ENABLE_ALL_WARNINGS)
@@ -25,4 +30,3 @@ target_include_directories(xgboost4j
   ${PROJECT_SOURCE_DIR}/rabit/include)

 set_output_directory(xgboost4j ${PROJECT_SOURCE_DIR}/lib)
-target_link_libraries(xgboost4j PRIVATE ${JAVA_JVM_LIBRARY})
@@ -22,6 +22,8 @@ CONFIG = {

     "USE_CUDA": "OFF",
     "USE_NCCL": "OFF",
+    "USE_HIP": "OFF",
+    "USE_RCCL": "OFF",
     "JVM_BINDINGS": "ON",
     "LOG_CAPI_INVOCATION": "OFF"
 }
@@ -74,6 +76,7 @@ if __name__ == "__main__":
     parser = argparse.ArgumentParser()
     parser.add_argument('--log-capi-invocation', type=str, choices=['ON', 'OFF'], default='OFF')
     parser.add_argument('--use-cuda', type=str, choices=['ON', 'OFF'], default='OFF')
+    parser.add_argument('--use-hip', type=str, choices=['ON', 'OFF'], default='OFF')
     cli_args = parser.parse_args()

     if sys.platform == "darwin":
@@ -84,7 +87,7 @@ if __name__ == "__main__":

     print("building Java wrapper")
     with cd(".."):
-        build_dir = 'build-gpu' if cli_args.use_cuda == 'ON' else 'build'
+        build_dir = 'build-gpu' if cli_args.use_cuda == 'ON' or cli_args.use_hip == 'ON' else 'build'
         maybe_makedirs(build_dir)
         with cd(build_dir):
            if sys.platform == "win32":
@@ -103,6 +106,9 @@ if __name__ == "__main__":
            if cli_args.use_cuda == 'ON':
                CONFIG['USE_CUDA'] = 'ON'
                CONFIG['USE_NCCL'] = 'ON'
+            elif cli_args.use_hip == 'ON':
+                CONFIG['USE_HIP'] = 'ON'
+                CONFIG['USE_RCCL'] = 'ON'

            args = ["-D{0}:BOOL={1}".format(k, v) for k, v in CONFIG.items()]

@@ -125,8 +131,8 @@ if __name__ == "__main__":
         run(f'"{sys.executable}" mapfeat.py')
         run(f'"{sys.executable}" mknfold.py machine.txt 1')

-    xgboost4j = 'xgboost4j-gpu' if cli_args.use_cuda == 'ON' else 'xgboost4j'
-    xgboost4j_spark = 'xgboost4j-spark-gpu' if cli_args.use_cuda == 'ON' else 'xgboost4j-spark'
+    xgboost4j = 'xgboost4j-gpu' if cli_args.use_cuda == 'ON' or cli_args.use_hip == 'ON' else 'xgboost4j'
+    xgboost4j_spark = 'xgboost4j-spark-gpu' if cli_args.use_cuda == 'ON' or cli_args.use_hip == 'ON' else 'xgboost4j-spark'

     print("copying native library")
     library_name, os_folder = {
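The new `--use-hip` switch mirrors the existing CUDA path in the JVM build script: it flips `USE_HIP`/`USE_RCCL` in `CONFIG`, and every entry of that dict is then serialized into a CMake `-D` definition. A minimal, self-contained sketch of that flag expansion, assuming the same `CONFIG` shape as above (the surrounding names are illustrative only, not the exact script):

    # Hypothetical, stand-alone illustration of how the build config becomes CMake flags.
    CONFIG = {"USE_CUDA": "OFF", "USE_NCCL": "OFF", "USE_HIP": "ON", "USE_RCCL": "ON"}

    # Same expression as in the diff: every entry becomes a -D<key>:BOOL=<value> argument.
    args = ["-D{0}:BOOL={1}".format(k, v) for k, v in CONFIG.items()]
    print(args)  # ['-DUSE_CUDA:BOOL=OFF', '-DUSE_NCCL:BOOL=OFF', '-DUSE_HIP:BOOL=ON', '-DUSE_RCCL:BOOL=ON']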
@@ -5,8 +5,8 @@
   <modelVersion>4.0.0</modelVersion>

   <groupId>ml.dmlc</groupId>
-  <artifactId>xgboost-jvm</artifactId>
-  <version>2.0.0-SNAPSHOT</version>
+  <artifactId>xgboost-jvm_${scala.binary.version}</artifactId>
+  <version>2.0.2</version>
   <packaging>pom</packaging>
   <name>XGBoost JVM Package</name>
   <description>JVM Package for XGBoost</description>
@@ -43,6 +43,7 @@
     <maven.wagon.http.retryHandler.count>5</maven.wagon.http.retryHandler.count>
     <log.capi.invocation>OFF</log.capi.invocation>
     <use.cuda>OFF</use.cuda>
+    <use.hip>OFF</use.hip>
     <cudf.version>23.08.0</cudf.version>
     <spark.rapids.version>23.08.0</spark.rapids.version>
     <cudf.classifier>cuda11</cudf.classifier>
@@ -189,6 +190,93 @@
         </plugins>
       </build>
     </profile>
+    <profile>
+      <id>release-cpu-only</id>
+      <modules>
+        <module>xgboost4j</module>
+        <module>xgboost4j-example</module>
+        <module>xgboost4j-spark</module>
+        <module>xgboost4j-flink</module>
+      </modules>
+      <build>
+        <plugins>
+          <plugin>
+            <groupId>org.apache.maven.plugins</groupId>
+            <artifactId>maven-jar-plugin</artifactId>
+            <version>3.3.0</version>
+            <executions>
+              <execution>
+                <id>empty-javadoc-jar</id>
+                <phase>package</phase>
+                <goals>
+                  <goal>jar</goal>
+                </goals>
+                <configuration>
+                  <classifier>javadoc</classifier>
+                  <classesDirectory>${basedir}/javadoc</classesDirectory>
+                </configuration>
+              </execution>
+            </executions>
+          </plugin>
+          <plugin>
+            <groupId>org.apache.maven.plugins</groupId>
+            <artifactId>maven-release-plugin</artifactId>
+            <version>3.0.1</version>
+            <configuration>
+              <autoVersionSubmodules>true</autoVersionSubmodules>
+              <useReleaseProfile>false</useReleaseProfile>
+              <releaseProfiles>release</releaseProfiles>
+              <goals>deploy</goals>
+            </configuration>
+          </plugin>
+          <plugin>
+            <groupId>org.apache.maven.plugins</groupId>
+            <artifactId>maven-gpg-plugin</artifactId>
+            <version>3.1.0</version>
+            <executions>
+              <execution>
+                <id>sign-artifacts</id>
+                <phase>verify</phase>
+                <goals>
+                  <goal>sign</goal>
+                </goals>
+              </execution>
+            </executions>
+          </plugin>
+          <plugin>
+            <groupId>org.apache.maven.plugins</groupId>
+            <artifactId>maven-source-plugin</artifactId>
+            <version>3.3.0</version>
+            <executions>
+              <execution>
+                <id>attach-sources</id>
+                <goals>
+                  <goal>jar-no-fork</goal>
+                </goals>
+              </execution>
+            </executions>
+          </plugin>
+          <plugin>
+            <groupId>org.sonatype.plugins</groupId>
+            <artifactId>nexus-staging-maven-plugin</artifactId>
+            <version>1.6.13</version>
+            <extensions>true</extensions>
+            <configuration>
+              <serverId>ossrh</serverId>
+              <nexusUrl>https://oss.sonatype.org/</nexusUrl>
+              <autoReleaseAfterClose>false</autoReleaseAfterClose>
+            </configuration>
+          </plugin>
+          <plugin>
+            <groupId>org.apache.maven.plugins</groupId>
+            <artifactId>maven-surefire-plugin</artifactId>
+            <configuration>
+              <skipTests>true</skipTests>
+            </configuration>
+          </plugin>
+        </plugins>
+      </build>
+    </profile>
     <profile>
       <id>assembly</id>
       <build>
@@ -5,12 +5,12 @@
   <modelVersion>4.0.0</modelVersion>
   <parent>
     <groupId>ml.dmlc</groupId>
-    <artifactId>xgboost-jvm</artifactId>
-    <version>2.0.0-SNAPSHOT</version>
+    <artifactId>xgboost-jvm_${scala.binary.version}</artifactId>
+    <version>2.0.2</version>
   </parent>
   <name>xgboost4j-example</name>
   <artifactId>xgboost4j-example_${scala.binary.version}</artifactId>
-  <version>2.0.0-SNAPSHOT</version>
+  <version>2.0.2</version>
   <packaging>jar</packaging>
   <build>
     <plugins>
@@ -5,13 +5,13 @@
   <modelVersion>4.0.0</modelVersion>
   <parent>
     <groupId>ml.dmlc</groupId>
-    <artifactId>xgboost-jvm</artifactId>
-    <version>2.0.0-SNAPSHOT</version>
+    <artifactId>xgboost-jvm_${scala.binary.version}</artifactId>
+    <version>2.0.2</version>
   </parent>

   <name>xgboost4j-flink</name>
   <artifactId>xgboost4j-flink_${scala.binary.version}</artifactId>
-  <version>2.0.0-SNAPSHOT</version>
+  <version>2.0.2</version>
   <properties>
     <flink-ml.version>2.2.0</flink-ml.version>
   </properties>
@@ -5,12 +5,12 @@
   <modelVersion>4.0.0</modelVersion>
   <parent>
     <groupId>ml.dmlc</groupId>
-    <artifactId>xgboost-jvm</artifactId>
-    <version>2.0.0-SNAPSHOT</version>
+    <artifactId>xgboost-jvm_${scala.binary.version}</artifactId>
+    <version>2.0.2</version>
   </parent>
   <artifactId>xgboost4j-gpu_${scala.binary.version}</artifactId>
   <name>xgboost4j-gpu</name>
-  <version>2.0.0-SNAPSHOT</version>
+  <version>2.0.2</version>
   <packaging>jar</packaging>

   <dependencies>
@@ -104,6 +104,8 @@
                   <argument>${log.capi.invocation}</argument>
                   <argument>--use-cuda</argument>
                   <argument>${use.cuda}</argument>
+                  <argument>--use-hip</argument>
+                  <argument>${use.hip}</argument>
                 </arguments>
                 <workingDirectory>${user.dir}</workingDirectory>
               </configuration>
@@ -2,7 +2,7 @@
 // Created by bobwang on 2021/9/8.
 //

-#ifndef XGBOOST_USE_CUDA
+#if !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP)

 #include <jni.h>

@@ -1,6 +1,10 @@
 #include <jni.h>

+#if defined(XGBOOST_USE_CUDA)
 #include "../../../../src/common/device_helpers.cuh"
+#elif defined(XGBOOST_USE_HIP)
+#include "../../../../src/common/device_helpers.hip.h"
+#endif
 #include "../../../../src/common/cuda_pinned_allocator.h"
 #include "../../../../src/data/array_interface.h"
 #include "jvm_utils.h"

jvm-packages/xgboost4j-gpu/src/native/xgboost4j-gpu.hip (new file, 4 lines)
@@ -0,0 +1,4 @@
+
+#if defined(XGBOOST_USE_HIP)
+#include "xgboost4j-gpu.cu"
+#endif
@@ -5,8 +5,8 @@
   <modelVersion>4.0.0</modelVersion>
   <parent>
     <groupId>ml.dmlc</groupId>
-    <artifactId>xgboost-jvm</artifactId>
-    <version>2.0.0-SNAPSHOT</version>
+    <artifactId>xgboost-jvm_${scala.binary.version}</artifactId>
+    <version>2.0.2</version>
   </parent>
   <name>xgboost4j-spark-gpu</name>
   <artifactId>xgboost4j-spark-gpu_${scala.binary.version}</artifactId>
@@ -5,8 +5,8 @@
   <modelVersion>4.0.0</modelVersion>
   <parent>
     <groupId>ml.dmlc</groupId>
-    <artifactId>xgboost-jvm</artifactId>
-    <version>2.0.0-SNAPSHOT</version>
+    <artifactId>xgboost-jvm_${scala.binary.version}</artifactId>
+    <version>2.0.2</version>
   </parent>
   <name>xgboost4j-spark</name>
   <artifactId>xgboost4j-spark_${scala.binary.version}</artifactId>
@@ -5,12 +5,12 @@
   <modelVersion>4.0.0</modelVersion>
   <parent>
     <groupId>ml.dmlc</groupId>
-    <artifactId>xgboost-jvm</artifactId>
-    <version>2.0.0-SNAPSHOT</version>
+    <artifactId>xgboost-jvm_${scala.binary.version}</artifactId>
+    <version>2.0.2</version>
   </parent>
   <name>xgboost4j</name>
   <artifactId>xgboost4j_${scala.binary.version}</artifactId>
-  <version>2.0.0-SNAPSHOT</version>
+  <version>2.0.2</version>
   <packaging>jar</packaging>

   <dependencies>
@@ -15,6 +15,10 @@ class BuildConfiguration:  # pylint: disable=R0902
     use_cuda: bool = False
     # Whether to enable NCCL
     use_nccl: bool = False
+    # Whether to enable HIP
+    use_hip: bool = False
+    # Whether to enable RCCL
+    use_rccl: bool = False
     # Whether to enable HDFS
     use_hdfs: bool = False
     # Whether to enable Azure Storage
@@ -132,17 +132,29 @@ def locate_or_build_libxgboost(

     if build_config.use_system_libxgboost:
         # Find libxgboost from system prefix
-        sys_base_prefix = pathlib.Path(sys.base_prefix).absolute().resolve()
-        libxgboost_sys = sys_base_prefix / "lib" / _lib_name()
-        if not libxgboost_sys.exists():
-            raise RuntimeError(
-                f"use_system_libxgboost was specified but {_lib_name()} is "
-                f"not found in {libxgboost_sys.parent}"
-            )
-
-        logger.info("Using system XGBoost: %s", str(libxgboost_sys))
-        return libxgboost_sys
+        sys_prefix = pathlib.Path(sys.base_prefix)
+        sys_prefix_candidates = [
+            sys_prefix / "lib",
+            # Paths possibly used on Windows
+            sys_prefix / "bin",
+            sys_prefix / "Library",
+            sys_prefix / "Library" / "bin",
+            sys_prefix / "Library" / "lib",
+        ]
+        sys_prefix_candidates = [
+            p.expanduser().resolve() for p in sys_prefix_candidates
+        ]
+        for candidate_dir in sys_prefix_candidates:
+            libtreelite_sys = candidate_dir / _lib_name()
+            if libtreelite_sys.exists():
+                logger.info("Using system XGBoost: %s", str(libtreelite_sys))
+                return libtreelite_sys
+        raise RuntimeError(
+            f"use_system_libxgboost was specified but {_lib_name()} is "
+            f"not found. Paths searched (in order): \n"
+            + "\n".join([f"* {str(p)}" for p in sys_prefix_candidates])
+        )

     libxgboost = locate_local_libxgboost(toplevel_dir, logger=logger)
     if libxgboost is not None:
         return libxgboost
@@ -7,7 +7,7 @@ build-backend = "packager.pep517"

 [project]
 name = "xgboost"
-version = "2.0.0-dev"
+version = "2.0.2"
 authors = [
     { name = "Hyunsu Cho", email = "chohyu01@cs.washington.edu" },
     { name = "Jiaming Yuan", email = "jm.yuan@outlook.com" }
@@ -1 +1 @@
-2.0.0-dev
+2.0.2
@@ -88,6 +88,18 @@ def is_cudf_available() -> bool:
         return False


+def is_cupy_available() -> bool:
+    """Check cupy package available or not"""
+    if importlib.util.find_spec("cupy") is None:
+        return False
+    try:
+        import cupy
+
+        return True
+    except ImportError:
+        return False
+
+
 try:
     import scipy.sparse as scipy_sparse
     from scipy.sparse import csr_matrix as scipy_csr
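`is_cupy_available` mirrors the existing `is_cudf_available` probe: it first consults the import machinery via `importlib.util.find_spec` and only then attempts the real import. A hedged, self-contained sketch of how a caller could use such a probe to pick an array backend (the helper pattern is taken from the diff; the surrounding function is illustrative only):

    import importlib.util
    import numpy as np


    def pick_array_backend():
        """Return cupy when it is importable, otherwise fall back to numpy."""
        if importlib.util.find_spec("cupy") is not None:
            try:
                import cupy  # optional GPU dependency

                return cupy
            except ImportError:
                pass
        return np


    xp = pick_array_backend()
    print(xp.arange(4).sum())  # uses GPU arrays when cupy is installed, numpy otherwise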
@@ -2399,6 +2399,7 @@ class Booster:
             _is_cudf_df,
             _is_cupy_array,
             _is_list,
+            _is_np_array_like,
             _is_pandas_df,
             _is_pandas_series,
             _is_tuple,
@@ -2428,7 +2429,7 @@ class Booster:
                 f"got {data.shape[1]}"
             )

-        if isinstance(data, np.ndarray):
+        if _is_np_array_like(data):
             from .data import _ensure_np_dtype

             data, _ = _ensure_np_dtype(data, data.dtype)
@@ -164,8 +164,8 @@ def _is_scipy_coo(data: DataType) -> bool:
     return isinstance(data, scipy.sparse.coo_matrix)


-def _is_numpy_array(data: DataType) -> bool:
-    return isinstance(data, (np.ndarray, np.matrix))
+def _is_np_array_like(data: DataType) -> bool:
+    return hasattr(data, "__array_interface__")


 def _ensure_np_dtype(
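The `isinstance` check is replaced with a duck-typed test on `__array_interface__`, so anything implementing numpy's array interface protocol is accepted, not just `np.ndarray` and `np.matrix`. A minimal sketch of what the new predicate accepts (illustrative only):

    import numpy as np


    def _is_np_array_like(data) -> bool:
        # Same duck-typed check as in the diff: anything exposing the
        # numpy array interface protocol is treated as a numpy array.
        return hasattr(data, "__array_interface__")


    print(_is_np_array_like(np.arange(3)))         # True: plain ndarray
    print(_is_np_array_like(np.matrix([[1, 2]])))  # True: matrix still exposes the protocol
    print(_is_np_array_like([1, 2, 3]))            # False: a plain list has no __array_interface__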
@@ -317,7 +317,6 @@ def pandas_feature_info(
 ) -> Tuple[Optional[FeatureNames], Optional[FeatureTypes]]:
     """Handle feature info for pandas dataframe."""
     import pandas as pd
-    from pandas.api.types import is_categorical_dtype, is_sparse

     # handle feature names
     if feature_names is None and meta is None:
@@ -332,10 +331,10 @@ def pandas_feature_info(
     if feature_types is None and meta is None:
         feature_types = []
         for dtype in data.dtypes:
-            if is_sparse(dtype):
+            if is_pd_sparse_dtype(dtype):
                 feature_types.append(_pandas_dtype_mapper[dtype.subtype.name])
             elif (
-                is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
+                is_pd_cat_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
             ) and enable_categorical:
                 feature_types.append(CAT_T)
             else:
@@ -345,18 +344,13 @@ def pandas_feature_info(

 def is_nullable_dtype(dtype: PandasDType) -> bool:
     """Whether dtype is a pandas nullable type."""
-    from pandas.api.types import (
-        is_bool_dtype,
-        is_categorical_dtype,
-        is_float_dtype,
-        is_integer_dtype,
-    )
+    from pandas.api.types import is_bool_dtype, is_float_dtype, is_integer_dtype

     is_int = is_integer_dtype(dtype) and dtype.name in pandas_nullable_mapper
     # np.bool has alias `bool`, while pd.BooleanDtype has `boolean`.
     is_bool = is_bool_dtype(dtype) and dtype.name == "boolean"
     is_float = is_float_dtype(dtype) and dtype.name in pandas_nullable_mapper
-    return is_int or is_bool or is_float or is_categorical_dtype(dtype)
+    return is_int or is_bool or is_float or is_pd_cat_dtype(dtype)


 def is_pa_ext_dtype(dtype: Any) -> bool:
@@ -371,17 +365,48 @@ def is_pa_ext_categorical_dtype(dtype: Any) -> bool:
     )


+def is_pd_cat_dtype(dtype: PandasDType) -> bool:
+    """Wrapper for testing pandas category type."""
+    import pandas as pd
+
+    if hasattr(pd.util, "version") and hasattr(pd.util.version, "Version"):
+        Version = pd.util.version.Version
+        if Version(pd.__version__) >= Version("2.1.0"):
+            from pandas import CategoricalDtype
+
+            return isinstance(dtype, CategoricalDtype)
+
+    from pandas.api.types import is_categorical_dtype
+
+    return is_categorical_dtype(dtype)
+
+
+def is_pd_sparse_dtype(dtype: PandasDType) -> bool:
+    """Wrapper for testing pandas sparse type."""
+    import pandas as pd
+
+    if hasattr(pd.util, "version") and hasattr(pd.util.version, "Version"):
+        Version = pd.util.version.Version
+        if Version(pd.__version__) >= Version("2.1.0"):
+            from pandas import SparseDtype
+
+            return isinstance(dtype, SparseDtype)
+
+    from pandas.api.types import is_sparse
+
+    return is_sparse(dtype)
+
+
 def pandas_cat_null(data: DataFrame) -> DataFrame:
     """Handle categorical dtype and nullable extension types from pandas."""
     import pandas as pd
-    from pandas.api.types import is_categorical_dtype

     # handle category codes and nullable.
     cat_columns = []
     nul_columns = []
     # avoid an unnecessary conversion if possible
     for col, dtype in zip(data.columns, data.dtypes):
-        if is_categorical_dtype(dtype):
+        if is_pd_cat_dtype(dtype):
             cat_columns.append(col)
         elif is_pa_ext_categorical_dtype(dtype):
             raise ValueError(
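These wrappers exist because `pandas.api.types.is_categorical_dtype` and `is_sparse` are deprecated starting with pandas 2.1, where `isinstance` checks against `CategoricalDtype`/`SparseDtype` are preferred. A hedged usage sketch of the same version-gated pattern (illustrative, not the exact library code; assumes a pandas version that ships `pandas.util.version`):

    import pandas as pd
    from pandas.util.version import Version


    def is_cat_dtype(dtype) -> bool:
        """Prefer the isinstance check on pandas >= 2.1, fall back to the old helper."""
        if Version(pd.__version__) >= Version("2.1.0"):
            return isinstance(dtype, pd.CategoricalDtype)
        from pandas.api.types import is_categorical_dtype

        return is_categorical_dtype(dtype)


    df = pd.DataFrame({"c": pd.Categorical(["a", "b", "a"]), "x": [1.0, 2.0, 3.0]})
    print([col for col, dtype in df.dtypes.items() if is_cat_dtype(dtype)])  # ['c']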
@@ -398,7 +423,7 @@ def pandas_cat_null(data: DataFrame) -> DataFrame:
     transformed = data

     def cat_codes(ser: pd.Series) -> pd.Series:
-        if is_categorical_dtype(ser.dtype):
+        if is_pd_cat_dtype(ser.dtype):
             return ser.cat.codes
         assert is_pa_ext_categorical_dtype(ser.dtype)
         # Not yet supported, the index is not ordered for some reason. Alternately:
@@ -454,14 +479,12 @@ def _transform_pandas_df(
     meta: Optional[str] = None,
     meta_type: Optional[NumpyDType] = None,
 ) -> Tuple[np.ndarray, Optional[FeatureNames], Optional[FeatureTypes]]:
-    from pandas.api.types import is_categorical_dtype, is_sparse
-
     pyarrow_extension = False
     for dtype in data.dtypes:
         if not (
             (dtype.name in _pandas_dtype_mapper)
-            or is_sparse(dtype)
-            or (is_categorical_dtype(dtype) and enable_categorical)
+            or is_pd_sparse_dtype(dtype)
+            or (is_pd_cat_dtype(dtype) and enable_categorical)
             or is_pa_ext_dtype(dtype)
         ):
             _invalid_dataframe_dtype(data)
@@ -515,9 +538,8 @@ def _meta_from_pandas_series(
 ) -> None:
     """Help transform pandas series for meta data like labels"""
     data = data.values.astype("float")
-    from pandas.api.types import is_sparse

-    if is_sparse(data):
+    if is_pd_sparse_dtype(getattr(data, "dtype", data)):
         data = data.to_dense()  # type: ignore
     assert len(data.shape) == 1 or data.shape[1] == 0 or data.shape[1] == 1
     _meta_from_numpy(data, name, dtype, handle)
@@ -539,13 +561,11 @@ def _from_pandas_series(
     feature_names: Optional[FeatureNames],
     feature_types: Optional[FeatureTypes],
 ) -> DispatchedDataBackendReturnType:
-    from pandas.api.types import is_categorical_dtype
-
     if (data.dtype.name not in _pandas_dtype_mapper) and not (
-        is_categorical_dtype(data.dtype) and enable_categorical
+        is_pd_cat_dtype(data.dtype) and enable_categorical
     ):
         _invalid_dataframe_dtype(data)
-    if enable_categorical and is_categorical_dtype(data.dtype):
+    if enable_categorical and is_pd_cat_dtype(data.dtype):
         data = data.cat.codes
     return _from_numpy_array(
         data.values.reshape(data.shape[0], 1).astype("float"),
@@ -1051,7 +1071,7 @@ def dispatch_data_backend(
         return _from_scipy_csr(
             data.tocsr(), missing, threads, feature_names, feature_types
         )
-    if _is_numpy_array(data):
+    if _is_np_array_like(data):
         return _from_numpy_array(
             data, missing, threads, feature_names, feature_types, data_split_mode
         )
@@ -1194,7 +1214,7 @@ def dispatch_meta_backend(
     if _is_tuple(data):
         _meta_from_tuple(data, name, dtype, handle)
         return
-    if _is_numpy_array(data):
+    if _is_np_array_like(data):
         _meta_from_numpy(data, name, dtype, handle)
         return
     if _is_pandas_df(data):
@@ -1281,7 +1301,7 @@ def _proxy_transform(
         return _transform_dlpack(data), None, feature_names, feature_types
     if _is_list(data) or _is_tuple(data):
         data = np.array(data)
-    if _is_numpy_array(data):
+    if _is_np_array_like(data):
         data, _ = _ensure_np_dtype(data, data.dtype)
         return data, None, feature_names, feature_types
     if _is_scipy_csr(data):
@@ -1331,7 +1351,7 @@ def dispatch_proxy_set_data(
         if not allow_host:
             raise err

-    if _is_numpy_array(data):
+    if _is_np_array_like(data):
         _check_data_shape(data)
         proxy._set_data_from_array(data)  # pylint: disable=W0212
         return
@@ -31,16 +31,15 @@ def find_lib_path() -> List[str]:
     ]

     if sys.platform == "win32":
-        if platform.architecture()[0] == "64bit":
-            dll_path.append(os.path.join(curr_path, "../../windows/x64/Release/"))
-            # hack for pip installation when copy all parent source
-            # directory here
-            dll_path.append(os.path.join(curr_path, "./windows/x64/Release/"))
-        else:
-            dll_path.append(os.path.join(curr_path, "../../windows/Release/"))
-            # hack for pip installation when copy all parent source
-            # directory here
-            dll_path.append(os.path.join(curr_path, "./windows/Release/"))
+        # On Windows, Conda may install libs in different paths
+        dll_path.extend(
+            [
+                os.path.join(sys.base_prefix, "bin"),
+                os.path.join(sys.base_prefix, "Library"),
+                os.path.join(sys.base_prefix, "Library", "bin"),
+                os.path.join(sys.base_prefix, "Library", "lib"),
+            ]
+        )
         dll_path = [os.path.join(p, "xgboost.dll") for p in dll_path]
     elif sys.platform.startswith(("linux", "freebsd", "emscripten")):
         dll_path = [os.path.join(p, "libxgboost.so") for p in dll_path]
@@ -2093,7 +2093,17 @@ class XGBRanker(XGBModel, XGBRankerMixIn):

         """
         X, qid = _get_qid(X, None)
-        Xyq = DMatrix(X, y, qid=qid)
+        # fixme(jiamingy): base margin and group weight is not yet supported. We might
+        # need to make extra special fields in the dataframe.
+        Xyq = DMatrix(
+            X,
+            y,
+            qid=qid,
+            missing=self.missing,
+            enable_categorical=self.enable_categorical,
+            nthread=self.n_jobs,
+            feature_types=self.feature_types,
+        )
        if callable(self.eval_metric):
            metric = ltr_metric_decorator(self.eval_metric, self.n_jobs)
            result_str = self.get_booster().eval_set([(Xyq, "eval")], feval=metric)
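Forwarding `missing`, `enable_categorical`, `nthread`, and `feature_types` makes the scoring DMatrix consistent with the one built during `fit`. A hedged, self-contained sketch of constructing a ranking DMatrix with query IDs (synthetic data; the parameter values are only illustrative defaults):

    import numpy as np
    import xgboost as xgb

    rng = np.random.default_rng(0)
    X = rng.normal(size=(8, 3))
    y = rng.integers(0, 4, size=8)          # relevance labels
    qid = np.array([0, 0, 0, 0, 1, 1, 1, 1])  # two query groups; rows must be grouped by qid

    # Mirrors the keyword arguments now forwarded in the diff.
    Xyq = xgb.DMatrix(
        X,
        y,
        qid=qid,
        missing=np.nan,
        enable_categorical=False,
    )

    booster = xgb.train({"objective": "rank:ndcg"}, Xyq, num_boost_round=5)
    print(booster.eval(Xyq))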
@@ -22,7 +22,7 @@ from typing import (

 import numpy as np
 import pandas as pd
-from pyspark import SparkContext, cloudpickle
+from pyspark import RDD, SparkContext, cloudpickle
 from pyspark.ml import Estimator, Model
 from pyspark.ml.functions import array_to_vector, vector_to_array
 from pyspark.ml.linalg import VectorUDT
@@ -44,6 +44,7 @@ from pyspark.ml.util import (
     MLWritable,
     MLWriter,
 )
+from pyspark.resource import ResourceProfileBuilder, TaskResourceRequests
 from pyspark.sql import Column, DataFrame
 from pyspark.sql.functions import col, countDistinct, pandas_udf, rand, struct
 from pyspark.sql.types import (
@@ -59,11 +60,12 @@ from scipy.special import expit, softmax  # pylint: disable=no-name-in-module

 import xgboost
 from xgboost import XGBClassifier
-from xgboost.compat import is_cudf_available
+from xgboost.compat import is_cudf_available, is_cupy_available
 from xgboost.core import Booster, _check_distributed_params
 from xgboost.sklearn import DEFAULT_N_ESTIMATORS, XGBModel, _can_use_qdm
 from xgboost.training import train as worker_train

+from .._typing import ArrayLike
 from .data import (
     _read_csr_matrix_from_unwrapped_spark_vec,
     alias,
@@ -87,6 +89,7 @@ from .utils import (
     _get_rabit_args,
     _get_spark_session,
     _is_local,
+    _is_standalone_or_localcluster,
     deserialize_booster,
     deserialize_xgb_model,
     get_class_name,
@@ -241,6 +244,13 @@ class _SparkXGBParams(
         TypeConverters.toList,
     )

+    def set_device(self, value: str) -> "_SparkXGBParams":
+        """Set device, optional value: cpu, cuda, gpu"""
+        _check_distributed_params({"device": value})
+        assert value in ("cpu", "cuda", "gpu")
+        self.set(self.device, value)
+        return self
+
     @classmethod
     def _xgb_cls(cls) -> Type[XGBModel]:
         """
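`set_device` gives the PySpark estimators the same `device` switch the sklearn wrappers use in 2.0, and it returns `self` so calls can be chained. A hedged usage sketch, assuming pyspark and the `xgboost.spark` estimators are installed (parameter values are illustrative):

    from xgboost.spark import SparkXGBRegressor

    # "cuda" routes training to GPUs; "cpu" is the default. The setter validates the
    # value and stores it in the underlying `device` param before returning self.
    regressor = (
        SparkXGBRegressor(features_col="features", label_col="label", num_workers=2)
        .set_device("cuda")
    )
    print(regressor.getOrDefault(regressor.device))  # 'cuda'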
@@ -334,6 +344,54 @@ class _SparkXGBParams(
             predict_params[param.name] = self.getOrDefault(param)
         return predict_params

+    def _validate_gpu_params(self) -> None:
+        """Validate the gpu parameters and gpu configurations"""
+
+        if use_cuda(self.getOrDefault(self.device)) or self.getOrDefault(self.use_gpu):
+            ss = _get_spark_session()
+            sc = ss.sparkContext
+
+            if _is_local(sc):
+                # Support GPU training in Spark local mode is just for debugging
+                # purposes, so it's okay for printing the below warning instead of
+                # checking the real gpu numbers and raising the exception.
+                get_logger(self.__class__.__name__).warning(
+                    "You have enabled GPU in spark local mode. Please make sure your"
+                    " local node has at least %d GPUs",
+                    self.getOrDefault(self.num_workers),
+                )
+            else:
+                executor_gpus = sc.getConf().get("spark.executor.resource.gpu.amount")
+                if executor_gpus is None:
+                    raise ValueError(
+                        "The `spark.executor.resource.gpu.amount` is required for training"
+                        " on GPU."
+                    )
+
+                if not (ss.version >= "3.4.0" and _is_standalone_or_localcluster(sc)):
+                    # We will enable stage-level scheduling in spark 3.4.0+ which doesn't
+                    # require spark.task.resource.gpu.amount to be set explicitly
+                    gpu_per_task = sc.getConf().get("spark.task.resource.gpu.amount")
+                    if gpu_per_task is not None:
+                        if float(gpu_per_task) < 1.0:
+                            raise ValueError(
+                                "XGBoost doesn't support GPU fractional configurations. "
+                                "Please set `spark.task.resource.gpu.amount=spark.executor"
+                                ".resource.gpu.amount`"
+                            )
+
+                        if float(gpu_per_task) > 1.0:
+                            get_logger(self.__class__.__name__).warning(
+                                "%s GPUs for each Spark task is configured, but each "
+                                "XGBoost training task uses only 1 GPU.",
+                                gpu_per_task,
+                            )
+                    else:
+                        raise ValueError(
+                            "The `spark.task.resource.gpu.amount` is required for training"
+                            " on GPU."
+                        )
+
     def _validate_params(self) -> None:
         # pylint: disable=too-many-branches
         init_model = self.getOrDefault("xgb_model")
@@ -413,53 +471,7 @@ class _SparkXGBParams(
                 "`pyspark.ml.linalg.Vector` type."
             )

-        if use_cuda(self.getOrDefault(self.device)) or self.getOrDefault(self.use_gpu):
-            gpu_per_task = (
-                _get_spark_session()
-                .sparkContext.getConf()
-                .get("spark.task.resource.gpu.amount")
-            )
-
-            is_local = _is_local(_get_spark_session().sparkContext)
-
-            if is_local:
-                # checking spark local mode.
-                if gpu_per_task is not None:
-                    raise RuntimeError(
-                        "The spark local mode does not support gpu configuration."
-                        "Please remove spark.executor.resource.gpu.amount and "
-                        "spark.task.resource.gpu.amount"
-                    )
-
-                # Support GPU training in Spark local mode is just for debugging
-                # purposes, so it's okay for printing the below warning instead of
-                # checking the real gpu numbers and raising the exception.
-                get_logger(self.__class__.__name__).warning(
-                    "You have enabled GPU in spark local mode. Please make sure your"
-                    " local node has at least %d GPUs",
-                    self.getOrDefault(self.num_workers),
-                )
-            else:
-                # checking spark non-local mode.
-                if gpu_per_task is not None:
-                    if float(gpu_per_task) < 1.0:
-                        raise ValueError(
-                            "XGBoost doesn't support GPU fractional configurations. "
-                            "Please set `spark.task.resource.gpu.amount=spark.executor"
-                            ".resource.gpu.amount`"
-                        )
-
-                    if float(gpu_per_task) > 1.0:
-                        get_logger(self.__class__.__name__).warning(
-                            "%s GPUs for each Spark task is configured, but each "
-                            "XGBoost training task uses only 1 GPU.",
-                            gpu_per_task,
-                        )
-                else:
-                    raise ValueError(
-                        "The `spark.task.resource.gpu.amount` is required for training"
-                        " on GPU."
-                    )
-
+        self._validate_gpu_params()

     def _validate_and_convert_feature_col_as_float_col_list(
@@ -584,6 +596,8 @@ class _SparkXGBEstimator(Estimator, _SparkXGBParams, MLReadable, MLWritable):
             arbitrary_params_dict={},
         )

+        self.logger = get_logger(self.__class__.__name__)
+
     def setParams(self, **kwargs: Any) -> None:  # pylint: disable=invalid-name
         """
         Set params for the estimator.
@@ -886,6 +900,116 @@ class _SparkXGBEstimator(Estimator, _SparkXGBParams, MLReadable, MLWritable):

         return booster_params, train_call_kwargs_params, dmatrix_kwargs

+    def _skip_stage_level_scheduling(self) -> bool:
+        # pylint: disable=too-many-return-statements
+        """Check if stage-level scheduling is not needed,
+        return true to skip stage-level scheduling"""
+
+        if use_cuda(self.getOrDefault(self.device)) or self.getOrDefault(self.use_gpu):
+            ss = _get_spark_session()
+            sc = ss.sparkContext
+
+            if ss.version < "3.4.0":
+                self.logger.info(
+                    "Stage-level scheduling in xgboost requires spark version 3.4.0+"
+                )
+                return True
+
+            if not _is_standalone_or_localcluster(sc):
+                self.logger.info(
+                    "Stage-level scheduling in xgboost requires spark standalone or "
+                    "local-cluster mode"
+                )
+                return True
+
+            executor_cores = sc.getConf().get("spark.executor.cores")
+            executor_gpus = sc.getConf().get("spark.executor.resource.gpu.amount")
+            if executor_cores is None or executor_gpus is None:
+                self.logger.info(
+                    "Stage-level scheduling in xgboost requires spark.executor.cores, "
+                    "spark.executor.resource.gpu.amount to be set."
+                )
+                return True
+
+            if int(executor_cores) == 1:
+                # there will be only 1 task running at any time.
+                self.logger.info(
+                    "Stage-level scheduling in xgboost requires spark.executor.cores > 1 "
+                )
+                return True
+
+            if int(executor_gpus) > 1:
+                # For spark.executor.resource.gpu.amount > 1, we suppose user knows how to configure
+                # to make xgboost run successfully.
+                #
+                self.logger.info(
+                    "Stage-level scheduling in xgboost will not work "
+                    "when spark.executor.resource.gpu.amount>1"
+                )
+                return True
+
+            task_gpu_amount = sc.getConf().get("spark.task.resource.gpu.amount")
+
+            if task_gpu_amount is None:
+                # The ETL tasks will not grab a gpu when spark.task.resource.gpu.amount is not set,
+                # but with stage-level scheduling, we can make training task grab the gpu.
+                return False
+
+            if float(task_gpu_amount) == float(executor_gpus):
+                # spark.executor.resource.gpu.amount=spark.task.resource.gpu.amount
+                # results in only 1 task running at a time, which may cause perf issue.
+                return True
+
+            # We can enable stage-level scheduling
+            return False
+
+        # CPU training doesn't require stage-level scheduling
+        return True
+
+    def _try_stage_level_scheduling(self, rdd: RDD) -> RDD:
+        """Try to enable stage-level scheduling"""
+
+        if self._skip_stage_level_scheduling():
+            return rdd
+
+        ss = _get_spark_session()
+
+        # executor_cores will not be None
+        executor_cores = ss.sparkContext.getConf().get("spark.executor.cores")
+        assert executor_cores is not None
+
+        # Spark-rapids is a project to leverage GPUs to accelerate spark SQL.
+        # If spark-rapids is enabled, to avoid GPU OOM, we don't allow other
+        # ETL gpu tasks running alongside training tasks.
+        spark_plugins = ss.conf.get("spark.plugins", " ")
+        assert spark_plugins is not None
+        spark_rapids_sql_enabled = ss.conf.get("spark.rapids.sql.enabled", "true")
+        assert spark_rapids_sql_enabled is not None
+
+        task_cores = (
+            int(executor_cores)
+            if "com.nvidia.spark.SQLPlugin" in spark_plugins
+            and "true" == spark_rapids_sql_enabled.lower()
+            else (int(executor_cores) // 2) + 1
+        )
+
+        # Each training task requires cpu cores > total executor cores//2 + 1 which can
+        # make sure the tasks be sent to different executors.
+        #
+        # Please note that we can't use GPU to limit the concurrent tasks because of
+        # https://issues.apache.org/jira/browse/SPARK-45527.
+
+        task_gpus = 1.0
+        treqs = TaskResourceRequests().cpus(task_cores).resource("gpu", task_gpus)
+        rp = ResourceProfileBuilder().require(treqs).build
+
+        self.logger.info(
+            "XGBoost training tasks require the resource(cores=%s, gpu=%s).",
+            task_cores,
+            task_gpus,
+        )
+        return rdd.withResources(rp)
+
     def _fit(self, dataset: DataFrame) -> "_SparkXGBModel":
         # pylint: disable=too-many-statements, too-many-locals
         self._validate_params()
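`_try_stage_level_scheduling` relies on Spark 3.4's stage-level scheduling API: a `ResourceProfile` built from `TaskResourceRequests` is attached to the barrier RDD so that only the training stage requests GPUs. A hedged, stand-alone sketch of that API outside XGBoost (it assumes a Spark 3.4+ standalone or local-cluster deployment with GPU resources configured; the sizing values are illustrative):

    from pyspark.resource import ResourceProfileBuilder, TaskResourceRequests
    from pyspark.sql import SparkSession

    spark = SparkSession.builder.getOrCreate()

    # Ask for half the executor cores plus one, and one GPU, per training task,
    # mirroring the sizing rule used in the diff.
    executor_cores = int(spark.sparkContext.getConf().get("spark.executor.cores", "4"))
    task_cores = executor_cores // 2 + 1

    treqs = TaskResourceRequests().cpus(task_cores).resource("gpu", 1.0)
    rp = ResourceProfileBuilder().require(treqs).build  # note: `build` is a property

    rdd = spark.sparkContext.parallelize(range(8), numSlices=2)
    # Only this stage runs with the stricter per-task resource profile.
    scheduled = rdd.withResources(rp)
    print(scheduled.getResourceProfile())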
@@ -986,14 +1110,16 @@ class _SparkXGBEstimator(Estimator, _SparkXGBParams, MLReadable, MLWritable):
         )

         def _run_job() -> Tuple[str, str]:
-            ret = (
+            rdd = (
                 dataset.mapInPandas(
-                    _train_booster, schema="config string, booster string"  # type: ignore
+                    _train_booster,  # type: ignore
+                    schema="config string, booster string",
                 )
                 .rdd.barrier()
                 .mapPartitions(lambda x: x)
-                .collect()[0]
             )
+            rdd_with_resource = self._try_stage_level_scheduling(rdd)
+            ret = rdd_with_resource.collect()[0]
             return ret[0], ret[1]

         get_logger("XGBoost-PySpark").info(
@@ -1117,12 +1243,111 @@ class _SparkXGBModel(Model, _SparkXGBParams, MLReadable, MLWritable):
             )
         return features_col, feature_col_names

+    def _get_pred_contrib_col_name(self) -> Optional[str]:
+        """Return the pred_contrib_col col name"""
+        pred_contrib_col_name = None
+        if (
+            self.isDefined(self.pred_contrib_col)
+            and self.getOrDefault(self.pred_contrib_col) != ""
+        ):
+            pred_contrib_col_name = self.getOrDefault(self.pred_contrib_col)
+
+        return pred_contrib_col_name
+
+    def _out_schema(self) -> Tuple[bool, str]:
+        """Return the bool to indicate if it's a single prediction, true is single prediction,
+        and the returned type of the user-defined function. The value must
+        be a DDL-formatted type string."""
+
+        if self._get_pred_contrib_col_name() is not None:
+            return False, f"{pred.prediction} double, {pred.pred_contrib} array<double>"
+
+        return True, "double"
+
+    def _get_predict_func(self) -> Callable:
+        """Return the true prediction function which will be running on the executor side"""
+
+        predict_params = self._gen_predict_params_dict()
+        pred_contrib_col_name = self._get_pred_contrib_col_name()
+
+        def _predict(
+            model: XGBModel, X: ArrayLike, base_margin: Optional[ArrayLike]
+        ) -> Union[pd.DataFrame, pd.Series]:
+            data = {}
+            preds = model.predict(
+                X,
+                base_margin=base_margin,
+                validate_features=False,
+                **predict_params,
+            )
+            data[pred.prediction] = pd.Series(preds)
+
+            if pred_contrib_col_name is not None:
+                contribs = pred_contribs(model, X, base_margin)
+                data[pred.pred_contrib] = pd.Series(list(contribs))
+                return pd.DataFrame(data=data)
+
+            return data[pred.prediction]
+
+        return _predict
+
+    def _post_transform(self, dataset: DataFrame, pred_col: Column) -> DataFrame:
+        """Post process of transform"""
+        prediction_col_name = self.getOrDefault(self.predictionCol)
+        single_pred, _ = self._out_schema()
+
+        if single_pred:
+            if prediction_col_name:
+                dataset = dataset.withColumn(prediction_col_name, pred_col)
+        else:
+            pred_struct_col = "_prediction_struct"
+            dataset = dataset.withColumn(pred_struct_col, pred_col)
+
+            if prediction_col_name:
+                dataset = dataset.withColumn(
+                    prediction_col_name, getattr(col(pred_struct_col), pred.prediction)
+                )
+
+            pred_contrib_col_name = self._get_pred_contrib_col_name()
+            if pred_contrib_col_name is not None:
+                dataset = dataset.withColumn(
+                    pred_contrib_col_name,
+                    array_to_vector(getattr(col(pred_struct_col), pred.pred_contrib)),
+                )
+
+            dataset = dataset.drop(pred_struct_col)
+        return dataset
+
+    def _gpu_transform(self) -> bool:
+        """If gpu is used to do the prediction, true to gpu prediction"""
+
+        if _is_local(_get_spark_session().sparkContext):
+            # if it's local model, we just use the internal "device"
+            return use_cuda(self.getOrDefault(self.device))
+
+        gpu_per_task = (
+            _get_spark_session()
+            .sparkContext.getConf()
+            .get("spark.task.resource.gpu.amount")
+        )
+
+        # User don't set gpu configurations, just use cpu
+        if gpu_per_task is None:
+            if use_cuda(self.getOrDefault(self.device)):
+                get_logger("XGBoost-PySpark").warning(
+                    "Do the prediction on the CPUs since "
+                    "no gpu configurations are set"
+                )
+            return False
+
+        # User already sets the gpu configurations, we just use the internal "device".
+        return use_cuda(self.getOrDefault(self.device))
+
     def _transform(self, dataset: DataFrame) -> DataFrame:
         # pylint: disable=too-many-statements, too-many-locals
         # Save xgb_sklearn_model and predict_params to be local variable
         # to avoid the `self` object to be pickled to remote.
         xgb_sklearn_model = self._xgb_sklearn_model
-        predict_params = self._gen_predict_params_dict()

         has_base_margin = False
         if (
@@ -1137,79 +1362,92 @@ class _SparkXGBModel(Model, _SparkXGBParams, MLReadable, MLWritable):
         features_col, feature_col_names = self._get_feature_col(dataset)
         enable_sparse_data_optim = self.getOrDefault(self.enable_sparse_data_optim)

-        pred_contrib_col_name = None
-        if (
-            self.isDefined(self.pred_contrib_col)
-            and self.getOrDefault(self.pred_contrib_col) != ""
-        ):
-            pred_contrib_col_name = self.getOrDefault(self.pred_contrib_col)
+        predict_func = self._get_predict_func()

-        single_pred = True
-        schema = "double"
-        if pred_contrib_col_name:
-            single_pred = False
-            schema = f"{pred.prediction} double, {pred.pred_contrib} array<double>"
+        _, schema = self._out_schema()
+        is_local = _is_local(_get_spark_session().sparkContext)
+        run_on_gpu = self._gpu_transform()

         @pandas_udf(schema)  # type: ignore
         def predict_udf(iterator: Iterator[pd.DataFrame]) -> Iterator[pd.Series]:
             assert xgb_sklearn_model is not None
             model = xgb_sklearn_model

+            from pyspark import TaskContext
+
+            context = TaskContext.get()
+            assert context is not None
+
+            dev_ordinal = -1
+
+            if is_cudf_available():
+                if is_local:
+                    if run_on_gpu and is_cupy_available():
+                        import cupy as cp  # pylint: disable=import-error
+
+                        total_gpus = cp.cuda.runtime.getDeviceCount()
+                        if total_gpus > 0:
+                            partition_id = context.partitionId()
+                            # For transform local mode, default the dev_ordinal to
+                            # (partition id) % gpus.
+                            dev_ordinal = partition_id % total_gpus
+                elif run_on_gpu:
+                    dev_ordinal = _get_gpu_id(context)
+
+                if dev_ordinal >= 0:
+                    device = "cuda:" + str(dev_ordinal)
+                    get_logger("XGBoost-PySpark").info(
+                        "Do the inference with device: %s", device
+                    )
+                    model.set_params(device=device)
+                else:
+                    get_logger("XGBoost-PySpark").info("Do the inference on the CPUs")
+            else:
+                msg = (
+                    "CUDF is unavailable, fallback the inference on the CPUs"
+                    if run_on_gpu
+                    else "Do the inference on the CPUs"
+                )
+                get_logger("XGBoost-PySpark").info(msg)
+
+            def to_gpu_if_possible(data: ArrayLike) -> ArrayLike:
+                """Move the data to gpu if possible"""
+                if dev_ordinal >= 0:
+                    import cudf  # pylint: disable=import-error
+                    import cupy as cp  # pylint: disable=import-error
+
+                    # We must set the device after import cudf, which will change the device id to 0
+                    # See https://github.com/rapidsai/cudf/issues/11386
+                    cp.cuda.runtime.setDevice(dev_ordinal)  # pylint: disable=I1101
+                    df = cudf.DataFrame(data)
+                    del data
+                    return df
+                return data
+
             for data in iterator:
                 if enable_sparse_data_optim:
                     X = _read_csr_matrix_from_unwrapped_spark_vec(data)
                 else:
                     if feature_col_names is not None:
-                        X = data[feature_col_names]
+                        tmp = data[feature_col_names]
                     else:
-                        X = stack_series(data[alias.data])
+                        tmp = stack_series(data[alias.data])
+                    X = to_gpu_if_possible(tmp)

                 if has_base_margin:
-                    base_margin = data[alias.margin].to_numpy()
+                    base_margin = to_gpu_if_possible(data[alias.margin])
                 else:
                     base_margin = None

-                data = {}
-                preds = model.predict(
-                    X,
-                    base_margin=base_margin,
-                    validate_features=False,
-                    **predict_params,
-                )
-                data[pred.prediction] = pd.Series(preds)
-
-                if pred_contrib_col_name:
-                    contribs = pred_contribs(model, X, base_margin)
-                    data[pred.pred_contrib] = pd.Series(list(contribs))
-                    yield pd.DataFrame(data=data)
-                else:
-                    yield data[pred.prediction]
+                yield predict_func(model, X, base_margin)

         if has_base_margin:
             pred_col = predict_udf(struct(*features_col, base_margin_col))
         else:
             pred_col = predict_udf(struct(*features_col))
|
pred_col = predict_udf(struct(*features_col))
|
||||||
|
|
||||||
prediction_col_name = self.getOrDefault(self.predictionCol)
|
return self._post_transform(dataset, pred_col)
|
||||||
|
|
||||||
if single_pred:
|
|
||||||
dataset = dataset.withColumn(prediction_col_name, pred_col)
|
|
||||||
else:
|
|
||||||
pred_struct_col = "_prediction_struct"
|
|
||||||
dataset = dataset.withColumn(pred_struct_col, pred_col)
|
|
||||||
|
|
||||||
dataset = dataset.withColumn(
|
|
||||||
prediction_col_name, getattr(col(pred_struct_col), pred.prediction)
|
|
||||||
)
|
|
||||||
|
|
||||||
if pred_contrib_col_name:
|
|
||||||
dataset = dataset.withColumn(
|
|
||||||
pred_contrib_col_name,
|
|
||||||
array_to_vector(getattr(col(pred_struct_col), pred.pred_contrib)),
|
|
||||||
)
|
|
||||||
|
|
||||||
dataset = dataset.drop(pred_struct_col)
|
|
||||||
|
|
||||||
return dataset
|
|
||||||
|
|
||||||
|
|
||||||
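In local mode the UDF above assigns each Spark partition to a visible GPU in round-robin fashion. A tiny illustrative sketch of that mapping (the function name below is hypothetical):

def pick_local_device_ordinal(partition_id: int, total_gpus: int) -> int:
    """Spread Spark partitions across visible GPUs, or return -1 for CPU."""
    if total_gpus <= 0:
        return -1
    return partition_id % total_gpus

# e.g. with 2 visible GPUs, partitions 0..3 map to devices 0, 1, 0, 1
assert [pick_local_device_ordinal(p, 2) for p in range(4)] == [0, 1, 0, 1]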
 class _ClassificationModel(  # pylint: disable=abstract-method
@@ -1221,22 +1459,21 @@ class _ClassificationModel(  # pylint: disable=abstract-method
     .. Note:: This API is experimental.
     """
 
-    def _transform(self, dataset: DataFrame) -> DataFrame:
-        # pylint: disable=too-many-statements, too-many-locals
-        # Save xgb_sklearn_model and predict_params to be local variable
-        # to avoid the `self` object to be pickled to remote.
-        xgb_sklearn_model = self._xgb_sklearn_model
-        predict_params = self._gen_predict_params_dict()
-
-        has_base_margin = False
-        if (
-            self.isDefined(self.base_margin_col)
-            and self.getOrDefault(self.base_margin_col) != ""
-        ):
-            has_base_margin = True
-            base_margin_col = col(self.getOrDefault(self.base_margin_col)).alias(
-                alias.margin
-            )
+    def _out_schema(self) -> Tuple[bool, str]:
+        schema = (
+            f"{pred.raw_prediction} array<double>, {pred.prediction} double,"
+            f" {pred.probability} array<double>"
+        )
+        if self._get_pred_contrib_col_name() is not None:
+            # We will force setting strict_shape to True when predicting contribs,
+            # So, it will also output 3-D shape result.
+            schema = f"{schema}, {pred.pred_contrib} array<array<double>>"
+
+        return False, schema
+
+    def _get_predict_func(self) -> Callable:
+        predict_params = self._gen_predict_params_dict()
+        pred_contrib_col_name = self._get_pred_contrib_col_name()
 
         def transform_margin(margins: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
             if margins.ndim == 1:
@@ -1251,45 +1488,9 @@ class _ClassificationModel(  # pylint: disable=abstract-method
                 class_probs = softmax(raw_preds, axis=1)
             return raw_preds, class_probs
 
-        features_col, feature_col_names = self._get_feature_col(dataset)
-        enable_sparse_data_optim = self.getOrDefault(self.enable_sparse_data_optim)
-
-        pred_contrib_col_name = None
-        if (
-            self.isDefined(self.pred_contrib_col)
-            and self.getOrDefault(self.pred_contrib_col) != ""
-        ):
-            pred_contrib_col_name = self.getOrDefault(self.pred_contrib_col)
-
-        schema = (
-            f"{pred.raw_prediction} array<double>, {pred.prediction} double,"
-            f" {pred.probability} array<double>"
-        )
-        if pred_contrib_col_name:
-            # We will force setting strict_shape to True when predicting contribs,
-            # So, it will also output 3-D shape result.
-            schema = f"{schema}, {pred.pred_contrib} array<array<double>>"
-
-        @pandas_udf(schema)  # type: ignore
-        def predict_udf(
-            iterator: Iterator[Tuple[pd.Series, ...]]
-        ) -> Iterator[pd.DataFrame]:
-            assert xgb_sklearn_model is not None
-            model = xgb_sklearn_model
-            for data in iterator:
-                if enable_sparse_data_optim:
-                    X = _read_csr_matrix_from_unwrapped_spark_vec(data)
-                else:
-                    if feature_col_names is not None:
-                        X = data[feature_col_names]  # type: ignore
-                    else:
-                        X = stack_series(data[alias.data])
-
-                if has_base_margin:
-                    base_margin = stack_series(data[alias.margin])
-                else:
-                    base_margin = None
-
+        def _predict(
+            model: XGBModel, X: ArrayLike, base_margin: Optional[np.ndarray]
+        ) -> Union[pd.DataFrame, pd.Series]:
             margins = model.predict(
                 X,
                 base_margin=base_margin,
@@ -1308,19 +1509,17 @@ class _ClassificationModel(  # pylint: disable=abstract-method
                 pred.probability: pd.Series(list(class_probs)),
             }
 
-            if pred_contrib_col_name:
+            if pred_contrib_col_name is not None:
                 contribs = pred_contribs(model, X, base_margin, strict_shape=True)
                 result[pred.pred_contrib] = pd.Series(list(contribs.tolist()))
 
-            yield pd.DataFrame(data=result)
+            return pd.DataFrame(data=result)
 
-        if has_base_margin:
-            pred_struct = predict_udf(struct(*features_col, base_margin_col))
-        else:
-            pred_struct = predict_udf(struct(*features_col))
+        return _predict
 
+    def _post_transform(self, dataset: DataFrame, pred_col: Column) -> DataFrame:
         pred_struct_col = "_prediction_struct"
-        dataset = dataset.withColumn(pred_struct_col, pred_struct)
+        dataset = dataset.withColumn(pred_struct_col, pred_col)
 
         raw_prediction_col_name = self.getOrDefault(self.rawPredictionCol)
         if raw_prediction_col_name:
@@ -1342,7 +1541,8 @@ class _ClassificationModel(  # pylint: disable=abstract-method
                 array_to_vector(getattr(col(pred_struct_col), pred.probability)),
             )
 
-        if pred_contrib_col_name:
+        pred_contrib_col_name = self._get_pred_contrib_col_name()
+        if pred_contrib_col_name is not None:
             dataset = dataset.withColumn(
                 pred_contrib_col_name,
                 getattr(col(pred_struct_col), pred.pred_contrib),
@@ -10,7 +10,7 @@ from threading import Thread
 from typing import Any, Callable, Dict, Optional, Set, Type
 
 import pyspark
-from pyspark import BarrierTaskContext, SparkContext, SparkFiles
+from pyspark import BarrierTaskContext, SparkContext, SparkFiles, TaskContext
 from pyspark.sql.session import SparkSession
 
 from xgboost import Booster, XGBModel, collective
@@ -129,7 +129,14 @@ def _is_local(spark_context: SparkContext) -> bool:
     return spark_context._jsc.sc().isLocal()
 
 
-def _get_gpu_id(task_context: BarrierTaskContext) -> int:
+def _is_standalone_or_localcluster(spark_context: SparkContext) -> bool:
+    master = spark_context.getConf().get("spark.master")
+    return master is not None and (
+        master.startswith("spark://") or master.startswith("local-cluster")
+    )
+
+
+def _get_gpu_id(task_context: TaskContext) -> int:
     """Get the gpu id from the task resources"""
     if task_context is None:
         # This is a safety check.
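The new `_is_standalone_or_localcluster` helper only inspects the `spark.master` prefix. A small sketch of what the different prefixes mean (the classifier below is illustrative, not part of the patch):

def classify_master(master: str) -> str:
    """Hypothetical illustration of the `spark.master` prefixes checked above."""
    if master.startswith("local-cluster"):
        return "local-cluster"   # pseudo-cluster, mostly used for testing
    if master.startswith("spark://"):
        return "standalone"      # Spark standalone cluster
    if master.startswith("local"):
        return "local"           # single-JVM local mode
    return "other"               # e.g. yarn, k8s://...

assert classify_master("spark://host:7077") == "standalone"
assert classify_master("local-cluster[2, 1, 1024]") == "local-cluster"
assert classify_master("local[4]") == "local"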
@@ -75,3 +75,28 @@ def run_ranking_qid_df(impl: ModuleType, tree_method: str) -> None:
 
     with pytest.raises(ValueError, match="Either `group` or `qid`."):
         ranker.fit(df, y, eval_set=[(X, y)])
+
+
+def run_ranking_categorical(device: str) -> None:
+    """Test LTR with categorical features."""
+    from sklearn.model_selection import cross_val_score
+
+    X, y = tm.make_categorical(
+        n_samples=512, n_features=10, n_categories=3, onehot=False
+    )
+    rng = np.random.default_rng(1994)
+    qid = rng.choice(3, size=y.shape[0])
+    qid = np.sort(qid)
+    X["qid"] = qid
+
+    ltr = xgb.XGBRanker(enable_categorical=True, device=device)
+    ltr.fit(X, y)
+    score = ltr.score(X, y)
+    assert score > 0.9
+
+    ltr = xgb.XGBRanker(enable_categorical=True, device=device)
+
+    # test using the score function inside sklearn.
+    scores = cross_val_score(ltr, X, y)
+    for s in scores:
+        assert s > 0.7

rocgputreeshap Submodule (new, 1 line)
@@ -0,0 +1 @@
+Subproject commit 2fea6734e83cf147c1bbe580ac4713cd50abcad5

@@ -16,6 +16,11 @@ if (USE_CUDA)
   target_sources(objxgboost PRIVATE ${CUDA_SOURCES})
 endif (USE_CUDA)
 
+if (USE_HIP)
+  file(GLOB_RECURSE HIP_SOURCES *.hip *.hip.h)
+  target_sources(objxgboost PRIVATE ${HIP_SOURCES})
+endif (USE_HIP)
+
 target_include_directories(objxgboost
   PRIVATE
   ${xgboost_SOURCE_DIR}/include
@@ -33,6 +38,7 @@ msvc_use_static_runtime()
 
 # This grouping organises source files nicely in visual studio
 auto_source_group("${CUDA_SOURCES}")
+auto_source_group("${HIP_SOURCES}")
 auto_source_group("${CPU_SOURCES}")
 
 #-- End object library
@@ -70,12 +70,14 @@ XGB_DLL void XGBoostVersion(int* major, int* minor, int* patch) {
 
 using GlobalConfigAPIThreadLocalStore = dmlc::ThreadLocalStore<XGBAPIThreadLocalEntry>;
 
-#if !defined(XGBOOST_USE_CUDA)
+#if !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP)
 namespace xgboost {
 void XGBBuildInfoDevice(Json *p_info) {
   auto &info = *p_info;
   info["USE_CUDA"] = Boolean{false};
   info["USE_NCCL"] = Boolean{false};
+  info["USE_HIP"] = Boolean{false};
+  info["USE_RCCL"] = Boolean{false};
   info["USE_RMM"] = Boolean{false};
 }
 }  // namespace xgboost
@@ -279,7 +281,7 @@ XGB_DLL int XGDMatrixCreateFromDataIter(
   API_END();
 }
 
-#ifndef XGBOOST_USE_CUDA
+#if !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP)
 XGB_DLL int XGDMatrixCreateFromCudaColumnar(char const *, char const *, DMatrixHandle *) {
   API_BEGIN();
   common::AssertGPUSupport();
@@ -1189,7 +1191,7 @@ XGB_DLL int XGBoosterPredictFromCSR(BoosterHandle handle, char const *indptr, ch
   API_END();
 }
 
-#if !defined(XGBOOST_USE_CUDA)
+#if !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP)
 XGB_DLL int XGBoosterPredictFromCUDAArray(BoosterHandle handle, char const *, char const *,
                                           DMatrixHandle, xgboost::bst_ulong const **,
                                           xgboost::bst_ulong *, const float **) {

@@ -17,7 +17,11 @@ namespace xgboost {
 void XGBBuildInfoDevice(Json *p_info) {
   auto &info = *p_info;
 
+#if defined(XGBOOST_USE_CUDA)
   info["USE_CUDA"] = true;
+#elif defined(XGBOOST_USE_HIP)
+  info["USE_HIP"] = true;
+#endif
 
   std::vector<Json> v{Json{Integer{THRUST_MAJOR_VERSION}}, Json{Integer{THRUST_MINOR_VERSION}},
                       Json{Integer{THRUST_SUBMINOR_VERSION}}};
@@ -30,8 +34,13 @@ void XGBBuildInfoDevice(Json *p_info) {
   info["USE_NCCL"] = Boolean{true};
   v = {Json{Integer{NCCL_MAJOR}}, Json{Integer{NCCL_MINOR}}, Json{Integer{NCCL_PATCH}}};
   info["NCCL_VERSION"] = v;
+#elif defined(XGBOOST_USE_RCCL)
+  info["USE_RCCL"] = Boolean{true};
+  v = {Json{Integer{NCCL_MAJOR}}, Json{Integer{NCCL_MINOR}}, Json{Integer{NCCL_PATCH}}};
+  info["RCCL_VERSION"] = v;
 #else
   info["USE_NCCL"] = Boolean{false};
+  info["USE_RCCL"] = Boolean{false};
 #endif
 
 #if defined(XGBOOST_USE_RMM)
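With the build-info changes above, a ROCm build can be identified from Python the same way a CUDA build is. A hedged sketch, assuming the `USE_HIP`/`USE_RCCL` keys land in `xgboost.build_info()` exactly as added here:

import json
import xgboost

# build_info() reports which device backend the wheel was compiled with.
info = xgboost.build_info()
print(json.dumps(info, indent=2, default=str))
if info.get("USE_HIP"):
    print("HIP/ROCm build, RCCL enabled:", bool(info.get("USE_RCCL")))
elif info.get("USE_CUDA"):
    print("CUDA build, NCCL enabled:", bool(info.get("USE_NCCL")))
else:
    print("CPU-only build")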
src/c_api/c_api.hip (new file, 4 lines)
@@ -0,0 +1,4 @@
+
+#if defined(XGBOOST_USE_HIP)
+#include "c_api.cu"
+#endif

@@ -175,7 +175,7 @@ inline float GetMissing(Json const &config) {
 
 // Safe guard some global variables from being changed by XGBoost.
 class XGBoostAPIGuard {
-#if defined(XGBOOST_USE_CUDA)
+#if defined(XGBOOST_USE_CUDA) || defined(XGBOOST_USE_HIP)
   int32_t device_id_ {0};
 
   void SetGPUAttribute();

@@ -51,7 +51,7 @@ void Communicator::Init(Json const& config) {
   }
 }
 
-#ifndef XGBOOST_USE_CUDA
+#if !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP)
 void Communicator::Finalize() {
   communicator_->Shutdown();
   communicator_.reset(new NoOpCommunicator());

@@ -5,7 +5,7 @@
 #include "device_communicator.cuh"
 #include "device_communicator_adapter.cuh"
 #include "noop_communicator.h"
-#ifdef XGBOOST_USE_NCCL
+#if defined(XGBOOST_USE_NCCL) || defined(XGBOOST_USE_RCCL)
 #include "nccl_device_communicator.cuh"
 #endif
 
@@ -28,7 +28,7 @@ DeviceCommunicator* Communicator::GetDevice(int device_ordinal) {
       communicator_->GetWorldSize() != old_world_size) {
     old_device_ordinal = device_ordinal;
     old_world_size = communicator_->GetWorldSize();
-#ifdef XGBOOST_USE_NCCL
+#if defined(XGBOOST_USE_NCCL) || defined(XGBOOST_USE_RCCL)
     switch (type_) {
       case CommunicatorType::kRabit:
         device_communicator_.reset(new NcclDeviceCommunicator(device_ordinal, false));

@@ -98,7 +98,7 @@ class Communicator {
   /** @brief Get the communicator instance. */
   static Communicator *Get() { return communicator_.get(); }
 
-#if defined(XGBOOST_USE_CUDA)
+#if defined(XGBOOST_USE_CUDA) || defined(XGBOOST_USE_HIP)
   /**
    * @brief Get the device communicator.
   *
@@ -230,7 +230,7 @@ class Communicator {
 
   static thread_local std::unique_ptr<Communicator> communicator_;
   static thread_local CommunicatorType type_;
-#if defined(XGBOOST_USE_CUDA)
+#if defined(XGBOOST_USE_CUDA) || defined(XGBOOST_USE_HIP)
   static thread_local std::unique_ptr<DeviceCommunicator> device_communicator_;
 #endif

src/collective/communicator.hip (new file, 4 lines)
@@ -0,0 +1,4 @@
+
+#if defined(XGBOOST_USE_HIP)
+#include "communicator.cu"
+#endif
@@ -41,7 +41,6 @@ class DeviceCommunicatorAdapter : public DeviceCommunicator {
     }
-
     dh::safe_cuda(cudaSetDevice(device_ordinal_));
 
     segments->clear();
     segments->resize(world_size_, 0);
     segments->at(rank_) = length_bytes;
@@ -60,6 +59,7 @@ class DeviceCommunicatorAdapter : public DeviceCommunicator {
       Broadcast(host_buffer_.data() + offset, as_bytes, i);
       offset += as_bytes;
     }
+
     dh::safe_cuda(cudaMemcpy(receive_buffer->data().get(), host_buffer_.data(), total_bytes,
                              cudaMemcpyDefault));
   }

@@ -1,7 +1,7 @@
 /*!
  * Copyright 2023 XGBoost contributors
  */
-#if defined(XGBOOST_USE_NCCL)
+#if defined(XGBOOST_USE_NCCL) || defined(XGBOOST_USE_RCCL)
 #include "nccl_device_communicator.cuh"
 
 namespace xgboost {

@@ -35,12 +35,22 @@ class NcclDeviceCommunicator : public DeviceCommunicator {
 
  private:
   static constexpr std::size_t kUuidLength =
+#if defined(XGBOOST_USE_CUDA)
       sizeof(std::declval<cudaDeviceProp>().uuid) / sizeof(uint64_t);
+#elif defined(XGBOOST_USE_HIP)
+      sizeof(hipUUID) / sizeof(uint64_t);
+#endif
 
   void GetCudaUUID(xgboost::common::Span<uint64_t, kUuidLength> const &uuid) const {
+#if defined(XGBOOST_USE_CUDA)
     cudaDeviceProp prob{};
     dh::safe_cuda(cudaGetDeviceProperties(&prob, device_ordinal_));
     std::memcpy(uuid.data(), static_cast<void *>(&(prob.uuid)), sizeof(prob.uuid));
+#elif defined(XGBOOST_USE_HIP)
+    hipUUID id;
+    hipDeviceGetUuid(&id, device_ordinal_);
+    std::memcpy(uuid.data(), static_cast<void *>(&id), sizeof(id));
+#endif
   }
 
   static std::string PrintUUID(xgboost::common::Span<uint64_t, kUuidLength> const &uuid) {

src/collective/nccl_device_communicator.hip (new file, 4 lines)
@@ -0,0 +1,4 @@
+
+#if defined(XGBOOST_USE_HIP)
+#include "nccl_device_communicator.cu"
+#endif
@@ -10,7 +10,13 @@
 
 #include <cstddef>  // size_t
 #include <cstdint>  // int32_t
+
+#if defined(XGBOOST_USE_HIP)
+#include <hipcub/hipcub.hpp>
+#elif defined(XGBOOST_USE_CUDA)
 #include <cub/cub.cuh>  // DispatchSegmentedRadixSort,NullType,DoubleBuffer
+#endif
+
 #include <iterator>     // distance
 #include <limits>       // numeric_limits
 #include <type_traits>  // conditional_t,remove_const_t
@@ -39,6 +45,7 @@ static void DeviceSegmentedRadixSortKeys(CUDAContext const *ctx, void *d_temp_st
   using OffsetT = int;
 
   // Null value type
+#if defined(XGBOOST_USE_CUDA)
   cub::DoubleBuffer<KeyT> d_keys(const_cast<KeyT *>(d_keys_in), d_keys_out);
   cub::DoubleBuffer<cub::NullType> d_values;
 
@@ -47,6 +54,20 @@ static void DeviceSegmentedRadixSortKeys(CUDAContext const *ctx, void *d_temp_st
                  OffsetT>::Dispatch(d_temp_storage, temp_storage_bytes, d_keys, d_values, num_items,
                                     num_segments, d_begin_offsets, d_end_offsets, begin_bit,
                                     end_bit, false, ctx->Stream(), debug_synchronous)));
+#elif defined(XGBOOST_USE_HIP)
+  if (IS_DESCENDING) {
+    rocprim::segmented_radix_sort_pairs_desc<KeyT, hipcub::NullType, BeginOffsetIteratorT>(
+        d_temp_storage, temp_storage_bytes, d_keys_in, d_keys_out, nullptr, nullptr, num_items,
+        num_segments, d_begin_offsets, d_end_offsets,
+        begin_bit, end_bit, ctx->Stream(), debug_synchronous);
+  } else {
+    rocprim::segmented_radix_sort_pairs<KeyT, hipcub::NullType, BeginOffsetIteratorT>(
+        d_temp_storage, temp_storage_bytes, d_keys_in, d_keys_out, nullptr, nullptr, num_items,
+        num_segments, d_begin_offsets, d_end_offsets,
+        begin_bit, end_bit, ctx->Stream(), debug_synchronous);
+  }
+#endif
 }
 
 // Wrapper around cub sort for easier `descending` sort.
@@ -60,14 +81,18 @@ void DeviceSegmentedRadixSortPair(void *d_temp_storage,
                                   BeginOffsetIteratorT d_begin_offsets,
                                   EndOffsetIteratorT d_end_offsets, dh::CUDAStreamView stream,
                                   int begin_bit = 0, int end_bit = sizeof(KeyT) * 8) {
+#if defined(XGBOOST_USE_CUDA)
   cub::DoubleBuffer<KeyT> d_keys(const_cast<KeyT *>(d_keys_in), d_keys_out);
   cub::DoubleBuffer<ValueT> d_values(const_cast<ValueT *>(d_values_in), d_values_out);
+#endif
+
   // In old version of cub, num_items in dispatch is also int32_t, no way to change.
   using OffsetT = std::conditional_t<dh::BuildWithCUDACub() && dh::HasThrustMinorVer<13>(),
                                      std::size_t, std::int32_t>;
   CHECK_LE(num_items, std::numeric_limits<OffsetT>::max());
   // For Thrust >= 1.12 or CUDA >= 11.4, we require system cub installation
 
+#if defined(XGBOOST_USE_CUDA)
 #if THRUST_MAJOR_VERSION >= 2
   dh::safe_cuda((cub::DispatchSegmentedRadixSort<
                  descending, KeyT, ValueT, BeginOffsetIteratorT, EndOffsetIteratorT,
@@ -88,6 +113,18 @@ void DeviceSegmentedRadixSortPair(void *d_temp_storage,
                                     d_begin_offsets, d_end_offsets, begin_bit,
                                     end_bit, false, stream, false)));
 #endif
+#elif defined(XGBOOST_USE_HIP)
+  if (descending) {
+    rocprim::segmented_radix_sort_pairs_desc(d_temp_storage, temp_storage_bytes, d_keys_in, d_keys_out,
+        d_values_in, d_values_out, num_items, num_segments,
+        d_begin_offsets, d_end_offsets, begin_bit, end_bit, stream, false);
+  } else {
+    rocprim::segmented_radix_sort_pairs(d_temp_storage, temp_storage_bytes, d_keys_in, d_keys_out,
+        d_values_in, d_values_out, num_items, num_segments, d_begin_offsets, d_end_offsets,
+        begin_bit, end_bit, stream, false);
+  }
+#endif
 }
 }  // namespace detail
 
@@ -17,14 +17,18 @@
 #include <thrust/copy.h>
 #include <thrust/device_ptr.h>
 #include "device_helpers.cuh"
-#endif  // defined(__CUDACC__)
+#elif defined(__HIPCC__)
+#include <thrust/copy.h>
+#include <thrust/device_ptr.h>
+#include "device_helpers.hip.h"
+#endif  // defined(__CUDACC__) || defined(__HIPCC__)
 
 #include "xgboost/span.h"
 #include "common.h"
 
 namespace xgboost {
 
-#if defined(__CUDACC__)
+#if defined(__CUDACC__) || defined(__HIPCC__)
 using BitFieldAtomicType = unsigned long long;  // NOLINT
 
 __forceinline__ __device__ BitFieldAtomicType AtomicOr(BitFieldAtomicType* address,
@@ -48,7 +52,7 @@ __forceinline__ __device__ BitFieldAtomicType AtomicAnd(BitFieldAtomicType* addr
 
   return old;
 }
-#endif  // defined(__CUDACC__)
+#endif  // defined(__CUDACC__) || defined(__HIPCC__)
 
 /**
  * @brief A non-owning type with auxiliary methods defined for manipulating bits.
@@ -106,7 +110,7 @@ struct BitFieldContainer {
   XGBOOST_DEVICE static size_t ComputeStorageSize(index_type size) {
     return common::DivRoundUp(size, kValueSize);
   }
-#if defined(__CUDA_ARCH__)
+#if defined(__CUDA_ARCH__) || defined(__HIPCC__)
   __device__ BitFieldContainer& operator|=(BitFieldContainer const& rhs) {
     auto tid = blockIdx.x * blockDim.x + threadIdx.x;
     size_t min_size = min(NumValues(), rhs.NumValues());
@@ -123,9 +127,9 @@ struct BitFieldContainer {
     }
     return *this;
   }
-#endif  // #if defined(__CUDA_ARCH__)
+#endif  // #if defined(__CUDA_ARCH__) || defined(__HIPCC__)
 
-#if defined(__CUDA_ARCH__)
+#if defined(__CUDA_ARCH__) || defined(__HIPCC__)
   __device__ BitFieldContainer& operator&=(BitFieldContainer const& rhs) {
     size_t min_size = min(NumValues(), rhs.NumValues());
     auto tid = blockIdx.x * blockDim.x + threadIdx.x;
@@ -144,7 +148,7 @@ struct BitFieldContainer {
   }
 #endif  // defined(__CUDA_ARCH__)
 
-#if defined(__CUDA_ARCH__)
+#if defined(__CUDA_ARCH__) || defined(__HIPCC__)
   __device__ auto Set(index_type pos) noexcept(true) {
     Pos pos_v = Direction::Shift(ToBitPos(pos));
     value_type& value = Data()[pos_v.int_pos];
@@ -159,6 +163,14 @@ struct BitFieldContainer {
     using Type = typename dh::detail::AtomicDispatcher<sizeof(value_type)>::Type;
     atomicAnd(reinterpret_cast<Type *>(&value), clear_bit);
   }
+#ifdef __HIPCC__
+  void Clear(index_type pos) noexcept(true) {
+    Pos pos_v = Direction::Shift(ToBitPos(pos));
+    value_type& value = Data()[pos_v.int_pos];
+    value_type clear_bit = ~(kOne << pos_v.bit_pos);
+    value &= clear_bit;
+  }
+#endif
 #else
   void Set(index_type pos) noexcept(true) {
     Pos pos_v = Direction::Shift(ToBitPos(pos));
@@ -172,7 +184,7 @@ struct BitFieldContainer {
     value_type clear_bit = ~(kOne << pos_v.bit_pos);
     value &= clear_bit;
   }
-#endif  // defined(__CUDA_ARCH__)
+#endif  // defined(__CUDA_ARCH__) || defined(__HIPCC__)
 
   XGBOOST_DEVICE bool Check(Pos pos_v) const noexcept(true) {
     pos_v = Direction::Shift(pos_v);
@@ -52,7 +52,7 @@ inline XGBOOST_DEVICE bool InvalidCat(float cat) {
  *
  * Go to left if it's NOT the matching category, which matches one-hot encoding.
  */
-inline XGBOOST_DEVICE bool Decision(common::Span<uint32_t const> cats, float cat) {
+inline XGBOOST_DEVICE bool Decision(common::Span<CatBitField::value_type const> cats, float cat) {
   KCatBitField const s_cats(cats);
   if (XGBOOST_EXPECT(InvalidCat(cat), false)) {
     return true;

@@ -55,7 +55,7 @@ void EscapeU8(std::string const &string, std::string *p_buffer) {
   }
 }
 
-#if !defined(XGBOOST_USE_CUDA)
+#if !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP)
 int AllVisibleGPUs() { return 0; }
 #endif  // !defined(XGBOOST_USE_CUDA)
 

@@ -2,6 +2,7 @@
  * Copyright 2018-2022 XGBoost contributors
  */
 #include "common.h"
+#include "cuda_to_hip.h"
 
 namespace xgboost {
 namespace common {
@@ -26,6 +26,12 @@
 
 #define WITH_CUDA() true
 
+#elif defined(__HIPCC__)
+#include <thrust/system/hip/error.h>
+#include <thrust/system_error.h>
+
+#define WITH_CUDA() true
+
 #else
 
 #define WITH_CUDA() false
@@ -39,8 +45,8 @@ namespace dh {
  */
 #define safe_cuda(ans) ThrowOnCudaError((ans), __FILE__, __LINE__)
 
-inline cudaError_t ThrowOnCudaError(cudaError_t code, const char *file,
-                                    int line) {
+inline cudaError_t ThrowOnCudaError(cudaError_t code, const char *file, int line)
+{
   if (code != cudaSuccess) {
     LOG(FATAL) << thrust::system_error(code, thrust::cuda_category(),
                                        std::string{file} + ": " +  // NOLINT
@@ -48,7 +54,23 @@ inline cudaError_t ThrowOnCudaError(cudaError_t code, const char *file,
   }
   return code;
 }
-#endif  // defined(__CUDACC__)
+
+#elif defined(__HIPCC__)
+/*
+ * Error handling functions
+ */
+#define safe_cuda(ans) ThrowOnCudaError((ans), __FILE__, __LINE__)
+
+inline hipError_t ThrowOnCudaError(hipError_t code, const char *file, int line)
+{
+  if (code != hipSuccess) {
+    LOG(FATAL) << thrust::system_error(code, thrust::hip_category(),
+                                       std::string{file} + ": " +  // NOLINT
+                                       std::to_string(line)).what();
+  }
+  return code;
+}
+#endif
 }  // namespace dh
 
 namespace xgboost::common {
@@ -159,9 +181,9 @@ class Range {
 int AllVisibleGPUs();
 
 inline void AssertGPUSupport() {
-#ifndef XGBOOST_USE_CUDA
+#if !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP)
   LOG(FATAL) << "XGBoost version not compiled with GPU support.";
-#endif  // XGBOOST_USE_CUDA
+#endif  // XGBOOST_USE_CUDA && XGBOOST_USE_HIP
 }
 
 inline void AssertOneAPISupport() {
@@ -172,7 +194,7 @@ inline void AssertOneAPISupport() {
 
 void SetDevice(std::int32_t device);
 
-#if !defined(XGBOOST_USE_CUDA)
+#if !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP)
 inline void SetDevice(std::int32_t device) {
   if (device >= 0) {
     AssertGPUSupport();
src/common/common.hip (new file, 4 lines)
@@ -0,0 +1,4 @@
+
+#if defined(XGBOOST_USE_HIP)
+#include "common.cu"
+#endif

@@ -11,9 +11,11 @@
 
 #include "common.h"
 
-#ifdef __CUDACC__
+#if defined(__CUDACC__)
 #include "device_helpers.cuh"
-#endif  // __CUDACC__
+#elif defined(__HIPCC__)
+#include "device_helpers.hip.h"
+#endif  // __CUDACC__ || __HIPCC__
 
 namespace xgboost {
 namespace common {
@@ -105,7 +107,7 @@ class CompressedBufferWriter {
     }
   }
 
-#ifdef __CUDACC__
+#if defined(__CUDACC__) || defined(__HIPCC__)
   __device__ void AtomicWriteSymbol
     (CompressedByteT* buffer, uint64_t symbol, size_t offset) {
     size_t ibit_start = offset * symbol_bits_;
@@ -119,7 +121,7 @@ class CompressedBufferWriter {
       symbol >>= 8;
     }
   }
-#endif  // __CUDACC__
+#endif  // __CUDACC__ || __HIPCC__
 
   template <typename IterT>
   void Write(CompressedByteT *buffer, IterT input_begin, IterT input_end) {

@@ -4,7 +4,6 @@
 #ifndef XGBOOST_COMMON_CUDA_CONTEXT_CUH_
 #define XGBOOST_COMMON_CUDA_CONTEXT_CUH_
 #include <thrust/execution_policy.h>
-
 #include "device_helpers.cuh"
 
 namespace xgboost {

@@ -72,11 +72,23 @@ class pinned_allocator {
     if (cnt > this->max_size()) { throw std::bad_alloc(); }  // end if
 
     pointer result(nullptr);
+
+#if defined(XGBOOST_USE_HIP)
+    dh::safe_cuda(hipHostMalloc(reinterpret_cast<void**>(&result), cnt * sizeof(value_type)));
+#else
     dh::safe_cuda(cudaMallocHost(reinterpret_cast<void**>(&result), cnt * sizeof(value_type)));
+#endif
+
     return result;
   }
 
-  inline void deallocate(pointer p, size_type) { dh::safe_cuda(cudaFreeHost(p)); }  // NOLINT
+  inline void deallocate(pointer p, size_type) {
+#if defined(XGBOOST_USE_HIP)
+    dh::safe_cuda(hipHostFree(p));
+#else
+    dh::safe_cuda(cudaFreeHost(p));
+#endif
+  }  // NOLINT
 
   inline size_type max_size() const { return (std::numeric_limits<size_type>::max)() / sizeof(T); }  // NOLINT
 
src/common/cuda_to_hip.h (new file, 71 lines)
@@ -0,0 +1,71 @@
+/**
+ * Copyright 2017-2023 XGBoost contributors
+ */
+#pragma once
+
+#if defined(XGBOOST_USE_HIP)
+
+#define cudaSuccess hipSuccess
+#define cudaGetLastError hipGetLastError
+
+#define cudaStream_t hipStream_t
+#define cudaStreamCreate hipStreamCreate
+#define cudaStreamCreateWithFlags hipStreamCreateWithFlags
+#define cudaStreamDestroy hipStreamDestroy
+#define cudaStreamWaitEvent hipStreamWaitEvent
+#define cudaStreamSynchronize hipStreamSynchronize
+#define cudaStreamPerThread hipStreamPerThread
+#define cudaStreamLegacy hipStreamLegacy
+
+#define cudaEvent_t hipEvent_t
+#define cudaEventCreate hipEventCreate
+#define cudaEventCreateWithFlags hipEventCreateWithFlags
+#define cudaEventDestroy hipEventDestroy
+
+#define cudaGetDevice hipGetDevice
+#define cudaSetDevice hipSetDevice
+#define cudaGetDeviceCount hipGetDeviceCount
+#define cudaDeviceSynchronize hipDeviceSynchronize
+
+#define cudaGetDeviceProperties hipGetDeviceProperties
+#define cudaDeviceGetAttribute hipDeviceGetAttribute
+
+#define cudaMallocHost hipMallocHost
+#define cudaFreeHost hipFreeHost
+#define cudaMalloc hipMalloc
+#define cudaFree hipFree
+
+#define cudaMemcpy hipMemcpy
+#define cudaMemcpyAsync hipMemcpyAsync
+#define cudaMemcpyDefault hipMemcpyDefault
+#define cudaMemcpyHostToDevice hipMemcpyHostToDevice
+#define cudaMemcpyHostToHost hipMemcpyHostToHost
+#define cudaMemcpyDeviceToHost hipMemcpyDeviceToHost
+#define cudaMemcpyDeviceToDevice hipMemcpyDeviceToDevice
+#define cudaMemsetAsync hipMemsetAsync
+#define cudaMemset hipMemset
+
+#define cudaPointerAttributes hipPointerAttribute_t
+#define cudaPointerGetAttributes hipPointerGetAttributes
+
+#define cudaMemGetInfo hipMemGetInfo
+#define cudaFuncSetAttribute hipFuncSetAttribute
+
+#define cudaDevAttrMultiProcessorCount hipDeviceAttributeMultiprocessorCount
+#define cudaOccupancyMaxActiveBlocksPerMultiprocessor hipOccupancyMaxActiveBlocksPerMultiprocessor
+
+namespace thrust {
+namespace hip {
+}
+}
+
+namespace thrust {
+namespace cuda = thrust::hip;
+}
+
+namespace hipcub {
+}
+
+namespace cub = hipcub;
+
+#endif
@@ -2,6 +2,8 @@
 * Copyright 2017-2023 XGBoost contributors
 */
 #pragma once
+
+#if defined(XGBOOST_USE_CUDA)
 #include <thrust/binary_search.h>  // thrust::upper_bound
 #include <thrust/device_malloc_allocator.h>
 #include <thrust/device_ptr.h>
@@ -1218,3 +1220,7 @@ class LDGIterator {
   }
 };
 }  // namespace dh
+
+#elif defined(XGBOOST_USE_HIP)
+#include "device_helpers.hip.h"
+#endif

src/common/device_helpers.hip.h (new file, 1137 lines)
File diff suppressed because it is too large.
@@ -3,9 +3,11 @@
 */
 #include "error_msg.h"
 
+#include <mutex>    // for call_once, once_flag
 #include <sstream>  // for stringstream
 
 #include "../collective/communicator-inl.h"  // for GetRank
+#include "xgboost/context.h"                 // for Context
 #include "xgboost/logging.h"
 
 namespace xgboost::error {
@@ -26,34 +28,43 @@ void WarnDeprecatedGPUHist() {
 }
 
 void WarnManualUpdater() {
-  bool static thread_local logged{false};
-  if (logged) {
-    return;
-  }
-  LOG(WARNING)
-      << "You have manually specified the `updater` parameter. The `tree_method` parameter "
-         "will be ignored. Incorrect sequence of updaters will produce undefined "
-         "behavior. For common uses, we recommend using `tree_method` parameter instead.";
-  logged = true;
+  static std::once_flag flag;
+  std::call_once(flag, [] {
+    LOG(WARNING)
+        << "You have manually specified the `updater` parameter. The `tree_method` parameter "
+           "will be ignored. Incorrect sequence of updaters will produce undefined "
+           "behavior. For common uses, we recommend using `tree_method` parameter instead.";
+  });
 }
 
 void WarnDeprecatedGPUId() {
-  static thread_local bool logged{false};
-  if (logged) {
-    return;
-  }
-  auto msg = DeprecatedFunc("gpu_id", "2.0.0", "device");
-  msg += " E.g. device=cpu/cuda/cuda:0";
-  LOG(WARNING) << msg;
-  logged = true;
+  static std::once_flag flag;
+  std::call_once(flag, [] {
+    auto msg = DeprecatedFunc("gpu_id", "2.0.0", "device");
+    msg += " E.g. device=cpu/cuda/cuda:0";
+    LOG(WARNING) << msg;
+  });
 }
 
 void WarnEmptyDataset() {
-  static thread_local bool logged{false};
-  if (logged) {
-    return;
-  }
-  LOG(WARNING) << "Empty dataset at worker: " << collective::GetRank();
-  logged = true;
+  static std::once_flag flag;
+  std::call_once(flag,
+                 [] { LOG(WARNING) << "Empty dataset at worker: " << collective::GetRank(); });
+}
+
+void MismatchedDevices(Context const* booster, Context const* data) {
+  static std::once_flag flag;
+  std::call_once(flag, [&] {
+    LOG(WARNING)
+        << "Falling back to prediction using DMatrix due to mismatched devices. This might "
+           "lead to higher memory usage and slower performance. XGBoost is running on: "
+        << booster->DeviceName() << ", while the input data is on: " << data->DeviceName() << ".\n"
+        << R"(Potential solutions:
+- Use a data structure that matches the device ordinal in the booster.
+- Set the device for booster before call to inplace_predict.
+
+This warning will only be shown once.
+)";
+  });
 }
 }  // namespace xgboost::error
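The new `MismatchedDevices` message is emitted once per process via `std::call_once` when the booster and the input data live on different devices. A rough Python sketch of the scenario it covers, assuming a GPU-enabled build with at least one visible device:

import numpy as np
import xgboost as xgb

X = np.random.default_rng(0).normal(size=(256, 8))
y = X[:, 0] > 0

booster = xgb.train({"device": "cuda", "tree_method": "hist"},
                    xgb.QuantileDMatrix(X, label=y), num_boost_round=4)

# The booster lives on the GPU while `X` is host memory: prediction still
# succeeds, but it falls back to a DMatrix copy and the warning above is
# emitted only once per process.
booster.inplace_predict(X)
booster.inplace_predict(X)  # no second warning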
@@ -11,6 +11,7 @@
 #include <string>  // for string
 
 #include "xgboost/base.h"         // for bst_feature_t
+#include "xgboost/context.h"      // for Context
 #include "xgboost/logging.h"
 #include "xgboost/string_view.h"  // for StringView
 
@@ -94,5 +95,7 @@ constexpr StringView InvalidCUDAOrdinal() {
   return "Invalid device. `device` is required to be CUDA and there must be at least one GPU "
          "available for using GPU.";
 }
+
+void MismatchedDevices(Context const* booster, Context const* data);
 }  // namespace xgboost::error
 #endif  // XGBOOST_COMMON_ERROR_MSG_H_
@@ -84,10 +84,19 @@ __global__ void GetColumnSizeSharedMemKernel(IterSpan<BatchIt> batch_iter,
 template <std::uint32_t kBlockThreads, typename Kernel>
 std::uint32_t EstimateGridSize(std::int32_t device, Kernel kernel, std::size_t shared_mem) {
   int n_mps = 0;
+#if defined(XGBOOST_USE_CUDA)
   dh::safe_cuda(cudaDeviceGetAttribute(&n_mps, cudaDevAttrMultiProcessorCount, device));
+#elif defined(XGBOOST_USE_HIP)
+  dh::safe_cuda(hipDeviceGetAttribute(&n_mps, hipDeviceAttributeMultiprocessorCount, device));
+#endif
   int n_blocks_per_mp = 0;
+#if defined(XGBOOST_USE_CUDA)
   dh::safe_cuda(cudaOccupancyMaxActiveBlocksPerMultiprocessor(&n_blocks_per_mp, kernel,
                                                               kBlockThreads, shared_mem));
+#elif defined(XGBOOST_USE_HIP)
+  dh::safe_cuda(hipOccupancyMaxActiveBlocksPerMultiprocessor(&n_blocks_per_mp, kernel,
+                                                             kBlockThreads, shared_mem));
+#endif
   std::uint32_t grid_size = n_blocks_per_mp * n_mps;
   return grid_size;
 }
@@ -170,6 +179,7 @@ void GetColumnSizesScan(int device, size_t num_columns, std::size_t num_cuts_per
       column_sizes_scan->begin(), [=] __device__(size_t column_size) {
         return thrust::min(num_cuts_per_feature, column_size);
       });
+
   thrust::exclusive_scan(thrust::cuda::par(alloc), cut_ptr_it,
                          cut_ptr_it + column_sizes_scan->size(), cuts_ptr->DevicePointer());
   thrust::exclusive_scan(thrust::cuda::par(alloc), column_sizes_scan->begin(),
@@ -294,6 +304,7 @@ void ProcessSlidingWindow(AdapterBatch const &batch, MetaInfo const &info,
                                 &column_sizes_scan,
                                 &sorted_entries);
   dh::XGBDeviceAllocator<char> alloc;
+
   thrust::sort(thrust::cuda::par(alloc), sorted_entries.begin(),
                sorted_entries.end(), detail::EntryCompareOp());
 
@@ -353,11 +364,13 @@ void ProcessWeightedSlidingWindow(Batch batch, MetaInfo const& info,
         bst_group_t group_idx = dh::SegmentId(d_group_ptr, ridx);
         return weights[group_idx];
       });
+
     auto retit = thrust::copy_if(thrust::cuda::par(alloc),
                                  weight_iter + begin, weight_iter + end,
                                  batch_iter + begin,
                                  d_temp_weights.data(),  // output
                                  is_valid);
+
     CHECK_EQ(retit - d_temp_weights.data(), d_temp_weights.size());
   } else {
     CHECK_EQ(batch.NumRows(), weights.size());
@@ -366,11 +379,13 @@ void ProcessWeightedSlidingWindow(Batch batch, MetaInfo const& info,
       [=]__device__(size_t idx) -> float {
         return weights[batch.GetElement(idx).row_idx];
       });
+
    auto retit = thrust::copy_if(thrust::cuda::par(alloc),
                                 weight_iter + begin, weight_iter + end,
                                 batch_iter + begin,
                                 d_temp_weights.data(),  // output
                                 is_valid);
+
     CHECK_EQ(retit - d_temp_weights.data(), d_temp_weights.size());
   }
 
src/common/hist_util.hip (new file, +4)
@@ -0,0 +1,4 @@
+
+#if defined(XGBOOST_USE_HIP)
+#include "hist_util.cu"
+#endif
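Note: these four-line .hip stubs are how the HIP build reuses the existing CUDA sources: the HIP compiler compiles the included .cu file directly, and inside that shared source the runtime is selected with the same XGBOOST_USE_CUDA / XGBOOST_USE_HIP guards seen in the occupancy hunk above. A minimal illustration of that selection (the helper name is hypothetical, not from the diff):

// Shared .cu source: built by nvcc for CUDA builds and, through the .hip stub, by hipcc for HIP builds.
#if defined(XGBOOST_USE_CUDA)
#include <cuda_runtime.h>
inline void SelectDevice(int device) { (void)cudaSetDevice(device); }
#elif defined(XGBOOST_USE_HIP)
#include <hip/hip_runtime.h>
inline void SelectDevice(int device) { (void)hipSetDevice(device); }
#endif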
@@ -1,7 +1,7 @@
 /**
  * Copyright 2017-2023 by XGBoost contributors
  */
-#ifndef XGBOOST_USE_CUDA
+#if !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP)

 // dummy implementation of HostDeviceVector in case CUDA is not used

@@ -199,4 +199,4 @@ template class HostDeviceVector<std::size_t>;

 }  // namespace xgboost

-#endif  // XGBOOST_USE_CUDA
+#endif  // XGBOOST_USE_CUDA && !defined(XGBOOST_USE_HIP)
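Note: the widened guard above is the recurring theme of this diff. Everywhere the code used #if !defined(XGBOOST_USE_CUDA) to select a CPU-only fallback, the condition now also tests XGBOOST_USE_HIP, so HIP builds compile the real device path instead of the dummy one. Condensed into a self-contained header sketch (the file name, function, and stub body are illustrative; the real fallbacks call common::AssertGPUSupport(), as the numeric.h and stats.h hunks below show):

// gpu_sum.h -- hypothetical header following the pattern used throughout this diff.
#pragma once
#include <cstdio>
#include <cstdlib>
#include <vector>

// Real implementation lives in a .cu/.hip translation unit compiled only for GPU builds.
double GpuSum(std::vector<float> const& values);

#if !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP)
// CPU-only build: no device translation unit exists, so provide a stub that fails loudly.
inline double GpuSum(std::vector<float> const&) {
  std::fprintf(stderr, "GPU support was not compiled in.\n");
  std::abort();
}
#endif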
@@ -139,6 +139,7 @@ class HostDeviceVectorImpl {
     auto ptr = other->ConstDevicePointer();
     SetDevice();
     CHECK_EQ(this->DeviceIdx(), other->DeviceIdx());

     dh::safe_cuda(cudaMemcpyAsync(this->DevicePointer() + ori_size,
                                   ptr,
                                   other->Size() * sizeof(T),

@@ -195,6 +196,7 @@ class HostDeviceVectorImpl {
     gpu_access_ = access;
     if (data_h_.size() != data_d_->size()) { data_h_.resize(data_d_->size()); }
     SetDevice();

     dh::safe_cuda(cudaMemcpy(data_h_.data(),
                              data_d_->data().get(),
                              data_d_->size() * sizeof(T),

@@ -211,6 +213,7 @@ class HostDeviceVectorImpl {
     // data is on the host
     LazyResizeDevice(data_h_.size());
     SetDevice();

     dh::safe_cuda(cudaMemcpyAsync(data_d_->data().get(),
                                   data_h_.data(),
                                   data_d_->size() * sizeof(T),

@@ -239,6 +242,7 @@ class HostDeviceVectorImpl {
     LazyResizeDevice(Size());
     gpu_access_ = GPUAccess::kWrite;
     SetDevice();

     dh::safe_cuda(cudaMemcpyAsync(data_d_->data().get(), other->data_d_->data().get(),
                                   data_d_->size() * sizeof(T), cudaMemcpyDefault));
   }

@@ -248,6 +252,7 @@ class HostDeviceVectorImpl {
     LazyResizeDevice(Size());
     gpu_access_ = GPUAccess::kWrite;
     SetDevice();

     dh::safe_cuda(cudaMemcpyAsync(data_d_->data().get(), begin,
                                   data_d_->size() * sizeof(T), cudaMemcpyDefault));
   }
src/common/host_device_vector.hip (new file, +4)
@@ -0,0 +1,4 @@
+
+#if defined(XGBOOST_USE_HIP)
+#include "host_device_vector.cu"
+#endif
@@ -384,7 +384,8 @@ class PrivateMmapConstStream : public AlignedResourceReadStream {
   * @param length See the `length` parameter of `mmap` for details.
   */
  explicit PrivateMmapConstStream(std::string path, std::size_t offset, std::size_t length)
-      : AlignedResourceReadStream{std::make_shared<MmapResource>(path, offset, length)} {}
+      : AlignedResourceReadStream{std::shared_ptr<MmapResource>{  // NOLINT
+            new MmapResource{std::move(path), offset, length}}} {}
  ~PrivateMmapConstStream() noexcept(false) override;
};

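Note: the constructor change above swaps std::make_shared for an explicit std::shared_ptr{new ...} and moves path instead of copying it; the diff does not state the motivation, and the added NOLINT suggests a clang-tidy check is being deliberately silenced. A neutral illustration of the two spellings (Resource and the factory functions are hypothetical stand-ins, not the real MmapResource):

#include <cstddef>
#include <memory>
#include <string>
#include <utility>

struct Resource {
  Resource(std::string path, std::size_t offset, std::size_t length)
      : path{std::move(path)}, offset{offset}, length{length} {}
  std::string path;
  std::size_t offset;
  std::size_t length;
};

// make_shared: one combined allocation; arguments are forwarded with parentheses.
std::shared_ptr<Resource> MakeA(std::string path) {
  return std::make_shared<Resource>(std::move(path), 0, 0);
}

// shared_ptr{new T{...}}: separate control-block allocation, but brace-initialization is available.
std::shared_ptr<Resource> MakeB(std::string path) {
  return std::shared_ptr<Resource>{new Resource{std::move(path), 0, 0}};
}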
@@ -12,7 +12,8 @@
 namespace xgboost {
 namespace linalg {
 template <typename T, int32_t D, typename Fn>
-void ElementWiseKernelDevice(linalg::TensorView<T, D> t, Fn&& fn, cudaStream_t s = nullptr) {
+void ElementWiseKernelDevice(linalg::TensorView<T, D> t, Fn&& fn, cudaStream_t s = nullptr)
+{
   dh::safe_cuda(cudaSetDevice(t.DeviceIdx()));
   static_assert(std::is_void<std::result_of_t<Fn(size_t, T&)>>::value,
                 "For function with return, use transform instead.");

@@ -28,7 +29,8 @@ void ElementWiseKernelDevice(linalg::TensorView<T, D> t, Fn&& fn, cudaStream_t s
 }

 template <typename T, int32_t D, typename Fn>
-void ElementWiseTransformDevice(linalg::TensorView<T, D> t, Fn&& fn, cudaStream_t s = nullptr) {
+void ElementWiseTransformDevice(linalg::TensorView<T, D> t, Fn&& fn, cudaStream_t s = nullptr)
+{
   if (t.Contiguous()) {
     auto ptr = t.Values().data();
     dh::LaunchN(t.Size(), s, [=] __device__(size_t i) { ptr[i] = fn(i, ptr[i]); });
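Note: only the brace placement changes in this hunk, but the signatures spell out the contract: the functor given to ElementWiseKernelDevice takes the flat element index plus a mutable reference and must return void (the static_assert redirects value-returning functors to the transform variant). A hypothetical call site, assuming t is a device-resident linalg::TensorView<float, 1>:

ElementWiseKernelDevice(t, [] __device__(std::size_t i, float& v) { v += static_cast<float>(i); });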
@@ -42,7 +42,7 @@ void ElementWiseKernelHost(linalg::TensorView<T, D> t, int32_t n_threads, Fn&& f
   }
 }

-#if !defined(XGBOOST_USE_CUDA)
+#if !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP)
 template <typename T, int32_t D, typename Fn>
 void ElementWiseKernelDevice(linalg::TensorView<T, D>, Fn&&, void* = nullptr) {
   common::AssertGPUSupport();

@@ -60,7 +60,7 @@ void ElementWiseKernel(Context const* ctx, linalg::TensorView<T, D> t, Fn&& fn)
   }
   ElementWiseKernelHost(t, ctx->Threads(), fn);
 }
-#endif  // !defined(XGBOOST_USE_CUDA)
+#endif  // !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP)

 template <typename T, std::int32_t kDim>
 auto cbegin(TensorView<T, kDim> const& v) {  // NOLINT
@@ -143,7 @@ CheckNAN(T) {
   return false;
 }

-#if XGBOOST_STRICT_R_MODE && !defined(__CUDA_ARCH__)
+#if XGBOOST_STRICT_R_MODE && !defined(__CUDA_ARCH__) && !defined(__HIPCC__)

 bool CheckNAN(double v);

@@ -152,23 +152,27 @@ bool CheckNAN(double v);
 XGBOOST_DEVICE bool inline CheckNAN(float x) {
 #if defined(__CUDA_ARCH__)
   return isnan(x);
+#elif defined(__HIPCC__)
+  return __builtin_isnan(x);
 #else
   return std::isnan(x);
-#endif  // defined(__CUDA_ARCH__)
+#endif  // defined(__CUDA_ARCH__) || defined(__HIPCC__)
 }

 XGBOOST_DEVICE bool inline CheckNAN(double x) {
 #if defined(__CUDA_ARCH__)
   return isnan(x);
+#elif defined(__HIPCC__)
+  return __builtin_isnan(x);
 #else
   return std::isnan(x);
-#endif  // defined(__CUDA_ARCH__)
+#endif  // defined(__CUDA_ARCH__) || defined(__HIPCC__)
 }

 #endif  // XGBOOST_STRICT_R_MODE && !defined(__CUDA_ARCH__)
 // GPU version is not uploaded in CRAN anyway.
 // Specialize only when using R with CPU.
-#if XGBOOST_STRICT_R_MODE && !defined(XGBOOST_USE_CUDA)
+#if XGBOOST_STRICT_R_MODE && !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP)
 double LogGamma(double v);

 #else  // Not R or R with GPU.
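Note on the two macros used above: __CUDA_ARCH__ is only defined during nvcc's device-side compilation passes, while __HIPCC__ is defined whenever the HIP compiler processes the file, for host and device code alike, so the #elif routes every HIP compile through the compiler builtin. A self-contained sketch of the same dispatch (the DEVICE_INLINE macro and function name are illustrative, not from the diff):

#include <cmath>

#if defined(__CUDACC__) || defined(__HIPCC__)
#define DEVICE_INLINE __host__ __device__ inline
#else
#define DEVICE_INLINE inline
#endif

DEVICE_INLINE bool IsNan(float x) {
#if defined(__CUDA_ARCH__)
  return isnan(x);            // nvcc, device pass: CUDA math library
#elif defined(__HIPCC__)
  return __builtin_isnan(x);  // hipcc (host or device pass): clang builtin
#else
  return std::isnan(x);       // plain host compiler
#endif
}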
@@ -191,7 +195,7 @@ XGBOOST_DEVICE inline T LogGamma(T v) {
 #endif  // _MSC_VER
 }

-#endif  // XGBOOST_STRICT_R_MODE && !defined(XGBOOST_USE_CUDA)
+#endif  // XGBOOST_STRICT_R_MODE && !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP)

 }  // namespace common
 }  // namespace xgboost
@@ -15,6 +15,7 @@ double Reduce(Context const* ctx, HostDeviceVector<float> const& values) {
   values.SetDevice(ctx->gpu_id);
   auto const d_values = values.ConstDeviceSpan();
   dh::XGBCachingDeviceAllocator<char> alloc;

   return dh::Reduce(thrust::cuda::par(alloc), dh::tcbegin(d_values), dh::tcend(d_values), 0.0,
                     thrust::plus<float>{});
 }
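Note: dh::Reduce above is XGBoost's wrapper around a Thrust reduction over the device span. A plain-Thrust sketch of the same kind of sum (the function name is hypothetical):

#include <thrust/device_vector.h>
#include <thrust/execution_policy.h>
#include <thrust/functional.h>
#include <thrust/reduce.h>

// Sum a device vector of floats, accumulating into a double-typed initial value.
double SumOnDevice(thrust::device_vector<float> const& values) {
  return thrust::reduce(thrust::device, values.begin(), values.end(), 0.0,
                        thrust::plus<double>{});
}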
@@ -99,12 +99,12 @@ void PartialSum(int32_t n_threads, InIt begin, InIt end, T init, OutIt out_it) {

 namespace cuda_impl {
 double Reduce(Context const* ctx, HostDeviceVector<float> const& values);
-#if !defined(XGBOOST_USE_CUDA)
+#if !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP)
 inline double Reduce(Context const*, HostDeviceVector<float> const&) {
   AssertGPUSupport();
   return 0;
 }
-#endif  // !defined(XGBOOST_USE_CUDA)
+#endif  // !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP)
 }  // namespace cuda_impl

 /**
src/common/numeric.hip (new file, +4)
@@ -0,0 +1,4 @@
+
+#if defined(XGBOOST_USE_HIP)
+#include "numeric.cu"
+#endif
@@ -109,6 +109,7 @@ template <typename T, typename U>
 void CopyTo(Span<T> out, Span<U> src) {
   CHECK_EQ(out.size(), src.size());
   static_assert(std::is_same<std::remove_cv_t<T>, std::remove_cv_t<T>>::value);

   dh::safe_cuda(cudaMemcpyAsync(out.data(), src.data(),
                                 out.size_bytes(),
                                 cudaMemcpyDefault));

@@ -162,6 +163,7 @@ common::Span<thrust::tuple<uint64_t, uint64_t>> MergePath(
   // Compute output ptr
   auto transform_it =
       thrust::make_zip_iterator(thrust::make_tuple(x_ptr.data(), y_ptr.data()));

   thrust::transform(
       thrust::cuda::par(alloc), transform_it, transform_it + x_ptr.size(),
       out_ptr.data(),

@@ -211,6 +213,7 @@ void MergeImpl(int32_t device, Span<SketchEntry const> const &d_x,
                Span<bst_row_t const> const &x_ptr, Span<SketchEntry const> const &d_y,
                Span<bst_row_t const> const &y_ptr, Span<SketchEntry> out, Span<bst_row_t> out_ptr) {
   dh::safe_cuda(cudaSetDevice(device));

   CHECK_EQ(d_x.size() + d_y.size(), out.size());
   CHECK_EQ(x_ptr.size(), out_ptr.size());
   CHECK_EQ(y_ptr.size(), out_ptr.size());

@@ -309,6 +312,7 @@ void SketchContainer::Push(Span<Entry const> entries, Span<size_t> columns_ptr,
                            common::Span<OffsetT> cuts_ptr,
                            size_t total_cuts, Span<float> weights) {
   dh::safe_cuda(cudaSetDevice(device_));

   Span<SketchEntry> out;
   dh::device_vector<SketchEntry> cuts;
   bool first_window = this->Current().empty();

@@ -378,6 +382,7 @@ size_t SketchContainer::ScanInput(Span<SketchEntry> entries, Span<OffsetT> d_col
   });
   // Reverse scan to accumulate weights into first duplicated element on left.
   auto val_it = thrust::make_reverse_iterator(dh::tend(entries));

   thrust::inclusive_scan_by_key(
       thrust::cuda::par(alloc), key_it, key_it + entries.size(),
       val_it, val_it,

@@ -443,6 +448,7 @@ void SketchContainer::Prune(size_t to) {
 void SketchContainer::Merge(Span<OffsetT const> d_that_columns_ptr,
                             Span<SketchEntry const> that) {
   dh::safe_cuda(cudaSetDevice(device_));

   timer_.Start(__func__);
   if (this->Current().size() == 0) {
     CHECK_EQ(this->columns_ptr_.HostVector().back(), 0);

@@ -478,6 +484,7 @@ void SketchContainer::Merge(Span<OffsetT const> d_that_columns_ptr,

 void SketchContainer::FixError() {
   dh::safe_cuda(cudaSetDevice(device_));

   auto d_columns_ptr = this->columns_ptr_.ConstDeviceSpan();
   auto in = dh::ToSpan(this->Current());
   dh::LaunchN(in.size(), [=] __device__(size_t idx) {

@@ -635,10 +642,12 @@ void SketchContainer::MakeCuts(HistogramCuts* p_cuts, bool is_column_split) {
     CHECK_EQ(num_columns_, d_in_columns_ptr.size() - 1);
     max_values.resize(d_in_columns_ptr.size() - 1);
     dh::caching_device_vector<SketchEntry> d_max_values(d_in_columns_ptr.size() - 1);

     thrust::reduce_by_key(thrust::cuda::par(alloc), key_it, key_it + in_cut_values.size(), val_it,
                           thrust::make_discard_iterator(), d_max_values.begin(),
                           thrust::equal_to<bst_feature_t>{},
                           [] __device__(auto l, auto r) { return l.value > r.value ? l : r; });

     dh::CopyDeviceSpanToVector(&max_values, dh::ToSpan(d_max_values));
     auto max_it = MakeIndexTransformIter([&](auto i) {
       if (IsCat(h_feature_types, i)) {
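Note: the MakeCuts hunk above uses a segmented reduction (reduce_by_key with an equality predicate and a custom max functor) to find the largest sketch entry per feature. A standalone sketch of the same idea with plain Thrust types (names are hypothetical; maxima must be pre-sized to the number of key segments):

#include <thrust/device_vector.h>
#include <thrust/functional.h>
#include <thrust/iterator/discard_iterator.h>
#include <thrust/reduce.h>

// keys: sorted segment ids (one segment per feature); values: entries to reduce within each segment.
void MaxPerSegment(thrust::device_vector<int> const& keys,
                   thrust::device_vector<float> const& values,
                   thrust::device_vector<float>* maxima) {
  thrust::reduce_by_key(keys.begin(), keys.end(), values.begin(),
                        thrust::make_discard_iterator(),  // reduced keys are not needed
                        maxima->begin(),
                        thrust::equal_to<int>{}, thrust::maximum<float>{});
}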
@@ -175,6 +175,7 @@ class SketchContainer {
   template <typename KeyComp = thrust::equal_to<size_t>>
   size_t Unique(KeyComp key_comp = thrust::equal_to<size_t>{}) {
     timer_.Start(__func__);

     dh::safe_cuda(cudaSetDevice(device_));
     this->columns_ptr_.SetDevice(device_);
     Span<OffsetT> d_column_scan = this->columns_ptr_.DeviceSpan();

@@ -186,11 +187,13 @@ class SketchContainer {
     dh::XGBCachingDeviceAllocator<char> alloc;

     d_column_scan = this->columns_ptr_.DeviceSpan();

     size_t n_uniques = dh::SegmentedUnique(
         thrust::cuda::par(alloc), d_column_scan.data(),
         d_column_scan.data() + d_column_scan.size(), entries.data(),
         entries.data() + entries.size(), scan_out.DevicePointer(),
         entries.data(), detail::SketchUnique{}, key_comp);

     this->columns_ptr_.Copy(scan_out);
     CHECK(!this->columns_ptr_.HostCanRead());

src/common/quantile.hip (new file, +4)
@@ -0,0 +1,4 @@
+
+#if defined(XGBOOST_USE_HIP)
+#include "quantile.cu"
+#endif
@@ -62,7 +62,7 @@ common::Span<std::size_t const> RankingCache::MakeRankOnCPU(Context const* ctx,
   return rank;
 }

-#if !defined(XGBOOST_USE_CUDA)
+#if !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP)
 void RankingCache::InitOnCUDA(Context const*, MetaInfo const&) { common::AssertGPUSupport(); }
 common::Span<std::size_t const> RankingCache::MakeRankOnCUDA(Context const*,
                                                              common::Span<float const>) {

@@ -108,9 +108,9 @@ void NDCGCache::InitOnCPU(Context const* ctx, MetaInfo const& info) {
   });
 }

-#if !defined(XGBOOST_USE_CUDA)
+#if !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP)
 void NDCGCache::InitOnCUDA(Context const*, MetaInfo const&) { common::AssertGPUSupport(); }
-#endif  // !defined(XGBOOST_USE_CUDA)
+#endif  // !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP)

 DMLC_REGISTER_PARAMETER(LambdaRankParam);

@@ -120,7 +120,7 @@ void PreCache::InitOnCPU(Context const*, MetaInfo const& info) {
       [](auto beg, auto end, auto op) { return std::all_of(beg, end, op); });
 }

-#if !defined(XGBOOST_USE_CUDA)
+#if !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP)
 void PreCache::InitOnCUDA(Context const*, MetaInfo const&) { common::AssertGPUSupport(); }
 #endif  // !defined(XGBOOST_USE_CUDA)

@@ -130,9 +130,9 @@ void MAPCache::InitOnCPU(Context const*, MetaInfo const& info) {
       [](auto beg, auto end, auto op) { return std::all_of(beg, end, op); });
 }

-#if !defined(XGBOOST_USE_CUDA)
+#if !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP)
 void MAPCache::InitOnCUDA(Context const*, MetaInfo const&) { common::AssertGPUSupport(); }
-#endif  // !defined(XGBOOST_USE_CUDA)
+#endif  // !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP)

 std::string ParseMetricName(StringView name, StringView param, position_t* topn, bool* minus) {
   std::string out_name;
@@ -23,6 +23,10 @@
 #include "xgboost/logging.h"  // for CHECK
 #include "xgboost/span.h"     // for Span

+#if defined(XGBOOST_USE_HIP)
+#include <hipcub/hipcub.hpp>
+#endif
+
 namespace xgboost::ltr {
 namespace cuda_impl {
 void CalcQueriesDCG(Context const* ctx, linalg::VectorView<float const> d_labels,
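Note: this is the one spot in the section that pulls in a HIP-specific header; hipCUB exposes the CUB interface on ROCm, so block-level primitives keep the same names under the hipcub namespace. A sketch of how a shared source could pick the flavour at compile time (the namespace alias is hypothetical, not from the diff):

#if defined(XGBOOST_USE_HIP)
#include <hipcub/hipcub.hpp>
namespace gpu_cub = hipcub;
#else
#include <cub/cub.cuh>
namespace gpu_cub = cub;
#endif
// Kernels can then refer to e.g. gpu_cub::BlockReduce<float, 256> on either backend.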
src/common/ranking_utils.hip (new file, +4)
@@ -0,0 +1,4 @@
+
+#if defined(XGBOOST_USE_HIP)
+#include "ranking_utils.cu"
+#endif
@@ -76,7 +76,7 @@ class RefResourceView {

   [[nodiscard]] size_type size() const { return size_; }  // NOLINT
   [[nodiscard]] size_type size_bytes() const {  // NOLINT
-    return Span{data(), size()}.size_bytes();
+    return Span<const value_type>{data(), size()}.size_bytes();
   }
   [[nodiscard]] value_type* data() { return ptr_; };              // NOLINT
   [[nodiscard]] value_type const* data() const { return ptr_; };  // NOLINT
@@ -18,6 +18,7 @@
 namespace xgboost {
 namespace common {
 namespace cuda_impl {

 void Median(Context const* ctx, linalg::TensorView<float const, 2> t,
             common::OptionalWeights weights, linalg::Tensor<float, 1>* out) {
   CHECK_GE(t.Shape(1), 1);
@@ -216,6 +216,7 @@ void SegmentedWeightedQuantile(Context const* ctx, AlphaIt alpha_it, SegIt seg_b
                                                  detail::SegOp<SegIt>{seg_beg, seg_end});
   auto scan_val = dh::MakeTransformIterator<float>(thrust::make_counting_iterator(0ul),
                                                    detail::WeightOp<WIter>{w_begin, d_sorted_idx});

   thrust::inclusive_scan_by_key(thrust::cuda::par(caching), scan_key, scan_key + n_weights,
                                 scan_val, weights_cdf.begin());

@@ -112,7 +112,7 @@ void Median(Context const* ctx, linalg::TensorView<float const, 2> t, OptionalWe

 void Mean(Context const* ctx, linalg::VectorView<float const> v, linalg::VectorView<float> out);

-#if !defined(XGBOOST_USE_CUDA)
+#if !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP)
 inline void Median(Context const*, linalg::TensorView<float const, 2>, OptionalWeights,
                    linalg::Tensor<float, 1>*) {
   common::AssertGPUSupport();

@@ -120,7 +120,7 @@ inline void Median(Context const*, linalg::TensorView<float const, 2>, OptionalW
 inline void Mean(Context const*, linalg::VectorView<float const>, linalg::VectorView<float>) {
   common::AssertGPUSupport();
 }
-#endif  // !defined(XGBOOST_USE_CUDA)
+#endif  // !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP)
 }  // namespace cuda_impl

 /**
src/common/stats.hip (new file, +4)
@@ -0,0 +1,4 @@
+
+#if defined(XGBOOST_USE_HIP)
+#include "stats.cu"
+#endif
@@ -25,12 +25,12 @@ DECLARE_FIELD_ENUM_CLASS(xgboost::common::ProbabilityDistributionType);
 namespace xgboost {
 namespace common {

-#ifndef __CUDACC__
+#if !defined(__CUDACC__) && !defined(__HIPCC__)

 using std::log;
 using std::fmax;

-#endif  // __CUDACC__
+#endif  // __CUDACC__ && __HIPCC__

 enum class CensoringType : uint8_t {
   kUncensored, kRightCensored, kLeftCensored, kIntervalCensored
Some files were not shown because too many files have changed in this diff.