Compare commits

..

6 Commits

Author SHA1 Message Date
Philip Hyunsu Cho
82d846bbeb Update change_scala_version.py to also change scala.version property (#9897) 2023-12-18 23:49:41 -08:00
Philip Hyunsu Cho
71d330afdc Bump version to 2.0.3 (#9895) 2023-12-14 17:54:05 -08:00
Philip Hyunsu Cho
3acbd8692b [jvm-packages] Fix POM for xgboost-jvm metapackage (#9893)
* [jvm-packages] Fix POM for xgboost-jvm metapackage

* Add script for updating the Scala version
2023-12-14 16:50:34 -08:00
Philip Hyunsu Cho
ad524f76ab [backport] [CI] Upload libxgboost4j.dylib (M1) to S3 bucket (#9887)
* [CI] Set up CI for Mac M1 (#9699)

* [CI] Improve CI for Mac M1 (#9748)

* [CI] Build libxgboost4j.dylib with CMAKE_OSX_DEPLOYMENT_TARGET (#9749)

* [CI] Upload libxgboost4j.dylib (M1) to S3 bucket (#9886)
2023-12-13 16:05:40 -08:00
Jiaming Yuan
d2d1751c03 [backport][py] Use the first found native library. (#9860) (#9879) 2023-12-13 14:20:30 +08:00
Jiaming Yuan
e4ee4e79dc [backport][sklearn] Fix loading model attributes. (#9808) (#9880) 2023-12-13 14:20:04 +08:00
277 changed files with 595 additions and 2649 deletions

3
.gitmodules vendored
View File

@@ -5,6 +5,3 @@
[submodule "gputreeshap"] [submodule "gputreeshap"]
path = gputreeshap path = gputreeshap
url = https://github.com/rapidsai/gputreeshap.git url = https://github.com/rapidsai/gputreeshap.git
[submodule "rocgputreeshap"]
path = rocgputreeshap
url = https://github.com/ROCmSoftwarePlatform/rocgputreeshap

View File

@@ -1,5 +1,5 @@
cmake_minimum_required(VERSION 3.18 FATAL_ERROR) cmake_minimum_required(VERSION 3.18 FATAL_ERROR)
project(xgboost LANGUAGES CXX C VERSION 2.0.2) project(xgboost LANGUAGES CXX C VERSION 2.0.3)
include(cmake/Utils.cmake) include(cmake/Utils.cmake)
list(APPEND CMAKE_MODULE_PATH "${xgboost_SOURCE_DIR}/cmake/modules") list(APPEND CMAKE_MODULE_PATH "${xgboost_SOURCE_DIR}/cmake/modules")
cmake_policy(SET CMP0022 NEW) cmake_policy(SET CMP0022 NEW)
@@ -58,7 +58,7 @@ option(ENABLE_ALL_WARNINGS "Enable all compiler warnings. Only effective for GCC
option(LOG_CAPI_INVOCATION "Log all C API invocations for debugging" OFF) option(LOG_CAPI_INVOCATION "Log all C API invocations for debugging" OFF)
option(GOOGLE_TEST "Build google tests" OFF) option(GOOGLE_TEST "Build google tests" OFF)
option(USE_DMLC_GTEST "Use google tests bundled with dmlc-core submodule" OFF) option(USE_DMLC_GTEST "Use google tests bundled with dmlc-core submodule" OFF)
option(USE_DEVICE_DEBUG "Generate CUDA/HIP device debug info." OFF) option(USE_DEVICE_DEBUG "Generate CUDA device debug info." OFF)
option(USE_NVTX "Build with cuda profiling annotations. Developers only." OFF) option(USE_NVTX "Build with cuda profiling annotations. Developers only." OFF)
set(NVTX_HEADER_DIR "" CACHE PATH "Path to the stand-alone nvtx header") set(NVTX_HEADER_DIR "" CACHE PATH "Path to the stand-alone nvtx header")
option(RABIT_MOCK "Build rabit with mock" OFF) option(RABIT_MOCK "Build rabit with mock" OFF)
@@ -71,10 +71,6 @@ option(USE_NCCL "Build with NCCL to enable distributed GPU support." OFF)
option(BUILD_WITH_SHARED_NCCL "Build with shared NCCL library." OFF) option(BUILD_WITH_SHARED_NCCL "Build with shared NCCL library." OFF)
set(GPU_COMPUTE_VER "" CACHE STRING set(GPU_COMPUTE_VER "" CACHE STRING
"Semicolon separated list of compute versions to be built against, e.g. '35;61'") "Semicolon separated list of compute versions to be built against, e.g. '35;61'")
## HIP
option(USE_HIP "Build with GPU acceleration" OFF)
option(USE_RCCL "Build with RCCL to enable distributed GPU support." OFF)
option(BUILD_WITH_SHARED_RCCL "Build with shared RCCL library." OFF)
## Copied From dmlc ## Copied From dmlc
option(USE_HDFS "Build with HDFS support" OFF) option(USE_HDFS "Build with HDFS support" OFF)
option(USE_AZURE "Build with AZURE support" OFF) option(USE_AZURE "Build with AZURE support" OFF)
@@ -97,7 +93,6 @@ option(ADD_PKGCONFIG "Add xgboost.pc into system." ON)
if (USE_DEBUG_OUTPUT AND (NOT (CMAKE_BUILD_TYPE MATCHES Debug))) if (USE_DEBUG_OUTPUT AND (NOT (CMAKE_BUILD_TYPE MATCHES Debug)))
message(SEND_ERROR "Do not enable `USE_DEBUG_OUTPUT' with release build.") message(SEND_ERROR "Do not enable `USE_DEBUG_OUTPUT' with release build.")
endif (USE_DEBUG_OUTPUT AND (NOT (CMAKE_BUILD_TYPE MATCHES Debug))) endif (USE_DEBUG_OUTPUT AND (NOT (CMAKE_BUILD_TYPE MATCHES Debug)))
if (USE_NCCL AND NOT (USE_CUDA)) if (USE_NCCL AND NOT (USE_CUDA))
message(SEND_ERROR "`USE_NCCL` must be enabled with `USE_CUDA` flag.") message(SEND_ERROR "`USE_NCCL` must be enabled with `USE_CUDA` flag.")
endif (USE_NCCL AND NOT (USE_CUDA)) endif (USE_NCCL AND NOT (USE_CUDA))
@@ -107,17 +102,6 @@ endif (USE_DEVICE_DEBUG AND NOT (USE_CUDA))
if (BUILD_WITH_SHARED_NCCL AND (NOT USE_NCCL)) if (BUILD_WITH_SHARED_NCCL AND (NOT USE_NCCL))
message(SEND_ERROR "Build XGBoost with -DUSE_NCCL=ON to enable BUILD_WITH_SHARED_NCCL.") message(SEND_ERROR "Build XGBoost with -DUSE_NCCL=ON to enable BUILD_WITH_SHARED_NCCL.")
endif (BUILD_WITH_SHARED_NCCL AND (NOT USE_NCCL)) endif (BUILD_WITH_SHARED_NCCL AND (NOT USE_NCCL))
if (USE_RCCL AND NOT (USE_HIP))
message(SEND_ERROR "`USE_RCCL` must be enabled with `USE_HIP` flag.")
endif (USE_RCCL AND NOT (USE_HIP))
if (USE_DEVICE_DEBUG AND NOT (USE_HIP))
message(SEND_ERROR "`USE_DEVICE_DEBUG` must be enabled with `USE_HIP` flag.")
endif (USE_DEVICE_DEBUG AND NOT (USE_HIP))
if (BUILD_WITH_SHARED_RCCL AND (NOT USE_RCCL))
message(SEND_ERROR "Build XGBoost with -DUSE_RCCL=ON to enable BUILD_WITH_SHARED_RCCL.")
endif (BUILD_WITH_SHARED_RCCL AND (NOT USE_RCCL))
if (JVM_BINDINGS AND R_LIB) if (JVM_BINDINGS AND R_LIB)
message(SEND_ERROR "`R_LIB' is not compatible with `JVM_BINDINGS' as they both have customized configurations.") message(SEND_ERROR "`R_LIB' is not compatible with `JVM_BINDINGS' as they both have customized configurations.")
endif (JVM_BINDINGS AND R_LIB) endif (JVM_BINDINGS AND R_LIB)
@@ -131,15 +115,9 @@ endif (USE_AVX)
if (PLUGIN_LZ4) if (PLUGIN_LZ4)
message(SEND_ERROR "The option 'PLUGIN_LZ4' is removed from XGBoost.") message(SEND_ERROR "The option 'PLUGIN_LZ4' is removed from XGBoost.")
endif (PLUGIN_LZ4) endif (PLUGIN_LZ4)
if (PLUGIN_RMM AND NOT (USE_CUDA)) if (PLUGIN_RMM AND NOT (USE_CUDA))
message(SEND_ERROR "`PLUGIN_RMM` must be enabled with `USE_CUDA` flag.") message(SEND_ERROR "`PLUGIN_RMM` must be enabled with `USE_CUDA` flag.")
endif (PLUGIN_RMM AND NOT (USE_CUDA)) endif (PLUGIN_RMM AND NOT (USE_CUDA))
if (PLUGIN_RMM AND NOT (USE_HIP))
message(SEND_ERROR "`PLUGIN_RMM` must be enabled with `USE_HIP` flag.")
endif (PLUGIN_RMM AND NOT (USE_HIP))
if (PLUGIN_RMM AND NOT ((CMAKE_CXX_COMPILER_ID STREQUAL "Clang") OR (CMAKE_CXX_COMPILER_ID STREQUAL "GNU"))) if (PLUGIN_RMM AND NOT ((CMAKE_CXX_COMPILER_ID STREQUAL "Clang") OR (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")))
message(SEND_ERROR "`PLUGIN_RMM` must be used with GCC or Clang compiler.") message(SEND_ERROR "`PLUGIN_RMM` must be used with GCC or Clang compiler.")
endif (PLUGIN_RMM AND NOT ((CMAKE_CXX_COMPILER_ID STREQUAL "Clang") OR (CMAKE_CXX_COMPILER_ID STREQUAL "GNU"))) endif (PLUGIN_RMM AND NOT ((CMAKE_CXX_COMPILER_ID STREQUAL "Clang") OR (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")))
@@ -192,24 +170,6 @@ if (USE_CUDA)
find_package(CUDAToolkit REQUIRED) find_package(CUDAToolkit REQUIRED)
endif (USE_CUDA) endif (USE_CUDA)
if (USE_HIP)
set(USE_OPENMP ON CACHE BOOL "HIP requires OpenMP" FORCE)
# `export CXX=' is ignored by CMake HIP.
set(CMAKE_HIP_HOST_COMPILER ${CMAKE_CXX_COMPILER})
message(STATUS "Configured HIP host compiler: ${CMAKE_HIP_HOST_COMPILER}")
enable_language(HIP)
find_package(hip REQUIRED)
find_package(rocthrust REQUIRED)
find_package(hipcub REQUIRED)
set(CMAKE_HIP_FLAGS "${CMAKE_HIP_FLAGS} -I${HIP_INCLUDE_DIRS} -I${HIP_INCLUDE_DIRS}/hip")
set(CMAKE_HIP_FLAGS "${CMAKE_HIP_FLAGS} -Wunused-result -w")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D__HIP_PLATFORM_AMD__")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -I${HIP_INCLUDE_DIRS}")
add_subdirectory(${PROJECT_SOURCE_DIR}/rocgputreeshap)
endif (USE_HIP)
if (FORCE_COLORED_OUTPUT AND (CMAKE_GENERATOR STREQUAL "Ninja") AND if (FORCE_COLORED_OUTPUT AND (CMAKE_GENERATOR STREQUAL "Ninja") AND
((CMAKE_CXX_COMPILER_ID STREQUAL "GNU") OR ((CMAKE_CXX_COMPILER_ID STREQUAL "GNU") OR
(CMAKE_CXX_COMPILER_ID STREQUAL "Clang"))) (CMAKE_CXX_COMPILER_ID STREQUAL "Clang")))
@@ -249,10 +209,6 @@ if (USE_NCCL)
find_package(Nccl REQUIRED) find_package(Nccl REQUIRED)
endif (USE_NCCL) endif (USE_NCCL)
if (USE_RCCL)
find_package(rccl REQUIRED)
endif (USE_RCCL)
# dmlc-core # dmlc-core
msvc_use_static_runtime() msvc_use_static_runtime()
if (FORCE_SHARED_CRT) if (FORCE_SHARED_CRT)

View File

@@ -1,8 +1,8 @@
Package: xgboost Package: xgboost
Type: Package Type: Package
Title: Extreme Gradient Boosting Title: Extreme Gradient Boosting
Version: 2.0.2.1 Version: 2.0.3.1
Date: 2023-10-12 Date: 2023-12-14
Authors@R: c( Authors@R: c(
person("Tianqi", "Chen", role = c("aut"), person("Tianqi", "Chen", role = c("aut"),
email = "tianqi.tchen@gmail.com"), email = "tianqi.tchen@gmail.com"),

18
R-package/configure vendored
View File

@@ -1,6 +1,6 @@
#! /bin/sh #! /bin/sh
# Guess values for system-dependent variables and create Makefiles. # Guess values for system-dependent variables and create Makefiles.
# Generated by GNU Autoconf 2.71 for xgboost 2.0.2. # Generated by GNU Autoconf 2.71 for xgboost 2.0.3.
# #
# #
# Copyright (C) 1992-1996, 1998-2017, 2020-2021 Free Software Foundation, # Copyright (C) 1992-1996, 1998-2017, 2020-2021 Free Software Foundation,
@@ -607,8 +607,8 @@ MAKEFLAGS=
# Identity of this package. # Identity of this package.
PACKAGE_NAME='xgboost' PACKAGE_NAME='xgboost'
PACKAGE_TARNAME='xgboost' PACKAGE_TARNAME='xgboost'
PACKAGE_VERSION='2.0.2' PACKAGE_VERSION='2.0.3'
PACKAGE_STRING='xgboost 2.0.2' PACKAGE_STRING='xgboost 2.0.3'
PACKAGE_BUGREPORT='' PACKAGE_BUGREPORT=''
PACKAGE_URL='' PACKAGE_URL=''
@@ -1225,7 +1225,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing. # Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh. # This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF cat <<_ACEOF
\`configure' configures xgboost 2.0.2 to adapt to many kinds of systems. \`configure' configures xgboost 2.0.3 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]... Usage: $0 [OPTION]... [VAR=VALUE]...
@@ -1287,7 +1287,7 @@ fi
if test -n "$ac_init_help"; then if test -n "$ac_init_help"; then
case $ac_init_help in case $ac_init_help in
short | recursive ) echo "Configuration of xgboost 2.0.2:";; short | recursive ) echo "Configuration of xgboost 2.0.3:";;
esac esac
cat <<\_ACEOF cat <<\_ACEOF
@@ -1367,7 +1367,7 @@ fi
test -n "$ac_init_help" && exit $ac_status test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then if $ac_init_version; then
cat <<\_ACEOF cat <<\_ACEOF
xgboost configure 2.0.2 xgboost configure 2.0.3
generated by GNU Autoconf 2.71 generated by GNU Autoconf 2.71
Copyright (C) 2021 Free Software Foundation, Inc. Copyright (C) 2021 Free Software Foundation, Inc.
@@ -1533,7 +1533,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake. running configure, to aid debugging if configure makes a mistake.
It was created by xgboost $as_me 2.0.2, which was It was created by xgboost $as_me 2.0.3, which was
generated by GNU Autoconf 2.71. Invocation command line was generated by GNU Autoconf 2.71. Invocation command line was
$ $0$ac_configure_args_raw $ $0$ac_configure_args_raw
@@ -3412,7 +3412,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
# report actual input values of CONFIG_FILES etc. instead of their # report actual input values of CONFIG_FILES etc. instead of their
# values after options handling. # values after options handling.
ac_log=" ac_log="
This file was extended by xgboost $as_me 2.0.2, which was This file was extended by xgboost $as_me 2.0.3, which was
generated by GNU Autoconf 2.71. Invocation command line was generated by GNU Autoconf 2.71. Invocation command line was
CONFIG_FILES = $CONFIG_FILES CONFIG_FILES = $CONFIG_FILES
@@ -3467,7 +3467,7 @@ ac_cs_config_escaped=`printf "%s\n" "$ac_cs_config" | sed "s/^ //; s/'/'\\\\\\\\
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_config='$ac_cs_config_escaped' ac_cs_config='$ac_cs_config_escaped'
ac_cs_version="\\ ac_cs_version="\\
xgboost config.status 2.0.2 xgboost config.status 2.0.3
configured by $0, generated by GNU Autoconf 2.71, configured by $0, generated by GNU Autoconf 2.71,
with options \\"\$ac_cs_config\\" with options \\"\$ac_cs_config\\"

View File

@@ -2,7 +2,7 @@
AC_PREREQ(2.69) AC_PREREQ(2.69)
AC_INIT([xgboost],[2.0.2],[],[xgboost],[]) AC_INIT([xgboost],[2.0.3],[],[xgboost],[])
: ${R_HOME=`R RHOME`} : ${R_HOME=`R RHOME`}
if test -z "${R_HOME}"; then if test -z "${R_HOME}"; then

View File

@@ -32,7 +32,7 @@ namespace common {
bool CheckNAN(double v) { bool CheckNAN(double v) {
return ISNAN(v); return ISNAN(v);
} }
#if !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP) #if !defined(XGBOOST_USE_CUDA)
double LogGamma(double v) { double LogGamma(double v) {
return lgammafn(v); return lgammafn(v);
} }

View File

@@ -181,23 +181,6 @@ function(xgboost_set_cuda_flags target)
CUDA_RUNTIME_LIBRARY Static) CUDA_RUNTIME_LIBRARY Static)
endfunction(xgboost_set_cuda_flags) endfunction(xgboost_set_cuda_flags)
# Set HIP related flags to target.
function(xgboost_set_hip_flags target)
if (USE_DEVICE_DEBUG)
target_compile_options(${target} PRIVATE
$<$<AND:$<CONFIG:DEBUG>,$<COMPILE_LANGUAGE:HIP>>:-G>)
endif (USE_DEVICE_DEBUG)
target_compile_definitions(${target} PRIVATE -DXGBOOST_USE_HIP=1)
target_include_directories(${target} PRIVATE ${xgboost_SOURCE_DIR}/rocgputreeshap)
target_include_directories(${target} PRIVATE ${xgboost_SOURCE_DIR}/warp-primitives/include)
set_target_properties(${target} PROPERTIES
HIP_STANDARD 17
HIP_STANDARD_REQUIRED ON
HIP_SEPARABLE_COMPILATION OFF)
endfunction(xgboost_set_hip_flags)
macro(xgboost_link_nccl target) macro(xgboost_link_nccl target)
if (BUILD_STATIC_LIB) if (BUILD_STATIC_LIB)
target_include_directories(${target} PUBLIC ${NCCL_INCLUDE_DIR}) target_include_directories(${target} PUBLIC ${NCCL_INCLUDE_DIR})
@@ -210,20 +193,6 @@ macro(xgboost_link_nccl target)
endif (BUILD_STATIC_LIB) endif (BUILD_STATIC_LIB)
endmacro(xgboost_link_nccl) endmacro(xgboost_link_nccl)
macro(xgboost_link_rccl target)
if(BUILD_STATIC_LIB)
target_include_directories(${target} PUBLIC ${RCCL_INCLUDE_DIR}/rccl)
target_compile_definitions(${target} PUBLIC -DXGBOOST_USE_RCCL=1)
target_link_directories(${target} PUBLIC ${HIP_LIB_INSTALL_DIR})
target_link_libraries(${target} PUBLIC ${RCCL_LIBRARY})
else()
target_include_directories(${target} PRIVATE ${RCCL_INCLUDE_DIR}/rccl)
target_compile_definitions(${target} PRIVATE -DXGBOOST_USE_RCCL=1)
target_link_directories(${target} PUBLIC ${HIP_LIB_INSTALL_DIR})
target_link_libraries(${target} PRIVATE ${RCCL_LIBRARY})
endif()
endmacro()
# compile options # compile options
macro(xgboost_target_properties target) macro(xgboost_target_properties target)
set_target_properties(${target} PROPERTIES set_target_properties(${target} PROPERTIES
@@ -246,10 +215,6 @@ macro(xgboost_target_properties target)
-Xcompiler=-Wall -Xcompiler=-Wextra -Xcompiler=-Wno-expansion-to-defined, -Xcompiler=-Wall -Xcompiler=-Wextra -Xcompiler=-Wno-expansion-to-defined,
-Wall -Wextra -Wno-expansion-to-defined> -Wall -Wextra -Wno-expansion-to-defined>
) )
target_compile_options(${target} PUBLIC
$<IF:$<COMPILE_LANGUAGE:HIP>,
-Wall -Wextra >
)
endif(ENABLE_ALL_WARNINGS) endif(ENABLE_ALL_WARNINGS)
target_compile_options(${target} target_compile_options(${target}
@@ -318,10 +283,6 @@ macro(xgboost_target_link_libraries target)
target_link_libraries(${target} PUBLIC CUDA::cudart_static) target_link_libraries(${target} PUBLIC CUDA::cudart_static)
endif (USE_CUDA) endif (USE_CUDA)
if (USE_HIP)
xgboost_set_hip_flags(${target})
endif (USE_HIP)
if (PLUGIN_RMM) if (PLUGIN_RMM)
target_link_libraries(${target} PRIVATE rmm::rmm) target_link_libraries(${target} PRIVATE rmm::rmm)
endif (PLUGIN_RMM) endif (PLUGIN_RMM)
@@ -330,10 +291,6 @@ macro(xgboost_target_link_libraries target)
xgboost_link_nccl(${target}) xgboost_link_nccl(${target})
endif (USE_NCCL) endif (USE_NCCL)
if(USE_RCCL)
xgboost_link_rccl(${target})
endif()
if (USE_NVTX) if (USE_NVTX)
target_link_libraries(${target} PRIVATE CUDA::nvToolsExt) target_link_libraries(${target} PRIVATE CUDA::nvToolsExt)
endif (USE_NVTX) endif (USE_NVTX)

View File

@@ -3,8 +3,6 @@
set(USE_OPENMP @USE_OPENMP@) set(USE_OPENMP @USE_OPENMP@)
set(USE_CUDA @USE_CUDA@) set(USE_CUDA @USE_CUDA@)
set(USE_NCCL @USE_NCCL@) set(USE_NCCL @USE_NCCL@)
set(USE_HIP @USE_HIP@)
set(USE_RCCL @USE_RCCL@)
set(XGBOOST_BUILD_STATIC_LIB @BUILD_STATIC_LIB@) set(XGBOOST_BUILD_STATIC_LIB @BUILD_STATIC_LIB@)
include(CMakeFindDependencyMacro) include(CMakeFindDependencyMacro)
@@ -17,9 +15,6 @@ if (XGBOOST_BUILD_STATIC_LIB)
if(USE_CUDA) if(USE_CUDA)
find_dependency(CUDA) find_dependency(CUDA)
endif() endif()
if(USE_HIP)
find_dependency(HIP)
endif()
# nccl should be linked statically if xgboost is built as static library. # nccl should be linked statically if xgboost is built as static library.
endif (XGBOOST_BUILD_STATIC_LIB) endif (XGBOOST_BUILD_STATIC_LIB)

View File

@@ -4,13 +4,13 @@ python mapfeat.py
# split train and test # split train and test
python mknfold.py machine.txt 1 python mknfold.py machine.txt 1
# training and output the models # training and output the models
../../../xgboost machine.conf ../../xgboost machine.conf
# output predictions of test data # output predictions of test data
../../../xgboost machine.conf task=pred model_in=0002.model ../../xgboost machine.conf task=pred model_in=0002.model
# print the boosters of 0002.model in dump.raw.txt # print the boosters of 0002.model in dump.raw.txt
../../../xgboost machine.conf task=dump model_in=0002.model name_dump=dump.raw.txt ../../xgboost machine.conf task=dump model_in=0002.model name_dump=dump.raw.txt
# print the boosters of 0002.model in dump.nice.txt with feature map # print the boosters of 0002.model in dump.nice.txt with feature map
../../../xgboost machine.conf task=dump model_in=0002.model fmap=featmap.txt name_dump=dump.nice.txt ../../xgboost machine.conf task=dump model_in=0002.model fmap=featmap.txt name_dump=dump.nice.txt
# cat the result # cat the result
cat dump.nice.txt cat dump.nice.txt

View File

@@ -0,0 +1,79 @@
import argparse
import pathlib
import re
import shutil
def main(args):
    """Switch the JVM packages between Scala binary versions (2.12 / 2.13).

    Optionally purges Maven ``target`` build directories, then rewrites every
    ``pom.xml`` under ``jvm-packages/`` so that artifact ids carry the chosen
    Scala binary suffix and the first ``<scala.version>`` /
    ``<scala.binary.version>`` properties carry the matching versions.

    :param args: parsed CLI namespace with ``scala_version`` ("2.12" or
        "2.13") and boolean ``purge_artifacts``.
    :raises ValueError: if ``args.scala_version`` is not a supported version.
    """
    # Supported binary version -> full patch version.
    patch_versions = {"2.12": "2.12.18", "2.13": "2.13.11"}
    if args.scala_version not in patch_versions:
        raise ValueError(f"Unsupported Scala version: {args.scala_version}")
    scala_ver = args.scala_version
    scala_patchver = patch_versions[scala_ver]

    # Drop stale build artifacts so the rebuild starts clean.
    if args.purge_artifacts:
        for target in pathlib.Path("jvm-packages/").glob("**/target"):
            if not target.is_dir():
                continue
            print(f"Removing {target}...")
            shutil.rmtree(target)

    # Artifact ids whose "_<scala binary version>" suffix must be rewritten.
    artifacts = (
        "xgboost-jvm",
        "xgboost4j",
        "xgboost4j-gpu",
        "xgboost4j-spark",
        "xgboost4j-spark-gpu",
        "xgboost4j-flink",
        "xgboost4j-example",
    )

    for pom in pathlib.Path("jvm-packages/").glob("**/pom.xml"):
        print(f"Updating {pom}...")
        with open(pom, "r", encoding="utf-8") as f:
            lines = f.readlines()
        # Property rewrites that must fire at most once per pom:
        # [pattern, replacement, already-applied flag].
        one_shot = [
            [r"<scala.version>[0-9\.]*", f"<scala.version>{scala_patchver}", False],
            [r"<scala.binary.version>[0-9\.]*", f"<scala.binary.version>{scala_ver}", False],
        ]
        with open(pom, "w", encoding="utf-8") as f:
            for line in lines:
                # Artifact suffixes are rewritten on every occurrence.
                for artifact in artifacts:
                    line = re.sub(
                        f"<artifactId>{artifact}_[0-9\\.]*",
                        f"<artifactId>{artifact}_{scala_ver}",
                        line,
                    )
                for entry in one_shot:
                    if entry[2]:
                        continue
                    line, nsubs = re.subn(entry[0], entry[1], line)
                    if nsubs > 0:
                        entry[2] = True
                f.write(line)
if __name__ == "__main__":
    # CLI entry point: parse the flags and hand off to main().
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument("--purge-artifacts", action="store_true")
    arg_parser.add_argument(
        "--scala-version",
        choices=["2.12", "2.13"],
        required=True,
        type=str,
        help="Version of Scala to use in the JVM packages",
    )
    main(arg_parser.parse_args())

View File

@@ -2,7 +2,6 @@ import argparse
import errno import errno
import glob import glob
import os import os
import platform
import re import re
import shutil import shutil
import subprocess import subprocess
@@ -88,10 +87,6 @@ def main():
help="Version of the release being prepared", help="Version of the release being prepared",
) )
args = parser.parse_args() args = parser.parse_args()
if sys.platform != "darwin" or platform.machine() != "arm64":
raise NotImplementedError("Please run this script using an M1 Mac")
version = args.release_version version = args.release_version
expected_git_tag = "v" + version expected_git_tag = "v" + version
current_git_tag = get_current_git_tag() current_git_tag = get_current_git_tag()
@@ -141,6 +136,7 @@ def main():
("linux", "x86_64"), ("linux", "x86_64"),
("windows", "x86_64"), ("windows", "x86_64"),
("macos", "x86_64"), ("macos", "x86_64"),
("macos", "aarch64"),
]: ]:
output_dir = f"xgboost4j/src/main/resources/lib/{os_ident}/{arch}" output_dir = f"xgboost4j/src/main/resources/lib/{os_ident}/{arch}"
maybe_makedirs(output_dir) maybe_makedirs(output_dir)
@@ -164,6 +160,10 @@ def main():
url=f"{nightly_bucket_prefix}/{git_branch}/libxgboost4j/libxgboost4j_{commit_hash}.dylib", url=f"{nightly_bucket_prefix}/{git_branch}/libxgboost4j/libxgboost4j_{commit_hash}.dylib",
filename="xgboost4j/src/main/resources/lib/macos/x86_64/libxgboost4j.dylib", filename="xgboost4j/src/main/resources/lib/macos/x86_64/libxgboost4j.dylib",
) )
retrieve(
url=f"{nightly_bucket_prefix}/{git_branch}/libxgboost4j/libxgboost4j_m1_{commit_hash}.dylib",
filename="xgboost4j/src/main/resources/lib/macos/aarch64/libxgboost4j.dylib",
)
with tempfile.TemporaryDirectory() as tempdir: with tempfile.TemporaryDirectory() as tempdir:
# libxgboost4j.so for Linux x86_64, CPU only # libxgboost4j.so for Linux x86_64, CPU only
@@ -211,9 +211,14 @@ def main():
"https://central.sonatype.org/publish/publish-maven/" "https://central.sonatype.org/publish/publish-maven/"
) )
print( print(
"3. Now on a M1 Mac machine, run the following to build Scala 2.12 artifacts:" "3. Now on a Linux machine, run the following to build Scala 2.12 artifacts. "
"Make sure to use an Internet connection with fast upload speed:"
)
print(
" # Skip native build, since we have all needed native binaries from CI\n"
" export MAVEN_SKIP_NATIVE_BUILD=1\n"
" GPG_TTY=$(tty) mvn deploy -Prelease -DskipTests"
) )
print(" GPG_TTY=$(tty) mvn deploy -Prelease -DskipTests")
print( print(
"4. Log into https://oss.sonatype.org/. On the left menu panel, click Staging " "4. Log into https://oss.sonatype.org/. On the left menu panel, click Staging "
"Repositories. Visit the URL https://oss.sonatype.org/content/repositories/mldmlc-xxxx " "Repositories. Visit the URL https://oss.sonatype.org/content/repositories/mldmlc-xxxx "
@@ -221,11 +226,14 @@ def main():
"artifacts to the Maven Central repository. The top-level metapackage should be " "artifacts to the Maven Central repository. The top-level metapackage should be "
"named xgboost-jvm_2.12." "named xgboost-jvm_2.12."
) )
print("5. Remove the Scala 2.12 artifacts and build Scala 2.13 artifacts:")
print(" rm -rf targets/")
print(" GPG_TTY=$(tty) mvn deploy -Prelease-cpu-only,scala-2.13 -DskipTests")
print( print(
"6. Go to https://oss.sonatype.org/ to release the Scala 2.13 artifacts." "5. Remove the Scala 2.12 artifacts and build Scala 2.13 artifacts:\n"
" export MAVEN_SKIP_NATIVE_BUILD=1\n"
" python dev/change_scala_version.py --scala-version 2.13 --purge-artifacts\n"
" GPG_TTY=$(tty) mvn deploy -Prelease-cpu-only,scala-2.13 -DskipTests"
)
print(
"6. Go to https://oss.sonatype.org/ to release the Scala 2.13 artifacts. "
"The top-level metapackage should be named xgboost-jvm_2.13." "The top-level metapackage should be named xgboost-jvm_2.13."
) )

View File

@@ -58,19 +58,19 @@
/*! /*!
* \brief Tag function as usable by device * \brief Tag function as usable by device
*/ */
#if defined (__CUDA__) || defined(__NVCC__) || defined(__HIPCC__) #if defined (__CUDA__) || defined(__NVCC__)
#define XGBOOST_DEVICE __host__ __device__ #define XGBOOST_DEVICE __host__ __device__
#else #else
#define XGBOOST_DEVICE #define XGBOOST_DEVICE
#endif // defined (__CUDA__) || defined(__NVCC__) || defined(__HIPCC__) #endif // defined (__CUDA__) || defined(__NVCC__)
#if defined(__CUDA__) || defined(__CUDACC__) || defined(__HIPCC__) #if defined(__CUDA__) || defined(__CUDACC__)
#define XGBOOST_HOST_DEV_INLINE XGBOOST_DEVICE __forceinline__ #define XGBOOST_HOST_DEV_INLINE XGBOOST_DEVICE __forceinline__
#define XGBOOST_DEV_INLINE __device__ __forceinline__ #define XGBOOST_DEV_INLINE __device__ __forceinline__
#else #else
#define XGBOOST_HOST_DEV_INLINE #define XGBOOST_HOST_DEV_INLINE
#define XGBOOST_DEV_INLINE #define XGBOOST_DEV_INLINE
#endif // defined(__CUDA__) || defined(__CUDACC__) || defined(__HIPCC__) #endif // defined(__CUDA__) || defined(__CUDACC__)
// These check are for Makefile. // These check are for Makefile.
#if !defined(XGBOOST_MM_PREFETCH_PRESENT) && !defined(XGBOOST_BUILTIN_PREFETCH_PRESENT) #if !defined(XGBOOST_MM_PREFETCH_PRESENT) && !defined(XGBOOST_BUILTIN_PREFETCH_PRESENT)
@@ -274,8 +274,8 @@ class GradientPairInt64 {
GradientPairInt64(GradientPairInt64 const &g) = default; GradientPairInt64(GradientPairInt64 const &g) = default;
GradientPairInt64 &operator=(GradientPairInt64 const &g) = default; GradientPairInt64 &operator=(GradientPairInt64 const &g) = default;
[[nodiscard]] XGBOOST_DEVICE T GetQuantisedGrad() const { return grad_; } XGBOOST_DEVICE [[nodiscard]] T GetQuantisedGrad() const { return grad_; }
[[nodiscard]] XGBOOST_DEVICE T GetQuantisedHess() const { return hess_; } XGBOOST_DEVICE [[nodiscard]] T GetQuantisedHess() const { return hess_; }
XGBOOST_DEVICE GradientPairInt64 &operator+=(const GradientPairInt64 &rhs) { XGBOOST_DEVICE GradientPairInt64 &operator+=(const GradientPairInt64 &rhs) {
grad_ += rhs.grad_; grad_ += rhs.grad_;

View File

@@ -58,11 +58,11 @@
namespace xgboost { namespace xgboost {
#if defined(__CUDACC__) || defined(__HIPCC__) #ifdef __CUDACC__
// Sets a function to call instead of cudaSetDevice(); // Sets a function to call instead of cudaSetDevice();
// only added for testing // only added for testing
void SetCudaSetDeviceHandler(void (*handler)(int)); void SetCudaSetDeviceHandler(void (*handler)(int));
#endif // __CUDACC__ || __HIPCC__ #endif // __CUDACC__
template <typename T> struct HostDeviceVectorImpl; template <typename T> struct HostDeviceVectorImpl;

View File

@@ -30,11 +30,11 @@
// decouple it from xgboost. // decouple it from xgboost.
#ifndef LINALG_HD #ifndef LINALG_HD
#if defined(__CUDA__) || defined(__NVCC__) || defined(__HIPCC__) #if defined(__CUDA__) || defined(__NVCC__)
#define LINALG_HD __host__ __device__ #define LINALG_HD __host__ __device__
#else #else
#define LINALG_HD #define LINALG_HD
#endif // defined (__CUDA__) || defined(__NVCC__) || defined(__HIPCC__) #endif // defined (__CUDA__) || defined(__NVCC__)
#endif // LINALG_HD #endif // LINALG_HD
namespace xgboost::linalg { namespace xgboost::linalg {
@@ -118,7 +118,7 @@ using IndexToTag = std::conditional_t<std::is_integral<RemoveCRType<S>>::value,
template <int32_t n, typename Fn> template <int32_t n, typename Fn>
LINALG_HD constexpr auto UnrollLoop(Fn fn) { LINALG_HD constexpr auto UnrollLoop(Fn fn) {
#if defined(__CUDA_ARCH__) || defined(__HIPCC__) #if defined __CUDA_ARCH__
#pragma unroll n #pragma unroll n
#endif // defined __CUDA_ARCH__ #endif // defined __CUDA_ARCH__
for (int32_t i = 0; i < n; ++i) { for (int32_t i = 0; i < n; ++i) {
@@ -136,7 +136,7 @@ int32_t NativePopc(T v) {
inline LINALG_HD int Popc(uint32_t v) { inline LINALG_HD int Popc(uint32_t v) {
#if defined(__CUDA_ARCH__) #if defined(__CUDA_ARCH__)
return __popc(v); return __popc(v);
#elif defined(__GNUC__) || defined(__clang__) || defined(__HIPCC__) #elif defined(__GNUC__) || defined(__clang__)
return __builtin_popcount(v); return __builtin_popcount(v);
#elif defined(_MSC_VER) #elif defined(_MSC_VER)
return __popcnt(v); return __popcnt(v);
@@ -148,7 +148,7 @@ inline LINALG_HD int Popc(uint32_t v) {
inline LINALG_HD int Popc(uint64_t v) { inline LINALG_HD int Popc(uint64_t v) {
#if defined(__CUDA_ARCH__) #if defined(__CUDA_ARCH__)
return __popcll(v); return __popcll(v);
#elif defined(__GNUC__) || defined(__clang__) || defined(__HIPCC__) #elif defined(__GNUC__) || defined(__clang__)
return __builtin_popcountll(v); return __builtin_popcountll(v);
#elif defined(_MSC_VER) && defined(_M_X64) #elif defined(_MSC_VER) && defined(_M_X64)
return __popcnt64(v); return __popcnt64(v);

View File

@@ -41,9 +41,7 @@
#if defined(__CUDACC__) #if defined(__CUDACC__)
#include <cuda_runtime.h> #include <cuda_runtime.h>
#elif defined(__HIPCC__) #endif // defined(__CUDACC__)
#include <hip/hip_runtime.h>
#endif
/*! /*!
* The version number 1910 is picked up from GSL. * The version number 1910 is picked up from GSL.
@@ -106,42 +104,7 @@ namespace common {
#define SPAN_CHECK KERNEL_CHECK #define SPAN_CHECK KERNEL_CHECK
#elif defined(__HIPCC__) #else // ------------------------------ not CUDA ----------------------------
// Usual logging facility is not available inside device code.
#if defined(_MSC_VER)
// Windows HIP doesn't have __assert_fail.
#define HIP_KERNEL_CHECK(cond) \
do { \
if (XGBOOST_EXPECT(!(cond), false)) { \
__builtin_trap(); \
} \
} while (0)
#else // defined(_MSC_VER)
#define __ASSERT_STR_HELPER(x) #x
#if 0
#define HIP_KERNEL_CHECK(cond) \
(XGBOOST_EXPECT((cond), true) \
? static_cast<void>(0) \
: __assert_fail(__ASSERT_STR_HELPER((cond)), __FILE__, __LINE__, __PRETTY_FUNCTION__))
#else
#define HIP_KERNEL_CHECK(cond) \
(XGBOOST_EXPECT((cond), true) \
? static_cast<void>(0) \
: __builtin_trap())
#endif
#endif // defined(_MSC_VER)
#define KERNEL_CHECK HIP_KERNEL_CHECK
#define SPAN_CHECK KERNEL_CHECK
#else // ------------------------------ not CUDA or HIP ----------------------------
#if defined(XGBOOST_STRICT_R_MODE) && XGBOOST_STRICT_R_MODE == 1 #if defined(XGBOOST_STRICT_R_MODE) && XGBOOST_STRICT_R_MODE == 1
@@ -157,7 +120,7 @@ namespace common {
#endif // defined(XGBOOST_STRICT_R_MODE) #endif // defined(XGBOOST_STRICT_R_MODE)
#endif // __CUDA_ARCH__ || __HIPCC__ #endif // __CUDA_ARCH__
#define SPAN_LT(lhs, rhs) SPAN_CHECK((lhs) < (rhs)) #define SPAN_LT(lhs, rhs) SPAN_CHECK((lhs) < (rhs))
@@ -354,7 +317,7 @@ struct IsSpanOracle<Span<T, Extent>> : std::true_type {};
template <class T> template <class T>
struct IsSpan : public IsSpanOracle<typename std::remove_cv<T>::type> {}; struct IsSpan : public IsSpanOracle<typename std::remove_cv<T>::type> {};
// Re-implement std algorithms here to adopt CUDA/HIP // Re-implement std algorithms here to adopt CUDA.
template <typename T> template <typename T>
struct Less { struct Less {
XGBOOST_DEVICE constexpr bool operator()(const T& _x, const T& _y) const { XGBOOST_DEVICE constexpr bool operator()(const T& _x, const T& _y) const {

View File

@@ -6,6 +6,6 @@
#define XGBOOST_VER_MAJOR 2 /* NOLINT */ #define XGBOOST_VER_MAJOR 2 /* NOLINT */
#define XGBOOST_VER_MINOR 0 /* NOLINT */ #define XGBOOST_VER_MINOR 0 /* NOLINT */
#define XGBOOST_VER_PATCH 2 /* NOLINT */ #define XGBOOST_VER_PATCH 3 /* NOLINT */
#endif // XGBOOST_VERSION_CONFIG_H_ #endif // XGBOOST_VERSION_CONFIG_H_

View File

@@ -9,11 +9,6 @@ if (USE_CUDA)
${PROJECT_SOURCE_DIR}/jvm-packages/xgboost4j-gpu/src/native/xgboost4j-gpu.cu) ${PROJECT_SOURCE_DIR}/jvm-packages/xgboost4j-gpu/src/native/xgboost4j-gpu.cu)
endif (USE_CUDA) endif (USE_CUDA)
if (USE_HIP)
list(APPEND JVM_SOURCES
${PROJECT_SOURCE_DIR}/jvm-packages/xgboost4j-gpu/src/native/xgboost4j-gpu.hip)
endif (USE_HIP)
add_library(xgboost4j SHARED ${JVM_SOURCES} ${XGBOOST_OBJ_SOURCES}) add_library(xgboost4j SHARED ${JVM_SOURCES} ${XGBOOST_OBJ_SOURCES})
if (ENABLE_ALL_WARNINGS) if (ENABLE_ALL_WARNINGS)

View File

@@ -1,6 +1,6 @@
#!/usr/bin/env python #!/usr/bin/env python
import errno
import argparse import argparse
import errno
import glob import glob
import os import os
import platform import platform
@@ -19,13 +19,10 @@ CONFIG = {
"USE_HDFS": "OFF", "USE_HDFS": "OFF",
"USE_AZURE": "OFF", "USE_AZURE": "OFF",
"USE_S3": "OFF", "USE_S3": "OFF",
"USE_CUDA": "OFF", "USE_CUDA": "OFF",
"USE_NCCL": "OFF", "USE_NCCL": "OFF",
"USE_HIP": "OFF",
"USE_RCCL": "OFF",
"JVM_BINDINGS": "ON", "JVM_BINDINGS": "ON",
"LOG_CAPI_INVOCATION": "OFF" "LOG_CAPI_INVOCATION": "OFF",
} }
@@ -72,27 +69,22 @@ def normpath(path):
return normalized return normalized
if __name__ == "__main__": def native_build(args):
parser = argparse.ArgumentParser()
parser.add_argument('--log-capi-invocation', type=str, choices=['ON', 'OFF'], default='OFF')
parser.add_argument('--use-cuda', type=str, choices=['ON', 'OFF'], default='OFF')
parser.add_argument('--use-hip', type=str, choices=['ON', 'OFF'], default='OFF')
cli_args = parser.parse_args()
if sys.platform == "darwin": if sys.platform == "darwin":
# Enable of your compiler supports OpenMP. # Enable of your compiler supports OpenMP.
CONFIG["USE_OPENMP"] = "OFF" CONFIG["USE_OPENMP"] = "OFF"
os.environ["JAVA_HOME"] = subprocess.check_output( os.environ["JAVA_HOME"] = (
"/usr/libexec/java_home").strip().decode() subprocess.check_output("/usr/libexec/java_home").strip().decode()
)
print("building Java wrapper") print("building Java wrapper")
with cd(".."): with cd(".."):
build_dir = 'build-gpu' if cli_args.use_cuda == 'ON' or cli_args.use_hip == 'ON' else 'build' build_dir = "build-gpu" if cli_args.use_cuda == "ON" else "build"
maybe_makedirs(build_dir) maybe_makedirs(build_dir)
with cd(build_dir): with cd(build_dir):
if sys.platform == "win32": if sys.platform == "win32":
# Force x64 build on Windows. # Force x64 build on Windows.
maybe_generator = ' -A x64' maybe_generator = " -A x64"
else: else:
maybe_generator = "" maybe_generator = ""
if sys.platform == "linux": if sys.platform == "linux":
@@ -100,15 +92,12 @@ if __name__ == "__main__":
else: else:
maybe_parallel_build = "" maybe_parallel_build = ""
if cli_args.log_capi_invocation == 'ON': if cli_args.log_capi_invocation == "ON":
CONFIG['LOG_CAPI_INVOCATION'] = 'ON' CONFIG["LOG_CAPI_INVOCATION"] = "ON"
if cli_args.use_cuda == 'ON': if cli_args.use_cuda == "ON":
CONFIG['USE_CUDA'] = 'ON' CONFIG["USE_CUDA"] = "ON"
CONFIG['USE_NCCL'] = 'ON' CONFIG["USE_NCCL"] = "ON"
elif cli_args.use_hip== 'ON':
CONFIG['USE_HIP'] = 'ON'
CONFIG['USE_RCCL'] = 'ON'
args = ["-D{0}:BOOL={1}".format(k, v) for k, v in CONFIG.items()] args = ["-D{0}:BOOL={1}".format(k, v) for k, v in CONFIG.items()]
@@ -121,7 +110,7 @@ if __name__ == "__main__":
if gpu_arch_flag is not None: if gpu_arch_flag is not None:
args.append("%s" % gpu_arch_flag) args.append("%s" % gpu_arch_flag)
lib_dir = os.path.join(os.pardir, 'lib') lib_dir = os.path.join(os.pardir, "lib")
if os.path.exists(lib_dir): if os.path.exists(lib_dir):
shutil.rmtree(lib_dir) shutil.rmtree(lib_dir)
run("cmake .. " + " ".join(args) + maybe_generator) run("cmake .. " + " ".join(args) + maybe_generator)
@@ -131,8 +120,10 @@ if __name__ == "__main__":
run(f'"{sys.executable}" mapfeat.py') run(f'"{sys.executable}" mapfeat.py')
run(f'"{sys.executable}" mknfold.py machine.txt 1') run(f'"{sys.executable}" mknfold.py machine.txt 1')
xgboost4j = 'xgboost4j-gpu' if cli_args.use_cuda == 'ON' or cli_args.use_hip== 'ON' else 'xgboost4j' xgboost4j = "xgboost4j-gpu" if cli_args.use_cuda == "ON" else "xgboost4j"
xgboost4j_spark = 'xgboost4j-spark-gpu' if cli_args.use_cuda == 'ON' or cli_args.use_hip == 'ON' else 'xgboost4j-spark' xgboost4j_spark = (
"xgboost4j-spark-gpu" if cli_args.use_cuda == "ON" else "xgboost4j-spark"
)
print("copying native library") print("copying native library")
library_name, os_folder = { library_name, os_folder = {
@@ -147,14 +138,19 @@ if __name__ == "__main__":
"i86pc": "x86_64", # on Solaris x86_64 "i86pc": "x86_64", # on Solaris x86_64
"sun4v": "sparc", # on Solaris sparc "sun4v": "sparc", # on Solaris sparc
"arm64": "aarch64", # on macOS & Windows ARM 64-bit "arm64": "aarch64", # on macOS & Windows ARM 64-bit
"aarch64": "aarch64" "aarch64": "aarch64",
}[platform.machine().lower()] }[platform.machine().lower()]
output_folder = "{}/src/main/resources/lib/{}/{}".format(xgboost4j, os_folder, arch_folder) output_folder = "{}/src/main/resources/lib/{}/{}".format(
xgboost4j, os_folder, arch_folder
)
maybe_makedirs(output_folder) maybe_makedirs(output_folder)
cp("../lib/" + library_name, output_folder) cp("../lib/" + library_name, output_folder)
print("copying pure-Python tracker") print("copying pure-Python tracker")
cp("../python-package/xgboost/tracker.py", "{}/src/main/resources".format(xgboost4j)) cp(
"../python-package/xgboost/tracker.py",
"{}/src/main/resources".format(xgboost4j),
)
print("copying train/test files") print("copying train/test files")
maybe_makedirs("{}/src/test/resources".format(xgboost4j_spark)) maybe_makedirs("{}/src/test/resources".format(xgboost4j_spark))
@@ -170,3 +166,18 @@ if __name__ == "__main__":
maybe_makedirs("{}/src/test/resources".format(xgboost4j)) maybe_makedirs("{}/src/test/resources".format(xgboost4j))
for file in glob.glob("../demo/data/agaricus.*"): for file in glob.glob("../demo/data/agaricus.*"):
cp(file, "{}/src/test/resources".format(xgboost4j)) cp(file, "{}/src/test/resources".format(xgboost4j))
if __name__ == "__main__":
if "MAVEN_SKIP_NATIVE_BUILD" in os.environ:
print("MAVEN_SKIP_NATIVE_BUILD is set. Skipping native build...")
else:
parser = argparse.ArgumentParser()
parser.add_argument(
"--log-capi-invocation", type=str, choices=["ON", "OFF"], default="OFF"
)
parser.add_argument(
"--use-cuda", type=str, choices=["ON", "OFF"], default="OFF"
)
cli_args = parser.parse_args()
native_build(cli_args)

View File

@@ -5,8 +5,8 @@
<modelVersion>4.0.0</modelVersion> <modelVersion>4.0.0</modelVersion>
<groupId>ml.dmlc</groupId> <groupId>ml.dmlc</groupId>
<artifactId>xgboost-jvm_${scala.binary.version}</artifactId> <artifactId>xgboost-jvm_2.12</artifactId>
<version>2.0.2</version> <version>2.0.3</version>
<packaging>pom</packaging> <packaging>pom</packaging>
<name>XGBoost JVM Package</name> <name>XGBoost JVM Package</name>
<description>JVM Package for XGBoost</description> <description>JVM Package for XGBoost</description>
@@ -43,7 +43,6 @@
<maven.wagon.http.retryHandler.count>5</maven.wagon.http.retryHandler.count> <maven.wagon.http.retryHandler.count>5</maven.wagon.http.retryHandler.count>
<log.capi.invocation>OFF</log.capi.invocation> <log.capi.invocation>OFF</log.capi.invocation>
<use.cuda>OFF</use.cuda> <use.cuda>OFF</use.cuda>
<use.hip>OFF</use.hip>
<cudf.version>23.08.0</cudf.version> <cudf.version>23.08.0</cudf.version>
<spark.rapids.version>23.08.0</spark.rapids.version> <spark.rapids.version>23.08.0</spark.rapids.version>
<cudf.classifier>cuda11</cudf.classifier> <cudf.classifier>cuda11</cudf.classifier>

View File

@@ -5,12 +5,12 @@
<modelVersion>4.0.0</modelVersion> <modelVersion>4.0.0</modelVersion>
<parent> <parent>
<groupId>ml.dmlc</groupId> <groupId>ml.dmlc</groupId>
<artifactId>xgboost-jvm_${scala.binary.version}</artifactId> <artifactId>xgboost-jvm_2.12</artifactId>
<version>2.0.2</version> <version>2.0.3</version>
</parent> </parent>
<name>xgboost4j-example</name> <name>xgboost4j-example</name>
<artifactId>xgboost4j-example_${scala.binary.version}</artifactId> <artifactId>xgboost4j-example_2.12</artifactId>
<version>2.0.2</version> <version>2.0.3</version>
<packaging>jar</packaging> <packaging>jar</packaging>
<build> <build>
<plugins> <plugins>
@@ -26,7 +26,7 @@
<dependencies> <dependencies>
<dependency> <dependency>
<groupId>ml.dmlc</groupId> <groupId>ml.dmlc</groupId>
<artifactId>xgboost4j-spark_${scala.binary.version}</artifactId> <artifactId>xgboost4j-spark_2.12</artifactId>
<version>${project.version}</version> <version>${project.version}</version>
</dependency> </dependency>
<dependency> <dependency>
@@ -37,7 +37,7 @@
</dependency> </dependency>
<dependency> <dependency>
<groupId>ml.dmlc</groupId> <groupId>ml.dmlc</groupId>
<artifactId>xgboost4j-flink_${scala.binary.version}</artifactId> <artifactId>xgboost4j-flink_2.12</artifactId>
<version>${project.version}</version> <version>${project.version}</version>
</dependency> </dependency>
</dependencies> </dependencies>

View File

@@ -5,13 +5,13 @@
<modelVersion>4.0.0</modelVersion> <modelVersion>4.0.0</modelVersion>
<parent> <parent>
<groupId>ml.dmlc</groupId> <groupId>ml.dmlc</groupId>
<artifactId>xgboost-jvm_${scala.binary.version}</artifactId> <artifactId>xgboost-jvm_2.12</artifactId>
<version>2.0.2</version> <version>2.0.3</version>
</parent> </parent>
<name>xgboost4j-flink</name> <name>xgboost4j-flink</name>
<artifactId>xgboost4j-flink_${scala.binary.version}</artifactId> <artifactId>xgboost4j-flink_2.12</artifactId>
<version>2.0.2</version> <version>2.0.3</version>
<properties> <properties>
<flink-ml.version>2.2.0</flink-ml.version> <flink-ml.version>2.2.0</flink-ml.version>
</properties> </properties>
@@ -30,7 +30,7 @@
<dependencies> <dependencies>
<dependency> <dependency>
<groupId>ml.dmlc</groupId> <groupId>ml.dmlc</groupId>
<artifactId>xgboost4j_${scala.binary.version}</artifactId> <artifactId>xgboost4j_2.12</artifactId>
<version>${project.version}</version> <version>${project.version}</version>
</dependency> </dependency>
<dependency> <dependency>

View File

@@ -5,12 +5,12 @@
<modelVersion>4.0.0</modelVersion> <modelVersion>4.0.0</modelVersion>
<parent> <parent>
<groupId>ml.dmlc</groupId> <groupId>ml.dmlc</groupId>
<artifactId>xgboost-jvm_${scala.binary.version}</artifactId> <artifactId>xgboost-jvm_2.12</artifactId>
<version>2.0.2</version> <version>2.0.3</version>
</parent> </parent>
<artifactId>xgboost4j-gpu_${scala.binary.version}</artifactId> <artifactId>xgboost4j-gpu_2.12</artifactId>
<name>xgboost4j-gpu</name> <name>xgboost4j-gpu</name>
<version>2.0.2</version> <version>2.0.3</version>
<packaging>jar</packaging> <packaging>jar</packaging>
<dependencies> <dependencies>
@@ -104,8 +104,6 @@
<argument>${log.capi.invocation}</argument> <argument>${log.capi.invocation}</argument>
<argument>--use-cuda</argument> <argument>--use-cuda</argument>
<argument>${use.cuda}</argument> <argument>${use.cuda}</argument>
<argument>--use-hip</argument>
<argument>${use.hip}</argument>
</arguments> </arguments>
<workingDirectory>${user.dir}</workingDirectory> <workingDirectory>${user.dir}</workingDirectory>
</configuration> </configuration>

View File

@@ -2,7 +2,7 @@
// Created by bobwang on 2021/9/8. // Created by bobwang on 2021/9/8.
// //
#if !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP) #ifndef XGBOOST_USE_CUDA
#include <jni.h> #include <jni.h>

View File

@@ -1,10 +1,6 @@
#include <jni.h> #include <jni.h>
#if defined(XGBOOST_USE_CUDA)
#include "../../../../src/common/device_helpers.cuh" #include "../../../../src/common/device_helpers.cuh"
#elif defined(XGBOOST_USE_HIP)
#include "../../../../src/common/device_helpers.hip.h"
#endif
#include "../../../../src/common/cuda_pinned_allocator.h" #include "../../../../src/common/cuda_pinned_allocator.h"
#include "../../../../src/data/array_interface.h" #include "../../../../src/data/array_interface.h"
#include "jvm_utils.h" #include "jvm_utils.h"

View File

@@ -1,4 +0,0 @@
#if defined(XGBOOST_USE_HIP)
#include "xgboost4j-gpu.cu"
#endif

View File

@@ -5,11 +5,11 @@
<modelVersion>4.0.0</modelVersion> <modelVersion>4.0.0</modelVersion>
<parent> <parent>
<groupId>ml.dmlc</groupId> <groupId>ml.dmlc</groupId>
<artifactId>xgboost-jvm_${scala.binary.version}</artifactId> <artifactId>xgboost-jvm_2.12</artifactId>
<version>2.0.2</version> <version>2.0.3</version>
</parent> </parent>
<name>xgboost4j-spark-gpu</name> <name>xgboost4j-spark-gpu</name>
<artifactId>xgboost4j-spark-gpu_${scala.binary.version}</artifactId> <artifactId>xgboost4j-spark-gpu_2.12</artifactId>
<build> <build>
<plugins> <plugins>
<plugin> <plugin>
@@ -24,7 +24,7 @@
<dependencies> <dependencies>
<dependency> <dependency>
<groupId>ml.dmlc</groupId> <groupId>ml.dmlc</groupId>
<artifactId>xgboost4j-gpu_${scala.binary.version}</artifactId> <artifactId>xgboost4j-gpu_2.12</artifactId>
<version>${project.version}</version> <version>${project.version}</version>
</dependency> </dependency>
<dependency> <dependency>

View File

@@ -5,11 +5,11 @@
<modelVersion>4.0.0</modelVersion> <modelVersion>4.0.0</modelVersion>
<parent> <parent>
<groupId>ml.dmlc</groupId> <groupId>ml.dmlc</groupId>
<artifactId>xgboost-jvm_${scala.binary.version}</artifactId> <artifactId>xgboost-jvm_2.12</artifactId>
<version>2.0.2</version> <version>2.0.3</version>
</parent> </parent>
<name>xgboost4j-spark</name> <name>xgboost4j-spark</name>
<artifactId>xgboost4j-spark_${scala.binary.version}</artifactId> <artifactId>xgboost4j-spark_2.12</artifactId>
<build> <build>
<plugins> <plugins>
<plugin> <plugin>
@@ -24,7 +24,7 @@
<dependencies> <dependencies>
<dependency> <dependency>
<groupId>ml.dmlc</groupId> <groupId>ml.dmlc</groupId>
<artifactId>xgboost4j_${scala.binary.version}</artifactId> <artifactId>xgboost4j_2.12</artifactId>
<version>${project.version}</version> <version>${project.version}</version>
</dependency> </dependency>
<dependency> <dependency>

View File

@@ -5,12 +5,12 @@
<modelVersion>4.0.0</modelVersion> <modelVersion>4.0.0</modelVersion>
<parent> <parent>
<groupId>ml.dmlc</groupId> <groupId>ml.dmlc</groupId>
<artifactId>xgboost-jvm_${scala.binary.version}</artifactId> <artifactId>xgboost-jvm_2.12</artifactId>
<version>2.0.2</version> <version>2.0.3</version>
</parent> </parent>
<name>xgboost4j</name> <name>xgboost4j</name>
<artifactId>xgboost4j_${scala.binary.version}</artifactId> <artifactId>xgboost4j_2.12</artifactId>
<version>2.0.2</version> <version>2.0.3</version>
<packaging>jar</packaging> <packaging>jar</packaging>
<dependencies> <dependencies>

View File

@@ -15,10 +15,6 @@ class BuildConfiguration: # pylint: disable=R0902
use_cuda: bool = False use_cuda: bool = False
# Whether to enable NCCL # Whether to enable NCCL
use_nccl: bool = False use_nccl: bool = False
# Whether to enablea HIP
use_hip: bool = False
# Whether to enable RCCL
use_rccl: bool = False
# Whether to enable HDFS # Whether to enable HDFS
use_hdfs: bool = False use_hdfs: bool = False
# Whether to enable Azure Storage # Whether to enable Azure Storage

View File

@@ -7,7 +7,7 @@ build-backend = "packager.pep517"
[project] [project]
name = "xgboost" name = "xgboost"
version = "2.0.2" version = "2.0.3"
authors = [ authors = [
{ name = "Hyunsu Cho", email = "chohyu01@cs.washington.edu" }, { name = "Hyunsu Cho", email = "chohyu01@cs.washington.edu" },
{ name = "Jiaming Yuan", email = "jm.yuan@outlook.com" } { name = "Jiaming Yuan", email = "jm.yuan@outlook.com" }

View File

@@ -1 +1 @@
2.0.2 2.0.3

View File

@@ -206,6 +206,7 @@ def _load_lib() -> ctypes.CDLL:
lib = ctypes.cdll.LoadLibrary(lib_path) lib = ctypes.cdll.LoadLibrary(lib_path)
setattr(lib, "path", os.path.normpath(lib_path)) setattr(lib, "path", os.path.normpath(lib_path))
lib_success = True lib_success = True
break
except OSError as e: except OSError as e:
os_error_list.append(str(e)) os_error_list.append(str(e))
continue continue

View File

@@ -78,7 +78,6 @@ from .data import _is_cudf_ser, _is_cupy_array
from .sklearn import ( from .sklearn import (
XGBClassifier, XGBClassifier,
XGBClassifierBase, XGBClassifierBase,
XGBClassifierMixIn,
XGBModel, XGBModel,
XGBRanker, XGBRanker,
XGBRankerMixIn, XGBRankerMixIn,
@@ -1854,7 +1853,7 @@ class DaskXGBRegressor(DaskScikitLearnBase, XGBRegressorBase):
"Implementation of the scikit-learn API for XGBoost classification.", "Implementation of the scikit-learn API for XGBoost classification.",
["estimators", "model"], ["estimators", "model"],
) )
class DaskXGBClassifier(DaskScikitLearnBase, XGBClassifierMixIn, XGBClassifierBase): class DaskXGBClassifier(DaskScikitLearnBase, XGBClassifierBase):
# pylint: disable=missing-class-docstring # pylint: disable=missing-class-docstring
async def _fit_async( async def _fit_async(
self, self,
@@ -2036,10 +2035,6 @@ class DaskXGBClassifier(DaskScikitLearnBase, XGBClassifierMixIn, XGBClassifierBa
preds = da.map_blocks(_argmax, pred_probs, drop_axis=1) preds = da.map_blocks(_argmax, pred_probs, drop_axis=1)
return preds return preds
def load_model(self, fname: ModelIn) -> None:
super().load_model(fname)
self._load_model_attributes(self.get_booster())
@xgboost_model_doc( @xgboost_model_doc(
"""Implementation of the Scikit-Learn API for XGBoost Ranking. """Implementation of the Scikit-Learn API for XGBoost Ranking.

View File

@@ -43,19 +43,6 @@ from .data import _is_cudf_df, _is_cudf_ser, _is_cupy_array, _is_pandas_df
from .training import train from .training import train
class XGBClassifierMixIn: # pylint: disable=too-few-public-methods
"""MixIn for classification."""
def __init__(self, *args: Any, **kwargs: Any) -> None:
super().__init__(*args, **kwargs)
def _load_model_attributes(self, booster: Booster) -> None:
config = json.loads(booster.save_config())
self.n_classes_ = int(config["learner"]["learner_model_param"]["num_class"])
# binary classification is treated as regression in XGBoost.
self.n_classes_ = 2 if self.n_classes_ < 2 else self.n_classes_
class XGBRankerMixIn: # pylint: disable=too-few-public-methods class XGBRankerMixIn: # pylint: disable=too-few-public-methods
"""MixIn for ranking, defines the _estimator_type usually defined in scikit-learn """MixIn for ranking, defines the _estimator_type usually defined in scikit-learn
base classes. base classes.
@@ -845,21 +832,38 @@ class XGBModel(XGBModelBase):
self.get_booster().load_model(fname) self.get_booster().load_model(fname)
meta_str = self.get_booster().attr("scikit_learn") meta_str = self.get_booster().attr("scikit_learn")
if meta_str is None: if meta_str is not None:
return meta = json.loads(meta_str)
t = meta.get("_estimator_type", None)
if t is not None and t != self._get_type():
raise TypeError(
"Loading an estimator with different type. Expecting: "
f"{self._get_type()}, got: {t}"
)
meta = json.loads(meta_str)
t = meta.get("_estimator_type", None)
if t is not None and t != self._get_type():
raise TypeError(
"Loading an estimator with different type. Expecting: "
f"{self._get_type()}, got: {t}"
)
self.feature_types = self.get_booster().feature_types self.feature_types = self.get_booster().feature_types
self.get_booster().set_attr(scikit_learn=None) self.get_booster().set_attr(scikit_learn=None)
config = json.loads(self.get_booster().save_config())
self._load_model_attributes(config)
load_model.__doc__ = f"""{Booster.load_model.__doc__}""" load_model.__doc__ = f"""{Booster.load_model.__doc__}"""
def _load_model_attributes(self, config: dict) -> None:
"""Load model attributes without hyper-parameters."""
from sklearn.base import is_classifier
booster = self.get_booster()
self.objective = config["learner"]["objective"]["name"]
self.booster = config["learner"]["gradient_booster"]["name"]
self.base_score = config["learner"]["learner_model_param"]["base_score"]
self.feature_types = booster.feature_types
if is_classifier(self):
self.n_classes_ = int(config["learner"]["learner_model_param"]["num_class"])
# binary classification is treated as regression in XGBoost.
self.n_classes_ = 2 if self.n_classes_ < 2 else self.n_classes_
# pylint: disable=too-many-branches # pylint: disable=too-many-branches
def _configure_fit( def _configure_fit(
self, self,
@@ -1409,7 +1413,7 @@ def _cls_predict_proba(n_classes: int, prediction: PredtT, vstack: Callable) ->
Number of boosting rounds. Number of boosting rounds.
""", """,
) )
class XGBClassifier(XGBModel, XGBClassifierMixIn, XGBClassifierBase): class XGBClassifier(XGBModel, XGBClassifierBase):
# pylint: disable=missing-docstring,invalid-name,too-many-instance-attributes # pylint: disable=missing-docstring,invalid-name,too-many-instance-attributes
@_deprecate_positional_args @_deprecate_positional_args
def __init__( def __init__(
@@ -1637,10 +1641,6 @@ class XGBClassifier(XGBModel, XGBClassifierMixIn, XGBClassifierBase):
def classes_(self) -> np.ndarray: def classes_(self) -> np.ndarray:
return np.arange(self.n_classes_) return np.arange(self.n_classes_)
def load_model(self, fname: ModelIn) -> None:
super().load_model(fname)
self._load_model_attributes(self.get_booster())
@xgboost_model_doc( @xgboost_model_doc(
"scikit-learn API for XGBoost random forest classification.", "scikit-learn API for XGBoost random forest classification.",

Submodule rocgputreeshap deleted from 2fea6734e8

View File

@@ -16,11 +16,6 @@ if (USE_CUDA)
target_sources(objxgboost PRIVATE ${CUDA_SOURCES}) target_sources(objxgboost PRIVATE ${CUDA_SOURCES})
endif (USE_CUDA) endif (USE_CUDA)
if (USE_HIP)
file(GLOB_RECURSE HIP_SOURCES *.hip *.hip.h)
target_sources(objxgboost PRIVATE ${HIP_SOURCES})
endif (USE_HIP)
target_include_directories(objxgboost target_include_directories(objxgboost
PRIVATE PRIVATE
${xgboost_SOURCE_DIR}/include ${xgboost_SOURCE_DIR}/include
@@ -38,7 +33,6 @@ msvc_use_static_runtime()
# This grouping organises source files nicely in visual studio # This grouping organises source files nicely in visual studio
auto_source_group("${CUDA_SOURCES}") auto_source_group("${CUDA_SOURCES}")
auto_source_group("${HIP_SOURCES}")
auto_source_group("${CPU_SOURCES}") auto_source_group("${CPU_SOURCES}")
#-- End object library #-- End object library

View File

@@ -70,14 +70,12 @@ XGB_DLL void XGBoostVersion(int* major, int* minor, int* patch) {
using GlobalConfigAPIThreadLocalStore = dmlc::ThreadLocalStore<XGBAPIThreadLocalEntry>; using GlobalConfigAPIThreadLocalStore = dmlc::ThreadLocalStore<XGBAPIThreadLocalEntry>;
#if !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP) #if !defined(XGBOOST_USE_CUDA)
namespace xgboost { namespace xgboost {
void XGBBuildInfoDevice(Json *p_info) { void XGBBuildInfoDevice(Json *p_info) {
auto &info = *p_info; auto &info = *p_info;
info["USE_CUDA"] = Boolean{false}; info["USE_CUDA"] = Boolean{false};
info["USE_NCCL"] = Boolean{false}; info["USE_NCCL"] = Boolean{false};
info["USE_HIP"] = Boolean{false};
info["USE_RCCL"] = Boolean{false};
info["USE_RMM"] = Boolean{false}; info["USE_RMM"] = Boolean{false};
} }
} // namespace xgboost } // namespace xgboost
@@ -281,7 +279,7 @@ XGB_DLL int XGDMatrixCreateFromDataIter(
API_END(); API_END();
} }
#if !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP) #ifndef XGBOOST_USE_CUDA
XGB_DLL int XGDMatrixCreateFromCudaColumnar(char const *, char const *, DMatrixHandle *) { XGB_DLL int XGDMatrixCreateFromCudaColumnar(char const *, char const *, DMatrixHandle *) {
API_BEGIN(); API_BEGIN();
common::AssertGPUSupport(); common::AssertGPUSupport();
@@ -1191,7 +1189,7 @@ XGB_DLL int XGBoosterPredictFromCSR(BoosterHandle handle, char const *indptr, ch
API_END(); API_END();
} }
#if !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP) #if !defined(XGBOOST_USE_CUDA)
XGB_DLL int XGBoosterPredictFromCUDAArray(BoosterHandle handle, char const *, char const *, XGB_DLL int XGBoosterPredictFromCUDAArray(BoosterHandle handle, char const *, char const *,
DMatrixHandle, xgboost::bst_ulong const **, DMatrixHandle, xgboost::bst_ulong const **,
xgboost::bst_ulong *, const float **) { xgboost::bst_ulong *, const float **) {

View File

@@ -17,11 +17,7 @@ namespace xgboost {
void XGBBuildInfoDevice(Json *p_info) { void XGBBuildInfoDevice(Json *p_info) {
auto &info = *p_info; auto &info = *p_info;
#if defined(XGBOOST_USE_CUDA)
info["USE_CUDA"] = true; info["USE_CUDA"] = true;
#elif defined(XGBOOST_USE_HIP)
info["USE_HIP"] = true;
#endif
std::vector<Json> v{Json{Integer{THRUST_MAJOR_VERSION}}, Json{Integer{THRUST_MINOR_VERSION}}, std::vector<Json> v{Json{Integer{THRUST_MAJOR_VERSION}}, Json{Integer{THRUST_MINOR_VERSION}},
Json{Integer{THRUST_SUBMINOR_VERSION}}}; Json{Integer{THRUST_SUBMINOR_VERSION}}};
@@ -34,13 +30,8 @@ void XGBBuildInfoDevice(Json *p_info) {
info["USE_NCCL"] = Boolean{true}; info["USE_NCCL"] = Boolean{true};
v = {Json{Integer{NCCL_MAJOR}}, Json{Integer{NCCL_MINOR}}, Json{Integer{NCCL_PATCH}}}; v = {Json{Integer{NCCL_MAJOR}}, Json{Integer{NCCL_MINOR}}, Json{Integer{NCCL_PATCH}}};
info["NCCL_VERSION"] = v; info["NCCL_VERSION"] = v;
#elif defined(XGBOOST_USE_RCCL)
info["USE_RCCL"] = Boolean{true};
v = {Json{Integer{NCCL_MAJOR}}, Json{Integer{NCCL_MINOR}}, Json{Integer{NCCL_PATCH}}};
info["RCCL_VERSION"] = v;
#else #else
info["USE_NCCL"] = Boolean{false}; info["USE_NCCL"] = Boolean{false};
info["USE_RCCL"] = Boolean{false};
#endif #endif
#if defined(XGBOOST_USE_RMM) #if defined(XGBOOST_USE_RMM)

View File

@@ -1,4 +0,0 @@
#if defined(XGBOOST_USE_HIP)
#include "c_api.cu"
#endif

View File

@@ -175,7 +175,7 @@ inline float GetMissing(Json const &config) {
// Safe guard some global variables from being changed by XGBoost. // Safe guard some global variables from being changed by XGBoost.
class XGBoostAPIGuard { class XGBoostAPIGuard {
#if defined(XGBOOST_USE_CUDA) || defined(XGBOOST_USE_HIP) #if defined(XGBOOST_USE_CUDA)
int32_t device_id_ {0}; int32_t device_id_ {0};
void SetGPUAttribute(); void SetGPUAttribute();

View File

@@ -51,7 +51,7 @@ void Communicator::Init(Json const& config) {
} }
} }
#if !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP) #ifndef XGBOOST_USE_CUDA
void Communicator::Finalize() { void Communicator::Finalize() {
communicator_->Shutdown(); communicator_->Shutdown();
communicator_.reset(new NoOpCommunicator()); communicator_.reset(new NoOpCommunicator());

View File

@@ -5,7 +5,7 @@
#include "device_communicator.cuh" #include "device_communicator.cuh"
#include "device_communicator_adapter.cuh" #include "device_communicator_adapter.cuh"
#include "noop_communicator.h" #include "noop_communicator.h"
#if defined(XGBOOST_USE_NCCL) || defined(XGBOOST_USE_RCCL) #ifdef XGBOOST_USE_NCCL
#include "nccl_device_communicator.cuh" #include "nccl_device_communicator.cuh"
#endif #endif
@@ -28,7 +28,7 @@ DeviceCommunicator* Communicator::GetDevice(int device_ordinal) {
communicator_->GetWorldSize() != old_world_size) { communicator_->GetWorldSize() != old_world_size) {
old_device_ordinal = device_ordinal; old_device_ordinal = device_ordinal;
old_world_size = communicator_->GetWorldSize(); old_world_size = communicator_->GetWorldSize();
#if defined(XGBOOST_USE_NCCL) || defined(XGBOOST_USE_RCCL) #ifdef XGBOOST_USE_NCCL
switch (type_) { switch (type_) {
case CommunicatorType::kRabit: case CommunicatorType::kRabit:
device_communicator_.reset(new NcclDeviceCommunicator(device_ordinal, false)); device_communicator_.reset(new NcclDeviceCommunicator(device_ordinal, false));

View File

@@ -98,7 +98,7 @@ class Communicator {
/** @brief Get the communicator instance. */ /** @brief Get the communicator instance. */
static Communicator *Get() { return communicator_.get(); } static Communicator *Get() { return communicator_.get(); }
#if defined(XGBOOST_USE_CUDA) || defined(XGBOOST_USE_HIP) #if defined(XGBOOST_USE_CUDA)
/** /**
* @brief Get the device communicator. * @brief Get the device communicator.
* *
@@ -230,7 +230,7 @@ class Communicator {
static thread_local std::unique_ptr<Communicator> communicator_; static thread_local std::unique_ptr<Communicator> communicator_;
static thread_local CommunicatorType type_; static thread_local CommunicatorType type_;
#if defined(XGBOOST_USE_CUDA) || defined(XGBOOST_USE_HIP) #if defined(XGBOOST_USE_CUDA)
static thread_local std::unique_ptr<DeviceCommunicator> device_communicator_; static thread_local std::unique_ptr<DeviceCommunicator> device_communicator_;
#endif #endif

View File

@@ -1,4 +0,0 @@
#if defined(XGBOOST_USE_HIP)
#include "communicator.cu"
#endif

View File

@@ -41,6 +41,7 @@ class DeviceCommunicatorAdapter : public DeviceCommunicator {
} }
dh::safe_cuda(cudaSetDevice(device_ordinal_)); dh::safe_cuda(cudaSetDevice(device_ordinal_));
segments->clear(); segments->clear();
segments->resize(world_size_, 0); segments->resize(world_size_, 0);
segments->at(rank_) = length_bytes; segments->at(rank_) = length_bytes;
@@ -59,7 +60,6 @@ class DeviceCommunicatorAdapter : public DeviceCommunicator {
Broadcast(host_buffer_.data() + offset, as_bytes, i); Broadcast(host_buffer_.data() + offset, as_bytes, i);
offset += as_bytes; offset += as_bytes;
} }
dh::safe_cuda(cudaMemcpy(receive_buffer->data().get(), host_buffer_.data(), total_bytes, dh::safe_cuda(cudaMemcpy(receive_buffer->data().get(), host_buffer_.data(), total_bytes,
cudaMemcpyDefault)); cudaMemcpyDefault));
} }

View File

@@ -1,7 +1,7 @@
/*! /*!
* Copyright 2023 XGBoost contributors * Copyright 2023 XGBoost contributors
*/ */
#if defined(XGBOOST_USE_NCCL) || defined(XGBOOST_USE_RCCL) #if defined(XGBOOST_USE_NCCL)
#include "nccl_device_communicator.cuh" #include "nccl_device_communicator.cuh"
namespace xgboost { namespace xgboost {

View File

@@ -35,22 +35,12 @@ class NcclDeviceCommunicator : public DeviceCommunicator {
private: private:
static constexpr std::size_t kUuidLength = static constexpr std::size_t kUuidLength =
#if defined(XGBOOST_USE_CUDA)
sizeof(std::declval<cudaDeviceProp>().uuid) / sizeof(uint64_t); sizeof(std::declval<cudaDeviceProp>().uuid) / sizeof(uint64_t);
#elif defined(XGBOOST_USE_HIP)
sizeof(hipUUID) / sizeof(uint64_t);
#endif
void GetCudaUUID(xgboost::common::Span<uint64_t, kUuidLength> const &uuid) const { void GetCudaUUID(xgboost::common::Span<uint64_t, kUuidLength> const &uuid) const {
#if defined(XGBOOST_USE_CUDA)
cudaDeviceProp prob{}; cudaDeviceProp prob{};
dh::safe_cuda(cudaGetDeviceProperties(&prob, device_ordinal_)); dh::safe_cuda(cudaGetDeviceProperties(&prob, device_ordinal_));
std::memcpy(uuid.data(), static_cast<void *>(&(prob.uuid)), sizeof(prob.uuid)); std::memcpy(uuid.data(), static_cast<void *>(&(prob.uuid)), sizeof(prob.uuid));
#elif defined(XGBOOST_USE_HIP)
hipUUID id;
hipDeviceGetUuid(&id, device_ordinal_);
std::memcpy(uuid.data(), static_cast<void *>(&id), sizeof(id));
#endif
} }
static std::string PrintUUID(xgboost::common::Span<uint64_t, kUuidLength> const &uuid) { static std::string PrintUUID(xgboost::common::Span<uint64_t, kUuidLength> const &uuid) {

View File

@@ -1,4 +0,0 @@
#if defined(XGBOOST_USE_HIP)
#include "nccl_device_communicator.cu"
#endif

View File

@@ -10,13 +10,7 @@
#include <cstddef> // size_t #include <cstddef> // size_t
#include <cstdint> // int32_t #include <cstdint> // int32_t
#if defined(XGBOOST_USE_HIP)
#include <hipcub/hipcub.hpp>
#elif defined(XGBOOST_USE_CUDA)
#include <cub/cub.cuh> // DispatchSegmentedRadixSort,NullType,DoubleBuffer #include <cub/cub.cuh> // DispatchSegmentedRadixSort,NullType,DoubleBuffer
#endif
#include <iterator> // distance #include <iterator> // distance
#include <limits> // numeric_limits #include <limits> // numeric_limits
#include <type_traits> // conditional_t,remove_const_t #include <type_traits> // conditional_t,remove_const_t
@@ -45,7 +39,6 @@ static void DeviceSegmentedRadixSortKeys(CUDAContext const *ctx, void *d_temp_st
using OffsetT = int; using OffsetT = int;
// Null value type // Null value type
#if defined(XGBOOST_USE_CUDA)
cub::DoubleBuffer<KeyT> d_keys(const_cast<KeyT *>(d_keys_in), d_keys_out); cub::DoubleBuffer<KeyT> d_keys(const_cast<KeyT *>(d_keys_in), d_keys_out);
cub::DoubleBuffer<cub::NullType> d_values; cub::DoubleBuffer<cub::NullType> d_values;
@@ -54,20 +47,6 @@ static void DeviceSegmentedRadixSortKeys(CUDAContext const *ctx, void *d_temp_st
OffsetT>::Dispatch(d_temp_storage, temp_storage_bytes, d_keys, d_values, num_items, OffsetT>::Dispatch(d_temp_storage, temp_storage_bytes, d_keys, d_values, num_items,
num_segments, d_begin_offsets, d_end_offsets, begin_bit, num_segments, d_begin_offsets, d_end_offsets, begin_bit,
end_bit, false, ctx->Stream(), debug_synchronous))); end_bit, false, ctx->Stream(), debug_synchronous)));
#elif defined(XGBOOST_USE_HIP)
if (IS_DESCENDING) {
rocprim::segmented_radix_sort_pairs_desc<KeyT, hipcub::NullType, BeginOffsetIteratorT>(d_temp_storage,
temp_storage_bytes, d_keys_in, d_keys_out, nullptr, nullptr, num_items,
num_segments, d_begin_offsets, d_end_offsets,
begin_bit, end_bit, ctx->Stream(), debug_synchronous);
}
else {
rocprim::segmented_radix_sort_pairs<KeyT, hipcub::NullType, BeginOffsetIteratorT>(d_temp_storage,
temp_storage_bytes, d_keys_in, d_keys_out, nullptr, nullptr, num_items,
num_segments, d_begin_offsets, d_end_offsets,
begin_bit, end_bit, ctx->Stream(), debug_synchronous);
}
#endif
} }
// Wrapper around cub sort for easier `descending` sort. // Wrapper around cub sort for easier `descending` sort.
@@ -81,18 +60,14 @@ void DeviceSegmentedRadixSortPair(void *d_temp_storage,
BeginOffsetIteratorT d_begin_offsets, BeginOffsetIteratorT d_begin_offsets,
EndOffsetIteratorT d_end_offsets, dh::CUDAStreamView stream, EndOffsetIteratorT d_end_offsets, dh::CUDAStreamView stream,
int begin_bit = 0, int end_bit = sizeof(KeyT) * 8) { int begin_bit = 0, int end_bit = sizeof(KeyT) * 8) {
#if defined(XGBOOST_USE_CUDA)
cub::DoubleBuffer<KeyT> d_keys(const_cast<KeyT *>(d_keys_in), d_keys_out); cub::DoubleBuffer<KeyT> d_keys(const_cast<KeyT *>(d_keys_in), d_keys_out);
cub::DoubleBuffer<ValueT> d_values(const_cast<ValueT *>(d_values_in), d_values_out); cub::DoubleBuffer<ValueT> d_values(const_cast<ValueT *>(d_values_in), d_values_out);
#endif
// In old version of cub, num_items in dispatch is also int32_t, no way to change. // In old version of cub, num_items in dispatch is also int32_t, no way to change.
using OffsetT = std::conditional_t<dh::BuildWithCUDACub() && dh::HasThrustMinorVer<13>(), using OffsetT = std::conditional_t<dh::BuildWithCUDACub() && dh::HasThrustMinorVer<13>(),
std::size_t, std::int32_t>; std::size_t, std::int32_t>;
CHECK_LE(num_items, std::numeric_limits<OffsetT>::max()); CHECK_LE(num_items, std::numeric_limits<OffsetT>::max());
// For Thrust >= 1.12 or CUDA >= 11.4, we require system cub installation // For Thrust >= 1.12 or CUDA >= 11.4, we require system cub installation
#if defined(XGBOOST_USE_CUDA)
#if THRUST_MAJOR_VERSION >= 2 #if THRUST_MAJOR_VERSION >= 2
dh::safe_cuda((cub::DispatchSegmentedRadixSort< dh::safe_cuda((cub::DispatchSegmentedRadixSort<
descending, KeyT, ValueT, BeginOffsetIteratorT, EndOffsetIteratorT, descending, KeyT, ValueT, BeginOffsetIteratorT, EndOffsetIteratorT,
@@ -113,18 +88,6 @@ void DeviceSegmentedRadixSortPair(void *d_temp_storage,
d_begin_offsets, d_end_offsets, begin_bit, d_begin_offsets, d_end_offsets, begin_bit,
end_bit, false, stream, false))); end_bit, false, stream, false)));
#endif #endif
#elif defined(XGBOOST_USE_HIP)
if (descending) {
rocprim::segmented_radix_sort_pairs_desc(d_temp_storage, temp_storage_bytes, d_keys_in, d_keys_out,
d_values_in, d_values_out, num_items, num_segments,
d_begin_offsets, d_end_offsets, begin_bit, end_bit, stream, false);
}
else {
rocprim::segmented_radix_sort_pairs(d_temp_storage, temp_storage_bytes, d_keys_in, d_keys_out,
d_values_in, d_values_out, num_items, num_segments, d_begin_offsets, d_end_offsets,
begin_bit, end_bit, stream, false);
}
#endif
} }
} // namespace detail } // namespace detail

View File

@@ -17,18 +17,14 @@
#include <thrust/copy.h> #include <thrust/copy.h>
#include <thrust/device_ptr.h> #include <thrust/device_ptr.h>
#include "device_helpers.cuh" #include "device_helpers.cuh"
#elif defined(__HIPCC__) #endif // defined(__CUDACC__)
#include <thrust/copy.h>
#include <thrust/device_ptr.h>
#include "device_helpers.hip.h"
#endif // defined(__CUDACC__) || defined(__HIPCC__)
#include "xgboost/span.h" #include "xgboost/span.h"
#include "common.h" #include "common.h"
namespace xgboost { namespace xgboost {
#if defined(__CUDACC__) || defined(__HIPCC__) #if defined(__CUDACC__)
using BitFieldAtomicType = unsigned long long; // NOLINT using BitFieldAtomicType = unsigned long long; // NOLINT
__forceinline__ __device__ BitFieldAtomicType AtomicOr(BitFieldAtomicType* address, __forceinline__ __device__ BitFieldAtomicType AtomicOr(BitFieldAtomicType* address,
@@ -52,7 +48,7 @@ __forceinline__ __device__ BitFieldAtomicType AtomicAnd(BitFieldAtomicType* addr
return old; return old;
} }
#endif // defined(__CUDACC__) || defined(__HIPCC__) #endif // defined(__CUDACC__)
/** /**
* @brief A non-owning type with auxiliary methods defined for manipulating bits. * @brief A non-owning type with auxiliary methods defined for manipulating bits.
@@ -110,7 +106,7 @@ struct BitFieldContainer {
XGBOOST_DEVICE static size_t ComputeStorageSize(index_type size) { XGBOOST_DEVICE static size_t ComputeStorageSize(index_type size) {
return common::DivRoundUp(size, kValueSize); return common::DivRoundUp(size, kValueSize);
} }
#if defined(__CUDA_ARCH__) || defined(__HIPCC__) #if defined(__CUDA_ARCH__)
__device__ BitFieldContainer& operator|=(BitFieldContainer const& rhs) { __device__ BitFieldContainer& operator|=(BitFieldContainer const& rhs) {
auto tid = blockIdx.x * blockDim.x + threadIdx.x; auto tid = blockIdx.x * blockDim.x + threadIdx.x;
size_t min_size = min(NumValues(), rhs.NumValues()); size_t min_size = min(NumValues(), rhs.NumValues());
@@ -127,9 +123,9 @@ struct BitFieldContainer {
} }
return *this; return *this;
} }
#endif // #if defined(__CUDA_ARCH__) || defined(__HIPCC__) #endif // #if defined(__CUDA_ARCH__)
#if defined(__CUDA_ARCH__) || defined(__HIPCC__) #if defined(__CUDA_ARCH__)
__device__ BitFieldContainer& operator&=(BitFieldContainer const& rhs) { __device__ BitFieldContainer& operator&=(BitFieldContainer const& rhs) {
size_t min_size = min(NumValues(), rhs.NumValues()); size_t min_size = min(NumValues(), rhs.NumValues());
auto tid = blockIdx.x * blockDim.x + threadIdx.x; auto tid = blockIdx.x * blockDim.x + threadIdx.x;
@@ -148,7 +144,7 @@ struct BitFieldContainer {
} }
#endif // defined(__CUDA_ARCH__) #endif // defined(__CUDA_ARCH__)
#if defined(__CUDA_ARCH__) || defined(__HIPCC__) #if defined(__CUDA_ARCH__)
__device__ auto Set(index_type pos) noexcept(true) { __device__ auto Set(index_type pos) noexcept(true) {
Pos pos_v = Direction::Shift(ToBitPos(pos)); Pos pos_v = Direction::Shift(ToBitPos(pos));
value_type& value = Data()[pos_v.int_pos]; value_type& value = Data()[pos_v.int_pos];
@@ -163,14 +159,6 @@ struct BitFieldContainer {
using Type = typename dh::detail::AtomicDispatcher<sizeof(value_type)>::Type; using Type = typename dh::detail::AtomicDispatcher<sizeof(value_type)>::Type;
atomicAnd(reinterpret_cast<Type *>(&value), clear_bit); atomicAnd(reinterpret_cast<Type *>(&value), clear_bit);
} }
#ifdef __HIPCC__
void Clear(index_type pos) noexcept(true) {
Pos pos_v = Direction::Shift(ToBitPos(pos));
value_type& value = Data()[pos_v.int_pos];
value_type clear_bit = ~(kOne << pos_v.bit_pos);
value &= clear_bit;
}
#endif
#else #else
void Set(index_type pos) noexcept(true) { void Set(index_type pos) noexcept(true) {
Pos pos_v = Direction::Shift(ToBitPos(pos)); Pos pos_v = Direction::Shift(ToBitPos(pos));
@@ -184,7 +172,7 @@ struct BitFieldContainer {
value_type clear_bit = ~(kOne << pos_v.bit_pos); value_type clear_bit = ~(kOne << pos_v.bit_pos);
value &= clear_bit; value &= clear_bit;
} }
#endif // defined(__CUDA_ARCH__) || defined(__HIPCC__) #endif // defined(__CUDA_ARCH__)
XGBOOST_DEVICE bool Check(Pos pos_v) const noexcept(true) { XGBOOST_DEVICE bool Check(Pos pos_v) const noexcept(true) {
pos_v = Direction::Shift(pos_v); pos_v = Direction::Shift(pos_v);

View File

@@ -55,7 +55,7 @@ void EscapeU8(std::string const &string, std::string *p_buffer) {
} }
} }
#if !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP) #if !defined(XGBOOST_USE_CUDA)
int AllVisibleGPUs() { return 0; } int AllVisibleGPUs() { return 0; }
#endif // !defined(XGBOOST_USE_CUDA) #endif // !defined(XGBOOST_USE_CUDA)

View File

@@ -2,7 +2,6 @@
* Copyright 2018-2022 XGBoost contributors * Copyright 2018-2022 XGBoost contributors
*/ */
#include "common.h" #include "common.h"
#include "cuda_to_hip.h"
namespace xgboost { namespace xgboost {
namespace common { namespace common {

View File

@@ -26,12 +26,6 @@
#define WITH_CUDA() true #define WITH_CUDA() true
#elif defined(__HIPCC__)
#include <thrust/system/hip/error.h>
#include <thrust/system_error.h>
#define WITH_CUDA() true
#else #else
#define WITH_CUDA() false #define WITH_CUDA() false
@@ -45,8 +39,8 @@ namespace dh {
*/ */
#define safe_cuda(ans) ThrowOnCudaError((ans), __FILE__, __LINE__) #define safe_cuda(ans) ThrowOnCudaError((ans), __FILE__, __LINE__)
inline cudaError_t ThrowOnCudaError(cudaError_t code, const char *file, int line) inline cudaError_t ThrowOnCudaError(cudaError_t code, const char *file,
{ int line) {
if (code != cudaSuccess) { if (code != cudaSuccess) {
LOG(FATAL) << thrust::system_error(code, thrust::cuda_category(), LOG(FATAL) << thrust::system_error(code, thrust::cuda_category(),
std::string{file} + ": " + // NOLINT std::string{file} + ": " + // NOLINT
@@ -54,23 +48,7 @@ inline cudaError_t ThrowOnCudaError(cudaError_t code, const char *file, int line
} }
return code; return code;
} }
#endif // defined(__CUDACC__)
#elif defined(__HIPCC__)
/*
* Error handling functions
*/
#define safe_cuda(ans) ThrowOnCudaError((ans), __FILE__, __LINE__)
inline hipError_t ThrowOnCudaError(hipError_t code, const char *file, int line)
{
if (code != hipSuccess) {
LOG(FATAL) << thrust::system_error(code, thrust::hip_category(),
std::string{file} + ": " + // NOLINT
std::to_string(line)).what();
}
return code;
}
#endif
} // namespace dh } // namespace dh
namespace xgboost::common { namespace xgboost::common {
@@ -181,9 +159,9 @@ class Range {
int AllVisibleGPUs(); int AllVisibleGPUs();
inline void AssertGPUSupport() { inline void AssertGPUSupport() {
#if !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP) #ifndef XGBOOST_USE_CUDA
LOG(FATAL) << "XGBoost version not compiled with GPU support."; LOG(FATAL) << "XGBoost version not compiled with GPU support.";
#endif // XGBOOST_USE_CUDA && XGBOOST_USE_HIP #endif // XGBOOST_USE_CUDA
} }
inline void AssertOneAPISupport() { inline void AssertOneAPISupport() {
@@ -194,7 +172,7 @@ inline void AssertOneAPISupport() {
void SetDevice(std::int32_t device); void SetDevice(std::int32_t device);
#if !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP) #if !defined(XGBOOST_USE_CUDA)
inline void SetDevice(std::int32_t device) { inline void SetDevice(std::int32_t device) {
if (device >= 0) { if (device >= 0) {
AssertGPUSupport(); AssertGPUSupport();

View File

@@ -1,4 +0,0 @@
#if defined(XGBOOST_USE_HIP)
#include "common.cu"
#endif

View File

@@ -11,11 +11,9 @@
#include "common.h" #include "common.h"
#if defined(__CUDACC__) #ifdef __CUDACC__
#include "device_helpers.cuh" #include "device_helpers.cuh"
#elif defined(__HIPCC__) #endif // __CUDACC__
#include "device_helpers.hip.h"
#endif // __CUDACC__ || __HIPCC__
namespace xgboost { namespace xgboost {
namespace common { namespace common {
@@ -107,7 +105,7 @@ class CompressedBufferWriter {
} }
} }
#if defined(__CUDACC__) || defined(__HIPCC__) #ifdef __CUDACC__
__device__ void AtomicWriteSymbol __device__ void AtomicWriteSymbol
(CompressedByteT* buffer, uint64_t symbol, size_t offset) { (CompressedByteT* buffer, uint64_t symbol, size_t offset) {
size_t ibit_start = offset * symbol_bits_; size_t ibit_start = offset * symbol_bits_;
@@ -121,7 +119,7 @@ class CompressedBufferWriter {
symbol >>= 8; symbol >>= 8;
} }
} }
#endif // __CUDACC__ || __HIPCC__ #endif // __CUDACC__
template <typename IterT> template <typename IterT>
void Write(CompressedByteT *buffer, IterT input_begin, IterT input_end) { void Write(CompressedByteT *buffer, IterT input_begin, IterT input_end) {

View File

@@ -4,6 +4,7 @@
#ifndef XGBOOST_COMMON_CUDA_CONTEXT_CUH_ #ifndef XGBOOST_COMMON_CUDA_CONTEXT_CUH_
#define XGBOOST_COMMON_CUDA_CONTEXT_CUH_ #define XGBOOST_COMMON_CUDA_CONTEXT_CUH_
#include <thrust/execution_policy.h> #include <thrust/execution_policy.h>
#include "device_helpers.cuh" #include "device_helpers.cuh"
namespace xgboost { namespace xgboost {

View File

@@ -72,23 +72,11 @@ class pinned_allocator {
if (cnt > this->max_size()) { throw std::bad_alloc(); } // end if if (cnt > this->max_size()) { throw std::bad_alloc(); } // end if
pointer result(nullptr); pointer result(nullptr);
#if defined(XGBOOST_USE_HIP)
dh::safe_cuda(hipHostMalloc(reinterpret_cast<void**>(&result), cnt * sizeof(value_type)));
#else
dh::safe_cuda(cudaMallocHost(reinterpret_cast<void**>(&result), cnt * sizeof(value_type))); dh::safe_cuda(cudaMallocHost(reinterpret_cast<void**>(&result), cnt * sizeof(value_type)));
#endif
return result; return result;
} }
inline void deallocate(pointer p, size_type) { inline void deallocate(pointer p, size_type) { dh::safe_cuda(cudaFreeHost(p)); } // NOLINT
#if defined(XGBOOST_USE_HIP)
dh::safe_cuda(hipHostFree(p));
#else
dh::safe_cuda(cudaFreeHost(p));
#endif
} // NOLINT
inline size_type max_size() const { return (std::numeric_limits<size_type>::max)() / sizeof(T); } // NOLINT inline size_type max_size() const { return (std::numeric_limits<size_type>::max)() / sizeof(T); } // NOLINT

View File

@@ -1,71 +0,0 @@
/**
* Copyright 2017-2023 XGBoost contributors
*/
#pragma once
#if defined(XGBOOST_USE_HIP)
#define cudaSuccess hipSuccess
#define cudaGetLastError hipGetLastError
#define cudaStream_t hipStream_t
#define cudaStreamCreate hipStreamCreate
#define cudaStreamCreateWithFlags hipStreamCreateWithFlags
#define cudaStreamDestroy hipStreamDestroy
#define cudaStreamWaitEvent hipStreamWaitEvent
#define cudaStreamSynchronize hipStreamSynchronize
#define cudaStreamPerThread hipStreamPerThread
#define cudaStreamLegacy hipStreamLegacy
#define cudaEvent_t hipEvent_t
#define cudaEventCreate hipEventCreate
#define cudaEventCreateWithFlags hipEventCreateWithFlags
#define cudaEventDestroy hipEventDestroy
#define cudaGetDevice hipGetDevice
#define cudaSetDevice hipSetDevice
#define cudaGetDeviceCount hipGetDeviceCount
#define cudaDeviceSynchronize hipDeviceSynchronize
#define cudaGetDeviceProperties hipGetDeviceProperties
#define cudaDeviceGetAttribute hipDeviceGetAttribute
#define cudaMallocHost hipMallocHost
#define cudaFreeHost hipFreeHost
#define cudaMalloc hipMalloc
#define cudaFree hipFree
#define cudaMemcpy hipMemcpy
#define cudaMemcpyAsync hipMemcpyAsync
#define cudaMemcpyDefault hipMemcpyDefault
#define cudaMemcpyHostToDevice hipMemcpyHostToDevice
#define cudaMemcpyHostToHost hipMemcpyHostToHost
#define cudaMemcpyDeviceToHost hipMemcpyDeviceToHost
#define cudaMemcpyDeviceToDevice hipMemcpyDeviceToDevice
#define cudaMemsetAsync hipMemsetAsync
#define cudaMemset hipMemset
#define cudaPointerAttributes hipPointerAttribute_t
#define cudaPointerGetAttributes hipPointerGetAttributes
#define cudaMemGetInfo hipMemGetInfo
#define cudaFuncSetAttribute hipFuncSetAttribute
#define cudaDevAttrMultiProcessorCount hipDeviceAttributeMultiprocessorCount
#define cudaOccupancyMaxActiveBlocksPerMultiprocessor hipOccupancyMaxActiveBlocksPerMultiprocessor
namespace thrust {
namespace hip {
}
}
namespace thrust {
namespace cuda = thrust::hip;
}
namespace hipcub {
}
namespace cub = hipcub;
#endif

View File

@@ -2,8 +2,6 @@
* Copyright 2017-2023 XGBoost contributors * Copyright 2017-2023 XGBoost contributors
*/ */
#pragma once #pragma once
#if defined(XGBOOST_USE_CUDA)
#include <thrust/binary_search.h> // thrust::upper_bound #include <thrust/binary_search.h> // thrust::upper_bound
#include <thrust/device_malloc_allocator.h> #include <thrust/device_malloc_allocator.h>
#include <thrust/device_ptr.h> #include <thrust/device_ptr.h>
@@ -1220,7 +1218,3 @@ class LDGIterator {
} }
}; };
} // namespace dh } // namespace dh
#elif defined(XGBOOST_USE_HIP)
#include "device_helpers.hip.h"
#endif

File diff suppressed because it is too large Load Diff

View File

@@ -84,19 +84,10 @@ __global__ void GetColumnSizeSharedMemKernel(IterSpan<BatchIt> batch_iter,
template <std::uint32_t kBlockThreads, typename Kernel> template <std::uint32_t kBlockThreads, typename Kernel>
std::uint32_t EstimateGridSize(std::int32_t device, Kernel kernel, std::size_t shared_mem) { std::uint32_t EstimateGridSize(std::int32_t device, Kernel kernel, std::size_t shared_mem) {
int n_mps = 0; int n_mps = 0;
#if defined(XGBOOST_USE_CUDA)
dh::safe_cuda(cudaDeviceGetAttribute(&n_mps, cudaDevAttrMultiProcessorCount, device)); dh::safe_cuda(cudaDeviceGetAttribute(&n_mps, cudaDevAttrMultiProcessorCount, device));
#elif defined(XGBOOST_USE_HIP)
dh::safe_cuda(hipDeviceGetAttribute(&n_mps, hipDeviceAttributeMultiprocessorCount, device));
#endif
int n_blocks_per_mp = 0; int n_blocks_per_mp = 0;
#if defined(XGBOOST_USE_CUDA)
dh::safe_cuda(cudaOccupancyMaxActiveBlocksPerMultiprocessor(&n_blocks_per_mp, kernel, dh::safe_cuda(cudaOccupancyMaxActiveBlocksPerMultiprocessor(&n_blocks_per_mp, kernel,
kBlockThreads, shared_mem)); kBlockThreads, shared_mem));
#elif defined(XGBOOST_USE_HIP)
dh::safe_cuda(hipOccupancyMaxActiveBlocksPerMultiprocessor(&n_blocks_per_mp, kernel,
kBlockThreads, shared_mem));
#endif
std::uint32_t grid_size = n_blocks_per_mp * n_mps; std::uint32_t grid_size = n_blocks_per_mp * n_mps;
return grid_size; return grid_size;
} }
@@ -179,7 +170,6 @@ void GetColumnSizesScan(int device, size_t num_columns, std::size_t num_cuts_per
column_sizes_scan->begin(), [=] __device__(size_t column_size) { column_sizes_scan->begin(), [=] __device__(size_t column_size) {
return thrust::min(num_cuts_per_feature, column_size); return thrust::min(num_cuts_per_feature, column_size);
}); });
thrust::exclusive_scan(thrust::cuda::par(alloc), cut_ptr_it, thrust::exclusive_scan(thrust::cuda::par(alloc), cut_ptr_it,
cut_ptr_it + column_sizes_scan->size(), cuts_ptr->DevicePointer()); cut_ptr_it + column_sizes_scan->size(), cuts_ptr->DevicePointer());
thrust::exclusive_scan(thrust::cuda::par(alloc), column_sizes_scan->begin(), thrust::exclusive_scan(thrust::cuda::par(alloc), column_sizes_scan->begin(),
@@ -304,7 +294,6 @@ void ProcessSlidingWindow(AdapterBatch const &batch, MetaInfo const &info,
&column_sizes_scan, &column_sizes_scan,
&sorted_entries); &sorted_entries);
dh::XGBDeviceAllocator<char> alloc; dh::XGBDeviceAllocator<char> alloc;
thrust::sort(thrust::cuda::par(alloc), sorted_entries.begin(), thrust::sort(thrust::cuda::par(alloc), sorted_entries.begin(),
sorted_entries.end(), detail::EntryCompareOp()); sorted_entries.end(), detail::EntryCompareOp());
@@ -364,13 +353,11 @@ void ProcessWeightedSlidingWindow(Batch batch, MetaInfo const& info,
bst_group_t group_idx = dh::SegmentId(d_group_ptr, ridx); bst_group_t group_idx = dh::SegmentId(d_group_ptr, ridx);
return weights[group_idx]; return weights[group_idx];
}); });
auto retit = thrust::copy_if(thrust::cuda::par(alloc), auto retit = thrust::copy_if(thrust::cuda::par(alloc),
weight_iter + begin, weight_iter + end, weight_iter + begin, weight_iter + end,
batch_iter + begin, batch_iter + begin,
d_temp_weights.data(), // output d_temp_weights.data(), // output
is_valid); is_valid);
CHECK_EQ(retit - d_temp_weights.data(), d_temp_weights.size()); CHECK_EQ(retit - d_temp_weights.data(), d_temp_weights.size());
} else { } else {
CHECK_EQ(batch.NumRows(), weights.size()); CHECK_EQ(batch.NumRows(), weights.size());
@@ -379,13 +366,11 @@ void ProcessWeightedSlidingWindow(Batch batch, MetaInfo const& info,
[=]__device__(size_t idx) -> float { [=]__device__(size_t idx) -> float {
return weights[batch.GetElement(idx).row_idx]; return weights[batch.GetElement(idx).row_idx];
}); });
auto retit = thrust::copy_if(thrust::cuda::par(alloc), auto retit = thrust::copy_if(thrust::cuda::par(alloc),
weight_iter + begin, weight_iter + end, weight_iter + begin, weight_iter + end,
batch_iter + begin, batch_iter + begin,
d_temp_weights.data(), // output d_temp_weights.data(), // output
is_valid); is_valid);
CHECK_EQ(retit - d_temp_weights.data(), d_temp_weights.size()); CHECK_EQ(retit - d_temp_weights.data(), d_temp_weights.size());
} }

View File

@@ -1,4 +0,0 @@
#if defined(XGBOOST_USE_HIP)
#include "hist_util.cu"
#endif

View File

@@ -1,7 +1,7 @@
/** /**
* Copyright 2017-2023 by XGBoost contributors * Copyright 2017-2023 by XGBoost contributors
*/ */
#if !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP) #ifndef XGBOOST_USE_CUDA
// dummy implementation of HostDeviceVector in case CUDA is not used // dummy implementation of HostDeviceVector in case CUDA is not used
@@ -199,4 +199,4 @@ template class HostDeviceVector<std::size_t>;
} // namespace xgboost } // namespace xgboost
#endif // XGBOOST_USE_CUDA && !defined(XGBOOST_USE_HIP) #endif // XGBOOST_USE_CUDA

View File

@@ -139,7 +139,6 @@ class HostDeviceVectorImpl {
auto ptr = other->ConstDevicePointer(); auto ptr = other->ConstDevicePointer();
SetDevice(); SetDevice();
CHECK_EQ(this->DeviceIdx(), other->DeviceIdx()); CHECK_EQ(this->DeviceIdx(), other->DeviceIdx());
dh::safe_cuda(cudaMemcpyAsync(this->DevicePointer() + ori_size, dh::safe_cuda(cudaMemcpyAsync(this->DevicePointer() + ori_size,
ptr, ptr,
other->Size() * sizeof(T), other->Size() * sizeof(T),
@@ -196,7 +195,6 @@ class HostDeviceVectorImpl {
gpu_access_ = access; gpu_access_ = access;
if (data_h_.size() != data_d_->size()) { data_h_.resize(data_d_->size()); } if (data_h_.size() != data_d_->size()) { data_h_.resize(data_d_->size()); }
SetDevice(); SetDevice();
dh::safe_cuda(cudaMemcpy(data_h_.data(), dh::safe_cuda(cudaMemcpy(data_h_.data(),
data_d_->data().get(), data_d_->data().get(),
data_d_->size() * sizeof(T), data_d_->size() * sizeof(T),
@@ -213,7 +211,6 @@ class HostDeviceVectorImpl {
// data is on the host // data is on the host
LazyResizeDevice(data_h_.size()); LazyResizeDevice(data_h_.size());
SetDevice(); SetDevice();
dh::safe_cuda(cudaMemcpyAsync(data_d_->data().get(), dh::safe_cuda(cudaMemcpyAsync(data_d_->data().get(),
data_h_.data(), data_h_.data(),
data_d_->size() * sizeof(T), data_d_->size() * sizeof(T),
@@ -242,7 +239,6 @@ class HostDeviceVectorImpl {
LazyResizeDevice(Size()); LazyResizeDevice(Size());
gpu_access_ = GPUAccess::kWrite; gpu_access_ = GPUAccess::kWrite;
SetDevice(); SetDevice();
dh::safe_cuda(cudaMemcpyAsync(data_d_->data().get(), other->data_d_->data().get(), dh::safe_cuda(cudaMemcpyAsync(data_d_->data().get(), other->data_d_->data().get(),
data_d_->size() * sizeof(T), cudaMemcpyDefault)); data_d_->size() * sizeof(T), cudaMemcpyDefault));
} }
@@ -252,7 +248,6 @@ class HostDeviceVectorImpl {
LazyResizeDevice(Size()); LazyResizeDevice(Size());
gpu_access_ = GPUAccess::kWrite; gpu_access_ = GPUAccess::kWrite;
SetDevice(); SetDevice();
dh::safe_cuda(cudaMemcpyAsync(data_d_->data().get(), begin, dh::safe_cuda(cudaMemcpyAsync(data_d_->data().get(), begin,
data_d_->size() * sizeof(T), cudaMemcpyDefault)); data_d_->size() * sizeof(T), cudaMemcpyDefault));
} }

View File

@@ -1,4 +0,0 @@
#if defined(XGBOOST_USE_HIP)
#include "host_device_vector.cu"
#endif

View File

@@ -12,8 +12,7 @@
namespace xgboost { namespace xgboost {
namespace linalg { namespace linalg {
template <typename T, int32_t D, typename Fn> template <typename T, int32_t D, typename Fn>
void ElementWiseKernelDevice(linalg::TensorView<T, D> t, Fn&& fn, cudaStream_t s = nullptr) void ElementWiseKernelDevice(linalg::TensorView<T, D> t, Fn&& fn, cudaStream_t s = nullptr) {
{
dh::safe_cuda(cudaSetDevice(t.DeviceIdx())); dh::safe_cuda(cudaSetDevice(t.DeviceIdx()));
static_assert(std::is_void<std::result_of_t<Fn(size_t, T&)>>::value, static_assert(std::is_void<std::result_of_t<Fn(size_t, T&)>>::value,
"For function with return, use transform instead."); "For function with return, use transform instead.");
@@ -29,8 +28,7 @@ void ElementWiseKernelDevice(linalg::TensorView<T, D> t, Fn&& fn, cudaStream_t s
} }
template <typename T, int32_t D, typename Fn> template <typename T, int32_t D, typename Fn>
void ElementWiseTransformDevice(linalg::TensorView<T, D> t, Fn&& fn, cudaStream_t s = nullptr) void ElementWiseTransformDevice(linalg::TensorView<T, D> t, Fn&& fn, cudaStream_t s = nullptr) {
{
if (t.Contiguous()) { if (t.Contiguous()) {
auto ptr = t.Values().data(); auto ptr = t.Values().data();
dh::LaunchN(t.Size(), s, [=] __device__(size_t i) { ptr[i] = fn(i, ptr[i]); }); dh::LaunchN(t.Size(), s, [=] __device__(size_t i) { ptr[i] = fn(i, ptr[i]); });

View File

@@ -42,7 +42,7 @@ void ElementWiseKernelHost(linalg::TensorView<T, D> t, int32_t n_threads, Fn&& f
} }
} }
#if !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP) #if !defined(XGBOOST_USE_CUDA)
template <typename T, int32_t D, typename Fn> template <typename T, int32_t D, typename Fn>
void ElementWiseKernelDevice(linalg::TensorView<T, D>, Fn&&, void* = nullptr) { void ElementWiseKernelDevice(linalg::TensorView<T, D>, Fn&&, void* = nullptr) {
common::AssertGPUSupport(); common::AssertGPUSupport();
@@ -60,7 +60,7 @@ void ElementWiseKernel(Context const* ctx, linalg::TensorView<T, D> t, Fn&& fn)
} }
ElementWiseKernelHost(t, ctx->Threads(), fn); ElementWiseKernelHost(t, ctx->Threads(), fn);
} }
#endif // !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP) #endif // !defined(XGBOOST_USE_CUDA)
template <typename T, std::int32_t kDim> template <typename T, std::int32_t kDim>
auto cbegin(TensorView<T, kDim> const& v) { // NOLINT auto cbegin(TensorView<T, kDim> const& v) { // NOLINT

View File

@@ -143,7 +143,7 @@ CheckNAN(T) {
return false; return false;
} }
#if XGBOOST_STRICT_R_MODE && !defined(__CUDA_ARCH__) && !defined(__HIPCC__) #if XGBOOST_STRICT_R_MODE && !defined(__CUDA_ARCH__)
bool CheckNAN(double v); bool CheckNAN(double v);
@@ -152,27 +152,23 @@ bool CheckNAN(double v);
XGBOOST_DEVICE bool inline CheckNAN(float x) { XGBOOST_DEVICE bool inline CheckNAN(float x) {
#if defined(__CUDA_ARCH__) #if defined(__CUDA_ARCH__)
return isnan(x); return isnan(x);
#elif defined(__HIPCC__)
return __builtin_isnan(x);
#else #else
return std::isnan(x); return std::isnan(x);
#endif // defined(__CUDA_ARCH__) || defined(__HIPCC__) #endif // defined(__CUDA_ARCH__)
} }
XGBOOST_DEVICE bool inline CheckNAN(double x) { XGBOOST_DEVICE bool inline CheckNAN(double x) {
#if defined(__CUDA_ARCH__) #if defined(__CUDA_ARCH__)
return isnan(x); return isnan(x);
#elif defined(__HIPCC__)
return __builtin_isnan(x);
#else #else
return std::isnan(x); return std::isnan(x);
#endif // defined(__CUDA_ARCH__) || defined(__HIPCC__) #endif // defined(__CUDA_ARCH__)
} }
#endif // XGBOOST_STRICT_R_MODE && !defined(__CUDA_ARCH__) #endif // XGBOOST_STRICT_R_MODE && !defined(__CUDA_ARCH__)
// GPU version is not uploaded in CRAN anyway. // GPU version is not uploaded in CRAN anyway.
// Specialize only when using R with CPU. // Specialize only when using R with CPU.
#if XGBOOST_STRICT_R_MODE && !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP) #if XGBOOST_STRICT_R_MODE && !defined(XGBOOST_USE_CUDA)
double LogGamma(double v); double LogGamma(double v);
#else // Not R or R with GPU. #else // Not R or R with GPU.
@@ -195,7 +191,7 @@ XGBOOST_DEVICE inline T LogGamma(T v) {
#endif // _MSC_VER #endif // _MSC_VER
} }
#endif // XGBOOST_STRICT_R_MODE && !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP) #endif // XGBOOST_STRICT_R_MODE && !defined(XGBOOST_USE_CUDA)
} // namespace common } // namespace common
} // namespace xgboost } // namespace xgboost

View File

@@ -15,7 +15,6 @@ double Reduce(Context const* ctx, HostDeviceVector<float> const& values) {
values.SetDevice(ctx->gpu_id); values.SetDevice(ctx->gpu_id);
auto const d_values = values.ConstDeviceSpan(); auto const d_values = values.ConstDeviceSpan();
dh::XGBCachingDeviceAllocator<char> alloc; dh::XGBCachingDeviceAllocator<char> alloc;
return dh::Reduce(thrust::cuda::par(alloc), dh::tcbegin(d_values), dh::tcend(d_values), 0.0, return dh::Reduce(thrust::cuda::par(alloc), dh::tcbegin(d_values), dh::tcend(d_values), 0.0,
thrust::plus<float>{}); thrust::plus<float>{});
} }

View File

@@ -99,12 +99,12 @@ void PartialSum(int32_t n_threads, InIt begin, InIt end, T init, OutIt out_it) {
namespace cuda_impl { namespace cuda_impl {
double Reduce(Context const* ctx, HostDeviceVector<float> const& values); double Reduce(Context const* ctx, HostDeviceVector<float> const& values);
#if !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP) #if !defined(XGBOOST_USE_CUDA)
inline double Reduce(Context const*, HostDeviceVector<float> const&) { inline double Reduce(Context const*, HostDeviceVector<float> const&) {
AssertGPUSupport(); AssertGPUSupport();
return 0; return 0;
} }
#endif // !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP) #endif // !defined(XGBOOST_USE_CUDA)
} // namespace cuda_impl } // namespace cuda_impl
/** /**

View File

@@ -1,4 +0,0 @@
#if defined(XGBOOST_USE_HIP)
#include "numeric.cu"
#endif

View File

@@ -109,7 +109,6 @@ template <typename T, typename U>
void CopyTo(Span<T> out, Span<U> src) { void CopyTo(Span<T> out, Span<U> src) {
CHECK_EQ(out.size(), src.size()); CHECK_EQ(out.size(), src.size());
static_assert(std::is_same<std::remove_cv_t<T>, std::remove_cv_t<T>>::value); static_assert(std::is_same<std::remove_cv_t<T>, std::remove_cv_t<T>>::value);
dh::safe_cuda(cudaMemcpyAsync(out.data(), src.data(), dh::safe_cuda(cudaMemcpyAsync(out.data(), src.data(),
out.size_bytes(), out.size_bytes(),
cudaMemcpyDefault)); cudaMemcpyDefault));
@@ -163,7 +162,6 @@ common::Span<thrust::tuple<uint64_t, uint64_t>> MergePath(
// Compute output ptr // Compute output ptr
auto transform_it = auto transform_it =
thrust::make_zip_iterator(thrust::make_tuple(x_ptr.data(), y_ptr.data())); thrust::make_zip_iterator(thrust::make_tuple(x_ptr.data(), y_ptr.data()));
thrust::transform( thrust::transform(
thrust::cuda::par(alloc), transform_it, transform_it + x_ptr.size(), thrust::cuda::par(alloc), transform_it, transform_it + x_ptr.size(),
out_ptr.data(), out_ptr.data(),
@@ -213,7 +211,6 @@ void MergeImpl(int32_t device, Span<SketchEntry const> const &d_x,
Span<bst_row_t const> const &x_ptr, Span<SketchEntry const> const &d_y, Span<bst_row_t const> const &x_ptr, Span<SketchEntry const> const &d_y,
Span<bst_row_t const> const &y_ptr, Span<SketchEntry> out, Span<bst_row_t> out_ptr) { Span<bst_row_t const> const &y_ptr, Span<SketchEntry> out, Span<bst_row_t> out_ptr) {
dh::safe_cuda(cudaSetDevice(device)); dh::safe_cuda(cudaSetDevice(device));
CHECK_EQ(d_x.size() + d_y.size(), out.size()); CHECK_EQ(d_x.size() + d_y.size(), out.size());
CHECK_EQ(x_ptr.size(), out_ptr.size()); CHECK_EQ(x_ptr.size(), out_ptr.size());
CHECK_EQ(y_ptr.size(), out_ptr.size()); CHECK_EQ(y_ptr.size(), out_ptr.size());
@@ -312,7 +309,6 @@ void SketchContainer::Push(Span<Entry const> entries, Span<size_t> columns_ptr,
common::Span<OffsetT> cuts_ptr, common::Span<OffsetT> cuts_ptr,
size_t total_cuts, Span<float> weights) { size_t total_cuts, Span<float> weights) {
dh::safe_cuda(cudaSetDevice(device_)); dh::safe_cuda(cudaSetDevice(device_));
Span<SketchEntry> out; Span<SketchEntry> out;
dh::device_vector<SketchEntry> cuts; dh::device_vector<SketchEntry> cuts;
bool first_window = this->Current().empty(); bool first_window = this->Current().empty();
@@ -382,7 +378,6 @@ size_t SketchContainer::ScanInput(Span<SketchEntry> entries, Span<OffsetT> d_col
}); });
// Reverse scan to accumulate weights into first duplicated element on left. // Reverse scan to accumulate weights into first duplicated element on left.
auto val_it = thrust::make_reverse_iterator(dh::tend(entries)); auto val_it = thrust::make_reverse_iterator(dh::tend(entries));
thrust::inclusive_scan_by_key( thrust::inclusive_scan_by_key(
thrust::cuda::par(alloc), key_it, key_it + entries.size(), thrust::cuda::par(alloc), key_it, key_it + entries.size(),
val_it, val_it, val_it, val_it,
@@ -448,7 +443,6 @@ void SketchContainer::Prune(size_t to) {
void SketchContainer::Merge(Span<OffsetT const> d_that_columns_ptr, void SketchContainer::Merge(Span<OffsetT const> d_that_columns_ptr,
Span<SketchEntry const> that) { Span<SketchEntry const> that) {
dh::safe_cuda(cudaSetDevice(device_)); dh::safe_cuda(cudaSetDevice(device_));
timer_.Start(__func__); timer_.Start(__func__);
if (this->Current().size() == 0) { if (this->Current().size() == 0) {
CHECK_EQ(this->columns_ptr_.HostVector().back(), 0); CHECK_EQ(this->columns_ptr_.HostVector().back(), 0);
@@ -484,7 +478,6 @@ void SketchContainer::Merge(Span<OffsetT const> d_that_columns_ptr,
void SketchContainer::FixError() { void SketchContainer::FixError() {
dh::safe_cuda(cudaSetDevice(device_)); dh::safe_cuda(cudaSetDevice(device_));
auto d_columns_ptr = this->columns_ptr_.ConstDeviceSpan(); auto d_columns_ptr = this->columns_ptr_.ConstDeviceSpan();
auto in = dh::ToSpan(this->Current()); auto in = dh::ToSpan(this->Current());
dh::LaunchN(in.size(), [=] __device__(size_t idx) { dh::LaunchN(in.size(), [=] __device__(size_t idx) {
@@ -642,12 +635,10 @@ void SketchContainer::MakeCuts(HistogramCuts* p_cuts, bool is_column_split) {
CHECK_EQ(num_columns_, d_in_columns_ptr.size() - 1); CHECK_EQ(num_columns_, d_in_columns_ptr.size() - 1);
max_values.resize(d_in_columns_ptr.size() - 1); max_values.resize(d_in_columns_ptr.size() - 1);
dh::caching_device_vector<SketchEntry> d_max_values(d_in_columns_ptr.size() - 1); dh::caching_device_vector<SketchEntry> d_max_values(d_in_columns_ptr.size() - 1);
thrust::reduce_by_key(thrust::cuda::par(alloc), key_it, key_it + in_cut_values.size(), val_it, thrust::reduce_by_key(thrust::cuda::par(alloc), key_it, key_it + in_cut_values.size(), val_it,
thrust::make_discard_iterator(), d_max_values.begin(), thrust::make_discard_iterator(), d_max_values.begin(),
thrust::equal_to<bst_feature_t>{}, thrust::equal_to<bst_feature_t>{},
[] __device__(auto l, auto r) { return l.value > r.value ? l : r; }); [] __device__(auto l, auto r) { return l.value > r.value ? l : r; });
dh::CopyDeviceSpanToVector(&max_values, dh::ToSpan(d_max_values)); dh::CopyDeviceSpanToVector(&max_values, dh::ToSpan(d_max_values));
auto max_it = MakeIndexTransformIter([&](auto i) { auto max_it = MakeIndexTransformIter([&](auto i) {
if (IsCat(h_feature_types, i)) { if (IsCat(h_feature_types, i)) {

View File

@@ -175,7 +175,6 @@ class SketchContainer {
template <typename KeyComp = thrust::equal_to<size_t>> template <typename KeyComp = thrust::equal_to<size_t>>
size_t Unique(KeyComp key_comp = thrust::equal_to<size_t>{}) { size_t Unique(KeyComp key_comp = thrust::equal_to<size_t>{}) {
timer_.Start(__func__); timer_.Start(__func__);
dh::safe_cuda(cudaSetDevice(device_)); dh::safe_cuda(cudaSetDevice(device_));
this->columns_ptr_.SetDevice(device_); this->columns_ptr_.SetDevice(device_);
Span<OffsetT> d_column_scan = this->columns_ptr_.DeviceSpan(); Span<OffsetT> d_column_scan = this->columns_ptr_.DeviceSpan();
@@ -187,13 +186,11 @@ class SketchContainer {
dh::XGBCachingDeviceAllocator<char> alloc; dh::XGBCachingDeviceAllocator<char> alloc;
d_column_scan = this->columns_ptr_.DeviceSpan(); d_column_scan = this->columns_ptr_.DeviceSpan();
size_t n_uniques = dh::SegmentedUnique( size_t n_uniques = dh::SegmentedUnique(
thrust::cuda::par(alloc), d_column_scan.data(), thrust::cuda::par(alloc), d_column_scan.data(),
d_column_scan.data() + d_column_scan.size(), entries.data(), d_column_scan.data() + d_column_scan.size(), entries.data(),
entries.data() + entries.size(), scan_out.DevicePointer(), entries.data() + entries.size(), scan_out.DevicePointer(),
entries.data(), detail::SketchUnique{}, key_comp); entries.data(), detail::SketchUnique{}, key_comp);
this->columns_ptr_.Copy(scan_out); this->columns_ptr_.Copy(scan_out);
CHECK(!this->columns_ptr_.HostCanRead()); CHECK(!this->columns_ptr_.HostCanRead());

View File

@@ -1,4 +0,0 @@
#if defined(XGBOOST_USE_HIP)
#include "quantile.cu"
#endif

View File

@@ -62,7 +62,7 @@ common::Span<std::size_t const> RankingCache::MakeRankOnCPU(Context const* ctx,
return rank; return rank;
} }
#if !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP) #if !defined(XGBOOST_USE_CUDA)
void RankingCache::InitOnCUDA(Context const*, MetaInfo const&) { common::AssertGPUSupport(); } void RankingCache::InitOnCUDA(Context const*, MetaInfo const&) { common::AssertGPUSupport(); }
common::Span<std::size_t const> RankingCache::MakeRankOnCUDA(Context const*, common::Span<std::size_t const> RankingCache::MakeRankOnCUDA(Context const*,
common::Span<float const>) { common::Span<float const>) {
@@ -108,9 +108,9 @@ void NDCGCache::InitOnCPU(Context const* ctx, MetaInfo const& info) {
}); });
} }
#if !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP) #if !defined(XGBOOST_USE_CUDA)
void NDCGCache::InitOnCUDA(Context const*, MetaInfo const&) { common::AssertGPUSupport(); } void NDCGCache::InitOnCUDA(Context const*, MetaInfo const&) { common::AssertGPUSupport(); }
#endif // !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP) #endif // !defined(XGBOOST_USE_CUDA)
DMLC_REGISTER_PARAMETER(LambdaRankParam); DMLC_REGISTER_PARAMETER(LambdaRankParam);
@@ -120,7 +120,7 @@ void PreCache::InitOnCPU(Context const*, MetaInfo const& info) {
[](auto beg, auto end, auto op) { return std::all_of(beg, end, op); }); [](auto beg, auto end, auto op) { return std::all_of(beg, end, op); });
} }
#if !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP) #if !defined(XGBOOST_USE_CUDA)
void PreCache::InitOnCUDA(Context const*, MetaInfo const&) { common::AssertGPUSupport(); } void PreCache::InitOnCUDA(Context const*, MetaInfo const&) { common::AssertGPUSupport(); }
#endif // !defined(XGBOOST_USE_CUDA) #endif // !defined(XGBOOST_USE_CUDA)
@@ -130,9 +130,9 @@ void MAPCache::InitOnCPU(Context const*, MetaInfo const& info) {
[](auto beg, auto end, auto op) { return std::all_of(beg, end, op); }); [](auto beg, auto end, auto op) { return std::all_of(beg, end, op); });
} }
#if !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP) #if !defined(XGBOOST_USE_CUDA)
void MAPCache::InitOnCUDA(Context const*, MetaInfo const&) { common::AssertGPUSupport(); } void MAPCache::InitOnCUDA(Context const*, MetaInfo const&) { common::AssertGPUSupport(); }
#endif // !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP) #endif // !defined(XGBOOST_USE_CUDA)
std::string ParseMetricName(StringView name, StringView param, position_t* topn, bool* minus) { std::string ParseMetricName(StringView name, StringView param, position_t* topn, bool* minus) {
std::string out_name; std::string out_name;

View File

@@ -23,10 +23,6 @@
#include "xgboost/logging.h" // for CHECK #include "xgboost/logging.h" // for CHECK
#include "xgboost/span.h" // for Span #include "xgboost/span.h" // for Span
#if defined(XGBOOST_USE_HIP)
#include <hipcub/hipcub.hpp>
#endif
namespace xgboost::ltr { namespace xgboost::ltr {
namespace cuda_impl { namespace cuda_impl {
void CalcQueriesDCG(Context const* ctx, linalg::VectorView<float const> d_labels, void CalcQueriesDCG(Context const* ctx, linalg::VectorView<float const> d_labels,

View File

@@ -1,4 +0,0 @@
#if defined(XGBOOST_USE_HIP)
#include "ranking_utils.cu"
#endif

View File

@@ -18,7 +18,6 @@
namespace xgboost { namespace xgboost {
namespace common { namespace common {
namespace cuda_impl { namespace cuda_impl {
void Median(Context const* ctx, linalg::TensorView<float const, 2> t, void Median(Context const* ctx, linalg::TensorView<float const, 2> t,
common::OptionalWeights weights, linalg::Tensor<float, 1>* out) { common::OptionalWeights weights, linalg::Tensor<float, 1>* out) {
CHECK_GE(t.Shape(1), 1); CHECK_GE(t.Shape(1), 1);

View File

@@ -216,7 +216,6 @@ void SegmentedWeightedQuantile(Context const* ctx, AlphaIt alpha_it, SegIt seg_b
detail::SegOp<SegIt>{seg_beg, seg_end}); detail::SegOp<SegIt>{seg_beg, seg_end});
auto scan_val = dh::MakeTransformIterator<float>(thrust::make_counting_iterator(0ul), auto scan_val = dh::MakeTransformIterator<float>(thrust::make_counting_iterator(0ul),
detail::WeightOp<WIter>{w_begin, d_sorted_idx}); detail::WeightOp<WIter>{w_begin, d_sorted_idx});
thrust::inclusive_scan_by_key(thrust::cuda::par(caching), scan_key, scan_key + n_weights, thrust::inclusive_scan_by_key(thrust::cuda::par(caching), scan_key, scan_key + n_weights,
scan_val, weights_cdf.begin()); scan_val, weights_cdf.begin());

View File

@@ -112,7 +112,7 @@ void Median(Context const* ctx, linalg::TensorView<float const, 2> t, OptionalWe
void Mean(Context const* ctx, linalg::VectorView<float const> v, linalg::VectorView<float> out); void Mean(Context const* ctx, linalg::VectorView<float const> v, linalg::VectorView<float> out);
#if !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP) #if !defined(XGBOOST_USE_CUDA)
inline void Median(Context const*, linalg::TensorView<float const, 2>, OptionalWeights, inline void Median(Context const*, linalg::TensorView<float const, 2>, OptionalWeights,
linalg::Tensor<float, 1>*) { linalg::Tensor<float, 1>*) {
common::AssertGPUSupport(); common::AssertGPUSupport();
@@ -120,7 +120,7 @@ inline void Median(Context const*, linalg::TensorView<float const, 2>, OptionalW
inline void Mean(Context const*, linalg::VectorView<float const>, linalg::VectorView<float>) { inline void Mean(Context const*, linalg::VectorView<float const>, linalg::VectorView<float>) {
common::AssertGPUSupport(); common::AssertGPUSupport();
} }
#endif // !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP) #endif // !defined(XGBOOST_USE_CUDA)
} // namespace cuda_impl } // namespace cuda_impl
/** /**

View File

@@ -1,4 +0,0 @@
#if defined(XGBOOST_USE_HIP)
#include "stats.cu"
#endif

View File

@@ -25,12 +25,12 @@ DECLARE_FIELD_ENUM_CLASS(xgboost::common::ProbabilityDistributionType);
namespace xgboost { namespace xgboost {
namespace common { namespace common {
#if !defined(__CUDACC__) && !defined(__HIPCC__) #ifndef __CUDACC__
using std::log; using std::log;
using std::fmax; using std::fmax;
#endif // __CUDACC__ && __HIPCC__ #endif // __CUDACC__
enum class CensoringType : uint8_t { enum class CensoringType : uint8_t {
kUncensored, kRightCensored, kLeftCensored, kIntervalCensored kUncensored, kRightCensored, kLeftCensored, kIntervalCensored

View File

@@ -59,11 +59,9 @@ std::size_t SegmentedTrapezoidThreads(xgboost::common::Span<U> group_ptr,
}); });
dh::InclusiveSum(out_group_threads_ptr.data(), out_group_threads_ptr.data(), dh::InclusiveSum(out_group_threads_ptr.data(), out_group_threads_ptr.data(),
out_group_threads_ptr.size()); out_group_threads_ptr.size());
size_t total = 0; std::size_t total = 0;
dh::safe_cuda(cudaMemcpy(&total, out_group_threads_ptr.data() + out_group_threads_ptr.size() - 1, dh::safe_cuda(cudaMemcpy(&total, out_group_threads_ptr.data() + out_group_threads_ptr.size() - 1,
sizeof(total), cudaMemcpyDeviceToHost)); sizeof(total), cudaMemcpyDeviceToHost));
return total; return total;
} }

View File

@@ -19,9 +19,7 @@
#if defined (__CUDACC__) #if defined (__CUDACC__)
#include "device_helpers.cuh" #include "device_helpers.cuh"
#elif defined(__HIPCC__) #endif // defined (__CUDACC__)
#include "device_helpers.hip.h"
#endif // defined (__CUDACC__) || defined(__HIPCC__)
namespace xgboost { namespace xgboost {
namespace common { namespace common {
@@ -30,7 +28,7 @@ constexpr size_t kBlockThreads = 256;
namespace detail { namespace detail {
#if defined(__CUDACC__) || defined(__HIPCC__) #if defined(__CUDACC__)
template <typename Functor, typename... SpanType> template <typename Functor, typename... SpanType>
__global__ void LaunchCUDAKernel(Functor _func, Range _range, __global__ void LaunchCUDAKernel(Functor _func, Range _range,
SpanType... _spans) { SpanType... _spans) {
@@ -38,7 +36,7 @@ __global__ void LaunchCUDAKernel(Functor _func, Range _range,
_func(i, _spans...); _func(i, _spans...);
} }
} }
#endif // defined(__CUDACC__) || defined(__HIPCC__) #endif // defined(__CUDACC__)
} // namespace detail } // namespace detail
@@ -129,7 +127,7 @@ class Transform {
UnpackShard(device, _vectors...); UnpackShard(device, _vectors...);
} }
#if defined(__CUDACC__) || defined(__HIPCC__) #if defined(__CUDACC__)
template <typename std::enable_if<CompiledWithCuda>::type* = nullptr, template <typename std::enable_if<CompiledWithCuda>::type* = nullptr,
typename... HDV> typename... HDV>
void LaunchCUDA(Functor _func, HDV*... _vectors) const { void LaunchCUDA(Functor _func, HDV*... _vectors) const {
@@ -142,9 +140,7 @@ class Transform {
// granularity is used in data vector. // granularity is used in data vector.
size_t shard_size = range_size; size_t shard_size = range_size;
Range shard_range {0, static_cast<Range::DifferenceType>(shard_size)}; Range shard_range {0, static_cast<Range::DifferenceType>(shard_size)};
dh::safe_cuda(cudaSetDevice(device_)); dh::safe_cuda(cudaSetDevice(device_));
const int kGrids = const int kGrids =
static_cast<int>(DivRoundUp(*(range_.end()), kBlockThreads)); static_cast<int>(DivRoundUp(*(range_.end()), kBlockThreads));
if (kGrids == 0) { if (kGrids == 0) {
@@ -163,7 +159,7 @@ class Transform {
LOG(FATAL) << "Not part of device code. WITH_CUDA: " << WITH_CUDA(); LOG(FATAL) << "Not part of device code. WITH_CUDA: " << WITH_CUDA();
} }
#endif // defined(__CUDACC__) || defined(__HIPCC__) #endif // defined(__CUDACC__)
template <typename... HDV> template <typename... HDV>
void LaunchCPU(Functor func, HDV *...vectors) const { void LaunchCPU(Functor func, HDV *...vectors) const {

View File

@@ -28,7 +28,7 @@ Context::Context() : cfs_cpu_count_{common::GetCfsCPUCount()} {}
namespace { namespace {
inline constexpr char const* kDevice = "device"; inline constexpr char const* kDevice = "device";
#if !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP) #if !defined(XGBOOST_USE_CUDA)
DeviceOrd CUDAOrdinal(DeviceOrd device, bool) { DeviceOrd CUDAOrdinal(DeviceOrd device, bool) {
device = DeviceOrd::CPU(); device = DeviceOrd::CPU();
return device; return device;
@@ -208,10 +208,10 @@ std::int32_t Context::Threads() const {
return n_threads; return n_threads;
} }
#if !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP) #if !defined(XGBOOST_USE_CUDA)
CUDAContext const* Context::CUDACtx() const { CUDAContext const* Context::CUDACtx() const {
common::AssertGPUSupport(); common::AssertGPUSupport();
return nullptr; return nullptr;
} }
#endif // defined(XGBOOST_USE_CUDA) || defined(XGBOOST_USE_HIP) #endif // defined(XGBOOST_USE_CUDA)
} // namespace xgboost } // namespace xgboost

View File

@@ -1,4 +0,0 @@
#if defined(XGBOOST_USE_HIP)
#include "context.cu"
#endif

View File

@@ -20,9 +20,7 @@ void ArrayInterfaceHandler::SyncCudaStream(std::int64_t stream) {
* case where 0 might be given should either use None, 1, or 2 instead for * case where 0 might be given should either use None, 1, or 2 instead for
* clarity. * clarity.
*/ */
#ifndef XGBOOST_USE_HIP
LOG(FATAL) << "Invalid stream ID in array interface: " << stream; LOG(FATAL) << "Invalid stream ID in array interface: " << stream;
#endif
case 1: case 1:
// default legacy stream // default legacy stream
break; break;
@@ -40,8 +38,6 @@ bool ArrayInterfaceHandler::IsCudaPtr(void const* ptr) {
if (!ptr) { if (!ptr) {
return false; return false;
} }
#if defined(XGBOOST_USE_CUDA)
cudaPointerAttributes attr; cudaPointerAttributes attr;
auto err = cudaPointerGetAttributes(&attr, ptr); auto err = cudaPointerGetAttributes(&attr, ptr);
// reset error // reset error
@@ -63,34 +59,5 @@ bool ArrayInterfaceHandler::IsCudaPtr(void const* ptr) {
// other errors, `cudaErrorNoDevice`, `cudaErrorInsufficientDriver` etc. // other errors, `cudaErrorNoDevice`, `cudaErrorInsufficientDriver` etc.
return false; return false;
} }
#elif defined(XGBOOST_USE_HIP)
hipPointerAttribute_t attr;
auto err = hipPointerGetAttributes(&attr, ptr);
// reset error
CHECK_EQ(err, hipGetLastError());
if (err == hipErrorInvalidValue) {
return false;
} else if (err == hipSuccess) {
#if HIP_VERSION_MAJOR < 6
switch (attr.memoryType) {
case hipMemoryTypeHost:
return false;
default:
return true;
}
#else
switch (attr.type) {
case hipMemoryTypeUnregistered:
case hipMemoryTypeHost:
return false;
default:
return true;
}
#endif
return true;
} else {
return false;
}
#endif
} }
} // namespace xgboost } // namespace xgboost

View File

@@ -28,8 +28,6 @@
#if defined(XGBOOST_USE_CUDA) #if defined(XGBOOST_USE_CUDA)
#include "cuda_fp16.h" #include "cuda_fp16.h"
#elif defined(XGBOOST_USE_HIP)
#include <hip/hip_fp16.h>
#endif #endif
namespace xgboost { namespace xgboost {
@@ -310,12 +308,12 @@ class ArrayInterfaceHandler {
template <typename T, typename E = void> template <typename T, typename E = void>
struct ToDType; struct ToDType;
// float // float
#if defined(XGBOOST_USE_CUDA) || defined(XGBOOST_USE_HIP) #if defined(XGBOOST_USE_CUDA)
template <> template <>
struct ToDType<__half> { struct ToDType<__half> {
static constexpr ArrayInterfaceHandler::Type kType = ArrayInterfaceHandler::kF2; static constexpr ArrayInterfaceHandler::Type kType = ArrayInterfaceHandler::kF2;
}; };
#endif // defined(XGBOOST_USE_CUDA) || defined(XGBOOST_USE_HIP) #endif // defined(XGBOOST_USE_CUDA)
template <> template <>
struct ToDType<float> { struct ToDType<float> {
static constexpr ArrayInterfaceHandler::Type kType = ArrayInterfaceHandler::kF4; static constexpr ArrayInterfaceHandler::Type kType = ArrayInterfaceHandler::kF4;
@@ -364,10 +362,10 @@ struct ToDType<int64_t> {
static constexpr ArrayInterfaceHandler::Type kType = ArrayInterfaceHandler::kI8; static constexpr ArrayInterfaceHandler::Type kType = ArrayInterfaceHandler::kI8;
}; };
#if !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP) #if !defined(XGBOOST_USE_CUDA)
inline void ArrayInterfaceHandler::SyncCudaStream(int64_t) { common::AssertGPUSupport(); } inline void ArrayInterfaceHandler::SyncCudaStream(int64_t) { common::AssertGPUSupport(); }
inline bool ArrayInterfaceHandler::IsCudaPtr(void const *) { return false; } inline bool ArrayInterfaceHandler::IsCudaPtr(void const *) { return false; }
#endif // !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP) #endif // !defined(XGBOOST_USE_CUDA)
/** /**
* \brief A type erased view over __array_interface__ protocol defined by numpy * \brief A type erased view over __array_interface__ protocol defined by numpy
@@ -465,11 +463,11 @@ class ArrayInterface {
CHECK(sizeof(long double) == 16) << error::NoF128(); CHECK(sizeof(long double) == 16) << error::NoF128();
type = T::kF16; type = T::kF16;
} else if (typestr[1] == 'f' && typestr[2] == '2') { } else if (typestr[1] == 'f' && typestr[2] == '2') {
#if defined(XGBOOST_USE_CUDA) || defined(XGBOOST_USE_HIP) #if defined(XGBOOST_USE_CUDA)
type = T::kF2; type = T::kF2;
#else #else
LOG(FATAL) << "Half type is not supported."; LOG(FATAL) << "Half type is not supported.";
#endif // defined(XGBOOST_USE_CUDA) || defined(XGBOOST_USE_HIP) #endif // defined(XGBOOST_USE_CUDA)
} else if (typestr[1] == 'f' && typestr[2] == '4') { } else if (typestr[1] == 'f' && typestr[2] == '4') {
type = T::kF4; type = T::kF4;
} else if (typestr[1] == 'f' && typestr[2] == '8') { } else if (typestr[1] == 'f' && typestr[2] == '8') {
@@ -504,15 +502,15 @@ class ArrayInterface {
using T = ArrayInterfaceHandler::Type; using T = ArrayInterfaceHandler::Type;
switch (type) { switch (type) {
case T::kF2: { case T::kF2: {
#if defined(XGBOOST_USE_CUDA) || defined(XGBOOST_USE_HIP) #if defined(XGBOOST_USE_CUDA)
return func(reinterpret_cast<__half const *>(data)); return func(reinterpret_cast<__half const *>(data));
#endif // defined(XGBOOST_USE_CUDA) || || defined(XGBOOST_USE_HIP) #endif // defined(XGBOOST_USE_CUDA)
} }
case T::kF4: case T::kF4:
return func(reinterpret_cast<float const *>(data)); return func(reinterpret_cast<float const *>(data));
case T::kF8: case T::kF8:
return func(reinterpret_cast<double const *>(data)); return func(reinterpret_cast<double const *>(data));
#if defined(__CUDA_ARCH__ ) || defined(__HIPCC__) #ifdef __CUDA_ARCH__
case T::kF16: { case T::kF16: {
// CUDA device code doesn't support long double. // CUDA device code doesn't support long double.
SPAN_CHECK(false); SPAN_CHECK(false);
@@ -559,7 +557,7 @@ class ArrayInterface {
static_assert(sizeof...(index) <= D, "Invalid index."); static_assert(sizeof...(index) <= D, "Invalid index.");
return this->DispatchCall([=](auto const *p_values) -> T { return this->DispatchCall([=](auto const *p_values) -> T {
std::size_t offset = linalg::detail::Offset<0ul>(strides, 0ul, index...); std::size_t offset = linalg::detail::Offset<0ul>(strides, 0ul, index...);
#if defined(XGBOOST_USE_CUDA) || defined(XGBOOST_USE_HIP) #if defined(XGBOOST_USE_CUDA)
// No operator defined for half -> size_t // No operator defined for half -> size_t
using Type = std::conditional_t< using Type = std::conditional_t<
std::is_same<__half, std::is_same<__half,
@@ -569,7 +567,7 @@ class ArrayInterface {
return static_cast<T>(static_cast<Type>(p_values[offset])); return static_cast<T>(static_cast<Type>(p_values[offset]));
#else #else
return static_cast<T>(p_values[offset]); return static_cast<T>(p_values[offset]);
#endif // defined(XGBOOST_USE_CUDA) || defined(XGBOOST_USE_HIP) #endif // defined(XGBOOST_USE_CUDA)
}); });
} }
@@ -606,7 +604,7 @@ void DispatchDType(ArrayInterface<D> const array, std::int32_t device, Fn fn) {
}; };
switch (array.type) { switch (array.type) {
case ArrayInterfaceHandler::kF2: { case ArrayInterfaceHandler::kF2: {
#if defined(XGBOOST_USE_CUDA) || defined(XGBOOST_USE_HIP) #if defined(XGBOOST_USE_CUDA)
dispatch(__half{}); dispatch(__half{});
#endif #endif
break; break;

View File

@@ -1,4 +0,0 @@
#if defined(XGBOOST_USE_HIP)
#include "array_interface.cu"
#endif

View File

@@ -800,9 +800,9 @@ void MetaInfo::Validate(std::int32_t device) const {
} }
} }
#if !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP) #if !defined(XGBOOST_USE_CUDA)
void MetaInfo::SetInfoFromCUDA(Context const&, StringView, Json) { common::AssertGPUSupport(); } void MetaInfo::SetInfoFromCUDA(Context const&, StringView, Json) { common::AssertGPUSupport(); }
#endif // !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP) #endif // !defined(XGBOOST_USE_CUDA)
bool MetaInfo::IsVerticalFederated() const { bool MetaInfo::IsVerticalFederated() const {
return collective::IsFederated() && IsColumnSplit(); return collective::IsFederated() && IsColumnSplit();

View File

@@ -44,7 +44,6 @@ void CopyTensorInfoImpl(CUDAContext const* ctx, Json arr_interface, linalg::Tens
std::copy(array.shape, array.shape + D, shape.data()); std::copy(array.shape, array.shape + D, shape.data());
// set data // set data
data->Resize(array.n); data->Resize(array.n);
dh::safe_cuda(cudaMemcpyAsync(data->DevicePointer(), array.data, array.n * sizeof(T), dh::safe_cuda(cudaMemcpyAsync(data->DevicePointer(), array.data, array.n * sizeof(T),
cudaMemcpyDefault, ctx->Stream())); cudaMemcpyDefault, ctx->Stream()));
}); });
@@ -96,10 +95,8 @@ void CopyQidImpl(ArrayInterface<1> array_interface, std::vector<bst_group_t>* p_
} }
}); });
bool non_dec = true; bool non_dec = true;
dh::safe_cuda(cudaMemcpy(&non_dec, flag.data().get(), sizeof(bool), dh::safe_cuda(cudaMemcpy(&non_dec, flag.data().get(), sizeof(bool),
cudaMemcpyDeviceToHost)); cudaMemcpyDeviceToHost));
CHECK(non_dec) << "`qid` must be sorted in increasing order along with data."; CHECK(non_dec) << "`qid` must be sorted in increasing order along with data.";
size_t bytes = 0; size_t bytes = 0;
dh::caching_device_vector<uint32_t> out(array_interface.Shape(0)); dh::caching_device_vector<uint32_t> out(array_interface.Shape(0));
@@ -117,10 +114,8 @@ void CopyQidImpl(ArrayInterface<1> array_interface, std::vector<bst_group_t>* p_
group_ptr_.clear(); group_ptr_.clear();
group_ptr_.resize(h_num_runs_out + 1, 0); group_ptr_.resize(h_num_runs_out + 1, 0);
dh::XGBCachingDeviceAllocator<char> alloc; dh::XGBCachingDeviceAllocator<char> alloc;
thrust::inclusive_scan(thrust::cuda::par(alloc), cnt.begin(), thrust::inclusive_scan(thrust::cuda::par(alloc), cnt.begin(),
cnt.begin() + h_num_runs_out, cnt.begin()); cnt.begin() + h_num_runs_out, cnt.begin());
thrust::copy(cnt.begin(), cnt.begin() + h_num_runs_out, thrust::copy(cnt.begin(), cnt.begin() + h_num_runs_out,
group_ptr_.begin() + 1); group_ptr_.begin() + 1);
} }

View File

@@ -1,4 +0,0 @@
#if defined(XGBOOST_USE_HIP)
#include "data.cu"
#endif

View File

@@ -122,9 +122,7 @@ class CudfAdapter : public detail::SingleBatchDataIter<CudfAdapterBatch> {
device_idx_ = dh::CudaGetPointerDevice(first_column.data); device_idx_ = dh::CudaGetPointerDevice(first_column.data);
CHECK_NE(device_idx_, Context::kCpuId); CHECK_NE(device_idx_, Context::kCpuId);
dh::safe_cuda(cudaSetDevice(device_idx_)); dh::safe_cuda(cudaSetDevice(device_idx_));
for (auto& json_col : json_columns) { for (auto& json_col : json_columns) {
auto column = ArrayInterface<1>(get<Object const>(json_col)); auto column = ArrayInterface<1>(get<Object const>(json_col));
columns.push_back(column); columns.push_back(column);
@@ -213,7 +211,6 @@ template <typename AdapterBatchT>
std::size_t GetRowCounts(const AdapterBatchT batch, common::Span<bst_row_t> offset, int device_idx, std::size_t GetRowCounts(const AdapterBatchT batch, common::Span<bst_row_t> offset, int device_idx,
float missing) { float missing) {
dh::safe_cuda(cudaSetDevice(device_idx)); dh::safe_cuda(cudaSetDevice(device_idx));
IsValidFunctor is_valid(missing); IsValidFunctor is_valid(missing);
dh::safe_cuda(cudaMemsetAsync(offset.data(), '\0', offset.size_bytes())); dh::safe_cuda(cudaMemsetAsync(offset.data(), '\0', offset.size_bytes()));
@@ -247,7 +244,6 @@ std::size_t GetRowCounts(const AdapterBatchT batch, common::Span<bst_row_t> offs
&offset[ridx]), &offset[ridx]),
static_cast<unsigned long long>(cnt)); // NOLINT static_cast<unsigned long long>(cnt)); // NOLINT
}); });
dh::XGBCachingDeviceAllocator<char> alloc; dh::XGBCachingDeviceAllocator<char> alloc;
bst_row_t row_stride = bst_row_t row_stride =
dh::Reduce(thrust::cuda::par(alloc), thrust::device_pointer_cast(offset.data()), dh::Reduce(thrust::cuda::par(alloc), thrust::device_pointer_cast(offset.data()),

View File

@@ -1,7 +1,7 @@
/** /**
* Copyright 2019-2023, XGBoost contributors * Copyright 2019-2023, XGBoost contributors
*/ */
#if !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP) #ifndef XGBOOST_USE_CUDA
#include "ellpack_page.h" #include "ellpack_page.h"
@@ -52,4 +52,5 @@ size_t EllpackPage::Size() const {
return impl_->Cuts(); return impl_->Cuts();
} }
} // namespace xgboost } // namespace xgboost
#endif // XGBOOST_USE_CUDA || XGBOOST_USE_HIP
#endif // XGBOOST_USE_CUDA

View File

@@ -18,10 +18,6 @@
#include "gradient_index.h" #include "gradient_index.h"
#include "xgboost/data.h" #include "xgboost/data.h"
#if defined(XGBOOST_USE_HIP)
#include <rocprim/rocprim.hpp>
#endif
namespace xgboost { namespace xgboost {
EllpackPage::EllpackPage() : impl_{new EllpackPageImpl()} {} EllpackPage::EllpackPage() : impl_{new EllpackPageImpl()} {}
@@ -106,7 +102,6 @@ EllpackPageImpl::EllpackPageImpl(int device, common::HistogramCuts cuts,
row_stride(row_stride), row_stride(row_stride),
n_rows(n_rows) { n_rows(n_rows) {
monitor_.Init("ellpack_page"); monitor_.Init("ellpack_page");
dh::safe_cuda(cudaSetDevice(device)); dh::safe_cuda(cudaSetDevice(device));
monitor_.Start("InitCompressedData"); monitor_.Start("InitCompressedData");
@@ -261,8 +256,6 @@ void CopyDataToEllpack(const AdapterBatchT& batch, common::Span<FeatureType cons
// Go one level down into cub::DeviceScan API to set OffsetT as 64 bit // Go one level down into cub::DeviceScan API to set OffsetT as 64 bit
// So we don't crash on n > 2^31 // So we don't crash on n > 2^31
size_t temp_storage_bytes = 0; size_t temp_storage_bytes = 0;
#if defined(__CUDACC__)
using DispatchScan = using DispatchScan =
cub::DispatchScan<decltype(key_value_index_iter), decltype(out), cub::DispatchScan<decltype(key_value_index_iter), decltype(out),
TupleScanOp<Tuple>, cub::NullType, int64_t>; TupleScanOp<Tuple>, cub::NullType, int64_t>;
@@ -285,17 +278,6 @@ void CopyDataToEllpack(const AdapterBatchT& batch, common::Span<FeatureType cons
key_value_index_iter, out, TupleScanOp<Tuple>(), key_value_index_iter, out, TupleScanOp<Tuple>(),
cub::NullType(), batch.Size(), nullptr, false); cub::NullType(), batch.Size(), nullptr, false);
#endif #endif
#elif defined (__HIPCC__)
rocprim::inclusive_scan(nullptr, temp_storage_bytes, key_value_index_iter, out, batch.Size(), TupleScanOp<Tuple>());
dh::TemporaryArray<char> temp_storage(temp_storage_bytes);
rocprim::inclusive_scan(temp_storage.data().get(), temp_storage_bytes, key_value_index_iter, out, batch.Size(),
TupleScanOp<Tuple>());
#endif
} }
void WriteNullValues(EllpackPageImpl* dst, int device_idx, void WriteNullValues(EllpackPageImpl* dst, int device_idx,
@@ -552,13 +534,11 @@ void EllpackPageImpl::CreateHistIndices(int device,
// copy data entries to device. // copy data entries to device.
if (row_batch.data.DeviceCanRead()) { if (row_batch.data.DeviceCanRead()) {
auto const& d_data = row_batch.data.ConstDeviceSpan(); auto const& d_data = row_batch.data.ConstDeviceSpan();
dh::safe_cuda(cudaMemcpyAsync( dh::safe_cuda(cudaMemcpyAsync(
entries_d.data().get(), d_data.data() + ent_cnt_begin, entries_d.data().get(), d_data.data() + ent_cnt_begin,
n_entries * sizeof(Entry), cudaMemcpyDefault)); n_entries * sizeof(Entry), cudaMemcpyDefault));
} else { } else {
const std::vector<Entry>& data_vec = row_batch.data.ConstHostVector(); const std::vector<Entry>& data_vec = row_batch.data.ConstHostVector();
dh::safe_cuda(cudaMemcpyAsync( dh::safe_cuda(cudaMemcpyAsync(
entries_d.data().get(), data_vec.data() + ent_cnt_begin, entries_d.data().get(), data_vec.data() + ent_cnt_begin,
n_entries * sizeof(Entry), cudaMemcpyDefault)); n_entries * sizeof(Entry), cudaMemcpyDefault));

View File

@@ -1,4 +0,0 @@
#if defined(XGBOOST_USE_HIP)
#include "ellpack_page.cu"
#endif

View File

@@ -1,4 +0,0 @@
#if defined(XGBOOST_USE_HIP)
#include "ellpack_page_raw_format.cu"
#endif

View File

@@ -45,7 +45,7 @@ class EllpackPageSource : public PageSourceIncMixIn<EllpackPage> {
void Fetch() final; void Fetch() final;
}; };
#if !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP) #if !defined(XGBOOST_USE_CUDA)
inline void EllpackPageSource::Fetch() { inline void EllpackPageSource::Fetch() {
// silent the warning about unused variables. // silent the warning about unused variables.
(void)(row_stride_); (void)(row_stride_);

View File

@@ -1,4 +0,0 @@
#if defined(XGBOOST_USE_HIP)
#include "ellpack_page_source.cu"
#endif

View File

@@ -65,12 +65,12 @@ GHistIndexMatrix::GHistIndexMatrix(MetaInfo const &info, common::HistogramCuts &
max_numeric_bins_per_feat(max_bin_per_feat), max_numeric_bins_per_feat(max_bin_per_feat),
isDense_{info.num_col_ * info.num_row_ == info.num_nonzero_} {} isDense_{info.num_col_ * info.num_row_ == info.num_nonzero_} {}
#if !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP) #if !defined(XGBOOST_USE_CUDA)
GHistIndexMatrix::GHistIndexMatrix(Context const *, MetaInfo const &, EllpackPage const &, GHistIndexMatrix::GHistIndexMatrix(Context const *, MetaInfo const &, EllpackPage const &,
BatchParam const &) { BatchParam const &) {
common::AssertGPUSupport(); common::AssertGPUSupport();
} }
#endif // defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP) #endif // defined(XGBOOST_USE_CUDA)
GHistIndexMatrix::~GHistIndexMatrix() = default; GHistIndexMatrix::~GHistIndexMatrix() = default;

Some files were not shown because too many files have changed in this diff Show More