Compare commits

..

6 Commits

Author SHA1 Message Date
Philip Hyunsu Cho
82d846bbeb Update change_scala_version.py to also change scala.version property (#9897) 2023-12-18 23:49:41 -08:00
Philip Hyunsu Cho
71d330afdc Bump version to 2.0.3 (#9895) 2023-12-14 17:54:05 -08:00
Philip Hyunsu Cho
3acbd8692b [jvm-packages] Fix POM for xgboost-jvm metapackage (#9893)
* [jvm-packages] Fix POM for xgboost-jvm metapackage

* Add script for updating the Scala version
2023-12-14 16:50:34 -08:00
Philip Hyunsu Cho
ad524f76ab [backport] [CI] Upload libxgboost4j.dylib (M1) to S3 bucket (#9887)
* [CI] Set up CI for Mac M1 (#9699)

* [CI] Improve CI for Mac M1 (#9748)

* [CI] Build libxgboost4j.dylib with CMAKE_OSX_DEPLOYMENT_TARGET (#9749)

* [CI] Upload libxgboost4j.dylib (M1) to S3 bucket (#9886)
2023-12-13 16:05:40 -08:00
Jiaming Yuan
d2d1751c03 [backport][py] Use the first found native library. (#9860) (#9879) 2023-12-13 14:20:30 +08:00
Jiaming Yuan
e4ee4e79dc [backport][sklearn] Fix loading model attributes. (#9808) (#9880) 2023-12-13 14:20:04 +08:00
277 changed files with 595 additions and 2649 deletions

3
.gitmodules vendored
View File

@@ -5,6 +5,3 @@
[submodule "gputreeshap"]
path = gputreeshap
url = https://github.com/rapidsai/gputreeshap.git
[submodule "rocgputreeshap"]
path = rocgputreeshap
url = https://github.com/ROCmSoftwarePlatform/rocgputreeshap

View File

@@ -1,5 +1,5 @@
cmake_minimum_required(VERSION 3.18 FATAL_ERROR)
project(xgboost LANGUAGES CXX C VERSION 2.0.2)
project(xgboost LANGUAGES CXX C VERSION 2.0.3)
include(cmake/Utils.cmake)
list(APPEND CMAKE_MODULE_PATH "${xgboost_SOURCE_DIR}/cmake/modules")
cmake_policy(SET CMP0022 NEW)
@@ -58,7 +58,7 @@ option(ENABLE_ALL_WARNINGS "Enable all compiler warnings. Only effective for GCC
option(LOG_CAPI_INVOCATION "Log all C API invocations for debugging" OFF)
option(GOOGLE_TEST "Build google tests" OFF)
option(USE_DMLC_GTEST "Use google tests bundled with dmlc-core submodule" OFF)
option(USE_DEVICE_DEBUG "Generate CUDA/HIP device debug info." OFF)
option(USE_DEVICE_DEBUG "Generate CUDA device debug info." OFF)
option(USE_NVTX "Build with cuda profiling annotations. Developers only." OFF)
set(NVTX_HEADER_DIR "" CACHE PATH "Path to the stand-alone nvtx header")
option(RABIT_MOCK "Build rabit with mock" OFF)
@@ -71,10 +71,6 @@ option(USE_NCCL "Build with NCCL to enable distributed GPU support." OFF)
option(BUILD_WITH_SHARED_NCCL "Build with shared NCCL library." OFF)
set(GPU_COMPUTE_VER "" CACHE STRING
"Semicolon separated list of compute versions to be built against, e.g. '35;61'")
## HIP
option(USE_HIP "Build with GPU acceleration" OFF)
option(USE_RCCL "Build with RCCL to enable distributed GPU support." OFF)
option(BUILD_WITH_SHARED_RCCL "Build with shared RCCL library." OFF)
## Copied From dmlc
option(USE_HDFS "Build with HDFS support" OFF)
option(USE_AZURE "Build with AZURE support" OFF)
@@ -97,7 +93,6 @@ option(ADD_PKGCONFIG "Add xgboost.pc into system." ON)
if (USE_DEBUG_OUTPUT AND (NOT (CMAKE_BUILD_TYPE MATCHES Debug)))
message(SEND_ERROR "Do not enable `USE_DEBUG_OUTPUT' with release build.")
endif (USE_DEBUG_OUTPUT AND (NOT (CMAKE_BUILD_TYPE MATCHES Debug)))
if (USE_NCCL AND NOT (USE_CUDA))
message(SEND_ERROR "`USE_NCCL` must be enabled with `USE_CUDA` flag.")
endif (USE_NCCL AND NOT (USE_CUDA))
@@ -107,17 +102,6 @@ endif (USE_DEVICE_DEBUG AND NOT (USE_CUDA))
if (BUILD_WITH_SHARED_NCCL AND (NOT USE_NCCL))
message(SEND_ERROR "Build XGBoost with -DUSE_NCCL=ON to enable BUILD_WITH_SHARED_NCCL.")
endif (BUILD_WITH_SHARED_NCCL AND (NOT USE_NCCL))
if (USE_RCCL AND NOT (USE_HIP))
message(SEND_ERROR "`USE_RCCL` must be enabled with `USE_HIP` flag.")
endif (USE_RCCL AND NOT (USE_HIP))
if (USE_DEVICE_DEBUG AND NOT (USE_HIP))
message(SEND_ERROR "`USE_DEVICE_DEBUG` must be enabled with `USE_HIP` flag.")
endif (USE_DEVICE_DEBUG AND NOT (USE_HIP))
if (BUILD_WITH_SHARED_RCCL AND (NOT USE_RCCL))
message(SEND_ERROR "Build XGBoost with -DUSE_RCCL=ON to enable BUILD_WITH_SHARED_RCCL.")
endif (BUILD_WITH_SHARED_RCCL AND (NOT USE_RCCL))
if (JVM_BINDINGS AND R_LIB)
message(SEND_ERROR "`R_LIB' is not compatible with `JVM_BINDINGS' as they both have customized configurations.")
endif (JVM_BINDINGS AND R_LIB)
@@ -131,15 +115,9 @@ endif (USE_AVX)
if (PLUGIN_LZ4)
message(SEND_ERROR "The option 'PLUGIN_LZ4' is removed from XGBoost.")
endif (PLUGIN_LZ4)
if (PLUGIN_RMM AND NOT (USE_CUDA))
message(SEND_ERROR "`PLUGIN_RMM` must be enabled with `USE_CUDA` flag.")
endif (PLUGIN_RMM AND NOT (USE_CUDA))
if (PLUGIN_RMM AND NOT (USE_HIP))
message(SEND_ERROR "`PLUGIN_RMM` must be enabled with `USE_HIP` flag.")
endif (PLUGIN_RMM AND NOT (USE_HIP))
if (PLUGIN_RMM AND NOT ((CMAKE_CXX_COMPILER_ID STREQUAL "Clang") OR (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")))
message(SEND_ERROR "`PLUGIN_RMM` must be used with GCC or Clang compiler.")
endif (PLUGIN_RMM AND NOT ((CMAKE_CXX_COMPILER_ID STREQUAL "Clang") OR (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")))
@@ -192,24 +170,6 @@ if (USE_CUDA)
find_package(CUDAToolkit REQUIRED)
endif (USE_CUDA)
if (USE_HIP)
set(USE_OPENMP ON CACHE BOOL "HIP requires OpenMP" FORCE)
# `export CXX=' is ignored by CMake HIP.
set(CMAKE_HIP_HOST_COMPILER ${CMAKE_CXX_COMPILER})
message(STATUS "Configured HIP host compiler: ${CMAKE_HIP_HOST_COMPILER}")
enable_language(HIP)
find_package(hip REQUIRED)
find_package(rocthrust REQUIRED)
find_package(hipcub REQUIRED)
set(CMAKE_HIP_FLAGS "${CMAKE_HIP_FLAGS} -I${HIP_INCLUDE_DIRS} -I${HIP_INCLUDE_DIRS}/hip")
set(CMAKE_HIP_FLAGS "${CMAKE_HIP_FLAGS} -Wunused-result -w")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D__HIP_PLATFORM_AMD__")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -I${HIP_INCLUDE_DIRS}")
add_subdirectory(${PROJECT_SOURCE_DIR}/rocgputreeshap)
endif (USE_HIP)
if (FORCE_COLORED_OUTPUT AND (CMAKE_GENERATOR STREQUAL "Ninja") AND
((CMAKE_CXX_COMPILER_ID STREQUAL "GNU") OR
(CMAKE_CXX_COMPILER_ID STREQUAL "Clang")))
@@ -249,10 +209,6 @@ if (USE_NCCL)
find_package(Nccl REQUIRED)
endif (USE_NCCL)
if (USE_RCCL)
find_package(rccl REQUIRED)
endif (USE_RCCL)
# dmlc-core
msvc_use_static_runtime()
if (FORCE_SHARED_CRT)

View File

@@ -1,8 +1,8 @@
Package: xgboost
Type: Package
Title: Extreme Gradient Boosting
Version: 2.0.2.1
Date: 2023-10-12
Version: 2.0.3.1
Date: 2023-12-14
Authors@R: c(
person("Tianqi", "Chen", role = c("aut"),
email = "tianqi.tchen@gmail.com"),

18
R-package/configure vendored
View File

@@ -1,6 +1,6 @@
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
# Generated by GNU Autoconf 2.71 for xgboost 2.0.2.
# Generated by GNU Autoconf 2.71 for xgboost 2.0.3.
#
#
# Copyright (C) 1992-1996, 1998-2017, 2020-2021 Free Software Foundation,
@@ -607,8 +607,8 @@ MAKEFLAGS=
# Identity of this package.
PACKAGE_NAME='xgboost'
PACKAGE_TARNAME='xgboost'
PACKAGE_VERSION='2.0.2'
PACKAGE_STRING='xgboost 2.0.2'
PACKAGE_VERSION='2.0.3'
PACKAGE_STRING='xgboost 2.0.3'
PACKAGE_BUGREPORT=''
PACKAGE_URL=''
@@ -1225,7 +1225,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF
\`configure' configures xgboost 2.0.2 to adapt to many kinds of systems.
\`configure' configures xgboost 2.0.3 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]...
@@ -1287,7 +1287,7 @@ fi
if test -n "$ac_init_help"; then
case $ac_init_help in
short | recursive ) echo "Configuration of xgboost 2.0.2:";;
short | recursive ) echo "Configuration of xgboost 2.0.3:";;
esac
cat <<\_ACEOF
@@ -1367,7 +1367,7 @@ fi
test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then
cat <<\_ACEOF
xgboost configure 2.0.2
xgboost configure 2.0.3
generated by GNU Autoconf 2.71
Copyright (C) 2021 Free Software Foundation, Inc.
@@ -1533,7 +1533,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
It was created by xgboost $as_me 2.0.2, which was
It was created by xgboost $as_me 2.0.3, which was
generated by GNU Autoconf 2.71. Invocation command line was
$ $0$ac_configure_args_raw
@@ -3412,7 +3412,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
# report actual input values of CONFIG_FILES etc. instead of their
# values after options handling.
ac_log="
This file was extended by xgboost $as_me 2.0.2, which was
This file was extended by xgboost $as_me 2.0.3, which was
generated by GNU Autoconf 2.71. Invocation command line was
CONFIG_FILES = $CONFIG_FILES
@@ -3467,7 +3467,7 @@ ac_cs_config_escaped=`printf "%s\n" "$ac_cs_config" | sed "s/^ //; s/'/'\\\\\\\\
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_config='$ac_cs_config_escaped'
ac_cs_version="\\
xgboost config.status 2.0.2
xgboost config.status 2.0.3
configured by $0, generated by GNU Autoconf 2.71,
with options \\"\$ac_cs_config\\"

View File

@@ -2,7 +2,7 @@
AC_PREREQ(2.69)
AC_INIT([xgboost],[2.0.2],[],[xgboost],[])
AC_INIT([xgboost],[2.0.3],[],[xgboost],[])
: ${R_HOME=`R RHOME`}
if test -z "${R_HOME}"; then

View File

@@ -32,7 +32,7 @@ namespace common {
bool CheckNAN(double v) {
return ISNAN(v);
}
#if !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP)
#if !defined(XGBOOST_USE_CUDA)
double LogGamma(double v) {
return lgammafn(v);
}

View File

@@ -181,23 +181,6 @@ function(xgboost_set_cuda_flags target)
CUDA_RUNTIME_LIBRARY Static)
endfunction(xgboost_set_cuda_flags)
# Set HIP related flags to target.
function(xgboost_set_hip_flags target)
if (USE_DEVICE_DEBUG)
target_compile_options(${target} PRIVATE
$<$<AND:$<CONFIG:DEBUG>,$<COMPILE_LANGUAGE:HIP>>:-G>)
endif (USE_DEVICE_DEBUG)
target_compile_definitions(${target} PRIVATE -DXGBOOST_USE_HIP=1)
target_include_directories(${target} PRIVATE ${xgboost_SOURCE_DIR}/rocgputreeshap)
target_include_directories(${target} PRIVATE ${xgboost_SOURCE_DIR}/warp-primitives/include)
set_target_properties(${target} PROPERTIES
HIP_STANDARD 17
HIP_STANDARD_REQUIRED ON
HIP_SEPARABLE_COMPILATION OFF)
endfunction(xgboost_set_hip_flags)
macro(xgboost_link_nccl target)
if (BUILD_STATIC_LIB)
target_include_directories(${target} PUBLIC ${NCCL_INCLUDE_DIR})
@@ -210,20 +193,6 @@ macro(xgboost_link_nccl target)
endif (BUILD_STATIC_LIB)
endmacro(xgboost_link_nccl)
macro(xgboost_link_rccl target)
if(BUILD_STATIC_LIB)
target_include_directories(${target} PUBLIC ${RCCL_INCLUDE_DIR}/rccl)
target_compile_definitions(${target} PUBLIC -DXGBOOST_USE_RCCL=1)
target_link_directories(${target} PUBLIC ${HIP_LIB_INSTALL_DIR})
target_link_libraries(${target} PUBLIC ${RCCL_LIBRARY})
else()
target_include_directories(${target} PRIVATE ${RCCL_INCLUDE_DIR}/rccl)
target_compile_definitions(${target} PRIVATE -DXGBOOST_USE_RCCL=1)
target_link_directories(${target} PUBLIC ${HIP_LIB_INSTALL_DIR})
target_link_libraries(${target} PRIVATE ${RCCL_LIBRARY})
endif()
endmacro()
# compile options
macro(xgboost_target_properties target)
set_target_properties(${target} PROPERTIES
@@ -246,10 +215,6 @@ macro(xgboost_target_properties target)
-Xcompiler=-Wall -Xcompiler=-Wextra -Xcompiler=-Wno-expansion-to-defined,
-Wall -Wextra -Wno-expansion-to-defined>
)
target_compile_options(${target} PUBLIC
$<IF:$<COMPILE_LANGUAGE:HIP>,
-Wall -Wextra >
)
endif(ENABLE_ALL_WARNINGS)
target_compile_options(${target}
@@ -318,10 +283,6 @@ macro(xgboost_target_link_libraries target)
target_link_libraries(${target} PUBLIC CUDA::cudart_static)
endif (USE_CUDA)
if (USE_HIP)
xgboost_set_hip_flags(${target})
endif (USE_HIP)
if (PLUGIN_RMM)
target_link_libraries(${target} PRIVATE rmm::rmm)
endif (PLUGIN_RMM)
@@ -330,10 +291,6 @@ macro(xgboost_target_link_libraries target)
xgboost_link_nccl(${target})
endif (USE_NCCL)
if(USE_RCCL)
xgboost_link_rccl(${target})
endif()
if (USE_NVTX)
target_link_libraries(${target} PRIVATE CUDA::nvToolsExt)
endif (USE_NVTX)

View File

@@ -3,8 +3,6 @@
set(USE_OPENMP @USE_OPENMP@)
set(USE_CUDA @USE_CUDA@)
set(USE_NCCL @USE_NCCL@)
set(USE_HIP @USE_HIP@)
set(USE_RCCL @USE_RCCL@)
set(XGBOOST_BUILD_STATIC_LIB @BUILD_STATIC_LIB@)
include(CMakeFindDependencyMacro)
@@ -17,9 +15,6 @@ if (XGBOOST_BUILD_STATIC_LIB)
if(USE_CUDA)
find_dependency(CUDA)
endif()
if(USE_HIP)
find_dependency(HIP)
endif()
# nccl should be linked statically if xgboost is built as static library.
endif (XGBOOST_BUILD_STATIC_LIB)

View File

@@ -4,13 +4,13 @@ python mapfeat.py
# split train and test
python mknfold.py machine.txt 1
# training and output the models
../../../xgboost machine.conf
../../xgboost machine.conf
# output predictions of test data
../../../xgboost machine.conf task=pred model_in=0002.model
../../xgboost machine.conf task=pred model_in=0002.model
# print the boosters of 0002.model in dump.raw.txt
../../../xgboost machine.conf task=dump model_in=0002.model name_dump=dump.raw.txt
../../xgboost machine.conf task=dump model_in=0002.model name_dump=dump.raw.txt
# print the boosters of 0002.model in dump.nice.txt with feature map
../../../xgboost machine.conf task=dump model_in=0002.model fmap=featmap.txt name_dump=dump.nice.txt
../../xgboost machine.conf task=dump model_in=0002.model fmap=featmap.txt name_dump=dump.nice.txt
# cat the result
cat dump.nice.txt

View File

@@ -0,0 +1,79 @@
import argparse
import pathlib
import re
import shutil
def main(args):
if args.scala_version == "2.12":
scala_ver = "2.12"
scala_patchver = "2.12.18"
elif args.scala_version == "2.13":
scala_ver = "2.13"
scala_patchver = "2.13.11"
else:
raise ValueError(f"Unsupported Scala version: {args.scala_version}")
# Clean artifacts
if args.purge_artifacts:
for target in pathlib.Path("jvm-packages/").glob("**/target"):
if target.is_dir():
print(f"Removing {target}...")
shutil.rmtree(target)
# Update pom.xml
for pom in pathlib.Path("jvm-packages/").glob("**/pom.xml"):
print(f"Updating {pom}...")
with open(pom, "r", encoding="utf-8") as f:
lines = f.readlines()
with open(pom, "w", encoding="utf-8") as f:
replaced_scalaver = False
replaced_scala_binver = False
for line in lines:
for artifact in [
"xgboost-jvm",
"xgboost4j",
"xgboost4j-gpu",
"xgboost4j-spark",
"xgboost4j-spark-gpu",
"xgboost4j-flink",
"xgboost4j-example",
]:
line = re.sub(
f"<artifactId>{artifact}_[0-9\\.]*",
f"<artifactId>{artifact}_{scala_ver}",
line,
)
# Only replace the first occurrence of scala.version
if not replaced_scalaver:
line, nsubs = re.subn(
r"<scala.version>[0-9\.]*",
f"<scala.version>{scala_patchver}",
line,
)
if nsubs > 0:
replaced_scalaver = True
# Only replace the first occurrence of scala.binary.version
if not replaced_scala_binver:
line, nsubs = re.subn(
r"<scala.binary.version>[0-9\.]*",
f"<scala.binary.version>{scala_ver}",
line,
)
if nsubs > 0:
replaced_scala_binver = True
f.write(line)
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("--purge-artifacts", action="store_true")
parser.add_argument(
"--scala-version",
type=str,
required=True,
help="Version of Scala to use in the JVM packages",
choices=["2.12", "2.13"],
)
parsed_args = parser.parse_args()
main(parsed_args)

View File

@@ -2,7 +2,6 @@ import argparse
import errno
import glob
import os
import platform
import re
import shutil
import subprocess
@@ -88,10 +87,6 @@ def main():
help="Version of the release being prepared",
)
args = parser.parse_args()
if sys.platform != "darwin" or platform.machine() != "arm64":
raise NotImplementedError("Please run this script using an M1 Mac")
version = args.release_version
expected_git_tag = "v" + version
current_git_tag = get_current_git_tag()
@@ -141,6 +136,7 @@ def main():
("linux", "x86_64"),
("windows", "x86_64"),
("macos", "x86_64"),
("macos", "aarch64"),
]:
output_dir = f"xgboost4j/src/main/resources/lib/{os_ident}/{arch}"
maybe_makedirs(output_dir)
@@ -164,6 +160,10 @@ def main():
url=f"{nightly_bucket_prefix}/{git_branch}/libxgboost4j/libxgboost4j_{commit_hash}.dylib",
filename="xgboost4j/src/main/resources/lib/macos/x86_64/libxgboost4j.dylib",
)
retrieve(
url=f"{nightly_bucket_prefix}/{git_branch}/libxgboost4j/libxgboost4j_m1_{commit_hash}.dylib",
filename="xgboost4j/src/main/resources/lib/macos/aarch64/libxgboost4j.dylib",
)
with tempfile.TemporaryDirectory() as tempdir:
# libxgboost4j.so for Linux x86_64, CPU only
@@ -211,9 +211,14 @@ def main():
"https://central.sonatype.org/publish/publish-maven/"
)
print(
"3. Now on a M1 Mac machine, run the following to build Scala 2.12 artifacts:"
"3. Now on a Linux machine, run the following to build Scala 2.12 artifacts. "
"Make sure to use an Internet connection with fast upload speed:"
)
print(
" # Skip native build, since we have all needed native binaries from CI\n"
" export MAVEN_SKIP_NATIVE_BUILD=1\n"
" GPG_TTY=$(tty) mvn deploy -Prelease -DskipTests"
)
print(" GPG_TTY=$(tty) mvn deploy -Prelease -DskipTests")
print(
"4. Log into https://oss.sonatype.org/. On the left menu panel, click Staging "
"Repositories. Visit the URL https://oss.sonatype.org/content/repositories/mldmlc-xxxx "
@@ -221,11 +226,14 @@ def main():
"artifacts to the Maven Central repository. The top-level metapackage should be "
"named xgboost-jvm_2.12."
)
print("5. Remove the Scala 2.12 artifacts and build Scala 2.13 artifacts:")
print(" rm -rf targets/")
print(" GPG_TTY=$(tty) mvn deploy -Prelease-cpu-only,scala-2.13 -DskipTests")
print(
"6. Go to https://oss.sonatype.org/ to release the Scala 2.13 artifacts."
"5. Remove the Scala 2.12 artifacts and build Scala 2.13 artifacts:\n"
" export MAVEN_SKIP_NATIVE_BUILD=1\n"
" python dev/change_scala_version.py --scala-version 2.13 --purge-artifacts\n"
" GPG_TTY=$(tty) mvn deploy -Prelease-cpu-only,scala-2.13 -DskipTests"
)
print(
"6. Go to https://oss.sonatype.org/ to release the Scala 2.13 artifacts. "
"The top-level metapackage should be named xgboost-jvm_2.13."
)

View File

@@ -58,19 +58,19 @@
/*!
* \brief Tag function as usable by device
*/
#if defined (__CUDA__) || defined(__NVCC__) || defined(__HIPCC__)
#if defined (__CUDA__) || defined(__NVCC__)
#define XGBOOST_DEVICE __host__ __device__
#else
#define XGBOOST_DEVICE
#endif // defined (__CUDA__) || defined(__NVCC__) || defined(__HIPCC__)
#endif // defined (__CUDA__) || defined(__NVCC__)
#if defined(__CUDA__) || defined(__CUDACC__) || defined(__HIPCC__)
#if defined(__CUDA__) || defined(__CUDACC__)
#define XGBOOST_HOST_DEV_INLINE XGBOOST_DEVICE __forceinline__
#define XGBOOST_DEV_INLINE __device__ __forceinline__
#else
#define XGBOOST_HOST_DEV_INLINE
#define XGBOOST_DEV_INLINE
#endif // defined(__CUDA__) || defined(__CUDACC__) || defined(__HIPCC__)
#endif // defined(__CUDA__) || defined(__CUDACC__)
// These check are for Makefile.
#if !defined(XGBOOST_MM_PREFETCH_PRESENT) && !defined(XGBOOST_BUILTIN_PREFETCH_PRESENT)
@@ -274,8 +274,8 @@ class GradientPairInt64 {
GradientPairInt64(GradientPairInt64 const &g) = default;
GradientPairInt64 &operator=(GradientPairInt64 const &g) = default;
[[nodiscard]] XGBOOST_DEVICE T GetQuantisedGrad() const { return grad_; }
[[nodiscard]] XGBOOST_DEVICE T GetQuantisedHess() const { return hess_; }
XGBOOST_DEVICE [[nodiscard]] T GetQuantisedGrad() const { return grad_; }
XGBOOST_DEVICE [[nodiscard]] T GetQuantisedHess() const { return hess_; }
XGBOOST_DEVICE GradientPairInt64 &operator+=(const GradientPairInt64 &rhs) {
grad_ += rhs.grad_;

View File

@@ -58,11 +58,11 @@
namespace xgboost {
#if defined(__CUDACC__) || defined(__HIPCC__)
#ifdef __CUDACC__
// Sets a function to call instead of cudaSetDevice();
// only added for testing
void SetCudaSetDeviceHandler(void (*handler)(int));
#endif // __CUDACC__ || __HIPCC__
#endif // __CUDACC__
template <typename T> struct HostDeviceVectorImpl;

View File

@@ -30,11 +30,11 @@
// decouple it from xgboost.
#ifndef LINALG_HD
#if defined(__CUDA__) || defined(__NVCC__) || defined(__HIPCC__)
#if defined(__CUDA__) || defined(__NVCC__)
#define LINALG_HD __host__ __device__
#else
#define LINALG_HD
#endif // defined (__CUDA__) || defined(__NVCC__) || defined(__HIPCC__)
#endif // defined (__CUDA__) || defined(__NVCC__)
#endif // LINALG_HD
namespace xgboost::linalg {
@@ -118,7 +118,7 @@ using IndexToTag = std::conditional_t<std::is_integral<RemoveCRType<S>>::value,
template <int32_t n, typename Fn>
LINALG_HD constexpr auto UnrollLoop(Fn fn) {
#if defined(__CUDA_ARCH__) || defined(__HIPCC__)
#if defined __CUDA_ARCH__
#pragma unroll n
#endif // defined __CUDA_ARCH__
for (int32_t i = 0; i < n; ++i) {
@@ -136,7 +136,7 @@ int32_t NativePopc(T v) {
inline LINALG_HD int Popc(uint32_t v) {
#if defined(__CUDA_ARCH__)
return __popc(v);
#elif defined(__GNUC__) || defined(__clang__) || defined(__HIPCC__)
#elif defined(__GNUC__) || defined(__clang__)
return __builtin_popcount(v);
#elif defined(_MSC_VER)
return __popcnt(v);
@@ -148,7 +148,7 @@ inline LINALG_HD int Popc(uint32_t v) {
inline LINALG_HD int Popc(uint64_t v) {
#if defined(__CUDA_ARCH__)
return __popcll(v);
#elif defined(__GNUC__) || defined(__clang__) || defined(__HIPCC__)
#elif defined(__GNUC__) || defined(__clang__)
return __builtin_popcountll(v);
#elif defined(_MSC_VER) && defined(_M_X64)
return __popcnt64(v);

View File

@@ -41,9 +41,7 @@
#if defined(__CUDACC__)
#include <cuda_runtime.h>
#elif defined(__HIPCC__)
#include <hip/hip_runtime.h>
#endif
#endif // defined(__CUDACC__)
/*!
* The version number 1910 is picked up from GSL.
@@ -106,42 +104,7 @@ namespace common {
#define SPAN_CHECK KERNEL_CHECK
#elif defined(__HIPCC__)
// Usual logging facility is not available inside device code.
#if defined(_MSC_VER)
// Windows HIP doesn't have __assert_fail.
#define HIP_KERNEL_CHECK(cond) \
do { \
if (XGBOOST_EXPECT(!(cond), false)) { \
__builtin_trap(); \
} \
} while (0)
#else // defined(_MSC_VER)
#define __ASSERT_STR_HELPER(x) #x
#if 0
#define HIP_KERNEL_CHECK(cond) \
(XGBOOST_EXPECT((cond), true) \
? static_cast<void>(0) \
: __assert_fail(__ASSERT_STR_HELPER((cond)), __FILE__, __LINE__, __PRETTY_FUNCTION__))
#else
#define HIP_KERNEL_CHECK(cond) \
(XGBOOST_EXPECT((cond), true) \
? static_cast<void>(0) \
: __builtin_trap())
#endif
#endif // defined(_MSC_VER)
#define KERNEL_CHECK HIP_KERNEL_CHECK
#define SPAN_CHECK KERNEL_CHECK
#else // ------------------------------ not CUDA or HIP ----------------------------
#else // ------------------------------ not CUDA ----------------------------
#if defined(XGBOOST_STRICT_R_MODE) && XGBOOST_STRICT_R_MODE == 1
@@ -157,7 +120,7 @@ namespace common {
#endif // defined(XGBOOST_STRICT_R_MODE)
#endif // __CUDA_ARCH__ || __HIPCC__
#endif // __CUDA_ARCH__
#define SPAN_LT(lhs, rhs) SPAN_CHECK((lhs) < (rhs))
@@ -354,7 +317,7 @@ struct IsSpanOracle<Span<T, Extent>> : std::true_type {};
template <class T>
struct IsSpan : public IsSpanOracle<typename std::remove_cv<T>::type> {};
// Re-implement std algorithms here to adopt CUDA/HIP
// Re-implement std algorithms here to adopt CUDA.
template <typename T>
struct Less {
XGBOOST_DEVICE constexpr bool operator()(const T& _x, const T& _y) const {

View File

@@ -6,6 +6,6 @@
#define XGBOOST_VER_MAJOR 2 /* NOLINT */
#define XGBOOST_VER_MINOR 0 /* NOLINT */
#define XGBOOST_VER_PATCH 2 /* NOLINT */
#define XGBOOST_VER_PATCH 3 /* NOLINT */
#endif // XGBOOST_VERSION_CONFIG_H_

View File

@@ -9,11 +9,6 @@ if (USE_CUDA)
${PROJECT_SOURCE_DIR}/jvm-packages/xgboost4j-gpu/src/native/xgboost4j-gpu.cu)
endif (USE_CUDA)
if (USE_HIP)
list(APPEND JVM_SOURCES
${PROJECT_SOURCE_DIR}/jvm-packages/xgboost4j-gpu/src/native/xgboost4j-gpu.hip)
endif (USE_HIP)
add_library(xgboost4j SHARED ${JVM_SOURCES} ${XGBOOST_OBJ_SOURCES})
if (ENABLE_ALL_WARNINGS)

View File

@@ -1,6 +1,6 @@
#!/usr/bin/env python
import errno
import argparse
import errno
import glob
import os
import platform
@@ -19,13 +19,10 @@ CONFIG = {
"USE_HDFS": "OFF",
"USE_AZURE": "OFF",
"USE_S3": "OFF",
"USE_CUDA": "OFF",
"USE_NCCL": "OFF",
"USE_HIP": "OFF",
"USE_RCCL": "OFF",
"JVM_BINDINGS": "ON",
"LOG_CAPI_INVOCATION": "OFF"
"LOG_CAPI_INVOCATION": "OFF",
}
@@ -72,27 +69,22 @@ def normpath(path):
return normalized
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument('--log-capi-invocation', type=str, choices=['ON', 'OFF'], default='OFF')
parser.add_argument('--use-cuda', type=str, choices=['ON', 'OFF'], default='OFF')
parser.add_argument('--use-hip', type=str, choices=['ON', 'OFF'], default='OFF')
cli_args = parser.parse_args()
def native_build(args):
if sys.platform == "darwin":
# Enable of your compiler supports OpenMP.
CONFIG["USE_OPENMP"] = "OFF"
os.environ["JAVA_HOME"] = subprocess.check_output(
"/usr/libexec/java_home").strip().decode()
os.environ["JAVA_HOME"] = (
subprocess.check_output("/usr/libexec/java_home").strip().decode()
)
print("building Java wrapper")
with cd(".."):
build_dir = 'build-gpu' if cli_args.use_cuda == 'ON' or cli_args.use_hip == 'ON' else 'build'
build_dir = "build-gpu" if cli_args.use_cuda == "ON" else "build"
maybe_makedirs(build_dir)
with cd(build_dir):
if sys.platform == "win32":
# Force x64 build on Windows.
maybe_generator = ' -A x64'
maybe_generator = " -A x64"
else:
maybe_generator = ""
if sys.platform == "linux":
@@ -100,15 +92,12 @@ if __name__ == "__main__":
else:
maybe_parallel_build = ""
if cli_args.log_capi_invocation == 'ON':
CONFIG['LOG_CAPI_INVOCATION'] = 'ON'
if cli_args.log_capi_invocation == "ON":
CONFIG["LOG_CAPI_INVOCATION"] = "ON"
if cli_args.use_cuda == 'ON':
CONFIG['USE_CUDA'] = 'ON'
CONFIG['USE_NCCL'] = 'ON'
elif cli_args.use_hip== 'ON':
CONFIG['USE_HIP'] = 'ON'
CONFIG['USE_RCCL'] = 'ON'
if cli_args.use_cuda == "ON":
CONFIG["USE_CUDA"] = "ON"
CONFIG["USE_NCCL"] = "ON"
args = ["-D{0}:BOOL={1}".format(k, v) for k, v in CONFIG.items()]
@@ -121,7 +110,7 @@ if __name__ == "__main__":
if gpu_arch_flag is not None:
args.append("%s" % gpu_arch_flag)
lib_dir = os.path.join(os.pardir, 'lib')
lib_dir = os.path.join(os.pardir, "lib")
if os.path.exists(lib_dir):
shutil.rmtree(lib_dir)
run("cmake .. " + " ".join(args) + maybe_generator)
@@ -131,8 +120,10 @@ if __name__ == "__main__":
run(f'"{sys.executable}" mapfeat.py')
run(f'"{sys.executable}" mknfold.py machine.txt 1')
xgboost4j = 'xgboost4j-gpu' if cli_args.use_cuda == 'ON' or cli_args.use_hip== 'ON' else 'xgboost4j'
xgboost4j_spark = 'xgboost4j-spark-gpu' if cli_args.use_cuda == 'ON' or cli_args.use_hip == 'ON' else 'xgboost4j-spark'
xgboost4j = "xgboost4j-gpu" if cli_args.use_cuda == "ON" else "xgboost4j"
xgboost4j_spark = (
"xgboost4j-spark-gpu" if cli_args.use_cuda == "ON" else "xgboost4j-spark"
)
print("copying native library")
library_name, os_folder = {
@@ -147,14 +138,19 @@ if __name__ == "__main__":
"i86pc": "x86_64", # on Solaris x86_64
"sun4v": "sparc", # on Solaris sparc
"arm64": "aarch64", # on macOS & Windows ARM 64-bit
"aarch64": "aarch64"
"aarch64": "aarch64",
}[platform.machine().lower()]
output_folder = "{}/src/main/resources/lib/{}/{}".format(xgboost4j, os_folder, arch_folder)
output_folder = "{}/src/main/resources/lib/{}/{}".format(
xgboost4j, os_folder, arch_folder
)
maybe_makedirs(output_folder)
cp("../lib/" + library_name, output_folder)
print("copying pure-Python tracker")
cp("../python-package/xgboost/tracker.py", "{}/src/main/resources".format(xgboost4j))
cp(
"../python-package/xgboost/tracker.py",
"{}/src/main/resources".format(xgboost4j),
)
print("copying train/test files")
maybe_makedirs("{}/src/test/resources".format(xgboost4j_spark))
@@ -170,3 +166,18 @@ if __name__ == "__main__":
maybe_makedirs("{}/src/test/resources".format(xgboost4j))
for file in glob.glob("../demo/data/agaricus.*"):
cp(file, "{}/src/test/resources".format(xgboost4j))
if __name__ == "__main__":
if "MAVEN_SKIP_NATIVE_BUILD" in os.environ:
print("MAVEN_SKIP_NATIVE_BUILD is set. Skipping native build...")
else:
parser = argparse.ArgumentParser()
parser.add_argument(
"--log-capi-invocation", type=str, choices=["ON", "OFF"], default="OFF"
)
parser.add_argument(
"--use-cuda", type=str, choices=["ON", "OFF"], default="OFF"
)
cli_args = parser.parse_args()
native_build(cli_args)

View File

@@ -5,8 +5,8 @@
<modelVersion>4.0.0</modelVersion>
<groupId>ml.dmlc</groupId>
<artifactId>xgboost-jvm_${scala.binary.version}</artifactId>
<version>2.0.2</version>
<artifactId>xgboost-jvm_2.12</artifactId>
<version>2.0.3</version>
<packaging>pom</packaging>
<name>XGBoost JVM Package</name>
<description>JVM Package for XGBoost</description>
@@ -43,7 +43,6 @@
<maven.wagon.http.retryHandler.count>5</maven.wagon.http.retryHandler.count>
<log.capi.invocation>OFF</log.capi.invocation>
<use.cuda>OFF</use.cuda>
<use.hip>OFF</use.hip>
<cudf.version>23.08.0</cudf.version>
<spark.rapids.version>23.08.0</spark.rapids.version>
<cudf.classifier>cuda11</cudf.classifier>

View File

@@ -5,12 +5,12 @@
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>ml.dmlc</groupId>
<artifactId>xgboost-jvm_${scala.binary.version}</artifactId>
<version>2.0.2</version>
<artifactId>xgboost-jvm_2.12</artifactId>
<version>2.0.3</version>
</parent>
<name>xgboost4j-example</name>
<artifactId>xgboost4j-example_${scala.binary.version}</artifactId>
<version>2.0.2</version>
<artifactId>xgboost4j-example_2.12</artifactId>
<version>2.0.3</version>
<packaging>jar</packaging>
<build>
<plugins>
@@ -26,7 +26,7 @@
<dependencies>
<dependency>
<groupId>ml.dmlc</groupId>
<artifactId>xgboost4j-spark_${scala.binary.version}</artifactId>
<artifactId>xgboost4j-spark_2.12</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
@@ -37,7 +37,7 @@
</dependency>
<dependency>
<groupId>ml.dmlc</groupId>
<artifactId>xgboost4j-flink_${scala.binary.version}</artifactId>
<artifactId>xgboost4j-flink_2.12</artifactId>
<version>${project.version}</version>
</dependency>
</dependencies>

View File

@@ -5,13 +5,13 @@
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>ml.dmlc</groupId>
<artifactId>xgboost-jvm_${scala.binary.version}</artifactId>
<version>2.0.2</version>
<artifactId>xgboost-jvm_2.12</artifactId>
<version>2.0.3</version>
</parent>
<name>xgboost4j-flink</name>
<artifactId>xgboost4j-flink_${scala.binary.version}</artifactId>
<version>2.0.2</version>
<artifactId>xgboost4j-flink_2.12</artifactId>
<version>2.0.3</version>
<properties>
<flink-ml.version>2.2.0</flink-ml.version>
</properties>
@@ -30,7 +30,7 @@
<dependencies>
<dependency>
<groupId>ml.dmlc</groupId>
<artifactId>xgboost4j_${scala.binary.version}</artifactId>
<artifactId>xgboost4j_2.12</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>

View File

@@ -5,12 +5,12 @@
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>ml.dmlc</groupId>
<artifactId>xgboost-jvm_${scala.binary.version}</artifactId>
<version>2.0.2</version>
<artifactId>xgboost-jvm_2.12</artifactId>
<version>2.0.3</version>
</parent>
<artifactId>xgboost4j-gpu_${scala.binary.version}</artifactId>
<artifactId>xgboost4j-gpu_2.12</artifactId>
<name>xgboost4j-gpu</name>
<version>2.0.2</version>
<version>2.0.3</version>
<packaging>jar</packaging>
<dependencies>
@@ -104,8 +104,6 @@
<argument>${log.capi.invocation}</argument>
<argument>--use-cuda</argument>
<argument>${use.cuda}</argument>
<argument>--use-hip</argument>
<argument>${use.hip}</argument>
</arguments>
<workingDirectory>${user.dir}</workingDirectory>
</configuration>

View File

@@ -2,7 +2,7 @@
// Created by bobwang on 2021/9/8.
//
#if !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP)
#ifndef XGBOOST_USE_CUDA
#include <jni.h>

View File

@@ -1,10 +1,6 @@
#include <jni.h>
#if defined(XGBOOST_USE_CUDA)
#include "../../../../src/common/device_helpers.cuh"
#elif defined(XGBOOST_USE_HIP)
#include "../../../../src/common/device_helpers.hip.h"
#endif
#include "../../../../src/common/cuda_pinned_allocator.h"
#include "../../../../src/data/array_interface.h"
#include "jvm_utils.h"

View File

@@ -1,4 +0,0 @@
#if defined(XGBOOST_USE_HIP)
#include "xgboost4j-gpu.cu"
#endif

View File

@@ -5,11 +5,11 @@
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>ml.dmlc</groupId>
<artifactId>xgboost-jvm_${scala.binary.version}</artifactId>
<version>2.0.2</version>
<artifactId>xgboost-jvm_2.12</artifactId>
<version>2.0.3</version>
</parent>
<name>xgboost4j-spark-gpu</name>
<artifactId>xgboost4j-spark-gpu_${scala.binary.version}</artifactId>
<artifactId>xgboost4j-spark-gpu_2.12</artifactId>
<build>
<plugins>
<plugin>
@@ -24,7 +24,7 @@
<dependencies>
<dependency>
<groupId>ml.dmlc</groupId>
<artifactId>xgboost4j-gpu_${scala.binary.version}</artifactId>
<artifactId>xgboost4j-gpu_2.12</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>

View File

@@ -5,11 +5,11 @@
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>ml.dmlc</groupId>
<artifactId>xgboost-jvm_${scala.binary.version}</artifactId>
<version>2.0.2</version>
<artifactId>xgboost-jvm_2.12</artifactId>
<version>2.0.3</version>
</parent>
<name>xgboost4j-spark</name>
<artifactId>xgboost4j-spark_${scala.binary.version}</artifactId>
<artifactId>xgboost4j-spark_2.12</artifactId>
<build>
<plugins>
<plugin>
@@ -24,7 +24,7 @@
<dependencies>
<dependency>
<groupId>ml.dmlc</groupId>
<artifactId>xgboost4j_${scala.binary.version}</artifactId>
<artifactId>xgboost4j_2.12</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>

View File

@@ -5,12 +5,12 @@
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>ml.dmlc</groupId>
<artifactId>xgboost-jvm_${scala.binary.version}</artifactId>
<version>2.0.2</version>
<artifactId>xgboost-jvm_2.12</artifactId>
<version>2.0.3</version>
</parent>
<name>xgboost4j</name>
<artifactId>xgboost4j_${scala.binary.version}</artifactId>
<version>2.0.2</version>
<artifactId>xgboost4j_2.12</artifactId>
<version>2.0.3</version>
<packaging>jar</packaging>
<dependencies>

View File

@@ -15,10 +15,6 @@ class BuildConfiguration: # pylint: disable=R0902
use_cuda: bool = False
# Whether to enable NCCL
use_nccl: bool = False
# Whether to enablea HIP
use_hip: bool = False
# Whether to enable RCCL
use_rccl: bool = False
# Whether to enable HDFS
use_hdfs: bool = False
# Whether to enable Azure Storage

View File

@@ -7,7 +7,7 @@ build-backend = "packager.pep517"
[project]
name = "xgboost"
version = "2.0.2"
version = "2.0.3"
authors = [
{ name = "Hyunsu Cho", email = "chohyu01@cs.washington.edu" },
{ name = "Jiaming Yuan", email = "jm.yuan@outlook.com" }

View File

@@ -1 +1 @@
2.0.2
2.0.3

View File

@@ -206,6 +206,7 @@ def _load_lib() -> ctypes.CDLL:
lib = ctypes.cdll.LoadLibrary(lib_path)
setattr(lib, "path", os.path.normpath(lib_path))
lib_success = True
break
except OSError as e:
os_error_list.append(str(e))
continue

View File

@@ -78,7 +78,6 @@ from .data import _is_cudf_ser, _is_cupy_array
from .sklearn import (
XGBClassifier,
XGBClassifierBase,
XGBClassifierMixIn,
XGBModel,
XGBRanker,
XGBRankerMixIn,
@@ -1854,7 +1853,7 @@ class DaskXGBRegressor(DaskScikitLearnBase, XGBRegressorBase):
"Implementation of the scikit-learn API for XGBoost classification.",
["estimators", "model"],
)
class DaskXGBClassifier(DaskScikitLearnBase, XGBClassifierMixIn, XGBClassifierBase):
class DaskXGBClassifier(DaskScikitLearnBase, XGBClassifierBase):
# pylint: disable=missing-class-docstring
async def _fit_async(
self,
@@ -2036,10 +2035,6 @@ class DaskXGBClassifier(DaskScikitLearnBase, XGBClassifierMixIn, XGBClassifierBa
preds = da.map_blocks(_argmax, pred_probs, drop_axis=1)
return preds
def load_model(self, fname: ModelIn) -> None:
super().load_model(fname)
self._load_model_attributes(self.get_booster())
@xgboost_model_doc(
"""Implementation of the Scikit-Learn API for XGBoost Ranking.

View File

@@ -43,19 +43,6 @@ from .data import _is_cudf_df, _is_cudf_ser, _is_cupy_array, _is_pandas_df
from .training import train
class XGBClassifierMixIn: # pylint: disable=too-few-public-methods
"""MixIn for classification."""
def __init__(self, *args: Any, **kwargs: Any) -> None:
super().__init__(*args, **kwargs)
def _load_model_attributes(self, booster: Booster) -> None:
config = json.loads(booster.save_config())
self.n_classes_ = int(config["learner"]["learner_model_param"]["num_class"])
# binary classification is treated as regression in XGBoost.
self.n_classes_ = 2 if self.n_classes_ < 2 else self.n_classes_
class XGBRankerMixIn: # pylint: disable=too-few-public-methods
"""MixIn for ranking, defines the _estimator_type usually defined in scikit-learn
base classes.
@@ -845,21 +832,38 @@ class XGBModel(XGBModelBase):
self.get_booster().load_model(fname)
meta_str = self.get_booster().attr("scikit_learn")
if meta_str is None:
return
if meta_str is not None:
meta = json.loads(meta_str)
t = meta.get("_estimator_type", None)
if t is not None and t != self._get_type():
raise TypeError(
"Loading an estimator with different type. Expecting: "
f"{self._get_type()}, got: {t}"
)
meta = json.loads(meta_str)
t = meta.get("_estimator_type", None)
if t is not None and t != self._get_type():
raise TypeError(
"Loading an estimator with different type. Expecting: "
f"{self._get_type()}, got: {t}"
)
self.feature_types = self.get_booster().feature_types
self.get_booster().set_attr(scikit_learn=None)
config = json.loads(self.get_booster().save_config())
self._load_model_attributes(config)
load_model.__doc__ = f"""{Booster.load_model.__doc__}"""
def _load_model_attributes(self, config: dict) -> None:
"""Load model attributes without hyper-parameters."""
from sklearn.base import is_classifier
booster = self.get_booster()
self.objective = config["learner"]["objective"]["name"]
self.booster = config["learner"]["gradient_booster"]["name"]
self.base_score = config["learner"]["learner_model_param"]["base_score"]
self.feature_types = booster.feature_types
if is_classifier(self):
self.n_classes_ = int(config["learner"]["learner_model_param"]["num_class"])
# binary classification is treated as regression in XGBoost.
self.n_classes_ = 2 if self.n_classes_ < 2 else self.n_classes_
# pylint: disable=too-many-branches
def _configure_fit(
self,
@@ -1409,7 +1413,7 @@ def _cls_predict_proba(n_classes: int, prediction: PredtT, vstack: Callable) ->
Number of boosting rounds.
""",
)
class XGBClassifier(XGBModel, XGBClassifierMixIn, XGBClassifierBase):
class XGBClassifier(XGBModel, XGBClassifierBase):
# pylint: disable=missing-docstring,invalid-name,too-many-instance-attributes
@_deprecate_positional_args
def __init__(
@@ -1637,10 +1641,6 @@ class XGBClassifier(XGBModel, XGBClassifierMixIn, XGBClassifierBase):
def classes_(self) -> np.ndarray:
return np.arange(self.n_classes_)
def load_model(self, fname: ModelIn) -> None:
super().load_model(fname)
self._load_model_attributes(self.get_booster())
@xgboost_model_doc(
"scikit-learn API for XGBoost random forest classification.",

Submodule rocgputreeshap deleted from 2fea6734e8

View File

@@ -16,11 +16,6 @@ if (USE_CUDA)
target_sources(objxgboost PRIVATE ${CUDA_SOURCES})
endif (USE_CUDA)
if (USE_HIP)
file(GLOB_RECURSE HIP_SOURCES *.hip *.hip.h)
target_sources(objxgboost PRIVATE ${HIP_SOURCES})
endif (USE_HIP)
target_include_directories(objxgboost
PRIVATE
${xgboost_SOURCE_DIR}/include
@@ -38,7 +33,6 @@ msvc_use_static_runtime()
# This grouping organises source files nicely in visual studio
auto_source_group("${CUDA_SOURCES}")
auto_source_group("${HIP_SOURCES}")
auto_source_group("${CPU_SOURCES}")
#-- End object library

View File

@@ -70,14 +70,12 @@ XGB_DLL void XGBoostVersion(int* major, int* minor, int* patch) {
using GlobalConfigAPIThreadLocalStore = dmlc::ThreadLocalStore<XGBAPIThreadLocalEntry>;
#if !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP)
#if !defined(XGBOOST_USE_CUDA)
namespace xgboost {
void XGBBuildInfoDevice(Json *p_info) {
auto &info = *p_info;
info["USE_CUDA"] = Boolean{false};
info["USE_NCCL"] = Boolean{false};
info["USE_HIP"] = Boolean{false};
info["USE_RCCL"] = Boolean{false};
info["USE_RMM"] = Boolean{false};
}
} // namespace xgboost
@@ -281,7 +279,7 @@ XGB_DLL int XGDMatrixCreateFromDataIter(
API_END();
}
#if !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP)
#ifndef XGBOOST_USE_CUDA
XGB_DLL int XGDMatrixCreateFromCudaColumnar(char const *, char const *, DMatrixHandle *) {
API_BEGIN();
common::AssertGPUSupport();
@@ -1191,7 +1189,7 @@ XGB_DLL int XGBoosterPredictFromCSR(BoosterHandle handle, char const *indptr, ch
API_END();
}
#if !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP)
#if !defined(XGBOOST_USE_CUDA)
XGB_DLL int XGBoosterPredictFromCUDAArray(BoosterHandle handle, char const *, char const *,
DMatrixHandle, xgboost::bst_ulong const **,
xgboost::bst_ulong *, const float **) {

View File

@@ -17,11 +17,7 @@ namespace xgboost {
void XGBBuildInfoDevice(Json *p_info) {
auto &info = *p_info;
#if defined(XGBOOST_USE_CUDA)
info["USE_CUDA"] = true;
#elif defined(XGBOOST_USE_HIP)
info["USE_HIP"] = true;
#endif
std::vector<Json> v{Json{Integer{THRUST_MAJOR_VERSION}}, Json{Integer{THRUST_MINOR_VERSION}},
Json{Integer{THRUST_SUBMINOR_VERSION}}};
@@ -34,13 +30,8 @@ void XGBBuildInfoDevice(Json *p_info) {
info["USE_NCCL"] = Boolean{true};
v = {Json{Integer{NCCL_MAJOR}}, Json{Integer{NCCL_MINOR}}, Json{Integer{NCCL_PATCH}}};
info["NCCL_VERSION"] = v;
#elif defined(XGBOOST_USE_RCCL)
info["USE_RCCL"] = Boolean{true};
v = {Json{Integer{NCCL_MAJOR}}, Json{Integer{NCCL_MINOR}}, Json{Integer{NCCL_PATCH}}};
info["RCCL_VERSION"] = v;
#else
info["USE_NCCL"] = Boolean{false};
info["USE_RCCL"] = Boolean{false};
#endif
#if defined(XGBOOST_USE_RMM)

View File

@@ -1,4 +0,0 @@
#if defined(XGBOOST_USE_HIP)
#include "c_api.cu"
#endif

View File

@@ -175,7 +175,7 @@ inline float GetMissing(Json const &config) {
// Safe guard some global variables from being changed by XGBoost.
class XGBoostAPIGuard {
#if defined(XGBOOST_USE_CUDA) || defined(XGBOOST_USE_HIP)
#if defined(XGBOOST_USE_CUDA)
int32_t device_id_ {0};
void SetGPUAttribute();

View File

@@ -51,7 +51,7 @@ void Communicator::Init(Json const& config) {
}
}
#if !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP)
#ifndef XGBOOST_USE_CUDA
void Communicator::Finalize() {
communicator_->Shutdown();
communicator_.reset(new NoOpCommunicator());

View File

@@ -5,7 +5,7 @@
#include "device_communicator.cuh"
#include "device_communicator_adapter.cuh"
#include "noop_communicator.h"
#if defined(XGBOOST_USE_NCCL) || defined(XGBOOST_USE_RCCL)
#ifdef XGBOOST_USE_NCCL
#include "nccl_device_communicator.cuh"
#endif
@@ -28,7 +28,7 @@ DeviceCommunicator* Communicator::GetDevice(int device_ordinal) {
communicator_->GetWorldSize() != old_world_size) {
old_device_ordinal = device_ordinal;
old_world_size = communicator_->GetWorldSize();
#if defined(XGBOOST_USE_NCCL) || defined(XGBOOST_USE_RCCL)
#ifdef XGBOOST_USE_NCCL
switch (type_) {
case CommunicatorType::kRabit:
device_communicator_.reset(new NcclDeviceCommunicator(device_ordinal, false));

View File

@@ -98,7 +98,7 @@ class Communicator {
/** @brief Get the communicator instance. */
static Communicator *Get() { return communicator_.get(); }
#if defined(XGBOOST_USE_CUDA) || defined(XGBOOST_USE_HIP)
#if defined(XGBOOST_USE_CUDA)
/**
* @brief Get the device communicator.
*
@@ -230,7 +230,7 @@ class Communicator {
static thread_local std::unique_ptr<Communicator> communicator_;
static thread_local CommunicatorType type_;
#if defined(XGBOOST_USE_CUDA) || defined(XGBOOST_USE_HIP)
#if defined(XGBOOST_USE_CUDA)
static thread_local std::unique_ptr<DeviceCommunicator> device_communicator_;
#endif

View File

@@ -1,4 +0,0 @@
#if defined(XGBOOST_USE_HIP)
#include "communicator.cu"
#endif

View File

@@ -41,6 +41,7 @@ class DeviceCommunicatorAdapter : public DeviceCommunicator {
}
dh::safe_cuda(cudaSetDevice(device_ordinal_));
segments->clear();
segments->resize(world_size_, 0);
segments->at(rank_) = length_bytes;
@@ -59,7 +60,6 @@ class DeviceCommunicatorAdapter : public DeviceCommunicator {
Broadcast(host_buffer_.data() + offset, as_bytes, i);
offset += as_bytes;
}
dh::safe_cuda(cudaMemcpy(receive_buffer->data().get(), host_buffer_.data(), total_bytes,
cudaMemcpyDefault));
}

View File

@@ -1,7 +1,7 @@
/*!
* Copyright 2023 XGBoost contributors
*/
#if defined(XGBOOST_USE_NCCL) || defined(XGBOOST_USE_RCCL)
#if defined(XGBOOST_USE_NCCL)
#include "nccl_device_communicator.cuh"
namespace xgboost {

View File

@@ -35,22 +35,12 @@ class NcclDeviceCommunicator : public DeviceCommunicator {
private:
static constexpr std::size_t kUuidLength =
#if defined(XGBOOST_USE_CUDA)
sizeof(std::declval<cudaDeviceProp>().uuid) / sizeof(uint64_t);
#elif defined(XGBOOST_USE_HIP)
sizeof(hipUUID) / sizeof(uint64_t);
#endif
void GetCudaUUID(xgboost::common::Span<uint64_t, kUuidLength> const &uuid) const {
#if defined(XGBOOST_USE_CUDA)
cudaDeviceProp prob{};
dh::safe_cuda(cudaGetDeviceProperties(&prob, device_ordinal_));
std::memcpy(uuid.data(), static_cast<void *>(&(prob.uuid)), sizeof(prob.uuid));
#elif defined(XGBOOST_USE_HIP)
hipUUID id;
hipDeviceGetUuid(&id, device_ordinal_);
std::memcpy(uuid.data(), static_cast<void *>(&id), sizeof(id));
#endif
}
static std::string PrintUUID(xgboost::common::Span<uint64_t, kUuidLength> const &uuid) {

View File

@@ -1,4 +0,0 @@
#if defined(XGBOOST_USE_HIP)
#include "nccl_device_communicator.cu"
#endif

View File

@@ -10,13 +10,7 @@
#include <cstddef> // size_t
#include <cstdint> // int32_t
#if defined(XGBOOST_USE_HIP)
#include <hipcub/hipcub.hpp>
#elif defined(XGBOOST_USE_CUDA)
#include <cub/cub.cuh> // DispatchSegmentedRadixSort,NullType,DoubleBuffer
#endif
#include <iterator> // distance
#include <limits> // numeric_limits
#include <type_traits> // conditional_t,remove_const_t
@@ -45,7 +39,6 @@ static void DeviceSegmentedRadixSortKeys(CUDAContext const *ctx, void *d_temp_st
using OffsetT = int;
// Null value type
#if defined(XGBOOST_USE_CUDA)
cub::DoubleBuffer<KeyT> d_keys(const_cast<KeyT *>(d_keys_in), d_keys_out);
cub::DoubleBuffer<cub::NullType> d_values;
@@ -54,20 +47,6 @@ static void DeviceSegmentedRadixSortKeys(CUDAContext const *ctx, void *d_temp_st
OffsetT>::Dispatch(d_temp_storage, temp_storage_bytes, d_keys, d_values, num_items,
num_segments, d_begin_offsets, d_end_offsets, begin_bit,
end_bit, false, ctx->Stream(), debug_synchronous)));
#elif defined(XGBOOST_USE_HIP)
if (IS_DESCENDING) {
rocprim::segmented_radix_sort_pairs_desc<KeyT, hipcub::NullType, BeginOffsetIteratorT>(d_temp_storage,
temp_storage_bytes, d_keys_in, d_keys_out, nullptr, nullptr, num_items,
num_segments, d_begin_offsets, d_end_offsets,
begin_bit, end_bit, ctx->Stream(), debug_synchronous);
}
else {
rocprim::segmented_radix_sort_pairs<KeyT, hipcub::NullType, BeginOffsetIteratorT>(d_temp_storage,
temp_storage_bytes, d_keys_in, d_keys_out, nullptr, nullptr, num_items,
num_segments, d_begin_offsets, d_end_offsets,
begin_bit, end_bit, ctx->Stream(), debug_synchronous);
}
#endif
}
// Wrapper around cub sort for easier `descending` sort.
@@ -81,18 +60,14 @@ void DeviceSegmentedRadixSortPair(void *d_temp_storage,
BeginOffsetIteratorT d_begin_offsets,
EndOffsetIteratorT d_end_offsets, dh::CUDAStreamView stream,
int begin_bit = 0, int end_bit = sizeof(KeyT) * 8) {
#if defined(XGBOOST_USE_CUDA)
cub::DoubleBuffer<KeyT> d_keys(const_cast<KeyT *>(d_keys_in), d_keys_out);
cub::DoubleBuffer<ValueT> d_values(const_cast<ValueT *>(d_values_in), d_values_out);
#endif
// In old version of cub, num_items in dispatch is also int32_t, no way to change.
using OffsetT = std::conditional_t<dh::BuildWithCUDACub() && dh::HasThrustMinorVer<13>(),
std::size_t, std::int32_t>;
CHECK_LE(num_items, std::numeric_limits<OffsetT>::max());
// For Thrust >= 1.12 or CUDA >= 11.4, we require system cub installation
#if defined(XGBOOST_USE_CUDA)
#if THRUST_MAJOR_VERSION >= 2
dh::safe_cuda((cub::DispatchSegmentedRadixSort<
descending, KeyT, ValueT, BeginOffsetIteratorT, EndOffsetIteratorT,
@@ -113,18 +88,6 @@ void DeviceSegmentedRadixSortPair(void *d_temp_storage,
d_begin_offsets, d_end_offsets, begin_bit,
end_bit, false, stream, false)));
#endif
#elif defined(XGBOOST_USE_HIP)
if (descending) {
rocprim::segmented_radix_sort_pairs_desc(d_temp_storage, temp_storage_bytes, d_keys_in, d_keys_out,
d_values_in, d_values_out, num_items, num_segments,
d_begin_offsets, d_end_offsets, begin_bit, end_bit, stream, false);
}
else {
rocprim::segmented_radix_sort_pairs(d_temp_storage, temp_storage_bytes, d_keys_in, d_keys_out,
d_values_in, d_values_out, num_items, num_segments, d_begin_offsets, d_end_offsets,
begin_bit, end_bit, stream, false);
}
#endif
}
} // namespace detail

View File

@@ -17,18 +17,14 @@
#include <thrust/copy.h>
#include <thrust/device_ptr.h>
#include "device_helpers.cuh"
#elif defined(__HIPCC__)
#include <thrust/copy.h>
#include <thrust/device_ptr.h>
#include "device_helpers.hip.h"
#endif // defined(__CUDACC__) || defined(__HIPCC__)
#endif // defined(__CUDACC__)
#include "xgboost/span.h"
#include "common.h"
namespace xgboost {
#if defined(__CUDACC__) || defined(__HIPCC__)
#if defined(__CUDACC__)
using BitFieldAtomicType = unsigned long long; // NOLINT
__forceinline__ __device__ BitFieldAtomicType AtomicOr(BitFieldAtomicType* address,
@@ -52,7 +48,7 @@ __forceinline__ __device__ BitFieldAtomicType AtomicAnd(BitFieldAtomicType* addr
return old;
}
#endif // defined(__CUDACC__) || defined(__HIPCC__)
#endif // defined(__CUDACC__)
/**
* @brief A non-owning type with auxiliary methods defined for manipulating bits.
@@ -110,7 +106,7 @@ struct BitFieldContainer {
XGBOOST_DEVICE static size_t ComputeStorageSize(index_type size) {
return common::DivRoundUp(size, kValueSize);
}
#if defined(__CUDA_ARCH__) || defined(__HIPCC__)
#if defined(__CUDA_ARCH__)
__device__ BitFieldContainer& operator|=(BitFieldContainer const& rhs) {
auto tid = blockIdx.x * blockDim.x + threadIdx.x;
size_t min_size = min(NumValues(), rhs.NumValues());
@@ -127,9 +123,9 @@ struct BitFieldContainer {
}
return *this;
}
#endif // #if defined(__CUDA_ARCH__) || defined(__HIPCC__)
#endif // #if defined(__CUDA_ARCH__)
#if defined(__CUDA_ARCH__) || defined(__HIPCC__)
#if defined(__CUDA_ARCH__)
__device__ BitFieldContainer& operator&=(BitFieldContainer const& rhs) {
size_t min_size = min(NumValues(), rhs.NumValues());
auto tid = blockIdx.x * blockDim.x + threadIdx.x;
@@ -148,7 +144,7 @@ struct BitFieldContainer {
}
#endif // defined(__CUDA_ARCH__)
#if defined(__CUDA_ARCH__) || defined(__HIPCC__)
#if defined(__CUDA_ARCH__)
__device__ auto Set(index_type pos) noexcept(true) {
Pos pos_v = Direction::Shift(ToBitPos(pos));
value_type& value = Data()[pos_v.int_pos];
@@ -163,14 +159,6 @@ struct BitFieldContainer {
using Type = typename dh::detail::AtomicDispatcher<sizeof(value_type)>::Type;
atomicAnd(reinterpret_cast<Type *>(&value), clear_bit);
}
#ifdef __HIPCC__
void Clear(index_type pos) noexcept(true) {
Pos pos_v = Direction::Shift(ToBitPos(pos));
value_type& value = Data()[pos_v.int_pos];
value_type clear_bit = ~(kOne << pos_v.bit_pos);
value &= clear_bit;
}
#endif
#else
void Set(index_type pos) noexcept(true) {
Pos pos_v = Direction::Shift(ToBitPos(pos));
@@ -184,7 +172,7 @@ struct BitFieldContainer {
value_type clear_bit = ~(kOne << pos_v.bit_pos);
value &= clear_bit;
}
#endif // defined(__CUDA_ARCH__) || defined(__HIPCC__)
#endif // defined(__CUDA_ARCH__)
XGBOOST_DEVICE bool Check(Pos pos_v) const noexcept(true) {
pos_v = Direction::Shift(pos_v);

View File

@@ -55,7 +55,7 @@ void EscapeU8(std::string const &string, std::string *p_buffer) {
}
}
#if !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP)
#if !defined(XGBOOST_USE_CUDA)
int AllVisibleGPUs() { return 0; }
#endif // !defined(XGBOOST_USE_CUDA)

View File

@@ -2,7 +2,6 @@
* Copyright 2018-2022 XGBoost contributors
*/
#include "common.h"
#include "cuda_to_hip.h"
namespace xgboost {
namespace common {

View File

@@ -26,12 +26,6 @@
#define WITH_CUDA() true
#elif defined(__HIPCC__)
#include <thrust/system/hip/error.h>
#include <thrust/system_error.h>
#define WITH_CUDA() true
#else
#define WITH_CUDA() false
@@ -45,8 +39,8 @@ namespace dh {
*/
#define safe_cuda(ans) ThrowOnCudaError((ans), __FILE__, __LINE__)
inline cudaError_t ThrowOnCudaError(cudaError_t code, const char *file, int line)
{
inline cudaError_t ThrowOnCudaError(cudaError_t code, const char *file,
int line) {
if (code != cudaSuccess) {
LOG(FATAL) << thrust::system_error(code, thrust::cuda_category(),
std::string{file} + ": " + // NOLINT
@@ -54,23 +48,7 @@ inline cudaError_t ThrowOnCudaError(cudaError_t code, const char *file, int line
}
return code;
}
#elif defined(__HIPCC__)
/*
* Error handling functions
*/
#define safe_cuda(ans) ThrowOnCudaError((ans), __FILE__, __LINE__)
inline hipError_t ThrowOnCudaError(hipError_t code, const char *file, int line)
{
if (code != hipSuccess) {
LOG(FATAL) << thrust::system_error(code, thrust::hip_category(),
std::string{file} + ": " + // NOLINT
std::to_string(line)).what();
}
return code;
}
#endif
#endif // defined(__CUDACC__)
} // namespace dh
namespace xgboost::common {
@@ -181,9 +159,9 @@ class Range {
int AllVisibleGPUs();
inline void AssertGPUSupport() {
#if !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP)
#ifndef XGBOOST_USE_CUDA
LOG(FATAL) << "XGBoost version not compiled with GPU support.";
#endif // XGBOOST_USE_CUDA && XGBOOST_USE_HIP
#endif // XGBOOST_USE_CUDA
}
inline void AssertOneAPISupport() {
@@ -194,7 +172,7 @@ inline void AssertOneAPISupport() {
void SetDevice(std::int32_t device);
#if !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP)
#if !defined(XGBOOST_USE_CUDA)
inline void SetDevice(std::int32_t device) {
if (device >= 0) {
AssertGPUSupport();

View File

@@ -1,4 +0,0 @@
#if defined(XGBOOST_USE_HIP)
#include "common.cu"
#endif

View File

@@ -11,11 +11,9 @@
#include "common.h"
#if defined(__CUDACC__)
#ifdef __CUDACC__
#include "device_helpers.cuh"
#elif defined(__HIPCC__)
#include "device_helpers.hip.h"
#endif // __CUDACC__ || __HIPCC__
#endif // __CUDACC__
namespace xgboost {
namespace common {
@@ -107,7 +105,7 @@ class CompressedBufferWriter {
}
}
#if defined(__CUDACC__) || defined(__HIPCC__)
#ifdef __CUDACC__
__device__ void AtomicWriteSymbol
(CompressedByteT* buffer, uint64_t symbol, size_t offset) {
size_t ibit_start = offset * symbol_bits_;
@@ -121,7 +119,7 @@ class CompressedBufferWriter {
symbol >>= 8;
}
}
#endif // __CUDACC__ || __HIPCC__
#endif // __CUDACC__
template <typename IterT>
void Write(CompressedByteT *buffer, IterT input_begin, IterT input_end) {

View File

@@ -4,6 +4,7 @@
#ifndef XGBOOST_COMMON_CUDA_CONTEXT_CUH_
#define XGBOOST_COMMON_CUDA_CONTEXT_CUH_
#include <thrust/execution_policy.h>
#include "device_helpers.cuh"
namespace xgboost {

View File

@@ -72,23 +72,11 @@ class pinned_allocator {
if (cnt > this->max_size()) { throw std::bad_alloc(); } // end if
pointer result(nullptr);
#if defined(XGBOOST_USE_HIP)
dh::safe_cuda(hipHostMalloc(reinterpret_cast<void**>(&result), cnt * sizeof(value_type)));
#else
dh::safe_cuda(cudaMallocHost(reinterpret_cast<void**>(&result), cnt * sizeof(value_type)));
#endif
return result;
}
inline void deallocate(pointer p, size_type) {
#if defined(XGBOOST_USE_HIP)
dh::safe_cuda(hipHostFree(p));
#else
dh::safe_cuda(cudaFreeHost(p));
#endif
} // NOLINT
inline void deallocate(pointer p, size_type) { dh::safe_cuda(cudaFreeHost(p)); } // NOLINT
inline size_type max_size() const { return (std::numeric_limits<size_type>::max)() / sizeof(T); } // NOLINT

View File

@@ -1,71 +0,0 @@
/**
* Copyright 2017-2023 XGBoost contributors
*/
#pragma once
#if defined(XGBOOST_USE_HIP)
#define cudaSuccess hipSuccess
#define cudaGetLastError hipGetLastError
#define cudaStream_t hipStream_t
#define cudaStreamCreate hipStreamCreate
#define cudaStreamCreateWithFlags hipStreamCreateWithFlags
#define cudaStreamDestroy hipStreamDestroy
#define cudaStreamWaitEvent hipStreamWaitEvent
#define cudaStreamSynchronize hipStreamSynchronize
#define cudaStreamPerThread hipStreamPerThread
#define cudaStreamLegacy hipStreamLegacy
#define cudaEvent_t hipEvent_t
#define cudaEventCreate hipEventCreate
#define cudaEventCreateWithFlags hipEventCreateWithFlags
#define cudaEventDestroy hipEventDestroy
#define cudaGetDevice hipGetDevice
#define cudaSetDevice hipSetDevice
#define cudaGetDeviceCount hipGetDeviceCount
#define cudaDeviceSynchronize hipDeviceSynchronize
#define cudaGetDeviceProperties hipGetDeviceProperties
#define cudaDeviceGetAttribute hipDeviceGetAttribute
#define cudaMallocHost hipMallocHost
#define cudaFreeHost hipFreeHost
#define cudaMalloc hipMalloc
#define cudaFree hipFree
#define cudaMemcpy hipMemcpy
#define cudaMemcpyAsync hipMemcpyAsync
#define cudaMemcpyDefault hipMemcpyDefault
#define cudaMemcpyHostToDevice hipMemcpyHostToDevice
#define cudaMemcpyHostToHost hipMemcpyHostToHost
#define cudaMemcpyDeviceToHost hipMemcpyDeviceToHost
#define cudaMemcpyDeviceToDevice hipMemcpyDeviceToDevice
#define cudaMemsetAsync hipMemsetAsync
#define cudaMemset hipMemset
#define cudaPointerAttributes hipPointerAttribute_t
#define cudaPointerGetAttributes hipPointerGetAttributes
#define cudaMemGetInfo hipMemGetInfo
#define cudaFuncSetAttribute hipFuncSetAttribute
#define cudaDevAttrMultiProcessorCount hipDeviceAttributeMultiprocessorCount
#define cudaOccupancyMaxActiveBlocksPerMultiprocessor hipOccupancyMaxActiveBlocksPerMultiprocessor
namespace thrust {
namespace hip {
}
}
namespace thrust {
namespace cuda = thrust::hip;
}
namespace hipcub {
}
namespace cub = hipcub;
#endif

View File

@@ -2,8 +2,6 @@
* Copyright 2017-2023 XGBoost contributors
*/
#pragma once
#if defined(XGBOOST_USE_CUDA)
#include <thrust/binary_search.h> // thrust::upper_bound
#include <thrust/device_malloc_allocator.h>
#include <thrust/device_ptr.h>
@@ -1220,7 +1218,3 @@ class LDGIterator {
}
};
} // namespace dh
#elif defined(XGBOOST_USE_HIP)
#include "device_helpers.hip.h"
#endif

File diff suppressed because it is too large Load Diff

View File

@@ -84,19 +84,10 @@ __global__ void GetColumnSizeSharedMemKernel(IterSpan<BatchIt> batch_iter,
template <std::uint32_t kBlockThreads, typename Kernel>
std::uint32_t EstimateGridSize(std::int32_t device, Kernel kernel, std::size_t shared_mem) {
int n_mps = 0;
#if defined(XGBOOST_USE_CUDA)
dh::safe_cuda(cudaDeviceGetAttribute(&n_mps, cudaDevAttrMultiProcessorCount, device));
#elif defined(XGBOOST_USE_HIP)
dh::safe_cuda(hipDeviceGetAttribute(&n_mps, hipDeviceAttributeMultiprocessorCount, device));
#endif
int n_blocks_per_mp = 0;
#if defined(XGBOOST_USE_CUDA)
dh::safe_cuda(cudaOccupancyMaxActiveBlocksPerMultiprocessor(&n_blocks_per_mp, kernel,
kBlockThreads, shared_mem));
#elif defined(XGBOOST_USE_HIP)
dh::safe_cuda(hipOccupancyMaxActiveBlocksPerMultiprocessor(&n_blocks_per_mp, kernel,
kBlockThreads, shared_mem));
#endif
std::uint32_t grid_size = n_blocks_per_mp * n_mps;
return grid_size;
}
@@ -179,7 +170,6 @@ void GetColumnSizesScan(int device, size_t num_columns, std::size_t num_cuts_per
column_sizes_scan->begin(), [=] __device__(size_t column_size) {
return thrust::min(num_cuts_per_feature, column_size);
});
thrust::exclusive_scan(thrust::cuda::par(alloc), cut_ptr_it,
cut_ptr_it + column_sizes_scan->size(), cuts_ptr->DevicePointer());
thrust::exclusive_scan(thrust::cuda::par(alloc), column_sizes_scan->begin(),
@@ -304,7 +294,6 @@ void ProcessSlidingWindow(AdapterBatch const &batch, MetaInfo const &info,
&column_sizes_scan,
&sorted_entries);
dh::XGBDeviceAllocator<char> alloc;
thrust::sort(thrust::cuda::par(alloc), sorted_entries.begin(),
sorted_entries.end(), detail::EntryCompareOp());
@@ -364,13 +353,11 @@ void ProcessWeightedSlidingWindow(Batch batch, MetaInfo const& info,
bst_group_t group_idx = dh::SegmentId(d_group_ptr, ridx);
return weights[group_idx];
});
auto retit = thrust::copy_if(thrust::cuda::par(alloc),
weight_iter + begin, weight_iter + end,
batch_iter + begin,
d_temp_weights.data(), // output
is_valid);
CHECK_EQ(retit - d_temp_weights.data(), d_temp_weights.size());
} else {
CHECK_EQ(batch.NumRows(), weights.size());
@@ -379,13 +366,11 @@ void ProcessWeightedSlidingWindow(Batch batch, MetaInfo const& info,
[=]__device__(size_t idx) -> float {
return weights[batch.GetElement(idx).row_idx];
});
auto retit = thrust::copy_if(thrust::cuda::par(alloc),
weight_iter + begin, weight_iter + end,
batch_iter + begin,
d_temp_weights.data(), // output
is_valid);
CHECK_EQ(retit - d_temp_weights.data(), d_temp_weights.size());
}

View File

@@ -1,4 +0,0 @@
#if defined(XGBOOST_USE_HIP)
#include "hist_util.cu"
#endif

View File

@@ -1,7 +1,7 @@
/**
* Copyright 2017-2023 by XGBoost contributors
*/
#if !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP)
#ifndef XGBOOST_USE_CUDA
// dummy implementation of HostDeviceVector in case CUDA is not used
@@ -199,4 +199,4 @@ template class HostDeviceVector<std::size_t>;
} // namespace xgboost
#endif // XGBOOST_USE_CUDA && !defined(XGBOOST_USE_HIP)
#endif // XGBOOST_USE_CUDA

View File

@@ -139,7 +139,6 @@ class HostDeviceVectorImpl {
auto ptr = other->ConstDevicePointer();
SetDevice();
CHECK_EQ(this->DeviceIdx(), other->DeviceIdx());
dh::safe_cuda(cudaMemcpyAsync(this->DevicePointer() + ori_size,
ptr,
other->Size() * sizeof(T),
@@ -196,7 +195,6 @@ class HostDeviceVectorImpl {
gpu_access_ = access;
if (data_h_.size() != data_d_->size()) { data_h_.resize(data_d_->size()); }
SetDevice();
dh::safe_cuda(cudaMemcpy(data_h_.data(),
data_d_->data().get(),
data_d_->size() * sizeof(T),
@@ -213,7 +211,6 @@ class HostDeviceVectorImpl {
// data is on the host
LazyResizeDevice(data_h_.size());
SetDevice();
dh::safe_cuda(cudaMemcpyAsync(data_d_->data().get(),
data_h_.data(),
data_d_->size() * sizeof(T),
@@ -242,7 +239,6 @@ class HostDeviceVectorImpl {
LazyResizeDevice(Size());
gpu_access_ = GPUAccess::kWrite;
SetDevice();
dh::safe_cuda(cudaMemcpyAsync(data_d_->data().get(), other->data_d_->data().get(),
data_d_->size() * sizeof(T), cudaMemcpyDefault));
}
@@ -252,7 +248,6 @@ class HostDeviceVectorImpl {
LazyResizeDevice(Size());
gpu_access_ = GPUAccess::kWrite;
SetDevice();
dh::safe_cuda(cudaMemcpyAsync(data_d_->data().get(), begin,
data_d_->size() * sizeof(T), cudaMemcpyDefault));
}

View File

@@ -1,4 +0,0 @@
#if defined(XGBOOST_USE_HIP)
#include "host_device_vector.cu"
#endif

View File

@@ -12,8 +12,7 @@
namespace xgboost {
namespace linalg {
template <typename T, int32_t D, typename Fn>
void ElementWiseKernelDevice(linalg::TensorView<T, D> t, Fn&& fn, cudaStream_t s = nullptr)
{
void ElementWiseKernelDevice(linalg::TensorView<T, D> t, Fn&& fn, cudaStream_t s = nullptr) {
dh::safe_cuda(cudaSetDevice(t.DeviceIdx()));
static_assert(std::is_void<std::result_of_t<Fn(size_t, T&)>>::value,
"For function with return, use transform instead.");
@@ -29,8 +28,7 @@ void ElementWiseKernelDevice(linalg::TensorView<T, D> t, Fn&& fn, cudaStream_t s
}
template <typename T, int32_t D, typename Fn>
void ElementWiseTransformDevice(linalg::TensorView<T, D> t, Fn&& fn, cudaStream_t s = nullptr)
{
void ElementWiseTransformDevice(linalg::TensorView<T, D> t, Fn&& fn, cudaStream_t s = nullptr) {
if (t.Contiguous()) {
auto ptr = t.Values().data();
dh::LaunchN(t.Size(), s, [=] __device__(size_t i) { ptr[i] = fn(i, ptr[i]); });

View File

@@ -42,7 +42,7 @@ void ElementWiseKernelHost(linalg::TensorView<T, D> t, int32_t n_threads, Fn&& f
}
}
#if !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP)
#if !defined(XGBOOST_USE_CUDA)
template <typename T, int32_t D, typename Fn>
void ElementWiseKernelDevice(linalg::TensorView<T, D>, Fn&&, void* = nullptr) {
common::AssertGPUSupport();
@@ -60,7 +60,7 @@ void ElementWiseKernel(Context const* ctx, linalg::TensorView<T, D> t, Fn&& fn)
}
ElementWiseKernelHost(t, ctx->Threads(), fn);
}
#endif // !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP)
#endif // !defined(XGBOOST_USE_CUDA)
template <typename T, std::int32_t kDim>
auto cbegin(TensorView<T, kDim> const& v) { // NOLINT

View File

@@ -143,7 +143,7 @@ CheckNAN(T) {
return false;
}
#if XGBOOST_STRICT_R_MODE && !defined(__CUDA_ARCH__) && !defined(__HIPCC__)
#if XGBOOST_STRICT_R_MODE && !defined(__CUDA_ARCH__)
bool CheckNAN(double v);
@@ -152,27 +152,23 @@ bool CheckNAN(double v);
XGBOOST_DEVICE bool inline CheckNAN(float x) {
#if defined(__CUDA_ARCH__)
return isnan(x);
#elif defined(__HIPCC__)
return __builtin_isnan(x);
#else
return std::isnan(x);
#endif // defined(__CUDA_ARCH__) || defined(__HIPCC__)
#endif // defined(__CUDA_ARCH__)
}
XGBOOST_DEVICE bool inline CheckNAN(double x) {
#if defined(__CUDA_ARCH__)
return isnan(x);
#elif defined(__HIPCC__)
return __builtin_isnan(x);
#else
return std::isnan(x);
#endif // defined(__CUDA_ARCH__) || defined(__HIPCC__)
#endif // defined(__CUDA_ARCH__)
}
#endif // XGBOOST_STRICT_R_MODE && !defined(__CUDA_ARCH__)
// GPU version is not uploaded in CRAN anyway.
// Specialize only when using R with CPU.
#if XGBOOST_STRICT_R_MODE && !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP)
#if XGBOOST_STRICT_R_MODE && !defined(XGBOOST_USE_CUDA)
double LogGamma(double v);
#else // Not R or R with GPU.
@@ -195,7 +191,7 @@ XGBOOST_DEVICE inline T LogGamma(T v) {
#endif // _MSC_VER
}
#endif // XGBOOST_STRICT_R_MODE && !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP)
#endif // XGBOOST_STRICT_R_MODE && !defined(XGBOOST_USE_CUDA)
} // namespace common
} // namespace xgboost

View File

@@ -15,7 +15,6 @@ double Reduce(Context const* ctx, HostDeviceVector<float> const& values) {
values.SetDevice(ctx->gpu_id);
auto const d_values = values.ConstDeviceSpan();
dh::XGBCachingDeviceAllocator<char> alloc;
return dh::Reduce(thrust::cuda::par(alloc), dh::tcbegin(d_values), dh::tcend(d_values), 0.0,
thrust::plus<float>{});
}

View File

@@ -99,12 +99,12 @@ void PartialSum(int32_t n_threads, InIt begin, InIt end, T init, OutIt out_it) {
namespace cuda_impl {
double Reduce(Context const* ctx, HostDeviceVector<float> const& values);
#if !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP)
#if !defined(XGBOOST_USE_CUDA)
inline double Reduce(Context const*, HostDeviceVector<float> const&) {
AssertGPUSupport();
return 0;
}
#endif // !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP)
#endif // !defined(XGBOOST_USE_CUDA)
} // namespace cuda_impl
/**

View File

@@ -1,4 +0,0 @@
#if defined(XGBOOST_USE_HIP)
#include "numeric.cu"
#endif

View File

@@ -109,7 +109,6 @@ template <typename T, typename U>
void CopyTo(Span<T> out, Span<U> src) {
CHECK_EQ(out.size(), src.size());
static_assert(std::is_same<std::remove_cv_t<T>, std::remove_cv_t<T>>::value);
dh::safe_cuda(cudaMemcpyAsync(out.data(), src.data(),
out.size_bytes(),
cudaMemcpyDefault));
@@ -163,7 +162,6 @@ common::Span<thrust::tuple<uint64_t, uint64_t>> MergePath(
// Compute output ptr
auto transform_it =
thrust::make_zip_iterator(thrust::make_tuple(x_ptr.data(), y_ptr.data()));
thrust::transform(
thrust::cuda::par(alloc), transform_it, transform_it + x_ptr.size(),
out_ptr.data(),
@@ -213,7 +211,6 @@ void MergeImpl(int32_t device, Span<SketchEntry const> const &d_x,
Span<bst_row_t const> const &x_ptr, Span<SketchEntry const> const &d_y,
Span<bst_row_t const> const &y_ptr, Span<SketchEntry> out, Span<bst_row_t> out_ptr) {
dh::safe_cuda(cudaSetDevice(device));
CHECK_EQ(d_x.size() + d_y.size(), out.size());
CHECK_EQ(x_ptr.size(), out_ptr.size());
CHECK_EQ(y_ptr.size(), out_ptr.size());
@@ -312,7 +309,6 @@ void SketchContainer::Push(Span<Entry const> entries, Span<size_t> columns_ptr,
common::Span<OffsetT> cuts_ptr,
size_t total_cuts, Span<float> weights) {
dh::safe_cuda(cudaSetDevice(device_));
Span<SketchEntry> out;
dh::device_vector<SketchEntry> cuts;
bool first_window = this->Current().empty();
@@ -382,7 +378,6 @@ size_t SketchContainer::ScanInput(Span<SketchEntry> entries, Span<OffsetT> d_col
});
// Reverse scan to accumulate weights into first duplicated element on left.
auto val_it = thrust::make_reverse_iterator(dh::tend(entries));
thrust::inclusive_scan_by_key(
thrust::cuda::par(alloc), key_it, key_it + entries.size(),
val_it, val_it,
@@ -448,7 +443,6 @@ void SketchContainer::Prune(size_t to) {
void SketchContainer::Merge(Span<OffsetT const> d_that_columns_ptr,
Span<SketchEntry const> that) {
dh::safe_cuda(cudaSetDevice(device_));
timer_.Start(__func__);
if (this->Current().size() == 0) {
CHECK_EQ(this->columns_ptr_.HostVector().back(), 0);
@@ -484,7 +478,6 @@ void SketchContainer::Merge(Span<OffsetT const> d_that_columns_ptr,
void SketchContainer::FixError() {
dh::safe_cuda(cudaSetDevice(device_));
auto d_columns_ptr = this->columns_ptr_.ConstDeviceSpan();
auto in = dh::ToSpan(this->Current());
dh::LaunchN(in.size(), [=] __device__(size_t idx) {
@@ -642,12 +635,10 @@ void SketchContainer::MakeCuts(HistogramCuts* p_cuts, bool is_column_split) {
CHECK_EQ(num_columns_, d_in_columns_ptr.size() - 1);
max_values.resize(d_in_columns_ptr.size() - 1);
dh::caching_device_vector<SketchEntry> d_max_values(d_in_columns_ptr.size() - 1);
thrust::reduce_by_key(thrust::cuda::par(alloc), key_it, key_it + in_cut_values.size(), val_it,
thrust::make_discard_iterator(), d_max_values.begin(),
thrust::equal_to<bst_feature_t>{},
[] __device__(auto l, auto r) { return l.value > r.value ? l : r; });
dh::CopyDeviceSpanToVector(&max_values, dh::ToSpan(d_max_values));
auto max_it = MakeIndexTransformIter([&](auto i) {
if (IsCat(h_feature_types, i)) {

View File

@@ -175,7 +175,6 @@ class SketchContainer {
template <typename KeyComp = thrust::equal_to<size_t>>
size_t Unique(KeyComp key_comp = thrust::equal_to<size_t>{}) {
timer_.Start(__func__);
dh::safe_cuda(cudaSetDevice(device_));
this->columns_ptr_.SetDevice(device_);
Span<OffsetT> d_column_scan = this->columns_ptr_.DeviceSpan();
@@ -187,13 +186,11 @@ class SketchContainer {
dh::XGBCachingDeviceAllocator<char> alloc;
d_column_scan = this->columns_ptr_.DeviceSpan();
size_t n_uniques = dh::SegmentedUnique(
thrust::cuda::par(alloc), d_column_scan.data(),
d_column_scan.data() + d_column_scan.size(), entries.data(),
entries.data() + entries.size(), scan_out.DevicePointer(),
entries.data(), detail::SketchUnique{}, key_comp);
this->columns_ptr_.Copy(scan_out);
CHECK(!this->columns_ptr_.HostCanRead());

View File

@@ -1,4 +0,0 @@
#if defined(XGBOOST_USE_HIP)
#include "quantile.cu"
#endif

View File

@@ -62,7 +62,7 @@ common::Span<std::size_t const> RankingCache::MakeRankOnCPU(Context const* ctx,
return rank;
}
#if !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP)
#if !defined(XGBOOST_USE_CUDA)
void RankingCache::InitOnCUDA(Context const*, MetaInfo const&) { common::AssertGPUSupport(); }
common::Span<std::size_t const> RankingCache::MakeRankOnCUDA(Context const*,
common::Span<float const>) {
@@ -108,9 +108,9 @@ void NDCGCache::InitOnCPU(Context const* ctx, MetaInfo const& info) {
});
}
#if !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP)
#if !defined(XGBOOST_USE_CUDA)
void NDCGCache::InitOnCUDA(Context const*, MetaInfo const&) { common::AssertGPUSupport(); }
#endif // !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP)
#endif // !defined(XGBOOST_USE_CUDA)
DMLC_REGISTER_PARAMETER(LambdaRankParam);
@@ -120,7 +120,7 @@ void PreCache::InitOnCPU(Context const*, MetaInfo const& info) {
[](auto beg, auto end, auto op) { return std::all_of(beg, end, op); });
}
#if !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP)
#if !defined(XGBOOST_USE_CUDA)
void PreCache::InitOnCUDA(Context const*, MetaInfo const&) { common::AssertGPUSupport(); }
#endif // !defined(XGBOOST_USE_CUDA)
@@ -130,9 +130,9 @@ void MAPCache::InitOnCPU(Context const*, MetaInfo const& info) {
[](auto beg, auto end, auto op) { return std::all_of(beg, end, op); });
}
#if !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP)
#if !defined(XGBOOST_USE_CUDA)
void MAPCache::InitOnCUDA(Context const*, MetaInfo const&) { common::AssertGPUSupport(); }
#endif // !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP)
#endif // !defined(XGBOOST_USE_CUDA)
std::string ParseMetricName(StringView name, StringView param, position_t* topn, bool* minus) {
std::string out_name;

View File

@@ -23,10 +23,6 @@
#include "xgboost/logging.h" // for CHECK
#include "xgboost/span.h" // for Span
#if defined(XGBOOST_USE_HIP)
#include <hipcub/hipcub.hpp>
#endif
namespace xgboost::ltr {
namespace cuda_impl {
void CalcQueriesDCG(Context const* ctx, linalg::VectorView<float const> d_labels,

View File

@@ -1,4 +0,0 @@
#if defined(XGBOOST_USE_HIP)
#include "ranking_utils.cu"
#endif

View File

@@ -18,7 +18,6 @@
namespace xgboost {
namespace common {
namespace cuda_impl {
void Median(Context const* ctx, linalg::TensorView<float const, 2> t,
common::OptionalWeights weights, linalg::Tensor<float, 1>* out) {
CHECK_GE(t.Shape(1), 1);

View File

@@ -216,7 +216,6 @@ void SegmentedWeightedQuantile(Context const* ctx, AlphaIt alpha_it, SegIt seg_b
detail::SegOp<SegIt>{seg_beg, seg_end});
auto scan_val = dh::MakeTransformIterator<float>(thrust::make_counting_iterator(0ul),
detail::WeightOp<WIter>{w_begin, d_sorted_idx});
thrust::inclusive_scan_by_key(thrust::cuda::par(caching), scan_key, scan_key + n_weights,
scan_val, weights_cdf.begin());

View File

@@ -112,7 +112,7 @@ void Median(Context const* ctx, linalg::TensorView<float const, 2> t, OptionalWe
void Mean(Context const* ctx, linalg::VectorView<float const> v, linalg::VectorView<float> out);
#if !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP)
#if !defined(XGBOOST_USE_CUDA)
inline void Median(Context const*, linalg::TensorView<float const, 2>, OptionalWeights,
linalg::Tensor<float, 1>*) {
common::AssertGPUSupport();
@@ -120,7 +120,7 @@ inline void Median(Context const*, linalg::TensorView<float const, 2>, OptionalW
inline void Mean(Context const*, linalg::VectorView<float const>, linalg::VectorView<float>) {
common::AssertGPUSupport();
}
#endif // !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP)
#endif // !defined(XGBOOST_USE_CUDA)
} // namespace cuda_impl
/**

View File

@@ -1,4 +0,0 @@
#if defined(XGBOOST_USE_HIP)
#include "stats.cu"
#endif

View File

@@ -25,12 +25,12 @@ DECLARE_FIELD_ENUM_CLASS(xgboost::common::ProbabilityDistributionType);
namespace xgboost {
namespace common {
#if !defined(__CUDACC__) && !defined(__HIPCC__)
#ifndef __CUDACC__
using std::log;
using std::fmax;
#endif // __CUDACC__ && __HIPCC__
#endif // __CUDACC__
enum class CensoringType : uint8_t {
kUncensored, kRightCensored, kLeftCensored, kIntervalCensored

View File

@@ -59,11 +59,9 @@ std::size_t SegmentedTrapezoidThreads(xgboost::common::Span<U> group_ptr,
});
dh::InclusiveSum(out_group_threads_ptr.data(), out_group_threads_ptr.data(),
out_group_threads_ptr.size());
size_t total = 0;
std::size_t total = 0;
dh::safe_cuda(cudaMemcpy(&total, out_group_threads_ptr.data() + out_group_threads_ptr.size() - 1,
sizeof(total), cudaMemcpyDeviceToHost));
sizeof(total), cudaMemcpyDeviceToHost));
return total;
}

View File

@@ -19,9 +19,7 @@
#if defined (__CUDACC__)
#include "device_helpers.cuh"
#elif defined(__HIPCC__)
#include "device_helpers.hip.h"
#endif // defined (__CUDACC__) || defined(__HIPCC__)
#endif // defined (__CUDACC__)
namespace xgboost {
namespace common {
@@ -30,7 +28,7 @@ constexpr size_t kBlockThreads = 256;
namespace detail {
#if defined(__CUDACC__) || defined(__HIPCC__)
#if defined(__CUDACC__)
template <typename Functor, typename... SpanType>
__global__ void LaunchCUDAKernel(Functor _func, Range _range,
SpanType... _spans) {
@@ -38,7 +36,7 @@ __global__ void LaunchCUDAKernel(Functor _func, Range _range,
_func(i, _spans...);
}
}
#endif // defined(__CUDACC__) || defined(__HIPCC__)
#endif // defined(__CUDACC__)
} // namespace detail
@@ -129,7 +127,7 @@ class Transform {
UnpackShard(device, _vectors...);
}
#if defined(__CUDACC__) || defined(__HIPCC__)
#if defined(__CUDACC__)
template <typename std::enable_if<CompiledWithCuda>::type* = nullptr,
typename... HDV>
void LaunchCUDA(Functor _func, HDV*... _vectors) const {
@@ -142,9 +140,7 @@ class Transform {
// granularity is used in data vector.
size_t shard_size = range_size;
Range shard_range {0, static_cast<Range::DifferenceType>(shard_size)};
dh::safe_cuda(cudaSetDevice(device_));
const int kGrids =
static_cast<int>(DivRoundUp(*(range_.end()), kBlockThreads));
if (kGrids == 0) {
@@ -163,7 +159,7 @@ class Transform {
LOG(FATAL) << "Not part of device code. WITH_CUDA: " << WITH_CUDA();
}
#endif // defined(__CUDACC__) || defined(__HIPCC__)
#endif // defined(__CUDACC__)
template <typename... HDV>
void LaunchCPU(Functor func, HDV *...vectors) const {

View File

@@ -28,7 +28,7 @@ Context::Context() : cfs_cpu_count_{common::GetCfsCPUCount()} {}
namespace {
inline constexpr char const* kDevice = "device";
#if !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP)
#if !defined(XGBOOST_USE_CUDA)
DeviceOrd CUDAOrdinal(DeviceOrd device, bool) {
device = DeviceOrd::CPU();
return device;
@@ -208,10 +208,10 @@ std::int32_t Context::Threads() const {
return n_threads;
}
#if !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP)
#if !defined(XGBOOST_USE_CUDA)
CUDAContext const* Context::CUDACtx() const {
common::AssertGPUSupport();
return nullptr;
}
#endif // defined(XGBOOST_USE_CUDA) || defined(XGBOOST_USE_HIP)
#endif // defined(XGBOOST_USE_CUDA)
} // namespace xgboost

View File

@@ -1,4 +0,0 @@
#if defined(XGBOOST_USE_HIP)
#include "context.cu"
#endif

View File

@@ -20,9 +20,7 @@ void ArrayInterfaceHandler::SyncCudaStream(std::int64_t stream) {
* case where 0 might be given should either use None, 1, or 2 instead for
* clarity.
*/
#ifndef XGBOOST_USE_HIP
LOG(FATAL) << "Invalid stream ID in array interface: " << stream;
#endif
case 1:
// default legacy stream
break;
@@ -40,8 +38,6 @@ bool ArrayInterfaceHandler::IsCudaPtr(void const* ptr) {
if (!ptr) {
return false;
}
#if defined(XGBOOST_USE_CUDA)
cudaPointerAttributes attr;
auto err = cudaPointerGetAttributes(&attr, ptr);
// reset error
@@ -63,34 +59,5 @@ bool ArrayInterfaceHandler::IsCudaPtr(void const* ptr) {
// other errors, `cudaErrorNoDevice`, `cudaErrorInsufficientDriver` etc.
return false;
}
#elif defined(XGBOOST_USE_HIP)
hipPointerAttribute_t attr;
auto err = hipPointerGetAttributes(&attr, ptr);
// reset error
CHECK_EQ(err, hipGetLastError());
if (err == hipErrorInvalidValue) {
return false;
} else if (err == hipSuccess) {
#if HIP_VERSION_MAJOR < 6
switch (attr.memoryType) {
case hipMemoryTypeHost:
return false;
default:
return true;
}
#else
switch (attr.type) {
case hipMemoryTypeUnregistered:
case hipMemoryTypeHost:
return false;
default:
return true;
}
#endif
return true;
} else {
return false;
}
#endif
}
} // namespace xgboost

View File

@@ -28,8 +28,6 @@
#if defined(XGBOOST_USE_CUDA)
#include "cuda_fp16.h"
#elif defined(XGBOOST_USE_HIP)
#include <hip/hip_fp16.h>
#endif
namespace xgboost {
@@ -310,12 +308,12 @@ class ArrayInterfaceHandler {
template <typename T, typename E = void>
struct ToDType;
// float
#if defined(XGBOOST_USE_CUDA) || defined(XGBOOST_USE_HIP)
#if defined(XGBOOST_USE_CUDA)
template <>
struct ToDType<__half> {
static constexpr ArrayInterfaceHandler::Type kType = ArrayInterfaceHandler::kF2;
};
#endif // defined(XGBOOST_USE_CUDA) || defined(XGBOOST_USE_HIP)
#endif // defined(XGBOOST_USE_CUDA)
template <>
struct ToDType<float> {
static constexpr ArrayInterfaceHandler::Type kType = ArrayInterfaceHandler::kF4;
@@ -364,10 +362,10 @@ struct ToDType<int64_t> {
static constexpr ArrayInterfaceHandler::Type kType = ArrayInterfaceHandler::kI8;
};
#if !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP)
#if !defined(XGBOOST_USE_CUDA)
inline void ArrayInterfaceHandler::SyncCudaStream(int64_t) { common::AssertGPUSupport(); }
inline bool ArrayInterfaceHandler::IsCudaPtr(void const *) { return false; }
#endif // !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP)
#endif // !defined(XGBOOST_USE_CUDA)
/**
* \brief A type erased view over __array_interface__ protocol defined by numpy
@@ -465,11 +463,11 @@ class ArrayInterface {
CHECK(sizeof(long double) == 16) << error::NoF128();
type = T::kF16;
} else if (typestr[1] == 'f' && typestr[2] == '2') {
#if defined(XGBOOST_USE_CUDA) || defined(XGBOOST_USE_HIP)
#if defined(XGBOOST_USE_CUDA)
type = T::kF2;
#else
LOG(FATAL) << "Half type is not supported.";
#endif // defined(XGBOOST_USE_CUDA) || defined(XGBOOST_USE_HIP)
#endif // defined(XGBOOST_USE_CUDA)
} else if (typestr[1] == 'f' && typestr[2] == '4') {
type = T::kF4;
} else if (typestr[1] == 'f' && typestr[2] == '8') {
@@ -504,15 +502,15 @@ class ArrayInterface {
using T = ArrayInterfaceHandler::Type;
switch (type) {
case T::kF2: {
#if defined(XGBOOST_USE_CUDA) || defined(XGBOOST_USE_HIP)
#if defined(XGBOOST_USE_CUDA)
return func(reinterpret_cast<__half const *>(data));
#endif // defined(XGBOOST_USE_CUDA) || || defined(XGBOOST_USE_HIP)
#endif // defined(XGBOOST_USE_CUDA)
}
case T::kF4:
return func(reinterpret_cast<float const *>(data));
case T::kF8:
return func(reinterpret_cast<double const *>(data));
#if defined(__CUDA_ARCH__ ) || defined(__HIPCC__)
#ifdef __CUDA_ARCH__
case T::kF16: {
// CUDA device code doesn't support long double.
SPAN_CHECK(false);
@@ -559,7 +557,7 @@ class ArrayInterface {
static_assert(sizeof...(index) <= D, "Invalid index.");
return this->DispatchCall([=](auto const *p_values) -> T {
std::size_t offset = linalg::detail::Offset<0ul>(strides, 0ul, index...);
#if defined(XGBOOST_USE_CUDA) || defined(XGBOOST_USE_HIP)
#if defined(XGBOOST_USE_CUDA)
// No operator defined for half -> size_t
using Type = std::conditional_t<
std::is_same<__half,
@@ -569,7 +567,7 @@ class ArrayInterface {
return static_cast<T>(static_cast<Type>(p_values[offset]));
#else
return static_cast<T>(p_values[offset]);
#endif // defined(XGBOOST_USE_CUDA) || defined(XGBOOST_USE_HIP)
#endif // defined(XGBOOST_USE_CUDA)
});
}
@@ -606,7 +604,7 @@ void DispatchDType(ArrayInterface<D> const array, std::int32_t device, Fn fn) {
};
switch (array.type) {
case ArrayInterfaceHandler::kF2: {
#if defined(XGBOOST_USE_CUDA) || defined(XGBOOST_USE_HIP)
#if defined(XGBOOST_USE_CUDA)
dispatch(__half{});
#endif
break;

View File

@@ -1,4 +0,0 @@
#if defined(XGBOOST_USE_HIP)
#include "array_interface.cu"
#endif

View File

@@ -800,9 +800,9 @@ void MetaInfo::Validate(std::int32_t device) const {
}
}
#if !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP)
#if !defined(XGBOOST_USE_CUDA)
void MetaInfo::SetInfoFromCUDA(Context const&, StringView, Json) { common::AssertGPUSupport(); }
#endif // !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP)
#endif // !defined(XGBOOST_USE_CUDA)
bool MetaInfo::IsVerticalFederated() const {
return collective::IsFederated() && IsColumnSplit();

View File

@@ -44,7 +44,6 @@ void CopyTensorInfoImpl(CUDAContext const* ctx, Json arr_interface, linalg::Tens
std::copy(array.shape, array.shape + D, shape.data());
// set data
data->Resize(array.n);
dh::safe_cuda(cudaMemcpyAsync(data->DevicePointer(), array.data, array.n * sizeof(T),
cudaMemcpyDefault, ctx->Stream()));
});
@@ -96,10 +95,8 @@ void CopyQidImpl(ArrayInterface<1> array_interface, std::vector<bst_group_t>* p_
}
});
bool non_dec = true;
dh::safe_cuda(cudaMemcpy(&non_dec, flag.data().get(), sizeof(bool),
cudaMemcpyDeviceToHost));
CHECK(non_dec) << "`qid` must be sorted in increasing order along with data.";
size_t bytes = 0;
dh::caching_device_vector<uint32_t> out(array_interface.Shape(0));
@@ -117,10 +114,8 @@ void CopyQidImpl(ArrayInterface<1> array_interface, std::vector<bst_group_t>* p_
group_ptr_.clear();
group_ptr_.resize(h_num_runs_out + 1, 0);
dh::XGBCachingDeviceAllocator<char> alloc;
thrust::inclusive_scan(thrust::cuda::par(alloc), cnt.begin(),
cnt.begin() + h_num_runs_out, cnt.begin());
thrust::copy(cnt.begin(), cnt.begin() + h_num_runs_out,
group_ptr_.begin() + 1);
}

View File

@@ -1,4 +0,0 @@
#if defined(XGBOOST_USE_HIP)
#include "data.cu"
#endif

View File

@@ -122,9 +122,7 @@ class CudfAdapter : public detail::SingleBatchDataIter<CudfAdapterBatch> {
device_idx_ = dh::CudaGetPointerDevice(first_column.data);
CHECK_NE(device_idx_, Context::kCpuId);
dh::safe_cuda(cudaSetDevice(device_idx_));
for (auto& json_col : json_columns) {
auto column = ArrayInterface<1>(get<Object const>(json_col));
columns.push_back(column);
@@ -213,7 +211,6 @@ template <typename AdapterBatchT>
std::size_t GetRowCounts(const AdapterBatchT batch, common::Span<bst_row_t> offset, int device_idx,
float missing) {
dh::safe_cuda(cudaSetDevice(device_idx));
IsValidFunctor is_valid(missing);
dh::safe_cuda(cudaMemsetAsync(offset.data(), '\0', offset.size_bytes()));
@@ -247,7 +244,6 @@ std::size_t GetRowCounts(const AdapterBatchT batch, common::Span<bst_row_t> offs
&offset[ridx]),
static_cast<unsigned long long>(cnt)); // NOLINT
});
dh::XGBCachingDeviceAllocator<char> alloc;
bst_row_t row_stride =
dh::Reduce(thrust::cuda::par(alloc), thrust::device_pointer_cast(offset.data()),

View File

@@ -1,7 +1,7 @@
/**
* Copyright 2019-2023, XGBoost contributors
*/
#if !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP)
#ifndef XGBOOST_USE_CUDA
#include "ellpack_page.h"
@@ -52,4 +52,5 @@ size_t EllpackPage::Size() const {
return impl_->Cuts();
}
} // namespace xgboost
#endif // XGBOOST_USE_CUDA || XGBOOST_USE_HIP
#endif // XGBOOST_USE_CUDA

View File

@@ -18,10 +18,6 @@
#include "gradient_index.h"
#include "xgboost/data.h"
#if defined(XGBOOST_USE_HIP)
#include <rocprim/rocprim.hpp>
#endif
namespace xgboost {
EllpackPage::EllpackPage() : impl_{new EllpackPageImpl()} {}
@@ -106,7 +102,6 @@ EllpackPageImpl::EllpackPageImpl(int device, common::HistogramCuts cuts,
row_stride(row_stride),
n_rows(n_rows) {
monitor_.Init("ellpack_page");
dh::safe_cuda(cudaSetDevice(device));
monitor_.Start("InitCompressedData");
@@ -261,8 +256,6 @@ void CopyDataToEllpack(const AdapterBatchT& batch, common::Span<FeatureType cons
// Go one level down into cub::DeviceScan API to set OffsetT as 64 bit
// So we don't crash on n > 2^31
size_t temp_storage_bytes = 0;
#if defined(__CUDACC__)
using DispatchScan =
cub::DispatchScan<decltype(key_value_index_iter), decltype(out),
TupleScanOp<Tuple>, cub::NullType, int64_t>;
@@ -285,17 +278,6 @@ void CopyDataToEllpack(const AdapterBatchT& batch, common::Span<FeatureType cons
key_value_index_iter, out, TupleScanOp<Tuple>(),
cub::NullType(), batch.Size(), nullptr, false);
#endif
#elif defined (__HIPCC__)
rocprim::inclusive_scan(nullptr, temp_storage_bytes, key_value_index_iter, out, batch.Size(), TupleScanOp<Tuple>());
dh::TemporaryArray<char> temp_storage(temp_storage_bytes);
rocprim::inclusive_scan(temp_storage.data().get(), temp_storage_bytes, key_value_index_iter, out, batch.Size(),
TupleScanOp<Tuple>());
#endif
}
void WriteNullValues(EllpackPageImpl* dst, int device_idx,
@@ -552,13 +534,11 @@ void EllpackPageImpl::CreateHistIndices(int device,
// copy data entries to device.
if (row_batch.data.DeviceCanRead()) {
auto const& d_data = row_batch.data.ConstDeviceSpan();
dh::safe_cuda(cudaMemcpyAsync(
entries_d.data().get(), d_data.data() + ent_cnt_begin,
n_entries * sizeof(Entry), cudaMemcpyDefault));
} else {
const std::vector<Entry>& data_vec = row_batch.data.ConstHostVector();
dh::safe_cuda(cudaMemcpyAsync(
entries_d.data().get(), data_vec.data() + ent_cnt_begin,
n_entries * sizeof(Entry), cudaMemcpyDefault));

View File

@@ -1,4 +0,0 @@
#if defined(XGBOOST_USE_HIP)
#include "ellpack_page.cu"
#endif

View File

@@ -1,4 +0,0 @@
#if defined(XGBOOST_USE_HIP)
#include "ellpack_page_raw_format.cu"
#endif

View File

@@ -45,7 +45,7 @@ class EllpackPageSource : public PageSourceIncMixIn<EllpackPage> {
void Fetch() final;
};
#if !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP)
#if !defined(XGBOOST_USE_CUDA)
inline void EllpackPageSource::Fetch() {
// silent the warning about unused variables.
(void)(row_stride_);

View File

@@ -1,4 +0,0 @@
#if defined(XGBOOST_USE_HIP)
#include "ellpack_page_source.cu"
#endif

View File

@@ -65,12 +65,12 @@ GHistIndexMatrix::GHistIndexMatrix(MetaInfo const &info, common::HistogramCuts &
max_numeric_bins_per_feat(max_bin_per_feat),
isDense_{info.num_col_ * info.num_row_ == info.num_nonzero_} {}
#if !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP)
#if !defined(XGBOOST_USE_CUDA)
GHistIndexMatrix::GHistIndexMatrix(Context const *, MetaInfo const &, EllpackPage const &,
BatchParam const &) {
common::AssertGPUSupport();
}
#endif // defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP)
#endif // defined(XGBOOST_USE_CUDA)
GHistIndexMatrix::~GHistIndexMatrix() = default;

Some files were not shown because too many files have changed in this diff Show More