use __HIPCC__ for device code
This commit is contained in:
parent
fd3ad29dc4
commit
74677e4e9d
@ -231,7 +231,8 @@ if (USE_HIP)
|
|||||||
|
|
||||||
set(CMAKE_HIP_FLAGS "${CMAKE_HIP_FLAGS} -I${HIP_INCLUDE_DIRS} -I${HIP_INCLUDE_DIRS}/hip")
|
set(CMAKE_HIP_FLAGS "${CMAKE_HIP_FLAGS} -I${HIP_INCLUDE_DIRS} -I${HIP_INCLUDE_DIRS}/hip")
|
||||||
set(CMAKE_HIP_FLAGS "${CMAKE_HIP_FLAGS} -Wunused-result -w")
|
set(CMAKE_HIP_FLAGS "${CMAKE_HIP_FLAGS} -Wunused-result -w")
|
||||||
set(CMAKE_HIP_FLAGS "${CMAKE_HIP_FLAGS} -D__HIP_PLATFORM_AMD__")
|
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D__HIP_PLATFORM_AMD__")
|
||||||
|
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -I${HIP_INCLUDE_DIRS}")
|
||||||
add_subdirectory(${PROJECT_SOURCE_DIR}/rocgputreeshap)
|
add_subdirectory(${PROJECT_SOURCE_DIR}/rocgputreeshap)
|
||||||
endif (USE_HIP)
|
endif (USE_HIP)
|
||||||
|
|
||||||
|
|||||||
@ -58,19 +58,19 @@
|
|||||||
/*!
|
/*!
|
||||||
* \brief Tag function as usable by device
|
* \brief Tag function as usable by device
|
||||||
*/
|
*/
|
||||||
#if defined (__CUDA__) || defined(__NVCC__) || defined(__HIP_PLATFORM_AMD__)
|
#if defined (__CUDA__) || defined(__NVCC__) || defined(__HIPCC__)
|
||||||
#define XGBOOST_DEVICE __host__ __device__
|
#define XGBOOST_DEVICE __host__ __device__
|
||||||
#else
|
#else
|
||||||
#define XGBOOST_DEVICE
|
#define XGBOOST_DEVICE
|
||||||
#endif // defined (__CUDA__) || defined(__NVCC__) || defined(__HIP_PLATFORM_AMD__)
|
#endif // defined (__CUDA__) || defined(__NVCC__) || defined(__HIPCC__)
|
||||||
|
|
||||||
#if defined(__CUDA__) || defined(__CUDACC__) || defined(__HIP_PLATFORM_AMD__)
|
#if defined(__CUDA__) || defined(__CUDACC__) || defined(__HIPCC__)
|
||||||
#define XGBOOST_HOST_DEV_INLINE XGBOOST_DEVICE __forceinline__
|
#define XGBOOST_HOST_DEV_INLINE XGBOOST_DEVICE __forceinline__
|
||||||
#define XGBOOST_DEV_INLINE __device__ __forceinline__
|
#define XGBOOST_DEV_INLINE __device__ __forceinline__
|
||||||
#else
|
#else
|
||||||
#define XGBOOST_HOST_DEV_INLINE
|
#define XGBOOST_HOST_DEV_INLINE
|
||||||
#define XGBOOST_DEV_INLINE
|
#define XGBOOST_DEV_INLINE
|
||||||
#endif // defined(__CUDA__) || defined(__CUDACC__) || defined(__HIP_PLATFORM_AMD__)
|
#endif // defined(__CUDA__) || defined(__CUDACC__) || defined(__HIPCC__)
|
||||||
|
|
||||||
// These check are for Makefile.
|
// These check are for Makefile.
|
||||||
#if !defined(XGBOOST_MM_PREFETCH_PRESENT) && !defined(XGBOOST_BUILTIN_PREFETCH_PRESENT)
|
#if !defined(XGBOOST_MM_PREFETCH_PRESENT) && !defined(XGBOOST_BUILTIN_PREFETCH_PRESENT)
|
||||||
|
|||||||
@ -58,11 +58,11 @@
|
|||||||
|
|
||||||
namespace xgboost {
|
namespace xgboost {
|
||||||
|
|
||||||
#if defined(__CUDACC__) || defined(__HIP_PLATFORM_AMD__)
|
#if defined(__CUDACC__) || defined(__HIPCC__)
|
||||||
// Sets a function to call instead of cudaSetDevice();
|
// Sets a function to call instead of cudaSetDevice();
|
||||||
// only added for testing
|
// only added for testing
|
||||||
void SetCudaSetDeviceHandler(void (*handler)(int));
|
void SetCudaSetDeviceHandler(void (*handler)(int));
|
||||||
#endif // __CUDACC__ || __HIP_PLATFORM_AMD__
|
#endif // __CUDACC__ || __HIPCC__
|
||||||
|
|
||||||
template <typename T> struct HostDeviceVectorImpl;
|
template <typename T> struct HostDeviceVectorImpl;
|
||||||
|
|
||||||
|
|||||||
@ -30,11 +30,11 @@
|
|||||||
|
|
||||||
// decouple it from xgboost.
|
// decouple it from xgboost.
|
||||||
#ifndef LINALG_HD
|
#ifndef LINALG_HD
|
||||||
#if defined(__CUDA__) || defined(__NVCC__) || defined(__HIP_PLATFORM_AMD__)
|
#if defined(__CUDA__) || defined(__NVCC__) || defined(__HIPCC__)
|
||||||
#define LINALG_HD __host__ __device__
|
#define LINALG_HD __host__ __device__
|
||||||
#else
|
#else
|
||||||
#define LINALG_HD
|
#define LINALG_HD
|
||||||
#endif // defined (__CUDA__) || defined(__NVCC__) || defined(__HIP_PLATFORM_AMD__)
|
#endif // defined (__CUDA__) || defined(__NVCC__) || defined(__HIPCC__)
|
||||||
#endif // LINALG_HD
|
#endif // LINALG_HD
|
||||||
|
|
||||||
namespace xgboost::linalg {
|
namespace xgboost::linalg {
|
||||||
@ -118,7 +118,7 @@ using IndexToTag = std::conditional_t<std::is_integral<RemoveCRType<S>>::value,
|
|||||||
|
|
||||||
template <int32_t n, typename Fn>
|
template <int32_t n, typename Fn>
|
||||||
LINALG_HD constexpr auto UnrollLoop(Fn fn) {
|
LINALG_HD constexpr auto UnrollLoop(Fn fn) {
|
||||||
#if defined(__CUDA_ARCH__) || defined(__HIP_PLATFORM_AMD__)
|
#if defined(__CUDA_ARCH__) || defined(__HIPCC__)
|
||||||
#pragma unroll n
|
#pragma unroll n
|
||||||
#endif // defined __CUDA_ARCH__
|
#endif // defined __CUDA_ARCH__
|
||||||
for (int32_t i = 0; i < n; ++i) {
|
for (int32_t i = 0; i < n; ++i) {
|
||||||
@ -136,7 +136,7 @@ int32_t NativePopc(T v) {
|
|||||||
inline LINALG_HD int Popc(uint32_t v) {
|
inline LINALG_HD int Popc(uint32_t v) {
|
||||||
#if defined(__CUDA_ARCH__)
|
#if defined(__CUDA_ARCH__)
|
||||||
return __popc(v);
|
return __popc(v);
|
||||||
#elif defined(__GNUC__) || defined(__clang__) || defined(__HIP_PLATFORM_AMD__)
|
#elif defined(__GNUC__) || defined(__clang__) || defined(__HIPCC__)
|
||||||
return __builtin_popcount(v);
|
return __builtin_popcount(v);
|
||||||
#elif defined(_MSC_VER)
|
#elif defined(_MSC_VER)
|
||||||
return __popcnt(v);
|
return __popcnt(v);
|
||||||
@ -148,7 +148,7 @@ inline LINALG_HD int Popc(uint32_t v) {
|
|||||||
inline LINALG_HD int Popc(uint64_t v) {
|
inline LINALG_HD int Popc(uint64_t v) {
|
||||||
#if defined(__CUDA_ARCH__)
|
#if defined(__CUDA_ARCH__)
|
||||||
return __popcll(v);
|
return __popcll(v);
|
||||||
#elif defined(__GNUC__) || defined(__clang__) || defined(__HIP_PLATFORM_AMD__)
|
#elif defined(__GNUC__) || defined(__clang__) || defined(__HIPCC__)
|
||||||
return __builtin_popcountll(v);
|
return __builtin_popcountll(v);
|
||||||
#elif defined(_MSC_VER) && defined(_M_X64)
|
#elif defined(_MSC_VER) && defined(_M_X64)
|
||||||
return __popcnt64(v);
|
return __popcnt64(v);
|
||||||
|
|||||||
@ -41,7 +41,7 @@
|
|||||||
|
|
||||||
#if defined(__CUDACC__)
|
#if defined(__CUDACC__)
|
||||||
#include <cuda_runtime.h>
|
#include <cuda_runtime.h>
|
||||||
#elif defined(__HIP_PLATFORM_AMD__)
|
#elif defined(__HIPCC__)
|
||||||
#include <hip/hip_runtime.h>
|
#include <hip/hip_runtime.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -106,7 +106,7 @@ namespace common {
|
|||||||
|
|
||||||
#define SPAN_CHECK KERNEL_CHECK
|
#define SPAN_CHECK KERNEL_CHECK
|
||||||
|
|
||||||
#elif defined(__HIP_PLATFORM_AMD__)
|
#elif defined(__HIPCC__)
|
||||||
// Usual logging facility is not available inside device code.
|
// Usual logging facility is not available inside device code.
|
||||||
|
|
||||||
#if defined(_MSC_VER)
|
#if defined(_MSC_VER)
|
||||||
@ -157,7 +157,7 @@ namespace common {
|
|||||||
|
|
||||||
#endif // defined(XGBOOST_STRICT_R_MODE)
|
#endif // defined(XGBOOST_STRICT_R_MODE)
|
||||||
|
|
||||||
#endif // __CUDA_ARCH__ || __HIP_PLATFORM_AMD__
|
#endif // __CUDA_ARCH__ || __HIPCC__
|
||||||
|
|
||||||
#define SPAN_LT(lhs, rhs) SPAN_CHECK((lhs) < (rhs))
|
#define SPAN_LT(lhs, rhs) SPAN_CHECK((lhs) < (rhs))
|
||||||
|
|
||||||
|
|||||||
@ -16,18 +16,18 @@
|
|||||||
#include <thrust/device_ptr.h>
|
#include <thrust/device_ptr.h>
|
||||||
|
|
||||||
#include "device_helpers.cuh"
|
#include "device_helpers.cuh"
|
||||||
#elif defined(__HIP_PLATFORM_AMD__)
|
#elif defined(__HIPCC__)
|
||||||
#include <thrust/copy.h>
|
#include <thrust/copy.h>
|
||||||
#include <thrust/device_ptr.h>
|
#include <thrust/device_ptr.h>
|
||||||
#include "device_helpers.hip.h"
|
#include "device_helpers.hip.h"
|
||||||
#endif // defined(__CUDACC__) || defined(__HIP_PLATFORM_AMD__)
|
#endif // defined(__CUDACC__) || defined(__HIPCC__)
|
||||||
|
|
||||||
#include "common.h"
|
#include "common.h"
|
||||||
#include "xgboost/span.h" // for Span
|
#include "xgboost/span.h" // for Span
|
||||||
|
|
||||||
namespace xgboost {
|
namespace xgboost {
|
||||||
|
|
||||||
#if defined(__CUDACC__) || defined(__HIP_PLATFORM_AMD__)
|
#if defined(__CUDACC__) || defined(__HIPCC__)
|
||||||
using BitFieldAtomicType = unsigned long long; // NOLINT
|
using BitFieldAtomicType = unsigned long long; // NOLINT
|
||||||
|
|
||||||
__forceinline__ __device__ BitFieldAtomicType AtomicOr(BitFieldAtomicType* address,
|
__forceinline__ __device__ BitFieldAtomicType AtomicOr(BitFieldAtomicType* address,
|
||||||
@ -51,7 +51,7 @@ __forceinline__ __device__ BitFieldAtomicType AtomicAnd(BitFieldAtomicType* addr
|
|||||||
|
|
||||||
return old;
|
return old;
|
||||||
}
|
}
|
||||||
#endif // defined(__CUDACC__) || defined(__HIP_PLATFORM_AMD__)
|
#endif // defined(__CUDACC__) || defined(__HIPCC__)
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief A non-owning type with auxiliary methods defined for manipulating bits.
|
* @brief A non-owning type with auxiliary methods defined for manipulating bits.
|
||||||
@ -109,7 +109,7 @@ struct BitFieldContainer {
|
|||||||
XGBOOST_DEVICE static size_t ComputeStorageSize(index_type size) {
|
XGBOOST_DEVICE static size_t ComputeStorageSize(index_type size) {
|
||||||
return common::DivRoundUp(size, kValueSize);
|
return common::DivRoundUp(size, kValueSize);
|
||||||
}
|
}
|
||||||
#if defined(__CUDA_ARCH__) || defined(__HIP_PLATFORM_AMD__)
|
#if defined(__CUDA_ARCH__) || defined(__HIPCC__)
|
||||||
__device__ BitFieldContainer& operator|=(BitFieldContainer const& rhs) {
|
__device__ BitFieldContainer& operator|=(BitFieldContainer const& rhs) {
|
||||||
auto tid = blockIdx.x * blockDim.x + threadIdx.x;
|
auto tid = blockIdx.x * blockDim.x + threadIdx.x;
|
||||||
size_t min_size = min(NumValues(), rhs.NumValues());
|
size_t min_size = min(NumValues(), rhs.NumValues());
|
||||||
@ -126,9 +126,9 @@ struct BitFieldContainer {
|
|||||||
}
|
}
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
#endif // #if defined(__CUDA_ARCH__) || defined(__HIP_PLATFORM_AMD__)
|
#endif // #if defined(__CUDA_ARCH__) || defined(__HIPCC__)
|
||||||
|
|
||||||
#if defined(__CUDA_ARCH__) || defined(__HIP_PLATFORM_AMD__)
|
#if defined(__CUDA_ARCH__) || defined(__HIPCC__)
|
||||||
__device__ BitFieldContainer& operator&=(BitFieldContainer const& rhs) {
|
__device__ BitFieldContainer& operator&=(BitFieldContainer const& rhs) {
|
||||||
size_t min_size = min(NumValues(), rhs.NumValues());
|
size_t min_size = min(NumValues(), rhs.NumValues());
|
||||||
auto tid = blockIdx.x * blockDim.x + threadIdx.x;
|
auto tid = blockIdx.x * blockDim.x + threadIdx.x;
|
||||||
@ -147,7 +147,7 @@ struct BitFieldContainer {
|
|||||||
}
|
}
|
||||||
#endif // defined(__CUDA_ARCH__)
|
#endif // defined(__CUDA_ARCH__)
|
||||||
|
|
||||||
#if defined(__CUDA_ARCH__) || defined(__HIP_PLATFORM_AMD__)
|
#if defined(__CUDA_ARCH__) || defined(__HIPCC__)
|
||||||
__device__ auto Set(index_type pos) noexcept(true) {
|
__device__ auto Set(index_type pos) noexcept(true) {
|
||||||
Pos pos_v = Direction::Shift(ToBitPos(pos));
|
Pos pos_v = Direction::Shift(ToBitPos(pos));
|
||||||
value_type& value = Data()[pos_v.int_pos];
|
value_type& value = Data()[pos_v.int_pos];
|
||||||
@ -164,7 +164,7 @@ struct BitFieldContainer {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* compiler hack */
|
/* compiler hack */
|
||||||
#if defined(__HIP_PLATFORM_AMD__)
|
#if defined(__HIPCC__)
|
||||||
void Clear(index_type pos) noexcept(true) {
|
void Clear(index_type pos) noexcept(true) {
|
||||||
Pos pos_v = Direction::Shift(ToBitPos(pos));
|
Pos pos_v = Direction::Shift(ToBitPos(pos));
|
||||||
value_type& value = Data()[pos_v.int_pos];
|
value_type& value = Data()[pos_v.int_pos];
|
||||||
@ -185,7 +185,7 @@ struct BitFieldContainer {
|
|||||||
value_type clear_bit = ~(kOne << pos_v.bit_pos);
|
value_type clear_bit = ~(kOne << pos_v.bit_pos);
|
||||||
value &= clear_bit;
|
value &= clear_bit;
|
||||||
}
|
}
|
||||||
#endif // defined(__CUDA_ARCH__) || defined(__HIP_PLATFORM_AMD__)
|
#endif // defined(__CUDA_ARCH__) || defined(__HIPCC__)
|
||||||
|
|
||||||
XGBOOST_DEVICE bool Check(Pos pos_v) const noexcept(true) {
|
XGBOOST_DEVICE bool Check(Pos pos_v) const noexcept(true) {
|
||||||
pos_v = Direction::Shift(pos_v);
|
pos_v = Direction::Shift(pos_v);
|
||||||
|
|||||||
@ -25,7 +25,7 @@
|
|||||||
|
|
||||||
#define WITH_CUDA() true
|
#define WITH_CUDA() true
|
||||||
|
|
||||||
#elif defined(__HIP_PLATFORM_AMD__)
|
#elif defined(__HIPCC__)
|
||||||
#include "cuda_to_hip.h"
|
#include "cuda_to_hip.h"
|
||||||
#include <thrust/system/hip/error.h>
|
#include <thrust/system/hip/error.h>
|
||||||
#include <thrust/system_error.h>
|
#include <thrust/system_error.h>
|
||||||
@ -39,7 +39,7 @@
|
|||||||
#endif // defined(__CUDACC__)
|
#endif // defined(__CUDACC__)
|
||||||
|
|
||||||
namespace dh {
|
namespace dh {
|
||||||
#if defined(__CUDACC__) || defined(__HIP_PLATFORM_AMD__)
|
#if defined(__CUDACC__) || defined(__HIPCC__)
|
||||||
/*
|
/*
|
||||||
* Error handling functions
|
* Error handling functions
|
||||||
*/
|
*/
|
||||||
|
|||||||
@ -13,9 +13,9 @@
|
|||||||
|
|
||||||
#if defined(__CUDACC__)
|
#if defined(__CUDACC__)
|
||||||
#include "device_helpers.cuh"
|
#include "device_helpers.cuh"
|
||||||
#elif defined(__HIP_PLATFORM_AMD__)
|
#elif defined(__HIPCC__)
|
||||||
#include "device_helpers.hip.h"
|
#include "device_helpers.hip.h"
|
||||||
#endif // __CUDACC__ || __HIP_PLATFORM_AMD__
|
#endif // __CUDACC__ || __HIPCC__
|
||||||
|
|
||||||
namespace xgboost {
|
namespace xgboost {
|
||||||
namespace common {
|
namespace common {
|
||||||
@ -107,7 +107,7 @@ class CompressedBufferWriter {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#if defined(__CUDACC__) || defined(__HIP_PLATFORM_AMD__)
|
#if defined(__CUDACC__) || defined(__HIPCC__)
|
||||||
__device__ void AtomicWriteSymbol
|
__device__ void AtomicWriteSymbol
|
||||||
(CompressedByteT* buffer, uint64_t symbol, size_t offset) {
|
(CompressedByteT* buffer, uint64_t symbol, size_t offset) {
|
||||||
size_t ibit_start = offset * symbol_bits_;
|
size_t ibit_start = offset * symbol_bits_;
|
||||||
@ -121,7 +121,7 @@ class CompressedBufferWriter {
|
|||||||
symbol >>= 8;
|
symbol >>= 8;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif // __CUDACC__ || __HIP_PLATFORM_AMD__
|
#endif // __CUDACC__ || __HIPCC__
|
||||||
|
|
||||||
template <typename IterT>
|
template <typename IterT>
|
||||||
void Write(CompressedByteT *buffer, IterT input_begin, IterT input_end) {
|
void Write(CompressedByteT *buffer, IterT input_begin, IterT input_end) {
|
||||||
|
|||||||
@ -143,7 +143,7 @@ CheckNAN(T) {
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
#if XGBOOST_STRICT_R_MODE && !defined(__CUDA_ARCH__) && !defined(__HIP_PLATFORM_AMD__)
|
#if XGBOOST_STRICT_R_MODE && !defined(__CUDA_ARCH__) && !defined(__HIPCC__)
|
||||||
|
|
||||||
bool CheckNAN(double v);
|
bool CheckNAN(double v);
|
||||||
|
|
||||||
@ -152,21 +152,21 @@ bool CheckNAN(double v);
|
|||||||
XGBOOST_DEVICE bool inline CheckNAN(float x) {
|
XGBOOST_DEVICE bool inline CheckNAN(float x) {
|
||||||
#if defined(__CUDA_ARCH__)
|
#if defined(__CUDA_ARCH__)
|
||||||
return isnan(x);
|
return isnan(x);
|
||||||
#elif defined(__HIP_PLATFORM_AMD__)
|
#elif defined(__HIPCC__)
|
||||||
return __builtin_isnan(x);
|
return __builtin_isnan(x);
|
||||||
#else
|
#else
|
||||||
return std::isnan(x);
|
return std::isnan(x);
|
||||||
#endif // defined(__CUDA_ARCH__) || defined(__HIP_PLATFORM_AMD__)
|
#endif // defined(__CUDA_ARCH__) || defined(__HIPCC__)
|
||||||
}
|
}
|
||||||
|
|
||||||
XGBOOST_DEVICE bool inline CheckNAN(double x) {
|
XGBOOST_DEVICE bool inline CheckNAN(double x) {
|
||||||
#if defined(__CUDA_ARCH__)
|
#if defined(__CUDA_ARCH__)
|
||||||
return isnan(x);
|
return isnan(x);
|
||||||
#elif defined(__HIP_PLATFORM_AMD__)
|
#elif defined(__HIPCC__)
|
||||||
return __builtin_isnan(x);
|
return __builtin_isnan(x);
|
||||||
#else
|
#else
|
||||||
return std::isnan(x);
|
return std::isnan(x);
|
||||||
#endif // defined(__CUDA_ARCH__) || defined(__HIP_PLATFORM_AMD__)
|
#endif // defined(__CUDA_ARCH__) || defined(__HIPCC__)
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif // XGBOOST_STRICT_R_MODE && !defined(__CUDA_ARCH__)
|
#endif // XGBOOST_STRICT_R_MODE && !defined(__CUDA_ARCH__)
|
||||||
|
|||||||
@ -25,12 +25,12 @@ DECLARE_FIELD_ENUM_CLASS(xgboost::common::ProbabilityDistributionType);
|
|||||||
namespace xgboost {
|
namespace xgboost {
|
||||||
namespace common {
|
namespace common {
|
||||||
|
|
||||||
#if !defined(__CUDACC__) && !defined(__HIP_PLATFORM_AMD__)
|
#if !defined(__CUDACC__) && !defined(__HIPCC__)
|
||||||
|
|
||||||
using std::log;
|
using std::log;
|
||||||
using std::fmax;
|
using std::fmax;
|
||||||
|
|
||||||
#endif // __CUDACC__ && __HIP_PLATFORM_AMD__
|
#endif // __CUDACC__ && __HIPCC__
|
||||||
|
|
||||||
enum class CensoringType : uint8_t {
|
enum class CensoringType : uint8_t {
|
||||||
kUncensored, kRightCensored, kLeftCensored, kIntervalCensored
|
kUncensored, kRightCensored, kLeftCensored, kIntervalCensored
|
||||||
|
|||||||
@ -19,9 +19,9 @@
|
|||||||
|
|
||||||
#if defined (__CUDACC__)
|
#if defined (__CUDACC__)
|
||||||
#include "device_helpers.cuh"
|
#include "device_helpers.cuh"
|
||||||
#elif defined(__HIP_PLATFORM_AMD__)
|
#elif defined(__HIPCC__)
|
||||||
#include "device_helpers.hip.h"
|
#include "device_helpers.hip.h"
|
||||||
#endif // defined (__CUDACC__) || defined(__HIP_PLATFORM_AMD__)
|
#endif // defined (__CUDACC__) || defined(__HIPCC__)
|
||||||
|
|
||||||
namespace xgboost {
|
namespace xgboost {
|
||||||
namespace common {
|
namespace common {
|
||||||
@ -30,7 +30,7 @@ constexpr size_t kBlockThreads = 256;
|
|||||||
|
|
||||||
namespace detail {
|
namespace detail {
|
||||||
|
|
||||||
#if defined(__CUDACC__) || defined(__HIP_PLATFORM_AMD__)
|
#if defined(__CUDACC__) || defined(__HIPCC__)
|
||||||
template <typename Functor, typename... SpanType>
|
template <typename Functor, typename... SpanType>
|
||||||
__global__ void LaunchCUDAKernel(Functor _func, Range _range,
|
__global__ void LaunchCUDAKernel(Functor _func, Range _range,
|
||||||
SpanType... _spans) {
|
SpanType... _spans) {
|
||||||
@ -38,7 +38,7 @@ __global__ void LaunchCUDAKernel(Functor _func, Range _range,
|
|||||||
_func(i, _spans...);
|
_func(i, _spans...);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif // defined(__CUDACC__) || defined(__HIP_PLATFORM_AMD__)
|
#endif // defined(__CUDACC__) || defined(__HIPCC__)
|
||||||
|
|
||||||
} // namespace detail
|
} // namespace detail
|
||||||
|
|
||||||
@ -129,7 +129,7 @@ class Transform {
|
|||||||
UnpackShard(device, _vectors...);
|
UnpackShard(device, _vectors...);
|
||||||
}
|
}
|
||||||
|
|
||||||
#if defined(__CUDACC__) || defined(__HIP_PLATFORM_AMD__)
|
#if defined(__CUDACC__) || defined(__HIPCC__)
|
||||||
template <typename std::enable_if<CompiledWithCuda>::type* = nullptr,
|
template <typename std::enable_if<CompiledWithCuda>::type* = nullptr,
|
||||||
typename... HDV>
|
typename... HDV>
|
||||||
void LaunchCUDA(Functor _func, HDV*... _vectors) const {
|
void LaunchCUDA(Functor _func, HDV*... _vectors) const {
|
||||||
@ -161,7 +161,7 @@ class Transform {
|
|||||||
|
|
||||||
LOG(FATAL) << "Not part of device code. WITH_CUDA: " << WITH_CUDA();
|
LOG(FATAL) << "Not part of device code. WITH_CUDA: " << WITH_CUDA();
|
||||||
}
|
}
|
||||||
#endif // defined(__CUDACC__) || defined(__HIP_PLATFORM_AMD__)
|
#endif // defined(__CUDACC__) || defined(__HIPCC__)
|
||||||
|
|
||||||
template <typename... HDV>
|
template <typename... HDV>
|
||||||
void LaunchCPU(Functor func, HDV *...vectors) const {
|
void LaunchCPU(Functor func, HDV *...vectors) const {
|
||||||
|
|||||||
@ -28,7 +28,7 @@
|
|||||||
|
|
||||||
#if defined(XGBOOST_USE_CUDA)
|
#if defined(XGBOOST_USE_CUDA)
|
||||||
#include "cuda_fp16.h"
|
#include "cuda_fp16.h"
|
||||||
#elif defined(__HIP_PLATFORM_AMD__)
|
#elif defined(XGBOOST_USE_HIP)
|
||||||
#include <hip/hip_fp16.h>
|
#include <hip/hip_fp16.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -323,7 +323,7 @@ class ArrayInterfaceHandler {
|
|||||||
template <typename T, typename E = void>
|
template <typename T, typename E = void>
|
||||||
struct ToDType;
|
struct ToDType;
|
||||||
// float
|
// float
|
||||||
#if defined(XGBOOST_USE_CUDA) || defined(__HIP_PLATFORM_AMD__)
|
#if defined(XGBOOST_USE_CUDA) || defined(XGBOOST_USE_HIP)
|
||||||
template <>
|
template <>
|
||||||
struct ToDType<__half> {
|
struct ToDType<__half> {
|
||||||
static constexpr ArrayInterfaceHandler::Type kType = ArrayInterfaceHandler::kF2;
|
static constexpr ArrayInterfaceHandler::Type kType = ArrayInterfaceHandler::kF2;
|
||||||
@ -478,7 +478,7 @@ class ArrayInterface {
|
|||||||
CHECK(sizeof(long double) == 16) << error::NoF128();
|
CHECK(sizeof(long double) == 16) << error::NoF128();
|
||||||
type = T::kF16;
|
type = T::kF16;
|
||||||
} else if (typestr[1] == 'f' && typestr[2] == '2') {
|
} else if (typestr[1] == 'f' && typestr[2] == '2') {
|
||||||
#if defined(XGBOOST_USE_CUDA) || defined(__HIP_PLATFORM_AMD__)
|
#if defined(XGBOOST_USE_CUDA) || defined(XGBOOST_USE_HIP)
|
||||||
type = T::kF2;
|
type = T::kF2;
|
||||||
#else
|
#else
|
||||||
LOG(FATAL) << "Half type is not supported.";
|
LOG(FATAL) << "Half type is not supported.";
|
||||||
@ -517,7 +517,7 @@ class ArrayInterface {
|
|||||||
using T = ArrayInterfaceHandler::Type;
|
using T = ArrayInterfaceHandler::Type;
|
||||||
switch (type) {
|
switch (type) {
|
||||||
case T::kF2: {
|
case T::kF2: {
|
||||||
#if defined(XGBOOST_USE_CUDA) || defined(__HIP_PLATFORM_AMD__)
|
#if defined(XGBOOST_USE_CUDA) || defined(XGBOOST_USE_HIP)
|
||||||
return func(reinterpret_cast<__half const *>(data));
|
return func(reinterpret_cast<__half const *>(data));
|
||||||
#endif // defined(XGBOOST_USE_CUDA)
|
#endif // defined(XGBOOST_USE_CUDA)
|
||||||
}
|
}
|
||||||
@ -525,7 +525,7 @@ class ArrayInterface {
|
|||||||
return func(reinterpret_cast<float const *>(data));
|
return func(reinterpret_cast<float const *>(data));
|
||||||
case T::kF8:
|
case T::kF8:
|
||||||
return func(reinterpret_cast<double const *>(data));
|
return func(reinterpret_cast<double const *>(data));
|
||||||
#if defined(__CUDA_ARCH__ ) || defined(__HIP_PLATFORM_AMD__)
|
#if defined(__CUDA_ARCH__ ) || defined(XGBOOST_USE_HIP)
|
||||||
case T::kF16: {
|
case T::kF16: {
|
||||||
// CUDA device code doesn't support long double.
|
// CUDA device code doesn't support long double.
|
||||||
SPAN_CHECK(false);
|
SPAN_CHECK(false);
|
||||||
@ -572,7 +572,7 @@ class ArrayInterface {
|
|||||||
static_assert(sizeof...(index) <= D, "Invalid index.");
|
static_assert(sizeof...(index) <= D, "Invalid index.");
|
||||||
return this->DispatchCall([=](auto const *p_values) -> T {
|
return this->DispatchCall([=](auto const *p_values) -> T {
|
||||||
std::size_t offset = linalg::detail::Offset<0ul>(strides, 0ul, index...);
|
std::size_t offset = linalg::detail::Offset<0ul>(strides, 0ul, index...);
|
||||||
#if defined(XGBOOST_USE_CUDA) || defined(__HIP_PLATFORM_AMD__)
|
#if defined(XGBOOST_USE_CUDA) || defined(XGBOOST_USE_HIP)
|
||||||
// No operator defined for half -> size_t
|
// No operator defined for half -> size_t
|
||||||
using Type = std::conditional_t<
|
using Type = std::conditional_t<
|
||||||
std::is_same<__half,
|
std::is_same<__half,
|
||||||
@ -606,7 +606,7 @@ template <typename Fn>
|
|||||||
auto DispatchDType(ArrayInterfaceHandler::Type dtype, Fn dispatch) {
|
auto DispatchDType(ArrayInterfaceHandler::Type dtype, Fn dispatch) {
|
||||||
switch (dtype) {
|
switch (dtype) {
|
||||||
case ArrayInterfaceHandler::kF2: {
|
case ArrayInterfaceHandler::kF2: {
|
||||||
#if defined(XGBOOST_USE_CUDA) || defined(__HIP_PLATFORM_AMD__)
|
#if defined(XGBOOST_USE_CUDA) || defined(XGBOOST_USE_HIP)
|
||||||
return dispatch(__half{});
|
return dispatch(__half{});
|
||||||
#else
|
#else
|
||||||
LOG(FATAL) << "half type is only supported for CUDA input.";
|
LOG(FATAL) << "half type is only supported for CUDA input.";
|
||||||
|
|||||||
@ -281,7 +281,7 @@ void CopyDataToEllpack(const AdapterBatchT& batch, common::Span<FeatureType cons
|
|||||||
cub::NullType(), batch.Size(), nullptr, false);
|
cub::NullType(), batch.Size(), nullptr, false);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#elif defined (__HIP_PLATFORM_AMD__)
|
#elif defined (__HIPCC__)
|
||||||
|
|
||||||
rocprim::inclusive_scan(nullptr, temp_storage_bytes, key_value_index_iter, out, batch.Size(), TupleScanOp<Tuple>());
|
rocprim::inclusive_scan(nullptr, temp_storage_bytes, key_value_index_iter, out, batch.Size(), TupleScanOp<Tuple>());
|
||||||
|
|
||||||
|
|||||||
@ -13,7 +13,7 @@ namespace xgboost {
|
|||||||
namespace data {
|
namespace data {
|
||||||
struct LabelsCheck {
|
struct LabelsCheck {
|
||||||
XGBOOST_DEVICE bool operator()(float y) {
|
XGBOOST_DEVICE bool operator()(float y) {
|
||||||
#if defined(__CUDA_ARCH__) || defined(__HIP_PLATFORM_AMD__)
|
#if defined(__CUDA_ARCH__) || defined(__HIPCC__)
|
||||||
return ::isnan(y) || ::isinf(y);
|
return ::isnan(y) || ::isinf(y);
|
||||||
#else
|
#else
|
||||||
return std::isnan(y) || std::isinf(y);
|
return std::isnan(y) || std::isinf(y);
|
||||||
|
|||||||
@ -124,7 +124,7 @@ class TreeEvaluator {
|
|||||||
[[nodiscard]] XGBOOST_DEVICE float Divide(float a, float b) const {
|
[[nodiscard]] XGBOOST_DEVICE float Divide(float a, float b) const {
|
||||||
#ifdef __CUDA_ARCH__
|
#ifdef __CUDA_ARCH__
|
||||||
return __fdividef(a, b);
|
return __fdividef(a, b);
|
||||||
#elif defined(__HIP_PLATFORM_AMD__)
|
#elif defined(__HIPCC__)
|
||||||
return a / b;
|
return a / b;
|
||||||
#else
|
#else
|
||||||
return a / b;
|
return a / b;
|
||||||
|
|||||||
@ -15,10 +15,10 @@
|
|||||||
#include "../filesystem.h" // dmlc::TemporaryDirectory
|
#include "../filesystem.h" // dmlc::TemporaryDirectory
|
||||||
#include "../helpers.h"
|
#include "../helpers.h"
|
||||||
|
|
||||||
#if defined(__CUDACC__) || defined(__HIP_PLATFORM_AMD__)
|
#if defined(__CUDACC__) || defined(__HIPCC__)
|
||||||
#include <xgboost/json.h>
|
#include <xgboost/json.h>
|
||||||
#include "../../../src/data/device_adapter.cuh"
|
#include "../../../src/data/device_adapter.cuh"
|
||||||
#endif // __CUDACC__, __HIP_PLATFORM_AMD__
|
#endif // __CUDACC__, __HIPCC__
|
||||||
|
|
||||||
// Some helper functions used to test both GPU and CPU algorithms
|
// Some helper functions used to test both GPU and CPU algorithms
|
||||||
//
|
//
|
||||||
@ -47,7 +47,7 @@ inline std::vector<float> GenerateRandomWeights(int num_rows) {
|
|||||||
return w;
|
return w;
|
||||||
}
|
}
|
||||||
|
|
||||||
#if defined(__CUDACC__) || defined(__HIP_PLATFORM_AMD__)
|
#if defined(__CUDACC__) || defined(__HIPCC__)
|
||||||
inline data::CupyAdapter AdapterFromData(const thrust::device_vector<float> &x,
|
inline data::CupyAdapter AdapterFromData(const thrust::device_vector<float> &x,
|
||||||
int num_rows, int num_columns) {
|
int num_rows, int num_columns) {
|
||||||
Json array_interface{Object()};
|
Json array_interface{Object()};
|
||||||
|
|||||||
@ -99,7 +99,7 @@ struct TestRBeginREnd {
|
|||||||
|
|
||||||
Span<float> s (arr);
|
Span<float> s (arr);
|
||||||
|
|
||||||
#if defined(__CUDA_ARCH__) || defined(__HIP_PLATFORM_AMD__)
|
#if defined(__CUDA_ARCH__) || defined(__HIPCC__)
|
||||||
auto rbeg = dh::trbegin(s);
|
auto rbeg = dh::trbegin(s);
|
||||||
auto rend = dh::trend(s);
|
auto rend = dh::trend(s);
|
||||||
#else
|
#else
|
||||||
|
|||||||
@ -14,7 +14,7 @@
|
|||||||
namespace xgboost::common {
|
namespace xgboost::common {
|
||||||
namespace {
|
namespace {
|
||||||
constexpr DeviceOrd TransformDevice() {
|
constexpr DeviceOrd TransformDevice() {
|
||||||
#if defined(__CUDACC__) || defined(__HIP_PLATFORM_AMD__)
|
#if defined(__CUDACC__) || defined(__HIPCC__)
|
||||||
return DeviceOrd::CUDA(0);
|
return DeviceOrd::CUDA(0);
|
||||||
#else
|
#else
|
||||||
return DeviceOrd::CPU();
|
return DeviceOrd::CPU();
|
||||||
@ -51,7 +51,7 @@ TEST(Transform, DeclareUnifiedTest(Basic)) {
|
|||||||
ASSERT_TRUE(std::equal(h_sol.begin(), h_sol.end(), res.begin()));
|
ASSERT_TRUE(std::equal(h_sol.begin(), h_sol.end(), res.begin()));
|
||||||
}
|
}
|
||||||
|
|
||||||
#if !defined(__CUDACC__) && !defined(__HIP_PLATFORM_AMD__)
|
#if !defined(__CUDACC__) && !defined(__HIPCC__)
|
||||||
TEST(TransformDeathTest, Exception) {
|
TEST(TransformDeathTest, Exception) {
|
||||||
size_t const kSize{16};
|
size_t const kSize{16};
|
||||||
std::vector<float> h_in(kSize);
|
std::vector<float> h_in(kSize);
|
||||||
|
|||||||
@ -28,19 +28,19 @@
|
|||||||
#include "filesystem.h" // dmlc::TemporaryDirectory
|
#include "filesystem.h" // dmlc::TemporaryDirectory
|
||||||
#include "xgboost/linalg.h"
|
#include "xgboost/linalg.h"
|
||||||
|
|
||||||
#if defined(__CUDACC__) || defined(__HIP_PLATFORM_AMD__)
|
#if defined(__CUDACC__) || defined(__HIPCC__)
|
||||||
#define DeclareUnifiedTest(name) GPU ## name
|
#define DeclareUnifiedTest(name) GPU ## name
|
||||||
#else
|
#else
|
||||||
#define DeclareUnifiedTest(name) name
|
#define DeclareUnifiedTest(name) name
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(__CUDACC__) || defined(__HIP_PLATFORM_AMD__)
|
#if defined(__CUDACC__) || defined(__HIPCC__)
|
||||||
#define GPUIDX (common::AllVisibleGPUs() == 1 ? 0 : collective::GetRank())
|
#define GPUIDX (common::AllVisibleGPUs() == 1 ? 0 : collective::GetRank())
|
||||||
#else
|
#else
|
||||||
#define GPUIDX (-1)
|
#define GPUIDX (-1)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(__CUDACC__) || defined(__HIP_PLATFORM_AMD__)
|
#if defined(__CUDACC__) || defined(__HIPCC__)
|
||||||
#define DeclareUnifiedDistributedTest(name) MGPU ## name
|
#define DeclareUnifiedDistributedTest(name) MGPU ## name
|
||||||
#else
|
#else
|
||||||
#define DeclareUnifiedDistributedTest(name) name
|
#define DeclareUnifiedDistributedTest(name) name
|
||||||
|
|||||||
@ -3,7 +3,7 @@
|
|||||||
*/
|
*/
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#if defined(__CUDACC__) || defined(__HIP_PLATFORM_AMD__)
|
#if defined(__CUDACC__) || defined(__HIPCC__)
|
||||||
#include "../../src/data/ellpack_page.cuh"
|
#include "../../src/data/ellpack_page.cuh"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -12,7 +12,7 @@
|
|||||||
#include "./helpers.h" // for RandomDataGenerator
|
#include "./helpers.h" // for RandomDataGenerator
|
||||||
|
|
||||||
namespace xgboost {
|
namespace xgboost {
|
||||||
#if defined(__CUDACC__) || defined(__HIP_PLATFORM_AMD__)
|
#if defined(__CUDACC__) || defined(__HIPCC__)
|
||||||
namespace {
|
namespace {
|
||||||
class HistogramCutsWrapper : public common::HistogramCuts {
|
class HistogramCutsWrapper : public common::HistogramCuts {
|
||||||
public:
|
public:
|
||||||
|
|||||||
@ -20,7 +20,7 @@
|
|||||||
namespace xgboost {
|
namespace xgboost {
|
||||||
namespace metric {
|
namespace metric {
|
||||||
|
|
||||||
#if !defined(__CUDACC__) && !defined(__HIP_PLATFORM_AMD__)
|
#if !defined(__CUDACC__) && !defined(__HIPCC__)
|
||||||
TEST(Metric, AMS) {
|
TEST(Metric, AMS) {
|
||||||
auto ctx = MakeCUDACtx(GPUIDX);
|
auto ctx = MakeCUDACtx(GPUIDX);
|
||||||
EXPECT_ANY_THROW(Metric::Create("ams", &ctx));
|
EXPECT_ANY_THROW(Metric::Create("ams", &ctx));
|
||||||
|
|||||||
@ -278,7 +278,7 @@ TEST(Objective, DeclareUnifiedTest(TweedieRegressionGPair)) {
|
|||||||
ASSERT_EQ(obj->DefaultEvalMetric(), std::string{"tweedie-nloglik@1.1"});
|
ASSERT_EQ(obj->DefaultEvalMetric(), std::string{"tweedie-nloglik@1.1"});
|
||||||
}
|
}
|
||||||
|
|
||||||
#if defined(__CUDACC__) || defined(__HIP_PLATFORM_AMD__)
|
#if defined(__CUDACC__) || defined(__HIPCC__)
|
||||||
TEST(Objective, CPU_vs_CUDA) {
|
TEST(Objective, CPU_vs_CUDA) {
|
||||||
Context ctx = MakeCUDACtx(GPUIDX);
|
Context ctx = MakeCUDACtx(GPUIDX);
|
||||||
|
|
||||||
@ -356,7 +356,7 @@ TEST(Objective, DeclareUnifiedTest(TweedieRegressionBasic)) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// CoxRegression not implemented in GPU code, no need for testing.
|
// CoxRegression not implemented in GPU code, no need for testing.
|
||||||
#if !defined(__CUDACC__) && !defined(__HIP_PLATFORM_AMD__)
|
#if !defined(__CUDACC__) && !defined(__HIPCC__)
|
||||||
TEST(Objective, CoxRegressionGPair) {
|
TEST(Objective, CoxRegressionGPair) {
|
||||||
Context ctx = MakeCUDACtx(GPUIDX);
|
Context ctx = MakeCUDACtx(GPUIDX);
|
||||||
std::vector<std::pair<std::string, std::string>> args;
|
std::vector<std::pair<std::string, std::string>> args;
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user