add HIP flags, common

This commit is contained in:
amdsc21 2023-03-08 03:11:49 +01:00
parent 1e1c7fd8d5
commit 840f15209c
10 changed files with 44 additions and 33 deletions

View File

@ -13,18 +13,18 @@
#include <string> #include <string>
#include <vector> #include <vector>
#if defined(__CUDACC__) #if defined(__CUDACC__) || defined(__HIP_PLATFORM_AMD__)
#include <thrust/copy.h> #include <thrust/copy.h>
#include <thrust/device_ptr.h> #include <thrust/device_ptr.h>
#include "device_helpers.cuh" #include "device_helpers.cuh"
#endif // defined(__CUDACC__) #endif // defined(__CUDACC__) || defined(__HIP_PLATFORM_AMD__)
#include "xgboost/span.h" #include "xgboost/span.h"
#include "common.h" #include "common.h"
namespace xgboost { namespace xgboost {
#if defined(__CUDACC__) #if defined(__CUDACC__) || defined(__HIP_PLATFORM_AMD__)
using BitFieldAtomicType = unsigned long long; // NOLINT using BitFieldAtomicType = unsigned long long; // NOLINT
__forceinline__ __device__ BitFieldAtomicType AtomicOr(BitFieldAtomicType* address, __forceinline__ __device__ BitFieldAtomicType AtomicOr(BitFieldAtomicType* address,
@ -48,7 +48,7 @@ __forceinline__ __device__ BitFieldAtomicType AtomicAnd(BitFieldAtomicType* addr
return old; return old;
} }
#endif // defined(__CUDACC__) #endif // defined(__CUDACC__) || defined(__HIP_PLATFORM_AMD__)
/*! /*!
* \brief A non-owning type with auxiliary methods defined for manipulating bits. * \brief A non-owning type with auxiliary methods defined for manipulating bits.
@ -100,7 +100,7 @@ struct BitFieldContainer {
XGBOOST_DEVICE static size_t ComputeStorageSize(index_type size) { XGBOOST_DEVICE static size_t ComputeStorageSize(index_type size) {
return common::DivRoundUp(size, kValueSize); return common::DivRoundUp(size, kValueSize);
} }
#if defined(__CUDA_ARCH__) #if defined(__CUDA_ARCH__) || defined(__HIP_PLATFORM_AMD__)
__device__ BitFieldContainer& operator|=(BitFieldContainer const& rhs) { __device__ BitFieldContainer& operator|=(BitFieldContainer const& rhs) {
auto tid = blockIdx.x * blockDim.x + threadIdx.x; auto tid = blockIdx.x * blockDim.x + threadIdx.x;
size_t min_size = min(bits_.size(), rhs.bits_.size()); size_t min_size = min(bits_.size(), rhs.bits_.size());
@ -117,9 +117,9 @@ struct BitFieldContainer {
} }
return *this; return *this;
} }
#endif // #if defined(__CUDA_ARCH__) #endif // #if defined(__CUDA_ARCH__) || defined(__HIP_PLATFORM_AMD__)
#if defined(__CUDA_ARCH__) #if defined(__CUDA_ARCH__) || defined(__HIP_PLATFORM_AMD__)
__device__ BitFieldContainer& operator&=(BitFieldContainer const& rhs) { __device__ BitFieldContainer& operator&=(BitFieldContainer const& rhs) {
size_t min_size = min(bits_.size(), rhs.bits_.size()); size_t min_size = min(bits_.size(), rhs.bits_.size());
auto tid = blockIdx.x * blockDim.x + threadIdx.x; auto tid = blockIdx.x * blockDim.x + threadIdx.x;
@ -138,7 +138,7 @@ struct BitFieldContainer {
} }
#endif // defined(__CUDA_ARCH__) #endif // defined(__CUDA_ARCH__)
#if defined(__CUDA_ARCH__) #if defined(__CUDA_ARCH__) || defined(__HIP_PLATFORM_AMD__)
__device__ auto Set(index_type pos) { __device__ auto Set(index_type pos) {
Pos pos_v = Direction::Shift(ToBitPos(pos)); Pos pos_v = Direction::Shift(ToBitPos(pos));
value_type& value = bits_[pos_v.int_pos]; value_type& value = bits_[pos_v.int_pos];
@ -166,7 +166,7 @@ struct BitFieldContainer {
value_type clear_bit = ~(kOne << pos_v.bit_pos); value_type clear_bit = ~(kOne << pos_v.bit_pos);
value &= clear_bit; value &= clear_bit;
} }
#endif // defined(__CUDA_ARCH__) #endif // defined(__CUDA_ARCH__) || defined(__HIP_PLATFORM_AMD__)
XGBOOST_DEVICE bool Check(Pos pos_v) const { XGBOOST_DEVICE bool Check(Pos pos_v) const {
pos_v = Direction::Shift(pos_v); pos_v = Direction::Shift(pos_v);

View File

@ -27,6 +27,12 @@
#define WITH_CUDA() true #define WITH_CUDA() true
#elif defined(__HIP_PLATFORM_AMD__)
#include <thrust/system/hip/error.h>
#include <thrust/system_error.h>
#define WITH_CUDA() true
#else #else
#define WITH_CUDA() false #define WITH_CUDA() false
@ -34,7 +40,7 @@
#endif // defined(__CUDACC__) #endif // defined(__CUDACC__)
namespace dh { namespace dh {
#if defined(__CUDACC__) #if defined(__CUDACC__) || defined(__HIP_PLATFORM_AMD__)
/* /*
* Error handling functions * Error handling functions
*/ */
@ -49,7 +55,7 @@ inline cudaError_t ThrowOnCudaError(cudaError_t code, const char *file,
} }
return code; return code;
} }
#endif // defined(__CUDACC__) #endif // defined(__CUDACC__) || defined(__HIP_PLATFORM_AMD__)
} // namespace dh } // namespace dh
namespace xgboost { namespace xgboost {
@ -167,7 +173,7 @@ class Range {
int AllVisibleGPUs(); int AllVisibleGPUs();
inline void AssertGPUSupport() { inline void AssertGPUSupport() {
#ifndef XGBOOST_USE_CUDA #if !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP)
LOG(FATAL) << "XGBoost version not compiled with GPU support."; LOG(FATAL) << "XGBoost version not compiled with GPU support.";
#endif // XGBOOST_USE_CUDA #endif // XGBOOST_USE_CUDA
} }
@ -180,7 +186,7 @@ inline void AssertOneAPISupport() {
void SetDevice(std::int32_t device); void SetDevice(std::int32_t device);
#if !defined(XGBOOST_USE_CUDA) #if !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP)
inline void SetDevice(std::int32_t device) { inline void SetDevice(std::int32_t device) {
if (device >= 0) { if (device >= 0) {
AssertGPUSupport(); AssertGPUSupport();

View File

@ -11,9 +11,9 @@
#include "common.h" #include "common.h"
#ifdef __CUDACC__ #if defined(__CUDACC__) || defined(__HIP_PLATFORM_AMD__)
#include "device_helpers.cuh" #include "device_helpers.cuh"
#endif // __CUDACC__ #endif // __CUDACC__ || __HIP_PLATFORM_AMD__
namespace xgboost { namespace xgboost {
namespace common { namespace common {
@ -105,7 +105,7 @@ class CompressedBufferWriter {
} }
} }
#ifdef __CUDACC__ #if defined(__CUDACC__) || defined(__HIP_PLATFORM_AMD__)
__device__ void AtomicWriteSymbol __device__ void AtomicWriteSymbol
(CompressedByteT* buffer, uint64_t symbol, size_t offset) { (CompressedByteT* buffer, uint64_t symbol, size_t offset) {
size_t ibit_start = offset * symbol_bits_; size_t ibit_start = offset * symbol_bits_;
@ -119,7 +119,7 @@ class CompressedBufferWriter {
symbol >>= 8; symbol >>= 8;
} }
} }
#endif // __CUDACC__ #endif // __CUDACC__ || __HIP_PLATFORM_AMD__
template <typename IterT> template <typename IterT>
void Write(CompressedByteT *buffer, IterT input_begin, IterT input_end) { void Write(CompressedByteT *buffer, IterT input_begin, IterT input_end) {

View File

@ -53,7 +53,7 @@
#endif // defined(XGBOOST_USE_RMM) && XGBOOST_USE_RMM == 1 #endif // defined(XGBOOST_USE_RMM) && XGBOOST_USE_RMM == 1
#if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 600 || defined(__clang__) #if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 600 || defined(__clang__) || defined(__HIP_PLATFORM_AMD__)
#else // In device code and CUDA < 600 #else // In device code and CUDA < 600
__device__ __forceinline__ double atomicAdd(double* address, double val) { // NOLINT __device__ __forceinline__ double atomicAdd(double* address, double val) { // NOLINT
@ -702,6 +702,8 @@ typename std::iterator_traits<T>::value_type SumReduction(T in, int nVals) {
constexpr std::pair<int, int> CUDAVersion() { constexpr std::pair<int, int> CUDAVersion() {
#if defined(__CUDACC_VER_MAJOR__) #if defined(__CUDACC_VER_MAJOR__)
return std::make_pair(__CUDACC_VER_MAJOR__, __CUDACC_VER_MINOR__); return std::make_pair(__CUDACC_VER_MAJOR__, __CUDACC_VER_MINOR__);
#elif defined(__HIP_PLATFORM_AMD__)
return std::make_pair(HIP_LIBRARY_MAJOR_VERSION, HIP_VERSION_MINOR);
#else #else
// clang/clang-tidy // clang/clang-tidy
return std::make_pair((CUDA_VERSION) / 1000, (CUDA_VERSION) % 100 / 10); return std::make_pair((CUDA_VERSION) / 1000, (CUDA_VERSION) % 100 / 10);
@ -1329,6 +1331,9 @@ class CUDAStreamView {
// CUDA > 11.0 // CUDA > 11.0
dh::safe_cuda(cudaStreamWaitEvent(stream_, cudaEvent_t{e}, cudaEventWaitDefault)); dh::safe_cuda(cudaStreamWaitEvent(stream_, cudaEvent_t{e}, cudaEventWaitDefault));
#endif // __CUDACC_VER_MAJOR__ == 11 && __CUDACC_VER_MINOR__ == 0: #endif // __CUDACC_VER_MAJOR__ == 11 && __CUDACC_VER_MINOR__ == 0:
#elif defined(__HIP_PLATFORM_AMD__)
dh::safe_cuda(hipStreamWaitEvent(stream_, hipEvent_t{e}, hipEventWaitDefault));
#else // clang #else // clang
dh::safe_cuda(cudaStreamWaitEvent(stream_, cudaEvent_t{e}, cudaEventWaitDefault)); dh::safe_cuda(cudaStreamWaitEvent(stream_, cudaEvent_t{e}, cudaEventWaitDefault));
#endif // defined(__CUDACC_VER_MAJOR__) #endif // defined(__CUDACC_VER_MAJOR__)

View File

@ -148,32 +148,32 @@ CheckNAN(T) {
return false; return false;
} }
#if XGBOOST_STRICT_R_MODE && !defined(__CUDA_ARCH__) #if XGBOOST_STRICT_R_MODE && !defined(__CUDA_ARCH__) && !defined(__HIP_PLATFORM_AMD__)
bool CheckNAN(double v); bool CheckNAN(double v);
#else #else
XGBOOST_DEVICE bool inline CheckNAN(float x) { XGBOOST_DEVICE bool inline CheckNAN(float x) {
#if defined(__CUDA_ARCH__) #if defined(__CUDA_ARCH__) || defined(__HIP_PLATFORM_AMD__)
return isnan(x); return isnan(x);
#else #else
return std::isnan(x); return std::isnan(x);
#endif // defined(__CUDA_ARCH__) #endif // defined(__CUDA_ARCH__) || defined(__HIP_PLATFORM_AMD__)
} }
XGBOOST_DEVICE bool inline CheckNAN(double x) { XGBOOST_DEVICE bool inline CheckNAN(double x) {
#if defined(__CUDA_ARCH__) #if defined(__CUDA_ARCH__) || defined(__HIP_PLATFORM_AMD__)
return isnan(x); return isnan(x);
#else #else
return std::isnan(x); return std::isnan(x);
#endif // defined(__CUDA_ARCH__) #endif // defined(__CUDA_ARCH__) || defined(__HIP_PLATFORM_AMD__)
} }
#endif // XGBOOST_STRICT_R_MODE && !defined(__CUDA_ARCH__) #endif // XGBOOST_STRICT_R_MODE && !defined(__CUDA_ARCH__)
// GPU version is not uploaded in CRAN anyway. // GPU version is not uploaded in CRAN anyway.
// Specialize only when using R with CPU. // Specialize only when using R with CPU.
#if XGBOOST_STRICT_R_MODE && !defined(XGBOOST_USE_CUDA) #if XGBOOST_STRICT_R_MODE && !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP)
double LogGamma(double v); double LogGamma(double v);
#else // Not R or R with GPU. #else // Not R or R with GPU.
@ -196,7 +196,7 @@ XGBOOST_DEVICE inline T LogGamma(T v) {
#endif // _MSC_VER #endif // _MSC_VER
} }
#endif // XGBOOST_STRICT_R_MODE && !defined(XGBOOST_USE_CUDA) #endif // XGBOOST_STRICT_R_MODE && !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP)
} // namespace common } // namespace common
} // namespace xgboost } // namespace xgboost

View File

@ -112,7 +112,7 @@ void Median(Context const* ctx, linalg::TensorView<float const, 2> t, OptionalWe
void Mean(Context const* ctx, linalg::VectorView<float const> v, linalg::VectorView<float> out); void Mean(Context const* ctx, linalg::VectorView<float const> v, linalg::VectorView<float> out);
#if !defined(XGBOOST_USE_CUDA) #if !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP)
inline void Median(Context const*, linalg::TensorView<float const, 2>, OptionalWeights, inline void Median(Context const*, linalg::TensorView<float const, 2>, OptionalWeights,
linalg::Tensor<float, 1>*) { linalg::Tensor<float, 1>*) {
common::AssertGPUSupport(); common::AssertGPUSupport();
@ -120,7 +120,7 @@ inline void Median(Context const*, linalg::TensorView<float const, 2>, OptionalW
inline void Mean(Context const*, linalg::VectorView<float const>, linalg::VectorView<float>) { inline void Mean(Context const*, linalg::VectorView<float const>, linalg::VectorView<float>) {
common::AssertGPUSupport(); common::AssertGPUSupport();
} }
#endif // !defined(XGBOOST_USE_CUDA) #endif // !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP)
} // namespace cuda_impl } // namespace cuda_impl
/** /**

View File

@ -17,9 +17,9 @@
#include "xgboost/host_device_vector.h" #include "xgboost/host_device_vector.h"
#include "xgboost/span.h" #include "xgboost/span.h"
#if defined (__CUDACC__) #if defined (__CUDACC__) || defined(__HIP_PLATFORM_AMD__)
#include "device_helpers.cuh" #include "device_helpers.cuh"
#endif // defined (__CUDACC__) #endif // defined (__CUDACC__) || defined(__HIP_PLATFORM_AMD__)
namespace xgboost { namespace xgboost {
namespace common { namespace common {
@ -28,7 +28,7 @@ constexpr size_t kBlockThreads = 256;
namespace detail { namespace detail {
#if defined(__CUDACC__) #if defined(__CUDACC__) || defined(__HIP_PLATFORM_AMD__)
template <typename Functor, typename... SpanType> template <typename Functor, typename... SpanType>
__global__ void LaunchCUDAKernel(Functor _func, Range _range, __global__ void LaunchCUDAKernel(Functor _func, Range _range,
SpanType... _spans) { SpanType... _spans) {
@ -36,7 +36,7 @@ __global__ void LaunchCUDAKernel(Functor _func, Range _range,
_func(i, _spans...); _func(i, _spans...);
} }
} }
#endif // defined(__CUDACC__) #endif // defined(__CUDACC__) || defined(__HIP_PLATFORM_AMD__)
} // namespace detail } // namespace detail
@ -127,7 +127,7 @@ class Transform {
UnpackShard(device, _vectors...); UnpackShard(device, _vectors...);
} }
#if defined(__CUDACC__) #if defined(__CUDACC__) || defined(__HIP_PLATFORM_AMD__)
template <typename std::enable_if<CompiledWithCuda>::type* = nullptr, template <typename std::enable_if<CompiledWithCuda>::type* = nullptr,
typename... HDV> typename... HDV>
void LaunchCUDA(Functor _func, HDV*... _vectors) const { void LaunchCUDA(Functor _func, HDV*... _vectors) const {
@ -159,7 +159,7 @@ class Transform {
LOG(FATAL) << "Not part of device code. WITH_CUDA: " << WITH_CUDA(); LOG(FATAL) << "Not part of device code. WITH_CUDA: " << WITH_CUDA();
} }
#endif // defined(__CUDACC__) #endif // defined(__CUDACC__) || defined(__HIP_PLATFORM_AMD__)
template <typename... HDV> template <typename... HDV>
void LaunchCPU(Functor func, HDV *...vectors) const { void LaunchCPU(Functor func, HDV *...vectors) const {