diff --git a/src/common/algorithm.hip.h b/src/common/algorithm.hip.h deleted file mode 100644 index e69de29bb..000000000 diff --git a/src/common/bitfield.h b/src/common/bitfield.h index 6bb5f3404..0c726f70f 100644 --- a/src/common/bitfield.h +++ b/src/common/bitfield.h @@ -13,18 +13,18 @@ #include #include -#if defined(__CUDACC__) +#if defined(__CUDACC__) || defined(__HIP_PLATFORM_AMD__) #include #include #include "device_helpers.cuh" -#endif // defined(__CUDACC__) +#endif // defined(__CUDACC__) || defined(__HIP_PLATFORM_AMD__) #include "xgboost/span.h" #include "common.h" namespace xgboost { -#if defined(__CUDACC__) +#if defined(__CUDACC__) || defined(__HIP_PLATFORM_AMD__) using BitFieldAtomicType = unsigned long long; // NOLINT __forceinline__ __device__ BitFieldAtomicType AtomicOr(BitFieldAtomicType* address, @@ -48,7 +48,7 @@ __forceinline__ __device__ BitFieldAtomicType AtomicAnd(BitFieldAtomicType* addr return old; } -#endif // defined(__CUDACC__) +#endif // defined(__CUDACC__) || defined(__HIP_PLATFORM_AMD__) /*! * \brief A non-owning type with auxiliary methods defined for manipulating bits. @@ -100,7 +100,7 @@ struct BitFieldContainer { XGBOOST_DEVICE static size_t ComputeStorageSize(index_type size) { return common::DivRoundUp(size, kValueSize); } -#if defined(__CUDA_ARCH__) +#if defined(__CUDA_ARCH__) || defined(__HIP_PLATFORM_AMD__) __device__ BitFieldContainer& operator|=(BitFieldContainer const& rhs) { auto tid = blockIdx.x * blockDim.x + threadIdx.x; size_t min_size = min(bits_.size(), rhs.bits_.size()); @@ -117,9 +117,9 @@ struct BitFieldContainer { } return *this; } -#endif // #if defined(__CUDA_ARCH__) +#endif // #if defined(__CUDA_ARCH__) || defined(__HIP_PLATFORM_AMD__) -#if defined(__CUDA_ARCH__) +#if defined(__CUDA_ARCH__) || defined(__HIP_PLATFORM_AMD__) __device__ BitFieldContainer& operator&=(BitFieldContainer const& rhs) { size_t min_size = min(bits_.size(), rhs.bits_.size()); auto tid = blockIdx.x * blockDim.x + threadIdx.x; @@ -138,7 +138,7 @@ struct BitFieldContainer { } #endif // defined(__CUDA_ARCH__) -#if defined(__CUDA_ARCH__) +#if defined(__CUDA_ARCH__) || defined(__HIP_PLATFORM_AMD__) __device__ auto Set(index_type pos) { Pos pos_v = Direction::Shift(ToBitPos(pos)); value_type& value = bits_[pos_v.int_pos]; @@ -166,7 +166,7 @@ struct BitFieldContainer { value_type clear_bit = ~(kOne << pos_v.bit_pos); value &= clear_bit; } -#endif // defined(__CUDA_ARCH__) +#endif // defined(__CUDA_ARCH__) || defined(__HIP_PLATFORM_AMD__) XGBOOST_DEVICE bool Check(Pos pos_v) const { pos_v = Direction::Shift(pos_v); diff --git a/src/common/common.h b/src/common/common.h index 35c807bef..6ea342232 100644 --- a/src/common/common.h +++ b/src/common/common.h @@ -27,6 +27,12 @@ #define WITH_CUDA() true +#elif defined(__HIP_PLATFORM_AMD__) +#include +#include + +#define WITH_CUDA() true + #else #define WITH_CUDA() false @@ -34,7 +40,7 @@ #endif // defined(__CUDACC__) namespace dh { -#if defined(__CUDACC__) +#if defined(__CUDACC__) || defined(__HIP_PLATFORM_AMD__) /* * Error handling functions */ @@ -49,7 +55,7 @@ inline cudaError_t ThrowOnCudaError(cudaError_t code, const char *file, } return code; } -#endif // defined(__CUDACC__) +#endif // defined(__CUDACC__) || defined(__HIP_PLATFORM_AMD__) } // namespace dh namespace xgboost { @@ -167,7 +173,7 @@ class Range { int AllVisibleGPUs(); inline void AssertGPUSupport() { -#ifndef XGBOOST_USE_CUDA +#if !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP) LOG(FATAL) << "XGBoost version not compiled with GPU support."; #endif // XGBOOST_USE_CUDA } @@ -180,7 +186,7 @@ inline void AssertOneAPISupport() { void SetDevice(std::int32_t device); -#if !defined(XGBOOST_USE_CUDA) +#if !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP) inline void SetDevice(std::int32_t device) { if (device >= 0) { AssertGPUSupport(); diff --git a/src/common/compressed_iterator.h b/src/common/compressed_iterator.h index 5a5b5f252..9e7b7b22a 100644 --- a/src/common/compressed_iterator.h +++ b/src/common/compressed_iterator.h @@ -11,9 +11,9 @@ #include "common.h" -#ifdef __CUDACC__ +#if defined(__CUDACC__) || defined(__HIP_PLATFORM_AMD__) #include "device_helpers.cuh" -#endif // __CUDACC__ +#endif // __CUDACC__ || __HIP_PLATFORM_AMD__ namespace xgboost { namespace common { @@ -105,7 +105,7 @@ class CompressedBufferWriter { } } -#ifdef __CUDACC__ +#if defined(__CUDACC__) || defined(__HIP_PLATFORM_AMD__) __device__ void AtomicWriteSymbol (CompressedByteT* buffer, uint64_t symbol, size_t offset) { size_t ibit_start = offset * symbol_bits_; @@ -119,7 +119,7 @@ class CompressedBufferWriter { symbol >>= 8; } } -#endif // __CUDACC__ +#endif // __CUDACC__ || __HIP_PLATFORM_AMD__ template void Write(CompressedByteT *buffer, IterT input_begin, IterT input_end) { diff --git a/src/common/cuda_context.hip.h b/src/common/cuda_context.hip.h deleted file mode 100644 index e69de29bb..000000000 diff --git a/src/common/device_helpers.cuh b/src/common/device_helpers.cuh index 58300d06c..3fb18f493 100644 --- a/src/common/device_helpers.cuh +++ b/src/common/device_helpers.cuh @@ -53,7 +53,7 @@ #endif // defined(XGBOOST_USE_RMM) && XGBOOST_USE_RMM == 1 -#if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 600 || defined(__clang__) +#if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 600 || defined(__clang__) || defined(__HIP_PLATFORM_AMD__) #else // In device code and CUDA < 600 __device__ __forceinline__ double atomicAdd(double* address, double val) { // NOLINT @@ -702,6 +702,8 @@ typename std::iterator_traits::value_type SumReduction(T in, int nVals) { constexpr std::pair CUDAVersion() { #if defined(__CUDACC_VER_MAJOR__) return std::make_pair(__CUDACC_VER_MAJOR__, __CUDACC_VER_MINOR__); +#elif defined(__HIP_PLATFORM_AMD__) + return std::make_pair(HIP_LIBRARY_MAJOR_VERSION, HIP_VERSION_MINOR); #else // clang/clang-tidy return std::make_pair((CUDA_VERSION) / 1000, (CUDA_VERSION) % 100 / 10); @@ -1329,6 +1331,9 @@ class CUDAStreamView { // CUDA > 11.0 dh::safe_cuda(cudaStreamWaitEvent(stream_, cudaEvent_t{e}, cudaEventWaitDefault)); #endif // __CUDACC_VER_MAJOR__ == 11 && __CUDACC_VER_MINOR__ == 0: + +#elif defined(__HIP_PLATFORM_AMD__) + dh::safe_cuda(hipStreamWaitEvent(stream_, hipEvent_t{e}, hipEventWaitDefault)); #else // clang dh::safe_cuda(cudaStreamWaitEvent(stream_, cudaEvent_t{e}, cudaEventWaitDefault)); #endif // defined(__CUDACC_VER_MAJOR__) diff --git a/src/common/math.h b/src/common/math.h index 71a494544..9c9ee604d 100644 --- a/src/common/math.h +++ b/src/common/math.h @@ -148,32 +148,32 @@ CheckNAN(T) { return false; } -#if XGBOOST_STRICT_R_MODE && !defined(__CUDA_ARCH__) +#if XGBOOST_STRICT_R_MODE && !defined(__CUDA_ARCH__) && !defined(__HIP_PLATFORM_AMD__) bool CheckNAN(double v); #else XGBOOST_DEVICE bool inline CheckNAN(float x) { -#if defined(__CUDA_ARCH__) +#if defined(__CUDA_ARCH__) || defined(__HIP_PLATFORM_AMD__) return isnan(x); #else return std::isnan(x); -#endif // defined(__CUDA_ARCH__) +#endif // defined(__CUDA_ARCH__) || defined(__HIP_PLATFORM_AMD__) } XGBOOST_DEVICE bool inline CheckNAN(double x) { -#if defined(__CUDA_ARCH__) +#if defined(__CUDA_ARCH__) || defined(__HIP_PLATFORM_AMD__) return isnan(x); #else return std::isnan(x); -#endif // defined(__CUDA_ARCH__) +#endif // defined(__CUDA_ARCH__) || defined(__HIP_PLATFORM_AMD__) } #endif // XGBOOST_STRICT_R_MODE && !defined(__CUDA_ARCH__) // GPU version is not uploaded in CRAN anyway. // Specialize only when using R with CPU. -#if XGBOOST_STRICT_R_MODE && !defined(XGBOOST_USE_CUDA) +#if XGBOOST_STRICT_R_MODE && !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP) double LogGamma(double v); #else // Not R or R with GPU. @@ -196,7 +196,7 @@ XGBOOST_DEVICE inline T LogGamma(T v) { #endif // _MSC_VER } -#endif // XGBOOST_STRICT_R_MODE && !defined(XGBOOST_USE_CUDA) +#endif // XGBOOST_STRICT_R_MODE && !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP) } // namespace common } // namespace xgboost diff --git a/src/common/stats.h b/src/common/stats.h index 2f42a698e..a72545896 100644 --- a/src/common/stats.h +++ b/src/common/stats.h @@ -112,7 +112,7 @@ void Median(Context const* ctx, linalg::TensorView t, OptionalWe void Mean(Context const* ctx, linalg::VectorView v, linalg::VectorView out); -#if !defined(XGBOOST_USE_CUDA) +#if !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP) inline void Median(Context const*, linalg::TensorView, OptionalWeights, linalg::Tensor*) { common::AssertGPUSupport(); @@ -120,7 +120,7 @@ inline void Median(Context const*, linalg::TensorView, OptionalW inline void Mean(Context const*, linalg::VectorView, linalg::VectorView) { common::AssertGPUSupport(); } -#endif // !defined(XGBOOST_USE_CUDA) +#endif // !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP) } // namespace cuda_impl /** diff --git a/src/common/threading_utils.hip.h b/src/common/threading_utils.hip.h deleted file mode 100644 index e69de29bb..000000000 diff --git a/src/common/transform.h b/src/common/transform.h index a7b96766c..5f9c3f1bf 100644 --- a/src/common/transform.h +++ b/src/common/transform.h @@ -17,9 +17,9 @@ #include "xgboost/host_device_vector.h" #include "xgboost/span.h" -#if defined (__CUDACC__) +#if defined (__CUDACC__) || defined(__HIP_PLATFORM_AMD__) #include "device_helpers.cuh" -#endif // defined (__CUDACC__) +#endif // defined (__CUDACC__) || defined(__HIP_PLATFORM_AMD__) namespace xgboost { namespace common { @@ -28,7 +28,7 @@ constexpr size_t kBlockThreads = 256; namespace detail { -#if defined(__CUDACC__) +#if defined(__CUDACC__) || defined(__HIP_PLATFORM_AMD__) template __global__ void LaunchCUDAKernel(Functor _func, Range _range, SpanType... _spans) { @@ -36,7 +36,7 @@ __global__ void LaunchCUDAKernel(Functor _func, Range _range, _func(i, _spans...); } } -#endif // defined(__CUDACC__) +#endif // defined(__CUDACC__) || defined(__HIP_PLATFORM_AMD__) } // namespace detail @@ -127,7 +127,7 @@ class Transform { UnpackShard(device, _vectors...); } -#if defined(__CUDACC__) +#if defined(__CUDACC__) || defined(__HIP_PLATFORM_AMD__) template ::type* = nullptr, typename... HDV> void LaunchCUDA(Functor _func, HDV*... _vectors) const { @@ -159,7 +159,7 @@ class Transform { LOG(FATAL) << "Not part of device code. WITH_CUDA: " << WITH_CUDA(); } -#endif // defined(__CUDACC__) +#endif // defined(__CUDACC__) || defined(__HIP_PLATFORM_AMD__) template void LaunchCPU(Functor func, HDV *...vectors) const {