add HIP flags, common
This commit is contained in:
parent
1e1c7fd8d5
commit
840f15209c
@ -13,18 +13,18 @@
|
|||||||
#include <string>
|
#include <string>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
#if defined(__CUDACC__)
|
#if defined(__CUDACC__) || defined(__HIP_PLATFORM_AMD__)
|
||||||
#include <thrust/copy.h>
|
#include <thrust/copy.h>
|
||||||
#include <thrust/device_ptr.h>
|
#include <thrust/device_ptr.h>
|
||||||
#include "device_helpers.cuh"
|
#include "device_helpers.cuh"
|
||||||
#endif // defined(__CUDACC__)
|
#endif // defined(__CUDACC__) || defined(__HIP_PLATFORM_AMD__)
|
||||||
|
|
||||||
#include "xgboost/span.h"
|
#include "xgboost/span.h"
|
||||||
#include "common.h"
|
#include "common.h"
|
||||||
|
|
||||||
namespace xgboost {
|
namespace xgboost {
|
||||||
|
|
||||||
#if defined(__CUDACC__)
|
#if defined(__CUDACC__) || defined(__HIP_PLATFORM_AMD__)
|
||||||
using BitFieldAtomicType = unsigned long long; // NOLINT
|
using BitFieldAtomicType = unsigned long long; // NOLINT
|
||||||
|
|
||||||
__forceinline__ __device__ BitFieldAtomicType AtomicOr(BitFieldAtomicType* address,
|
__forceinline__ __device__ BitFieldAtomicType AtomicOr(BitFieldAtomicType* address,
|
||||||
@ -48,7 +48,7 @@ __forceinline__ __device__ BitFieldAtomicType AtomicAnd(BitFieldAtomicType* addr
|
|||||||
|
|
||||||
return old;
|
return old;
|
||||||
}
|
}
|
||||||
#endif // defined(__CUDACC__)
|
#endif // defined(__CUDACC__) || defined(__HIP_PLATFORM_AMD__)
|
||||||
|
|
||||||
/*!
|
/*!
|
||||||
* \brief A non-owning type with auxiliary methods defined for manipulating bits.
|
* \brief A non-owning type with auxiliary methods defined for manipulating bits.
|
||||||
@ -100,7 +100,7 @@ struct BitFieldContainer {
|
|||||||
XGBOOST_DEVICE static size_t ComputeStorageSize(index_type size) {
|
XGBOOST_DEVICE static size_t ComputeStorageSize(index_type size) {
|
||||||
return common::DivRoundUp(size, kValueSize);
|
return common::DivRoundUp(size, kValueSize);
|
||||||
}
|
}
|
||||||
#if defined(__CUDA_ARCH__)
|
#if defined(__CUDA_ARCH__) || defined(__HIP_PLATFORM_AMD__)
|
||||||
__device__ BitFieldContainer& operator|=(BitFieldContainer const& rhs) {
|
__device__ BitFieldContainer& operator|=(BitFieldContainer const& rhs) {
|
||||||
auto tid = blockIdx.x * blockDim.x + threadIdx.x;
|
auto tid = blockIdx.x * blockDim.x + threadIdx.x;
|
||||||
size_t min_size = min(bits_.size(), rhs.bits_.size());
|
size_t min_size = min(bits_.size(), rhs.bits_.size());
|
||||||
@ -117,9 +117,9 @@ struct BitFieldContainer {
|
|||||||
}
|
}
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
#endif // #if defined(__CUDA_ARCH__)
|
#endif // #if defined(__CUDA_ARCH__) || defined(__HIP_PLATFORM_AMD__)
|
||||||
|
|
||||||
#if defined(__CUDA_ARCH__)
|
#if defined(__CUDA_ARCH__) || defined(__HIP_PLATFORM_AMD__)
|
||||||
__device__ BitFieldContainer& operator&=(BitFieldContainer const& rhs) {
|
__device__ BitFieldContainer& operator&=(BitFieldContainer const& rhs) {
|
||||||
size_t min_size = min(bits_.size(), rhs.bits_.size());
|
size_t min_size = min(bits_.size(), rhs.bits_.size());
|
||||||
auto tid = blockIdx.x * blockDim.x + threadIdx.x;
|
auto tid = blockIdx.x * blockDim.x + threadIdx.x;
|
||||||
@ -138,7 +138,7 @@ struct BitFieldContainer {
|
|||||||
}
|
}
|
||||||
#endif // defined(__CUDA_ARCH__)
|
#endif // defined(__CUDA_ARCH__)
|
||||||
|
|
||||||
#if defined(__CUDA_ARCH__)
|
#if defined(__CUDA_ARCH__) || defined(__HIP_PLATFORM_AMD__)
|
||||||
__device__ auto Set(index_type pos) {
|
__device__ auto Set(index_type pos) {
|
||||||
Pos pos_v = Direction::Shift(ToBitPos(pos));
|
Pos pos_v = Direction::Shift(ToBitPos(pos));
|
||||||
value_type& value = bits_[pos_v.int_pos];
|
value_type& value = bits_[pos_v.int_pos];
|
||||||
@ -166,7 +166,7 @@ struct BitFieldContainer {
|
|||||||
value_type clear_bit = ~(kOne << pos_v.bit_pos);
|
value_type clear_bit = ~(kOne << pos_v.bit_pos);
|
||||||
value &= clear_bit;
|
value &= clear_bit;
|
||||||
}
|
}
|
||||||
#endif // defined(__CUDA_ARCH__)
|
#endif // defined(__CUDA_ARCH__) || defined(__HIP_PLATFORM_AMD__)
|
||||||
|
|
||||||
XGBOOST_DEVICE bool Check(Pos pos_v) const {
|
XGBOOST_DEVICE bool Check(Pos pos_v) const {
|
||||||
pos_v = Direction::Shift(pos_v);
|
pos_v = Direction::Shift(pos_v);
|
||||||
|
|||||||
@ -27,6 +27,12 @@
|
|||||||
|
|
||||||
#define WITH_CUDA() true
|
#define WITH_CUDA() true
|
||||||
|
|
||||||
|
#elif defined(__HIP_PLATFORM_AMD__)
|
||||||
|
#include <thrust/system/hip/error.h>
|
||||||
|
#include <thrust/system_error.h>
|
||||||
|
|
||||||
|
#define WITH_CUDA() true
|
||||||
|
|
||||||
#else
|
#else
|
||||||
|
|
||||||
#define WITH_CUDA() false
|
#define WITH_CUDA() false
|
||||||
@ -34,7 +40,7 @@
|
|||||||
#endif // defined(__CUDACC__)
|
#endif // defined(__CUDACC__)
|
||||||
|
|
||||||
namespace dh {
|
namespace dh {
|
||||||
#if defined(__CUDACC__)
|
#if defined(__CUDACC__) || defined(__HIP_PLATFORM_AMD__)
|
||||||
/*
|
/*
|
||||||
* Error handling functions
|
* Error handling functions
|
||||||
*/
|
*/
|
||||||
@ -49,7 +55,7 @@ inline cudaError_t ThrowOnCudaError(cudaError_t code, const char *file,
|
|||||||
}
|
}
|
||||||
return code;
|
return code;
|
||||||
}
|
}
|
||||||
#endif // defined(__CUDACC__)
|
#endif // defined(__CUDACC__) || defined(__HIP_PLATFORM_AMD__)
|
||||||
} // namespace dh
|
} // namespace dh
|
||||||
|
|
||||||
namespace xgboost {
|
namespace xgboost {
|
||||||
@ -167,7 +173,7 @@ class Range {
|
|||||||
int AllVisibleGPUs();
|
int AllVisibleGPUs();
|
||||||
|
|
||||||
inline void AssertGPUSupport() {
|
inline void AssertGPUSupport() {
|
||||||
#ifndef XGBOOST_USE_CUDA
|
#if !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP)
|
||||||
LOG(FATAL) << "XGBoost version not compiled with GPU support.";
|
LOG(FATAL) << "XGBoost version not compiled with GPU support.";
|
||||||
#endif // XGBOOST_USE_CUDA
|
#endif // XGBOOST_USE_CUDA
|
||||||
}
|
}
|
||||||
@ -180,7 +186,7 @@ inline void AssertOneAPISupport() {
|
|||||||
|
|
||||||
void SetDevice(std::int32_t device);
|
void SetDevice(std::int32_t device);
|
||||||
|
|
||||||
#if !defined(XGBOOST_USE_CUDA)
|
#if !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP)
|
||||||
inline void SetDevice(std::int32_t device) {
|
inline void SetDevice(std::int32_t device) {
|
||||||
if (device >= 0) {
|
if (device >= 0) {
|
||||||
AssertGPUSupport();
|
AssertGPUSupport();
|
||||||
|
|||||||
@ -11,9 +11,9 @@
|
|||||||
|
|
||||||
#include "common.h"
|
#include "common.h"
|
||||||
|
|
||||||
#ifdef __CUDACC__
|
#if defined(__CUDACC__) || defined(__HIP_PLATFORM_AMD__)
|
||||||
#include "device_helpers.cuh"
|
#include "device_helpers.cuh"
|
||||||
#endif // __CUDACC__
|
#endif // __CUDACC__ || __HIP_PLATFORM_AMD__
|
||||||
|
|
||||||
namespace xgboost {
|
namespace xgboost {
|
||||||
namespace common {
|
namespace common {
|
||||||
@ -105,7 +105,7 @@ class CompressedBufferWriter {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef __CUDACC__
|
#if defined(__CUDACC__) || defined(__HIP_PLATFORM_AMD__)
|
||||||
__device__ void AtomicWriteSymbol
|
__device__ void AtomicWriteSymbol
|
||||||
(CompressedByteT* buffer, uint64_t symbol, size_t offset) {
|
(CompressedByteT* buffer, uint64_t symbol, size_t offset) {
|
||||||
size_t ibit_start = offset * symbol_bits_;
|
size_t ibit_start = offset * symbol_bits_;
|
||||||
@ -119,7 +119,7 @@ class CompressedBufferWriter {
|
|||||||
symbol >>= 8;
|
symbol >>= 8;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif // __CUDACC__
|
#endif // __CUDACC__ || __HIP_PLATFORM_AMD__
|
||||||
|
|
||||||
template <typename IterT>
|
template <typename IterT>
|
||||||
void Write(CompressedByteT *buffer, IterT input_begin, IterT input_end) {
|
void Write(CompressedByteT *buffer, IterT input_begin, IterT input_end) {
|
||||||
|
|||||||
@ -53,7 +53,7 @@
|
|||||||
|
|
||||||
#endif // defined(XGBOOST_USE_RMM) && XGBOOST_USE_RMM == 1
|
#endif // defined(XGBOOST_USE_RMM) && XGBOOST_USE_RMM == 1
|
||||||
|
|
||||||
#if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 600 || defined(__clang__)
|
#if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 600 || defined(__clang__) || defined(__HIP_PLATFORM_AMD__)
|
||||||
|
|
||||||
#else // In device code and CUDA < 600
|
#else // In device code and CUDA < 600
|
||||||
__device__ __forceinline__ double atomicAdd(double* address, double val) { // NOLINT
|
__device__ __forceinline__ double atomicAdd(double* address, double val) { // NOLINT
|
||||||
@ -702,6 +702,8 @@ typename std::iterator_traits<T>::value_type SumReduction(T in, int nVals) {
|
|||||||
constexpr std::pair<int, int> CUDAVersion() {
|
constexpr std::pair<int, int> CUDAVersion() {
|
||||||
#if defined(__CUDACC_VER_MAJOR__)
|
#if defined(__CUDACC_VER_MAJOR__)
|
||||||
return std::make_pair(__CUDACC_VER_MAJOR__, __CUDACC_VER_MINOR__);
|
return std::make_pair(__CUDACC_VER_MAJOR__, __CUDACC_VER_MINOR__);
|
||||||
|
#elif defined(__HIP_PLATFORM_AMD__)
|
||||||
|
return std::make_pair(HIP_LIBRARY_MAJOR_VERSION, HIP_VERSION_MINOR);
|
||||||
#else
|
#else
|
||||||
// clang/clang-tidy
|
// clang/clang-tidy
|
||||||
return std::make_pair((CUDA_VERSION) / 1000, (CUDA_VERSION) % 100 / 10);
|
return std::make_pair((CUDA_VERSION) / 1000, (CUDA_VERSION) % 100 / 10);
|
||||||
@ -1329,6 +1331,9 @@ class CUDAStreamView {
|
|||||||
// CUDA > 11.0
|
// CUDA > 11.0
|
||||||
dh::safe_cuda(cudaStreamWaitEvent(stream_, cudaEvent_t{e}, cudaEventWaitDefault));
|
dh::safe_cuda(cudaStreamWaitEvent(stream_, cudaEvent_t{e}, cudaEventWaitDefault));
|
||||||
#endif // __CUDACC_VER_MAJOR__ == 11 && __CUDACC_VER_MINOR__ == 0:
|
#endif // __CUDACC_VER_MAJOR__ == 11 && __CUDACC_VER_MINOR__ == 0:
|
||||||
|
|
||||||
|
#elif defined(__HIP_PLATFORM_AMD__)
|
||||||
|
dh::safe_cuda(hipStreamWaitEvent(stream_, hipEvent_t{e}, hipEventWaitDefault));
|
||||||
#else // clang
|
#else // clang
|
||||||
dh::safe_cuda(cudaStreamWaitEvent(stream_, cudaEvent_t{e}, cudaEventWaitDefault));
|
dh::safe_cuda(cudaStreamWaitEvent(stream_, cudaEvent_t{e}, cudaEventWaitDefault));
|
||||||
#endif // defined(__CUDACC_VER_MAJOR__)
|
#endif // defined(__CUDACC_VER_MAJOR__)
|
||||||
|
|||||||
@ -148,32 +148,32 @@ CheckNAN(T) {
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
#if XGBOOST_STRICT_R_MODE && !defined(__CUDA_ARCH__)
|
#if XGBOOST_STRICT_R_MODE && !defined(__CUDA_ARCH__) && !defined(__HIP_PLATFORM_AMD__)
|
||||||
|
|
||||||
bool CheckNAN(double v);
|
bool CheckNAN(double v);
|
||||||
|
|
||||||
#else
|
#else
|
||||||
|
|
||||||
XGBOOST_DEVICE bool inline CheckNAN(float x) {
|
XGBOOST_DEVICE bool inline CheckNAN(float x) {
|
||||||
#if defined(__CUDA_ARCH__)
|
#if defined(__CUDA_ARCH__) || defined(__HIP_PLATFORM_AMD__)
|
||||||
return isnan(x);
|
return isnan(x);
|
||||||
#else
|
#else
|
||||||
return std::isnan(x);
|
return std::isnan(x);
|
||||||
#endif // defined(__CUDA_ARCH__)
|
#endif // defined(__CUDA_ARCH__) || defined(__HIP_PLATFORM_AMD__)
|
||||||
}
|
}
|
||||||
|
|
||||||
XGBOOST_DEVICE bool inline CheckNAN(double x) {
|
XGBOOST_DEVICE bool inline CheckNAN(double x) {
|
||||||
#if defined(__CUDA_ARCH__)
|
#if defined(__CUDA_ARCH__) || defined(__HIP_PLATFORM_AMD__)
|
||||||
return isnan(x);
|
return isnan(x);
|
||||||
#else
|
#else
|
||||||
return std::isnan(x);
|
return std::isnan(x);
|
||||||
#endif // defined(__CUDA_ARCH__)
|
#endif // defined(__CUDA_ARCH__) || defined(__HIP_PLATFORM_AMD__)
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif // XGBOOST_STRICT_R_MODE && !defined(__CUDA_ARCH__)
|
#endif // XGBOOST_STRICT_R_MODE && !defined(__CUDA_ARCH__)
|
||||||
// GPU version is not uploaded in CRAN anyway.
|
// GPU version is not uploaded in CRAN anyway.
|
||||||
// Specialize only when using R with CPU.
|
// Specialize only when using R with CPU.
|
||||||
#if XGBOOST_STRICT_R_MODE && !defined(XGBOOST_USE_CUDA)
|
#if XGBOOST_STRICT_R_MODE && !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP)
|
||||||
double LogGamma(double v);
|
double LogGamma(double v);
|
||||||
|
|
||||||
#else // Not R or R with GPU.
|
#else // Not R or R with GPU.
|
||||||
@ -196,7 +196,7 @@ XGBOOST_DEVICE inline T LogGamma(T v) {
|
|||||||
#endif // _MSC_VER
|
#endif // _MSC_VER
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif // XGBOOST_STRICT_R_MODE && !defined(XGBOOST_USE_CUDA)
|
#endif // XGBOOST_STRICT_R_MODE && !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP)
|
||||||
|
|
||||||
} // namespace common
|
} // namespace common
|
||||||
} // namespace xgboost
|
} // namespace xgboost
|
||||||
|
|||||||
@ -112,7 +112,7 @@ void Median(Context const* ctx, linalg::TensorView<float const, 2> t, OptionalWe
|
|||||||
|
|
||||||
void Mean(Context const* ctx, linalg::VectorView<float const> v, linalg::VectorView<float> out);
|
void Mean(Context const* ctx, linalg::VectorView<float const> v, linalg::VectorView<float> out);
|
||||||
|
|
||||||
#if !defined(XGBOOST_USE_CUDA)
|
#if !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP)
|
||||||
inline void Median(Context const*, linalg::TensorView<float const, 2>, OptionalWeights,
|
inline void Median(Context const*, linalg::TensorView<float const, 2>, OptionalWeights,
|
||||||
linalg::Tensor<float, 1>*) {
|
linalg::Tensor<float, 1>*) {
|
||||||
common::AssertGPUSupport();
|
common::AssertGPUSupport();
|
||||||
@ -120,7 +120,7 @@ inline void Median(Context const*, linalg::TensorView<float const, 2>, OptionalW
|
|||||||
inline void Mean(Context const*, linalg::VectorView<float const>, linalg::VectorView<float>) {
|
inline void Mean(Context const*, linalg::VectorView<float const>, linalg::VectorView<float>) {
|
||||||
common::AssertGPUSupport();
|
common::AssertGPUSupport();
|
||||||
}
|
}
|
||||||
#endif // !defined(XGBOOST_USE_CUDA)
|
#endif // !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP)
|
||||||
} // namespace cuda_impl
|
} // namespace cuda_impl
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|||||||
@ -17,9 +17,9 @@
|
|||||||
#include "xgboost/host_device_vector.h"
|
#include "xgboost/host_device_vector.h"
|
||||||
#include "xgboost/span.h"
|
#include "xgboost/span.h"
|
||||||
|
|
||||||
#if defined (__CUDACC__)
|
#if defined (__CUDACC__) || defined(__HIP_PLATFORM_AMD__)
|
||||||
#include "device_helpers.cuh"
|
#include "device_helpers.cuh"
|
||||||
#endif // defined (__CUDACC__)
|
#endif // defined (__CUDACC__) || defined(__HIP_PLATFORM_AMD__)
|
||||||
|
|
||||||
namespace xgboost {
|
namespace xgboost {
|
||||||
namespace common {
|
namespace common {
|
||||||
@ -28,7 +28,7 @@ constexpr size_t kBlockThreads = 256;
|
|||||||
|
|
||||||
namespace detail {
|
namespace detail {
|
||||||
|
|
||||||
#if defined(__CUDACC__)
|
#if defined(__CUDACC__) || defined(__HIP_PLATFORM_AMD__)
|
||||||
template <typename Functor, typename... SpanType>
|
template <typename Functor, typename... SpanType>
|
||||||
__global__ void LaunchCUDAKernel(Functor _func, Range _range,
|
__global__ void LaunchCUDAKernel(Functor _func, Range _range,
|
||||||
SpanType... _spans) {
|
SpanType... _spans) {
|
||||||
@ -36,7 +36,7 @@ __global__ void LaunchCUDAKernel(Functor _func, Range _range,
|
|||||||
_func(i, _spans...);
|
_func(i, _spans...);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif // defined(__CUDACC__)
|
#endif // defined(__CUDACC__) || defined(__HIP_PLATFORM_AMD__)
|
||||||
|
|
||||||
} // namespace detail
|
} // namespace detail
|
||||||
|
|
||||||
@ -127,7 +127,7 @@ class Transform {
|
|||||||
UnpackShard(device, _vectors...);
|
UnpackShard(device, _vectors...);
|
||||||
}
|
}
|
||||||
|
|
||||||
#if defined(__CUDACC__)
|
#if defined(__CUDACC__) || defined(__HIP_PLATFORM_AMD__)
|
||||||
template <typename std::enable_if<CompiledWithCuda>::type* = nullptr,
|
template <typename std::enable_if<CompiledWithCuda>::type* = nullptr,
|
||||||
typename... HDV>
|
typename... HDV>
|
||||||
void LaunchCUDA(Functor _func, HDV*... _vectors) const {
|
void LaunchCUDA(Functor _func, HDV*... _vectors) const {
|
||||||
@ -159,7 +159,7 @@ class Transform {
|
|||||||
|
|
||||||
LOG(FATAL) << "Not part of device code. WITH_CUDA: " << WITH_CUDA();
|
LOG(FATAL) << "Not part of device code. WITH_CUDA: " << WITH_CUDA();
|
||||||
}
|
}
|
||||||
#endif // defined(__CUDACC__)
|
#endif // defined(__CUDACC__) || defined(__HIP_PLATFORM_AMD__)
|
||||||
|
|
||||||
template <typename... HDV>
|
template <typename... HDV>
|
||||||
void LaunchCPU(Functor func, HDV *...vectors) const {
|
void LaunchCPU(Functor func, HDV *...vectors) const {
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user