enable HIP flags
This commit is contained in:
parent
ed45aa2816
commit
75712b9c3c
@ -1 +1 @@
|
|||||||
Subproject commit 81db539486ce6525b31b971545edffee2754aced
|
Subproject commit dfd9365264a060a5096734b7d892e1858b6d2722
|
||||||
@ -57,19 +57,19 @@
|
|||||||
/*!
|
/*!
|
||||||
* \brief Tag function as usable by device
|
* \brief Tag function as usable by device
|
||||||
*/
|
*/
|
||||||
#if defined (__CUDA__) || defined(__NVCC__)
|
#if defined (__CUDA__) || defined(__NVCC__) || defined(__HIP_PLATFORM_AMD__)
|
||||||
#define XGBOOST_DEVICE __host__ __device__
|
#define XGBOOST_DEVICE __host__ __device__
|
||||||
#else
|
#else
|
||||||
#define XGBOOST_DEVICE
|
#define XGBOOST_DEVICE
|
||||||
#endif // defined (__CUDA__) || defined(__NVCC__)
|
#endif // defined (__CUDA__) || defined(__NVCC__) || defined(__HIP_PLATFORM_AMD__)
|
||||||
|
|
||||||
#if defined(__CUDA__) || defined(__CUDACC__)
|
#if defined(__CUDA__) || defined(__CUDACC__) || defined(__HIP_PLATFORM_AMD__)
|
||||||
#define XGBOOST_HOST_DEV_INLINE XGBOOST_DEVICE __forceinline__
|
#define XGBOOST_HOST_DEV_INLINE XGBOOST_DEVICE __forceinline__
|
||||||
#define XGBOOST_DEV_INLINE __device__ __forceinline__
|
#define XGBOOST_DEV_INLINE __device__ __forceinline__
|
||||||
#else
|
#else
|
||||||
#define XGBOOST_HOST_DEV_INLINE
|
#define XGBOOST_HOST_DEV_INLINE
|
||||||
#define XGBOOST_DEV_INLINE
|
#define XGBOOST_DEV_INLINE
|
||||||
#endif // defined(__CUDA__) || defined(__CUDACC__)
|
#endif // defined(__CUDA__) || defined(__CUDACC__) || defined(__HIP_PLATFORM_AMD__)
|
||||||
|
|
||||||
// These check are for Makefile.
|
// These check are for Makefile.
|
||||||
#if !defined(XGBOOST_MM_PREFETCH_PRESENT) && !defined(XGBOOST_BUILTIN_PREFETCH_PRESENT)
|
#if !defined(XGBOOST_MM_PREFETCH_PRESENT) && !defined(XGBOOST_BUILTIN_PREFETCH_PRESENT)
|
||||||
|
|||||||
@ -57,11 +57,11 @@
|
|||||||
|
|
||||||
namespace xgboost {
|
namespace xgboost {
|
||||||
|
|
||||||
#ifdef __CUDACC__
|
#if defined(__CUDACC__) || defined(__HIP_PLATFORM_AMD__)
|
||||||
// Sets a function to call instead of cudaSetDevice();
|
// Sets a function to call instead of cudaSetDevice();
|
||||||
// only added for testing
|
// only added for testing
|
||||||
void SetCudaSetDeviceHandler(void (*handler)(int));
|
void SetCudaSetDeviceHandler(void (*handler)(int));
|
||||||
#endif // __CUDACC__
|
#endif // __CUDACC__ || __HIP_PLATFORM_AMD__
|
||||||
|
|
||||||
template <typename T> struct HostDeviceVectorImpl;
|
template <typename T> struct HostDeviceVectorImpl;
|
||||||
|
|
||||||
|
|||||||
@ -30,11 +30,11 @@
|
|||||||
|
|
||||||
// decouple it from xgboost.
|
// decouple it from xgboost.
|
||||||
#ifndef LINALG_HD
|
#ifndef LINALG_HD
|
||||||
#if defined(__CUDA__) || defined(__NVCC__)
|
#if defined(__CUDA__) || defined(__NVCC__) || defined(__HIP_PLATFORM_AMD__)
|
||||||
#define LINALG_HD __host__ __device__
|
#define LINALG_HD __host__ __device__
|
||||||
#else
|
#else
|
||||||
#define LINALG_HD
|
#define LINALG_HD
|
||||||
#endif // defined (__CUDA__) || defined(__NVCC__)
|
#endif // defined (__CUDA__) || defined(__NVCC__) || defined(__HIP_PLATFORM_AMD__)
|
||||||
#endif // LINALG_HD
|
#endif // LINALG_HD
|
||||||
|
|
||||||
namespace xgboost::linalg {
|
namespace xgboost::linalg {
|
||||||
@ -118,9 +118,9 @@ using IndexToTag = std::conditional_t<std::is_integral<RemoveCRType<S>>::value,
|
|||||||
|
|
||||||
template <int32_t n, typename Fn>
|
template <int32_t n, typename Fn>
|
||||||
LINALG_HD constexpr auto UnrollLoop(Fn fn) {
|
LINALG_HD constexpr auto UnrollLoop(Fn fn) {
|
||||||
#if defined __CUDA_ARCH__
|
#if defined(__CUDA_ARCH__) || defined(__HIP_PLATFORM_AMD__)
|
||||||
#pragma unroll n
|
#pragma unroll n
|
||||||
#endif // defined __CUDA_ARCH__
|
#endif // defined __CUDA_ARCH__ || defined(__HIP_PLATFORM_AMD__)
|
||||||
for (int32_t i = 0; i < n; ++i) {
|
for (int32_t i = 0; i < n; ++i) {
|
||||||
fn(i);
|
fn(i);
|
||||||
}
|
}
|
||||||
@ -134,7 +134,7 @@ int32_t NativePopc(T v) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
inline LINALG_HD int Popc(uint32_t v) {
|
inline LINALG_HD int Popc(uint32_t v) {
|
||||||
#if defined(__CUDA_ARCH__)
|
#if defined(__CUDA_ARCH__) || defined(__HIP_PLATFORM_AMD__)
|
||||||
return __popc(v);
|
return __popc(v);
|
||||||
#elif defined(__GNUC__) || defined(__clang__)
|
#elif defined(__GNUC__) || defined(__clang__)
|
||||||
return __builtin_popcount(v);
|
return __builtin_popcount(v);
|
||||||
@ -146,7 +146,7 @@ inline LINALG_HD int Popc(uint32_t v) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
inline LINALG_HD int Popc(uint64_t v) {
|
inline LINALG_HD int Popc(uint64_t v) {
|
||||||
#if defined(__CUDA_ARCH__)
|
#if defined(__CUDA_ARCH__) || defined(__HIP_PLATFORM_AMD__)
|
||||||
return __popcll(v);
|
return __popcll(v);
|
||||||
#elif defined(__GNUC__) || defined(__clang__)
|
#elif defined(__GNUC__) || defined(__clang__)
|
||||||
return __builtin_popcountll(v);
|
return __builtin_popcountll(v);
|
||||||
|
|||||||
@ -40,7 +40,9 @@
|
|||||||
|
|
||||||
#if defined(__CUDACC__)
|
#if defined(__CUDACC__)
|
||||||
#include <cuda_runtime.h>
|
#include <cuda_runtime.h>
|
||||||
#endif // defined(__CUDACC__)
|
#elif defined(__HIP_PLATFORM_AMD__)
|
||||||
|
#include <hip/hip_runtime.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
/*!
|
/*!
|
||||||
* The version number 1910 is picked up from GSL.
|
* The version number 1910 is picked up from GSL.
|
||||||
@ -103,7 +105,35 @@ namespace common {
|
|||||||
|
|
||||||
#define SPAN_CHECK KERNEL_CHECK
|
#define SPAN_CHECK KERNEL_CHECK
|
||||||
|
|
||||||
#else // ------------------------------ not CUDA ----------------------------
|
#elif defined(__HIP_PLATFORM_AMD__)
|
||||||
|
// Usual logging facility is not available inside device code.
|
||||||
|
|
||||||
|
#if defined(_MSC_VER)
|
||||||
|
|
||||||
|
// Windows HIP doesn't have __assert_fail.
|
||||||
|
#define HIP_KERNEL_CHECK(cond) \
|
||||||
|
do { \
|
||||||
|
if (XGBOOST_EXPECT(!(cond), false)) { \
|
||||||
|
__trap(); \
|
||||||
|
} \
|
||||||
|
} while (0)
|
||||||
|
|
||||||
|
#else // defined(_MSC_VER)
|
||||||
|
|
||||||
|
#define __ASSERT_STR_HELPER(x) #x
|
||||||
|
|
||||||
|
#define HIP_KERNEL_CHECK(cond) \
|
||||||
|
(XGBOOST_EXPECT((cond), true) \
|
||||||
|
? static_cast<void>(0) \
|
||||||
|
: __assert_fail(__ASSERT_STR_HELPER((cond)), __FILE__, __LINE__, __PRETTY_FUNCTION__))
|
||||||
|
|
||||||
|
#endif // defined(_MSC_VER)
|
||||||
|
|
||||||
|
#define KERNEL_CHECK HIP_KERNEL_CHECK
|
||||||
|
|
||||||
|
#define SPAN_CHECK KERNEL_CHECK
|
||||||
|
|
||||||
|
#else // ------------------------------ not CUDA or HIP ----------------------------
|
||||||
|
|
||||||
#if defined(XGBOOST_STRICT_R_MODE) && XGBOOST_STRICT_R_MODE == 1
|
#if defined(XGBOOST_STRICT_R_MODE) && XGBOOST_STRICT_R_MODE == 1
|
||||||
|
|
||||||
@ -119,7 +149,7 @@ namespace common {
|
|||||||
|
|
||||||
#endif // defined(XGBOOST_STRICT_R_MODE)
|
#endif // defined(XGBOOST_STRICT_R_MODE)
|
||||||
|
|
||||||
#endif // __CUDA_ARCH__
|
#endif // __CUDA_ARCH__ || __HIP_PLATFORM_AMD__
|
||||||
|
|
||||||
#define SPAN_LT(lhs, rhs) SPAN_CHECK((lhs) < (rhs))
|
#define SPAN_LT(lhs, rhs) SPAN_CHECK((lhs) < (rhs))
|
||||||
|
|
||||||
@ -316,7 +346,7 @@ struct IsSpanOracle<Span<T, Extent>> : std::true_type {};
|
|||||||
template <class T>
|
template <class T>
|
||||||
struct IsSpan : public IsSpanOracle<typename std::remove_cv<T>::type> {};
|
struct IsSpan : public IsSpanOracle<typename std::remove_cv<T>::type> {};
|
||||||
|
|
||||||
// Re-implement std algorithms here to adopt CUDA.
|
// Re-implement std algorithms here to adopt CUDA/HIP
|
||||||
template <typename T>
|
template <typename T>
|
||||||
struct Less {
|
struct Less {
|
||||||
XGBOOST_DEVICE constexpr bool operator()(const T& _x, const T& _y) const {
|
XGBOOST_DEVICE constexpr bool operator()(const T& _x, const T& _y) const {
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user