diff --git a/dmlc-core b/dmlc-core index 81db53948..dfd936526 160000 --- a/dmlc-core +++ b/dmlc-core @@ -1 +1 @@ -Subproject commit 81db539486ce6525b31b971545edffee2754aced +Subproject commit dfd9365264a060a5096734b7d892e1858b6d2722 diff --git a/include/xgboost/base.h b/include/xgboost/base.h index d12e71a3a..731cb10e9 100644 --- a/include/xgboost/base.h +++ b/include/xgboost/base.h @@ -57,19 +57,19 @@ /*! * \brief Tag function as usable by device */ -#if defined (__CUDA__) || defined(__NVCC__) +#if defined (__CUDA__) || defined(__NVCC__) || defined(__HIP_PLATFORM_AMD__) #define XGBOOST_DEVICE __host__ __device__ #else #define XGBOOST_DEVICE -#endif // defined (__CUDA__) || defined(__NVCC__) +#endif // defined (__CUDA__) || defined(__NVCC__) || defined(__HIP_PLATFORM_AMD__) -#if defined(__CUDA__) || defined(__CUDACC__) +#if defined(__CUDA__) || defined(__CUDACC__) || defined(__HIP_PLATFORM_AMD__) #define XGBOOST_HOST_DEV_INLINE XGBOOST_DEVICE __forceinline__ #define XGBOOST_DEV_INLINE __device__ __forceinline__ #else #define XGBOOST_HOST_DEV_INLINE #define XGBOOST_DEV_INLINE -#endif // defined(__CUDA__) || defined(__CUDACC__) +#endif // defined(__CUDA__) || defined(__CUDACC__) || defined(__HIP_PLATFORM_AMD__) // These check are for Makefile. #if !defined(XGBOOST_MM_PREFETCH_PRESENT) && !defined(XGBOOST_BUILTIN_PREFETCH_PRESENT) diff --git a/include/xgboost/host_device_vector.h b/include/xgboost/host_device_vector.h index b9fb15104..53726b1bd 100644 --- a/include/xgboost/host_device_vector.h +++ b/include/xgboost/host_device_vector.h @@ -57,11 +57,11 @@ namespace xgboost { -#ifdef __CUDACC__ +#if defined(__CUDACC__) || defined(__HIP_PLATFORM_AMD__) // Sets a function to call instead of cudaSetDevice(); // only added for testing void SetCudaSetDeviceHandler(void (*handler)(int)); -#endif // __CUDACC__ +#endif // __CUDACC__ || __HIP_PLATFORM_AMD__ template struct HostDeviceVectorImpl; diff --git a/include/xgboost/linalg.h b/include/xgboost/linalg.h index 3d6bcc962..18314b89f 100644 --- a/include/xgboost/linalg.h +++ b/include/xgboost/linalg.h @@ -30,11 +30,11 @@ // decouple it from xgboost. #ifndef LINALG_HD -#if defined(__CUDA__) || defined(__NVCC__) +#if defined(__CUDA__) || defined(__NVCC__) || defined(__HIP_PLATFORM_AMD__) #define LINALG_HD __host__ __device__ #else #define LINALG_HD -#endif // defined (__CUDA__) || defined(__NVCC__) +#endif // defined (__CUDA__) || defined(__NVCC__) || defined(__HIP_PLATFORM_AMD__) #endif // LINALG_HD namespace xgboost::linalg { @@ -118,9 +118,9 @@ using IndexToTag = std::conditional_t>::value, template LINALG_HD constexpr auto UnrollLoop(Fn fn) { -#if defined __CUDA_ARCH__ +#if defined(__CUDA_ARCH__) || defined(__HIP_PLATFORM_AMD__) #pragma unroll n -#endif // defined __CUDA_ARCH__ +#endif // defined __CUDA_ARCH__ || defined(__HIP_PLATFORM_AMD__) for (int32_t i = 0; i < n; ++i) { fn(i); } @@ -134,7 +134,7 @@ int32_t NativePopc(T v) { } inline LINALG_HD int Popc(uint32_t v) { -#if defined(__CUDA_ARCH__) +#if defined(__CUDA_ARCH__) || defined(__HIP_PLATFORM_AMD__) return __popc(v); #elif defined(__GNUC__) || defined(__clang__) return __builtin_popcount(v); @@ -146,7 +146,7 @@ inline LINALG_HD int Popc(uint32_t v) { } inline LINALG_HD int Popc(uint64_t v) { -#if defined(__CUDA_ARCH__) +#if defined(__CUDA_ARCH__) || defined(__HIP_PLATFORM_AMD__) return __popcll(v); #elif defined(__GNUC__) || defined(__clang__) return __builtin_popcountll(v); diff --git a/include/xgboost/span.h b/include/xgboost/span.h index 0b543b537..ee11b1d4e 100644 --- a/include/xgboost/span.h +++ b/include/xgboost/span.h @@ -40,7 +40,9 @@ #if defined(__CUDACC__) #include -#endif // defined(__CUDACC__) +#elif defined(__HIP_PLATFORM_AMD__) +#include +#endif /*! * The version number 1910 is picked up from GSL. @@ -103,7 +105,35 @@ namespace common { #define SPAN_CHECK KERNEL_CHECK -#else // ------------------------------ not CUDA ---------------------------- +#elif defined(__HIP_PLATFORM_AMD__) +// Usual logging facility is not available inside device code. + +#if defined(_MSC_VER) + +// Windows HIP doesn't have __assert_fail. +#define HIP_KERNEL_CHECK(cond) \ + do { \ + if (XGBOOST_EXPECT(!(cond), false)) { \ + __trap(); \ + } \ + } while (0) + +#else // defined(_MSC_VER) + +#define __ASSERT_STR_HELPER(x) #x + +#define HIP_KERNEL_CHECK(cond) \ + (XGBOOST_EXPECT((cond), true) \ + ? static_cast(0) \ + : __assert_fail(__ASSERT_STR_HELPER((cond)), __FILE__, __LINE__, __PRETTY_FUNCTION__)) + +#endif // defined(_MSC_VER) + +#define KERNEL_CHECK HIP_KERNEL_CHECK + +#define SPAN_CHECK KERNEL_CHECK + +#else // ------------------------------ not CUDA or HIP ---------------------------- #if defined(XGBOOST_STRICT_R_MODE) && XGBOOST_STRICT_R_MODE == 1 @@ -119,7 +149,7 @@ namespace common { #endif // defined(XGBOOST_STRICT_R_MODE) -#endif // __CUDA_ARCH__ +#endif // __CUDA_ARCH__ || __HIP_PLATFORM_AMD__ #define SPAN_LT(lhs, rhs) SPAN_CHECK((lhs) < (rhs)) @@ -316,7 +346,7 @@ struct IsSpanOracle> : std::true_type {}; template struct IsSpan : public IsSpanOracle::type> {}; -// Re-implement std algorithms here to adopt CUDA. +// Re-implement std algorithms here to adopt CUDA/HIP template struct Less { XGBOOST_DEVICE constexpr bool operator()(const T& _x, const T& _y) const {