initial merge, fix linalg.h

This commit is contained in:
amdsc21 2023-03-25 04:37:43 +01:00
parent 7fbc561e17
commit e1d050f64e

View File

@ -30,11 +30,11 @@
// decouple it from xgboost. // decouple it from xgboost.
#ifndef LINALG_HD #ifndef LINALG_HD
#if defined(__CUDA__) || defined(__NVCC__) #if defined(__CUDA__) || defined(__NVCC__) || defined(__HIP_PLATFORM_AMD__)
#define LINALG_HD __host__ __device__ #define LINALG_HD __host__ __device__
#else #else
#define LINALG_HD #define LINALG_HD
#endif // defined (__CUDA__) || defined(__NVCC__) #endif // defined (__CUDA__) || defined(__NVCC__) || defined(__HIP_PLATFORM_AMD__)
#endif // LINALG_HD #endif // LINALG_HD
namespace xgboost::linalg { namespace xgboost::linalg {
@ -118,7 +118,7 @@ using IndexToTag = std::conditional_t<std::is_integral<RemoveCRType<S>>::value,
template <int32_t n, typename Fn> template <int32_t n, typename Fn>
LINALG_HD constexpr auto UnrollLoop(Fn fn) { LINALG_HD constexpr auto UnrollLoop(Fn fn) {
#if defined __CUDA_ARCH__ #if defined(__CUDA_ARCH__) || defined(__HIP_PLATFORM_AMD__)
#pragma unroll n #pragma unroll n
#endif // defined __CUDA_ARCH__ #endif // defined(__CUDA_ARCH__) || defined(__HIP_PLATFORM_AMD__)
for (int32_t i = 0; i < n; ++i) { for (int32_t i = 0; i < n; ++i) {
@ -136,7 +136,7 @@ int32_t NativePopc(T v) {
inline LINALG_HD int Popc(uint32_t v) { inline LINALG_HD int Popc(uint32_t v) {
#if defined(__CUDA_ARCH__) #if defined(__CUDA_ARCH__)
return __popc(v); return __popc(v);
#elif defined(__GNUC__) || defined(__clang__) #elif defined(__GNUC__) || defined(__clang__) || defined(__HIP_PLATFORM_AMD__)
return __builtin_popcount(v); return __builtin_popcount(v);
#elif defined(_MSC_VER) #elif defined(_MSC_VER)
return __popcnt(v); return __popcnt(v);
@ -148,7 +148,7 @@ inline LINALG_HD int Popc(uint32_t v) {
inline LINALG_HD int Popc(uint64_t v) { inline LINALG_HD int Popc(uint64_t v) {
#if defined(__CUDA_ARCH__) #if defined(__CUDA_ARCH__)
return __popcll(v); return __popcll(v);
#elif defined(__GNUC__) || defined(__clang__) #elif defined(__GNUC__) || defined(__clang__) || defined(__HIP_PLATFORM_AMD__)
return __builtin_popcountll(v); return __builtin_popcountll(v);
#elif defined(_MSC_VER) && _defined(_M_X64) #elif defined(_MSC_VER) && _defined(_M_X64)
return __popcnt64(v); return __popcnt64(v);