use __HIPCC__ for device code
This commit is contained in:
@@ -10,10 +10,6 @@
|
||||
#elif defined(XGBOOST_USE_RCCL)
|
||||
#include "../common/cuda_to_hip.h"
|
||||
|
||||
#ifndef __HIP_PLATFORM_AMD__
|
||||
#define __HIP_PLATFORM_AMD__
|
||||
#endif
|
||||
|
||||
#ifndef THRUST_DEVICE_SYSTEM
|
||||
#define THRUST_DEVICE_SYSTEM THRUST_DEVICE_SYSTEM_HIP
|
||||
#endif
|
||||
|
||||
@@ -16,18 +16,18 @@
|
||||
#include <thrust/device_ptr.h>
|
||||
|
||||
#include "device_helpers.cuh"
|
||||
#elif defined(__HIP_PLATFORM_AMD__)
|
||||
#elif defined(__HIPCC__)
|
||||
#include <thrust/copy.h>
|
||||
#include <thrust/device_ptr.h>
|
||||
#include "device_helpers.hip.h"
|
||||
#endif // defined(__CUDACC__) || defined(__HIP_PLATFORM_AMD__)
|
||||
#endif // defined(__CUDACC__) || defined(__HIPCC__)
|
||||
|
||||
#include "common.h"
|
||||
#include "xgboost/span.h" // for Span
|
||||
|
||||
namespace xgboost {
|
||||
|
||||
#if defined(__CUDACC__) || defined(__HIP_PLATFORM_AMD__)
|
||||
#if defined(__CUDACC__) || defined(__HIPCC__)
|
||||
using BitFieldAtomicType = unsigned long long; // NOLINT
|
||||
|
||||
__forceinline__ __device__ BitFieldAtomicType AtomicOr(BitFieldAtomicType* address,
|
||||
@@ -51,7 +51,7 @@ __forceinline__ __device__ BitFieldAtomicType AtomicAnd(BitFieldAtomicType* addr
|
||||
|
||||
return old;
|
||||
}
|
||||
#endif // defined(__CUDACC__) || defined(__HIP_PLATFORM_AMD__)
|
||||
#endif // defined(__CUDACC__) || defined(__HIPCC__)
|
||||
|
||||
/**
|
||||
* @brief A non-owning type with auxiliary methods defined for manipulating bits.
|
||||
@@ -109,7 +109,7 @@ struct BitFieldContainer {
|
||||
XGBOOST_DEVICE static size_t ComputeStorageSize(index_type size) {
|
||||
return common::DivRoundUp(size, kValueSize);
|
||||
}
|
||||
#if defined(__CUDA_ARCH__) || defined(__HIP_PLATFORM_AMD__)
|
||||
#if defined(__CUDA_ARCH__) || defined(__HIPCC__)
|
||||
__device__ BitFieldContainer& operator|=(BitFieldContainer const& rhs) {
|
||||
auto tid = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
size_t min_size = min(NumValues(), rhs.NumValues());
|
||||
@@ -126,9 +126,9 @@ struct BitFieldContainer {
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
#endif // #if defined(__CUDA_ARCH__) || defined(__HIP_PLATFORM_AMD__)
|
||||
#endif // #if defined(__CUDA_ARCH__) || defined(__HIPCC__)
|
||||
|
||||
#if defined(__CUDA_ARCH__) || defined(__HIP_PLATFORM_AMD__)
|
||||
#if defined(__CUDA_ARCH__) || defined(__HIPCC__)
|
||||
__device__ BitFieldContainer& operator&=(BitFieldContainer const& rhs) {
|
||||
size_t min_size = min(NumValues(), rhs.NumValues());
|
||||
auto tid = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
@@ -147,7 +147,7 @@ struct BitFieldContainer {
|
||||
}
|
||||
#endif // defined(__CUDA_ARCH__)
|
||||
|
||||
#if defined(__CUDA_ARCH__) || defined(__HIP_PLATFORM_AMD__)
|
||||
#if defined(__CUDA_ARCH__) || defined(__HIPCC__)
|
||||
__device__ auto Set(index_type pos) noexcept(true) {
|
||||
Pos pos_v = Direction::Shift(ToBitPos(pos));
|
||||
value_type& value = Data()[pos_v.int_pos];
|
||||
@@ -164,7 +164,7 @@ struct BitFieldContainer {
|
||||
}
|
||||
|
||||
/* compiler hack */
|
||||
#if defined(__HIP_PLATFORM_AMD__)
|
||||
#if defined(__HIPCC__)
|
||||
void Clear(index_type pos) noexcept(true) {
|
||||
Pos pos_v = Direction::Shift(ToBitPos(pos));
|
||||
value_type& value = Data()[pos_v.int_pos];
|
||||
@@ -185,7 +185,7 @@ struct BitFieldContainer {
|
||||
value_type clear_bit = ~(kOne << pos_v.bit_pos);
|
||||
value &= clear_bit;
|
||||
}
|
||||
#endif // defined(__CUDA_ARCH__) || defined(__HIP_PLATFORM_AMD__)
|
||||
#endif // defined(__CUDA_ARCH__) || defined(__HIPCC__)
|
||||
|
||||
XGBOOST_DEVICE bool Check(Pos pos_v) const noexcept(true) {
|
||||
pos_v = Direction::Shift(pos_v);
|
||||
|
||||
@@ -25,7 +25,7 @@
|
||||
|
||||
#define WITH_CUDA() true
|
||||
|
||||
#elif defined(__HIP_PLATFORM_AMD__)
|
||||
#elif defined(__HIPCC__)
|
||||
#include "cuda_to_hip.h"
|
||||
#include <thrust/system/hip/error.h>
|
||||
#include <thrust/system_error.h>
|
||||
@@ -39,7 +39,7 @@
|
||||
#endif // defined(__CUDACC__)
|
||||
|
||||
namespace dh {
|
||||
#if defined(__CUDACC__) || defined(__HIP_PLATFORM_AMD__)
|
||||
#if defined(__CUDACC__) || defined(__HIPCC__)
|
||||
/*
|
||||
* Error handling functions
|
||||
*/
|
||||
|
||||
@@ -13,9 +13,9 @@
|
||||
|
||||
#if defined(__CUDACC__)
|
||||
#include "device_helpers.cuh"
|
||||
#elif defined(__HIP_PLATFORM_AMD__)
|
||||
#elif defined(__HIPCC__)
|
||||
#include "device_helpers.hip.h"
|
||||
#endif // __CUDACC__ || __HIP_PLATFORM_AMD__
|
||||
#endif // __CUDACC__ || __HIPCC__
|
||||
|
||||
namespace xgboost {
|
||||
namespace common {
|
||||
@@ -107,7 +107,7 @@ class CompressedBufferWriter {
|
||||
}
|
||||
}
|
||||
|
||||
#if defined(__CUDACC__) || defined(__HIP_PLATFORM_AMD__)
|
||||
#if defined(__CUDACC__) || defined(__HIPCC__)
|
||||
__device__ void AtomicWriteSymbol
|
||||
(CompressedByteT* buffer, uint64_t symbol, size_t offset) {
|
||||
size_t ibit_start = offset * symbol_bits_;
|
||||
@@ -121,7 +121,7 @@ class CompressedBufferWriter {
|
||||
symbol >>= 8;
|
||||
}
|
||||
}
|
||||
#endif // __CUDACC__ || __HIP_PLATFORM_AMD__
|
||||
#endif // __CUDACC__ || __HIPCC__
|
||||
|
||||
template <typename IterT>
|
||||
void Write(CompressedByteT *buffer, IterT input_begin, IterT input_end) {
|
||||
|
||||
@@ -143,7 +143,7 @@ CheckNAN(T) {
|
||||
return false;
|
||||
}
|
||||
|
||||
#if XGBOOST_STRICT_R_MODE && !defined(__CUDA_ARCH__) && !defined(__HIP_PLATFORM_AMD__)
|
||||
#if XGBOOST_STRICT_R_MODE && !defined(__CUDA_ARCH__) && !defined(__HIPCC__)
|
||||
|
||||
bool CheckNAN(double v);
|
||||
|
||||
@@ -152,21 +152,21 @@ bool CheckNAN(double v);
|
||||
XGBOOST_DEVICE bool inline CheckNAN(float x) {
|
||||
#if defined(__CUDA_ARCH__)
|
||||
return isnan(x);
|
||||
#elif defined(__HIP_PLATFORM_AMD__)
|
||||
#elif defined(__HIPCC__)
|
||||
return __builtin_isnan(x);
|
||||
#else
|
||||
return std::isnan(x);
|
||||
#endif // defined(__CUDA_ARCH__) || defined(__HIP_PLATFORM_AMD__)
|
||||
#endif // defined(__CUDA_ARCH__) || defined(__HIPCC__)
|
||||
}
|
||||
|
||||
XGBOOST_DEVICE bool inline CheckNAN(double x) {
|
||||
#if defined(__CUDA_ARCH__)
|
||||
return isnan(x);
|
||||
#elif defined(__HIP_PLATFORM_AMD__)
|
||||
#elif defined(__HIPCC__)
|
||||
return __builtin_isnan(x);
|
||||
#else
|
||||
return std::isnan(x);
|
||||
#endif // defined(__CUDA_ARCH__) || defined(__HIP_PLATFORM_AMD__)
|
||||
#endif // defined(__CUDA_ARCH__) || defined(__HIPCC__)
|
||||
}
|
||||
|
||||
#endif // XGBOOST_STRICT_R_MODE && !defined(__CUDA_ARCH__)
|
||||
|
||||
@@ -25,12 +25,12 @@ DECLARE_FIELD_ENUM_CLASS(xgboost::common::ProbabilityDistributionType);
|
||||
namespace xgboost {
|
||||
namespace common {
|
||||
|
||||
#if !defined(__CUDACC__) && !defined(__HIP_PLATFORM_AMD__)
|
||||
#if !defined(__CUDACC__) && !defined(__HIPCC__)
|
||||
|
||||
using std::log;
|
||||
using std::fmax;
|
||||
|
||||
#endif // __CUDACC__ && __HIP_PLATFORM_AMD__
|
||||
#endif // __CUDACC__ && __HIPCC__
|
||||
|
||||
enum class CensoringType : uint8_t {
|
||||
kUncensored, kRightCensored, kLeftCensored, kIntervalCensored
|
||||
|
||||
@@ -19,9 +19,9 @@
|
||||
|
||||
#if defined (__CUDACC__)
|
||||
#include "device_helpers.cuh"
|
||||
#elif defined(__HIP_PLATFORM_AMD__)
|
||||
#elif defined(__HIPCC__)
|
||||
#include "device_helpers.hip.h"
|
||||
#endif // defined (__CUDACC__) || defined(__HIP_PLATFORM_AMD__)
|
||||
#endif // defined (__CUDACC__) || defined(__HIPCC__)
|
||||
|
||||
namespace xgboost {
|
||||
namespace common {
|
||||
@@ -30,7 +30,7 @@ constexpr size_t kBlockThreads = 256;
|
||||
|
||||
namespace detail {
|
||||
|
||||
#if defined(__CUDACC__) || defined(__HIP_PLATFORM_AMD__)
|
||||
#if defined(__CUDACC__) || defined(__HIPCC__)
|
||||
template <typename Functor, typename... SpanType>
|
||||
__global__ void LaunchCUDAKernel(Functor _func, Range _range,
|
||||
SpanType... _spans) {
|
||||
@@ -38,7 +38,7 @@ __global__ void LaunchCUDAKernel(Functor _func, Range _range,
|
||||
_func(i, _spans...);
|
||||
}
|
||||
}
|
||||
#endif // defined(__CUDACC__) || defined(__HIP_PLATFORM_AMD__)
|
||||
#endif // defined(__CUDACC__) || defined(__HIPCC__)
|
||||
|
||||
} // namespace detail
|
||||
|
||||
@@ -129,7 +129,7 @@ class Transform {
|
||||
UnpackShard(device, _vectors...);
|
||||
}
|
||||
|
||||
#if defined(__CUDACC__) || defined(__HIP_PLATFORM_AMD__)
|
||||
#if defined(__CUDACC__) || defined(__HIPCC__)
|
||||
template <typename std::enable_if<CompiledWithCuda>::type* = nullptr,
|
||||
typename... HDV>
|
||||
void LaunchCUDA(Functor _func, HDV*... _vectors) const {
|
||||
@@ -161,7 +161,7 @@ class Transform {
|
||||
|
||||
LOG(FATAL) << "Not part of device code. WITH_CUDA: " << WITH_CUDA();
|
||||
}
|
||||
#endif // defined(__CUDACC__) || defined(__HIP_PLATFORM_AMD__)
|
||||
#endif // defined(__CUDACC__) || defined(__HIPCC__)
|
||||
|
||||
template <typename... HDV>
|
||||
void LaunchCPU(Functor func, HDV *...vectors) const {
|
||||
|
||||
@@ -28,7 +28,7 @@
|
||||
|
||||
#if defined(XGBOOST_USE_CUDA)
|
||||
#include "cuda_fp16.h"
|
||||
#elif defined(__HIP_PLATFORM_AMD__)
|
||||
#elif defined(XGBOOST_USE_HIP)
|
||||
#include <hip/hip_fp16.h>
|
||||
#endif
|
||||
|
||||
@@ -323,7 +323,7 @@ class ArrayInterfaceHandler {
|
||||
template <typename T, typename E = void>
|
||||
struct ToDType;
|
||||
// float
|
||||
#if defined(XGBOOST_USE_CUDA) || defined(__HIP_PLATFORM_AMD__)
|
||||
#if defined(XGBOOST_USE_CUDA) || defined(XGBOOST_USE_HIP)
|
||||
template <>
|
||||
struct ToDType<__half> {
|
||||
static constexpr ArrayInterfaceHandler::Type kType = ArrayInterfaceHandler::kF2;
|
||||
@@ -473,7 +473,7 @@ class ArrayInterface {
|
||||
CHECK(sizeof(long double) == 16) << error::NoF128();
|
||||
type = T::kF16;
|
||||
} else if (typestr[1] == 'f' && typestr[2] == '2') {
|
||||
#if defined(XGBOOST_USE_CUDA) || defined(__HIP_PLATFORM_AMD__)
|
||||
#if defined(XGBOOST_USE_CUDA) || defined(XGBOOST_USE_HIP)
|
||||
type = T::kF2;
|
||||
#else
|
||||
LOG(FATAL) << "Half type is not supported.";
|
||||
@@ -512,7 +512,7 @@ class ArrayInterface {
|
||||
using T = ArrayInterfaceHandler::Type;
|
||||
switch (type) {
|
||||
case T::kF2: {
|
||||
#if defined(XGBOOST_USE_CUDA) || defined(__HIP_PLATFORM_AMD__)
|
||||
#if defined(XGBOOST_USE_CUDA) || defined(XGBOOST_USE_HIP)
|
||||
return func(reinterpret_cast<__half const *>(data));
|
||||
#endif // defined(XGBOOST_USE_CUDA)
|
||||
}
|
||||
@@ -520,7 +520,7 @@ class ArrayInterface {
|
||||
return func(reinterpret_cast<float const *>(data));
|
||||
case T::kF8:
|
||||
return func(reinterpret_cast<double const *>(data));
|
||||
#if defined(__CUDA_ARCH__ ) || defined(__HIP_PLATFORM_AMD__)
|
||||
#if defined(__CUDA_ARCH__ ) || defined(__HIPCC__)
|
||||
case T::kF16: {
|
||||
// CUDA device code doesn't support long double.
|
||||
SPAN_CHECK(false);
|
||||
@@ -567,7 +567,7 @@ class ArrayInterface {
|
||||
static_assert(sizeof...(index) <= D, "Invalid index.");
|
||||
return this->DispatchCall([=](auto const *p_values) -> T {
|
||||
std::size_t offset = linalg::detail::Offset<0ul>(strides, 0ul, index...);
|
||||
#if defined(XGBOOST_USE_CUDA) || defined(__HIP_PLATFORM_AMD__)
|
||||
#if defined(XGBOOST_USE_CUDA) || defined(XGBOOST_USE_HIP)
|
||||
// No operator defined for half -> size_t
|
||||
using Type = std::conditional_t<
|
||||
std::is_same<__half,
|
||||
@@ -601,7 +601,7 @@ template <typename Fn>
|
||||
auto DispatchDType(ArrayInterfaceHandler::Type dtype, Fn dispatch) {
|
||||
switch (dtype) {
|
||||
case ArrayInterfaceHandler::kF2: {
|
||||
#if defined(XGBOOST_USE_CUDA) || defined(__HIP_PLATFORM_AMD__)
|
||||
#if defined(XGBOOST_USE_CUDA) || defined(XGBOOST_USE_HIP)
|
||||
return dispatch(__half{});
|
||||
#else
|
||||
LOG(FATAL) << "half type is only supported for CUDA input.";
|
||||
|
||||
@@ -281,7 +281,7 @@ void CopyDataToEllpack(const AdapterBatchT& batch, common::Span<FeatureType cons
|
||||
cub::NullType(), batch.Size(), nullptr, false);
|
||||
#endif
|
||||
|
||||
#elif defined (__HIP_PLATFORM_AMD__)
|
||||
#elif defined (__HIPCC__)
|
||||
|
||||
rocprim::inclusive_scan(nullptr, temp_storage_bytes, key_value_index_iter, out, batch.Size(), TupleScanOp<Tuple>());
|
||||
|
||||
|
||||
@@ -13,7 +13,7 @@ namespace xgboost {
|
||||
namespace data {
|
||||
struct LabelsCheck {
|
||||
XGBOOST_DEVICE bool operator()(float y) {
|
||||
#if defined(__CUDA_ARCH__) || defined(__HIP_PLATFORM_AMD__)
|
||||
#if defined(__CUDA_ARCH__) || defined(__HIPCC__)
|
||||
return ::isnan(y) || ::isinf(y);
|
||||
#else
|
||||
return std::isnan(y) || std::isinf(y);
|
||||
|
||||
@@ -124,7 +124,7 @@ class TreeEvaluator {
|
||||
[[nodiscard]] XGBOOST_DEVICE float Divide(float a, float b) const {
|
||||
#ifdef __CUDA_ARCH__
|
||||
return __fdividef(a, b);
|
||||
#elif defined(__HIP_PLATFORM_AMD__)
|
||||
#elif defined(__HIPCC__)
|
||||
return a / b;
|
||||
#else
|
||||
return a / b;
|
||||
|
||||
Reference in New Issue
Block a user