sync Jun 1

This commit is contained in:
Your Name
2023-06-01 15:55:06 -07:00
76 changed files with 1424 additions and 595 deletions

View File

@@ -26,6 +26,12 @@
#include "xgboost/logging.h"
#include "xgboost/span.h"
#if defined(XGBOOST_USE_CUDA)
#include "cuda_fp16.h"
#elif defined(__HIP_PLATFORM_AMD__)
#include <hip/hip_fp16.h>
#endif
namespace xgboost {
// Common errors in parsing columnar format.
struct ArrayInterfaceErrors {
@@ -304,12 +310,12 @@ class ArrayInterfaceHandler {
template <typename T, typename E = void>
struct ToDType;
// float
#if (defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 600) || defined(__HIP_PLATFORM_AMD__)
#if defined(XGBOOST_USE_CUDA) || defined(__HIP_PLATFORM_AMD__)
template <>
struct ToDType<__half> {
static constexpr ArrayInterfaceHandler::Type kType = ArrayInterfaceHandler::kF2;
};
#endif // (defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 600) || defined(__HIP_PLATFORM_AMD__)
#endif // defined(XGBOOST_USE_CUDA) || defined(__HIP_PLATFORM_AMD__)
template <>
struct ToDType<float> {
static constexpr ArrayInterfaceHandler::Type kType = ArrayInterfaceHandler::kF4;
@@ -459,11 +465,11 @@ class ArrayInterface {
CHECK(sizeof(long double) == 16) << error::NoF128();
type = T::kF16;
} else if (typestr[1] == 'f' && typestr[2] == '2') {
#if (defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 600) || defined(__HIP_PLATFORM_AMD__)
#if defined(XGBOOST_USE_CUDA) || defined(__HIP_PLATFORM_AMD__)
type = T::kF2;
#else
LOG(FATAL) << "Half type is not supported.";
#endif // (defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 600) || defined(__HIP_PLATFORM_AMD__)
#endif // defined(XGBOOST_USE_CUDA) || defined(__HIP_PLATFORM_AMD__)
} else if (typestr[1] == 'f' && typestr[2] == '4') {
type = T::kF4;
} else if (typestr[1] == 'f' && typestr[2] == '8') {
@@ -490,20 +496,17 @@ class ArrayInterface {
}
}
XGBOOST_DEVICE size_t Shape(size_t i) const { return shape[i]; }
XGBOOST_DEVICE size_t Stride(size_t i) const { return strides[i]; }
[[nodiscard]] XGBOOST_DEVICE std::size_t Shape(size_t i) const { return shape[i]; }
[[nodiscard]] XGBOOST_DEVICE std::size_t Stride(size_t i) const { return strides[i]; }
template <typename Fn>
XGBOOST_HOST_DEV_INLINE decltype(auto) DispatchCall(Fn func) const {
using T = ArrayInterfaceHandler::Type;
switch (type) {
case T::kF2: {
#if (defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 600) || defined(__HIP_PLATFORM_AMD__)
#if defined(XGBOOST_USE_CUDA) || defined(__HIP_PLATFORM_AMD__)
return func(reinterpret_cast<__half const *>(data));
#else
SPAN_CHECK(false);
return func(reinterpret_cast<float const *>(data));
#endif // (defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 600) || defined(__HIP_PLATFORM_AMD__)
#endif // defined(XGBOOST_USE_CUDA) || || defined(__HIP_PLATFORM_AMD__)
}
case T::kF4:
return func(reinterpret_cast<float const *>(data));
@@ -540,23 +543,23 @@ class ArrayInterface {
return func(reinterpret_cast<uint64_t const *>(data));
}
XGBOOST_DEVICE std::size_t ElementSize() const {
[[nodiscard]] XGBOOST_DEVICE std::size_t ElementSize() const {
return this->DispatchCall([](auto *typed_data_ptr) {
return sizeof(std::remove_pointer_t<decltype(typed_data_ptr)>);
});
}
XGBOOST_DEVICE std::size_t ElementAlignment() const {
[[nodiscard]] XGBOOST_DEVICE std::size_t ElementAlignment() const {
return this->DispatchCall([](auto *typed_data_ptr) {
return std::alignment_of<std::remove_pointer_t<decltype(typed_data_ptr)>>::value;
});
}
template <typename T = float, typename... Index>
XGBOOST_DEVICE T operator()(Index &&...index) const {
XGBOOST_HOST_DEV_INLINE T operator()(Index &&...index) const {
static_assert(sizeof...(index) <= D, "Invalid index.");
return this->DispatchCall([=](auto const *p_values) -> T {
std::size_t offset = linalg::detail::Offset<0ul>(strides, 0ul, index...);
#if (defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 600) || defined(__HIP_PLATFORM_AMD__)
#if defined(XGBOOST_USE_CUDA) || defined(__HIP_PLATFORM_AMD__)
// No operator defined for half -> size_t
using Type = std::conditional_t<
std::is_same<__half,
@@ -566,7 +569,7 @@ class ArrayInterface {
return static_cast<T>(static_cast<Type>(p_values[offset]));
#else
return static_cast<T>(p_values[offset]);
#endif
#endif // defined(XGBOOST_USE_CUDA) || defined(__HIP_PLATFORM_AMD__)
});
}
@@ -603,7 +606,7 @@ void DispatchDType(ArrayInterface<D> const array, std::int32_t device, Fn fn) {
};
switch (array.type) {
case ArrayInterfaceHandler::kF2: {
#if (defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 600) || defined(__HIP_PLATFORM_AMD__)
#if defined(XGBOOST_USE_CUDA) || defined(__HIP_PLATFORM_AMD__)
dispatch(__half{});
#endif
break;