diff --git a/src/common/cuda_to_hip.h b/src/common/cuda_to_hip.h index c6f30e303..4f6f6bc0c 100644 --- a/src/common/cuda_to_hip.h +++ b/src/common/cuda_to_hip.h @@ -56,7 +56,7 @@ #define cudaPointerGetAttributes hipPointerGetAttributes /* hipMemoryTypeUnregistered not supported */ -#define cudaMemoryTypeUnregistered hipMemoryTypeUnregistered +#define cudaMemoryTypeUnregistered hipMemoryTypeUnified #define cudaMemoryTypeUnified hipMemoryTypeUnified #define cudaMemoryTypeHost hipMemoryTypeHost diff --git a/src/common/device_helpers.hip.h b/src/common/device_helpers.hip.h index 05cc09c72..222ea25d2 100644 --- a/src/common/device_helpers.hip.h +++ b/src/common/device_helpers.hip.h @@ -19,10 +19,6 @@ #include #include -#include -#include -#include - #include #include #include // for size_t @@ -398,14 +394,13 @@ inline void ThrowOOMError(std::string const& err, size_t bytes) { template struct XGBDefaultDeviceAllocatorImpl : XGBBaseDeviceAllocator { using SuperT = XGBBaseDeviceAllocator; - using pointer = thrust::device_ptr; - + using pointer = thrust::device_ptr; // NOLINT template - struct rebind { - using other = XGBDefaultDeviceAllocatorImpl; + struct rebind // NOLINT + { + using other = XGBDefaultDeviceAllocatorImpl; // NOLINT }; - - pointer allocate(size_t n) { + pointer allocate(size_t n) { // NOLINT pointer ptr; try { ptr = SuperT::allocate(n); @@ -416,15 +411,14 @@ struct XGBDefaultDeviceAllocatorImpl : XGBBaseDeviceAllocator { GlobalMemoryLogger().RegisterAllocation(ptr.get(), n * sizeof(T)); return ptr; } - - void deallocate(pointer ptr, size_t n) { + void deallocate(pointer ptr, size_t n) { // NOLINT GlobalMemoryLogger().RegisterDeallocation(ptr.get(), n * sizeof(T)); SuperT::deallocate(ptr, n); } - - #if defined(XGBOOST_USE_RMM) && XGBOOST_USE_RMM == 1 - XGBDefaultDeviceAllocatorImpl() : SuperT(rmm::cuda_stream_default, rmm::mr::get_current_device_resource()) {} - #endif +#if defined(XGBOOST_USE_RMM) && XGBOOST_USE_RMM == 1 + XGBDefaultDeviceAllocatorImpl() + : SuperT(rmm::cuda_stream_default, rmm::mr::get_current_device_resource()) {} +#endif // defined(XGBOOST_USE_RMM) && XGBOOST_USE_RMM == 1 }; /** @@ -447,11 +441,12 @@ struct XGBCachingDeviceAllocatorImpl : XGBBaseDeviceAllocator { std::make_unique(2, 9, 29)}; return *allocator; } - pointer allocate(size_t n) { + pointer allocate(size_t n) { // NOLINT pointer thrust_ptr; if (use_cub_allocator_) { T* raw_ptr{nullptr}; - auto errc = GetGlobalCachingAllocator().DeviceAllocate(reinterpret_cast(&raw_ptr), n * sizeof(T)); + auto errc = GetGlobalCachingAllocator().DeviceAllocate(reinterpret_cast(&raw_ptr), + n * sizeof(T)); if (errc != hipSuccess) { ThrowOOMError("Caching allocator", n * sizeof(T)); } @@ -467,7 +462,7 @@ struct XGBCachingDeviceAllocatorImpl : XGBBaseDeviceAllocator { GlobalMemoryLogger().RegisterAllocation(thrust_ptr.get(), n * sizeof(T)); return thrust_ptr; } - void deallocate(pointer ptr, size_t n) { + void deallocate(pointer ptr, size_t n) { // NOLINT GlobalMemoryLogger().RegisterDeallocation(ptr.get(), n * sizeof(T)); if (use_cub_allocator_) { GetGlobalCachingAllocator().DeviceFree(ptr.get()); @@ -606,9 +601,10 @@ void CopyToD(HContainer const &h, DContainer *d) { d->resize(h.size()); using HVT = std::remove_cv_t; using DVT = std::remove_cv_t; - static_assert(std::is_same::value, "Host and device containers must have same value type."); - dh::safe_cuda(hipMemcpyAsync(d->data().get(), h.data(), h.size() * sizeof(HVT), - hipMemcpyHostToDevice)); + static_assert(std::is_same::value, + "Host and device containers must have same value type."); + dh::safe_cuda(hipMemcpyAsync(d->data().get(), h.data(), h.size() * sizeof(HVT), + hipMemcpyHostToDevice)); } // Keep track of pinned memory allocation @@ -658,22 +654,20 @@ struct PinnedMemory { template typename std::iterator_traits::value_type SumReduction(T in, int nVals) { using ValueT = typename std::iterator_traits::value_type; - size_t tmpSize {0}; ValueT *dummy_out = nullptr; - - try { - dh::safe_cuda(hipcub::DeviceReduce::Sum(nullptr, tmpSize, in, dummy_out, nVals)); - TemporaryArray temp(tmpSize + sizeof(ValueT)); - auto ptr = reinterpret_cast(temp.data().get()) + 1; - dh::safe_cuda(hipcub::DeviceReduce::Sum( - reinterpret_cast(ptr), tmpSize, in, reinterpret_cast(temp.data().get()), nVals)); - ValueT sum; - dh::safe_cuda(hipMemcpy(&sum, temp.data().get(), sizeof(ValueT), hipMemcpyDeviceToHost)); - return sum; - } catch (const std::exception& e) { - throw; - } + dh::safe_cuda(hipcub::DeviceReduce::Sum(nullptr, tmpSize, in, dummy_out, nVals)); + + TemporaryArray temp(tmpSize + sizeof(ValueT)); + auto ptr = reinterpret_cast(temp.data().get()) + 1; + dh::safe_cuda(hipcub::DeviceReduce::Sum( + reinterpret_cast(ptr), tmpSize, in, + reinterpret_cast(temp.data().get()), + nVals)); + ValueT sum; + dh::safe_cuda(hipMemcpy(&sum, temp.data().get(), sizeof(ValueT), + hipMemcpyDeviceToHost)); + return sum; } constexpr std::pair CUDAVersion() { @@ -961,24 +955,15 @@ template auto Reduce(Policy policy, InputIt first, InputIt second, Init init, Func reduce_op) { size_t constexpr kLimit = std::numeric_limits::max() / 2; size_t size = std::distance(first, second); - using Ty = std::remove_cv_t; Ty aggregate = init; - for (size_t offset = 0; offset < size; offset += kLimit) { auto begin_it = first + offset; auto end_it = first + std::min(offset + kLimit, size); size_t batch_size = std::distance(begin_it, end_it); CHECK_LE(batch_size, size); - try { - // Print the iterator types - auto ret = thrust::reduce(policy, begin_it, end_it, init, reduce_op); - aggregate = reduce_op(aggregate, ret); - } catch (const thrust::system_error& e) { - throw; - } catch (const std::exception& e) { - throw; - } + auto ret = thrust::reduce(policy, begin_it, end_it, init, reduce_op); + aggregate = reduce_op(aggregate, ret); } return aggregate; } @@ -1066,7 +1051,7 @@ inline CUDAStreamView DefaultStream() { #ifdef HIP_API_PER_THREAD_DEFAULT_STREAM return CUDAStreamView{hipStreamPerThread}; #else - return CUDAStreamView{hipStreamDefault}; + return CUDAStreamView{hipStreamLegacy}; #endif } @@ -1103,4 +1088,4 @@ class LDGIterator { return *reinterpret_cast(tmp); } }; -} // namespace dh +} // namespace dh \ No newline at end of file