diff --git a/src/common/cuda_to_hip.h b/src/common/cuda_to_hip.h
index c6f30e303..4f6f6bc0c 100644
--- a/src/common/cuda_to_hip.h
+++ b/src/common/cuda_to_hip.h
@@ -56,7 +56,7 @@
 #define cudaPointerGetAttributes                            hipPointerGetAttributes
 
 /* hipMemoryTypeUnregistered not supported */
-#define cudaMemoryTypeUnregistered                          hipMemoryTypeUnregistered
+#define cudaMemoryTypeUnregistered                          hipMemoryTypeUnified
 #define cudaMemoryTypeUnified                               hipMemoryTypeUnified
 #define cudaMemoryTypeHost                                  hipMemoryTypeHost
 
diff --git a/src/common/device_helpers.hip.h b/src/common/device_helpers.hip.h
index 05cc09c72..222ea25d2 100644
--- a/src/common/device_helpers.hip.h
+++ b/src/common/device_helpers.hip.h
@@ -19,10 +19,6 @@
 #include <thrust/transform_scan.h>
 #include <thrust/unique.h>
 
-#include <hip/hip_runtime.h>
-#include <thrust/system/hip/execution_policy.h>
-#include <thrust/system/hip/detail/get_value.h>
-
 #include <algorithm>
 #include <chrono>
 #include <cstddef>  // for size_t
@@ -398,14 +394,13 @@ inline void ThrowOOMError(std::string const& err, size_t bytes) {
 template <class T>
 struct XGBDefaultDeviceAllocatorImpl : XGBBaseDeviceAllocator<T> {
   using SuperT = XGBBaseDeviceAllocator<T>;
-  using pointer = thrust::device_ptr<T>;
-
+  using pointer = thrust::device_ptr<T>;  // NOLINT
   template<typename U>
-  struct rebind {
-    using other = XGBDefaultDeviceAllocatorImpl<U>;
+  struct rebind  // NOLINT
+  {
+    using other = XGBDefaultDeviceAllocatorImpl<U>;  // NOLINT
   };
-
-  pointer allocate(size_t n) {
+  pointer allocate(size_t n) {  // NOLINT
     pointer ptr;
     try {
       ptr = SuperT::allocate(n);
@@ -416,15 +411,14 @@ struct XGBDefaultDeviceAllocatorImpl : XGBBaseDeviceAllocator<T> {
     GlobalMemoryLogger().RegisterAllocation(ptr.get(), n * sizeof(T));
     return ptr;
   }
-
-  void deallocate(pointer ptr, size_t n) {
+  void deallocate(pointer ptr, size_t n) {  // NOLINT
     GlobalMemoryLogger().RegisterDeallocation(ptr.get(), n * sizeof(T));
     SuperT::deallocate(ptr, n);
   }
-
-  #if defined(XGBOOST_USE_RMM) && XGBOOST_USE_RMM == 1
-  XGBDefaultDeviceAllocatorImpl() : SuperT(rmm::cuda_stream_default, rmm::mr::get_current_device_resource()) {}
-  #endif
+#if defined(XGBOOST_USE_RMM) && XGBOOST_USE_RMM == 1
+  XGBDefaultDeviceAllocatorImpl()
+    : SuperT(rmm::cuda_stream_default, rmm::mr::get_current_device_resource()) {}
+#endif  // defined(XGBOOST_USE_RMM) && XGBOOST_USE_RMM == 1
 };
 
 /**
@@ -447,11 +441,12 @@ struct XGBCachingDeviceAllocatorImpl : XGBBaseDeviceAllocator<T> {
         std::make_unique<hipcub::CachingDeviceAllocator>(2, 9, 29)};
     return *allocator;
   }
-  pointer allocate(size_t n) {
+  pointer allocate(size_t n) {  // NOLINT
     pointer thrust_ptr;
     if (use_cub_allocator_) {
       T* raw_ptr{nullptr};
-      auto errc = GetGlobalCachingAllocator().DeviceAllocate(reinterpret_cast<void **>(&raw_ptr), n * sizeof(T));
+      auto errc =  GetGlobalCachingAllocator().DeviceAllocate(reinterpret_cast<void **>(&raw_ptr),
+                                                              n * sizeof(T));
       if (errc != hipSuccess) {
         ThrowOOMError("Caching allocator", n * sizeof(T));
       }
@@ -467,7 +462,7 @@ struct XGBCachingDeviceAllocatorImpl : XGBBaseDeviceAllocator<T> {
     GlobalMemoryLogger().RegisterAllocation(thrust_ptr.get(), n * sizeof(T));
     return thrust_ptr;
   }
-  void deallocate(pointer ptr, size_t n) {
+  void deallocate(pointer ptr, size_t n) {  // NOLINT
     GlobalMemoryLogger().RegisterDeallocation(ptr.get(), n * sizeof(T));
     if (use_cub_allocator_) {
       GetGlobalCachingAllocator().DeviceFree(ptr.get());
@@ -606,9 +601,10 @@ void CopyToD(HContainer const &h, DContainer *d) {
   d->resize(h.size());
   using HVT = std::remove_cv_t<typename HContainer::value_type>;
   using DVT = std::remove_cv_t<typename DContainer::value_type>;
-  static_assert(std::is_same<HVT, DVT>::value, "Host and device containers must have same value type.");
-  dh::safe_cuda(hipMemcpyAsync(d->data().get(), h.data(), h.size() * sizeof(HVT), 
-                               hipMemcpyHostToDevice));
+  static_assert(std::is_same<HVT, DVT>::value,
+                "Host and device containers must have same value type.");
+  dh::safe_cuda(hipMemcpyAsync(d->data().get(), h.data(), h.size() * sizeof(HVT),
+                                hipMemcpyHostToDevice));
 }
 
 // Keep track of pinned memory allocation
@@ -658,22 +654,20 @@ struct PinnedMemory {
 template <typename T>
 typename std::iterator_traits<T>::value_type SumReduction(T in, int nVals) {
   using ValueT = typename std::iterator_traits<T>::value_type;
-
   size_t tmpSize {0};
   ValueT *dummy_out = nullptr;
-  
-  try {
-    dh::safe_cuda(hipcub::DeviceReduce::Sum(nullptr, tmpSize, in, dummy_out, nVals));
-    TemporaryArray<char> temp(tmpSize + sizeof(ValueT));
-    auto ptr = reinterpret_cast<ValueT *>(temp.data().get()) + 1;
-    dh::safe_cuda(hipcub::DeviceReduce::Sum(
-      reinterpret_cast<void *>(ptr), tmpSize, in, reinterpret_cast<ValueT *>(temp.data().get()), nVals));
-    ValueT sum;
-    dh::safe_cuda(hipMemcpy(&sum, temp.data().get(), sizeof(ValueT), hipMemcpyDeviceToHost));
-    return sum;
-  } catch (const std::exception& e) {
-    throw;
-  }
+  dh::safe_cuda(hipcub::DeviceReduce::Sum(nullptr, tmpSize, in, dummy_out, nVals));
+
+  TemporaryArray<char> temp(tmpSize + sizeof(ValueT));
+  auto ptr = reinterpret_cast<ValueT *>(temp.data().get()) + 1;
+  dh::safe_cuda(hipcub::DeviceReduce::Sum(
+      reinterpret_cast<void *>(ptr), tmpSize, in,
+      reinterpret_cast<ValueT *>(temp.data().get()),
+      nVals));
+  ValueT sum;
+  dh::safe_cuda(hipMemcpy(&sum, temp.data().get(), sizeof(ValueT),
+                           hipMemcpyDeviceToHost));
+  return sum;
 }
 
 constexpr std::pair<int, int> CUDAVersion() {
@@ -961,24 +955,15 @@ template <typename Policy, typename InputIt, typename Init, typename Func>
 auto Reduce(Policy policy, InputIt first, InputIt second, Init init, Func reduce_op) {
   size_t constexpr kLimit = std::numeric_limits<int32_t>::max() / 2;
   size_t size = std::distance(first, second);
-
   using Ty = std::remove_cv_t<Init>;
   Ty aggregate = init;
-
   for (size_t offset = 0; offset < size; offset += kLimit) {
     auto begin_it = first + offset;
     auto end_it = first + std::min(offset + kLimit, size);
     size_t batch_size = std::distance(begin_it, end_it);
     CHECK_LE(batch_size, size);
- try {
-      // Print the iterator types
-      auto ret = thrust::reduce(policy, begin_it, end_it, init, reduce_op);
-      aggregate = reduce_op(aggregate, ret);
-    } catch (const thrust::system_error& e) {
-      throw;
-    } catch (const std::exception& e) {
-      throw;
-    }
+    auto ret = thrust::reduce(policy, begin_it, end_it, init, reduce_op);
+    aggregate = reduce_op(aggregate, ret);
   }
   return aggregate;
 }
@@ -1066,7 +1051,7 @@ inline CUDAStreamView DefaultStream() {
 #ifdef HIP_API_PER_THREAD_DEFAULT_STREAM
   return CUDAStreamView{hipStreamPerThread};
 #else
-  return CUDAStreamView{hipStreamDefault};
+  return CUDAStreamView{hipStreamLegacy};
 #endif
 }
 
@@ -1103,4 +1088,4 @@ class LDGIterator {
     return *reinterpret_cast<const T *>(tmp);
   }
 };
-}  // namespace dh
+}  // namespace dh
\ No newline at end of file