diff --git a/src/common/cuda_context.cuh b/src/common/cuda_context.cuh
index dce5a9858..17896460f 100644
--- a/src/common/cuda_context.cuh
+++ b/src/common/cuda_context.cuh
@@ -6,6 +6,12 @@
 #include
 #include "device_helpers.cuh"

+#ifdef XGBOOST_USE_HIP
+namespace thrust {
+  namespace cuda = thrust::hip;
+}
+#endif
+
 namespace xgboost {
 struct CUDAContext {
  private:
@@ -17,37 +23,21 @@ struct CUDAContext {
    * \brief Caching thrust policy.
    */
   auto CTP() const {
-#if defined(XGBOOST_USE_CUDA)
 #if THRUST_MAJOR_VERSION >= 2
     return thrust::cuda::par_nosync(caching_alloc_).on(dh::DefaultStream());
 #else
     return thrust::cuda::par(caching_alloc_).on(dh::DefaultStream());
 #endif  // THRUST_MAJOR_VERSION >= 2
-#elif defined(XGBOOST_USE_HIP)
-#if THRUST_MAJOR_VERSION >= 2
-    return thrust::hip::par_nosync(caching_alloc_).on(dh::DefaultStream());
-#else
-    return thrust::hip::par(caching_alloc_).on(dh::DefaultStream());
-#endif  // THRUST_MAJOR_VERSION >= 2
-#endif
   }
   /**
    * \brief Thrust policy without caching allocator.
   */
   auto TP() const {
-#if defined(XGBOOST_USE_CUDA)
 #if THRUST_MAJOR_VERSION >= 2
     return thrust::cuda::par_nosync(alloc_).on(dh::DefaultStream());
 #else
     return thrust::cuda::par(alloc_).on(dh::DefaultStream());
 #endif  // THRUST_MAJOR_VERSION >= 2
-#elif defined(XGBOOST_USE_HIP)
-#if THRUST_MAJOR_VERSION >= 2
-    return thrust::hip::par_nosync(alloc_).on(dh::DefaultStream());
-#else
-    return thrust::hip::par(alloc_).on(dh::DefaultStream());
-#endif  // THRUST_MAJOR_VERSION >= 2
-#endif
   }
   auto Stream() const { return dh::DefaultStream(); }
 };
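Every hunk below repeats the pattern introduced here: under a HIP build, thrust::cuda is declared as a namespace alias for rocThrust's thrust::hip, so the CUDA spelling of each execution policy compiles on both backends and the duplicated #elif defined(XGBOOST_USE_HIP) branches can simply be deleted. A minimal sketch of the trick in isolation (a hypothetical translation unit, not part of the patch; it assumes rocThrust declares thrust::hip, which the patch itself relies on):

#include <thrust/device_vector.h>
#include <thrust/execution_policy.h>
#include <thrust/reduce.h>

#ifdef XGBOOST_USE_HIP
namespace thrust {
// The alias must be visible before the first use of thrust::cuda::.
namespace cuda = thrust::hip;
}  // namespace thrust
#endif

int SumOnDevice(thrust::device_vector<int> const &v) {
  // One spelling for both backends: the real thrust::cuda under nvcc,
  // thrust::hip through the alias under a HIP build.
  return thrust::reduce(thrust::cuda::par, v.begin(), v.end(), 0);
}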
diff --git a/src/common/hist_util.cu b/src/common/hist_util.cu
index 7bdd90eb9..bd0c894f0 100644
--- a/src/common/hist_util.cu
+++ b/src/common/hist_util.cu
@@ -26,6 +26,12 @@
 #include "quantile.h"
 #include "xgboost/host_device_vector.h"

+#ifdef XGBOOST_USE_HIP
+namespace thrust {
+  namespace cuda = thrust::hip;
+}
+#endif
+
 namespace xgboost::common {
 constexpr float SketchContainer::kFactor;

@@ -112,7 +118,6 @@ void SortByWeight(dh::device_vector* weights, dh::device_vector* s
   // Sort both entries and weights.
   dh::XGBDeviceAllocator alloc;
   CHECK_EQ(weights->size(), sorted_entries->size());
-#if defined(XGBOOST_USE_CUDA)
   thrust::sort_by_key(thrust::cuda::par(alloc), sorted_entries->begin(), sorted_entries->end(),
                       weights->begin(), detail::EntryCompareOp());

@@ -122,17 +127,6 @@ void SortByWeight(dh::device_vector* weights, dh::device_vector* s
       thrust::cuda::par(caching), sorted_entries->begin(), sorted_entries->end(), weights->begin(),
       weights->begin(),
       [=] __device__(const Entry& a, const Entry& b) { return a.index == b.index; });
-#elif defined(XGBOOST_USE_HIP)
-  thrust::sort_by_key(thrust::hip::par(alloc), sorted_entries->begin(), sorted_entries->end(),
-                      weights->begin(), detail::EntryCompareOp());
-
-  // Scan weights
-  dh::XGBCachingDeviceAllocator caching;
-  thrust::inclusive_scan_by_key(
-      thrust::hip::par(caching), sorted_entries->begin(), sorted_entries->end(), weights->begin(),
-      weights->begin(),
-      [=] __device__(const Entry& a, const Entry& b) { return a.index == b.index; });
-#endif
 }

 void RemoveDuplicatedCategories(DeviceOrd device, MetaInfo const& info, Span d_cuts_ptr,
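With the branches collapsed, SortByWeight is a single code path: sort the entries with their weights attached, then run a segmented inclusive scan keyed on equal feature indices. A toy version of that two-step idiom (hypothetical helper; the default policy stands in for thrust::cuda::par(alloc) for brevity):

#include <thrust/device_vector.h>
#include <thrust/functional.h>
#include <thrust/scan.h>
#include <thrust/sort.h>

// keys: one feature index per entry; vals: that entry's weight.
void SortThenSegmentedScan(thrust::device_vector<int> *keys,
                           thrust::device_vector<float> *vals) {
  // Reorder vals so each weight still sits next to its (now sorted) key.
  thrust::sort_by_key(keys->begin(), keys->end(), vals->begin());
  // Accumulate weights within each run of equal keys, in place.
  thrust::inclusive_scan_by_key(keys->begin(), keys->end(), vals->begin(),
                                vals->begin(), thrust::equal_to<int>{});
}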
diff --git a/src/common/hist_util.cuh b/src/common/hist_util.cuh
index c4112ee13..aec733ddc 100644
--- a/src/common/hist_util.cuh
+++ b/src/common/hist_util.cuh
@@ -19,6 +19,10 @@
 #if defined(XGBOOST_USE_HIP)
 namespace cub = hipcub;
+
+namespace thrust {
+  namespace cuda = thrust::hip;
+}
 #endif

 namespace xgboost::common {
@@ -175,18 +179,10 @@ void GetColumnSizesScan(DeviceOrd device, size_t num_columns, std::size_t num_cu
     return thrust::min(num_cuts_per_feature, column_size);
   });

-#if defined(XGBOOST_USE_CUDA)
   thrust::exclusive_scan(thrust::cuda::par(alloc), cut_ptr_it,
                          cut_ptr_it + column_sizes_scan->size(),
                          cuts_ptr->DevicePointer());
   thrust::exclusive_scan(thrust::cuda::par(alloc), column_sizes_scan->begin(),
                          column_sizes_scan->end(), column_sizes_scan->begin());
-#elif defined(XGBOOST_USE_HIP)
-  thrust::exclusive_scan(thrust::hip::par(alloc), cut_ptr_it,
-                         cut_ptr_it + column_sizes_scan->size(),
-                         cuts_ptr->DevicePointer());
-  thrust::exclusive_scan(thrust::hip::par(alloc), column_sizes_scan->begin(),
-                         column_sizes_scan->end(), column_sizes_scan->begin());
-#endif
 }

 inline size_t constexpr BytesPerElement(bool has_weight) {
@@ -309,13 +305,8 @@ void ProcessSlidingWindow(AdapterBatch const &batch, MetaInfo const &info,
                           &sorted_entries);

   dh::XGBDeviceAllocator alloc;
-#if defined(XGBOOST_USE_CUDA)
   thrust::sort(thrust::cuda::par(alloc), sorted_entries.begin(),
                sorted_entries.end(), detail::EntryCompareOp());
-#elif defined(XGBOOST_USE_HIP)
-  thrust::sort(thrust::hip::par(alloc), sorted_entries.begin(),
-               sorted_entries.end(), detail::EntryCompareOp());
-#endif

   if (sketch_container->HasCategorical()) {
     auto d_cuts_ptr = cuts_ptr.DeviceSpan();
@@ -374,19 +365,11 @@ void ProcessWeightedSlidingWindow(Batch batch, MetaInfo const& info,
        return weights[group_idx];
      });

-#if defined(XGBOOST_USE_CUDA)
    auto retit = thrust::copy_if(thrust::cuda::par(alloc),
                                 weight_iter + begin, weight_iter + end,
                                 batch_iter + begin,
                                 d_temp_weights.data(),  // output
                                 is_valid);
-#elif defined(XGBOOST_USE_HIP)
-    auto retit = thrust::copy_if(thrust::hip::par(alloc),
-                                 weight_iter + begin, weight_iter + end,
-                                 batch_iter + begin,
-                                 d_temp_weights.data(),  // output
-                                 is_valid);
-#endif
    CHECK_EQ(retit - d_temp_weights.data(), d_temp_weights.size());
  } else {
@@ -397,19 +380,11 @@ void ProcessWeightedSlidingWindow(Batch batch, MetaInfo const& info,
        return weights[batch.GetElement(idx).row_idx];
      });

-#if defined(XGBOOST_USE_CUDA)
    auto retit = thrust::copy_if(thrust::cuda::par(alloc),
                                 weight_iter + begin, weight_iter + end,
                                 batch_iter + begin,
                                 d_temp_weights.data(),  // output
                                 is_valid);
-#elif defined(XGBOOST_USE_HIP)
-    auto retit = thrust::copy_if(thrust::hip::par(alloc),
-                                 weight_iter + begin, weight_iter + end,
-                                 batch_iter + begin,
-                                 d_temp_weights.data(),  // output
-                                 is_valid);
-#endif
    CHECK_EQ(retit - d_temp_weights.data(), d_temp_weights.size());
  }
diff --git a/src/common/numeric.cu b/src/common/numeric.cu
index 8d115506a..c25ee2c6a 100644
--- a/src/common/numeric.cu
+++ b/src/common/numeric.cu
@@ -8,18 +8,19 @@
 #include "xgboost/context.h"             // Context
 #include "xgboost/host_device_vector.h"  // HostDeviceVector

+#ifdef XGBOOST_USE_HIP
+namespace thrust {
+  namespace cuda = thrust::hip;
+}
+#endif
+
 namespace xgboost::common::cuda_impl {
 double Reduce(Context const* ctx, HostDeviceVector const& values) {
   values.SetDevice(ctx->Device());
   auto const d_values = values.ConstDeviceSpan();
   dh::XGBCachingDeviceAllocator alloc;
-#if defined(XGBOOST_USE_CUDA)
   return dh::Reduce(thrust::cuda::par(alloc), dh::tcbegin(d_values), dh::tcend(d_values), 0.0,
                     thrust::plus{});
-#elif defined(XGBOOST_USE_HIP)
-  return dh::Reduce(thrust::hip::par(alloc), dh::tcbegin(d_values), dh::tcend(d_values), 0.0,
-                    thrust::plus{});
-#endif
 }
 }  // namespace xgboost::common::cuda_impl
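GetColumnSizesScan above is the usual counts-to-offsets step: an exclusive scan turns per-column element counts into begin offsets, CSR-style. A toy version of that call shape (hypothetical helper; default policy for brevity):

#include <cstddef>
#include <thrust/device_vector.h>
#include <thrust/scan.h>

// {3, 1, 4, 0} -> {0, 3, 4, 8}: slot i becomes the begin offset of segment i.
// The real code scans one extra trailing slot so the total count survives.
void CountsToOffsets(thrust::device_vector<std::size_t> *counts) {
  thrust::exclusive_scan(counts->begin(), counts->end(), counts->begin());
}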
diff --git a/src/common/quantile.cu b/src/common/quantile.cu
index 6040e266f..849b19480 100644
--- a/src/common/quantile.cu
+++ b/src/common/quantile.cu
@@ -22,6 +22,12 @@
 #include "transform_iterator.h"  // MakeIndexTransformIter
 #include "xgboost/span.h"

+#ifdef XGBOOST_USE_HIP
+namespace thrust {
+  namespace cuda = thrust::hip;
+}
+#endif
+
 namespace xgboost {
 namespace common {

@@ -147,7 +153,6 @@ common::Span> MergePath(
   // We reuse the memory for storing merge path.
   common::Span merge_path{reinterpret_cast(out.data()), out.size()};
   // Determine the merge path, 0 if element is from x, 1 if it's from y.
-#if defined(XGBOOST_USE_CUDA)
   thrust::merge_by_key(
       thrust::cuda::par(alloc), x_merge_key_it, x_merge_key_it + d_x.size(),
       y_merge_key_it, y_merge_key_it + d_y.size(), x_merge_val_it,
@@ -160,36 +165,15 @@
         }
         return l_column_id < r_column_id;
       });
-#elif defined(XGBOOST_USE_HIP)
-  thrust::merge_by_key(
-      thrust::hip::par(alloc), x_merge_key_it, x_merge_key_it + d_x.size(),
-      y_merge_key_it, y_merge_key_it + d_y.size(), x_merge_val_it,
-      y_merge_val_it, thrust::make_discard_iterator(), merge_path.data(),
-      [=] __device__(auto const &l, auto const &r) -> bool {
-        auto l_column_id = thrust::get<0>(l);
-        auto r_column_id = thrust::get<0>(r);
-        if (l_column_id == r_column_id) {
-          return thrust::get<1>(l).value < thrust::get<1>(r).value;
-        }
-        return l_column_id < r_column_id;
-      });
-#endif

   // Compute output ptr
   auto transform_it =
       thrust::make_zip_iterator(thrust::make_tuple(x_ptr.data(), y_ptr.data()));
-#if defined(XGBOOST_USE_CUDA)
   thrust::transform(
       thrust::cuda::par(alloc), transform_it, transform_it + x_ptr.size(),
       out_ptr.data(),
       [] __device__(auto const& t) { return thrust::get<0>(t) + thrust::get<1>(t); });
-#elif defined(XGBOOST_USE_HIP)
-  thrust::transform(
-      thrust::hip::par(alloc), transform_it, transform_it + x_ptr.size(),
-      out_ptr.data(),
-      [] __device__(auto const& t) { return thrust::get<0>(t) + thrust::get<1>(t); });
-#endif

   // 0^th is the indicator, 1^th is placeholder
   auto get_ind = []XGBOOST_DEVICE(Tuple const& t) { return thrust::get<0>(t); };
@@ -215,7 +199,6 @@
   // comparison, index of y is incremented by 1 from y_0 to y_1, and at the same time, y_0
   // is landed into output as the first element in merge result. The scan result is the
   // subscript of x and y.
-#if defined(XGBOOST_USE_CUDA)
   thrust::exclusive_scan_by_key(
       thrust::cuda::par(alloc), scan_key_it, scan_key_it + merge_path.size(),
       scan_val_it, merge_path.data(),
@@ -224,16 +207,6 @@
       thrust::equal_to{},
       [=] __device__(Tuple const &l, Tuple const &r) -> Tuple {
         return thrust::make_tuple(get_x(l) + get_x(r), get_y(l) + get_y(r));
       });
-#elif defined(XGBOOST_USE_HIP)
-  thrust::exclusive_scan_by_key(
-      thrust::hip::par(alloc), scan_key_it, scan_key_it + merge_path.size(),
-      scan_val_it, merge_path.data(),
-      thrust::make_tuple(0ul, 0ul),
-      thrust::equal_to{},
-      [=] __device__(Tuple const &l, Tuple const &r) -> Tuple {
-        return thrust::make_tuple(get_x(l) + get_x(r), get_y(l) + get_y(r));
-      });
-#endif

   return merge_path;
 }
@@ -414,7 +387,6 @@ size_t SketchContainer::ScanInput(Span entries, Span d_col
   // Reverse scan to accumulate weights into first duplicated element on left.
   auto val_it = thrust::make_reverse_iterator(dh::tend(entries));
-#if defined(XGBOOST_USE_CUDA)
   thrust::inclusive_scan_by_key(
       thrust::cuda::par(alloc), key_it, key_it + entries.size(),
       val_it, val_it,
@@ -428,21 +400,6 @@ size_t SketchContainer::ScanInput(Span entries, Span d_col
         }
         return l;
       });
-#elif defined(XGBOOST_USE_HIP)
-  thrust::inclusive_scan_by_key(
-      thrust::hip::par(alloc), key_it, key_it + entries.size(),
-      val_it, val_it,
-      thrust::equal_to{},
-      [] __device__(SketchEntry const &r, SketchEntry const &l) {
-        // Only accumulate for the first type of duplication.
-        if (l.value - r.value == 0 && l.rmin - r.rmin != 0) {
-          auto w = l.wmin + r.wmin;
-          SketchEntry v{l.rmin, l.rmin + w, w, l.value};
-          return v;
-        }
-        return l;
-      });
-#endif

   auto d_columns_ptr_out = columns_ptr_b_.DeviceSpan();
   // thrust unique_by_key preserves the first element.
@@ -691,7 +648,6 @@ void SketchContainer::MakeCuts(HistogramCuts* p_cuts, bool is_column_split) {
   // track of the unique keys (feature indices) after the thrust::reduce_by_key` call.
   dh::caching_device_vector d_max_keys(d_in_columns_ptr.size() - 1);
   dh::caching_device_vector d_max_values(d_in_columns_ptr.size() - 1);
-#if defined(XGBOOST_USE_CUDA)
   auto new_end = thrust::reduce_by_key(
       thrust::cuda::par(alloc), key_it, key_it + in_cut_values.size(), val_it, d_max_keys.begin(),
       d_max_values.begin(), thrust::equal_to{},
@@ -705,21 +661,6 @@ void SketchContainer::MakeCuts(HistogramCuts* p_cuts, bool is_column_split) {
                                                            default_entry);
   thrust::scatter(thrust::cuda::par(alloc), d_max_values.begin(), d_max_values.end(),
                   d_max_keys.begin(), d_max_results.begin());
-#elif defined(XGBOOST_USE_HIP)
-  auto new_end = thrust::reduce_by_key(
-      thrust::hip::par(alloc), key_it, key_it + in_cut_values.size(), val_it, d_max_keys.begin(),
-      d_max_values.begin(), thrust::equal_to{},
-      [] __device__(auto l, auto r) { return l.value > r.value ? l : r; });
-  d_max_keys.erase(new_end.first, d_max_keys.end());
-  d_max_values.erase(new_end.second, d_max_values.end());
-
-  // The device vector needs to be initialized explicitly since we may have some missing columns.
-  SketchEntry default_entry{};
-  dh::caching_device_vector d_max_results(d_in_columns_ptr.size() - 1,
-                                          default_entry);
-  thrust::scatter(thrust::hip::par(alloc), d_max_values.begin(), d_max_values.end(),
-                  d_max_keys.begin(), d_max_results.begin());
-#endif

   dh::CopyDeviceSpanToVector(&max_values, dh::ToSpan(d_max_results));
   auto max_it = MakeIndexTransformIter([&](auto i) {
     if (IsCat(h_feature_types, i)) {
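MergePath above records, for every slot of the merged output, whether the element came from x or from y; quantile.cu tags the values with zipped indicators. A toy version that produces the same 0/1 origin path using constant iterators (hypothetical helper; both key ranges must already be sorted):

#include <thrust/device_vector.h>
#include <thrust/iterator/constant_iterator.h>
#include <thrust/iterator/discard_iterator.h>
#include <thrust/merge.h>

// origin must be presized to x.size() + y.size(); 0 means "came from x",
// 1 means "came from y". The merged keys themselves are discarded.
void MergeOrigin(thrust::device_vector<int> const &x, thrust::device_vector<int> const &y,
                 thrust::device_vector<int> *origin) {
  thrust::merge_by_key(x.begin(), x.end(), y.begin(), y.end(),
                       thrust::make_constant_iterator(0), thrust::make_constant_iterator(1),
                       thrust::make_discard_iterator(), origin->begin());
}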
diff --git a/src/common/quantile.cuh b/src/common/quantile.cuh
index fac254abf..63d7d1e5a 100644
--- a/src/common/quantile.cuh
+++ b/src/common/quantile.cuh
@@ -10,6 +10,12 @@
 #include "timer.h"
 #include "categorical.h"

+#if defined(XGBOOST_USE_HIP)
+namespace thrust {
+  namespace cuda = thrust::hip;
+}
+#endif
+
 namespace xgboost {
 namespace common {

@@ -184,19 +190,11 @@ class SketchContainer {
     d_column_scan = this->columns_ptr_.DeviceSpan();

-#if defined(XGBOOST_USE_CUDA)
     size_t n_uniques = dh::SegmentedUnique(
         thrust::cuda::par(alloc), d_column_scan.data(),
         d_column_scan.data() + d_column_scan.size(), entries.data(),
         entries.data() + entries.size(), scan_out.DevicePointer(),
         entries.data(), detail::SketchUnique{}, key_comp);
-#elif defined(XGBOOST_USE_HIP)
-    size_t n_uniques = dh::SegmentedUnique(
-        thrust::hip::par(alloc), d_column_scan.data(),
-        d_column_scan.data() + d_column_scan.size(), entries.data(),
-        entries.data() + entries.size(), scan_out.DevicePointer(),
-        entries.data(), detail::SketchUnique{}, key_comp);
-#endif

     this->columns_ptr_.Copy(scan_out);
     CHECK(!this->columns_ptr_.HostCanRead());
diff --git a/src/common/stats.cuh b/src/common/stats.cuh
index 0de654818..5c909a830 100644
--- a/src/common/stats.cuh
+++ b/src/common/stats.cuh
@@ -23,6 +23,12 @@
 #include "xgboost/context.h"  // Context
 #include "xgboost/span.h"     // Span

+#ifdef XGBOOST_USE_HIP
+namespace thrust {
+  namespace cuda = thrust::hip;
+}
+#endif
+
 namespace xgboost {
 namespace common {
 namespace detail {
@@ -217,13 +223,8 @@ void SegmentedWeightedQuantile(Context const* ctx, AlphaIt alpha_it, SegIt seg_b
   auto scan_val = dh::MakeTransformIterator(thrust::make_counting_iterator(0ul),
                                             detail::WeightOp{w_begin, d_sorted_idx});
-#if defined(XGBOOST_USE_CUDA)
   thrust::inclusive_scan_by_key(thrust::cuda::par(caching), scan_key, scan_key + n_weights,
                                 scan_val, weights_cdf.begin());
-#elif defined(XGBOOST_USE_HIP)
-  thrust::inclusive_scan_by_key(thrust::hip::par(caching), scan_key, scan_key + n_weights,
-                                scan_val, weights_cdf.begin());
-#endif

   auto n_segments = std::distance(seg_beg, seg_end) - 1;
   quantiles->SetDevice(ctx->Device());
diff --git a/src/data/data.cu b/src/data/data.cu
index 9c0c02b24..39c44954c 100644
--- a/src/data/data.cu
+++ b/src/data/data.cu
@@ -17,6 +17,9 @@
 #if defined(XGBOOST_USE_HIP)
 namespace cub = hipcub;
+namespace thrust {
+  namespace cuda = thrust::hip;
+}
 #endif

 namespace xgboost {
@@ -122,13 +125,8 @@ void CopyQidImpl(ArrayInterface<1> array_interface, std::vector* p_
   group_ptr_.resize(h_num_runs_out + 1, 0);
   dh::XGBCachingDeviceAllocator alloc;
-#if defined(XGBOOST_USE_CUDA)
   thrust::inclusive_scan(thrust::cuda::par(alloc), cnt.begin(),
                          cnt.begin() + h_num_runs_out, cnt.begin());
-#elif defined(XGBOOST_USE_HIP)
-  thrust::inclusive_scan(thrust::hip::par(alloc), cnt.begin(),
-                         cnt.begin() + h_num_runs_out, cnt.begin());
-#endif

   thrust::copy(cnt.begin(), cnt.begin() + h_num_runs_out,
               group_ptr_.begin() + 1);
diff --git a/src/data/device_adapter.cuh b/src/data/device_adapter.cuh
index ac19d47e4..b1c18ac6a 100644
--- a/src/data/device_adapter.cuh
+++ b/src/data/device_adapter.cuh
@@ -17,6 +17,12 @@
 #include "adapter.h"
 #include "array_interface.h"

+#if defined(XGBOOST_USE_HIP)
+namespace thrust {
+  namespace cuda = thrust::hip;
+}
+#endif
+
 namespace xgboost {
 namespace data {

@@ -246,17 +252,10 @@ std::size_t GetRowCounts(const AdapterBatchT batch, common::Span offs
   });

   dh::XGBCachingDeviceAllocator alloc;
-#if defined(XGBOOST_USE_CUDA)
   bst_row_t row_stride =
       dh::Reduce(thrust::cuda::par(alloc), thrust::device_pointer_cast(offset.data()),
                  thrust::device_pointer_cast(offset.data()) + offset.size(),
                  static_cast(0), thrust::maximum());
-#elif defined(XGBOOST_USE_HIP)
-  bst_row_t row_stride =
-      dh::Reduce(thrust::hip::par(alloc), thrust::device_pointer_cast(offset.data()),
-                 thrust::device_pointer_cast(offset.data()) + offset.size(),
-                 static_cast(0), thrust::maximum());
-#endif
   return row_stride;
 }

@@ -280,13 +279,8 @@ bool NoInfInData(AdapterBatchT const& batch, IsValidFunctor is_valid) {
   // intervals to early stop. But we expect all data to be valid here, using small
   // intervals only decreases performance due to excessive kernel launch and stream
   // synchronization.
-#if defined(XGBOOST_USE_CUDA)
   auto valid = dh::Reduce(thrust::cuda::par(alloc), value_iter, value_iter + batch.Size(), true,
                           thrust::logical_and<>{});
-#elif defined(XGBOOST_USE_HIP)
-  auto valid = dh::Reduce(thrust::hip::par(alloc), value_iter, value_iter + batch.Size(), true,
-                          thrust::logical_and<>{});
-#endif
   return valid;
 }
 };  // namespace data
diff --git a/src/data/iterative_dmatrix.cu b/src/data/iterative_dmatrix.cu
index 68a58fd60..cc09356c4 100644
--- a/src/data/iterative_dmatrix.cu
+++ b/src/data/iterative_dmatrix.cu
@@ -16,6 +16,12 @@
 #include "simple_batch_iterator.h"
 #include "sparse_page_source.h"

+#if defined(XGBOOST_USE_HIP)
+namespace thrust {
+  namespace cuda = thrust::hip;
+}
+#endif
+
 namespace xgboost::data {
 void IterativeDMatrix::InitFromCUDA(Context const* ctx, BatchParam const& p,
                                     DataIterHandle iter_handle, float missing,
@@ -86,11 +92,7 @@ void IterativeDMatrix::InitFromCUDA(Context const* ctx, BatchParam const& p,
       return GetRowCounts(value, row_counts_span, get_device(), missing);
     }));
-#if defined(XGBOOST_USE_CUDA)
     nnz += thrust::reduce(thrust::cuda::par(alloc), row_counts.begin(), row_counts.end());
-#elif defined(XGBOOST_USE_HIP)
-    nnz += thrust::reduce(thrust::hip::par(alloc), row_counts.begin(), row_counts.end());
-#endif
     batches++;
   } while (iter.Next());
diff --git a/src/data/simple_dmatrix.cuh b/src/data/simple_dmatrix.cuh
index 01e532d01..a862ed23d 100644
--- a/src/data/simple_dmatrix.cuh
+++ b/src/data/simple_dmatrix.cuh
@@ -13,6 +13,12 @@
 #include "../common/error_msg.h"  // for InfInData
 #include "device_adapter.cuh"     // for HasInfInData

+#if defined(XGBOOST_USE_HIP)
+namespace thrust {
+  namespace cuda = thrust::hip;
+}
+#endif
+
 namespace xgboost::data {

 #if defined(XGBOOST_USE_CUDA)
@@ -69,15 +75,9 @@ void CountRowOffsets(const AdapterBatchT& batch, common::Span offset,
   });

   dh::XGBCachingDeviceAllocator alloc;
-#if defined(XGBOOST_USE_CUDA)
   thrust::exclusive_scan(thrust::cuda::par(alloc), thrust::device_pointer_cast(offset.data()),
                          thrust::device_pointer_cast(offset.data() + offset.size()),
                          thrust::device_pointer_cast(offset.data()));
-#elif defined(XGBOOST_USE_HIP)
-  thrust::exclusive_scan(thrust::hip::par(alloc), thrust::device_pointer_cast(offset.data()),
-                         thrust::device_pointer_cast(offset.data() + offset.size()),
-                         thrust::device_pointer_cast(offset.data()));
-#endif
 }

 template
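GetRowCounts above ends with the row-stride computation: a max-reduce over the per-row element counts. A toy version of that reduction shape (hypothetical helper; XGBoost routes the real call through dh::Reduce with thrust::cuda::par(alloc)):

#include <cstddef>
#include <thrust/device_vector.h>
#include <thrust/functional.h>
#include <thrust/reduce.h>

std::size_t MaxRowSize(thrust::device_vector<std::size_t> const &counts) {
  // The widest row determines the stride of the dense ELLPACK-style layout.
  return thrust::reduce(counts.begin(), counts.end(), std::size_t{0},
                        thrust::maximum<std::size_t>{});
}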
diff --git a/src/metric/auc.cu b/src/metric/auc.cu
index abbc4e944..d2194034e 100644
--- a/src/metric/auc.cu
+++ b/src/metric/auc.cu
@@ -25,6 +25,12 @@
 #include "xgboost/data.h"
 #include "xgboost/span.h"

+#if defined(XGBOOST_USE_HIP)
+namespace thrust {
+  namespace cuda = thrust::hip;
+}
+#endif
+
 namespace xgboost {
 namespace metric {
 // tag this file, used by force static link later.
@@ -127,17 +133,10 @@ GPUBinaryAUC(common::Span predts, MetaInfo const &info,
       thrust::make_counting_iterator(0),
       [=] XGBOOST_DEVICE(size_t i) { return predts[d_sorted_idx[i]]; });
-#if defined(XGBOOST_USE_CUDA)
   auto end_unique = thrust::unique_by_key_copy(
       thrust::cuda::par(alloc), uni_key, uni_key + d_sorted_idx.size(),
       dh::tbegin(d_unique_idx), thrust::make_discard_iterator(),
       dh::tbegin(d_unique_idx));
-#elif defined(XGBOOST_USE_HIP)
-  auto end_unique = thrust::unique_by_key_copy(
-      thrust::hip::par(alloc), uni_key, uni_key + d_sorted_idx.size(),
-      dh::tbegin(d_unique_idx), thrust::make_discard_iterator(),
-      dh::tbegin(d_unique_idx));
-#endif
   d_unique_idx = d_unique_idx.subspan(0, end_unique.second - dh::tbegin(d_unique_idx));

@@ -179,11 +178,7 @@ GPUBinaryAUC(common::Span predts, MetaInfo const &info,
   Pair last = cache->fptp.back();
-#if defined(XGBOOST_USE_CUDA)
   double auc = thrust::reduce(thrust::cuda::par(alloc), in, in + d_unique_idx.size());
-#elif defined(XGBOOST_USE_HIP)
-  double auc = thrust::reduce(thrust::hip::par(alloc), in, in + d_unique_idx.size());
-#endif

   return std::make_tuple(last.first, last.second, auc);
 }
@@ -239,15 +234,9 @@ double ScaleClasses(common::Span results, common::Span local_are
   double tp_sum;
   double auc_sum;
-#if defined(XGBOOST_USE_CUDA)
   thrust::tie(auc_sum, tp_sum) =
       thrust::reduce(thrust::cuda::par(alloc), reduce_in, reduce_in + n_classes,
                      Pair{0.0, 0.0}, PairPlus{});
-#elif defined(XGBOOST_USE_HIP)
-  thrust::tie(auc_sum, tp_sum) =
-      thrust::reduce(thrust::hip::par(alloc), reduce_in, reduce_in + n_classes,
-                     Pair{0.0, 0.0}, PairPlus{});
-#endif

   if (tp_sum != 0 && !std::isnan(auc_sum)) {
     auc_sum /= tp_sum;
@@ -329,15 +318,9 @@ void SegmentedReduceAUC(common::Span d_unique_idx,
         return auc;
       });
-#if defined(XGBOOST_USE_CUDA)
   thrust::reduce_by_key(thrust::cuda::par(alloc), key_in,
                         key_in + d_unique_idx.size(), val_in,
                         thrust::make_discard_iterator(), dh::tbegin(d_auc));
-#elif defined(XGBOOST_USE_HIP)
-  thrust::reduce_by_key(thrust::hip::par(alloc), key_in,
-                        key_in + d_unique_idx.size(), val_in,
-                        thrust::make_discard_iterator(), dh::tbegin(d_auc));
-#endif
 }

 /**
@@ -410,7 +393,6 @@ double GPUMultiClassAUCOVR(MetaInfo const &info, DeviceOrd device,
   dh::TemporaryArray unique_class_ptr(d_class_ptr.size());
   auto d_unique_class_ptr = dh::ToSpan(unique_class_ptr);
-#if defined(XGBOOST_USE_CUDA)
   auto n_uniques = dh::SegmentedUniqueByKey(
       thrust::cuda::par(alloc),
       dh::tbegin(d_class_ptr),
@@ -421,18 +403,6 @@ double GPUMultiClassAUCOVR(MetaInfo const &info, DeviceOrd device,
       d_unique_class_ptr.data(),
       dh::tbegin(d_unique_idx),
       thrust::equal_to>{});
-#elif defined(XGBOOST_USE_HIP)
-  auto n_uniques = dh::SegmentedUniqueByKey(
-      thrust::hip::par(alloc),
-      dh::tbegin(d_class_ptr),
-      dh::tend(d_class_ptr),
-      uni_key,
-      uni_key + d_sorted_idx.size(),
-      dh::tbegin(d_unique_idx),
-      d_unique_class_ptr.data(),
-      dh::tbegin(d_unique_idx),
-      thrust::equal_to>{});
-#endif

   d_unique_idx = d_unique_idx.subspan(0, n_uniques);

@@ -553,15 +523,9 @@ std::pair GPURankingAUC(Context const *ctx, common::Span<
       thrust::make_counting_iterator(0),
       [=] XGBOOST_DEVICE(size_t i) { return d_group_ptr[i + 1] - d_group_ptr[i]; });
-#if defined(XGBOOST_USE_CUDA)
   size_t n_valid = thrust::count_if(
       thrust::cuda::par(alloc), check_it, check_it + group_ptr.size() - 1,
       [=] XGBOOST_DEVICE(size_t len) { return len >= 3; });
-#elif defined(XGBOOST_USE_HIP)
-  size_t n_valid = thrust::count_if(
-      thrust::hip::par(alloc), check_it, check_it + group_ptr.size() - 1,
-      [=] XGBOOST_DEVICE(size_t len) { return len >= 3; });
-#endif

   if (n_valid < info.group_ptr_.size() - 1) {
     InvalidGroupAUC();
@@ -659,13 +623,8 @@ std::pair GPURankingAUC(Context const *ctx, common::Span<
   /**
    * Scale the AUC with number of items in each group.
    */
-#if defined(XGBOOST_USE_CUDA)
   double auc = thrust::reduce(thrust::cuda::par(alloc), dh::tbegin(s_d_auc),
                               dh::tend(s_d_auc), 0.0);
-#elif defined(XGBOOST_USE_HIP)
-  double auc = thrust::reduce(thrust::hip::par(alloc), dh::tbegin(s_d_auc),
-                              dh::tend(s_d_auc), 0.0);
-#endif

   return std::make_pair(auc, n_valid);
 }
@@ -694,15 +653,9 @@ std::tuple GPUBinaryPRAUC(common::Span pred
   dh::XGBCachingDeviceAllocator alloc;
   double total_pos, total_neg;
-#if defined(XGBOOST_USE_CUDA)
   thrust::tie(total_pos, total_neg) =
       thrust::reduce(thrust::cuda::par(alloc), it, it + labels.Size(),
                      Pair{0.0, 0.0}, PairPlus{});
-#elif defined(XGBOOST_USE_HIP)
-  thrust::tie(total_pos, total_neg) =
-      thrust::reduce(thrust::hip::par(alloc), it, it + labels.Size(),
-                     Pair{0.0, 0.0}, PairPlus{});
-#endif

   if (total_pos <= 0.0 || total_neg <= 0.0) {
     return {0.0f, 0.0f, 0.0f};
@@ -755,17 +708,10 @@ double GPUMultiClassPRAUC(Context const *ctx, common::Span predts,
       });

   dh::XGBCachingDeviceAllocator alloc;
-#if defined(XGBOOST_USE_CUDA)
   thrust::reduce_by_key(thrust::cuda::par(alloc), key_it,
                         key_it + predts.size(), val_it,
                         thrust::make_discard_iterator(), totals.begin(),
                         thrust::equal_to{}, PairPlus{});
-#elif defined(XGBOOST_USE_HIP)
-  thrust::reduce_by_key(thrust::hip::par(alloc), key_it,
-                        key_it + predts.size(), val_it,
-                        thrust::make_discard_iterator(), totals.begin(),
-                        thrust::equal_to{}, PairPlus{});
-#endif

   /**
    * Calculate AUC
@@ -834,7 +780,6 @@ GPURankingPRAUCImpl(common::Span predts, MetaInfo const &info,
   dh::TemporaryArray unique_class_ptr(d_group_ptr.size());
   auto d_unique_class_ptr = dh::ToSpan(unique_class_ptr);
-#if defined(XGBOOST_USE_CUDA)
   auto n_uniques = dh::SegmentedUniqueByKey(
       thrust::cuda::par(alloc),
       dh::tbegin(d_group_ptr),
@@ -845,18 +790,6 @@ GPURankingPRAUCImpl(common::Span predts, MetaInfo const &info,
       d_unique_class_ptr.data(),
       dh::tbegin(d_unique_idx),
       thrust::equal_to>{});
-#elif defined(XGBOOST_USE_HIP)
-  auto n_uniques = dh::SegmentedUniqueByKey(
-      thrust::hip::par(alloc),
-      dh::tbegin(d_group_ptr),
-      dh::tend(d_group_ptr),
-      uni_key,
-      uni_key + d_sorted_idx.size(),
-      dh::tbegin(d_unique_idx),
-      d_unique_class_ptr.data(),
-      dh::tbegin(d_unique_idx),
-      thrust::equal_to>{});
-#endif

   d_unique_idx = d_unique_idx.subspan(0, n_uniques);

@@ -909,15 +842,9 @@ GPURankingPRAUCImpl(common::Span predts, MetaInfo const &info,
         return thrust::make_pair(0.0, static_cast(1));
       });
-#if defined(XGBOOST_USE_CUDA)
    thrust::tie(auc, invalid_groups) = thrust::reduce(
        thrust::cuda::par(alloc), it, it + n_groups,
        thrust::pair(0.0, 0), PairPlus{});
-#elif defined(XGBOOST_USE_HIP)
-    thrust::tie(auc, invalid_groups) = thrust::reduce(
-        thrust::hip::par(alloc), it, it + n_groups,
-        thrust::pair(0.0, 0), PairPlus{});
-#endif
   }
   return std::make_pair(auc, n_groups - invalid_groups);
 }
@@ -949,17 +876,10 @@ std::pair GPURankingPRAUC(Context const *ctx,
   dh::XGBDeviceAllocator alloc;
   auto labels = info.labels.View(ctx->Device());
-#if defined(XGBOOST_USE_CUDA)
   if (thrust::any_of(thrust::cuda::par(alloc), dh::tbegin(labels.Values()),
                      dh::tend(labels.Values()), PRAUCLabelInvalid{})) {
     InvalidLabels();
   }
-#elif defined(XGBOOST_USE_HIP)
-  if (thrust::any_of(thrust::hip::par(alloc), dh::tbegin(labels.Values()),
-                     dh::tend(labels.Values()), PRAUCLabelInvalid{})) {
-    InvalidLabels();
-  }
-#endif
   /**
    * Get total positive/negative for each group.
    */
@@ -981,17 +901,10 @@ std::pair GPURankingPRAUC(Context const *ctx,
         return thrust::make_pair(y * w, (1.0 - y) * w);
       });
-#if defined(XGBOOST_USE_CUDA)
   thrust::reduce_by_key(thrust::cuda::par(alloc), key_it,
                         key_it + predts.size(), val_it,
                         thrust::make_discard_iterator(), totals.begin(),
                         thrust::equal_to{}, PairPlus{});
-#elif defined(XGBOOST_USE_HIP)
-  thrust::reduce_by_key(thrust::hip::par(alloc), key_it,
-                        key_it + predts.size(), val_it,
-                        thrust::make_discard_iterator(), totals.begin(),
-                        thrust::equal_to{}, PairPlus{});
-#endif

   /**
    * Calculate AUC
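The binary-AUC path at the top of this file keeps one index per distinct prediction value before walking the ROC curve. A toy version of that unique_by_key_copy call shape (hypothetical helper; keys must already be sorted, as the predictions are after the argsort):

#include <cstddef>
#include <thrust/device_vector.h>
#include <thrust/iterator/counting_iterator.h>
#include <thrust/iterator/discard_iterator.h>
#include <thrust/unique.h>

// out_idx must be presized to sorted_preds.size(); the return value is how
// many slots were written (one per distinct key, keeping the first position).
std::size_t UniquePositions(thrust::device_vector<float> const &sorted_preds,
                            thrust::device_vector<std::size_t> *out_idx) {
  auto pos = thrust::counting_iterator<std::size_t>(0);
  auto ends = thrust::unique_by_key_copy(sorted_preds.begin(), sorted_preds.end(), pos,
                                         thrust::make_discard_iterator(), out_idx->begin());
  return static_cast<std::size_t>(ends.second - out_idx->begin());
}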
diff --git a/src/metric/elementwise_metric.cu b/src/metric/elementwise_metric.cu
index f52b28fd1..eb766e964 100644
--- a/src/metric/elementwise_metric.cu
+++ b/src/metric/elementwise_metric.cu
@@ -30,6 +30,12 @@
 #include "../common/device_helpers.cuh"
 #endif  // defined(XGBOOST_USE_CUDA) || defined(XGBOOST_USE_HIP)

+#if defined(XGBOOST_USE_HIP)
+namespace thrust {
+  namespace cuda = thrust::hip;
+}
+#endif
+
 namespace xgboost {
 namespace metric {
 // tag this file, used by force static link later.
@@ -47,7 +53,7 @@ PackedReduceResult Reduce(Context const* ctx, MetaInfo const& info, Fn&& loss) {
   PackedReduceResult result;
   auto labels = info.labels.View(ctx->Device());
   if (ctx->IsCUDA()) {
-#if defined(XGBOOST_USE_CUDA)
+#if defined(XGBOOST_USE_CUDA) || defined(XGBOOST_USE_HIP)
     dh::XGBCachingDeviceAllocator alloc;
     thrust::counting_iterator begin(0);
     thrust::counting_iterator end = begin + labels.Size();
@@ -62,21 +68,6 @@ PackedReduceResult Reduce(Context const* ctx, MetaInfo const& info, Fn&& loss) {
           return PackedReduceResult{v, wt};
         },
         PackedReduceResult{}, thrust::plus());
-#elif defined(XGBOOST_USE_HIP)
-    dh::XGBCachingDeviceAllocator alloc;
-    thrust::counting_iterator begin(0);
-    thrust::counting_iterator end = begin + labels.Size();
-    result = thrust::transform_reduce(
-        thrust::hip::par(alloc), begin, end,
-        [=] XGBOOST_DEVICE(size_t i) {
-          auto idx = linalg::UnravelIndex(i, labels.Shape());
-          auto sample_id = std::get<0>(idx);
-          auto target_id = std::get<1>(idx);
-          auto res = loss(i, sample_id, target_id);
-          float v{std::get<0>(res)}, wt{std::get<1>(res)};
-          return PackedReduceResult{v, wt};
-        },
-        PackedReduceResult{}, thrust::plus());
 #else
     common::AssertGPUSupport();
 #endif  // defined(XGBOOST_USE_CUDA)
diff --git a/src/metric/multiclass_metric.cu b/src/metric/multiclass_metric.cu
index 6e9019488..e8f71dfd4 100644
--- a/src/metric/multiclass_metric.cu
+++ b/src/metric/multiclass_metric.cu
@@ -24,6 +24,12 @@
 #include "../common/device_helpers.cuh"
 #endif  // XGBOOST_USE_CUDA || XGBOOST_USE_HIP

+#if defined(XGBOOST_USE_HIP)
+namespace thrust {
+  namespace cuda = thrust::hip;
+}
+#endif
+
 namespace xgboost {
 namespace metric {
 // tag this file, used by force static link later.
@@ -104,7 +110,6 @@ class MultiClassMetricsReduction {
     dh::XGBCachingDeviceAllocator alloc;

-#if defined(XGBOOST_USE_CUDA)
     PackedReduceResult result = thrust::transform_reduce(
         thrust::cuda::par(alloc),
         begin, end,
@@ -122,25 +127,6 @@ class MultiClassMetricsReduction {
         },
         PackedReduceResult(),
         thrust::plus());
-#elif defined(XGBOOST_USE_HIP)
-    PackedReduceResult result = thrust::transform_reduce(
-        thrust::hip::par(alloc),
-        begin, end,
-        [=] XGBOOST_DEVICE(size_t idx) {
-          bst_float weight = is_null_weight ? 1.0f : s_weights[idx];
-          bst_float residue = 0;
-          auto label = static_cast(s_labels[idx]);
-          if (label >= 0 && label < static_cast(n_class)) {
-            residue = EvalRowPolicy::EvalRow(
-                label, &s_preds[idx * n_class], n_class) * weight;
-          } else {
-            s_label_error[0] = label;
-          }
-          return PackedReduceResult{ residue, weight };
-        },
-        PackedReduceResult(),
-        thrust::plus());
-#endif

     CheckLabelError(s_label_error[0], n_class);
diff --git a/src/metric/survival_metric.cu b/src/metric/survival_metric.cu
index b501bed76..19c1891e3 100644
--- a/src/metric/survival_metric.cu
+++ b/src/metric/survival_metric.cu
@@ -25,6 +25,12 @@
 #include "../common/device_helpers.cuh"
 #endif  // XGBOOST_USE_CUDA || XGBOOST_USE_HIP

+#if defined(XGBOOST_USE_HIP)
+namespace thrust {
+  namespace cuda = thrust::hip;
+}
+#endif
+
 using AFTParam = xgboost::common::AFTParam;
 using ProbabilityDistributionType = xgboost::common::ProbabilityDistributionType;
 template
@@ -103,7 +109,6 @@ class ElementWiseSurvivalMetricsReduction {
     dh::XGBCachingDeviceAllocator alloc;

-#if defined(XGBOOST_USE_CUDA)
     PackedReduceResult result = thrust::transform_reduce(
         thrust::cuda::par(alloc),
         begin, end,
@@ -118,22 +123,6 @@ class ElementWiseSurvivalMetricsReduction {
         },
         PackedReduceResult(),
         thrust::plus());
-#elif defined(XGBOOST_USE_HIP)
-    PackedReduceResult result = thrust::transform_reduce(
-        thrust::hip::par(alloc),
-        begin, end,
-        [=] XGBOOST_DEVICE(size_t idx) {
-          double weight = is_null_weight ? 1.0 : static_cast(s_weights[idx]);
-          double residue = d_policy.EvalRow(
-              static_cast(s_label_lower_bound[idx]),
-              static_cast(s_label_upper_bound[idx]),
-              static_cast(s_preds[idx]));
-          residue *= weight;
-          return PackedReduceResult{residue, weight};
-        },
-        PackedReduceResult(),
-        thrust::plus());
-#endif

     return result;
   }
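elementwise_metric.cu, multiclass_metric.cu, and survival_metric.cu all reduce with the same shape: map each element index to a (residual, weight) pair, then add pairs. A toy version using thrust::pair in place of XGBoost's PackedReduceResult (hypothetical helper; pred and label are assumed to be device pointers, and the real code passes thrust::cuda::par(alloc) where this sketch uses thrust::device):

#include <cstddef>
#include <thrust/execution_policy.h>
#include <thrust/iterator/counting_iterator.h>
#include <thrust/pair.h>
#include <thrust/transform_reduce.h>

struct PairPlusOp {
  __host__ __device__ thrust::pair<double, double> operator()(
      thrust::pair<double, double> a, thrust::pair<double, double> b) const {
    return thrust::make_pair(a.first + b.first, a.second + b.second);
  }
};

struct SquaredErrorAt {
  double const *pred;   // device pointer
  double const *label;  // device pointer
  __host__ __device__ thrust::pair<double, double> operator()(std::size_t i) const {
    double err = pred[i] - label[i];
    return thrust::make_pair(err * err, 1.0);  // (residual, weight)
  }
};

thrust::pair<double, double> SumLoss(double const *pred, double const *label, std::size_t n) {
  return thrust::transform_reduce(thrust::device, thrust::counting_iterator<std::size_t>(0),
                                  thrust::counting_iterator<std::size_t>(n),
                                  SquaredErrorAt{pred, label},
                                  thrust::make_pair(0.0, 0.0), PairPlusOp{});
}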
diff --git a/src/predictor/gpu_predictor.cu b/src/predictor/gpu_predictor.cu
index 4a75903b7..89506a86b 100644
--- a/src/predictor/gpu_predictor.cu
+++ b/src/predictor/gpu_predictor.cu
@@ -29,6 +29,12 @@
 #include "xgboost/tree_model.h"
 #include "xgboost/tree_updater.h"

+#if defined(XGBOOST_USE_HIP)
+namespace thrust {
+  namespace cuda = thrust::hip;
+}
+#endif
+
 namespace xgboost::predictor {

 DMLC_REGISTRY_FILE_TAG(gpu_predictor);
@@ -512,7 +518,6 @@ void ExtractPaths(
     return PathInfo{static_cast(idx), path_length, tree_idx};
   });
-#if defined(XGBOOST_USE_CUDA)
   auto end = thrust::copy_if(
       thrust::cuda::par(alloc), nodes_transform,
       nodes_transform + d_nodes.size(), info.begin(),
@@ -525,20 +530,6 @@ void ExtractPaths(
   thrust::exclusive_scan(thrust::cuda::par(alloc), length_iterator,
                          length_iterator + info.size() + 1,
                          path_segments.begin());
-#elif defined(XGBOOST_USE_HIP)
-  auto end = thrust::copy_if(
-      thrust::hip::par(alloc), nodes_transform,
-      nodes_transform + d_nodes.size(), info.begin(),
-      [=] __device__(const PathInfo& e) { return e.leaf_position != -1; });
-  info.resize(end - info.begin());
-  auto length_iterator = dh::MakeTransformIterator(
-      info.begin(),
-      [=] __device__(const PathInfo& info) { return info.length; });
-  dh::caching_device_vector path_segments(info.size() + 1);
-  thrust::exclusive_scan(thrust::hip::par(alloc), length_iterator,
-                         length_iterator + info.size() + 1,
-                         path_segments.begin());
-#endif

   paths->resize(path_segments.back());
diff --git a/src/tree/fit_stump.cu b/src/tree/fit_stump.cu
index 8bbb62a29..2b0a248ce 100644
--- a/src/tree/fit_stump.cu
+++ b/src/tree/fit_stump.cu
@@ -21,6 +21,12 @@
 #include "xgboost/logging.h"  // CHECK_EQ
 #include "xgboost/span.h"     // span

+#if defined(XGBOOST_USE_HIP)
+namespace thrust {
+  namespace cuda = thrust::hip;
+}
+#endif
+
 namespace xgboost::tree::cuda_impl {
 void FitStump(Context const* ctx, MetaInfo const& info,
               linalg::TensorView gpair, linalg::VectorView out) {
@@ -45,11 +51,7 @@ void FitStump(Context const* ctx, MetaInfo const& info,
   dh::XGBCachingDeviceAllocator alloc;

-#if defined(XGBOOST_USE_CUDA)
   auto policy = thrust::cuda::par(alloc);
-#elif defined(XGBOOST_USE_HIP)
-  auto policy = thrust::hip::par(alloc);
-#endif

   thrust::reduce_by_key(policy, key_it, key_it + gpair.Size(), grad_it,
                         thrust::make_discard_iterator(), dh::tbegin(d_sum.Values()));
diff --git a/src/tree/gpu_hist/evaluator.cu b/src/tree/gpu_hist/evaluator.cu
index e4ca29c97..5d00640a4 100644
--- a/src/tree/gpu_hist/evaluator.cu
+++ b/src/tree/gpu_hist/evaluator.cu
@@ -12,6 +12,12 @@
 #include "evaluate_splits.cuh"
 #include "xgboost/data.h"

+#if defined(XGBOOST_USE_HIP)
+namespace thrust {
+  namespace cuda = thrust::hip;
+}
+#endif
+
 namespace xgboost::tree {
 void GPUHistEvaluator::Reset(common::HistogramCuts const &cuts, common::Span ft,
                              bst_feature_t n_features, TrainParam const &param,
@@ -28,7 +34,6 @@ void GPUHistEvaluator::Reset(common::HistogramCuts const &cuts, common::Span
 GPUHistEvaluator::SortHistogram(
   auto d_feature_idx = dh::ToSpan(feature_idx_);
   auto total_bins = shared_inputs.feature_values.size();
-#if defined(XGBOOST_USE_CUDA)
   thrust::transform(thrust::cuda::par(alloc), it, it + data.size(), dh::tbegin(data),
                     [=] XGBOOST_DEVICE(uint32_t i) {
                       auto const &input = d_inputs[i / total_bins];
@@ -115,27 +99,11 @@ common::Span GPUHistEvaluator::SortHistogram(
                       }
                       return thrust::make_tuple(i, 0.0f);
                     });
-#elif defined(XGBOOST_USE_HIP)
-  thrust::transform(thrust::hip::par(alloc), it, it + data.size(), dh::tbegin(data),
-                    [=] XGBOOST_DEVICE(uint32_t i) {
-                      auto const &input = d_inputs[i / total_bins];
-                      auto j = i % total_bins;
-                      auto fidx = d_feature_idx[j];
-                      if (common::IsCat(shared_inputs.feature_types, fidx)) {
-                        auto grad =
-                            shared_inputs.rounding.ToFloatingPoint(input.gradient_histogram[j]);
-                        auto lw = evaluator.CalcWeightCat(shared_inputs.param, grad);
-                        return thrust::make_tuple(i, lw);
-                      }
-                      return thrust::make_tuple(i, 0.0f);
-                    });
-#endif

   // Sort an array segmented according to
   // - nodes
   // - features within each node
   // - gradients within each feature
-#if defined(XGBOOST_USE_CUDA)
   thrust::stable_sort_by_key(thrust::cuda::par(alloc), dh::tbegin(data), dh::tend(data),
                              dh::tbegin(sorted_idx),
                              [=] XGBOOST_DEVICE(SortPair const &l, SortPair const &r) {
@@ -166,38 +134,6 @@ common::Span GPUHistEvaluator::SortHistogram(
                                }
                                return li < ri;
                              });
-#elif defined(XGBOOST_USE_HIP)
-  thrust::stable_sort_by_key(thrust::hip::par(alloc), dh::tbegin(data), dh::tend(data),
-                             dh::tbegin(sorted_idx),
-                             [=] XGBOOST_DEVICE(SortPair const &l, SortPair const &r) {
-                               auto li = thrust::get<0>(l);
-                               auto ri = thrust::get<0>(r);
-
-                               auto l_node = li / total_bins;
-                               auto r_node = ri / total_bins;
-
-                               if (l_node != r_node) {
-                                 return l_node < r_node;  // not the same node
-                               }
-
-                               li = li % total_bins;
-                               ri = ri % total_bins;
-
-                               auto lfidx = d_feature_idx[li];
-                               auto rfidx = d_feature_idx[ri];
-
-                               if (lfidx != rfidx) {
-                                 return lfidx < rfidx;  // not the same feature
-                               }
-
-                               if (common::IsCat(shared_inputs.feature_types, lfidx)) {
-                                 auto lw = thrust::get<1>(l);
-                                 auto rw = thrust::get<1>(r);
-                                 return lw < rw;
-                               }
-                               return li < ri;
-                             });
-#endif

   return dh::ToSpan(cat_sorted_idx_);
 }
 }  // namespace xgboost::tree
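FitStump above reduces gradients per target with reduce_by_key, synthesizing the keys on the fly from a transform iterator and discarding them on output. A toy version over equal-sized contiguous segments (hypothetical helper):

#include <cstddef>
#include <thrust/device_vector.h>
#include <thrust/iterator/counting_iterator.h>
#include <thrust/iterator/discard_iterator.h>
#include <thrust/iterator/transform_iterator.h>
#include <thrust/reduce.h>

struct SegmentOf {
  std::size_t size;
  __host__ __device__ std::size_t operator()(std::size_t i) const { return i / size; }
};

// sums must hold one slot per segment; the keys are never materialized.
void SegmentSums(thrust::device_vector<double> const &vals, std::size_t seg_size,
                 thrust::device_vector<double> *sums) {
  auto key = thrust::make_transform_iterator(thrust::counting_iterator<std::size_t>(0),
                                             SegmentOf{seg_size});
  thrust::reduce_by_key(key, key + vals.size(), vals.begin(),
                        thrust::make_discard_iterator(), sums->begin());
}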
diff --git a/src/tree/gpu_hist/histogram.cu b/src/tree/gpu_hist/histogram.cu
index e52977065..64e665afc 100644
--- a/src/tree/gpu_hist/histogram.cu
+++ b/src/tree/gpu_hist/histogram.cu
@@ -16,6 +16,12 @@
 #include "row_partitioner.cuh"
 #include "xgboost/base.h"

+#if defined(XGBOOST_USE_HIP)
+namespace thrust {
+  namespace cuda = thrust::hip;
+}
+#endif
+
 namespace xgboost {
 namespace tree {
 namespace {
@@ -60,13 +66,8 @@ GradientQuantiser::GradientQuantiser(common::Span gpair, Met
   thrust::device_ptr gpair_beg{gpair.data()};
   auto beg = thrust::make_transform_iterator(gpair_beg, Clip());
-#if defined(XGBOOST_USE_CUDA)
   Pair p =
       dh::Reduce(thrust::cuda::par(alloc), beg, beg + gpair.size(), Pair{}, thrust::plus{});
-#elif defined(XGBOOST_USE_HIP)
-  Pair p =
-      dh::Reduce(thrust::hip::par(alloc), beg, beg + gpair.size(), Pair{}, thrust::plus{});
-#endif

   // Treat pair as array of 4 primitive types to allreduce
   using ReduceT = typename decltype(p.first)::ValueT;
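GradientQuantiser above reduces Clip()-transformed gradient pairs to bound the fixed-point scale used for histogram accumulation. A toy scalar version of that transform-then-reduce shape (hypothetical helper; the real code reduces a pair type through dh::Reduce with the caching allocator):

#include <thrust/device_vector.h>
#include <thrust/functional.h>
#include <thrust/iterator/transform_iterator.h>
#include <thrust/reduce.h>

struct AbsOp {
  __host__ __device__ float operator()(float g) const { return g < 0.f ? -g : g; }
};

float SumAbs(thrust::device_vector<float> const &grad) {
  // Clip()-style: view each element through a transform, then plus-reduce.
  auto it = thrust::make_transform_iterator(grad.begin(), AbsOp{});
  return thrust::reduce(it, it + grad.size(), 0.f, thrust::plus<float>{});
}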