diff --git a/src/common/algorithm.hip.h b/src/common/algorithm.hip.h
deleted file mode 100644
index e69de29bb..000000000
diff --git a/src/common/bitfield.h b/src/common/bitfield.h
index 6bb5f3404..0c726f70f 100644
--- a/src/common/bitfield.h
+++ b/src/common/bitfield.h
@@ -13,18 +13,18 @@
 #include <string>
 #include <vector>
 
-#if defined(__CUDACC__)
+#if defined(__CUDACC__) || defined(__HIP_PLATFORM_AMD__)
 #include <thrust/copy.h>
 #include <thrust/device_ptr.h>
 #include "device_helpers.cuh"
-#endif  // defined(__CUDACC__)
+#endif  // defined(__CUDACC__) || defined(__HIP_PLATFORM_AMD__)
 
 #include "xgboost/span.h"
 #include "common.h"
 
 namespace xgboost {
 
-#if defined(__CUDACC__)
+#if defined(__CUDACC__) || defined(__HIP_PLATFORM_AMD__)
 using BitFieldAtomicType = unsigned long long;  // NOLINT
 
 __forceinline__ __device__ BitFieldAtomicType AtomicOr(BitFieldAtomicType* address,
@@ -48,7 +48,7 @@ __forceinline__ __device__ BitFieldAtomicType AtomicAnd(BitFieldAtomicType* addr
 
   return old;
 }
-#endif  // defined(__CUDACC__)
+#endif  // defined(__CUDACC__) || defined(__HIP_PLATFORM_AMD__)
 
 /*!
  * \brief A non-owning type with auxiliary methods defined for manipulating bits.
@@ -100,7 +100,7 @@ struct BitFieldContainer {
   XGBOOST_DEVICE static size_t ComputeStorageSize(index_type size) {
     return common::DivRoundUp(size, kValueSize);
   }
-#if defined(__CUDA_ARCH__)
+#if defined(__CUDA_ARCH__) || defined(__HIP_PLATFORM_AMD__)
   __device__ BitFieldContainer& operator|=(BitFieldContainer const& rhs) {
     auto tid = blockIdx.x * blockDim.x + threadIdx.x;
     size_t min_size = min(bits_.size(), rhs.bits_.size());
@@ -117,9 +117,9 @@ struct BitFieldContainer {
     }
     return *this;
   }
-#endif  // #if defined(__CUDA_ARCH__)
+#endif  // #if defined(__CUDA_ARCH__) || defined(__HIP_PLATFORM_AMD__)
 
-#if defined(__CUDA_ARCH__)
+#if defined(__CUDA_ARCH__) || defined(__HIP_PLATFORM_AMD__)
   __device__ BitFieldContainer& operator&=(BitFieldContainer const& rhs) {
     size_t min_size = min(bits_.size(), rhs.bits_.size());
     auto tid = blockIdx.x * blockDim.x + threadIdx.x;
@@ -138,7 +138,7 @@ struct BitFieldContainer {
   }
 #endif  // defined(__CUDA_ARCH__)
 
-#if defined(__CUDA_ARCH__)
+#if defined(__CUDA_ARCH__) || defined(__HIP_PLATFORM_AMD__)
   __device__ auto Set(index_type pos) {
     Pos pos_v = Direction::Shift(ToBitPos(pos));
     value_type& value = bits_[pos_v.int_pos];
@@ -166,7 +166,7 @@ struct BitFieldContainer {
     value_type clear_bit = ~(kOne << pos_v.bit_pos);
     value &= clear_bit;
   }
-#endif  // defined(__CUDA_ARCH__)
+#endif  // defined(__CUDA_ARCH__) || defined(__HIP_PLATFORM_AMD__)
 
   XGBOOST_DEVICE bool Check(Pos pos_v) const {
     pos_v = Direction::Shift(pos_v);
diff --git a/src/common/common.h b/src/common/common.h
index 35c807bef..6ea342232 100644
--- a/src/common/common.h
+++ b/src/common/common.h
@@ -27,6 +27,12 @@
 
 #define WITH_CUDA() true
 
+#elif defined(__HIP_PLATFORM_AMD__)
+#include <thrust/system/hip/error.h>
+#include <thrust/system_error.h>
+
+#define WITH_CUDA() true
+
 #else
 
 #define WITH_CUDA() false
@@ -34,7 +40,7 @@
 #endif  // defined(__CUDACC__)
 
 namespace dh {
-#if defined(__CUDACC__)
+#if defined(__CUDACC__) || defined(__HIP_PLATFORM_AMD__)
 /*
  * Error handling  functions
  */
@@ -49,7 +55,7 @@ inline cudaError_t ThrowOnCudaError(cudaError_t code, const char *file,
   }
   return code;
 }
-#endif  // defined(__CUDACC__)
+#endif  // defined(__CUDACC__) || defined(__HIP_PLATFORM_AMD__)
 }  // namespace dh
 
 namespace xgboost {
@@ -167,7 +173,7 @@ class Range {
 int AllVisibleGPUs();
 
 inline void AssertGPUSupport() {
-#ifndef XGBOOST_USE_CUDA
+#if !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP)
     LOG(FATAL) << "XGBoost version not compiled with GPU support.";
 #endif  // XGBOOST_USE_CUDA
 }
@@ -180,7 +186,7 @@ inline void AssertOneAPISupport() {
 
 void SetDevice(std::int32_t device);
 
-#if !defined(XGBOOST_USE_CUDA)
+#if !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP)
 inline void SetDevice(std::int32_t device) {
   if (device >= 0) {
     AssertGPUSupport();
diff --git a/src/common/compressed_iterator.h b/src/common/compressed_iterator.h
index 5a5b5f252..9e7b7b22a 100644
--- a/src/common/compressed_iterator.h
+++ b/src/common/compressed_iterator.h
@@ -11,9 +11,9 @@
 
 #include "common.h"
 
-#ifdef __CUDACC__
+#if defined(__CUDACC__) || defined(__HIP_PLATFORM_AMD__)
 #include "device_helpers.cuh"
-#endif  // __CUDACC__
+#endif  // __CUDACC__ || __HIP_PLATFORM_AMD__
 
 namespace xgboost {
 namespace common {
@@ -105,7 +105,7 @@ class CompressedBufferWriter {
     }
   }
 
-#ifdef __CUDACC__
+#if defined(__CUDACC__) || defined(__HIP_PLATFORM_AMD__)
   __device__ void AtomicWriteSymbol
     (CompressedByteT* buffer, uint64_t symbol, size_t offset) {
     size_t ibit_start = offset * symbol_bits_;
@@ -119,7 +119,7 @@ class CompressedBufferWriter {
       symbol >>= 8;
     }
   }
-#endif  // __CUDACC__
+#endif  // __CUDACC__ || __HIP_PLATFORM_AMD__
 
   template <typename IterT>
   void Write(CompressedByteT *buffer, IterT input_begin, IterT input_end) {
diff --git a/src/common/cuda_context.hip.h b/src/common/cuda_context.hip.h
deleted file mode 100644
index e69de29bb..000000000
diff --git a/src/common/device_helpers.cuh b/src/common/device_helpers.cuh
index 58300d06c..3fb18f493 100644
--- a/src/common/device_helpers.cuh
+++ b/src/common/device_helpers.cuh
@@ -53,7 +53,7 @@
 
 #endif  // defined(XGBOOST_USE_RMM) && XGBOOST_USE_RMM == 1
 
-#if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 600 || defined(__clang__)
+#if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 600 || defined(__clang__) || defined(__HIP_PLATFORM_AMD__)
 
 #else  // In device code and CUDA < 600
 __device__ __forceinline__ double atomicAdd(double* address, double val) {  // NOLINT
@@ -702,6 +702,8 @@ typename std::iterator_traits<T>::value_type SumReduction(T in, int nVals) {
 constexpr std::pair<int, int> CUDAVersion() {
 #if defined(__CUDACC_VER_MAJOR__)
   return std::make_pair(__CUDACC_VER_MAJOR__, __CUDACC_VER_MINOR__);
+#elif defined(__HIP_PLATFORM_AMD__)
+  return std::make_pair(HIP_LIBRARY_MAJOR_VERSION, HIP_VERSION_MINOR);
 #else
   // clang/clang-tidy
   return std::make_pair((CUDA_VERSION) / 1000, (CUDA_VERSION) % 100 / 10);
@@ -1329,6 +1331,9 @@ class CUDAStreamView {
     // CUDA > 11.0
     dh::safe_cuda(cudaStreamWaitEvent(stream_, cudaEvent_t{e}, cudaEventWaitDefault));
 #endif  // __CUDACC_VER_MAJOR__ == 11 && __CUDACC_VER_MINOR__ == 0:
+
+#elif defined(__HIP_PLATFORM_AMD__)
+    dh::safe_cuda(hipStreamWaitEvent(stream_, hipEvent_t{e}, hipEventWaitDefault));
 #else   // clang
     dh::safe_cuda(cudaStreamWaitEvent(stream_, cudaEvent_t{e}, cudaEventWaitDefault));
 #endif  //  defined(__CUDACC_VER_MAJOR__)
diff --git a/src/common/math.h b/src/common/math.h
index 71a494544..9c9ee604d 100644
--- a/src/common/math.h
+++ b/src/common/math.h
@@ -148,32 +148,32 @@ CheckNAN(T) {
   return false;
 }
 
-#if XGBOOST_STRICT_R_MODE && !defined(__CUDA_ARCH__)
+#if XGBOOST_STRICT_R_MODE && !defined(__CUDA_ARCH__) && !defined(__HIP_PLATFORM_AMD__)
 
 bool CheckNAN(double v);
 
 #else
 
 XGBOOST_DEVICE bool inline CheckNAN(float x) {
-#if defined(__CUDA_ARCH__)
+#if defined(__CUDA_ARCH__) || defined(__HIP_PLATFORM_AMD__)
   return isnan(x);
 #else
   return std::isnan(x);
-#endif  // defined(__CUDA_ARCH__)
+#endif  // defined(__CUDA_ARCH__) || defined(__HIP_PLATFORM_AMD__)
 }
 
 XGBOOST_DEVICE bool inline CheckNAN(double x) {
-#if defined(__CUDA_ARCH__)
+#if defined(__CUDA_ARCH__) || defined(__HIP_PLATFORM_AMD__)
   return isnan(x);
 #else
   return std::isnan(x);
-#endif  // defined(__CUDA_ARCH__)
+#endif  // defined(__CUDA_ARCH__) || defined(__HIP_PLATFORM_AMD__)
 }
 
 #endif  // XGBOOST_STRICT_R_MODE && !defined(__CUDA_ARCH__)
 // GPU version is not uploaded in CRAN anyway.
 // Specialize only when using R with CPU.
-#if XGBOOST_STRICT_R_MODE && !defined(XGBOOST_USE_CUDA)
+#if XGBOOST_STRICT_R_MODE && !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP)
 double LogGamma(double v);
 
 #else  // Not R or R with GPU.
@@ -196,7 +196,7 @@ XGBOOST_DEVICE inline T LogGamma(T v) {
 #endif  // _MSC_VER
 }
 
-#endif  // XGBOOST_STRICT_R_MODE && !defined(XGBOOST_USE_CUDA)
+#endif  // XGBOOST_STRICT_R_MODE && !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP)
 
 }  // namespace common
 }  // namespace xgboost
diff --git a/src/common/stats.h b/src/common/stats.h
index 2f42a698e..a72545896 100644
--- a/src/common/stats.h
+++ b/src/common/stats.h
@@ -112,7 +112,7 @@ void Median(Context const* ctx, linalg::TensorView<float const, 2> t, OptionalWe
 
 void Mean(Context const* ctx, linalg::VectorView<float const> v, linalg::VectorView<float> out);
 
-#if !defined(XGBOOST_USE_CUDA)
+#if !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP)
 inline void Median(Context const*, linalg::TensorView<float const, 2>, OptionalWeights,
                    linalg::Tensor<float, 1>*) {
   common::AssertGPUSupport();
@@ -120,7 +120,7 @@ inline void Median(Context const*, linalg::TensorView<float const, 2>, OptionalW
 inline void Mean(Context const*, linalg::VectorView<float const>, linalg::VectorView<float>) {
   common::AssertGPUSupport();
 }
-#endif  // !defined(XGBOOST_USE_CUDA)
+#endif  // !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP)
 }  // namespace cuda_impl
 
 /**
diff --git a/src/common/threading_utils.hip.h b/src/common/threading_utils.hip.h
deleted file mode 100644
index e69de29bb..000000000
diff --git a/src/common/transform.h b/src/common/transform.h
index a7b96766c..5f9c3f1bf 100644
--- a/src/common/transform.h
+++ b/src/common/transform.h
@@ -17,9 +17,9 @@
 #include "xgboost/host_device_vector.h"
 #include "xgboost/span.h"
 
-#if defined (__CUDACC__)
+#if defined (__CUDACC__) || defined(__HIP_PLATFORM_AMD__)
 #include "device_helpers.cuh"
-#endif  // defined (__CUDACC__)
+#endif  // defined (__CUDACC__) || defined(__HIP_PLATFORM_AMD__)
 
 namespace xgboost {
 namespace common {
@@ -28,7 +28,7 @@ constexpr size_t kBlockThreads = 256;
 
 namespace detail {
 
-#if defined(__CUDACC__)
+#if defined(__CUDACC__) || defined(__HIP_PLATFORM_AMD__)
 template <typename Functor, typename... SpanType>
 __global__ void LaunchCUDAKernel(Functor _func, Range _range,
                                  SpanType... _spans) {
@@ -36,7 +36,7 @@ __global__ void LaunchCUDAKernel(Functor _func, Range _range,
     _func(i, _spans...);
   }
 }
-#endif  // defined(__CUDACC__)
+#endif  // defined(__CUDACC__) || defined(__HIP_PLATFORM_AMD__)
 
 }  // namespace detail
 
@@ -127,7 +127,7 @@ class Transform {
       UnpackShard(device, _vectors...);
     }
 
-#if defined(__CUDACC__)
+#if defined(__CUDACC__) || defined(__HIP_PLATFORM_AMD__)
     template <typename std::enable_if<CompiledWithCuda>::type* = nullptr,
               typename... HDV>
     void LaunchCUDA(Functor _func, HDV*... _vectors) const {
@@ -159,7 +159,7 @@ class Transform {
 
       LOG(FATAL) << "Not part of device code. WITH_CUDA: " << WITH_CUDA();
     }
-#endif  // defined(__CUDACC__)
+#endif  // defined(__CUDACC__) || defined(__HIP_PLATFORM_AMD__)
 
     template <typename... HDV>
     void LaunchCPU(Functor func, HDV *...vectors) const {