From c875f0425ffd9533cfa4e6c8a72815a90ebcfa7a Mon Sep 17 00:00:00 2001
From: amdsc21 <96135754+amdsc21@users.noreply.github.com>
Date: Thu, 9 Mar 2023 20:48:31 +0100
Subject: [PATCH] finished rank_metric.cu

---
 src/metric/rank_metric.cu  | 50 ++++++++++++++++++++++++++++++++++++++
 src/metric/rank_metric.hip |  5 ++++
 2 files changed, 55 insertions(+)
diff --git a/src/metric/rank_metric.cu b/src/metric/rank_metric.cu
index 5f98db7a9..b19571559 100644
--- a/src/metric/rank_metric.cu
+++ b/src/metric/rank_metric.cu
@@ -34,7 +34,12 @@ struct EvalRankGpu : public GPUMetric, public EvalRankConfig {
     const auto ngroups = static_cast<bst_omp_uint>(gptr.size() - 1);
 
     auto device = ctx_->gpu_id;
+
+#if defined(XGBOOST_USE_CUDA)
     dh::safe_cuda(cudaSetDevice(device));
+#elif defined(XGBOOST_USE_HIP)
+    dh::safe_cuda(hipSetDevice(device));
+#endif
 
     info.labels.SetDevice(device);
     preds.SetDevice(device);
@@ -99,7 +104,13 @@ struct EvalPrecisionGpu {
     auto *dhits = hits.data().get();
 
     int device_id = -1;
+
+#if defined(XGBOOST_USE_CUDA)
     dh::safe_cuda(cudaGetDevice(&device_id));
+#elif defined(XGBOOST_USE_HIP)
+    dh::safe_cuda(hipGetDevice(&device_id));
+#endif
+
     // For each group item compute the aggregated precision
     dh::LaunchN(nitems, nullptr, [=] __device__(uint32_t idx) {
       const auto group_idx = dgroup_idx[idx];
@@ -112,8 +123,14 @@ struct EvalPrecisionGpu {
 
     // Allocator to be used for managing space overhead while performing reductions
     dh::XGBCachingDeviceAllocator<char> alloc;
+
+#if defined(XGBOOST_USE_CUDA)
     return static_cast<double>(thrust::reduce(thrust::cuda::par(alloc),
                                               hits.begin(), hits.end())) / ecfg.topn;
+#elif defined(XGBOOST_USE_HIP)
+    return static_cast<double>(thrust::reduce(thrust::hip::par(alloc),
+                                              hits.begin(), hits.end())) / ecfg.topn;
+#endif
   }
 };
 
@@ -142,7 +159,12 @@ struct EvalNDCGGpu {
     auto *ddcgs = dcgs.data().get();
 
     int device_id = -1;
+
+#if defined(XGBOOST_USE_CUDA)
     dh::safe_cuda(cudaGetDevice(&device_id));
+#elif defined(XGBOOST_USE_HIP)
+    dh::safe_cuda(hipGetDevice(&device_id));
+#endif
 
     // For each group item compute the aggregated precision
     dh::LaunchN(nitems, nullptr, [=] __device__(uint32_t idx) {
@@ -177,7 +199,13 @@ struct EvalNDCGGpu {
     double *didcg = idcg.data().get();
 
     int device_id = -1;
+
+#if defined(XGBOOST_USE_CUDA)
     dh::safe_cuda(cudaGetDevice(&device_id));
+#elif defined(XGBOOST_USE_HIP)
+    dh::safe_cuda(hipGetDevice(&device_id));
+#endif
+
     // Compute the group's DCG and reduce it across all groups
     dh::LaunchN(ngroups, nullptr, [=] __device__(uint32_t gidx) {
       if (didcg[gidx] == 0.0f) {
@@ -189,7 +217,12 @@ struct EvalNDCGGpu {
 
     // Allocator to be used for managing space overhead while performing reductions
     dh::XGBCachingDeviceAllocator<char> alloc;
+
+#if defined(XGBOOST_USE_CUDA)
     return thrust::reduce(thrust::cuda::par(alloc), dcg.begin(), dcg.end());
+#elif defined(XGBOOST_USE_HIP)
+    return thrust::reduce(thrust::hip::par(alloc), dcg.begin(), dcg.end());
+#endif
   }
 };
 
@@ -225,10 +258,17 @@ struct EvalMAPGpu {
     // Next, prefix scan the nontrivial labels that are segmented to accumulate them.
     // This is required for computing the metric sum
     // Data segmented into different groups...
+#if defined(XGBOOST_USE_CUDA)
     thrust::inclusive_scan_by_key(thrust::cuda::par(alloc),
                                   dh::tcbegin(dgroup_idx), dh::tcend(dgroup_idx),
                                   hits.begin(),  // Input value
                                   hits.begin());  // In-place scan
+#elif defined(XGBOOST_USE_HIP)
+    thrust::inclusive_scan_by_key(thrust::hip::par(alloc),
+                                  dh::tcbegin(dgroup_idx), dh::tcend(dgroup_idx),
+                                  hits.begin(),  // Input value
+                                  hits.begin());  // In-place scan
+#endif
 
     // Find each group's metric sum
     dh::caching_device_vector<double> sumap(ngroups, 0);
@@ -236,7 +276,13 @@ struct EvalMAPGpu {
     const auto *dhits = hits.data().get();
 
     int device_id = -1;
+
+#if defined(XGBOOST_USE_CUDA)
     dh::safe_cuda(cudaGetDevice(&device_id));
+#elif defined(XGBOOST_USE_HIP)
+    dh::safe_cuda(hipGetDevice(&device_id));
+#endif
+
     // For each group item compute the aggregated precision
     dh::LaunchN(nitems, nullptr, [=] __device__(uint32_t idx) {
       if (DetermineNonTrivialLabelLambda(idx)) {
@@ -264,7 +310,11 @@ struct EvalMAPGpu {
       }
     });
 
+#if defined(XGBOOST_USE_CUDA)
     return thrust::reduce(thrust::cuda::par(alloc), sumap.begin(), sumap.end());
+#elif defined(XGBOOST_USE_HIP)
+    return thrust::reduce(thrust::hip::par(alloc), sumap.begin(), sumap.end());
+#endif
   }
 };
 
diff --git a/src/metric/rank_metric.hip b/src/metric/rank_metric.hip
index e69de29bb..a8ed8b267 100644
--- a/src/metric/rank_metric.hip
+++ b/src/metric/rank_metric.hip
@@ -0,0 +1,5 @@
+
+
+#if defined(XGBOOST_USE_HIP)
+#include "rank_metric.cu"
+#endif