From c875f0425ffd9533cfa4e6c8a72815a90ebcfa7a Mon Sep 17 00:00:00 2001 From: amdsc21 <96135754+amdsc21@users.noreply.github.com> Date: Thu, 9 Mar 2023 20:48:31 +0100 Subject: [PATCH] finished rank_metric.cu --- src/metric/rank_metric.cu | 50 ++++++++++++++++++++++++++++++++++++++ src/metric/rank_metric.hip | 5 ++++ 2 files changed, 55 insertions(+) diff --git a/src/metric/rank_metric.cu b/src/metric/rank_metric.cu index 5f98db7a9..b19571559 100644 --- a/src/metric/rank_metric.cu +++ b/src/metric/rank_metric.cu @@ -34,7 +34,12 @@ struct EvalRankGpu : public GPUMetric, public EvalRankConfig { const auto ngroups = static_cast(gptr.size() - 1); auto device = ctx_->gpu_id; + +#if defined(XGBOOST_USE_CUDA) dh::safe_cuda(cudaSetDevice(device)); +#elif defined(XGBOOST_USE_HIP) + dh::safe_cuda(hipSetDevice(device)); +#endif info.labels.SetDevice(device); preds.SetDevice(device); @@ -99,7 +104,13 @@ struct EvalPrecisionGpu { auto *dhits = hits.data().get(); int device_id = -1; + +#if defined(XGBOOST_USE_CUDA) dh::safe_cuda(cudaGetDevice(&device_id)); +#elif defined(XGBOOST_USE_HIP) + dh::safe_cuda(hipGetDevice(&device_id)); +#endif + // For each group item compute the aggregated precision dh::LaunchN(nitems, nullptr, [=] __device__(uint32_t idx) { const auto group_idx = dgroup_idx[idx]; @@ -112,8 +123,14 @@ struct EvalPrecisionGpu { // Allocator to be used for managing space overhead while performing reductions dh::XGBCachingDeviceAllocator alloc; + +#if defined(XGBOOST_USE_CUDA) return static_cast(thrust::reduce(thrust::cuda::par(alloc), hits.begin(), hits.end())) / ecfg.topn; +#elif defined(XGBOOST_USE_HIP) + return static_cast(thrust::reduce(thrust::hip::par(alloc), + hits.begin(), hits.end())) / ecfg.topn; +#endif } }; @@ -142,7 +159,12 @@ struct EvalNDCGGpu { auto *ddcgs = dcgs.data().get(); int device_id = -1; + +#if defined(XGBOOST_USE_CUDA) dh::safe_cuda(cudaGetDevice(&device_id)); +#elif defined(XGBOOST_USE_HIP) + dh::safe_cuda(hipGetDevice(&device_id)); +#endif // For each group item compute the aggregated precision dh::LaunchN(nitems, nullptr, [=] __device__(uint32_t idx) { @@ -177,7 +199,13 @@ struct EvalNDCGGpu { double *didcg = idcg.data().get(); int device_id = -1; + +#if defined(XGBOOST_USE_CUDA) dh::safe_cuda(cudaGetDevice(&device_id)); +#elif defined(XGBOOST_USE_HIP) + dh::safe_cuda(hipGetDevice(&device_id)); +#endif + // Compute the group's DCG and reduce it across all groups dh::LaunchN(ngroups, nullptr, [=] __device__(uint32_t gidx) { if (didcg[gidx] == 0.0f) { @@ -189,7 +217,12 @@ struct EvalNDCGGpu { // Allocator to be used for managing space overhead while performing reductions dh::XGBCachingDeviceAllocator alloc; + +#if defined(XGBOOST_USE_CUDA) return thrust::reduce(thrust::cuda::par(alloc), dcg.begin(), dcg.end()); +#elif defined(XGBOOST_USE_HIP) + return thrust::reduce(thrust::hip::par(alloc), dcg.begin(), dcg.end()); +#endif } }; @@ -225,10 +258,17 @@ struct EvalMAPGpu { // Next, prefix scan the nontrivial labels that are segmented to accumulate them. // This is required for computing the metric sum // Data segmented into different groups... +#if defined(XGBOOST_USE_CUDA) thrust::inclusive_scan_by_key(thrust::cuda::par(alloc), dh::tcbegin(dgroup_idx), dh::tcend(dgroup_idx), hits.begin(), // Input value hits.begin()); // In-place scan +#elif defined(XGBOOST_USE_HIP) + thrust::inclusive_scan_by_key(thrust::hip::par(alloc), + dh::tcbegin(dgroup_idx), dh::tcend(dgroup_idx), + hits.begin(), // Input value + hits.begin()); // In-place scan +#endif // Find each group's metric sum dh::caching_device_vector sumap(ngroups, 0); @@ -236,7 +276,13 @@ struct EvalMAPGpu { const auto *dhits = hits.data().get(); int device_id = -1; + +#if defined(XGBOOST_USE_CUDA) dh::safe_cuda(cudaGetDevice(&device_id)); +#elif defined(XGBOOST_USE_HIP) + dh::safe_cuda(hipGetDevice(&device_id)); +#endif + // For each group item compute the aggregated precision dh::LaunchN(nitems, nullptr, [=] __device__(uint32_t idx) { if (DetermineNonTrivialLabelLambda(idx)) { @@ -264,7 +310,11 @@ struct EvalMAPGpu { } }); +#if defined(XGBOOST_USE_CUDA) return thrust::reduce(thrust::cuda::par(alloc), sumap.begin(), sumap.end()); +#elif defined(XGBOOST_USE_HIP) + return thrust::reduce(thrust::hip::par(alloc), sumap.begin(), sumap.end()); +#endif } }; diff --git a/src/metric/rank_metric.hip b/src/metric/rank_metric.hip index e69de29bb..a8ed8b267 100644 --- a/src/metric/rank_metric.hip +++ b/src/metric/rank_metric.hip @@ -0,0 +1,5 @@ + + +#if defined(XGBOOST_USE_HIP) +#include "rank_metric.cu" +#endif