From bb2feab0b2bd1c0676729336648c5453d2618833 Mon Sep 17 00:00:00 2001 From: Hendrik Groove Date: Mon, 21 Oct 2024 01:55:41 +0200 Subject: [PATCH] try --- src/tree/gpu_hist/evaluate_splits.cu | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/tree/gpu_hist/evaluate_splits.cu b/src/tree/gpu_hist/evaluate_splits.cu index a72357ec9..07377a1d5 100644 --- a/src/tree/gpu_hist/evaluate_splits.cu +++ b/src/tree/gpu_hist/evaluate_splits.cu @@ -11,9 +11,9 @@ #include "evaluate_splits.cuh" #include "expand_entry.cuh" -#if defined(XGBOOST_USE_CUDA) +#if defined(XGBOOST_USE_HIP) #define WARP_SIZE 32 -#elif defined(XGBOOST_USE_HIP) +#elif defined(XGBOOST_USE_HIP2) #include #ifdef __AMDGCN_WAVEFRONT_SIZE @@ -110,10 +110,10 @@ class EvaluateSplitAgent { } local_sum = SumReduceT(temp_storage->sum_reduce).Sum(local_sum); // NOLINT // Broadcast result from thread 0 -#if defined(XGBOOST_USE_CUDA) +#if defined(XGBOOST_USE_HIP) return {__shfl_sync(0xffffffff, local_sum.GetQuantisedGrad(), 0), __shfl_sync(0xffffffff, local_sum.GetQuantisedHess(), 0)}; -#elif defined(XGBOOST_USE_HIP) +#elif defined(XGBOOST_USE_HIP2) return {__shfl(local_sum.GetQuantisedGrad(), 0), __shfl(local_sum.GetQuantisedHess(), 0)}; #endif @@ -144,9 +144,9 @@ class EvaluateSplitAgent { // This reduce result is only valid in thread 0 // broadcast to the rest of the warp -#if defined(XGBOOST_USE_CUDA) +#if defined(XGBOOST_USE_HIP) auto best_thread = __shfl_sync(0xffffffff, best.key, 0); -#elif defined(XGBOOST_USE_HIP) +#elif defined(XGBOOST_USE_HIP2) auto best_thread = __shfl(best.key, 0); #endif @@ -181,9 +181,9 @@ class EvaluateSplitAgent { auto best = MaxReduceT(temp_storage->max_reduce).Reduce({(int)threadIdx.x, gain}, cub::ArgMax()); // This reduce result is only valid in thread 0 // broadcast to the rest of the warp -#if defined(XGBOOST_USE_CUDA) +#if defined(XGBOOST_USE_HIP) auto best_thread = __shfl_sync(0xffffffff, best.key, 0); -#elif defined(XGBOOST_USE_HIP) +#elif defined(XGBOOST_USE_HIP2) auto best_thread = __shfl(best.key, 0); #endif @@ -215,9 +215,9 @@ class EvaluateSplitAgent { auto best = MaxReduceT(temp_storage->max_reduce).Reduce({(int)threadIdx.x, gain}, cub::ArgMax()); // This reduce result is only valid in thread 0 // broadcast to the rest of the warp -#if defined(XGBOOST_USE_CUDA) +#if defined(XGBOOST_USE_HIP) auto best_thread = __shfl_sync(0xffffffff, best.key, 0); -#elif defined(XGBOOST_USE_HIP) +#elif defined(XGBOOST_USE_HIP2) auto best_thread = __shfl(best.key, 0); #endif