From a79a35c22c3e3eb29b756daa9758bcbc872c5160 Mon Sep 17 00:00:00 2001 From: amdsc21 <96135754+amdsc21@users.noreply.github.com> Date: Wed, 15 Mar 2023 22:00:26 +0100 Subject: [PATCH] add warp size --- src/tree/gpu_hist/evaluate_splits.cu | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/src/tree/gpu_hist/evaluate_splits.cu b/src/tree/gpu_hist/evaluate_splits.cu index 7f1aad967..dc7ea1513 100644 --- a/src/tree/gpu_hist/evaluate_splits.cu +++ b/src/tree/gpu_hist/evaluate_splits.cu @@ -18,6 +18,12 @@ #include "evaluate_splits.cuh" #include "expand_entry.cuh" +#if defined(XGBOOST_USE_HIP) +#define WARP_SIZE WAVEFRONT_SIZE +#elif defined(XGBOOST_USE_CUDA) +#define WARP_SIZE 32 +#endif + namespace xgboost { #if defined(XGBOOST_USE_HIP) namespace cub = hipcub; @@ -97,11 +103,7 @@ class EvaluateSplitAgent { param(shared_inputs.param), evaluator(evaluator), missing(parent_sum - ReduceFeature()) { static_assert( -#if defined(XGBOOST_USE_HIP) - kBlockSize == WAVEFRONT_SIZE, -#elif defined(XGBOOST_USE_CUDA) - kBlockSize == 32, -#endif + kBlockSize == WARP_SIZE, "This kernel relies on the assumption block_size == warp_size"); // There should be no missing value gradients for a dense matrix KERNEL_CHECK(!shared_inputs.is_dense || missing.GetQuantisedHess() == 0); @@ -393,11 +395,7 @@ void GPUHistEvaluator::LaunchEvaluateSplits( combined_num_features, DeviceSplitCandidate()); // One block for each feature -#if defined(XGBOOST_USE_HIP) - uint32_t constexpr kBlockThreads = WAVEFRONT_SIZE; -#elif defined(XGBOOST_USE_CUDA) - uint32_t constexpr kBlockThreads = 32; -#endif + uint32_t constexpr kBlockThreads = WARP_SIZE; dh::LaunchKernel {static_cast(combined_num_features), kBlockThreads, 0}( EvaluateSplitsKernel, max_active_features, d_inputs,