From 364df7db0f42434490f87cc648068c27ad7c432b Mon Sep 17 00:00:00 2001 From: amdsc21 <96135754+amdsc21@users.noreply.github.com> Date: Tue, 14 Mar 2023 06:17:21 +0100 Subject: [PATCH] fix ../tree/gpu_hist/evaluate_splits.hip bugs, size 64 --- src/tree/gpu_hist/evaluate_splits.cu | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/tree/gpu_hist/evaluate_splits.cu b/src/tree/gpu_hist/evaluate_splits.cu index b898a8642..7f1aad967 100644 --- a/src/tree/gpu_hist/evaluate_splits.cu +++ b/src/tree/gpu_hist/evaluate_splits.cu @@ -11,6 +11,7 @@ #include "../../common/device_helpers.cuh" #elif defined(XGBOOST_USE_HIP) #include "../../common/device_helpers.hip.h" +#include #endif #include "../../data/ellpack_page.cuh" @@ -96,7 +97,11 @@ class EvaluateSplitAgent { param(shared_inputs.param), evaluator(evaluator), missing(parent_sum - ReduceFeature()) { static_assert( +#if defined(XGBOOST_USE_HIP) + kBlockSize == WAVEFRONT_SIZE, +#elif defined(XGBOOST_USE_CUDA) kBlockSize == 32, +#endif "This kernel relies on the assumption block_size == warp_size"); // There should be no missing value gradients for a dense matrix KERNEL_CHECK(!shared_inputs.is_dense || missing.GetQuantisedHess() == 0); @@ -388,7 +393,11 @@ void GPUHistEvaluator::LaunchEvaluateSplits( combined_num_features, DeviceSplitCandidate()); // One block for each feature +#if defined(XGBOOST_USE_HIP) + uint32_t constexpr kBlockThreads = WAVEFRONT_SIZE; +#elif defined(XGBOOST_USE_CUDA) uint32_t constexpr kBlockThreads = 32; +#endif dh::LaunchKernel {static_cast<uint32_t>(combined_num_features), kBlockThreads, 0}( EvaluateSplitsKernel<kBlockThreads>, max_active_features, d_inputs,