From 18034a429153affd16faec8ec4c4ac3d887b7a66 Mon Sep 17 00:00:00 2001
From: amdsc21 <96135754+amdsc21@users.noreply.github.com>
Date: Sun, 26 Mar 2023 01:42:51 +0100
Subject: [PATCH] tune histogram

---
 src/tree/gpu_hist/histogram.cu | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/src/tree/gpu_hist/histogram.cu b/src/tree/gpu_hist/histogram.cu
index da1179526..426343901 100644
--- a/src/tree/gpu_hist/histogram.cu
+++ b/src/tree/gpu_hist/histogram.cu
@@ -325,8 +325,13 @@ void BuildGradientHistogram(CUDAContext const* ctx, EllpackDeviceAccessor const&
 // Allocate number of blocks such that each block has about kMinItemsPerBlock work
 // Up to a maximum where the device is saturated
+#if defined(XGBOOST_USE_CUDA)
 grid_size = std::min(grid_size, static_cast(
 common::DivRoundUp(items_per_group, kMinItemsPerBlock)));
+#elif defined(XGBOOST_USE_HIP)
+ grid_size = std::min(common::DivRoundUp(grid_size, num_groups), static_cast(
+ common::DivRoundUp(items_per_group, kMinItemsPerBlock)));
+#endif
 dh::LaunchKernel {dim3(grid_size, num_groups), static_cast(kBlockThreads), smem_size, ctx->Stream()} (kernel, matrix, feature_groups, d_ridx, histogram.data(),