From 65d83e288f94765f6638d92ae4c82f0e1abbfe09 Mon Sep 17 00:00:00 2001 From: amdsc21 <96135754+amdsc21@users.noreply.github.com> Date: Wed, 19 Apr 2023 19:53:26 +0200 Subject: [PATCH] fix device query --- src/tree/gpu_hist/histogram.cu | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/tree/gpu_hist/histogram.cu b/src/tree/gpu_hist/histogram.cu index 426343901..c6f6b79b2 100644 --- a/src/tree/gpu_hist/histogram.cu +++ b/src/tree/gpu_hist/histogram.cu @@ -306,12 +306,14 @@ void BuildGradientHistogram(CUDAContext const* ctx, EllpackDeviceAccessor const& dh::safe_cuda(cudaDeviceGetAttribute(&n_mps, cudaDevAttrMultiProcessorCount, device)); int n_blocks_per_mp = 0; dh::safe_cuda(cudaOccupancyMaxActiveBlocksPerMultiprocessor(&n_blocks_per_mp, kernel, + kBlockThreads, smem_size)); #elif defined(XGBOOST_USE_HIP) dh::safe_cuda(hipDeviceGetAttribute(&n_mps, hipDeviceAttributeMultiprocessorCount, device)); int n_blocks_per_mp = 0; dh::safe_cuda(hipOccupancyMaxActiveBlocksPerMultiprocessor(&n_blocks_per_mp, kernel, -#endif kBlockThreads, smem_size)); +#endif + // This gives the number of blocks to keep the device occupied // Use this as the maximum number of blocks unsigned grid_size = n_blocks_per_mp * n_mps;