diff --git a/src/tree/gpu_hist/histogram.cu b/src/tree/gpu_hist/histogram.cu index 426343901..c6f6b79b2 100644 --- a/src/tree/gpu_hist/histogram.cu +++ b/src/tree/gpu_hist/histogram.cu @@ -306,12 +306,14 @@ void BuildGradientHistogram(CUDAContext const* ctx, EllpackDeviceAccessor const& dh::safe_cuda(cudaDeviceGetAttribute(&n_mps, cudaDevAttrMultiProcessorCount, device)); int n_blocks_per_mp = 0; dh::safe_cuda(cudaOccupancyMaxActiveBlocksPerMultiprocessor(&n_blocks_per_mp, kernel, + kBlockThreads, smem_size)); #elif defined(XGBOOST_USE_HIP) dh::safe_cuda(hipDeviceGetAttribute(&n_mps, hipDeviceAttributeMultiprocessorCount, device)); int n_blocks_per_mp = 0; dh::safe_cuda(hipOccupancyMaxActiveBlocksPerMultiprocessor(&n_blocks_per_mp, kernel, -#endif kBlockThreads, smem_size)); +#endif + // This gives the number of blocks to keep the device occupied // Use this as the maximum number of blocks unsigned grid_size = n_blocks_per_mp * n_mps;