fix device query

This commit is contained in:
amdsc21 2023-04-19 19:53:26 +02:00
parent f645cf51c1
commit 65d83e288f

View File

@ -306,12 +306,14 @@ void BuildGradientHistogram(CUDAContext const* ctx, EllpackDeviceAccessor const&
dh::safe_cuda(cudaDeviceGetAttribute(&n_mps, cudaDevAttrMultiProcessorCount, device));
int n_blocks_per_mp = 0;
dh::safe_cuda(cudaOccupancyMaxActiveBlocksPerMultiprocessor(&n_blocks_per_mp, kernel,
kBlockThreads, smem_size));
#elif defined(XGBOOST_USE_HIP)
dh::safe_cuda(hipDeviceGetAttribute(&n_mps, hipDeviceAttributeMultiprocessorCount, device));
int n_blocks_per_mp = 0;
dh::safe_cuda(hipOccupancyMaxActiveBlocksPerMultiprocessor(&n_blocks_per_mp, kernel,
#endif
kBlockThreads, smem_size));
#endif
// This gives the number of blocks to keep the device occupied
// Use this as the maximum number of blocks
unsigned grid_size = n_blocks_per_mp * n_mps;