For histograms, opting into maximum shared memory available per block. (#5491)

This commit is contained in:
Andy Adinets
2020-04-21 04:56:42 +02:00
committed by GitHub
parent 9c1103e06c
commit 73142041b9
6 changed files with 54 additions and 31 deletions

View File

@@ -100,7 +100,7 @@ inline size_t TotalMemory(int device_idx) {
}
/**
* \fn inline int max_shared_memory(int device_idx)
* \fn inline int MaxSharedMemory(int device_idx)
*
* \brief Maximum shared memory per block on this device.
*
@@ -113,6 +113,23 @@ inline size_t MaxSharedMemory(int device_idx) {
return prop.sharedMemPerBlock;
}
/**
 * \fn inline size_t MaxSharedMemoryOptin(int device_idx)
 *
 * \brief Maximum dynamic shared memory per thread block on this device
 that can be opted into when using cudaFuncSetAttribute().
 *
 * \param device_idx Zero-based index of the device.
 *
 * \return The opt-in shared-memory limit in bytes, as reported by the
 cudaDevAttrMaxSharedMemoryPerBlockOptin device attribute. This can
 exceed the default per-block limit returned by MaxSharedMemory()
 (prop.sharedMemPerBlock); kernels must opt in via
 cudaFuncSetAttribute(cudaFuncAttributeMaxDynamicSharedMemorySize)
 to actually use the larger amount.
 */
inline size_t MaxSharedMemoryOptin(int device_idx) {
// cudaDeviceGetAttribute reports the attribute as an int; widened to
// size_t on return for consistency with the other memory-size helpers.
int max_shared_memory = 0;
dh::safe_cuda(cudaDeviceGetAttribute
(&max_shared_memory, cudaDevAttrMaxSharedMemoryPerBlockOptin,
device_idx));
return size_t(max_shared_memory);
}
inline void CheckComputeCapability() {
for (int d_idx = 0; d_idx < xgboost::common::AllVisibleGPUs(); ++d_idx) {
cudaDeviceProp prop;