For histograms, opting into maximum shared memory available per block. (#5491)
This commit is contained in:
@@ -100,7 +100,7 @@ inline size_t TotalMemory(int device_idx) {
|
||||
}
|
||||
|
||||
/**
|
||||
* \fn inline int max_shared_memory(int device_idx)
|
||||
* \fn inline int MaxSharedMemory(int device_idx)
|
||||
*
|
||||
* \brief Maximum shared memory per block on this device.
|
||||
*
|
||||
@@ -113,6 +113,23 @@ inline size_t MaxSharedMemory(int device_idx) {
|
||||
return prop.sharedMemPerBlock;
|
||||
}
|
||||
|
||||
/**
 * \fn inline size_t MaxSharedMemoryOptin(int device_idx)
 *
 * \brief Maximum dynamic shared memory per thread block on this device
 *        that can be opted into when using cudaFuncSetAttribute().
 *
 * \param device_idx Zero-based index of the device.
 *
 * \return Opt-in shared-memory limit in bytes.
 */
inline size_t MaxSharedMemoryOptin(int device_idx) {
  // The driver reports the attribute as an int; widen to size_t on return.
  int smem_bytes = 0;
  dh::safe_cuda(cudaDeviceGetAttribute(&smem_bytes,
                                       cudaDevAttrMaxSharedMemoryPerBlockOptin,
                                       device_idx));
  return static_cast<size_t>(smem_bytes);
}
|
||||
|
||||
inline void CheckComputeCapability() {
|
||||
for (int d_idx = 0; d_idx < xgboost::common::AllVisibleGPUs(); ++d_idx) {
|
||||
cudaDeviceProp prop;
|
||||
|
||||
Reference in New Issue
Block a user