Fall back to CUB allocator if RMM memory pool is not set up (#6150)
* Fall back to CUB allocator if RMM memory pool is not set up * Fix build * Prevent memory leak * Add note about lack of memory initialisation * Add check for other fast allocators * Set use_cub_allocator_ to true when RMM is not enabled * Fix clang-tidy * Do not demangle symbol; add check to ensure Linux+Clang/GCC combo
This commit is contained in:
parent
5b05f88ba9
commit
72ef553550
@ -90,6 +90,12 @@ endif (USE_AVX)
|
|||||||
if (PLUGIN_RMM AND NOT (USE_CUDA))
|
if (PLUGIN_RMM AND NOT (USE_CUDA))
|
||||||
message(SEND_ERROR "`PLUGIN_RMM` must be enabled with `USE_CUDA` flag.")
|
message(SEND_ERROR "`PLUGIN_RMM` must be enabled with `USE_CUDA` flag.")
|
||||||
endif (PLUGIN_RMM AND NOT (USE_CUDA))
|
endif (PLUGIN_RMM AND NOT (USE_CUDA))
|
||||||
|
if (PLUGIN_RMM AND NOT ((CMAKE_CXX_COMPILER_ID STREQUAL "Clang") OR (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")))
|
||||||
|
message(SEND_ERROR "`PLUGIN_RMM` must be used with GCC or Clang compiler.")
|
||||||
|
endif (PLUGIN_RMM AND NOT ((CMAKE_CXX_COMPILER_ID STREQUAL "Clang") OR (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")))
|
||||||
|
if (PLUGIN_RMM AND NOT (CMAKE_SYSTEM_NAME STREQUAL "Linux"))
|
||||||
|
message(SEND_ERROR "`PLUGIN_RMM` must be used with Linux.")
|
||||||
|
endif (PLUGIN_RMM AND NOT (CMAKE_SYSTEM_NAME STREQUAL "Linux"))
|
||||||
if (ENABLE_ALL_WARNINGS)
|
if (ENABLE_ALL_WARNINGS)
|
||||||
if ((NOT CMAKE_CXX_COMPILER_ID MATCHES "Clang") AND (NOT CMAKE_CXX_COMPILER_ID STREQUAL "GNU"))
|
if ((NOT CMAKE_CXX_COMPILER_ID MATCHES "Clang") AND (NOT CMAKE_CXX_COMPILER_ID STREQUAL "GNU"))
|
||||||
message(SEND_ERROR "ENABLE_ALL_WARNINGS is only available for Clang and GCC.")
|
message(SEND_ERROR "ENABLE_ALL_WARNINGS is only available for Clang and GCC.")
|
||||||
|
|||||||
@ -402,7 +402,7 @@ struct XGBDefaultDeviceAllocatorImpl : XGBBaseDeviceAllocator<T> {
|
|||||||
}
|
}
|
||||||
void deallocate(pointer ptr, size_t n) { // NOLINT
|
void deallocate(pointer ptr, size_t n) { // NOLINT
|
||||||
GlobalMemoryLogger().RegisterDeallocation(ptr.get(), n * sizeof(T));
|
GlobalMemoryLogger().RegisterDeallocation(ptr.get(), n * sizeof(T));
|
||||||
return SuperT::deallocate(ptr, n);
|
SuperT::deallocate(ptr, n);
|
||||||
}
|
}
|
||||||
#if defined(XGBOOST_USE_RMM) && XGBOOST_USE_RMM == 1
|
#if defined(XGBOOST_USE_RMM) && XGBOOST_USE_RMM == 1
|
||||||
XGBDefaultDeviceAllocatorImpl()
|
XGBDefaultDeviceAllocatorImpl()
|
||||||
@ -410,49 +410,59 @@ struct XGBDefaultDeviceAllocatorImpl : XGBBaseDeviceAllocator<T> {
|
|||||||
#endif // defined(XGBOOST_USE_RMM) && XGBOOST_USE_RMM == 1
|
#endif // defined(XGBOOST_USE_RMM) && XGBOOST_USE_RMM == 1
|
||||||
};
|
};
|
||||||
|
|
||||||
#if defined(XGBOOST_USE_RMM) && XGBOOST_USE_RMM == 1
|
|
||||||
template <typename T>
|
|
||||||
using XGBCachingDeviceAllocatorImpl = XGBDefaultDeviceAllocatorImpl<T>;
|
|
||||||
#else
|
|
||||||
/**
|
/**
|
||||||
* \brief Caching memory allocator, uses cub::CachingDeviceAllocator as a back-end and logs
|
* \brief Caching memory allocator, uses cub::CachingDeviceAllocator as a back-end, unless
|
||||||
* allocations if verbose. Does not initialise memory on construction.
|
* RMM pool allocator is enabled. Does not initialise memory on construction.
|
||||||
*/
|
*/
|
||||||
template <class T>
|
template <class T>
|
||||||
struct XGBCachingDeviceAllocatorImpl : thrust::device_malloc_allocator<T> {
|
struct XGBCachingDeviceAllocatorImpl : XGBBaseDeviceAllocator<T> {
|
||||||
|
using SuperT = XGBBaseDeviceAllocator<T>;
|
||||||
using pointer = thrust::device_ptr<T>; // NOLINT
|
using pointer = thrust::device_ptr<T>; // NOLINT
|
||||||
template<typename U>
|
template<typename U>
|
||||||
struct rebind // NOLINT
|
struct rebind // NOLINT
|
||||||
{
|
{
|
||||||
using other = XGBCachingDeviceAllocatorImpl<U>; // NOLINT
|
using other = XGBCachingDeviceAllocatorImpl<U>; // NOLINT
|
||||||
};
|
};
|
||||||
cub::CachingDeviceAllocator& GetGlobalCachingAllocator ()
|
cub::CachingDeviceAllocator& GetGlobalCachingAllocator() {
|
||||||
{
|
// Configure allocator with maximum cached bin size of ~1GB and no limit on
|
||||||
// Configure allocator with maximum cached bin size of ~1GB and no limit on
|
// maximum cached bytes
|
||||||
// maximum cached bytes
|
static cub::CachingDeviceAllocator *allocator = new cub::CachingDeviceAllocator(2, 9, 29);
|
||||||
static cub::CachingDeviceAllocator *allocator = new cub::CachingDeviceAllocator(2, 9, 29);
|
return *allocator;
|
||||||
return *allocator;
|
}
|
||||||
}
|
pointer allocate(size_t n) { // NOLINT
|
||||||
pointer allocate(size_t n) { // NOLINT
|
pointer ptr;
|
||||||
T *ptr;
|
if (use_cub_allocator_) {
|
||||||
GetGlobalCachingAllocator().DeviceAllocate(reinterpret_cast<void **>(&ptr),
|
T* raw_ptr{nullptr};
|
||||||
n * sizeof(T));
|
GetGlobalCachingAllocator().DeviceAllocate(reinterpret_cast<void**>(&raw_ptr), n * sizeof(T));
|
||||||
pointer thrust_ptr(ptr);
|
ptr = pointer(raw_ptr);
|
||||||
GlobalMemoryLogger().RegisterAllocation(thrust_ptr.get(), n * sizeof(T));
|
} else {
|
||||||
return thrust_ptr;
|
ptr = SuperT::allocate(n);
|
||||||
}
|
}
|
||||||
void deallocate(pointer ptr, size_t n) { // NOLINT
|
GlobalMemoryLogger().RegisterAllocation(ptr.get(), n * sizeof(T));
|
||||||
GlobalMemoryLogger().RegisterDeallocation(ptr.get(), n * sizeof(T));
|
return ptr;
|
||||||
GetGlobalCachingAllocator().DeviceFree(ptr.get());
|
}
|
||||||
}
|
void deallocate(pointer ptr, size_t n) { // NOLINT
|
||||||
|
GlobalMemoryLogger().RegisterDeallocation(ptr.get(), n * sizeof(T));
|
||||||
__host__ __device__
|
if (use_cub_allocator_) {
|
||||||
void construct(T *) // NOLINT
|
GetGlobalCachingAllocator().DeviceFree(ptr.get());
|
||||||
{
|
} else {
|
||||||
// no-op
|
SuperT::deallocate(ptr, n);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#if defined(XGBOOST_USE_RMM) && XGBOOST_USE_RMM == 1
|
||||||
|
XGBCachingDeviceAllocatorImpl()
|
||||||
|
: SuperT(rmm::mr::get_current_device_resource(), cudaStream_t{nullptr}) {
|
||||||
|
std::string symbol{typeid(*SuperT::resource()).name()};
|
||||||
|
if (symbol.find("pool_memory_resource") != std::string::npos
|
||||||
|
|| symbol.find("binning_memory_resource") != std::string::npos
|
||||||
|
|| symbol.find("arena_memory_resource") != std::string::npos) {
|
||||||
|
use_cub_allocator_ = false;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
};
|
|
||||||
#endif // defined(XGBOOST_USE_RMM) && XGBOOST_USE_RMM == 1
|
#endif // defined(XGBOOST_USE_RMM) && XGBOOST_USE_RMM == 1
|
||||||
|
private:
|
||||||
|
bool use_cub_allocator_{true};
|
||||||
|
};
|
||||||
} // namespace detail
|
} // namespace detail
|
||||||
|
|
||||||
// Declare xgboost allocators
|
// Declare xgboost allocators
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user