Fall back to CUB allocator if RMM memory pool is not set up (#6150)

* Fall back to CUB allocator if RMM memory pool is not set up

* Fix build

* Prevent memory leak

* Add note about lack of memory initialisation

* Add check for other fast allocators

* Set use_cub_allocator_ to true when RMM is not enabled

* Fix clang-tidy

* Do not demangle symbol; add check to ensure Linux+Clang/GCC combo
Philip Hyunsu Cho authored on 2020-09-24 11:04:50 -07:00; committed by GitHub
parent 5b05f88ba9
commit 72ef553550
2 changed files with 49 additions and 33 deletions
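
For context, the fallback described in the commit message engages whenever the current RMM device resource is not one of the pool-style ("fast") resources. A minimal sketch of the setup that keeps the fallback inactive, assuming RMM's public API of that era (rmm::mr::pool_memory_resource, rmm::mr::set_current_device_resource); the XGBoost training calls are elided:

    #include <rmm/mr/device/cuda_memory_resource.hpp>
    #include <rmm/mr/device/per_device_resource.hpp>
    #include <rmm/mr/device/pool_memory_resource.hpp>

    int main() {
      // Upstream resource backed by plain cudaMalloc/cudaFree.
      rmm::mr::cuda_memory_resource cuda_mr;
      // Pool resource; its typeid name contains "pool_memory_resource",
      // which is the substring the new allocator code looks for.
      rmm::mr::pool_memory_resource<rmm::mr::cuda_memory_resource> pool_mr{&cuda_mr};
      // Install it as the current device resource *before* XGBoost allocates
      // device memory; otherwise XGBoost falls back to the CUB allocator.
      rmm::mr::set_current_device_resource(&pool_mr);
      // ... create DMatrix / train boosters here ...
      return 0;
    }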

CMakeLists.txt

@@ -90,6 +90,12 @@ endif (USE_AVX)
 if (PLUGIN_RMM AND NOT (USE_CUDA))
   message(SEND_ERROR "`PLUGIN_RMM` must be enabled with `USE_CUDA` flag.")
 endif (PLUGIN_RMM AND NOT (USE_CUDA))
+if (PLUGIN_RMM AND NOT ((CMAKE_CXX_COMPILER_ID STREQUAL "Clang") OR (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")))
+  message(SEND_ERROR "`PLUGIN_RMM` must be used with GCC or Clang compiler.")
+endif (PLUGIN_RMM AND NOT ((CMAKE_CXX_COMPILER_ID STREQUAL "Clang") OR (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")))
+if (PLUGIN_RMM AND NOT (CMAKE_SYSTEM_NAME STREQUAL "Linux"))
+  message(SEND_ERROR "`PLUGIN_RMM` must be used with Linux.")
+endif (PLUGIN_RMM AND NOT (CMAKE_SYSTEM_NAME STREQUAL "Linux"))
 if (ENABLE_ALL_WARNINGS)
   if ((NOT CMAKE_CXX_COMPILER_ID MATCHES "Clang") AND (NOT CMAKE_CXX_COMPILER_ID STREQUAL "GNU"))
     message(SEND_ERROR "ENABLE_ALL_WARNINGS is only available for Clang and GCC.")
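
These guards exist because the allocator now inspects the raw, un-demangled typeid name of the current RMM resource, which is only predictable under the Itanium C++ ABI that GCC and Clang use on Linux. A standalone illustration; the mangled string in the comment is an assumption based on Itanium name mangling, not output taken from the PR:

    #include <iostream>
    #include <typeinfo>

    #include <rmm/mr/device/cuda_memory_resource.hpp>
    #include <rmm/mr/device/device_memory_resource.hpp>
    #include <rmm/mr/device/pool_memory_resource.hpp>

    int main() {
      rmm::mr::cuda_memory_resource cuda_mr;
      rmm::mr::pool_memory_resource<rmm::mr::cuda_memory_resource> pool_mr{&cuda_mr};
      rmm::mr::device_memory_resource *mr = &pool_mr;
      // On GCC/Clang this prints something like
      //   N3rmm2mr20pool_memory_resourceINS0_20cuda_memory_resourceEEE
      // so searching for the substring "pool_memory_resource" is reliable.
      // MSVC formats typeid names differently, hence the compiler check.
      std::cout << typeid(*mr).name() << "\n";
      return 0;
    }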

src/common/device_helpers.cuh

@@ -402,7 +402,7 @@ struct XGBDefaultDeviceAllocatorImpl : XGBBaseDeviceAllocator<T> {
   }
   void deallocate(pointer ptr, size_t n) {  // NOLINT
     GlobalMemoryLogger().RegisterDeallocation(ptr.get(), n * sizeof(T));
-    return SuperT::deallocate(ptr, n);
+    SuperT::deallocate(ptr, n);
   }
 #if defined(XGBOOST_USE_RMM) && XGBOOST_USE_RMM == 1
   XGBDefaultDeviceAllocatorImpl()
@@ -410,49 +410,59 @@ struct XGBDefaultDeviceAllocatorImpl : XGBBaseDeviceAllocator<T> {
 #endif  // defined(XGBOOST_USE_RMM) && XGBOOST_USE_RMM == 1
 };
-#if defined(XGBOOST_USE_RMM) && XGBOOST_USE_RMM == 1
-template <typename T>
-using XGBCachingDeviceAllocatorImpl = XGBDefaultDeviceAllocatorImpl<T>;
-#else
 /**
- * \brief Caching memory allocator, uses cub::CachingDeviceAllocator as a back-end and logs
- * allocations if verbose. Does not initialise memory on construction.
+ * \brief Caching memory allocator, uses cub::CachingDeviceAllocator as a back-end, unless
+ *        RMM pool allocator is enabled. Does not initialise memory on construction.
  */
 template <class T>
-struct XGBCachingDeviceAllocatorImpl : thrust::device_malloc_allocator<T> {
+struct XGBCachingDeviceAllocatorImpl : XGBBaseDeviceAllocator<T> {
+  using SuperT = XGBBaseDeviceAllocator<T>;
   using pointer = thrust::device_ptr<T>;  // NOLINT
   template<typename U>
   struct rebind  // NOLINT
   {
     using other = XGBCachingDeviceAllocatorImpl<U>;  // NOLINT
   };
-  cub::CachingDeviceAllocator& GetGlobalCachingAllocator ()
-  {
+  cub::CachingDeviceAllocator& GetGlobalCachingAllocator() {
     // Configure allocator with maximum cached bin size of ~1GB and no limit on
     // maximum cached bytes
     static cub::CachingDeviceAllocator *allocator = new cub::CachingDeviceAllocator(2, 9, 29);
     return *allocator;
   }
   pointer allocate(size_t n) {  // NOLINT
-    T *ptr;
-    GetGlobalCachingAllocator().DeviceAllocate(reinterpret_cast<void **>(&ptr),
-                                               n * sizeof(T));
-    pointer thrust_ptr(ptr);
-    GlobalMemoryLogger().RegisterAllocation(thrust_ptr.get(), n * sizeof(T));
-    return thrust_ptr;
+    pointer ptr;
+    if (use_cub_allocator_) {
+      T* raw_ptr{nullptr};
+      GetGlobalCachingAllocator().DeviceAllocate(reinterpret_cast<void**>(&raw_ptr), n * sizeof(T));
+      ptr = pointer(raw_ptr);
+    } else {
+      ptr = SuperT::allocate(n);
+    }
+    GlobalMemoryLogger().RegisterAllocation(ptr.get(), n * sizeof(T));
+    return ptr;
   }
   void deallocate(pointer ptr, size_t n) {  // NOLINT
     GlobalMemoryLogger().RegisterDeallocation(ptr.get(), n * sizeof(T));
-    GetGlobalCachingAllocator().DeviceFree(ptr.get());
-  }
-
-  __host__ __device__
-  void construct(T *)  // NOLINT
-  {
-    // no-op
+    if (use_cub_allocator_) {
+      GetGlobalCachingAllocator().DeviceFree(ptr.get());
+    } else {
+      SuperT::deallocate(ptr, n);
+    }
   }
+#if defined(XGBOOST_USE_RMM) && XGBOOST_USE_RMM == 1
+  XGBCachingDeviceAllocatorImpl()
+    : SuperT(rmm::mr::get_current_device_resource(), cudaStream_t{nullptr}) {
+    std::string symbol{typeid(*SuperT::resource()).name()};
+    if (symbol.find("pool_memory_resource") != std::string::npos
+        || symbol.find("binning_memory_resource") != std::string::npos
+        || symbol.find("arena_memory_resource") != std::string::npos) {
+      use_cub_allocator_ = false;
+    }
+  }
+#endif  // defined(XGBOOST_USE_RMM) && XGBOOST_USE_RMM == 1
+ private:
+  bool use_cub_allocator_{true};
 };
-#endif  // defined(XGBOOST_USE_RMM) && XGBOOST_USE_RMM == 1
 }  // namespace detail
 // Declare xgboost allocators
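
Distilled from the constructor above, the runtime decision is just a substring test on the current resource's dynamic type name. A standalone sketch with a hypothetical helper name (ShouldUseCubAllocator is not part of the PR):

    #include <string>
    #include <typeinfo>

    #include <rmm/mr/device/per_device_resource.hpp>

    // Use CUB's caching allocator unless the current RMM resource is one of
    // the known fast, pool-style resources (pool/binning/arena).
    bool ShouldUseCubAllocator() {
      std::string symbol{typeid(*rmm::mr::get_current_device_resource()).name()};
      bool pooling = symbol.find("pool_memory_resource") != std::string::npos
          || symbol.find("binning_memory_resource") != std::string::npos
          || symbol.find("arena_memory_resource") != std::string::npos;
      return !pooling;
    }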