Add use_rmm flag to global configuration (#6656)
* Ensure RMM is 0.18 or later
* Add use_rmm flag to global configuration
* Modify XGBCachingDeviceAllocatorImpl to skip CUB when use_rmm=True
* Update the demo
* [CI] Pin NumPy to 1.19.4, since NumPy 1.19.5 doesn't work with latest Shap
This commit is contained in:
committed by
GitHub
parent
e4894111ba
commit
366f3cb9d8
@@ -32,6 +32,7 @@
|
||||
#include "xgboost/logging.h"
|
||||
#include "xgboost/host_device_vector.h"
|
||||
#include "xgboost/span.h"
|
||||
#include "xgboost/global_config.h"
|
||||
|
||||
#include "common.h"
|
||||
|
||||
@@ -42,6 +43,14 @@
|
||||
#if defined(XGBOOST_USE_RMM) && XGBOOST_USE_RMM == 1
|
||||
#include "rmm/mr/device/per_device_resource.hpp"
|
||||
#include "rmm/mr/device/thrust_allocator_adaptor.hpp"
|
||||
#include "rmm/version_config.hpp"
|
||||
|
||||
#if !defined(RMM_VERSION_MAJOR) || !defined(RMM_VERSION_MINOR)
|
||||
#error "Please use RMM version 0.18 or later"
|
||||
#elif RMM_VERSION_MAJOR == 0 && RMM_VERSION_MINOR < 18
|
||||
#error "Please use RMM version 0.18 or later"
|
||||
#endif // !defined(RMM_VERSION_MAJOR) || !defined(RMM_VERSION_MINOR)
|
||||
|
||||
#endif // defined(XGBOOST_USE_RMM) && XGBOOST_USE_RMM == 1
|
||||
|
||||
#if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 600 || defined(__clang__)
|
||||
@@ -453,21 +462,42 @@ struct XGBCachingDeviceAllocatorImpl : XGBBaseDeviceAllocator<T> {
|
||||
return *allocator;
|
||||
}
|
||||
pointer allocate(size_t n) { // NOLINT
|
||||
T* ptr;
|
||||
auto errc = GetGlobalCachingAllocator().DeviceAllocate(reinterpret_cast<void **>(&ptr),
|
||||
n * sizeof(T));
|
||||
if (errc != cudaSuccess) {
|
||||
ThrowOOMError("Caching allocator", n * sizeof(T));
|
||||
pointer thrust_ptr;
|
||||
if (use_cub_allocator_) {
|
||||
T* raw_ptr{nullptr};
|
||||
auto errc = GetGlobalCachingAllocator().DeviceAllocate(reinterpret_cast<void **>(&raw_ptr),
|
||||
n * sizeof(T));
|
||||
if (errc != cudaSuccess) {
|
||||
ThrowOOMError("Caching allocator", n * sizeof(T));
|
||||
}
|
||||
thrust_ptr = pointer(raw_ptr);
|
||||
} else {
|
||||
try {
|
||||
thrust_ptr = SuperT::allocate(n);
|
||||
dh::safe_cuda(cudaGetLastError());
|
||||
} catch (const std::exception &e) {
|
||||
ThrowOOMError(e.what(), n * sizeof(T));
|
||||
}
|
||||
}
|
||||
pointer thrust_ptr{ ptr };
|
||||
GlobalMemoryLogger().RegisterAllocation(thrust_ptr.get(), n * sizeof(T));
|
||||
return thrust_ptr;
|
||||
}
|
||||
void deallocate(pointer ptr, size_t n) { // NOLINT
|
||||
GlobalMemoryLogger().RegisterDeallocation(ptr.get(), n * sizeof(T));
|
||||
GetGlobalCachingAllocator().DeviceFree(ptr.get());
|
||||
if (use_cub_allocator_) {
|
||||
GetGlobalCachingAllocator().DeviceFree(ptr.get());
|
||||
} else {
|
||||
SuperT::deallocate(ptr, n);
|
||||
}
|
||||
}
|
||||
#if defined(XGBOOST_USE_RMM) && XGBOOST_USE_RMM == 1
|
||||
XGBCachingDeviceAllocatorImpl()
|
||||
: SuperT(rmm::cuda_stream_default, rmm::mr::get_current_device_resource()),
|
||||
use_cub_allocator_(!xgboost::GlobalConfigThreadLocalStore::Get()->use_rmm) {}
|
||||
#endif // defined(XGBOOST_USE_RMM) && XGBOOST_USE_RMM == 1
|
||||
XGBOOST_DEVICE void construct(T *) {} // NOLINT
|
||||
private:
|
||||
bool use_cub_allocator_{true};
|
||||
};
|
||||
} // namespace detail
|
||||
|
||||
|
||||
Reference in New Issue
Block a user