Modify caching allocator/vector and fix issues relating to inability to train large datasets (#4615)

This commit is contained in:
sriramch
2019-07-08 23:33:27 -07:00
committed by Rory Mitchell
parent cd1526d3b1
commit 7a388cbf8b
5 changed files with 22 additions and 18 deletions

View File

@@ -305,11 +305,11 @@ struct XGBDefaultDeviceAllocatorImpl : thrust::device_malloc_allocator<T> {
};
/**
 * \brief Allocate storage for n elements of T via super_t and record the
 *        allocation with the global memory logger.
 *
 * \param n Number of elements (not bytes) requested.
 * \return Pointer returned by super_t::allocate.
 */
pointer allocate(size_t n) {
  pointer ptr = super_t::allocate(n);
  // Register exactly once, in bytes: the element count scaled by sizeof(T).
  GlobalMemoryLogger().RegisterAllocation(ptr.get(), n * sizeof(T));
  return ptr;
}
/**
 * \brief Record the release with the global memory logger, then hand the
 *        storage back to super_t.
 *
 * \param ptr Pointer previously obtained from allocate().
 * \param n   Number of elements (not bytes) that were allocated.
 */
void deallocate(pointer ptr, size_t n) {
  // Deregister exactly once, using the same byte-based size that allocate() logs.
  GlobalMemoryLogger().RegisterDeallocation(ptr.get(), n * sizeof(T));
  return super_t::deallocate(ptr, n);
}
};
@@ -329,19 +329,19 @@ struct XGBCachingDeviceAllocatorImpl : thrust::device_malloc_allocator<T> {
{
// Configure allocator with a maximum cached bin size of 2^29 bytes (512MB,
// from bin_growth = 2 and max_bin = 29) and no limit on maximum cached bytes
static cub::CachingDeviceAllocator allocator(8,3,10);
return allocator;
static cub::CachingDeviceAllocator *allocator = new cub::CachingDeviceAllocator(2, 9, 29);
return *allocator;
}
/**
 * \brief Obtain storage for n elements of T from the global caching allocator
 *        and record the allocation with the global memory logger.
 *
 * \param n Number of elements (not bytes) requested.
 * \return The allocated address wrapped in the allocator's pointer type.
 */
pointer allocate(size_t n) {
  // Start from a known value so the pointer is never read uninitialised.
  T *ptr = nullptr;
  // NOTE(review): the status returned by DeviceAllocate is not inspected here —
  // confirm whether allocation failures should be surfaced to the caller.
  GetGlobalCachingAllocator().DeviceAllocate(reinterpret_cast<void **>(&ptr),
                                             n * sizeof(T));
  pointer thrust_ptr(ptr);
  // Register exactly once, in bytes (matches the size passed to DeviceAllocate).
  GlobalMemoryLogger().RegisterAllocation(thrust_ptr.get(), n * sizeof(T));
  return thrust_ptr;
}
/**
 * \brief Record the release with the global memory logger, then return the
 *        storage to the global caching allocator.
 *
 * \param ptr Pointer previously obtained from allocate().
 * \param n   Number of elements (not bytes) that were allocated.
 */
void deallocate(pointer ptr, size_t n) {
  // Deregister exactly once, using the same byte-based size that allocate() logs.
  GlobalMemoryLogger().RegisterDeallocation(ptr.get(), n * sizeof(T));
  GetGlobalCachingAllocator().DeviceFree(ptr.get());
}
__host__ __device__
@@ -363,6 +363,7 @@ template <typename T>
using device_vector = thrust::device_vector<T, XGBDeviceAllocator<T>>;
/** \brief thrust::device_vector that allocates through XGBCachingDeviceAllocator<T>. */
template <typename T>
using caching_device_vector = thrust::device_vector<T, XGBCachingDeviceAllocator<T>>;
/**
* \brief A double buffer, useful for algorithms like sort.
*/
@@ -376,9 +377,7 @@ class DoubleBuffer {
/**
 * \brief Build the double buffer on top of two existing vectors.
 *
 * The spans a and b view the storage of v1 and v2 respectively; no data is
 * copied here.
 *
 * \param v1 Vector whose storage backs span a.
 * \param v2 Vector whose storage backs span b.
 */
DoubleBuffer(VectorT *v1, VectorT *v2) {
  a = xgboost::common::Span<T>(v1->data().get(), v1->size());
  b = xgboost::common::Span<T>(v2->data().get(), v2->size());
  // Construct the cub buffer in a single step from the two span pointers
  // instead of filling d_buffers/selector by hand and then overwriting them.
  buff = cub::DoubleBuffer<T>(a.data(), b.data());
}
size_t Size() const {