Modify caching allocator/vector and fix issues relating to inability to train large datasets (#4615)

2019-07-08 23:33:27 -07:00
parent cd1526d3b1
commit 7a388cbf8b
5 changed files with 22 additions and 18 deletions
--- a/src/common/device_helpers.cuh
+++ b/src/common/device_helpers.cuh
@@ -305,11 +305,11 @@ struct XGBDefaultDeviceAllocatorImpl : thrust::device_malloc_allocator<T> {
  };
  pointer allocate(size_t n) {
    pointer ptr = super_t::allocate(n);
-    GlobalMemoryLogger().RegisterAllocation(ptr.get(), n);
+    GlobalMemoryLogger().RegisterAllocation(ptr.get(), n * sizeof(T));
    return ptr;
  }
  void deallocate(pointer ptr, size_t n) {
-    GlobalMemoryLogger().RegisterDeallocation(ptr.get(), n);
+    GlobalMemoryLogger().RegisterDeallocation(ptr.get(), n * sizeof(T));
    return super_t::deallocate(ptr, n);
  }
 };
@@ -329,19 +329,19 @@ struct XGBCachingDeviceAllocatorImpl : thrust::device_malloc_allocator<T> {
   {
    // Configure allocator with maximum cached bin size of ~1GB and no limit on
    // maximum cached bytes
-     static cub::CachingDeviceAllocator allocator(8,3,10);
-     return allocator;
+     static cub::CachingDeviceAllocator *allocator = new cub::CachingDeviceAllocator(2, 9, 29);
+     return *allocator;
   }
   pointer allocate(size_t n) {
     T *ptr;
     GetGlobalCachingAllocator().DeviceAllocate(reinterpret_cast<void **>(&ptr),
                                                n * sizeof(T));
-     pointer thrust_ptr = thrust::device_ptr<T>(ptr);
-     GlobalMemoryLogger().RegisterAllocation(thrust_ptr.get(), n);
+     pointer thrust_ptr(ptr);
+     GlobalMemoryLogger().RegisterAllocation(thrust_ptr.get(), n * sizeof(T));
     return thrust_ptr;
   }
   void deallocate(pointer ptr, size_t n) {
-     GlobalMemoryLogger().RegisterDeallocation(ptr.get(), n);
+     GlobalMemoryLogger().RegisterDeallocation(ptr.get(), n * sizeof(T));
     GetGlobalCachingAllocator().DeviceFree(ptr.get());
   }
  __host__ __device__
@@ -363,6 +363,7 @@ template <typename T>
 using device_vector = thrust::device_vector<T,  XGBDeviceAllocator<T>>;
 template <typename T>
 using caching_device_vector = thrust::device_vector<T,  XGBCachingDeviceAllocator<T>>;
+
 /**
 * \brief A double buffer, useful for algorithms like sort.
 */
@@ -376,9 +377,7 @@ class DoubleBuffer {
  DoubleBuffer(VectorT *v1, VectorT *v2) {
    a = xgboost::common::Span<T>(v1->data().get(), v1->size());
    b = xgboost::common::Span<T>(v2->data().get(), v2->size());
-    buff.d_buffers[0] = v1->data().get();
-    buff.d_buffers[1] = v2->data().get();
-    buff.selector = 0;
+    buff = cub::DoubleBuffer<T>(a.data(), b.data());
  }

  size_t Size() const {