Use caching allocator from RMM, when RMM is enabled (#6131)

Jiaming Yuan 2020-09-18 12:51:49 +08:00 committed by GitHub
parent 6bc9b9dc4f
commit 5384ed85c8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 12 additions and 2 deletions
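In short: when XGBoost is built with XGBOOST_USE_RMM=1, the cub::CachingDeviceAllocator-backed caching allocator is compiled out, and both allocator types draw from RMM's current device resource, so one RMM pool can serve all device allocations. A minimal sketch of how a host program would route allocations through an RMM pool before running GPU code; the setup, pool size, and main() scaffolding are illustrative and not part of this commit:

    #include <rmm/mr/device/cuda_memory_resource.hpp>
    #include <rmm/mr/device/pool_memory_resource.hpp>
    #include <rmm/mr/device/per_device_resource.hpp>

    int main() {
      // Raw cudaMalloc/cudaFree resource at the bottom of the stack.
      rmm::mr::cuda_memory_resource cuda_mr;
      // Sub-allocating pool on top; this is what makes RMM behave like a
      // caching allocator. The 1 GiB initial size is an arbitrary example.
      rmm::mr::pool_memory_resource<rmm::mr::cuda_memory_resource> pool_mr{
          &cuda_mr, 1u << 30};
      // get_current_device_resource() -- the call used in the diff below --
      // will now return &pool_mr.
      rmm::mr::set_current_device_resource(&pool_mr);
      // ... construct XGBoost objects and train on GPU ...
      return 0;
    }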


@@ -406,10 +406,14 @@ struct XGBDefaultDeviceAllocatorImpl : XGBBaseDeviceAllocator<T> {
   }
 #if defined(XGBOOST_USE_RMM) && XGBOOST_USE_RMM == 1
   XGBDefaultDeviceAllocatorImpl()
-      : SuperT(rmm::mr::get_current_device_resource(), cudaStream_t{0}) {}
+      : SuperT(rmm::mr::get_current_device_resource(), cudaStream_t{nullptr}) {}
 #endif  // defined(XGBOOST_USE_RMM) && XGBOOST_USE_RMM == 1
 };

+#if defined(XGBOOST_USE_RMM) && XGBOOST_USE_RMM == 1
+template <typename T>
+using XGBCachingDeviceAllocatorImpl = XGBDefaultDeviceAllocatorImpl<T>;
+#else
 /**
  * \brief Caching memory allocator, uses cub::CachingDeviceAllocator as a back-end and logs
  *  allocations if verbose. Does not initialise memory on construction.
@@ -448,6 +452,7 @@ struct XGBCachingDeviceAllocatorImpl : thrust::device_malloc_allocator<T> {
     // no-op
   }
 };
+#endif  // defined(XGBOOST_USE_RMM) && XGBOOST_USE_RMM == 1
 }  // namespace detail

 // Declare xgboost allocators
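The net effect of the two hunks above: under RMM, XGBCachingDeviceAllocatorImpl becomes a plain alias for XGBDefaultDeviceAllocatorImpl, so "caching" is delegated to whatever resource RMM currently has installed; without RMM, the original cub::CachingDeviceAllocator path is kept unchanged. A sketch of the forwarding shape involved (RmmForwardingAllocator is a hypothetical stand-in for the SuperT base in the diff, not XGBoost's actual class):

    #include <rmm/mr/device/per_device_resource.hpp>
    #include <thrust/device_ptr.h>
    #include <cstddef>

    // Thrust-style allocator whose allocate/deallocate forward to the
    // current RMM device resource.
    template <typename T>
    struct RmmForwardingAllocator {
      using value_type = T;
      using pointer = thrust::device_ptr<T>;

      pointer allocate(std::size_t n) {
        void* p = rmm::mr::get_current_device_resource()->allocate(n * sizeof(T));
        return pointer{static_cast<T*>(p)};
      }
      void deallocate(pointer p, std::size_t n) {
        rmm::mr::get_current_device_resource()->deallocate(p.get(), n * sizeof(T));
      }
    };

Because both allocator aliases now share one RMM resource, a pool configured as in the first sketch serves "default" and "caching" allocations alike. The remaining hunks below belong to the second changed file, the GPU AUC metric.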


@@ -12,6 +12,7 @@
 #include <thrust/iterator/discard_iterator.h>
 #include <cmath>
+#include <array>
 #include <vector>
 #include "metric_common.h"
@@ -379,7 +380,11 @@ struct EvalAucGpu : public Metric {
       }
     });

-    auto nunique_preds = seg_idx.back();
+    std::array<uint32_t, 1> h_nunique_preds;
+    dh::safe_cuda(cudaMemcpyAsync(h_nunique_preds.data(),
+                                  seg_idx.data().get() + seg_idx.size() - 1,
+                                  sizeof(uint32_t), cudaMemcpyDeviceToHost));
+    auto nunique_preds = h_nunique_preds.back();
     ReleaseMemory(seg_idx);

     // Next, accumulate the positive and negative precisions for every prediction group
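This hunk replaces `seg_idx.back()`, which reads a device element through thrust::device_vector's implicit element accessor, with an explicit single-element device-to-host copy checked by dh::safe_cuda. A self-contained sketch of the same pattern (names and sizes here are illustrative, not the file's code):

    #include <thrust/device_vector.h>
    #include <cuda_runtime.h>
    #include <array>
    #include <cstdint>
    #include <cstdio>

    int main() {
      thrust::device_vector<uint32_t> seg_idx(5, 42u);

      // Copy only the trailing element to the host, mirroring the hunk above.
      std::array<uint32_t, 1> h_last;
      cudaMemcpyAsync(h_last.data(), seg_idx.data().get() + seg_idx.size() - 1,
                      sizeof(uint32_t), cudaMemcpyDeviceToHost);
      // cudaMemcpyAsync into pageable host memory is typically synchronous
      // from the host's perspective (the behavior the hunk relies on), but an
      // explicit synchronize makes the ordering unambiguous.
      cudaDeviceSynchronize();

      std::printf("last = %u\n", h_last.back());  // prints 42
      return 0;
    }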