Use caching allocator from RMM, when RMM is enabled (#6131)

Jiaming Yuan 2020-09-18 12:51:49 +08:00 committed by GitHub
parent 6bc9b9dc4f
commit 5384ed85c8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 12 additions and 2 deletions
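In short: when XGBoost is built with XGBOOST_USE_RMM=1, the cub::CachingDeviceAllocator-backed caching allocator is compiled out, and both allocator types draw from RMM's current device resource, so one RMM pool can serve all device allocations. A minimal sketch of how a host program would route allocations through an RMM pool before running GPU code; the setup, pool size, and main() scaffolding are illustrative and not part of this commit:

    #include <rmm/mr/device/cuda_memory_resource.hpp>
    #include <rmm/mr/device/pool_memory_resource.hpp>
    #include <rmm/mr/device/per_device_resource.hpp>

    int main() {
      // Raw cudaMalloc/cudaFree resource at the bottom of the stack.
      rmm::mr::cuda_memory_resource cuda_mr;
      // Sub-allocating pool on top; this is what makes RMM behave like a
      // caching allocator. The 1 GiB initial size is an arbitrary example.
      rmm::mr::pool_memory_resource<rmm::mr::cuda_memory_resource> pool_mr{
          &cuda_mr, 1u << 30};
      // get_current_device_resource() -- the call used in the diff below --
      // will now return &pool_mr.
      rmm::mr::set_current_device_resource(&pool_mr);
      // ... construct XGBoost objects and train on GPU ...
      return 0;
    }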


@@ -406,10 +406,14 @@ struct XGBDefaultDeviceAllocatorImpl : XGBBaseDeviceAllocator<T> {
   }
 #if defined(XGBOOST_USE_RMM) && XGBOOST_USE_RMM == 1
   XGBDefaultDeviceAllocatorImpl()
-      : SuperT(rmm::mr::get_current_device_resource(), cudaStream_t{0}) {}
+      : SuperT(rmm::mr::get_current_device_resource(), cudaStream_t{nullptr}) {}
 #endif  // defined(XGBOOST_USE_RMM) && XGBOOST_USE_RMM == 1
 };

+#if defined(XGBOOST_USE_RMM) && XGBOOST_USE_RMM == 1
+template <typename T>
+using XGBCachingDeviceAllocatorImpl = XGBDefaultDeviceAllocatorImpl<T>;
+#else
 /**
  * \brief Caching memory allocator, uses cub::CachingDeviceAllocator as a back-end and logs
  *  allocations if verbose. Does not initialise memory on construction.
@@ -448,6 +452,7 @@ struct XGBCachingDeviceAllocatorImpl : thrust::device_malloc_allocator<T> {
     // no-op
   }
 };
+#endif  // defined(XGBOOST_USE_RMM) && XGBOOST_USE_RMM == 1
 }  // namespace detail

 // Declare xgboost allocators
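The net effect of the two hunks above: under RMM, XGBCachingDeviceAllocatorImpl becomes a plain alias for XGBDefaultDeviceAllocatorImpl, so "caching" is delegated to whatever resource RMM currently has installed; without RMM, the original cub::CachingDeviceAllocator path is kept unchanged. A sketch of the forwarding shape involved (RmmForwardingAllocator is a hypothetical stand-in for the SuperT base in the diff, not XGBoost's actual class):

    #include <rmm/mr/device/per_device_resource.hpp>
    #include <thrust/device_ptr.h>
    #include <cstddef>

    // Thrust-style allocator whose allocate/deallocate forward to the
    // current RMM device resource.
    template <typename T>
    struct RmmForwardingAllocator {
      using value_type = T;
      using pointer = thrust::device_ptr<T>;

      pointer allocate(std::size_t n) {
        void* p = rmm::mr::get_current_device_resource()->allocate(n * sizeof(T));
        return pointer{static_cast<T*>(p)};
      }
      void deallocate(pointer p, std::size_t n) {
        rmm::mr::get_current_device_resource()->deallocate(p.get(), n * sizeof(T));
      }
    };

Because both allocator aliases now share one RMM resource, a pool configured as in the first sketch serves "default" and "caching" allocations alike. The remaining hunks below belong to the second changed file, the GPU AUC metric.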


@@ -12,6 +12,7 @@
 #include <thrust/iterator/discard_iterator.h>
 #include <cmath>
+#include <array>
 #include <vector>
 #include "metric_common.h"
@@ -379,7 +380,11 @@ struct EvalAucGpu : public Metric {
       }
     });

-    auto nunique_preds = seg_idx.back();
+    std::array<uint32_t, 1> h_nunique_preds;
+    dh::safe_cuda(cudaMemcpyAsync(h_nunique_preds.data(),
+                                  seg_idx.data().get() + seg_idx.size() - 1,
+                                  sizeof(uint32_t), cudaMemcpyDeviceToHost));
+    auto nunique_preds = h_nunique_preds.back();
     ReleaseMemory(seg_idx);

     // Next, accumulate the positive and negative precisions for every prediction group
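This hunk replaces `seg_idx.back()`, which reads a device element through thrust::device_vector's implicit element accessor, with an explicit single-element device-to-host copy checked by dh::safe_cuda. A self-contained sketch of the same pattern (names and sizes here are illustrative, not the file's code):

    #include <thrust/device_vector.h>
    #include <cuda_runtime.h>
    #include <array>
    #include <cstdint>
    #include <cstdio>

    int main() {
      thrust::device_vector<uint32_t> seg_idx(5, 42u);

      // Copy only the trailing element to the host, mirroring the hunk above.
      std::array<uint32_t, 1> h_last;
      cudaMemcpyAsync(h_last.data(), seg_idx.data().get() + seg_idx.size() - 1,
                      sizeof(uint32_t), cudaMemcpyDeviceToHost);
      // cudaMemcpyAsync into pageable host memory is typically synchronous
      // from the host's perspective (the behavior the hunk relies on), but an
      // explicit synchronize makes the ordering unambiguous.
      cudaDeviceSynchronize();

      std::printf("last = %u\n", h_last.back());  // prints 42
      return 0;
    }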