gpu_hist performance fixes (#5558)

* Remove unnecessary CUDA API calls

* Fix histogram memory growth
This commit is contained in:
Rory Mitchell
2020-04-19 12:21:13 +12:00
committed by GitHub
parent e1f22baf8c
commit d6d1035950
7 changed files with 52 additions and 109 deletions

View File

@@ -59,13 +59,6 @@ class ElementWiseMetricsReduction {
#if defined(XGBOOST_USE_CUDA)
~ElementWiseMetricsReduction() {
if (device_ >= 0) {
dh::safe_cuda(cudaSetDevice(device_));
allocator_.Free();
}
}
PackedReduceResult DeviceReduceMetrics(
const HostDeviceVector<bst_float>& weights,
const HostDeviceVector<bst_float>& labels,
@@ -83,8 +76,9 @@ class ElementWiseMetricsReduction {
auto d_policy = policy_;
dh::XGBCachingDeviceAllocator<char> alloc;
PackedReduceResult result = thrust::transform_reduce(
thrust::cuda::par(allocator_),
thrust::cuda::par(alloc),
begin, end,
[=] XGBOOST_DEVICE(size_t idx) {
bst_float weight = is_null_weight ? 1.0f : s_weights[idx];
@@ -130,7 +124,6 @@ class ElementWiseMetricsReduction {
EvalRow policy_;
#if defined(XGBOOST_USE_CUDA)
int device_{-1};
dh::CubMemory allocator_;
#endif // defined(XGBOOST_USE_CUDA)
};

View File

@@ -73,13 +73,6 @@ class MultiClassMetricsReduction {
#if defined(XGBOOST_USE_CUDA)
~MultiClassMetricsReduction() {
if (device_ >= 0) {
dh::safe_cuda(cudaSetDevice(device_));
allocator_.Free();
}
}
PackedReduceResult DeviceReduceMetrics(
const HostDeviceVector<bst_float>& weights,
const HostDeviceVector<bst_float>& labels,
@@ -98,8 +91,9 @@ class MultiClassMetricsReduction {
auto s_label_error = label_error_.GetSpan<int32_t>(1);
s_label_error[0] = 0;
dh::XGBCachingDeviceAllocator<char> alloc;
PackedReduceResult result = thrust::transform_reduce(
thrust::cuda::par(allocator_),
thrust::cuda::par(alloc),
begin, end,
[=] XGBOOST_DEVICE(size_t idx) {
bst_float weight = is_null_weight ? 1.0f : s_weights[idx];
@@ -152,7 +146,6 @@ class MultiClassMetricsReduction {
#if defined(XGBOOST_USE_CUDA)
dh::PinnedMemory label_error_;
int device_{-1};
dh::CubMemory allocator_;
#endif // defined(XGBOOST_USE_CUDA)
};