Remove various synchronisations from cuda API calls, instrument monitor (#4205)

* Remove various synchronisations from CUDA API calls, and instrument the
monitor with NVTX profiler ranges.
This commit is contained in:
Rory Mitchell
2019-03-10 15:01:23 +13:00
committed by GitHub
parent f83e62dca5
commit 4eeeded7d1
9 changed files with 116 additions and 104 deletions

View File

@@ -145,8 +145,6 @@ class Transform {
static_cast<int>(dh::DivRoundUp(*(range_.end()), kBlockThreads));
detail::LaunchCUDAKernel<<<GRID_SIZE, kBlockThreads>>>(
_func, shard_range, UnpackHDV(_vectors, device)...);
dh::safe_cuda(cudaGetLastError());
dh::safe_cuda(cudaDeviceSynchronize());
}
}
#else