Remove various synchronisations from CUDA API calls, instrument monitor (#4205)
* Remove various synchronisations from CUDA API calls, instrument monitor with NVTX profiler ranges.
This commit is contained in:
@@ -145,8 +145,6 @@ class Transform {
|
||||
static_cast<int>(dh::DivRoundUp(*(range_.end()), kBlockThreads));
|
||||
detail::LaunchCUDAKernel<<<GRID_SIZE, kBlockThreads>>>(
|
||||
_func, shard_range, UnpackHDV(_vectors, device)...);
|
||||
dh::safe_cuda(cudaGetLastError());
|
||||
dh::safe_cuda(cudaDeviceSynchronize());
|
||||
}
|
||||
}
|
||||
#else
|
||||
|
||||
Reference in New Issue
Block a user