Remove various synchronisations from cuda API calls, instrument monitor (#4205)
* Remove various synchronisations from cuda API calls, instrument monitor with nvtx profiler ranges.
This commit is contained in:
@@ -308,7 +308,7 @@ class DVec {
|
||||
}
|
||||
safe_cuda(cudaSetDevice(this->DeviceIdx()));
|
||||
if (other.DeviceIdx() == this->DeviceIdx()) {
|
||||
dh::safe_cuda(cudaMemcpy(this->Data(), other.Data(),
|
||||
dh::safe_cuda(cudaMemcpyAsync(this->Data(), other.Data(),
|
||||
other.Size() * sizeof(T),
|
||||
cudaMemcpyDeviceToDevice));
|
||||
} else {
|
||||
@@ -338,7 +338,7 @@ class DVec {
|
||||
throw std::runtime_error(
|
||||
"Cannot copy assign vector to dvec, sizes are different");
|
||||
}
|
||||
safe_cuda(cudaMemcpy(this->Data(), begin.get(), Size() * sizeof(T),
|
||||
safe_cuda(cudaMemcpyAsync(this->Data(), begin.get(), Size() * sizeof(T),
|
||||
cudaMemcpyDefault));
|
||||
}
|
||||
};
|
||||
|
||||
Reference in New Issue
Block a user