Remove various synchronisations from cuda API calls, instrument monitor (#4205)

* Remove various synchronisations from CUDA API calls, and instrument the
monitor with NVTX profiler ranges.
This commit is contained in:
Rory Mitchell
2019-03-10 15:01:23 +13:00
committed by GitHub
parent f83e62dca5
commit 4eeeded7d1
9 changed files with 116 additions and 104 deletions

View File

@@ -308,7 +308,7 @@ class DVec {
}
safe_cuda(cudaSetDevice(this->DeviceIdx()));
if (other.DeviceIdx() == this->DeviceIdx()) {
dh::safe_cuda(cudaMemcpy(this->Data(), other.Data(),
dh::safe_cuda(cudaMemcpyAsync(this->Data(), other.Data(),
other.Size() * sizeof(T),
cudaMemcpyDeviceToDevice));
} else {
@@ -338,7 +338,7 @@ class DVec {
throw std::runtime_error(
"Cannot copy assign vector to dvec, sizes are different");
}
safe_cuda(cudaMemcpy(this->Data(), begin.get(), Size() * sizeof(T),
safe_cuda(cudaMemcpyAsync(this->Data(), begin.get(), Size() * sizeof(T),
cudaMemcpyDefault));
}
};