This reverts commit f7f673b00c.
This commit is contained in:
@@ -1176,7 +1176,7 @@ inline void CUDAEvent::Record(CUDAStreamView stream) { // NOLINT
|
||||
dh::safe_cuda(cudaEventRecord(event_, cudaStream_t{stream}));
|
||||
}
|
||||
|
||||
inline CUDAStreamView DefaultStream() { return CUDAStreamView{cudaStreamPerThread}; }
|
||||
inline CUDAStreamView DefaultStream() { return CUDAStreamView{cudaStreamLegacy}; }
|
||||
|
||||
class CUDAStream {
|
||||
cudaStream_t stream_;
|
||||
|
||||
@@ -134,12 +134,12 @@ void LaunchGetColumnSizeKernel(std::int32_t device, IterSpan<BatchIt> batch_iter
|
||||
CHECK(!force_use_u64);
|
||||
auto kernel = GetColumnSizeSharedMemKernel<kBlockThreads, std::uint32_t, BatchIt>;
|
||||
auto grid_size = EstimateGridSize<kBlockThreads>(device, kernel, required_shared_memory);
|
||||
dh::LaunchKernel{grid_size, kBlockThreads, required_shared_memory}(
|
||||
dh::LaunchKernel{grid_size, kBlockThreads, required_shared_memory, dh::DefaultStream()}(
|
||||
kernel, batch_iter, is_valid, out_column_size);
|
||||
} else {
|
||||
auto kernel = GetColumnSizeSharedMemKernel<kBlockThreads, std::size_t, BatchIt>;
|
||||
auto grid_size = EstimateGridSize<kBlockThreads>(device, kernel, required_shared_memory);
|
||||
dh::LaunchKernel{grid_size, kBlockThreads, required_shared_memory}(
|
||||
dh::LaunchKernel{grid_size, kBlockThreads, required_shared_memory, dh::DefaultStream()}(
|
||||
kernel, batch_iter, is_valid, out_column_size);
|
||||
}
|
||||
} else {
|
||||
|
||||
Reference in New Issue
Block a user