more logging

This commit is contained in:
Hendrik Groove 2024-10-21 00:08:50 +02:00
parent e2e6b6e71f
commit 1de5734d4c

View File

@ -983,6 +983,18 @@ auto Reduce(Policy policy, InputIt first, InputIt second, Init init, Func reduce
using Ty = std::remove_cv_t<Init>;
Ty aggregate = init;
// Get the HIP stream from the policy
hipStream_t stream = thrust::hip::stream(policy);
std::cerr << "HIP stream: " << stream << std::endl;
// Check stream validity; hipErrorNotReady is accepted too, since it is not treated as an invalid-stream error here
hipError_t stream_err = hipStreamQuery(stream);
if (stream_err != hipSuccess && stream_err != hipErrorNotReady) {
std::cerr << "Invalid stream: " << hipGetErrorString(stream_err) << std::endl;
} else {
std::cerr << "Stream is valid" << std::endl;
}
for (size_t offset = 0; offset < size; offset += kLimit) {
auto begin_it = first + offset;
auto end_it = first + std::min(offset + kLimit, size);
@ -991,16 +1003,32 @@ auto Reduce(Policy policy, InputIt first, InputIt second, Init init, Func reduce
std::cerr << "Processing batch: offset=" << offset << ", batch_size=" << batch_size << std::endl;
hipError_t err = hipStreamQuery(stream);
if (err != hipSuccess && err != hipErrorNotReady) {
std::cerr << "Invalid stream: " << hipGetErrorString(err) << std::endl;
}
try {
// Get the raw pointers for debugging
auto raw_begin = thrust::raw_pointer_cast(&*begin_it);
auto raw_end = thrust::raw_pointer_cast(&*end_it);
std::cerr << "Raw pointers - begin: " << raw_begin << ", end: " << raw_end << std::endl;
// Query the HIP runtime for the attributes of the begin pointer (only raw_begin is checked; raw_end is not queried)
hipPointerAttribute_t attrs;
hipError_t ptr_err = hipPointerGetAttributes(&attrs, raw_begin);
if (ptr_err != hipSuccess) {
std::cerr << "Invalid begin pointer: " << hipGetErrorString(ptr_err) << std::endl;
} else {
std::cerr << "Valid begin pointer, memory type: " << attrs.type << std::endl;
}
auto ret = thrust::reduce(policy, begin_it, end_it, init, reduce_op);
aggregate = reduce_op(aggregate, ret);
std::cerr << "Batch reduction completed successfully" << std::endl;
} catch (const std::exception& e) {
std::cerr << "Exception in thrust::reduce: " << e.what() << std::endl;
// Get the last HIP error
hipError_t last_error = hipGetLastError();
std::cerr << "Last HIP error: " << hipGetErrorString(last_error) << std::endl;
throw;
}
}