From 1de5734d4ca89305c88bc612291a43d857038364 Mon Sep 17 00:00:00 2001 From: Hendrik Groove Date: Mon, 21 Oct 2024 00:08:50 +0200 Subject: [PATCH] more logging --- src/common/device_helpers.hip.h | 38 ++++++++++++++++++++++++++++----- 1 file changed, 33 insertions(+), 5 deletions(-) diff --git a/src/common/device_helpers.hip.h b/src/common/device_helpers.hip.h index 5e5ceb13d..432ad70ae 100644 --- a/src/common/device_helpers.hip.h +++ b/src/common/device_helpers.hip.h @@ -983,6 +983,18 @@ auto Reduce(Policy policy, InputIt first, InputIt second, Init init, Func reduce using Ty = std::remove_cv_t; Ty aggregate = init; + // Get the HIP stream from the policy + hipStream_t stream = thrust::hip::stream(policy); + std::cerr << "HIP stream: " << stream << std::endl; + + // Check stream validity + hipError_t stream_err = hipStreamQuery(stream); + if (stream_err != hipSuccess && stream_err != hipErrorNotReady) { + std::cerr << "Invalid stream: " << hipGetErrorString(stream_err) << std::endl; + } else { + std::cerr << "Stream is valid" << std::endl; + } + for (size_t offset = 0; offset < size; offset += kLimit) { auto begin_it = first + offset; auto end_it = first + std::min(offset + kLimit, size); @@ -991,16 +1003,32 @@ auto Reduce(Policy policy, InputIt first, InputIt second, Init init, Func reduce std::cerr << "Processing batch: offset=" << offset << ", batch_size=" << batch_size << std::endl; - hipError_t err = hipStreamQuery(stream); - if (err != hipSuccess && err != hipErrorNotReady) { - std::cerr << "Invalid stream: " << hipGetErrorString(err) << std::endl; - } - try { + // Get the raw pointers for debugging + auto raw_begin = thrust::raw_pointer_cast(&*begin_it); + auto raw_end = thrust::raw_pointer_cast(&*end_it); + std::cerr << "Raw pointers - begin: " << raw_begin << ", end: " << raw_end << std::endl; + + // Check if the pointers are valid device pointers + hipPointerAttribute_t attrs; + hipError_t ptr_err = hipPointerGetAttributes(&attrs, raw_begin); + if (ptr_err != hipSuccess) { + std::cerr << "Invalid begin pointer: " << hipGetErrorString(ptr_err) << std::endl; + } else { + std::cerr << "Valid begin pointer, memory type: " << attrs.type << std::endl; + } + auto ret = thrust::reduce(policy, begin_it, end_it, init, reduce_op); aggregate = reduce_op(aggregate, ret); + + std::cerr << "Batch reduction completed successfully" << std::endl; } catch (const std::exception& e) { std::cerr << "Exception in thrust::reduce: " << e.what() << std::endl; + + // Get the last HIP error + hipError_t last_error = hipGetLastError(); + std::cerr << "Last HIP error: " << hipGetErrorString(last_error) << std::endl; + throw; } }