From 20a9c223b641675d5a53b20bf07cad0198898d0f Mon Sep 17 00:00:00 2001
From: Hendrik Groove
Date: Mon, 21 Oct 2024 21:52:34 +0200
Subject: [PATCH] remove logging

---
 CMakeLists.txt                  |  2 +-
 src/common/device_helpers.hip.h | 28 -----------------
 src/data/array_interface.cu     | 15 ---------
 src/learner.cc                  | 12 -------
 src/objective/regression_obj.cu | 55 ---------------------------------
 5 files changed, 1 insertion(+), 111 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 792cb62d0..2a56f8bb9 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -260,7 +260,7 @@ if (USE_HIP)
   set(CMAKE_HIP_FLAGS "${CMAKE_HIP_FLAGS} -Wunused-result -w")
   set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D__HIP_PLATFORM_AMD__")
   set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -I${HIP_INCLUDE_DIRS}")
-  set(CMAKE_HIP_SEPARABLE_COMPILATION ON)
+  #set(CMAKE_HIP_SEPARABLE_COMPILATION ON)
 
   add_subdirectory(${PROJECT_SOURCE_DIR}/rocgputreeshap)
 endif (USE_HIP)
diff --git a/src/common/device_helpers.hip.h b/src/common/device_helpers.hip.h
index 5b848f6ea..8003cca7f 100644
--- a/src/common/device_helpers.hip.h
+++ b/src/common/device_helpers.hip.h
@@ -579,8 +579,6 @@ xgboost::common::Span<T> LazyResize(xgboost::Context const *ctx,
 template <typename T>
 void CopyDeviceSpanToVector(std::vector<T> *dst, xgboost::common::Span<T const> src) {
   CHECK_EQ(dst->size(), src.size());
-  std::cerr << "CopyDeviceSpanToVector: Copying " << src.size() * sizeof(T)
-            << " bytes from device to host" << std::endl;
   dh::safe_cuda(hipMemcpyAsync(dst->data(), src.data(), dst->size() * sizeof(T),
                                hipMemcpyDeviceToHost));
 }
@@ -609,8 +607,6 @@ void CopyToD(HContainer const &h, DContainer *d) {
   using HVT = std::remove_cv_t<typename HContainer::value_type>;
   using DVT = std::remove_cv_t<typename DContainer::value_type>;
   static_assert(std::is_same<HVT, DVT>::value, "Host and device containers must have same value type.");
-  std::cerr << "CopyToD: Copying " << h.size() * sizeof(HVT)
-            << " bytes from host to device" << std::endl;
   dh::safe_cuda(hipMemcpyAsync(d->data().get(), h.data(), h.size() * sizeof(HVT),
                                hipMemcpyHostToDevice));
 }
@@ -661,7 +657,6 @@ struct PinnedMemory {
  */
 template <typename T>
 typename std::iterator_traits<T>::value_type SumReduction(T in, int nVals) {
-  std::cerr << "Entering SumReduction, nVals: " << nVals << std::endl;
   using ValueT = typename std::iterator_traits<T>::value_type;
   size_t tmpSize {0};
 
@@ -669,21 +664,14 @@ typename std::iterator_traits<T>::value_type SumReduction(T in, int nVals) {
 
   try {
     dh::safe_cuda(hipcub::DeviceReduce::Sum(nullptr, tmpSize, in, dummy_out, nVals));
-    std::cerr << "Temporary storage size: " << tmpSize << std::endl;
-
     TemporaryArray<char> temp(tmpSize + sizeof(ValueT));
     auto ptr = reinterpret_cast<ValueT *>(temp.data().get()) + 1;
-
     dh::safe_cuda(hipcub::DeviceReduce::Sum(
         reinterpret_cast<void *>(ptr), tmpSize, in, reinterpret_cast<ValueT *>(temp.data().get()), nVals));
-
     ValueT sum;
     dh::safe_cuda(hipMemcpy(&sum, temp.data().get(), sizeof(ValueT), hipMemcpyDeviceToHost));
-
-    std::cerr << "SumReduction completed successfully" << std::endl;
     return sum;
   } catch (const std::exception& e) {
-    std::cerr << "Exception in SumReduction: " << e.what() << std::endl;
     throw;
   }
 }
@@ -971,10 +959,8 @@ size_t SegmentedUniqueByKey(
 
 template <typename Policy, typename InputIt, typename Init, typename Func>
 auto Reduce(Policy policy, InputIt first, InputIt second, Init init, Func reduce_op) {
-  std::cerr << "Entering Reduce function" << std::endl;
   size_t constexpr kLimit = std::numeric_limits<int32_t>::max() / 2;
   size_t size = std::distance(first, second);
-  std::cerr << "Total size for reduction: " << size << std::endl;
   using Ty = std::remove_cv_t<Init>;
   Ty aggregate = init;
 
@@ -984,35 +970,21 @@ auto Reduce(Policy policy, InputIt first, InputIt second, Init init, Func reduce
     auto end_it = first + std::min(offset + kLimit, size);
     size_t batch_size = std::distance(begin_it, end_it);
     CHECK_LE(batch_size, size);
-
-    std::cerr << "Processing batch: offset=" << offset << ", batch_size=" << batch_size << std::endl;
-
     try {
       // Print the iterator types
-      std::cerr << "Iterator types - begin: " << typeid(begin_it).name()
-                << ", end: " << typeid(end_it).name() << std::endl;
-
       auto ret = thrust::reduce(policy, begin_it, end_it, init, reduce_op);
       aggregate = reduce_op(aggregate, ret);
-
-      std::cerr << "Batch reduction completed successfully" << std::endl;
     } catch (const thrust::system_error& e) {
-      std::cerr << "Thrust system error in reduce: " << e.what() << std::endl;
-      std::cerr << "Error code: " << e.code() << std::endl;
       throw;
     } catch (const std::exception& e) {
-      std::cerr << "Exception in thrust::reduce: " << e.what() << std::endl;
       throw;
     }
 
     // Check for any HIP errors after the reduction
     hipError_t hip_err = hipGetLastError();
     if (hip_err != hipSuccess) {
-      std::cerr << "HIP error after reduction: " << hipGetErrorString(hip_err) << std::endl;
     }
   }
-
-  std::cerr << "Exiting Reduce function" << std::endl;
   return aggregate;
 }
 
diff --git a/src/data/array_interface.cu b/src/data/array_interface.cu
index 11baf4c1f..2058a72fd 100644
--- a/src/data/array_interface.cu
+++ b/src/data/array_interface.cu
@@ -62,50 +62,35 @@ void ArrayInterfaceHandler::SyncCudaStream(std::int64_t stream) {
 }
 
 bool ArrayInterfaceHandler::IsCudaPtr(void const* ptr) {
-  std::cerr << "Entering IsCudaPtr with ptr: " << ptr << std::endl;
   if (!ptr) {
-    std::cerr << "Pointer is null, returning false" << std::endl;
     return false;
   }
 
   // Check if the pointer is within the process's address space
   uintptr_t ptr_value = reinterpret_cast<uintptr_t>(ptr);
   uintptr_t process_max_addr = (uintptr_t)-1;
-  std::cerr << "Pointer value: " << ptr_value << ", Max address: " << process_max_addr << std::endl;
 
 #if defined(XGBOOST_USE_HIP)
   hipPointerAttribute_t attr;
-  std::cerr << "Calling hipPointerGetAttributes" << std::endl;
   auto err = hipPointerGetAttributes(&attr, ptr);
-  std::cerr << "hipPointerGetAttributes returned: " << hipGetErrorString(err) << std::endl;
 
   if (err == hipErrorInvalidValue) {
-    std::cerr << "Invalid pointer (hipErrorInvalidValue), returning false" << std::endl;
     return false;
   } else if (err == hipSuccess) {
-    std::cerr << "Pointer attributes obtained successfully" << std::endl;
-    std::cerr << "Memory type: " << attr.type << std::endl;
     switch (attr.type) {
       case hipMemoryTypeUnregistered:
-        std::cerr << "Memory type is Unregistered, returning false" << std::endl;
         return false;
       case hipMemoryTypeHost:
-        std::cerr << "Memory type is Host, returning false" << std::endl;
         return false;
      case hipMemoryTypeDevice:
-        std::cerr << "Memory type is Device, returning true" << std::endl;
         return true;
      case hipMemoryTypeManaged:
-        std::cerr << "Memory type is Managed, returning true" << std::endl;
         return true;
      default:
-        std::cerr << "Unknown memory type: " << attr.type << std::endl;
         return false;
     }
   } else {
-    std::cerr << "hipPointerGetAttributes failed with error: "
-              << hipGetErrorString(err) << std::endl;
     return false;
   }
 
 #elif defined(XGBOOST_USE_CUDA)
diff --git a/src/learner.cc b/src/learner.cc
index e2187c0ff..02b3bc569 100644
--- a/src/learner.cc
+++ b/src/learner.cc
@@ -1265,54 +1265,42 @@ class LearnerImpl : public LearnerIO {
   }
 
   void UpdateOneIter(int iter, std::shared_ptr<DMatrix> train) override {
-    std::cerr << "Entering UpdateOneIter, iteration: " << iter << std::endl;
     monitor_.Start("UpdateOneIter");
     TrainingObserver::Instance().Update(iter);
 
-    std::cerr << "Configuring..." << std::endl;
     this->Configure();
-    std::cerr << "Initializing base score..." << std::endl;
     this->InitBaseScore(train.get());
 
     if (ctx_.seed_per_iteration) {
-      std::cerr << "Setting seed for iteration..." << std::endl;
       common::GlobalRandom().seed(ctx_.seed * kRandSeedMagic + iter);
     }
 
-    std::cerr << "Validating DMatrix..." << std::endl;
     this->ValidateDMatrix(train.get(), true);
 
-    std::cerr << "Caching predictions..." << std::endl;
     auto& predt = prediction_container_.Cache(train, ctx_.Device());
 
     monitor_.Start("PredictRaw");
-    std::cerr << "Predicting raw values..." << std::endl;
     this->PredictRaw(train.get(), &predt, true, 0, 0);
     TrainingObserver::Instance().Observe(predt.predictions, "Predictions");
     monitor_.Stop("PredictRaw");
 
     monitor_.Start("GetGradient");
-    std::cerr << "Getting gradients..." << std::endl;
     try {
       GetGradient(predt.predictions, train->Info(), iter, &gpair_);
     } catch (const std::exception& e) {
-      std::cerr << "Exception in GetGradient: " << e.what() << std::endl;
       throw;
     }
 
     monitor_.Stop("GetGradient");
     TrainingObserver::Instance().Observe(*gpair_.Data(), "Gradients");
 
-    std::cerr << "Performing boosting..." << std::endl;
     try {
       gbm_->DoBoost(train.get(), &gpair_, &predt, obj_.get());
     } catch (const std::exception& e) {
-      std::cerr << "Exception in DoBoost: " << e.what() << std::endl;
       throw;
     }
 
     monitor_.Stop("UpdateOneIter");
-    std::cerr << "Exiting UpdateOneIter" << std::endl;
   }
 
   void BoostOneIter(int iter, std::shared_ptr<DMatrix> train,
diff --git a/src/objective/regression_obj.cu b/src/objective/regression_obj.cu
index c7a33cb02..e0fa0101e 100644
--- a/src/objective/regression_obj.cu
+++ b/src/objective/regression_obj.cu
@@ -68,31 +68,15 @@ class RegLossObj : public FitIntercept {
 
  public:
   void ValidateLabel(MetaInfo const& info) {
-    std::cerr << "Entering ValidateLabel function" << std::endl;
-    std::cerr << "Number of rows: " << info.num_row_ << std::endl;
-    std::cerr << "Label shape: " << info.labels.Shape()[0] << "x" << info.labels.Shape()[1] << std::endl;
-
-    // Check GPU memory
-    size_t free, total;
-    if (hipMemGetInfo(&free, &total) == hipSuccess) {
-      std::cerr << "GPU Memory - Free: " << free << ", Total: " << total << std::endl;
-    } else {
-      std::cerr << "Failed to get GPU memory info" << std::endl;
-    }
-
     auto label = info.labels.View(ctx_->Device());
-    std::cerr << "Label device: " << (ctx_->Device().IsCUDA() ? "GPU" : "CPU") << std::endl;
"GPU" : "CPU") << std::endl; - std::cerr << "Label data pointer: " << label.Values().data() << std::endl; bool valid = false; try { valid = ctx_->DispatchDevice( [&] { - std::cerr << "Validating labels on CPU" << std::endl; return std::all_of(linalg::cbegin(label), linalg::cend(label), [](float y) -> bool { if (!std::isfinite(y)) { - std::cerr << "Non-finite label value found: " << y << std::endl; return false; } return Loss::CheckLabel(y); @@ -100,9 +84,7 @@ void ValidateLabel(MetaInfo const& info) { }, [&] { #if defined(XGBOOST_USE_CUDA) || defined(XGBOOST_USE_HIP) - std::cerr << "Validating labels on GPU" << std::endl; auto cuctx = ctx_->CUDACtx(); - std::cerr << "CUDA context pointer: " << cuctx << std::endl; auto it = dh::MakeTransformIterator( thrust::make_counting_iterator(0ul), [=] XGBOOST_DEVICE(std::size_t i) -> bool { @@ -115,12 +97,9 @@ void ValidateLabel(MetaInfo const& info) { return Loss::CheckLabel(y); }); - std::cerr << "Starting GPU reduction" << std::endl; bool result = dh::Reduce(cuctx->CTP(), it, it + label.Size(), true, thrust::logical_and<>{}); - std::cerr << "GPU reduction completed" << std::endl; return result; #else - std::cerr << "GPU support not enabled" << std::endl; common::AssertGPUSupport(); return false; #endif // defined(XGBOOST_USE_CUDA) || defined(XGBOOST_USE_HIP) @@ -130,17 +109,10 @@ void ValidateLabel(MetaInfo const& info) { valid = false; } - std::cerr << "Label validation result: " << (valid ? "Valid" : "Invalid") << std::endl; - if (!valid) { - std::cerr << "Invalid labels detected. Error message: " << Loss::LabelErrorMsg() << std::endl; - // Print GPU error info hipError_t error = hipGetLastError(); - std::cerr << "Last GPU error: " << hipGetErrorString(error) << std::endl; LOG(FATAL) << Loss::LabelErrorMsg(); } - - std::cerr << "Exiting ValidateLabel function" << std::endl; } // 0 - scale_pos_weight, 1 - is_null_weight RegLossObj(): additional_input_(2) {} @@ -676,40 +648,18 @@ class MeanAbsoluteError : public ObjFunction { void GetGradient(HostDeviceVector const& preds, const MetaInfo& info, std::int32_t iter, linalg::Matrix* out_gpair) override { - std::cerr << "Entering GetGradient, iteration: " << iter << std::endl; - try { - GPU_CHECK_LAST(); // Check for any previous GPU errors - - std::cerr << "Checking regression inputs..." << std::endl; CheckRegInputs(info, preds); - - std::cerr << "Setting up labels..." << std::endl; auto labels = info.labels.View(ctx_->Device()); - std::cerr << "Labels shape: " << labels.Shape()[0] << "x" << labels.Shape()[1] << std::endl; - - std::cerr << "Setting up output gradient pairs..." << std::endl; out_gpair->SetDevice(ctx_->Device()); out_gpair->Reshape(info.num_row_, this->Targets(info)); auto gpair = out_gpair->View(ctx_->Device()); - std::cerr << "Gradient pairs shape: " << gpair.Shape()[0] << "x" << gpair.Shape()[1] << std::endl; - - GPU_CHECK_LAST(); // Check for GPU errors after memory operations - - std::cerr << "Setting up predictions..." << std::endl; preds.SetDevice(ctx_->Device()); auto predt = linalg::MakeTensorView(ctx_, &preds, info.num_row_, this->Targets(info)); - std::cerr << "Predictions shape: " << predt.Shape()[0] << "x" << predt.Shape()[1] << std::endl; - std::cerr << "Setting up weights..." << std::endl; info.weights_.SetDevice(ctx_->Device()); common::OptionalWeights weight{ctx_->IsCUDA() ? 
                                                      : info.weights_.ConstHostSpan()};
-      std::cerr << "Weights size: " << weight.Size() << std::endl;
-
-      GPU_CHECK_LAST();  // Check for GPU errors before kernel launch
-
-      std::cerr << "Running ElementWiseKernel..." << std::endl;
       linalg::ElementWiseKernel(
           ctx_, labels, [=] XGBOOST_DEVICE(std::size_t i, std::size_t j) mutable {
             auto sign = [](auto x) {
@@ -721,16 +671,11 @@ void GetGradient(HostDeviceVector<float> const& preds, const MetaInfo& info,
             gpair(i, j) = GradientPair{grad, hess};
           });
 
-      GPU_CHECK_LAST();  // Check for GPU errors after kernel execution
-
-      std::cerr << "ElementWiseKernel completed successfully" << std::endl;
     } catch (const std::exception& e) {
       std::cerr << "Exception in GetGradient: " << e.what() << std::endl;
       GPU_CHECK_LAST();  // Check for GPU errors in case of exception
       throw;
     }
-
-    std::cerr << "Exiting GetGradient" << std::endl;
   }
 
   void InitEstimation(MetaInfo const& info, linalg::Tensor<float, 1>* base_margin) const override {