From 20a9c223b641675d5a53b20bf07cad0198898d0f Mon Sep 17 00:00:00 2001
From: Hendrik Groove
Date: Mon, 21 Oct 2024 21:52:34 +0200
Subject: [PATCH] remove logging

---
 CMakeLists.txt                  |  2 +-
 src/common/device_helpers.hip.h | 28 -----------------
 src/data/array_interface.cu     | 15 ---------
 src/learner.cc                  | 12 -------
 src/objective/regression_obj.cu | 55 ---------------------------------
 5 files changed, 1 insertion(+), 111 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 792cb62d0..2a56f8bb9 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -260,7 +260,7 @@ if (USE_HIP)
   set(CMAKE_HIP_FLAGS "${CMAKE_HIP_FLAGS} -Wunused-result -w")
   set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D__HIP_PLATFORM_AMD__")
   set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -I${HIP_INCLUDE_DIRS}")
-  set(CMAKE_HIP_SEPARABLE_COMPILATION ON)
+  #set(CMAKE_HIP_SEPARABLE_COMPILATION ON)
 
   add_subdirectory(${PROJECT_SOURCE_DIR}/rocgputreeshap)
 endif (USE_HIP)
diff --git a/src/common/device_helpers.hip.h b/src/common/device_helpers.hip.h
index 5b848f6ea..8003cca7f 100644
--- a/src/common/device_helpers.hip.h
+++ b/src/common/device_helpers.hip.h
@@ -579,8 +579,6 @@ xgboost::common::Span<T> LazyResize(xgboost::Context const *ctx,
 template <typename T>
 void CopyDeviceSpanToVector(std::vector<T> *dst, xgboost::common::Span<T const> src) {
   CHECK_EQ(dst->size(), src.size());
-  std::cerr << "CopyDeviceSpanToVector: Copying " << src.size() * sizeof(T)
-            << " bytes from device to host" << std::endl;
   dh::safe_cuda(hipMemcpyAsync(dst->data(), src.data(), dst->size() * sizeof(T),
                                hipMemcpyDeviceToHost));
 }
@@ -609,8 +607,6 @@ void CopyToD(HContainer const &h, DContainer *d) {
   using HVT = std::remove_cv_t<typename HContainer::value_type>;
   using DVT = std::remove_cv_t<typename DContainer::value_type>;
   static_assert(std::is_same<HVT, DVT>::value, "Host and device containers must have same value type.");
-  std::cerr << "CopyToD: Copying " << h.size() * sizeof(HVT)
-            << " bytes from host to device" << std::endl;
   dh::safe_cuda(hipMemcpyAsync(d->data().get(), h.data(), h.size() * sizeof(HVT),
                                hipMemcpyHostToDevice));
 }
@@ -661,7 +657,6 @@ struct PinnedMemory {
  */
 template <typename T>
 typename std::iterator_traits<T>::value_type SumReduction(T in, int nVals) {
-  std::cerr << "Entering SumReduction, nVals: " << nVals << std::endl;
   using ValueT = typename std::iterator_traits<T>::value_type;
   size_t tmpSize {0};
 
@@ -669,21 +664,14 @@ typename std::iterator_traits<T>::value_type SumReduction(T in, int nVals) {
 
   try {
     dh::safe_cuda(hipcub::DeviceReduce::Sum(nullptr, tmpSize, in, dummy_out, nVals));
-    std::cerr << "Temporary storage size: " << tmpSize << std::endl;
-
     TemporaryArray<char> temp(tmpSize + sizeof(ValueT));
     auto ptr = reinterpret_cast<ValueT *>(temp.data().get()) + 1;
-
     dh::safe_cuda(hipcub::DeviceReduce::Sum(
         reinterpret_cast<void *>(ptr), tmpSize, in, reinterpret_cast<ValueT *>(temp.data().get()), nVals));
-
     ValueT sum;
     dh::safe_cuda(hipMemcpy(&sum, temp.data().get(), sizeof(ValueT), hipMemcpyDeviceToHost));
-
-    std::cerr << "SumReduction completed successfully" << std::endl;
     return sum;
   } catch (const std::exception& e) {
-    std::cerr << "Exception in SumReduction: " << e.what() << std::endl;
     throw;
   }
 }
@@ -971,10 +959,8 @@ size_t SegmentedUniqueByKey(
 
 template <typename Policy, typename InputIt, typename Init, typename Func>
 auto Reduce(Policy policy, InputIt first, InputIt second, Init init, Func reduce_op) {
-  std::cerr << "Entering Reduce function" << std::endl;
   size_t constexpr kLimit = std::numeric_limits<int32_t>::max() / 2;
   size_t size = std::distance(first, second);
-  std::cerr << "Total size for reduction: " << size << std::endl;
   using Ty = std::remove_cv_t<Init>;
   Ty aggregate = init;
 
@@ -984,35 +970,21 @@ auto Reduce(Policy policy, InputIt first, InputIt second, Init init, Func reduce
     auto end_it = first + std::min(offset + kLimit, size);
     size_t batch_size = std::distance(begin_it, end_it);
     CHECK_LE(batch_size, size);
-
-    std::cerr << "Processing batch: offset=" << offset << ", batch_size=" << batch_size << std::endl;
-
     try {
       // Print the iterator types
-      std::cerr << "Iterator types - begin: " << typeid(begin_it).name()
-                << ", end: " << typeid(end_it).name() << std::endl;
-
       auto ret = thrust::reduce(policy, begin_it, end_it, init, reduce_op);
       aggregate = reduce_op(aggregate, ret);
-
-      std::cerr << "Batch reduction completed successfully" << std::endl;
     } catch (const thrust::system_error& e) {
-      std::cerr << "Thrust system error in reduce: " << e.what() << std::endl;
-      std::cerr << "Error code: " << e.code() << std::endl;
       throw;
     } catch (const std::exception& e) {
-      std::cerr << "Exception in thrust::reduce: " << e.what() << std::endl;
       throw;
     }
 
     // Check for any HIP errors after the reduction
     hipError_t hip_err = hipGetLastError();
     if (hip_err != hipSuccess) {
-      std::cerr << "HIP error after reduction: " << hipGetErrorString(hip_err) << std::endl;
     }
   }
-
-  std::cerr << "Exiting Reduce function" << std::endl;
   return aggregate;
 }
 
diff --git a/src/data/array_interface.cu b/src/data/array_interface.cu
index 11baf4c1f..2058a72fd 100644
--- a/src/data/array_interface.cu
+++ b/src/data/array_interface.cu
@@ -62,50 +62,35 @@ void ArrayInterfaceHandler::SyncCudaStream(std::int64_t stream) {
 }
 
 bool ArrayInterfaceHandler::IsCudaPtr(void const* ptr) {
-  std::cerr << "Entering IsCudaPtr with ptr: " << ptr << std::endl;
   if (!ptr) {
-    std::cerr << "Pointer is null, returning false" << std::endl;
     return false;
   }
 
   // Check if the pointer is within the process's address space
   uintptr_t ptr_value = reinterpret_cast<uintptr_t>(ptr);
   uintptr_t process_max_addr = (uintptr_t)-1;
-  std::cerr << "Pointer value: " << ptr_value << ", Max address: " << process_max_addr << std::endl;
 
 #if defined(XGBOOST_USE_HIP)
   hipPointerAttribute_t attr;
-  std::cerr << "Calling hipPointerGetAttributes" << std::endl;
   auto err = hipPointerGetAttributes(&attr, ptr);
-  std::cerr << "hipPointerGetAttributes returned: " << hipGetErrorString(err) << std::endl;
 
   if (err == hipErrorInvalidValue) {
-    std::cerr << "Invalid pointer (hipErrorInvalidValue), returning false" << std::endl;
     return false;
   } else if (err == hipSuccess) {
-    std::cerr << "Pointer attributes obtained successfully" << std::endl;
-    std::cerr << "Memory type: " << attr.type << std::endl;
     switch (attr.type) {
       case hipMemoryTypeUnregistered:
-        std::cerr << "Memory type is Unregistered, returning false" << std::endl;
         return false;
       case hipMemoryTypeHost:
-        std::cerr << "Memory type is Host, returning false" << std::endl;
         return false;
      case hipMemoryTypeDevice:
-        std::cerr << "Memory type is Device, returning true" << std::endl;
         return true;
      case hipMemoryTypeManaged:
-        std::cerr << "Memory type is Managed, returning true" << std::endl;
         return true;
      default:
-        std::cerr << "Unknown memory type: " << attr.type << std::endl;
         return false;
     }
   } else {
-    std::cerr << "hipPointerGetAttributes failed with error: "
-              << hipGetErrorString(err) << std::endl;
     return false;
   }
 
 #elif defined(XGBOOST_USE_CUDA)
diff --git a/src/learner.cc b/src/learner.cc
index e2187c0ff..02b3bc569 100644
--- a/src/learner.cc
+++ b/src/learner.cc
@@ -1265,54 +1265,42 @@ class LearnerImpl : public LearnerIO {
   }
 
   void UpdateOneIter(int iter, std::shared_ptr<DMatrix> train) override {
-    std::cerr << "Entering UpdateOneIter, iteration: " << iter << std::endl;
     monitor_.Start("UpdateOneIter");
     TrainingObserver::Instance().Update(iter);
 
-    std::cerr << "Configuring..." << std::endl;
     this->Configure();
-    std::cerr << "Initializing base score..." << std::endl;
     this->InitBaseScore(train.get());
 
     if (ctx_.seed_per_iteration) {
-      std::cerr << "Setting seed for iteration..." << std::endl;
       common::GlobalRandom().seed(ctx_.seed * kRandSeedMagic + iter);
     }
 
-    std::cerr << "Validating DMatrix..." << std::endl;
     this->ValidateDMatrix(train.get(), true);
 
-    std::cerr << "Caching predictions..." << std::endl;
     auto& predt = prediction_container_.Cache(train, ctx_.Device());
 
     monitor_.Start("PredictRaw");
-    std::cerr << "Predicting raw values..." << std::endl;
     this->PredictRaw(train.get(), &predt, true, 0, 0);
     TrainingObserver::Instance().Observe(predt.predictions, "Predictions");
     monitor_.Stop("PredictRaw");
 
     monitor_.Start("GetGradient");
-    std::cerr << "Getting gradients..." << std::endl;
     try {
       GetGradient(predt.predictions, train->Info(), iter, &gpair_);
     } catch (const std::exception& e) {
-      std::cerr << "Exception in GetGradient: " << e.what() << std::endl;
       throw;
     }
 
     monitor_.Stop("GetGradient");
     TrainingObserver::Instance().Observe(*gpair_.Data(), "Gradients");
 
-    std::cerr << "Performing boosting..." << std::endl;
     try {
       gbm_->DoBoost(train.get(), &gpair_, &predt, obj_.get());
     } catch (const std::exception& e) {
-      std::cerr << "Exception in DoBoost: " << e.what() << std::endl;
       throw;
     }
 
     monitor_.Stop("UpdateOneIter");
-    std::cerr << "Exiting UpdateOneIter" << std::endl;
   }
 
   void BoostOneIter(int iter, std::shared_ptr<DMatrix> train,
diff --git a/src/objective/regression_obj.cu b/src/objective/regression_obj.cu
index c7a33cb02..e0fa0101e 100644
--- a/src/objective/regression_obj.cu
+++ b/src/objective/regression_obj.cu
@@ -68,31 +68,15 @@ class RegLossObj : public FitIntercept {
 
  public:
   void ValidateLabel(MetaInfo const& info) {
-    std::cerr << "Entering ValidateLabel function" << std::endl;
-    std::cerr << "Number of rows: " << info.num_row_ << std::endl;
-    std::cerr << "Label shape: " << info.labels.Shape()[0] << "x" << info.labels.Shape()[1] << std::endl;
-
-    // Check GPU memory
-    size_t free, total;
-    if (hipMemGetInfo(&free, &total) == hipSuccess) {
-      std::cerr << "GPU Memory - Free: " << free << ", Total: " << total << std::endl;
-    } else {
-      std::cerr << "Failed to get GPU memory info" << std::endl;
-    }
-
     auto label = info.labels.View(ctx_->Device());
-    std::cerr << "Label device: " << (ctx_->Device().IsCUDA() ? "GPU" : "CPU") << std::endl;
"GPU" : "CPU") << std::endl; - std::cerr << "Label data pointer: " << label.Values().data() << std::endl; bool valid = false; try { valid = ctx_->DispatchDevice( [&] { - std::cerr << "Validating labels on CPU" << std::endl; return std::all_of(linalg::cbegin(label), linalg::cend(label), [](float y) -> bool { if (!std::isfinite(y)) { - std::cerr << "Non-finite label value found: " << y << std::endl; return false; } return Loss::CheckLabel(y); @@ -100,9 +84,7 @@ void ValidateLabel(MetaInfo const& info) { }, [&] { #if defined(XGBOOST_USE_CUDA) || defined(XGBOOST_USE_HIP) - std::cerr << "Validating labels on GPU" << std::endl; auto cuctx = ctx_->CUDACtx(); - std::cerr << "CUDA context pointer: " << cuctx << std::endl; auto it = dh::MakeTransformIterator( thrust::make_counting_iterator(0ul), [=] XGBOOST_DEVICE(std::size_t i) -> bool { @@ -115,12 +97,9 @@ void ValidateLabel(MetaInfo const& info) { return Loss::CheckLabel(y); }); - std::cerr << "Starting GPU reduction" << std::endl; bool result = dh::Reduce(cuctx->CTP(), it, it + label.Size(), true, thrust::logical_and<>{}); - std::cerr << "GPU reduction completed" << std::endl; return result; #else - std::cerr << "GPU support not enabled" << std::endl; common::AssertGPUSupport(); return false; #endif // defined(XGBOOST_USE_CUDA) || defined(XGBOOST_USE_HIP) @@ -130,17 +109,10 @@ void ValidateLabel(MetaInfo const& info) { valid = false; } - std::cerr << "Label validation result: " << (valid ? "Valid" : "Invalid") << std::endl; - if (!valid) { - std::cerr << "Invalid labels detected. Error message: " << Loss::LabelErrorMsg() << std::endl; - // Print GPU error info hipError_t error = hipGetLastError(); - std::cerr << "Last GPU error: " << hipGetErrorString(error) << std::endl; LOG(FATAL) << Loss::LabelErrorMsg(); } - - std::cerr << "Exiting ValidateLabel function" << std::endl; } // 0 - scale_pos_weight, 1 - is_null_weight RegLossObj(): additional_input_(2) {} @@ -676,40 +648,18 @@ class MeanAbsoluteError : public ObjFunction { void GetGradient(HostDeviceVector const& preds, const MetaInfo& info, std::int32_t iter, linalg::Matrix* out_gpair) override { - std::cerr << "Entering GetGradient, iteration: " << iter << std::endl; - try { - GPU_CHECK_LAST(); // Check for any previous GPU errors - - std::cerr << "Checking regression inputs..." << std::endl; CheckRegInputs(info, preds); - - std::cerr << "Setting up labels..." << std::endl; auto labels = info.labels.View(ctx_->Device()); - std::cerr << "Labels shape: " << labels.Shape()[0] << "x" << labels.Shape()[1] << std::endl; - - std::cerr << "Setting up output gradient pairs..." << std::endl; out_gpair->SetDevice(ctx_->Device()); out_gpair->Reshape(info.num_row_, this->Targets(info)); auto gpair = out_gpair->View(ctx_->Device()); - std::cerr << "Gradient pairs shape: " << gpair.Shape()[0] << "x" << gpair.Shape()[1] << std::endl; - - GPU_CHECK_LAST(); // Check for GPU errors after memory operations - - std::cerr << "Setting up predictions..." << std::endl; preds.SetDevice(ctx_->Device()); auto predt = linalg::MakeTensorView(ctx_, &preds, info.num_row_, this->Targets(info)); - std::cerr << "Predictions shape: " << predt.Shape()[0] << "x" << predt.Shape()[1] << std::endl; - std::cerr << "Setting up weights..." << std::endl; info.weights_.SetDevice(ctx_->Device()); common::OptionalWeights weight{ctx_->IsCUDA() ? 
                                                      : info.weights_.ConstHostSpan()};
-      std::cerr << "Weights size: " << weight.Size() << std::endl;
-
-      GPU_CHECK_LAST();  // Check for GPU errors before kernel launch
-
-      std::cerr << "Running ElementWiseKernel..." << std::endl;
       linalg::ElementWiseKernel(
           ctx_, labels, [=] XGBOOST_DEVICE(std::size_t i, std::size_t j) mutable {
             auto sign = [](auto x) {
@@ -721,16 +671,11 @@ void GetGradient(HostDeviceVector<float> const& preds, const MetaInfo& info,
             gpair(i, j) = GradientPair{grad, hess};
           });
 
-      GPU_CHECK_LAST();  // Check for GPU errors after kernel execution
-
-      std::cerr << "ElementWiseKernel completed successfully" << std::endl;
     } catch (const std::exception& e) {
       std::cerr << "Exception in GetGradient: " << e.what() << std::endl;
       GPU_CHECK_LAST();  // Check for GPU errors in case of exception
       throw;
     }
-
-    std::cerr << "Exiting GetGradient" << std::endl;
   }
 
   void InitEstimation(MetaInfo const& info, linalg::Tensor<float, 1>* base_margin) const override {