diff --git a/src/objective/regression_obj.cu b/src/objective/regression_obj.cu
index fdb06474e..9ae9b076e 100644
--- a/src/objective/regression_obj.cu
+++ b/src/objective/regression_obj.cu
@@ -36,6 +36,9 @@
 #include "xgboost/tree_model.h"  // RegTree
 
 #include "regression_param.h"
+#include <iostream>   // for std::cerr (debug tracing)
+#include <cmath>      // for std::isfinite
+#include <exception>  // for std::exception
 
 #if defined(XGBOOST_USE_CUDA) || defined(XGBOOST_USE_HIP)
 #include "../common/cuda_context.cuh"  // for CUDAContext
@@ -63,31 +66,63 @@ class RegLossObj : public FitIntercept {
   HostDeviceVector<float> additional_input_;
 
  public:
-  void ValidateLabel(MetaInfo const& info) {
-    auto label = info.labels.View(ctx_->Device());
-    auto valid = ctx_->DispatchDevice(
-        [&] {
-          return std::all_of(linalg::cbegin(label), linalg::cend(label),
-                             [](float y) -> bool { return Loss::CheckLabel(y); });
-        },
-        [&] {
+void ValidateLabel(MetaInfo const& info) {
+  std::cerr << "Entering ValidateLabel function" << std::endl;
+  std::cerr << "Number of rows: " << info.num_row_ << std::endl;
+  std::cerr << "Label shape: " << info.labels.Shape()[0] << "x" << info.labels.Shape()[1] << std::endl;
+
+  auto label = info.labels.View(ctx_->Device());
+  std::cerr << "Label device: " << (ctx_->Device().IsCUDA() ? "GPU" : "CPU") << std::endl;
+
+  bool valid = false;
+  try {
+    valid = ctx_->DispatchDevice(
+        [&] {
+          std::cerr << "Validating labels on CPU" << std::endl;
+          return std::all_of(linalg::cbegin(label), linalg::cend(label),
+                             [](float y) -> bool {
+                               if (!std::isfinite(y)) {
+                                 std::cerr << "Non-finite label value found: " << y << std::endl;
+                                 return false;
+                               }
+                               return Loss::CheckLabel(y);
+                             });
+        },
+        [&] {
 #if defined(XGBOOST_USE_CUDA) || defined(XGBOOST_USE_HIP)
-          auto cuctx = ctx_->CUDACtx();
-          auto it = dh::MakeTransformIterator<bool>(
-              thrust::make_counting_iterator(0ul), [=] XGBOOST_DEVICE(std::size_t i) -> bool {
-                auto [m, n] = linalg::UnravelIndex(i, label.Shape());
-                return Loss::CheckLabel(label(m, n));
-              });
-          return dh::Reduce(cuctx->CTP(), it, it + label.Size(), true, thrust::logical_and<>{});
+          std::cerr << "Validating labels on GPU" << std::endl;
+          auto cuctx = ctx_->CUDACtx();
+          auto it = dh::MakeTransformIterator<bool>(
+              thrust::make_counting_iterator(0ul), [=] XGBOOST_DEVICE(std::size_t i) -> bool {
+                auto [m, n] = linalg::UnravelIndex(i, label.Shape());
+                float y = label(m, n);
+                if (!isfinite(y)) {
+                  printf("Non-finite label value found on GPU: %f\n", y);
+                  return false;
+                }
+                return Loss::CheckLabel(y);
+              });
+          return dh::Reduce(cuctx->CTP(), it, it + label.Size(), true, thrust::logical_and<>{});
 #else
-          common::AssertGPUSupport();
-          return false;
-#endif  // defined(XGBOOST_USE_CUDA)
-        });
-    if (!valid) {
-      LOG(FATAL) << Loss::LabelErrorMsg();
-    }
+          std::cerr << "GPU support not enabled" << std::endl;
+          common::AssertGPUSupport();
+          return false;
+#endif  // defined(XGBOOST_USE_CUDA) || defined(XGBOOST_USE_HIP)
+        });
+  } catch (const std::exception& e) {
+    std::cerr << "Exception during label validation: " << e.what() << std::endl;
+    valid = false;
   }
+
+  std::cerr << "Label validation result: " << (valid ? "Valid" : "Invalid") << std::endl;
+
+  if (!valid) {
+    std::cerr << "Invalid labels detected. Error message: " << Loss::LabelErrorMsg() << std::endl;
+    LOG(FATAL) << Loss::LabelErrorMsg();
+  }
+
+  std::cerr << "Exiting ValidateLabel function" << std::endl;
+}
 
   // 0 - scale_pos_weight, 1 - is_null_weight
   RegLossObj(): additional_input_(2) {}
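
For context, the CPU branch of the patched ValidateLabel is essentially an std::all_of
scan with a per-label predicate. Below is a minimal, self-contained sketch of that
pattern, not XGBoost code: CheckLabel here is a hypothetical stand-in for
Loss::CheckLabel (for the logistic losses the actual requirement is that each label
lies in [0, 1]), and AllLabelsValid plays the role of the CPU lambda passed to
DispatchDevice.

#include <algorithm>  // std::all_of
#include <cmath>      // std::isfinite, std::nanf
#include <cstdio>     // std::printf
#include <vector>

// Hypothetical stand-in for Loss::CheckLabel; logistic losses accept y in [0, 1].
static bool CheckLabel(float y) { return y >= 0.0f && y <= 1.0f; }

// Mirrors the CPU branch above: every label must be finite and pass the
// loss-specific range check; the scan stops at the first failing label.
static bool AllLabelsValid(std::vector<float> const& labels) {
  return std::all_of(labels.begin(), labels.end(), [](float y) -> bool {
    if (!std::isfinite(y)) {
      std::printf("non-finite label: %f\n", y);
      return false;
    }
    return CheckLabel(y);
  });
}

int main() {
  std::vector<float> good{0.0f, 0.5f, 1.0f};
  std::vector<float> bad{0.0f, std::nanf(""), 1.0f};
  std::printf("good -> %s\n", AllLabelsValid(good) ? "valid" : "invalid");
  std::printf("bad  -> %s\n", AllLabelsValid(bad) ? "valid" : "invalid");
  return 0;
}

The GPU branch in the diff evaluates the same predicate through a transform iterator
and combines the results with thrust::logical_and; unlike std::all_of that reduction
cannot short-circuit, but it keeps the whole check on the device and avoids a host
round-trip per label.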