validate label logging

This commit is contained in:
Hendrik Groove 2024-10-20 17:32:22 +02:00
parent 60a3bea7c6
commit fd95be5f20

View File

@ -36,6 +36,9 @@
#include "xgboost/tree_model.h" // RegTree #include "xgboost/tree_model.h" // RegTree
#include "regression_param.h" #include "regression_param.h"
#include <iostream>
#include <cmath>
#include <exception>
#if defined(XGBOOST_USE_CUDA) || defined(XGBOOST_USE_HIP) #if defined(XGBOOST_USE_CUDA) || defined(XGBOOST_USE_HIP)
#include "../common/cuda_context.cuh" // for CUDAContext #include "../common/cuda_context.cuh" // for CUDAContext
@ -63,31 +66,63 @@ class RegLossObj : public FitIntercept {
HostDeviceVector<float> additional_input_; HostDeviceVector<float> additional_input_;
public: public:
// Validate every label in `info.labels`, writing diagnostic messages to std::cerr.
//
// A label is rejected when it is not finite or when Loss::CheckLabel() returns false.
// The scan runs through ctx_->DispatchDevice(): the first callable is the host path,
// the second is the CUDA/HIP path (only compiled when GPU support is enabled).
//
// \param info  Meta info whose `labels` tensor is checked.
//
// If any label is rejected, the failure is reported through
// LOG(FATAL) << Loss::LabelErrorMsg(). An exception raised while scanning is logged
// and rethrown unchanged, so that a runtime or device failure is never misreported
// as an invalid-label error.
void ValidateLabel(MetaInfo const& info) {
  std::cerr << "Entering ValidateLabel function" << '\n';
  std::cerr << "Number of rows: " << info.num_row_ << '\n';
  std::cerr << "Label shape: " << info.labels.Shape()[0] << "x" << info.labels.Shape()[1]
            << '\n';
  auto label = info.labels.View(ctx_->Device());
  std::cerr << "Label device: " << (ctx_->Device().IsCUDA() ? "GPU" : "CPU") << '\n';

  bool valid = false;
  try {
    valid = ctx_->DispatchDevice(
        [&] {
          std::cerr << "Validating labels on CPU" << '\n';
          // std::all_of stops at the first rejected label, so at most one
          // "Non-finite" line is printed on this path.
          return std::all_of(linalg::cbegin(label), linalg::cend(label), [](float y) -> bool {
            if (!std::isfinite(y)) {
              std::cerr << "Non-finite label value found: " << y << '\n';
              return false;
            }
            return Loss::CheckLabel(y);
          });
        },
        [&] {
#if defined(XGBOOST_USE_CUDA) || defined(XGBOOST_USE_HIP)
          std::cerr << "Validating labels on GPU" << '\n';
          auto cuctx = ctx_->CUDACtx();
          // Map each flat index to a per-element validity flag, then AND-reduce the flags.
          auto it = dh::MakeTransformIterator<bool>(
              thrust::make_counting_iterator(0ul), [=] XGBOOST_DEVICE(std::size_t i) -> bool {
                auto [m, n] = linalg::UnravelIndex(i, label.Shape());
                float y = label(m, n);
                if (!isfinite(y)) {
                  // NOTE(review): the reduction visits every element, so this prints one
                  // line per non-finite label - confirm that volume is acceptable.
                  printf("Non-finite label value found on GPU: %f\n", y);
                  return false;
                }
                return Loss::CheckLabel(y);
              });
          return dh::Reduce(cuctx->CTP(), it, it + label.Size(), true, thrust::logical_and<>{});
#else
          std::cerr << "GPU support not enabled" << '\n';
          common::AssertGPUSupport();
          return false;
#endif  // defined(XGBOOST_USE_CUDA) || defined(XGBOOST_USE_HIP)
        });
  } catch (const std::exception& e) {
    // Log for diagnosis, then propagate the original error. Converting it into
    // `valid = false` would report an unrelated failure as "invalid labels".
    std::cerr << "Exception during label validation: " << e.what() << '\n';
    throw;
  }

  std::cerr << "Label validation result: " << (valid ? "Valid" : "Invalid") << '\n';
  if (!valid) {
    std::cerr << "Invalid labels detected. Error message: " << Loss::LabelErrorMsg() << '\n';
    LOG(FATAL) << Loss::LabelErrorMsg();
  }
  std::cerr << "Exiting ValidateLabel function" << '\n';
}
// 0 - scale_pos_weight, 1 - is_null_weight // 0 - scale_pos_weight, 1 - is_null_weight
RegLossObj(): additional_input_(2) {} RegLossObj(): additional_input_(2) {}