validate label logging

This commit is contained in:
Hendrik Groove 2024-10-20 17:32:22 +02:00
parent 60a3bea7c6
commit fd95be5f20

View File

@@ -36,6 +36,9 @@
#include "xgboost/tree_model.h" // RegTree
#include "regression_param.h"
#include <iostream>
#include <cmath>
#include <exception>
#if defined(XGBOOST_USE_CUDA) || defined(XGBOOST_USE_HIP)
#include "../common/cuda_context.cuh" // for CUDAContext
@@ -63,31 +66,63 @@ class RegLossObj : public FitIntercept {
HostDeviceVector<float> additional_input_;
public:
// NOTE(review): this span looks like a diff hunk rendered without its +/- markers, so the
// previous and the revised bodies of ValidateLabel appear interleaved below -- confirm
// against the real file before treating these lines as one compilable definition.
//
// ValidateLabel, previous form (as far as it is visible here): takes a view of
// info.labels on the context's device and checks every label with Loss::CheckLabel.
void ValidateLabel(MetaInfo const& info) {
auto label = info.labels.View(ctx_->Device());
auto valid = ctx_->DispatchDevice(
[&] {
return std::all_of(linalg::cbegin(label), linalg::cend(label),
[](float y) -> bool { return Loss::CheckLabel(y); });
},
[&] {
// ValidateLabel, revised form: performs the same per-label check, additionally rejects
// non-finite labels, and traces every step on std::cerr. If the labels are judged
// invalid, the failure is reported through LOG(FATAL) with Loss::LabelErrorMsg().
void ValidateLabel(MetaInfo const& info) {
// Trace entry plus the row count and the first two extents of the label shape.
std::cerr << "Entering ValidateLabel function" << std::endl;
std::cerr << "Number of rows: " << info.num_row_ << std::endl;
std::cerr << "Label shape: " << info.labels.Shape()[0] << "x" << info.labels.Shape()[1] << std::endl;
auto label = info.labels.View(ctx_->Device());
std::cerr << "Label device: " << (ctx_->Device().IsCUDA() ? "GPU" : "CPU") << std::endl;
// Starts out as "invalid"; only a completed dispatch can turn it to true.
bool valid = false;
// Any std::exception raised while dispatching is caught further down and treated the
// same way as an invalid label set.
try {
valid = ctx_->DispatchDevice(
// First callable: the branch that logs itself as the CPU path.
[&] {
std::cerr << "Validating labels on CPU" << std::endl;
return std::all_of(linalg::cbegin(label), linalg::cend(label),
[](float y) -> bool {
// NaN / infinity fails the check before the loss-specific test is consulted.
if (!std::isfinite(y)) {
std::cerr << "Non-finite label value found: " << y << std::endl;
return false;
}
return Loss::CheckLabel(y);
});
},
// Second callable: the branch that logs itself as the GPU path.
[&] {
#if defined(XGBOOST_USE_CUDA) || defined(XGBOOST_USE_HIP)
// NOTE(review): the next seven lines appear to be the previous device-side body
// (no finiteness test, no logging) left over from the diff rendering.
auto cuctx = ctx_->CUDACtx();
auto it = dh::MakeTransformIterator<bool>(
thrust::make_counting_iterator(0ul), [=] XGBOOST_DEVICE(std::size_t i) -> bool {
auto [m, n] = linalg::UnravelIndex(i, label.Shape());
return Loss::CheckLabel(label(m, n));
});
return dh::Reduce(cuctx->CTP(), it, it + label.Size(), true, thrust::logical_and<>{});
std::cerr << "Validating labels on GPU" << std::endl;
auto cuctx = ctx_->CUDACtx();
// Lazily maps each flat index i in [0, label.Size()) to the check result of the label
// at the (m, n) position produced by UnravelIndex.
auto it = dh::MakeTransformIterator<bool>(
thrust::make_counting_iterator(0ul), [=] XGBOOST_DEVICE(std::size_t i) -> bool {
auto [m, n] = linalg::UnravelIndex(i, label.Shape());
float y = label(m, n);
// Printed from inside the device lambda, once for every non-finite label evaluated.
if (!isfinite(y)) {
printf("Non-finite label value found on GPU: %f\n", y);
return false;
}
return Loss::CheckLabel(y);
});
// Combines the per-label results with logical AND, seeded with true.
return dh::Reduce(cuctx->CTP(), it, it + label.Size(), true, thrust::logical_and<>{});
#else
// NOTE(review): the lines from here to the closing brace of the `if (!valid)` below
// appear to be the previous non-GPU fallback and the previous error report.
common::AssertGPUSupport();
return false;
#endif // defined(XGBOOST_USE_CUDA)
});
if (!valid) {
LOG(FATAL) << Loss::LabelErrorMsg();
}
// Revised fallback for builds without CUDA/HIP: log, call AssertGPUSupport, and report
// the labels as not validated.
std::cerr << "GPU support not enabled" << std::endl;
common::AssertGPUSupport();
return false;
#endif // defined(XGBOOST_USE_CUDA) || defined(XGBOOST_USE_HIP)
});
} catch (const std::exception& e) {
// The exception is logged and converted into an "invalid" verdict rather than rethrown.
std::cerr << "Exception during label validation: " << e.what() << std::endl;
valid = false;
}
std::cerr << "Label validation result: " << (valid ? "Valid" : "Invalid") << std::endl;
if (!valid) {
// The same message goes to std::cerr first and is then handed to LOG(FATAL).
std::cerr << "Invalid labels detected. Error message: " << Loss::LabelErrorMsg() << std::endl;
LOG(FATAL) << Loss::LabelErrorMsg();
}
std::cerr << "Exiting ValidateLabel function" << std::endl;
}
// Default constructor. additional_input_ is constructed with the argument 2; its
// entries are used as: index 0 -> scale_pos_weight, index 1 -> is_null_weight.
RegLossObj()
    : additional_input_(2) {}