/**
 * Copyright 2023 by XGBoost contributors
 */
#include "ranking_utils.h"

#include <algorithm>   // for copy_n, max, min, none_of, all_of
#include <cstddef>     // for size_t
#include <cstdio>      // for sscanf
#include <functional>  // for greater
#include <string>      // for char_traits, string

#include "algorithm.h"         // for ArgSort
#include "linalg_op.h"         // for cbegin, cend
#include "optional_weight.h"   // for MakeOptionalWeights
#include "threading_utils.h"   // for ParallelFor
#include "xgboost/base.h"      // for bst_group_t
#include "xgboost/context.h"   // for Context
#include "xgboost/data.h"      // for MetaInfo
#include "xgboost/linalg.h"    // for All, TensorView, Range
#include "xgboost/logging.h"   // for CHECK_EQ

namespace xgboost::ltr {
void RankingCache::InitOnCPU(Context const* ctx, MetaInfo const& info) {
  if (info.group_ptr_.empty()) {
    group_ptr_.Resize(2, 0);
    group_ptr_.HostVector()[1] = info.num_row_;
  } else {
    group_ptr_.HostVector() = info.group_ptr_;
  }

  auto const& gptr = group_ptr_.ConstHostVector();
  for (std::size_t i = 1; i < gptr.size(); ++i) {
    std::size_t n = gptr[i] - gptr[i - 1];
    max_group_size_ = std::max(max_group_size_, n);
  }

  double sum_weights = 0;
  auto n_groups = Groups();
  auto weight = common::MakeOptionalWeights(ctx, info.weights_);
  for (bst_omp_uint k = 0; k < n_groups; ++k) {
    sum_weights += weight[k];
  }
  weight_norm_ = static_cast<double>(n_groups) / sum_weights;
}
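
// Worked example (illustrative, not from the original comments): weight_norm_ rescales the
// per-group weights so that they average to one. With three groups weighted {1.0, 2.0, 3.0},
// sum_weights = 6.0 and weight_norm_ = 3 / 6.0 = 0.5, so weights scaled by this factor
// become {0.5, 1.0, 1.5}.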

common::Span<std::size_t const> RankingCache::MakeRankOnCPU(Context const* ctx,
                                                            common::Span<float const> predt) {
  auto gptr = this->DataGroupPtr(ctx);
  auto rank = this->sorted_idx_cache_.HostSpan();
  CHECK_EQ(rank.size(), predt.size());

  common::ParallelFor(this->Groups(), ctx->Threads(), [&](auto g) {
    auto cnt = gptr[g + 1] - gptr[g];
    auto g_predt = predt.subspan(gptr[g], cnt);
    auto g_rank = rank.subspan(gptr[g], cnt);
    auto sorted_idx = common::ArgSort<std::size_t>(
        ctx, g_predt.data(), g_predt.data() + g_predt.size(), std::greater<>{});
    CHECK_EQ(g_rank.size(), sorted_idx.size());
    std::copy_n(sorted_idx.data(), sorted_idx.size(), g_rank.data());
  });

  return rank;
}
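
// Worked example (illustrative): for a single group with predictions {0.2, 0.9, 0.5},
// MakeRankOnCPU writes the descending argsort {1, 2, 0} into the cached span, i.e. the
// document at position 1 is ranked first, position 2 second, and position 0 last.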

#if !defined(XGBOOST_USE_CUDA)
void RankingCache::InitOnCUDA(Context const*, MetaInfo const&) { common::AssertGPUSupport(); }
common::Span<std::size_t const> RankingCache::MakeRankOnCUDA(Context const*,
                                                             common::Span<float const>) {
  common::AssertGPUSupport();
  return {};
}
#endif  // !defined(XGBOOST_USE_CUDA)

void NDCGCache::InitOnCPU(Context const* ctx, MetaInfo const& info) {
  auto const h_group_ptr = this->DataGroupPtr(ctx);

  discounts_.Resize(MaxGroupSize(), 0);
  auto& h_discounts = discounts_.HostVector();
  for (std::size_t i = 0; i < MaxGroupSize(); ++i) {
    h_discounts[i] = CalcDCGDiscount(i);
  }

  auto n_groups = h_group_ptr.size() - 1;
  auto h_labels = info.labels.HostView().Slice(linalg::All(), 0);

  CheckNDCGLabels(this->Param(), h_labels,
                  [](auto beg, auto end, auto op) { return std::none_of(beg, end, op); });

  inv_idcg_.Reshape(n_groups);
  auto h_inv_idcg = inv_idcg_.HostView();
  std::size_t topk = this->Param().TopK();
  auto const exp_gain = this->Param().ndcg_exp_gain;

  common::ParallelFor(n_groups, ctx->Threads(), [&](auto g) {
    auto g_labels = h_labels.Slice(linalg::Range(h_group_ptr[g], h_group_ptr[g + 1]));
    auto sorted_idx = common::ArgSort<std::size_t>(ctx, linalg::cbegin(g_labels),
                                                   linalg::cend(g_labels), std::greater<>{});

    double idcg{0.0};
    for (std::size_t i = 0; i < std::min(g_labels.Size(), topk); ++i) {
      if (exp_gain) {
        idcg += h_discounts[i] * CalcDCGGain(g_labels(sorted_idx[i]));
      } else {
        idcg += h_discounts[i] * g_labels(sorted_idx[i]);
      }
    }
    h_inv_idcg(g) = CalcInvIDCG(idcg);
  });
}
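
// Worked example (illustrative, assuming the usual definitions CalcDCGGain(rel) = 2^rel - 1
// and CalcDCGDiscount(i) = 1 / log2(i + 2)): for a group with labels {3, 2, 3, 0},
// ndcg_exp_gain enabled, and no top-k truncation, the labels sorted in descending order
// give gains {7, 7, 3, 0} and discounts {1, 0.6309, 0.5, 0.4307}, hence
// IDCG ~= 7 + 4.417 + 1.5 + 0 = 12.917 and h_inv_idcg(g) ~= 1 / 12.917 ~= 0.0774.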

#if !defined(XGBOOST_USE_CUDA)
void NDCGCache::InitOnCUDA(Context const*, MetaInfo const&) { common::AssertGPUSupport(); }
#endif  // !defined(XGBOOST_USE_CUDA)

DMLC_REGISTER_PARAMETER(LambdaRankParam);

void PreCache::InitOnCPU(Context const*, MetaInfo const& info) {
  auto const& h_label = info.labels.HostView().Slice(linalg::All(), 0);
  CheckPreLabels("pre", h_label,
                 [](auto beg, auto end, auto op) { return std::all_of(beg, end, op); });
}

#if !defined(XGBOOST_USE_CUDA)
void PreCache::InitOnCUDA(Context const*, MetaInfo const&) { common::AssertGPUSupport(); }
#endif  // !defined(XGBOOST_USE_CUDA)

void MAPCache::InitOnCPU(Context const*, MetaInfo const& info) {
  auto const& h_label = info.labels.HostView().Slice(linalg::All(), 0);
  CheckPreLabels("map", h_label,
                 [](auto beg, auto end, auto op) { return std::all_of(beg, end, op); });
}

#if !defined(XGBOOST_USE_CUDA)
void MAPCache::InitOnCUDA(Context const*, MetaInfo const&) { common::AssertGPUSupport(); }
#endif  // !defined(XGBOOST_USE_CUDA)

std::string ParseMetricName(StringView name, StringView param, position_t* topn, bool* minus) {
  std::string out_name;
  if (!param.empty()) {
    std::ostringstream os;
    if (std::sscanf(param.c_str(), "%u[-]?", topn) == 1) {
      os << name << '@' << param;
      out_name = os.str();
    } else {
      os << name << param;
      out_name = os.str();
    }
    if (*param.crbegin() == '-') {
      *minus = true;
    }
  } else {
    out_name = name.c_str();
  }
  return out_name;
}
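
// Examples (derived from the logic above): ParseMetricName("ndcg", "3-", &topn, &minus)
// yields "ndcg@3-" with topn = 3 and minus = true; an empty param yields plain "ndcg";
// a param that does not start with an unsigned integer (e.g. "-") is appended verbatim,
// producing "ndcg-" with topn left untouched and minus = true.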

std::string MakeMetricName(StringView name, position_t topn, bool minus) {
  std::ostringstream ss;
  if (topn == LambdaRankParam::NotSet()) {
    ss << name;
  } else {
    ss << name << "@" << topn;
  }
  if (minus) {
    ss << "-";
  }
  std::string out_name = ss.str();
  return out_name;
}
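
// Example (derived from the logic above): MakeMetricName("ndcg", 3, true) returns "ndcg@3-",
// while MakeMetricName("ndcg", LambdaRankParam::NotSet(), false) returns plain "ndcg",
// mirroring the format accepted by ParseMetricName.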
}  // namespace xgboost::ltr