/**
 * Copyright 2023, XGBoost contributors
 *
 * Vocabulary explanation:
 *
 * There are two different lists we need to handle in the objective. The first is the
 * list of labels (relevance degrees) provided by the user; its order has no particular
 * meaning when bias estimation is NOT used. The other one is generated by our model: an
 * index sorted by prediction scores. `rank_high` refers to a position in the model rank
 * list that is higher than `rank_low`, while `idx_high` refers to where the `rank_high`
 * sample comes from. Simply put, `rank_high` indexes into the rank list obtained from
 * the model, while `idx_high` indexes into the user-provided sample list.
 */
#ifndef XGBOOST_OBJECTIVE_LAMBDARANK_OBJ_H_
#define XGBOOST_OBJECTIVE_LAMBDARANK_OBJ_H_

#include <algorithm>   // for min, max
#include <cassert>     // for assert
#include <cmath>       // for log, abs
#include <cstddef>     // for size_t
#include <cstdint>     // for int32_t
#include <functional>  // for greater
#include <memory>      // for shared_ptr
#include <random>      // for minstd_rand, uniform_int_distribution
#include <vector>      // for vector

#include "../common/algorithm.h"           // for ArgSort
#include "../common/math.h"                // for Sigmoid
#include "../common/ranking_utils.h"       // for CalcDCGGain
#include "../common/transform_iterator.h"  // for MakeIndexTransformIter
#include "xgboost/base.h"                  // for GradientPair, XGBOOST_DEVICE, kRtEps
#include "xgboost/context.h"               // for Context
#include "xgboost/data.h"                  // for MetaInfo
#include "xgboost/host_device_vector.h"    // for HostDeviceVector
#include "xgboost/linalg.h"                // for VectorView, Vector
#include "xgboost/logging.h"               // for CHECK_EQ
#include "xgboost/span.h"                  // for Span

namespace xgboost::obj {
double constexpr Eps64() { return 1e-16; }

template <bool exp>
XGBOOST_DEVICE double DeltaNDCG(float y_high, float y_low, std::size_t rank_high,
                                std::size_t rank_low, double inv_IDCG,
                                common::Span<double const> discount) {
  // Use rank_high instead of idx_high as we are calculating the discount based on ranks
  // provided by the model.
  double gain_high = exp ? ltr::CalcDCGGain(y_high) : y_high;
  double discount_high = discount[rank_high];

  double gain_low = exp ? ltr::CalcDCGGain(y_low) : y_low;
  double discount_low = discount[rank_low];

  double original = gain_high * discount_high + gain_low * discount_low;
  double changed = gain_low * discount_high + gain_high * discount_low;

  double delta_NDCG = (original - changed) * inv_IDCG;
  assert(delta_NDCG >= -1.0);
  assert(delta_NDCG <= 1.0);
  return delta_NDCG;
}
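
// Worked example (illustrative comment only; the exact gain/discount formulas live in
// ranking_utils.h, here we assume the exponential gain 2^y - 1 and a logarithmic
// discount 1 / log2(rank + 2)). Swapping a label-3 document at rank 0 with a label-1
// document at rank 2 gives
//
//   original = (2^3 - 1) * 1.0 + (2^1 - 1) / log2(4)
//   changed  = (2^1 - 1) * 1.0 + (2^3 - 1) / log2(4)
//   delta    = (original - changed) * inv_IDCG
//
// i.e. the signed change in NDCG incurred by exchanging the two documents in the
// model's ranked list.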
XGBOOST_DEVICE inline double DeltaMAP(float y_high, float y_low, std::size_t rank_high,
                                      std::size_t rank_low, common::Span<double const> n_rel,
                                      common::Span<double const> acc) {
  double r_h = static_cast<double>(rank_high) + 1.0;
  double r_l = static_cast<double>(rank_low) + 1.0;
  double delta{0.0};
  double n_total_relevances = n_rel.back();
  assert(n_total_relevances > 0.0);
  auto m = n_rel[rank_low];
  double n = n_rel[rank_high];
  if (y_high < y_low) {
    auto a = m / r_l - (n + 1.0) / r_h;
    auto b = acc[rank_low - 1] - acc[rank_high];
    delta = (a - b) / n_total_relevances;
  } else {
    auto a = n / r_h - m / r_l;
    auto b = acc[rank_low - 1] - acc[rank_high];
    delta = (a + b) / n_total_relevances;
  }
  return delta;
}

template <bool unbiased, typename Delta>
XGBOOST_DEVICE GradientPair
LambdaGrad(linalg::VectorView<float const> labels, common::Span<float const> predts,
           common::Span<std::size_t const> sorted_idx,
           std::size_t rank_high,  // higher index on the model rank list
           std::size_t rank_low,   // lower index on the model rank list
           Delta delta,            // function to calculate the delta score
           linalg::VectorView<double const> t_plus,   // input bias ratio
           linalg::VectorView<double const> t_minus,  // input bias ratio
           double* p_cost) {
  assert(sorted_idx.size() > 0 && "Empty sorted idx for a group.");
  std::size_t idx_high = sorted_idx[rank_high];
  std::size_t idx_low = sorted_idx[rank_low];

  if (labels(idx_high) == labels(idx_low)) {
    *p_cost = 0;
    return {0.0f, 0.0f};
  }

  auto best_score = predts[sorted_idx.front()];
  auto worst_score = predts[sorted_idx.back()];

  auto y_high = labels(idx_high);
  float s_high = predts[idx_high];
  auto y_low = labels(idx_low);
  float s_low = predts[idx_low];

  // Use double whenever possible as we are working in the exp space.
  double delta_score = std::abs(s_high - s_low);
  double const sigmoid = common::Sigmoid(s_high - s_low);
  // Change in the metric score, like \delta NDCG or \delta MAP
  double delta_metric = std::abs(delta(y_high, y_low, rank_high, rank_low));

  if (best_score != worst_score) {
    delta_metric /= (delta_score + 0.01);
  }

  if (unbiased) {
    *p_cost = std::log(1.0 / (1.0 - sigmoid)) * delta_metric;
  }

  auto lambda_ij = (sigmoid - 1.0) * delta_metric;
  auto hessian_ij = std::max(sigmoid * (1.0 - sigmoid), Eps64()) * delta_metric * 2.0;

  auto k = t_plus.Size();
  assert(t_minus.Size() == k && "Invalid size of position bias");

  // We need to skip samples that exceed the maximum number of tracked positions, and
  // samples that have low probability and might bring us floating point issues.
  if (unbiased && idx_high < k && idx_low < k && t_minus(idx_low) >= Eps64() &&
      t_plus(idx_high) >= Eps64()) {
    // The index should be ranks[idx_low]; since we assume the label list is sorted, this
    // reduces to `idx_low`, which is a position on the input list, as explained in the
    // file header.
    lambda_ij /= (t_plus(idx_high) * t_minus(idx_low));
    hessian_ij /= (t_plus(idx_high) * t_minus(idx_low));
  }
  auto pg = GradientPair{static_cast<float>(lambda_ij), static_cast<float>(hessian_ij)};
  return pg;
}

XGBOOST_DEVICE inline GradientPair Repulse(GradientPair pg) {
  auto ng = GradientPair{-pg.GetGrad(), pg.GetHess()};
  return ng;
}
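
// Illustrative sketch of how the helpers above are combined (hypothetical caller code,
// not part of this header; `gpair` is a placeholder for the output gradient buffer).
// For a pair (rank_high, rank_low) the gradient is accumulated at the higher-ranked
// document and its negated counterpart at the lower-ranked one:
//
//   double cost{0.0};
//   auto delta = [&](float y_high, float y_low, std::size_t r_high, std::size_t r_low) {
//     return DeltaNDCG<true>(y_high, y_low, r_high, r_low, inv_IDCG, discount);
//   };
//   auto pg = LambdaGrad<false>(labels, predts, sorted_idx, rank_high, rank_low, delta,
//                               t_plus, t_minus, &cost);
//   gpair[sorted_idx[rank_high]] += pg;
//   gpair[sorted_idx[rank_low]] += Repulse(pg);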
namespace cuda_impl {
void LambdaRankGetGradientNDCG(Context const* ctx, std::int32_t iter,
                               HostDeviceVector<float> const& preds, MetaInfo const& info,
                               std::shared_ptr<ltr::NDCGCache> p_cache,
                               linalg::VectorView<double const> t_plus,   // input bias ratio
                               linalg::VectorView<double const> t_minus,  // input bias ratio
                               linalg::VectorView<double> li, linalg::VectorView<double> lj,
                               HostDeviceVector<GradientPair>* out_gpair);

/**
 * \brief Generate statistic for MAP used for calculating \Delta Z in lambda mart.
 */
void MAPStat(Context const* ctx, MetaInfo const& info, common::Span<std::size_t const> d_rank_idx,
             std::shared_ptr<ltr::MAPCache> p_cache);

void LambdaRankGetGradientMAP(Context const* ctx, std::int32_t iter,
                              HostDeviceVector<float> const& predt, MetaInfo const& info,
                              std::shared_ptr<ltr::MAPCache> p_cache,
                              linalg::VectorView<double const> t_plus,   // input bias ratio
                              linalg::VectorView<double const> t_minus,  // input bias ratio
                              linalg::VectorView<double> li, linalg::VectorView<double> lj,
                              HostDeviceVector<GradientPair>* out_gpair);

void LambdaRankGetGradientPairwise(Context const* ctx, std::int32_t iter,
                                   HostDeviceVector<float> const& predt, const MetaInfo& info,
                                   std::shared_ptr<ltr::RankingCache> p_cache,
                                   linalg::VectorView<double const> ti_plus,   // input bias ratio
                                   linalg::VectorView<double const> tj_minus,  // input bias ratio
                                   linalg::VectorView<double> li, linalg::VectorView<double> lj,
                                   HostDeviceVector<GradientPair>* out_gpair);

void LambdaRankUpdatePositionBias(Context const* ctx, linalg::VectorView<double const> li_full,
                                  linalg::VectorView<double const> lj_full,
                                  linalg::Vector<double>* p_ti_plus,
                                  linalg::Vector<double>* p_tj_minus, linalg::Vector<double>* p_li,
                                  linalg::Vector<double>* p_lj,
                                  std::shared_ptr<ltr::RankingCache> p_cache);
}  // namespace cuda_impl

namespace cpu_impl {
/**
 * \brief Generate statistic for MAP used for calculating \Delta Z in lambda mart.
 *
 * \param label    Ground truth relevance label.
 * \param rank_idx Sorted index of prediction.
 * \param p_cache  An initialized MAPCache.
 */
void MAPStat(Context const* ctx, linalg::VectorView<float const> label,
             common::Span<std::size_t const> rank_idx, std::shared_ptr<ltr::MAPCache> p_cache);
}  // namespace cpu_impl

/**
 * \brief Construct pairs on CPU.
 *
 * \tparam Op      Functor for updating a pair of gradients.
 *
 * \param ctx      The global context.
 * \param iter     The boosting iteration.
 * \param cache    ltr cache.
 * \param g        The current query group.
 * \param g_label  The labels for the current query group.
 * \param g_rank   Sorted index of model scores for the current query group.
 * \param op       A callable that accepts two indices for a pair of documents. The
 *                 indices are for the ranked list (labels sorted according to model
 *                 scores).
 */
template <typename Op>
void MakePairs(Context const* ctx, std::int32_t iter,
               std::shared_ptr<ltr::RankingCache> const cache, bst_group_t g,
               linalg::VectorView<float const> g_label, common::Span<std::size_t const> g_rank,
               Op op) {
  auto group_ptr = cache->DataGroupPtr(ctx);
  ltr::position_t cnt = group_ptr[g + 1] - group_ptr[g];

  if (cache->Param().HasTruncation()) {
    for (std::size_t i = 0; i < std::min(cnt, cache->Param().NumPair()); ++i) {
      for (std::size_t j = i + 1; j < cnt; ++j) {
        op(i, j);
      }
    }
  } else {
    CHECK_EQ(g_rank.size(), g_label.Size());
    std::minstd_rand rnd(iter);
    rnd.discard(g);  // fixme(jiamingy): honor the global seed
    // sort label according to the rank list
    auto it = common::MakeIndexTransformIter(
        [&g_rank, &g_label](std::size_t idx) { return g_label(g_rank[idx]); });
    std::vector<std::size_t> y_sorted_idx =
        common::ArgSort<std::size_t>(ctx, it, it + cnt, std::greater<>{});
    // permutation iterator to get the original label
    auto rev_it = common::MakeIndexTransformIter(
        [&](std::size_t idx) { return g_label(g_rank[y_sorted_idx[idx]]); });

    for (std::size_t i = 0; i < cnt;) {
      std::size_t j = i + 1;
      // find the bucket boundary
      while (j < cnt && rev_it[i] == rev_it[j]) {
        ++j;
      }
      // Bucket [i, j), construct n_samples pairs for each sample inside the bucket with
      // another sample outside the bucket.
      //
      // n elements left to the bucket, and n elements right to the bucket
      std::size_t n_lefts = i, n_rights = static_cast<std::size_t>(cnt - j);
      if (n_lefts + n_rights == 0) {
        i = j;
        continue;
      }

      auto n_samples = cache->Param().NumPair();
      // for each pair specified by the user
      while (n_samples--) {
        // for each sample in the bucket
        for (std::size_t pair_idx = i; pair_idx < j; ++pair_idx) {
          std::size_t ridx = std::uniform_int_distribution<std::size_t>(
              static_cast<std::size_t>(0), n_lefts + n_rights - 1)(rnd);
          if (ridx >= n_lefts) {
            ridx = ridx - i + j;  // shift to the right of the bucket
          }
          // indices that point to the rank list.
          auto idx0 = y_sorted_idx[pair_idx];
          auto idx1 = y_sorted_idx[ridx];
          op(idx0, idx1);
        }
      }
      i = j;
    }
  }
}
}  // namespace xgboost::obj
#endif  // XGBOOST_OBJECTIVE_LAMBDARANK_OBJ_H_
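
// Illustrative usage sketch (hypothetical caller code): the functor receives two
// positions on the model's ranked list for the query group; translating them into
// gradients (e.g. via LambdaGrad/Repulse above) is left to the caller.
//
//   MakePairs(ctx, iter, cache, g, g_label, g_rank, [&](std::size_t i, std::size_t j) {
//     // i and j index into g_rank; g_rank[i] and g_rank[j] recover the original samples.
//   });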