/**
 * Copyright 2023 by XGBoost Contributors
 */
#include "test_lambdarank_obj.h"

#include <gtest/gtest.h>     // for Test, Message, TestPartResult, CmpHel...

#include <algorithm>         // for sort
#include <cstddef>           // for size_t
#include <initializer_list>  // for initializer_list
#include <map>               // for map
#include <memory>            // for unique_ptr, shared_ptr, make_shared
#include <numeric>           // for iota
#include <string>            // for char_traits, basic_string, string
#include <vector>            // for vector

#include "../../../src/common/ranking_utils.h"  // for NDCGCache, LambdaRankParam
#include "../helpers.h"                          // for CheckRankingObjFunction, CheckConfigReload
#include "xgboost/base.h"                        // for GradientPair, bst_group_t, Args
#include "xgboost/context.h"                     // for Context
#include "xgboost/data.h"                        // for MetaInfo, DMatrix
#include "xgboost/host_device_vector.h"          // for HostDeviceVector
#include "xgboost/linalg.h"                      // for Tensor, All, TensorView
#include "xgboost/objective.h"                   // for ObjFunction
#include "xgboost/span.h"                        // for Span

namespace xgboost::obj {
TEST(LambdaRank, NDCGJsonIO) {
  Context ctx;
  TestNDCGJsonIO(&ctx);
}

void TestNDCGGPair(Context const* ctx) {
  {
    std::unique_ptr<xgboost::ObjFunction> obj{xgboost::ObjFunction::Create("rank:ndcg", ctx)};
    obj->Configure(Args{{"lambdarank_pair_method", "topk"}});
    CheckConfigReload(obj, "rank:ndcg");

    // No gain in swapping 2 documents.
    CheckRankingObjFunction(obj, {1, 1, 1, 1}, {1, 1, 1, 1}, {1.0f, 1.0f}, {0, 2, 4},
                            {0.0f, -0.0f, 0.0f, 0.0f}, {0.0f, 0.0f, 0.0f, 0.0f});
  }
  {
    std::unique_ptr<xgboost::ObjFunction> obj{xgboost::ObjFunction::Create("rank:ndcg", ctx)};
    obj->Configure(Args{{"lambdarank_pair_method", "topk"}});
    // Test sample weights: first with the second query group weighted 0, then with both query
    // groups weighted equally.
    CheckRankingObjFunction(obj, {0, 0.1f, 0, 0.1f}, {0, 1, 0, 1}, {2.0f, 0.0f}, {0, 2, 4},
                            {2.06611f, -2.06611f, 0.0f, 0.0f},
                            {2.169331f, 2.169331f, 0.0f, 0.0f});

    CheckRankingObjFunction(obj, {0, 0.1f, 0, 0.1f}, {0, 1, 0, 1}, {2.0f, 2.0f}, {0, 2, 4},
                            {2.06611f, -2.06611f, 2.06611f, -2.06611f},
                            {2.169331f, 2.169331f, 2.169331f, 2.169331f});
  }

  std::unique_ptr<xgboost::ObjFunction> obj{xgboost::ObjFunction::Create("rank:ndcg", ctx)};
  obj->Configure(Args{{"lambdarank_pair_method", "topk"}});

  HostDeviceVector<float> predts{0, 1, 0, 1};
  MetaInfo info;
  info.labels = linalg::Tensor<float, 2>{{0, 1, 0, 1}, {4, 1}, ctx->Device()};
  info.group_ptr_ = {0, 2, 4};
  info.num_row_ = 4;
  linalg::Matrix<GradientPair> gpairs;
  obj->GetGradient(predts, info, 0, &gpairs);
  ASSERT_EQ(gpairs.Size(), predts.Size());

  {
    predts = {1, 0, 1, 0};
    linalg::Matrix<GradientPair> gpairs;
    obj->GetGradient(predts, info, 0, &gpairs);
    for (std::size_t i = 0; i < gpairs.Size(); ++i) {
      ASSERT_GT(gpairs.HostView()(i).GetHess(), 0);
    }
    ASSERT_LT(gpairs.HostView()(1).GetGrad(), 0);
    ASSERT_LT(gpairs.HostView()(3).GetGrad(), 0);

    ASSERT_GT(gpairs.HostView()(0).GetGrad(), 0);
    ASSERT_GT(gpairs.HostView()(2).GetGrad(), 0);

    // Query weights scale the gradient pairs: weight 2 for the first group, 3 for the second.
    info.weights_ = {2, 3};
    linalg::Matrix<GradientPair> weighted_gpairs;
    obj->GetGradient(predts, info, 0, &weighted_gpairs);
    auto const& h_gpairs = gpairs.HostView();
    auto const& h_weighted_gpairs = weighted_gpairs.HostView();
    for (size_t i : {0ul, 1ul}) {
      ASSERT_FLOAT_EQ(h_weighted_gpairs(i).GetGrad(), h_gpairs(i).GetGrad() * 2.0f);
      ASSERT_FLOAT_EQ(h_weighted_gpairs(i).GetHess(), h_gpairs(i).GetHess() * 2.0f);
    }
    for (size_t i : {2ul, 3ul}) {
      ASSERT_FLOAT_EQ(h_weighted_gpairs(i).GetGrad(), h_gpairs(i).GetGrad() * 3.0f);
      ASSERT_FLOAT_EQ(h_weighted_gpairs(i).GetHess(), h_gpairs(i).GetHess() * 3.0f);
    }
  }

  ASSERT_NO_THROW(obj->DefaultEvalMetric());
}

TEST(LambdaRank, NDCGGPair) {
  Context ctx;
  TestNDCGGPair(&ctx);
}

// With `lambdarank_unbiased` enabled, the objective keeps position-bias estimates that are
// serialized as "ti+" and "tj-" in its configuration.  For constant predictions the leading
// estimate stays at 1.0 and the remaining entries are non-increasing.
void TestUnbiasedNDCG(Context const* ctx) {
  std::unique_ptr<xgboost::ObjFunction> obj{xgboost::ObjFunction::Create("rank:ndcg", ctx)};
  obj->Configure(Args{{"lambdarank_pair_method", "topk"},
                      {"lambdarank_unbiased", "true"},
                      {"lambdarank_bias_norm", "0"}});
  std::shared_ptr<DMatrix> p_fmat{
      RandomDataGenerator{10, 1, 0.0f}.GenerateDMatrix(true, false, 2)};
  auto h_label = p_fmat->Info().labels.HostView().Values();
  // Move clicked samples to the beginning.
  std::sort(h_label.begin(), h_label.end(), std::greater<>{});

  HostDeviceVector<float> predt(p_fmat->Info().num_row_, 1.0f);
  linalg::Matrix<GradientPair> out_gpair;
  obj->GetGradient(predt, p_fmat->Info(), 0, &out_gpair);

  Json config{Object{}};
  obj->SaveConfig(&config);

  auto ti_plus = get<F32Array const>(config["ti+"]);
  ASSERT_FLOAT_EQ(ti_plus[0], 1.0);
  // Bias is non-increasing when the prediction is constant (constant cost on swapping documents).
  for (std::size_t i = 1; i < ti_plus.size(); ++i) {
    ASSERT_LE(ti_plus[i], ti_plus[i - 1]);
  }

  auto tj_minus = get<F32Array const>(config["tj-"]);
  ASSERT_FLOAT_EQ(tj_minus[0], 1.0);
}

TEST(LambdaRank, UnbiasedNDCG) {
  Context ctx;
  TestUnbiasedNDCG(&ctx);
}

// Builds 128 rows with alternating 0/1 labels and strictly decreasing predictions for the
// pair-construction tests.
void InitMakePairTest(Context const* ctx, MetaInfo* out_info, HostDeviceVector<float>* out_predt) {
  out_predt->SetDevice(ctx->Device());
  MetaInfo& info = *out_info;
  info.num_row_ = 128;
  info.labels.ModifyInplace(
      [&](HostDeviceVector<float>* data, common::Span<std::size_t, 2> shape) {
        shape[0] = info.num_row_;
        shape[1] = 1;
        auto& h_data = data->HostVector();
        h_data.resize(shape[0]);
        for (std::size_t i = 0; i < h_data.size(); ++i) {
          h_data[i] = i % 2;
        }
      });
  std::vector<float> predt(info.num_row_);
  std::iota(predt.rbegin(), predt.rend(), 0.0f);
  out_predt->HostVector() = predt;
}

TEST(LambdaRank, MakePair) {
  Context ctx;
  MetaInfo info;
  HostDeviceVector<float> predt;

  InitMakePairTest(&ctx, &info, &predt);

  ltr::LambdaRankParam param;
  param.UpdateAllowUnknown(Args{{"lambdarank_pair_method", "topk"}});
  ASSERT_TRUE(param.HasTruncation());

  std::shared_ptr<ltr::RankingCache> p_cache =
      std::make_shared<ltr::RankingCache>(&ctx, info, param);
  auto const& h_predt = predt.ConstHostVector();
  {
    auto rank_idx = p_cache->SortedIdx(&ctx, h_predt);
    for (std::size_t i = 0; i < h_predt.size(); ++i) {
      ASSERT_EQ(rank_idx[i], static_cast<std::size_t>(*(h_predt.crbegin() + i)));
    }

    std::int32_t n_pairs{0};
    MakePairs(&ctx, 0, p_cache, 0, info.labels.HostView().Slice(linalg::All(), 0), rank_idx,
              [&](auto i, auto j) {
                ASSERT_GT(j, i);
                ASSERT_LT(i, p_cache->Param().NumPair());
                ++n_pairs;
              });
    ASSERT_EQ(n_pairs, 3568);
  }

  auto const h_label = info.labels.HostView();

  {
    param.UpdateAllowUnknown(Args{{"lambdarank_pair_method", "mean"}});
    auto p_cache = std::make_shared<ltr::RankingCache>(&ctx, info, param);
    ASSERT_FALSE(param.HasTruncation());

    std::int32_t n_pairs = 0;
    auto rank_idx = p_cache->SortedIdx(&ctx, h_predt);
    MakePairs(&ctx, 0, p_cache, 0, info.labels.HostView().Slice(linalg::All(), 0), rank_idx,
              [&](auto i, auto j) {
                ++n_pairs;
                // Not in the same bucket.
                ASSERT_NE(h_label(rank_idx[i]), h_label(rank_idx[j]));
              });
    ASSERT_EQ(n_pairs, info.num_row_ * param.NumPair());
  }

  {
    param.UpdateAllowUnknown(Args{{"lambdarank_num_pair_per_sample", "2"}});
    auto p_cache = std::make_shared<ltr::RankingCache>(&ctx, info, param);
    auto rank_idx = p_cache->SortedIdx(&ctx, h_predt);

    std::int32_t n_pairs = 0;
    MakePairs(&ctx, 0, p_cache, 0, info.labels.HostView().Slice(linalg::All(), 0), rank_idx,
              [&](auto i, auto j) {
                ++n_pairs;
                // Not in the same bucket.
                ASSERT_NE(h_label(rank_idx[i]), h_label(rank_idx[j]));
              });
    ASSERT_EQ(param.NumPair(), 2);
    ASSERT_EQ(n_pairs, info.num_row_ * param.NumPair());
  }
}
// Validates the per-position MAP statistics (number of relevant documents and accumulated
// precision) produced by the CPU and CUDA MAPStat implementations, first for a single query
// group and then for two groups where only the first group contains relevant documents.
void TestMAPStat(Context const* ctx) {
  auto p_fmat = EmptyDMatrix();
  MetaInfo& info = p_fmat->Info();
  ltr::LambdaRankParam param;
  param.UpdateAllowUnknown(Args{});

  {
    std::vector<float> h_data{1.0f, 1.0f, 0.0f, 1.0f, 1.0f, 1.0f};
    info.labels.Reshape(h_data.size(), 1);
    info.labels.Data()->HostVector() = h_data;
    info.num_row_ = h_data.size();

    HostDeviceVector<float> predt;
    auto& h_predt = predt.HostVector();
    h_predt.resize(h_data.size());
    std::iota(h_predt.rbegin(), h_predt.rend(), 0.0f);

    auto p_cache = std::make_shared<ltr::MAPCache>(ctx, info, param);

    predt.SetDevice(ctx->Device());
    auto rank_idx =
        p_cache->SortedIdx(ctx, ctx->IsCPU() ? predt.ConstHostSpan() : predt.ConstDeviceSpan());

    if (ctx->IsCPU()) {
      obj::cpu_impl::MAPStat(ctx, info.labels.HostView().Slice(linalg::All(), 0), rank_idx,
                             p_cache);
    } else {
      obj::cuda_impl::MAPStat(ctx, info, rank_idx, p_cache);
    }

    Context cpu_ctx;
    auto n_rel = p_cache->NumRelevant(&cpu_ctx);
    auto acc = p_cache->Acc(&cpu_ctx);

    ASSERT_EQ(n_rel[0], 1.0);
    ASSERT_EQ(acc[0], 1.0);

    ASSERT_EQ(n_rel.back(), h_data.size() - 1.0);
    ASSERT_NEAR(acc.back(), 1.95 + (1.0 / h_data.size()), kRtEps);
  }
  {
    info.labels.Reshape(16);
    auto& h_label = info.labels.Data()->HostVector();
    info.group_ptr_ = {0, 8, 16};
    info.num_row_ = info.labels.Shape(0);

    std::fill_n(h_label.begin(), 8, 1.0f);
    std::fill_n(h_label.begin() + 8, 8, 0.0f);

    HostDeviceVector<float> predt;
    auto& h_predt = predt.HostVector();
    h_predt.resize(h_label.size());
    std::iota(h_predt.rbegin(), h_predt.rbegin() + 8, 0.0f);
    std::iota(h_predt.rbegin() + 8, h_predt.rend(), 0.0f);

    auto p_cache = std::make_shared<ltr::MAPCache>(ctx, info, param);

    predt.SetDevice(ctx->Device());
    auto rank_idx =
        p_cache->SortedIdx(ctx, ctx->IsCPU() ? predt.ConstHostSpan() : predt.ConstDeviceSpan());

    if (ctx->IsCPU()) {
      obj::cpu_impl::MAPStat(ctx, info.labels.HostView().Slice(linalg::All(), 0), rank_idx,
                             p_cache);
    } else {
      obj::cuda_impl::MAPStat(ctx, info, rank_idx, p_cache);
    }

    Context cpu_ctx;
    auto n_rel = p_cache->NumRelevant(&cpu_ctx);
    ASSERT_EQ(n_rel[7], 8);      // first group
    ASSERT_EQ(n_rel.back(), 0);  // second group
  }
}

TEST(LambdaRank, MAPStat) {
  Context ctx;
  TestMAPStat(&ctx);
}

void TestMAPGPair(Context const* ctx) {
  std::unique_ptr<xgboost::ObjFunction> obj{xgboost::ObjFunction::Create("rank:map", ctx)};
  Args args;
  obj->Configure(args);
  CheckConfigReload(obj, "rank:map");

  CheckRankingObjFunction(obj,                                                 // obj
                          {0, 0.1f, 0, 0.1f},                                  // score
                          {0, 1, 0, 1},                                        // label
                          {2.0f, 2.0f},                                        // weight
                          {0, 2, 4},                                           // group
                          {1.2054923f, -1.2054923f, 1.2054923f, -1.2054923f},  // out grad
                          {1.2657166f, 1.2657166f, 1.2657166f, 1.2657166f});   // out hess
  // Disable the second query group with 0 weight.
  CheckRankingObjFunction(obj,                                   // obj
                          {0, 0.1f, 0, 0.1f},                    // score
                          {0, 1, 0, 1},                          // label
                          {2.0f, 0.0f},                          // weight
                          {0, 2, 4},                             // group
                          {1.2054923f, -1.2054923f, .0f, .0f},   // out grad
                          {1.2657166f, 1.2657166f, .0f, .0f});   // out hess
}

TEST(LambdaRank, MAPGPair) {
  Context ctx;
  TestMAPGPair(&ctx);
}

void TestPairWiseGPair(Context const* ctx) {
  std::unique_ptr<xgboost::ObjFunction> obj{xgboost::ObjFunction::Create("rank:pairwise", ctx)};
  Args args;
  obj->Configure(args);
  args.emplace_back("lambdarank_unbiased", "true");
}

TEST(LambdaRank, Pairwise) {
  Context ctx;
  TestPairWiseGPair(&ctx);
}
}  // namespace xgboost::obj