Rework MAP and Pairwise for LTR. (#9075)

This commit is contained in:
Jiaming Yuan
2023-04-28 02:39:12 +08:00
committed by GitHub
parent 0e470ef606
commit e206b899ef
19 changed files with 612 additions and 1135 deletions

View File

@@ -223,4 +223,125 @@ TEST(LambdaRank, MakePair) {
ASSERT_EQ(n_pairs, info.num_row_ * param.NumPair());
}
}
/**
 * \brief Run the MAP statistics routine for the given context and inspect the values
 *        exposed by ltr::MAPCache::NumRelevant and ltr::MAPCache::Acc.
 *
 * obj::cpu_impl::MAPStat is used when the context reports CPU, obj::cuda_impl::MAPStat
 * otherwise. The cached results are always read back through a default-constructed
 * context.
 *
 * \param ctx Context selecting the implementation under test.
 */
void TestMAPStat(Context const* ctx) {
  auto p_fmat = EmptyDMatrix();
  MetaInfo& info = p_fmat->Info();
  ltr::LambdaRankParam param;
  param.UpdateAllowUnknown(Args{});

  // Shared by both scenarios: build a cache from the current meta info, obtain the sorted
  // index for the predictions, then dispatch to the implementation matching the context.
  auto compute_stat = [&](HostDeviceVector<float>* p_predt) {
    auto p_cache = std::make_shared<ltr::MAPCache>(ctx, info, param);
    p_predt->SetDevice(ctx->gpu_id);
    auto sorted_idx = p_cache->SortedIdx(
        ctx, ctx->IsCPU() ? p_predt->ConstHostSpan() : p_predt->ConstDeviceSpan());
    if (ctx->IsCPU()) {
      obj::cpu_impl::MAPStat(ctx, info.labels.HostView().Slice(linalg::All(), 0), sorted_idx,
                             p_cache);
    } else {
      obj::cuda_impl::MAPStat(ctx, info, sorted_idx, p_cache);
    }
    return p_cache;
  };

  // Scenario 1: six samples without explicit group information, one of them labelled 0.
  {
    std::vector<float> h_data{1.0f, 1.0f, 0.0f, 1.0f, 1.0f, 1.0f};
    info.labels.Reshape(h_data.size(), 1);
    info.labels.Data()->HostVector() = h_data;
    info.num_row_ = h_data.size();

    HostDeviceVector<float> predt;
    auto& h_predt = predt.HostVector();
    h_predt.resize(h_data.size());
    // Filled back to front, so the scores read 5, 4, ..., 0 by index.
    std::iota(h_predt.rbegin(), h_predt.rend(), 0.0f);

    auto p_cache = compute_stat(&predt);

    Context cpu_ctx;
    auto n_rel = p_cache->NumRelevant(&cpu_ctx);
    auto acc = p_cache->Acc(&cpu_ctx);

    ASSERT_EQ(n_rel[0], 1.0);
    ASSERT_EQ(acc[0], 1.0);

    ASSERT_EQ(n_rel.back(), h_data.size() - 1.0);
    ASSERT_NEAR(acc.back(), 1.95 + (1.0 / h_data.size()), kRtEps);
  }

  // Scenario 2: two groups of eight samples; the first group is labelled 1 throughout,
  // the second group 0 throughout.
  {
    info.labels.Reshape(16);
    auto& h_label = info.labels.Data()->HostVector();
    info.group_ptr_ = {0, 8, 16};
    info.num_row_ = info.labels.Shape(0);

    auto second_group = h_label.begin() + 8;
    std::fill(h_label.begin(), second_group, 1.0f);
    std::fill(second_group, second_group + 8, 0.0f);

    HostDeviceVector<float> predt;
    auto& h_predt = predt.HostVector();
    h_predt.resize(h_label.size());
    // Within each group the scores read 7, 6, ..., 0 by index.
    std::iota(h_predt.rbegin(), h_predt.rbegin() + 8, 0.0f);
    std::iota(h_predt.rbegin() + 8, h_predt.rend(), 0.0f);

    auto p_cache = compute_stat(&predt);

    Context cpu_ctx;
    auto n_rel = p_cache->NumRelevant(&cpu_ctx);
    ASSERT_EQ(n_rel[7], 8);      // first group
    ASSERT_EQ(n_rel.back(), 0);  // second group
  }
}
// Runs the shared MAP statistics check with a default-constructed context.
TEST(LambdaRank, MAPStat) {
  Context default_ctx;
  TestMAPStat(&default_ctx);
}
/**
 * \brief Check the gradient pairs produced by the `rank:map` objective against
 *        reference values for two query groups of two documents each.
 *
 * \param ctx Context used to create the objective.
 */
void TestMAPGPair(Context const* ctx) {
  std::unique_ptr<xgboost::ObjFunction> obj{xgboost::ObjFunction::Create("rank:map", ctx)};
  // Default configuration: no extra arguments are supplied.
  obj->Configure(Args{});
  CheckConfigReload(obj, "rank:map");

  // Both query groups share the same weight.
  CheckRankingObjFunction(obj,                                                  // obj
                          {0, 0.1f, 0, 0.1f},                                   // score
                          {0, 1, 0, 1},                                         // label
                          {2.0f, 2.0f},                                         // weight
                          {0, 2, 4},                                            // group
                          {1.2054923f, -1.2054923f, 1.2054923f, -1.2054923f},   // out grad
                          {1.2657166f, 1.2657166f, 1.2657166f, 1.2657166f});    // out hess

  // A zero weight disables the second query group: its gradient and hessian are zero.
  CheckRankingObjFunction(obj,                                     // obj
                          {0, 0.1f, 0, 0.1f},                      // score
                          {0, 1, 0, 1},                            // label
                          {2.0f, 0.0f},                            // weight
                          {0, 2, 4},                               // group
                          {1.2054923f, -1.2054923f, .0f, .0f},     // out grad
                          {1.2657166f, 1.2657166f, .0f, .0f});     // out hess
}
// Runs the shared `rank:map` gradient check with a default-constructed context.
TEST(LambdaRank, MAPGPair) {
  Context default_ctx;
  TestMAPGPair(&default_ctx);
}
/**
 * \brief Smoke test for the `rank:pairwise` objective: it can be created and configured
 *        with default parameters on the given context.
 *
 * Only the default configuration is exercised; no gradient values are checked.
 *
 * \param ctx Context used to create the objective.
 */
void TestPairWiseGPair(Context const* ctx) {
  std::unique_ptr<xgboost::ObjFunction> obj{xgboost::ObjFunction::Create("rank:pairwise", ctx)};
  // An argument list that is extended after Configure() has returned never reaches the
  // objective, so no ("lambdarank_unbiased", "true") entry is appended here.
  obj->Configure(Args{});
  // TODO(review): if the unbiased mode is meant to be covered, pass
  // {"lambdarank_unbiased", "true"} to Configure() and add gradient checks.
}
// Runs the shared `rank:pairwise` check with a default-constructed context.
TEST(LambdaRank, Pairwise) {
  Context default_ctx;
  TestPairWiseGPair(&default_ctx);
}
} // namespace xgboost::obj

View File

@@ -18,6 +18,12 @@ TEST(LambdaRank, GPUNDCGJsonIO) {
TestNDCGJsonIO(&ctx);
}
// Runs the shared MAP statistics check with gpu_id set to 0.
TEST(LambdaRank, GPUMAPStat) {
  Context gpu_ctx;
  gpu_ctx.gpu_id = 0;
  TestMAPStat(&gpu_ctx);
}
TEST(LambdaRank, GPUNDCGGPair) {
Context ctx;
ctx.gpu_id = 0;
@@ -153,4 +159,10 @@ TEST(LambdaRank, RankItemCountOnRight) {
RankItemCountImpl(sorted_items, wrapper, 1, static_cast<uint32_t>(1));
RankItemCountImpl(sorted_items, wrapper, 0, static_cast<uint32_t>(0));
}
// Runs the shared `rank:map` gradient check with gpu_id set to 0.
TEST(LambdaRank, GPUMAPGPair) {
  Context gpu_ctx;
  gpu_ctx.gpu_id = 0;
  TestMAPGPair(&gpu_ctx);
}
} // namespace xgboost::obj

View File

@@ -18,6 +18,8 @@
#include "../helpers.h" // for EmptyDMatrix
namespace xgboost::obj {
/**
 * \brief Shared MAP statistics check; called by the test cases with the context they
 *        want to exercise.
 */
void TestMAPStat(Context const* ctx);
inline void TestNDCGJsonIO(Context const* ctx) {
std::unique_ptr<xgboost::ObjFunction> obj{ObjFunction::Create("rank:ndcg", ctx)};
@@ -37,6 +39,8 @@ void TestNDCGGPair(Context const* ctx);
void TestUnbiasedNDCG(Context const* ctx);
/**
 * \brief Shared gradient check for the `rank:map` objective; called by the test cases
 *        with the context they want to exercise.
 */
void TestMAPGPair(Context const* ctx);
/**
* \brief Initialize test data for make pair tests.
*/

View File

@@ -1,83 +0,0 @@
// Copyright by Contributors
#include <xgboost/context.h>
#include <xgboost/json.h>
#include <xgboost/objective.h>
#include "../helpers.h"
namespace xgboost {
// Gradient check for `rank:pairwise` on two query groups of two documents each.
TEST(Objective, DeclareUnifiedTest(PairwiseRankingGPair)) {
  std::vector<std::pair<std::string, std::string>> no_args;
  xgboost::Context ctx = xgboost::CreateEmptyGenericParam(GPUIDX);
  std::unique_ptr<xgboost::ObjFunction> objective{
      xgboost::ObjFunction::Create("rank:pairwise", &ctx)};
  objective->Configure(no_args);
  CheckConfigReload(objective, "rank:pairwise");

  // The second query group has weight zero, so its gradient and hessian are expected
  // to be zero.
  CheckRankingObjFunction(objective,
                          {0, 0.1f, 0, 0.1f},
                          {0, 1, 0, 1},
                          {2.0f, 0.0f},
                          {0, 2, 4},
                          {1.9f, -1.9f, 0.0f, 0.0f},
                          {1.995f, 1.995f, 0.0f, 0.0f});

  // Unit weight on both query groups.
  CheckRankingObjFunction(objective,
                          {0, 0.1f, 0, 0.1f},
                          {0, 1, 0, 1},
                          {1.0f, 1.0f},
                          {0, 2, 4},
                          {0.95f, -0.95f, 0.95f, -0.95f},
                          {0.9975f, 0.9975f, 0.9975f, 0.9975f});

  ASSERT_NO_THROW(objective->DefaultEvalMetric());
}
// `rank:pairwise` with identical labels everywhere: all gradients and hessians are zero.
TEST(Objective, DeclareUnifiedTest(PairwiseRankingGPairSameLabels)) {
  std::vector<std::pair<std::string, std::string>> no_args;
  xgboost::Context ctx = xgboost::CreateEmptyGenericParam(GPUIDX);
  std::unique_ptr<ObjFunction> objective{ObjFunction::Create("rank:pairwise", &ctx)};
  objective->Configure(no_args);

  // Without any diversity in the labels no gradient/hessian is computed.
  CheckRankingObjFunction(objective,
                          {0, 0.1f, 0, 0.1f},
                          {1, 1, 1, 1},
                          {2.0f, 0.0f},
                          {0, 2, 4},
                          {0.0f, 0.0f, 0.0f, 0.0f},
                          {0.0f, 0.0f, 0.0f, 0.0f});

  ASSERT_NO_THROW(objective->DefaultEvalMetric());
}
// Gradient check for `rank:map` on two query groups of two documents each.
TEST(Objective, DeclareUnifiedTest(MAPRankingGPair)) {
  std::vector<std::pair<std::string, std::string>> no_args;
  xgboost::Context ctx = xgboost::CreateEmptyGenericParam(GPUIDX);
  std::unique_ptr<xgboost::ObjFunction> objective{
      xgboost::ObjFunction::Create("rank:map", &ctx)};
  objective->Configure(no_args);
  CheckConfigReload(objective, "rank:map");

  // The second query group has weight zero, so its gradient and hessian are expected
  // to be zero.
  CheckRankingObjFunction(objective,
                          {0, 0.1f, 0, 0.1f},
                          {0, 1, 0, 1},
                          {2.0f, 0.0f},
                          {0, 2, 4},
                          {0.95f, -0.95f, 0.0f, 0.0f},
                          {0.9975f, 0.9975f, 0.0f, 0.0f});

  // Unit weight on both query groups.
  CheckRankingObjFunction(objective,
                          {0, 0.1f, 0, 0.1f},
                          {0, 1, 0, 1},
                          {1.0f, 1.0f},
                          {0, 2, 4},
                          {0.475f, -0.475f, 0.475f, -0.475f},
                          {0.4988f, 0.4988f, 0.4988f, 0.4988f});

  ASSERT_NO_THROW(objective->DefaultEvalMetric());
}
} // namespace xgboost

View File

@@ -1,175 +0,0 @@
/*!
* Copyright 2019-2021 by XGBoost Contributors
*/
#include <thrust/host_vector.h>
#include "test_ranking_obj.cc"
#include "../../../src/objective/rank_obj.cu"
namespace xgboost {
template <typename T = uint32_t, typename Comparator = thrust::greater<T>>
std::unique_ptr<dh::SegmentSorter<T>>
RankSegmentSorterTestImpl(const std::vector<uint32_t> &group_indices,
const std::vector<T> &hlabels,
const std::vector<T> &expected_sorted_hlabels,
const std::vector<uint32_t> &expected_orig_pos
) {
std::unique_ptr<dh::SegmentSorter<T>> seg_sorter_ptr(new dh::SegmentSorter<T>);
dh::SegmentSorter<T> &seg_sorter(*seg_sorter_ptr);
// Create a bunch of unsorted labels on the device and sort it via the segment sorter
dh::device_vector<T> dlabels(hlabels);
seg_sorter.SortItems(dlabels.data().get(), dlabels.size(), group_indices, Comparator());
auto num_items = seg_sorter.GetItemsSpan().size();
EXPECT_EQ(num_items, group_indices.back());
EXPECT_EQ(seg_sorter.GetNumGroups(), group_indices.size() - 1);
// Check the labels
dh::device_vector<T> sorted_dlabels(num_items);
sorted_dlabels.assign(dh::tcbegin(seg_sorter.GetItemsSpan()),
dh::tcend(seg_sorter.GetItemsSpan()));
thrust::host_vector<T> sorted_hlabels(sorted_dlabels);
EXPECT_EQ(expected_sorted_hlabels, sorted_hlabels);
// Check the indices
dh::device_vector<uint32_t> dorig_pos(num_items);
dorig_pos.assign(dh::tcbegin(seg_sorter.GetOriginalPositionsSpan()),
dh::tcend(seg_sorter.GetOriginalPositionsSpan()));
dh::device_vector<uint32_t> horig_pos(dorig_pos);
EXPECT_EQ(expected_orig_pos, horig_pos);
return seg_sorter_ptr;
}
// Eight groups of varying size sorted with the default (greater-than) comparator; the
// expected positions keep equal labels in their original order.
TEST(Objective, RankSegmentSorterTest) {
  const std::vector<uint32_t> groups{0, 2, 4, 7, 10, 14, 18, 22, 26};

  const std::vector<uint32_t> labels{1, 1,
                                     1, 2,
                                     3, 2, 1,
                                     1, 2, 1,
                                     1, 3, 4, 2,
                                     1, 2, 1, 1,
                                     1, 2, 2, 3,
                                     3, 3, 1, 2};

  const std::vector<uint32_t> expected_sorted_labels{1, 1,
                                                     2, 1,
                                                     3, 2, 1,
                                                     2, 1, 1,
                                                     4, 3, 2, 1,
                                                     2, 1, 1, 1,
                                                     3, 2, 2, 1,
                                                     3, 3, 2, 1};

  const std::vector<uint32_t> expected_original_positions{0, 1,
                                                          3, 2,
                                                          4, 5, 6,
                                                          8, 7, 9,
                                                          12, 11, 13, 10,
                                                          15, 14, 16, 17,
                                                          21, 19, 20, 18,
                                                          22, 23, 25, 24};

  RankSegmentSorterTestImpl<uint32_t>(groups, labels, expected_sorted_labels,
                                      expected_original_positions);
}
// A single group of seven distinct labels sorted with the default comparator.
TEST(Objective, RankSegmentSorterSingleGroupTest) {
  const std::vector<uint32_t> groups{0, 7};
  const std::vector<uint32_t> labels{6, 1, 4, 3, 0, 5, 2};
  const std::vector<uint32_t> expected_sorted_labels{6, 5, 4, 3, 2, 1, 0};
  const std::vector<uint32_t> expected_original_positions{0, 5, 2, 3, 6, 1, 4};

  RankSegmentSorterTestImpl<uint32_t>(groups, labels, expected_sorted_labels,
                                      expected_original_positions);
}
// Two groups sorted in ascending order by passing thrust::less as the comparator.
TEST(Objective, RankSegmentSorterAscendingTest) {
  const std::vector<uint32_t> groups{0, 4, 7};

  const std::vector<uint32_t> labels{3, 1, 4, 2,
                                     6, 5, 7};

  const std::vector<uint32_t> expected_sorted_labels{1, 2, 3, 4,
                                                     5, 6, 7};

  const std::vector<uint32_t> expected_original_positions{1, 3, 0, 2,
                                                          5, 4, 6};

  RankSegmentSorterTestImpl<uint32_t, thrust::less<uint32_t>>(
      groups, labels, expected_sorted_labels, expected_original_positions);
}
// Sorts three groups of float labels, then checks the positions produced by
// SegmentSorter::CreateIndexableSortedPositions. For this data the expected values are
// the inverse of the original-position permutation: entry i holds the sorted position
// of original item i.
TEST(Objective, IndexableSortedItemsTest) {
  std::vector<float> hlabels = {3.1f, 1.2f, 2.3f, 4.4f,  // Labels
                                7.8f, 5.01f, 6.96f,
                                10.3f, 8.7f, 11.4f, 9.45f, 11.4f};

  // The helper uploads the labels to the device itself, so no separate device copy is
  // created here.
  auto segment_label_sorter = RankSegmentSorterTestImpl<float>(
    {0, 4, 7, 12},                      // Groups
    hlabels,
    {4.4f, 3.1f, 2.3f, 1.2f,            // Expected sorted labels
     7.8f, 6.96f, 5.01f,
     11.4f, 11.4f, 10.3f, 9.45f, 8.7f},
    {3, 0, 2, 1,                        // Expected original positions
     4, 6, 5,
     9, 11, 7, 10, 8});

  segment_label_sorter->CreateIndexableSortedPositions();

  // Bring the indexable positions back to the host for comparison
  std::vector<uint32_t> sorted_indices(segment_label_sorter->GetNumItems());
  dh::CopyDeviceSpanToVector(&sorted_indices,
                             segment_label_sorter->GetIndexableSortedPositionsSpan());

  std::vector<uint32_t> expected_sorted_indices = {
    1, 3, 2, 0,
    4, 6, 5,
    9, 11, 7, 10, 8};
  EXPECT_EQ(expected_sorted_indices, sorted_indices);
}
// Computes MAP statistics on the device through MAPLambdaWeightComputer and compares
// them, group by group, with the statistics computed on the host by
// MAPLambdaWeightComputer::GetMAPStats.
TEST(Objective, ComputeAndCompareMAPStatsTest) {
  using MAPComputer = xgboost::obj::MAPLambdaWeightComputer;
  using MAPStats = MAPComputer::MAPStats;

  std::vector<float> hlabels = {3.1f, 0.0f, 2.3f, 4.4f,
                                0.0f, 5.01f, 0.0f,
                                10.3f, 0.0f, 11.4f, 9.45f, 11.4f};
  dh::device_vector<bst_float> dlabels(hlabels);

  const std::vector<uint32_t> groups{0, 4, 7, 12};
  const std::vector<float> expected_sorted_labels{4.4f, 3.1f, 2.3f, 0.0f,
                                                  5.01f, 0.0f, 0.0f,
                                                  11.4f, 11.4f, 10.3f, 9.45f, 0.0f};
  const std::vector<uint32_t> expected_original_positions{3, 0, 2, 1,
                                                          5, 4, 6,
                                                          9, 11, 7, 10, 8};
  auto segment_label_sorter = RankSegmentSorterTestImpl<float>(
      groups, hlabels, expected_sorted_labels, expected_original_positions);

  // Device side: the computer is built from the device predictions, the device labels
  // and the label sorter.
  std::vector<bst_float> hpreds{-9.78f, 24.367f, 0.908f, -11.47f,
                                -1.03f, -2.79f, -3.1f,
                                104.22f, 103.1f, -101.7f, 100.5f, 45.1f};
  dh::device_vector<bst_float> dpreds(hpreds);
  MAPComputer map_lw_computer(dpreds.data().get(),
                              dlabels.data().get(),
                              *segment_label_sorter);

  // Copy the device statistics to the host.
  std::vector<MAPStats> device_stats(segment_label_sorter->GetNumItems());
  dh::CopyDeviceSpanToVector(&device_stats, map_lw_computer.GetMapStatsSpan());

  // Host side: fetch the group boundaries, then recompute the statistics per group.
  std::vector<uint32_t> hgroups(segment_label_sorter->GetNumGroups() + 1);
  dh::CopyDeviceSpanToVector(&hgroups, segment_label_sorter->GetGroupsSpan());

  for (size_t group = 0; group < hgroups.size() - 1; ++group) {
    const auto gbegin = hgroups[group];
    const auto gend = hgroups[group + 1];

    // Gather the entries of this group and order them with ListEntry::CmpPred.
    std::vector<xgboost::obj::ListEntry> entries;
    for (auto idx = gbegin; idx < gend; ++idx) {
      entries.emplace_back(hpreds[idx], hlabels[idx], idx);
    }
    std::stable_sort(entries.begin(), entries.end(), xgboost::obj::ListEntry::CmpPred);

    std::vector<MAPStats> host_stats;
    MAPComputer::GetMAPStats(entries, &host_stats);

    // Compare the device entry at idx with the host entry at the group-local offset.
    for (auto idx = gbegin; idx < gend; ++idx) {
      const auto &on_device = device_stats[idx];
      const auto &on_host = host_stats[idx - gbegin];
      EXPECT_EQ(on_device.hits, on_host.hits);
      EXPECT_NEAR(on_device.ap_acc, on_host.ap_acc, 0.01f);
      EXPECT_NEAR(on_device.ap_acc_miss, on_host.ap_acc_miss, 0.01f);
      EXPECT_NEAR(on_device.ap_acc_add, on_host.ap_acc_add, 0.01f);
    }
  }
}
} // namespace xgboost