Rework the NDCG objective. (#9015)

This commit is contained in:
Jiaming Yuan
2023-04-18 21:16:06 +08:00
committed by GitHub
parent ba9d24ff7b
commit ef13dd31b1
15 changed files with 1082 additions and 351 deletions

View File

@@ -5,6 +5,7 @@
#include <gtest/gtest.h> // for Test, Message, TestPartResult, CmpHel...
#include <algorithm> // for sort
#include <cstddef> // for size_t
#include <initializer_list> // for initializer_list
#include <map> // for map
@@ -13,7 +14,6 @@
#include <string> // for char_traits, basic_string, string
#include <vector> // for vector
#include "../../../src/common/ranking_utils.h" // for LambdaRankParam
#include "../../../src/common/ranking_utils.h" // for NDCGCache, LambdaRankParam
#include "../helpers.h" // for CheckRankingObjFunction, CheckConfigReload
#include "xgboost/base.h" // for GradientPair, bst_group_t, Args
@@ -25,6 +25,126 @@
#include "xgboost/span.h" // for Span
namespace xgboost::obj {
// Runs the shared rank:ndcg JSON configuration check with a default-constructed
// Context (no gpu_id override).
TEST(LambdaRank, NDCGJsonIO) {
  Context default_ctx;
  TestNDCGJsonIO(&default_ctx);
}
/**
 * \brief Check the gradient pairs produced by the `rank:ndcg` objective when it is
 *        configured with the `topk` pair method.
 *
 * The test has three parts:
 *  - fixed-value checks through CheckRankingObjFunction (including a configuration
 *    reload check),
 *  - sign checks on gradients / hessians for a two-group data set,
 *  - a check that per-group weights scale the gradient pairs linearly.
 *
 * \param ctx Context handed to ObjFunction::Create.
 */
void TestNDCGGPair(Context const* ctx) {
  // NOTE(review): the braced lists passed to CheckRankingObjFunction are presumably, in
  // order: predictions, labels, group weights, group pointers, expected gradients,
  // expected hessians -- confirm against helpers.h.
  {
    std::unique_ptr<xgboost::ObjFunction> obj{xgboost::ObjFunction::Create("rank:ndcg", ctx)};
    obj->Configure(Args{{"lambdarank_pair_method", "topk"}});
    CheckConfigReload(obj, "rank:ndcg");

    // No gain in swapping 2 documents.
    CheckRankingObjFunction(obj,
                            {1, 1, 1, 1},
                            {1, 1, 1, 1},
                            {1.0f, 1.0f},
                            {0, 2, 4},
                            {0.0f, -0.0f, 0.0f, 0.0f},
                            {0.0f, 0.0f, 0.0f, 0.0f});
  }
  {
    std::unique_ptr<xgboost::ObjFunction> obj{xgboost::ObjFunction::Create("rank:ndcg", ctx)};
    obj->Configure(Args{{"lambdarank_pair_method", "topk"}});

    // Test with setting sample weight to second query group
    CheckRankingObjFunction(obj,
                            {0, 0.1f, 0, 0.1f},
                            {0, 1, 0, 1},
                            {2.0f, 0.0f},
                            {0, 2, 4},
                            {2.06611f, -2.06611f, 0.0f, 0.0f},
                            {2.169331f, 2.169331f, 0.0f, 0.0f});

    CheckRankingObjFunction(obj,
                            {0, 0.1f, 0, 0.1f},
                            {0, 1, 0, 1},
                            {2.0f, 2.0f},
                            {0, 2, 4},
                            {2.06611f, -2.06611f, 2.06611f, -2.06611f},
                            {2.169331f, 2.169331f, 2.169331f, 2.169331f});
  }

  std::unique_ptr<xgboost::ObjFunction> obj{xgboost::ObjFunction::Create("rank:ndcg", ctx)};
  obj->Configure(Args{{"lambdarank_pair_method", "topk"}});

  // Two query groups of two documents each.
  HostDeviceVector<float> predts{0, 1, 0, 1};
  MetaInfo info;
  info.labels = linalg::Tensor<float, 2>{{0, 1, 0, 1}, {4, 1}, GPUIDX};
  info.group_ptr_ = {0, 2, 4};
  info.num_row_ = 4;

  // One gradient pair must be produced per prediction. A distinct name is used here so
  // the vector in the scope below does not shadow it.
  HostDeviceVector<GradientPair> initial_gpairs;
  obj->GetGradient(predts, info, 0, &initial_gpairs);
  ASSERT_EQ(initial_gpairs.Size(), predts.Size());

  {
    // Predictions now disagree with the labels inside each group.
    predts = {1, 0, 1, 0};
    HostDeviceVector<GradientPair> gpairs;
    obj->GetGradient(predts, info, 0, &gpairs);

    // The checks below only read the result, so take the read-only host view once.
    auto const h_gpairs = gpairs.ConstHostSpan();
    for (std::size_t i = 0; i < gpairs.Size(); ++i) {
      ASSERT_GT(h_gpairs[i].GetHess(), 0);
    }
    // Documents with label 1 (index 1 and 3) get a negative gradient, the others a
    // positive one.
    ASSERT_LT(h_gpairs[1].GetGrad(), 0);
    ASSERT_LT(h_gpairs[3].GetGrad(), 0);
    ASSERT_GT(h_gpairs[0].GetGrad(), 0);
    ASSERT_GT(h_gpairs[2].GetGrad(), 0);

    // One weight per query group; each group's gradient pairs must scale by its weight.
    info.weights_ = {2, 3};
    HostDeviceVector<GradientPair> weighted_gpairs;
    obj->GetGradient(predts, info, 0, &weighted_gpairs);
    auto const h_weighted_gpairs = weighted_gpairs.ConstHostSpan();
    for (std::size_t i : {0ul, 1ul}) {
      ASSERT_FLOAT_EQ(h_weighted_gpairs[i].GetGrad(), h_gpairs[i].GetGrad() * 2.0f);
      ASSERT_FLOAT_EQ(h_weighted_gpairs[i].GetHess(), h_gpairs[i].GetHess() * 2.0f);
    }
    for (std::size_t i : {2ul, 3ul}) {
      ASSERT_FLOAT_EQ(h_weighted_gpairs[i].GetGrad(), h_gpairs[i].GetGrad() * 3.0f);
      ASSERT_FLOAT_EQ(h_weighted_gpairs[i].GetHess(), h_gpairs[i].GetHess() * 3.0f);
    }
  }

  ASSERT_NO_THROW(obj->DefaultEvalMetric());
}
// Runs the shared rank:ndcg gradient-pair checks with a default-constructed Context.
TEST(LambdaRank, NDCGGPair) {
  Context default_ctx;
  TestNDCGGPair(&default_ctx);
}
/**
 * \brief Run one gradient computation of `rank:ndcg` with `lambdarank_unbiased` enabled
 *        and inspect the `ti+` / `tj-` arrays found in the saved configuration.
 *
 * \param ctx Context handed to ObjFunction::Create.
 */
void TestUnbiasedNDCG(Context const* ctx) {
  Args const unbiased_args{{"lambdarank_pair_method", "topk"},
                           {"lambdarank_unbiased", "true"},
                           {"lambdarank_bias_norm", "0"}};
  std::unique_ptr<xgboost::ObjFunction> obj{xgboost::ObjFunction::Create("rank:ndcg", ctx)};
  obj->Configure(unbiased_args);

  std::shared_ptr<DMatrix> dmat{RandomDataGenerator{10, 1, 0.0f}.GenerateDMatrix(true, false, 2)};
  MetaInfo const& info = dmat->Info();

  // Sort the labels in descending order so that clicked samples come first.
  auto labels = info.labels.HostView().Values();
  std::sort(labels.begin(), labels.end(), std::greater<>{});

  // Every document receives the same prediction.
  HostDeviceVector<float> predt(info.num_row_, 1.0f);
  HostDeviceVector<GradientPair> out_gpair;
  obj->GetGradient(predt, info, 0, &out_gpair);

  Json saved{Object{}};
  obj->SaveConfig(&saved);

  auto ti_plus = get<F32Array const>(saved["ti+"]);
  ASSERT_FLOAT_EQ(ti_plus[0], 1.0);
  // With constant predictions (constant cost on swapping documents) the bias must not
  // increase from one position to the next.
  for (std::size_t prev = 0; prev + 1 < ti_plus.size(); ++prev) {
    ASSERT_LE(ti_plus[prev + 1], ti_plus[prev]);
  }

  auto tj_minus = get<F32Array const>(saved["tj-"]);
  ASSERT_FLOAT_EQ(tj_minus[0], 1.0);
}
// Runs the shared unbiased rank:ndcg checks with a default-constructed Context.
TEST(LambdaRank, UnbiasedNDCG) {
  Context default_ctx;
  TestUnbiasedNDCG(&default_ctx);
}
void InitMakePairTest(Context const* ctx, MetaInfo* out_info, HostDeviceVector<float>* out_predt) {
out_predt->SetDevice(ctx->gpu_id);
MetaInfo& info = *out_info;

View File

@@ -12,6 +12,18 @@
#include "test_lambdarank_obj.h"
namespace xgboost::obj {
// Same JSON configuration check as the non-GPU test, but with the Context's gpu_id set
// to 0.
TEST(LambdaRank, GPUNDCGJsonIO) {
  Context gpu_ctx;
  gpu_ctx.gpu_id = 0;
  TestNDCGJsonIO(&gpu_ctx);
}
// Same gradient-pair checks as the non-GPU test, but with the Context's gpu_id set to 0.
TEST(LambdaRank, GPUNDCGGPair) {
  Context gpu_ctx;
  gpu_ctx.gpu_id = 0;
  TestNDCGGPair(&gpu_ctx);
}
void TestGPUMakePair() {
Context ctx;
ctx.gpu_id = 0;
@@ -107,6 +119,12 @@ void TestGPUMakePair() {
// Thin gtest entry point; the whole test body lives in TestGPUMakePair.
TEST(LambdaRank, GPUMakePair) {
  TestGPUMakePair();
}
// Same unbiased rank:ndcg checks as the non-GPU test, but with the Context's gpu_id set
// to 0.
TEST(LambdaRank, GPUUnbiasedNDCG) {
  Context gpu_ctx;
  gpu_ctx.gpu_id = 0;
  TestUnbiasedNDCG(&gpu_ctx);
}
template <typename CountFunctor>
void RankItemCountImpl(std::vector<std::uint32_t> const &sorted_items, CountFunctor f,
std::uint32_t find_val, std::uint32_t exp_val) {

View File

@@ -1,5 +1,5 @@
/**
* Copyright 2023, XGBoost Contributors
* Copyright (c) 2023, XGBoost Contributors
*/
#ifndef XGBOOST_OBJECTIVE_TEST_LAMBDARANK_OBJ_H_
#define XGBOOST_OBJECTIVE_TEST_LAMBDARANK_OBJ_H_
@@ -18,6 +18,25 @@
#include "../helpers.h" // for EmptyDMatrix
namespace xgboost::obj {
/**
 * \brief Verify the configuration that a `rank:ndcg` objective, configured with empty
 *        arguments, writes out through SaveConfig.
 *
 * \param ctx Context handed to ObjFunction::Create.
 */
inline void TestNDCGJsonIO(Context const* ctx) {
  std::unique_ptr<xgboost::ObjFunction> obj{ObjFunction::Create("rank:ndcg", ctx)};
  obj->Configure(Args{});

  Json saved{Object()};
  obj->SaveConfig(&saved);
  ASSERT_EQ(get<String>(saved["name"]), "rank:ndcg");

  // Parameters are stored as strings under "lambdarank_param".
  auto const& saved_param = saved["lambdarank_param"];
  auto const expected_num_pair = std::to_string(ltr::LambdaRankParam::NotSet());
  ASSERT_EQ(get<String>(saved_param["ndcg_exp_gain"]), "1");
  ASSERT_EQ(get<String>(saved_param["lambdarank_num_pair_per_sample"]), expected_num_pair);
}
// Declarations only; the definitions are not part of this header. Each function takes
// the Context that is forwarded to the objective under test.
// Gradient-pair checks for the rank:ndcg objective.
void TestNDCGGPair(Context const* ctx);
// Checks for the rank:ndcg objective with unbiased lambdarank enabled.
void TestUnbiasedNDCG(Context const* ctx);
/**
* \brief Initialize test data for make pair tests.
*/

View File

@@ -35,24 +35,6 @@ TEST(Objective, DeclareUnifiedTest(PairwiseRankingGPair)) {
ASSERT_NO_THROW(obj->DefaultEvalMetric());
}
// Checks the configuration saved by a `rank:ndcg` objective that was configured with
// empty arguments.
TEST(Objective, DeclareUnifiedTest(NDCG_JsonIO)) {
  xgboost::Context ctx;
  ctx.UpdateAllowUnknown(Args{});

  std::unique_ptr<xgboost::ObjFunction> obj{xgboost::ObjFunction::Create("rank:ndcg", &ctx)};
  obj->Configure(Args{});

  Json saved{Object()};
  obj->SaveConfig(&saved);
  ASSERT_EQ(get<String>(saved["name"]), "rank:ndcg");

  // Parameters are stored as strings under "lambda_rank_param".
  auto const& saved_param = saved["lambda_rank_param"];
  ASSERT_EQ(get<String>(saved_param["num_pairsample"]), "1");
  ASSERT_EQ(get<String>(saved_param["fix_list_weight"]), "0");
}
TEST(Objective, DeclareUnifiedTest(PairwiseRankingGPairSameLabels)) {
std::vector<std::pair<std::string, std::string>> args;
xgboost::Context ctx = xgboost::CreateEmptyGenericParam(GPUIDX);
@@ -71,33 +53,6 @@ TEST(Objective, DeclareUnifiedTest(PairwiseRankingGPairSameLabels)) {
ASSERT_NO_THROW(obj->DefaultEvalMetric());
}
// Fixed-value gradient checks for `rank:ndcg` configured with no arguments.
// NOTE(review): the braced lists passed to CheckRankingObjFunction are presumably, in
// order: predictions, labels, group weights, group pointers, expected gradients,
// expected hessians -- confirm against helpers.h.
TEST(Objective, DeclareUnifiedTest(NDCGRankingGPair)) {
  xgboost::Context ctx = xgboost::CreateEmptyGenericParam(GPUIDX);
  std::vector<std::pair<std::string, std::string>> empty_args;

  std::unique_ptr<xgboost::ObjFunction> obj{xgboost::ObjFunction::Create("rank:ndcg", &ctx)};
  obj->Configure(empty_args);
  CheckConfigReload(obj, "rank:ndcg");

  // Test with setting sample weight to second query group
  CheckRankingObjFunction(obj,
                          {0, 0.1f, 0, 0.1f},
                          {0, 1, 0, 1},
                          {2.0f, 0.0f},
                          {0, 2, 4},
                          {0.7f, -0.7f, 0.0f, 0.0f},
                          {0.74f, 0.74f, 0.0f, 0.0f});

  // Equal weight on both groups.
  CheckRankingObjFunction(obj,
                          {0, 0.1f, 0, 0.1f},
                          {0, 1, 0, 1},
                          {1.0f, 1.0f},
                          {0, 2, 4},
                          {0.35f, -0.35f, 0.35f, -0.35f},
                          {0.368f, 0.368f, 0.368f, 0.368f});

  ASSERT_NO_THROW(obj->DefaultEvalMetric());
}
TEST(Objective, DeclareUnifiedTest(MAPRankingGPair)) {
std::vector<std::pair<std::string, std::string>> args;
xgboost::Context ctx = xgboost::CreateEmptyGenericParam(GPUIDX);

View File

@@ -89,62 +89,6 @@ TEST(Objective, RankSegmentSorterAscendingTest) {
5, 4, 6});
}
// Exercises NDCGLambdaWeightComputer on three groups of labels/predictions: checks the
// sorted prediction positions it exposes and compares its per-group DCG values with
// values recomputed on the host via ComputeGroupDCGWeight.
TEST(Objective, NDCGLambdaWeightComputerTest) {
std::vector<float> hlabels = {3.1f, 1.2f, 2.3f, 4.4f, // Labels
7.8f, 5.01f, 6.96f,
10.3f, 8.7f, 11.4f, 9.45f, 11.4f};
dh::device_vector<bst_float> dlabels(hlabels);
// Sort the labels per group; the helper also receives the expected sorted labels and
// expected original positions listed below.
auto segment_label_sorter = RankSegmentSorterTestImpl<float>(
{0, 4, 7, 12}, // Groups
hlabels,
{4.4f, 3.1f, 2.3f, 1.2f, // Expected sorted labels
7.8f, 6.96f, 5.01f,
11.4f, 11.4f, 10.3f, 9.45f, 8.7f},
{3, 0, 2, 1, // Expected original positions
4, 6, 5,
9, 11, 7, 10, 8});
// Create segmented predictions for the labels from above
std::vector<bst_float> hpreds{-9.78f, 24.367f, 0.908f, -11.47f,
-1.03f, -2.79f, -3.1f,
104.22f, 103.1f, -101.7f, 100.5f, 45.1f};
dh::device_vector<bst_float> dpreds(hpreds);
xgboost::obj::NDCGLambdaWeightComputer ndcg_lw_computer(dpreds.data().get(),
dlabels.data().get(),
*segment_label_sorter);
// Where will the predictions move from its current position, if they were sorted
// descendingly?
auto dsorted_pred_pos = ndcg_lw_computer.GetPredictionSorter().GetIndexableSortedPositionsSpan();
std::vector<uint32_t> hsorted_pred_pos(segment_label_sorter->GetNumItems());
dh::CopyDeviceSpanToVector(&hsorted_pred_pos, dsorted_pred_pos);
std::vector<uint32_t> expected_sorted_pred_pos{2, 0, 1, 3,
4, 5, 6,
7, 8, 11, 9, 10};
EXPECT_EQ(expected_sorted_pred_pos, hsorted_pred_pos);
// Check group DCG values
std::vector<float> hgroup_dcgs(segment_label_sorter->GetNumGroups());
dh::CopyDeviceSpanToVector(&hgroup_dcgs, ndcg_lw_computer.GetGroupDcgsSpan());
// Group boundaries: one more entry than the number of groups.
std::vector<uint32_t> hgroups(segment_label_sorter->GetNumGroups() + 1);
dh::CopyDeviceSpanToVector(&hgroups, segment_label_sorter->GetGroupsSpan());
EXPECT_EQ(hgroup_dcgs.size(), segment_label_sorter->GetNumGroups());
std::vector<float> hsorted_labels(segment_label_sorter->GetNumItems());
dh::CopyDeviceSpanToVector(&hsorted_labels, segment_label_sorter->GetItemsSpan());
for (size_t i = 0; i < hgroup_dcgs.size(); ++i) {
// Compute group DCG value on CPU and compare
auto gbegin = hgroups[i];
auto gend = hgroups[i + 1];
// Values must agree within an absolute tolerance of 0.01.
EXPECT_NEAR(
hgroup_dcgs[i],
xgboost::obj::NDCGLambdaWeightComputer::ComputeGroupDCGWeight(&hsorted_labels[gbegin],
gend - gbegin),
0.01f);
}
}
TEST(Objective, IndexableSortedItemsTest) {
std::vector<float> hlabels = {3.1f, 1.2f, 2.3f, 4.4f, // Labels
7.8f, 5.01f, 6.96f,

View File

@@ -1,3 +1,4 @@
import json
import sys
import pytest
@@ -36,19 +37,16 @@ class TestGPUEvalMetrics:
Xy = xgboost.DMatrix(X, y, group=group)
cpu = xgboost.train(
booster = xgboost.train(
{"tree_method": "hist", "eval_metric": "auc", "objective": "rank:ndcg"},
Xy,
num_boost_round=10,
)
cpu_auc = float(cpu.eval(Xy).split(":")[1])
gpu = xgboost.train(
{"tree_method": "gpu_hist", "eval_metric": "auc", "objective": "rank:ndcg"},
Xy,
num_boost_round=10,
)
gpu_auc = float(gpu.eval(Xy).split(":")[1])
cpu_auc = float(booster.eval(Xy).split(":")[1])
booster.set_param({"gpu_id": "0"})
assert json.loads(booster.save_config())["learner"]["generic_param"]["gpu_id"] == "0"
gpu_auc = float(booster.eval(Xy).split(":")[1])
assert json.loads(booster.save_config())["learner"]["generic_param"]["gpu_id"] == "0"
np.testing.assert_allclose(cpu_auc, gpu_auc)