xgboost/tests/cpp/common/test_ranking_utils.cc
2023-03-13 22:16:31 +08:00

181 lines
5.4 KiB
C++

/**
* Copyright 2023 by XGBoost Contributors
*/
#include "test_ranking_utils.h"
#include <gtest/gtest.h>
#include <xgboost/base.h> // for Args, bst_group_t, kRtEps
#include <xgboost/context.h> // for Context
#include <xgboost/data.h> // for MetaInfo, DMatrix
#include <xgboost/host_device_vector.h> // for HostDeviceVector
#include <xgboost/logging.h> // for Error
#include <xgboost/string_view.h> // for StringView
#include <cstddef> // for size_t
#include <cstdint> // for uint32_t
#include <numeric> // for iota
#include <utility> // for move
#include <vector> // for vector
#include "../../../src/common/numeric.h" // for Iota
#include "../../../src/common/ranking_utils.h" // for LambdaRankParam, ParseMetricName, MakeMet...
#include "../helpers.h" // for EmptyDMatrix
namespace xgboost::ltr {
// Two LambdaRankParam instances must keep independent state: updating one of them
// through the parameter interface may never be visible through the other.
TEST(RankingUtils, LambdaRankParam) {
  LambdaRankParam lhs;
  lhs.UpdateAllowUnknown(Args{{"lambdarank_num_pair_per_sample", "3"}});
  ASSERT_EQ(lhs.NumPair(), 3);

  // Configure a second instance; the first one has to keep its own value.
  LambdaRankParam rhs;
  rhs.UpdateAllowUnknown(Args{{"lambdarank_num_pair_per_sample", "8"}});
  ASSERT_EQ(lhs.NumPair(), 3);
  ASSERT_EQ(rhs.NumPair(), 8);

  // Re-configure the first instance; now the second one has to stay put.
  lhs.UpdateAllowUnknown(Args{{"lambdarank_num_pair_per_sample", "17"}});
  ASSERT_EQ(lhs.NumPair(), 17);
  ASSERT_EQ(rhs.NumPair(), 8);
}
// Exercises ParseMetricName with different parameter suffixes and checks which of the
// two output arguments each call writes to.
TEST(RankingUtils, ParseMetricName) {
  std::uint32_t top_k{32};
  bool has_minus{false};

  // "3-": both outputs are written and the full metric name is returned.
  auto metric = ParseMetricName("ndcg", "3-", &top_k, &has_minus);
  ASSERT_EQ(metric, "ndcg@3-");
  ASSERT_EQ(top_k, 3);
  ASSERT_TRUE(has_minus);

  // "6": only the truncation level is written; the flag keeps its previous value.
  metric = ParseMetricName("ndcg", "6", &top_k, &has_minus);
  ASSERT_EQ(top_k, 6);
  ASSERT_TRUE(has_minus);

  // "-": reset the flag first so that the call itself has to set it again; the
  // truncation level keeps its previous value.
  has_minus = false;
  metric = ParseMetricName("ndcg", "-", &top_k, &has_minus);
  ASSERT_EQ(top_k, 6);
  ASSERT_TRUE(has_minus);

  // A null parameter leaves both outputs as they were.
  metric = ParseMetricName("ndcg", nullptr, &top_k, &has_minus);
  ASSERT_EQ(top_k, 6);
  ASSERT_TRUE(has_minus);

  // So does a default-constructed StringView.
  metric = ParseMetricName("ndcg", StringView{}, &top_k, &has_minus);
  ASSERT_EQ(top_k, 6);
  ASSERT_TRUE(has_minus);
}
// MakeMetricName composes "<name>[@<topn>][-]" from its three arguments; the four
// combinations of (topn set / not set) x (minus / no minus) are checked here.
TEST(RankingUtils, MakeMetricName) {
  // Truncation level not set: only the optional minus suffix is appended.
  ASSERT_EQ(MakeMetricName("map", LambdaRankParam::NotSet(), true), "map-");
  ASSERT_EQ(MakeMetricName("map", LambdaRankParam::NotSet(), false), "map");

  // Truncation level set: it appears after '@', ahead of the optional minus suffix.
  ASSERT_EQ(MakeMetricName("map", 2, true), "map@2-");
  ASSERT_EQ(MakeMetricName("map", 2, false), "map@2");
}
// Shared body of the RankingCache tests, run against the supplied context.
//
// Builds a 16-row MetaInfo with alternating 0/1 labels, feeds strictly increasing
// predictions to RankingCache::SortedIdx and expects the returned indices to be in
// exactly reversed order.
void TestRankingCache(Context const* ctx) {
  auto p_fmat = EmptyDMatrix();
  MetaInfo& info = p_fmat->Info();

  info.num_row_ = 16;
  info.labels.Reshape(info.num_row_);
  auto& labels = info.labels.Data()->HostVector();
  for (std::size_t row = 0; row < labels.size(); ++row) {
    labels[row] = row % 2;
  }

  LambdaRankParam param;
  param.UpdateAllowUnknown(Args{});

  RankingCache cache{ctx, info, param};

  // Predictions 0, 1, ..., n - 1: the last row has the largest score.
  HostDeviceVector<float> predt(info.num_row_, 0);
  auto& scores = predt.HostVector();
  std::iota(scores.begin(), scores.end(), 0.0f);
  predt.SetDevice(ctx->gpu_id);

  // Hand over whichever view of the predictions matches the context.
  auto const predt_span = ctx->IsCPU() ? predt.ConstHostSpan() : predt.ConstDeviceSpan();
  auto sorted_idx = cache.SortedIdx(ctx, predt_span);

  auto const n_sorted = sorted_idx.size();
  for (std::size_t pos = 0; pos < n_sorted; ++pos) {
    ASSERT_EQ(sorted_idx[pos], n_sorted - pos - 1);
  }
}
// Runs the shared RankingCache checks with a default-constructed Context.
TEST(RankingCache, InitFromCPU) {
  Context default_ctx;
  TestRankingCache(&default_ctx);
}
// Shared body of the NDCGCache tests, run against the supplied context.
//
// The steps below mutate `info` and `param` cumulatively, so their order matters:
//   1. construction from an untouched MetaInfo,
//   2. construction failures for missing and for invalid labels,
//   3. the inverse IDCG of a single query group under several parameter settings.
void TestNDCGCache(Context const* ctx) {
  auto p_fmat = EmptyDMatrix();
  MetaInfo& info = p_fmat->Info();
  LambdaRankParam param;
  param.UpdateAllowUnknown(Args{});

  {
    // Nothing has been set on the meta info yet; the cache still exposes a group
    // pointer with two entries.
    NDCGCache empty_cache{ctx, info, param};
    ASSERT_EQ(empty_cache.DataGroupPtr(ctx).size(), 2);
  }

  // A single query group spanning all three rows.
  info.num_row_ = 3;
  info.group_ptr_ = {static_cast<bst_group_t>(0), static_cast<bst_group_t>(info.num_row_)};

  {
    auto build_cache = [&] { NDCGCache cache{ctx, info, param}; };

    // Rows are declared, but there are no labels at all.
    ASSERT_THROW(build_cache(), dmlc::Error);

    // The labels {0.0, 0.1, 0.2} are rejected as invalid.
    info.labels = linalg::Matrix<float>{{0.0f, 0.1f, 0.2f}, {3}, Context::kCpuId};
    ASSERT_THROW(build_cache(), dmlc::Error);

    // Multiply every label by 10 in place (0, 0.1, 0.2 -> 0, 1, 2) and turn the
    // exponential gain off before constructing the cache for real.
    auto label_view = info.labels.HostView();
    for (std::size_t idx = 0; idx < label_view.Size(); ++idx) {
      label_view(idx) *= 10;
    }
    param.UpdateAllowUnknown(Args{{"ndcg_exp_gain", "false"}});
    NDCGCache cache{ctx, info, param};

    // The inverse IDCG is always read through a default-constructed context.
    Context host_ctx;
    auto inv_idcg = cache.InvIDCG(&host_ctx);
    ASSERT_EQ(inv_idcg.Size(), 1);
    // 2.63093 is consistent with 2 / log2(2) + 1 / log2(3) for the labels {2, 1, 0}.
    ASSERT_NEAR(1.0 / inv_idcg(0), 2.63093, kRtEps);
  }

  {
    param.UpdateAllowUnknown(Args{{"lambdarank_unbiased", "false"}});

    // Swap in 32 labels filled by common::Iota starting from 0, and grow the single
    // group so that it covers all of them.
    std::vector<float> relevance(32);
    common::Iota(ctx, relevance.begin(), relevance.end(), 0.0f);
    info.labels.Reshape(relevance.size());
    info.num_row_ = relevance.size();
    info.group_ptr_.back() = info.num_row_;
    info.labels.Data()->HostVector() = std::move(relevance);

    {
      // Parameters as accumulated so far.
      NDCGCache cache{ctx, info, param};
      Context host_ctx;
      auto inv_idcg = cache.InvIDCG(&host_ctx);
      ASSERT_NEAR(inv_idcg(0), 0.00551782, kRtEps);
    }

    // Switch to top-k pair construction with 3 pairs per sample; the expected
    // inverse IDCG changes accordingly.
    param.UpdateAllowUnknown(
        Args{{"lambdarank_num_pair_per_sample", "3"}, {"lambdarank_pair_method", "topk"}});
    {
      NDCGCache cache{ctx, info, param};
      Context host_ctx;
      auto inv_idcg = cache.InvIDCG(&host_ctx);
      ASSERT_NEAR(inv_idcg(0), 0.01552123, kRtEps);
    }
  }
}
// Runs the shared NDCGCache checks with a default-constructed Context.
TEST(NDCGCache, InitFromCPU) {
  Context default_ctx;
  TestNDCGCache(&default_ctx);
}
} // namespace xgboost::ltr