Define pair generation strategies for LTR. (#8984)
This commit is contained in:
106
tests/cpp/objective/test_lambdarank_obj.cc
Normal file
106
tests/cpp/objective/test_lambdarank_obj.cc
Normal file
@@ -0,0 +1,106 @@
|
||||
/**
|
||||
* Copyright 2023 by XGBoost Contributors
|
||||
*/
|
||||
#include "test_lambdarank_obj.h"
|
||||
|
||||
#include <gtest/gtest.h> // for Test, Message, TestPartResult, CmpHel...
|
||||
|
||||
#include <cstddef> // for size_t
|
||||
#include <initializer_list> // for initializer_list
|
||||
#include <map> // for map
|
||||
#include <memory> // for unique_ptr, shared_ptr, make_shared
|
||||
#include <numeric> // for iota
|
||||
#include <string> // for char_traits, basic_string, string
|
||||
#include <vector> // for vector
|
||||
|
||||
#include "../../../src/common/ranking_utils.h" // for LambdaRankParam
|
||||
#include "../../../src/common/ranking_utils.h" // for NDCGCache, LambdaRankParam
|
||||
#include "../helpers.h" // for CheckRankingObjFunction, CheckConfigReload
|
||||
#include "xgboost/base.h" // for GradientPair, bst_group_t, Args
|
||||
#include "xgboost/context.h" // for Context
|
||||
#include "xgboost/data.h" // for MetaInfo, DMatrix
|
||||
#include "xgboost/host_device_vector.h" // for HostDeviceVector
|
||||
#include "xgboost/linalg.h" // for Tensor, All, TensorView
|
||||
#include "xgboost/objective.h" // for ObjFunction
|
||||
#include "xgboost/span.h" // for Span
|
||||
|
||||
namespace xgboost::obj {
|
||||
/**
 * \brief Populate the inputs shared by the make pair tests.
 *
 * The meta info receives 128 rows with a (128, 1) label tensor whose values alternate
 * 0, 1, 0, 1, ...  The prediction vector is assigned to the device given by the context
 * and filled with strictly decreasing scores 127, 126, ..., 0.
 *
 * \param ctx       Context providing the device ordinal for the predictions.
 * \param out_info  Meta info that receives the row count and the labels.
 * \param out_predt Prediction vector that receives the scores.
 */
void InitMakePairTest(Context const* ctx, MetaInfo* out_info, HostDeviceVector<float>* out_predt) {
  out_predt->SetDevice(ctx->gpu_id);

  out_info->num_row_ = 128;
  out_info->labels.ModifyInplace(
      [&](HostDeviceVector<float>* storage, common::Span<std::size_t> dims) {
        dims[0] = out_info->num_row_;
        dims[1] = 1;
        auto& h_labels = storage->HostVector();
        h_labels.resize(dims[0]);
        // Alternate between the two relevance levels.
        std::size_t row = 0;
        for (auto& label : h_labels) {
          label = row % 2;
          ++row;
        }
      });

  // The first row gets the highest score, the last row gets 0.
  std::vector<float> scores(out_info->num_row_);
  std::size_t const n_scores = scores.size();
  for (std::size_t k = 0; k < n_scores; ++k) {
    scores[k] = static_cast<float>(n_scores - 1 - k);
  }
  out_predt->HostVector() = scores;
}
|
||||
|
||||
// Checks the host pair generation for the "topk" and "mean" pair methods using the data
// produced by InitMakePairTest.
TEST(LambdaRank, MakePair) {
  Context ctx;
  MetaInfo info;
  HostDeviceVector<float> predt;
  InitMakePairTest(&ctx, &info, &predt);

  ltr::LambdaRankParam param;
  param.UpdateAllowUnknown(Args{{"lambdarank_pair_method", "topk"}});
  ASSERT_TRUE(param.HasTruncation());

  auto const& h_predt = predt.ConstHostVector();
  std::shared_ptr<ltr::RankingCache> topk_cache =
      std::make_shared<ltr::NDCGCache>(&ctx, info, param);

  // topk: the sorted index must follow the predictions read back to front, and every
  // generated pair must be ordered with the first element below the pair limit.
  {
    auto rank_idx = topk_cache->SortedIdx(&ctx, h_predt);
    std::size_t const n_predt = h_predt.size();
    for (std::size_t k = 0; k < n_predt; ++k) {
      ASSERT_EQ(rank_idx[k], static_cast<std::size_t>(h_predt[n_predt - 1 - k]));
    }

    std::int32_t n_pairs = 0;
    MakePairs(&ctx, 0, topk_cache, 0, info.labels.HostView().Slice(linalg::All(), 0), rank_idx,
              [&](auto i, auto j) {
                ASSERT_GT(j, i);
                ASSERT_LT(i, topk_cache->Param().NumPair());
                ++n_pairs;
              });
    ASSERT_EQ(n_pairs, 3568);
  }

  auto const h_label = info.labels.HostView();

  // mean with the default number of pairs per sample.
  {
    param.UpdateAllowUnknown(Args{{"lambdarank_pair_method", "mean"}});
    ASSERT_FALSE(param.HasTruncation());

    auto mean_cache = std::make_shared<ltr::NDCGCache>(&ctx, info, param);
    auto rank_idx = mean_cache->SortedIdx(&ctx, h_predt);
    std::int32_t n_pairs{0};
    MakePairs(&ctx, 0, mean_cache, 0, info.labels.HostView().Slice(linalg::All(), 0), rank_idx,
              [&](auto i, auto j) {
                ++n_pairs;
                // Both members of a pair must carry different labels.
                ASSERT_NE(h_label(rank_idx[i]), h_label(rank_idx[j]));
              });
    ASSERT_EQ(n_pairs, info.num_row_ * param.NumPair());
  }

  // mean with two pairs per sample.
  {
    param.UpdateAllowUnknown(Args{{"lambdarank_num_pair_per_sample", "2"}});

    auto mean_cache = std::make_shared<ltr::NDCGCache>(&ctx, info, param);
    auto rank_idx = mean_cache->SortedIdx(&ctx, h_predt);
    std::int32_t n_pairs{0};
    MakePairs(&ctx, 0, mean_cache, 0, info.labels.HostView().Slice(linalg::All(), 0), rank_idx,
              [&](auto i, auto j) {
                ++n_pairs;
                // Both members of a pair must carry different labels.
                ASSERT_NE(h_label(rank_idx[i]), h_label(rank_idx[j]));
              });
    ASSERT_EQ(param.NumPair(), 2);
    ASSERT_EQ(n_pairs, info.num_row_ * param.NumPair());
  }
}
|
||||
} // namespace xgboost::obj
|
||||
138
tests/cpp/objective/test_lambdarank_obj.cu
Normal file
138
tests/cpp/objective/test_lambdarank_obj.cu
Normal file
@@ -0,0 +1,138 @@
|
||||
/**
|
||||
* Copyright 2023 by XGBoost Contributors
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
#include <xgboost/context.h> // for Context
|
||||
|
||||
#include <cstdint> // for uint32_t
|
||||
#include <vector> // for vector
|
||||
|
||||
#include "../../../src/common/cuda_context.cuh" // for CUDAContext
|
||||
#include "../../../src/objective/lambdarank_obj.cuh"
|
||||
#include "test_lambdarank_obj.h"
|
||||
|
||||
namespace xgboost::obj {
|
||||
void TestGPUMakePair() {
|
||||
Context ctx;
|
||||
ctx.gpu_id = 0;
|
||||
|
||||
MetaInfo info;
|
||||
HostDeviceVector<float> predt;
|
||||
InitMakePairTest(&ctx, &info, &predt);
|
||||
|
||||
ltr::LambdaRankParam param;
|
||||
|
||||
auto make_args = [&](std::shared_ptr<ltr::RankingCache> p_cache, auto rank_idx,
|
||||
common::Span<std::size_t const> y_sorted_idx) {
|
||||
linalg::Vector<double> dummy;
|
||||
auto d = dummy.View(ctx.gpu_id);
|
||||
linalg::Vector<GradientPair> dgpair;
|
||||
auto dg = dgpair.View(ctx.gpu_id);
|
||||
cuda_impl::KernelInputs args{d,
|
||||
d,
|
||||
d,
|
||||
d,
|
||||
p_cache->DataGroupPtr(&ctx),
|
||||
p_cache->CUDAThreadsGroupPtr(),
|
||||
rank_idx,
|
||||
info.labels.View(ctx.gpu_id),
|
||||
predt.ConstDeviceSpan(),
|
||||
{},
|
||||
dg,
|
||||
nullptr,
|
||||
y_sorted_idx,
|
||||
0};
|
||||
return args;
|
||||
};
|
||||
|
||||
{
|
||||
param.UpdateAllowUnknown(Args{{"lambdarank_pair_method", "topk"}});
|
||||
auto p_cache = std::make_shared<ltr::NDCGCache>(&ctx, info, param);
|
||||
auto rank_idx = p_cache->SortedIdx(&ctx, predt.ConstDeviceSpan());
|
||||
|
||||
ASSERT_EQ(p_cache->CUDAThreads(), 3568);
|
||||
|
||||
auto args = make_args(p_cache, rank_idx, {});
|
||||
auto n_pairs = p_cache->Param().NumPair();
|
||||
auto make_pair = cuda_impl::MakePairsOp<true>{args};
|
||||
|
||||
dh::LaunchN(p_cache->CUDAThreads(), ctx.CUDACtx()->Stream(),
|
||||
[=] XGBOOST_DEVICE(std::size_t idx) {
|
||||
auto [i, j] = make_pair(idx, 0);
|
||||
SPAN_CHECK(j > i);
|
||||
SPAN_CHECK(i < n_pairs);
|
||||
});
|
||||
}
|
||||
{
|
||||
param.UpdateAllowUnknown(Args{{"lambdarank_pair_method", "mean"}});
|
||||
auto p_cache = std::make_shared<ltr::NDCGCache>(&ctx, info, param);
|
||||
auto rank_idx = p_cache->SortedIdx(&ctx, predt.ConstDeviceSpan());
|
||||
auto y_sorted_idx = cuda_impl::SortY(&ctx, info, rank_idx, p_cache);
|
||||
|
||||
ASSERT_FALSE(param.HasTruncation());
|
||||
ASSERT_EQ(p_cache->CUDAThreads(), info.num_row_ * param.NumPair());
|
||||
|
||||
auto args = make_args(p_cache, rank_idx, y_sorted_idx);
|
||||
auto make_pair = cuda_impl::MakePairsOp<false>{args};
|
||||
auto n_pairs = p_cache->Param().NumPair();
|
||||
ASSERT_EQ(n_pairs, 1);
|
||||
|
||||
dh::LaunchN(
|
||||
p_cache->CUDAThreads(), ctx.CUDACtx()->Stream(), [=] XGBOOST_DEVICE(std::size_t idx) {
|
||||
idx = 97;
|
||||
auto [i, j] = make_pair(idx, 0);
|
||||
// Not in the same bucket
|
||||
SPAN_CHECK(make_pair.args.labels(rank_idx[i]) != make_pair.args.labels(rank_idx[j]));
|
||||
});
|
||||
}
|
||||
{
|
||||
param.UpdateAllowUnknown(Args{{"lambdarank_num_pair_per_sample", "2"}});
|
||||
auto p_cache = std::make_shared<ltr::NDCGCache>(&ctx, info, param);
|
||||
auto rank_idx = p_cache->SortedIdx(&ctx, predt.ConstDeviceSpan());
|
||||
auto y_sorted_idx = cuda_impl::SortY(&ctx, info, rank_idx, p_cache);
|
||||
|
||||
auto args = make_args(p_cache, rank_idx, y_sorted_idx);
|
||||
auto make_pair = cuda_impl::MakePairsOp<false>{args};
|
||||
|
||||
dh::LaunchN(
|
||||
p_cache->CUDAThreads(), ctx.CUDACtx()->Stream(), [=] XGBOOST_DEVICE(std::size_t idx) {
|
||||
auto [i, j] = make_pair(idx, 0);
|
||||
// Not in the same bucket
|
||||
SPAN_CHECK(make_pair.args.labels(rank_idx[i]) != make_pair.args.labels(rank_idx[j]));
|
||||
});
|
||||
ASSERT_EQ(param.NumPair(), 2);
|
||||
ASSERT_EQ(p_cache->CUDAThreads(), info.num_row_ * param.NumPair());
|
||||
}
|
||||
}
|
||||
|
||||
// gtest entry point for the GPU pair generation checks; the body lives in a free function.
TEST(LambdaRank, GPUMakePair) {
  TestGPUMakePair();
}
|
||||
|
||||
template <typename CountFunctor>
|
||||
void RankItemCountImpl(std::vector<std::uint32_t> const &sorted_items, CountFunctor f,
|
||||
std::uint32_t find_val, std::uint32_t exp_val) {
|
||||
EXPECT_NE(std::find(sorted_items.begin(), sorted_items.end(), find_val), sorted_items.end());
|
||||
EXPECT_EQ(f(&sorted_items[0], sorted_items.size(), find_val), exp_val);
|
||||
}
|
||||
|
||||
// Table-driven check of cuda_impl::CountNumItemsToTheLeftOf.
TEST(LambdaRank, RankItemCountOnLeft) {
  // Input in descending order, including runs of equal values.
  std::vector<std::uint32_t> sorted_items{10, 10, 6, 4, 4, 4, 4, 1, 1, 1, 1, 1, 0};
  auto wrapper = [](auto const &...args) { return cuda_impl::CountNumItemsToTheLeftOf(args...); };

  // Each entry: value to look up and the count the function is expected to return.
  struct Expectation {
    std::uint32_t find_val;
    std::uint32_t exp_val;
  };
  Expectation const expectations[] = {{10u, 0u}, {6u, 2u}, {4u, 3u}, {1u, 7u}, {0u, 12u}};
  for (auto const &e : expectations) {
    RankItemCountImpl(sorted_items, wrapper, e.find_val, e.exp_val);
  }
}
|
||||
|
||||
// Table-driven check of cuda_impl::CountNumItemsToTheRightOf.
TEST(LambdaRank, RankItemCountOnRight) {
  // Input in descending order, including runs of equal values.
  std::vector<std::uint32_t> sorted_items{10, 10, 6, 4, 4, 4, 4, 1, 1, 1, 1, 1, 0};
  auto wrapper = [](auto const &...args) { return cuda_impl::CountNumItemsToTheRightOf(args...); };

  // Each entry: value to look up and the count the function is expected to return.
  struct Expectation {
    std::uint32_t find_val;
    std::uint32_t exp_val;
  };
  Expectation const expectations[] = {{10u, 11u}, {6u, 10u}, {4u, 6u}, {1u, 1u}, {0u, 0u}};
  for (auto const &e : expectations) {
    RankItemCountImpl(sorted_items, wrapper, e.find_val, e.exp_val);
  }
}
|
||||
} // namespace xgboost::obj
|
||||
26
tests/cpp/objective/test_lambdarank_obj.h
Normal file
26
tests/cpp/objective/test_lambdarank_obj.h
Normal file
@@ -0,0 +1,26 @@
|
||||
/**
|
||||
* Copyright 2023, XGBoost Contributors
|
||||
*/
|
||||
#ifndef XGBOOST_OBJECTIVE_TEST_LAMBDARANK_OBJ_H_
|
||||
#define XGBOOST_OBJECTIVE_TEST_LAMBDARANK_OBJ_H_
|
||||
#include <gtest/gtest.h>
|
||||
#include <xgboost/data.h> // for MetaInfo
|
||||
#include <xgboost/host_device_vector.h> // for HostDeviceVector
|
||||
#include <xgboost/linalg.h> // for All
|
||||
#include <xgboost/objective.h> // for ObjFunction
|
||||
|
||||
#include <memory> // for shared_ptr, make_shared
|
||||
#include <numeric> // for iota
|
||||
#include <vector> // for vector
|
||||
|
||||
#include "../../../src/common/ranking_utils.h" // for LambdaRankParam, MAPCache
|
||||
#include "../../../src/objective/lambdarank_obj.h" // for MAPStat
|
||||
#include "../helpers.h" // for EmptyDMatrix
|
||||
|
||||
namespace xgboost::obj {
|
||||
/**
|
||||
* \brief Initialize test data for make pair tests.
|
||||
*/
|
||||
// Writes the row count and labels into out_info and the prediction scores into out_predt;
// ctx supplies the device ordinal that out_predt is assigned to.
void InitMakePairTest(Context const* ctx, MetaInfo* out_info, HostDeviceVector<float>* out_predt);
|
||||
} // namespace xgboost::obj
|
||||
#endif // XGBOOST_OBJECTIVE_TEST_LAMBDARANK_OBJ_H_
|
||||
@@ -89,43 +89,6 @@ TEST(Objective, RankSegmentSorterAscendingTest) {
|
||||
5, 4, 6});
|
||||
}
|
||||
|
||||
using CountFunctor = uint32_t (*)(const int *, uint32_t, int);
|
||||
void RankItemCountImpl(const std::vector<int> &sorted_items, CountFunctor f,
|
||||
int find_val, uint32_t exp_val) {
|
||||
EXPECT_NE(std::find(sorted_items.begin(), sorted_items.end(), find_val), sorted_items.end());
|
||||
EXPECT_EQ(f(&sorted_items[0], sorted_items.size(), find_val), exp_val);
|
||||
}
|
||||
|
||||
// Table-driven check of xgboost::obj::CountNumItemsToTheLeftOf.
TEST(Objective, RankItemCountOnLeft) {
  // Input in descending order, including runs of equal values.
  std::vector<int> sorted_items{10, 10, 6, 4, 4, 4, 4, 1, 1, 1, 1, 1, 0};

  // Each entry: value to look up and the count the function is expected to return.
  struct Expectation {
    int find_val;
    uint32_t exp_val;
  };
  const Expectation expectations[] = {{10, 0u}, {6, 2u}, {4, 3u}, {1, 7u}, {0, 12u}};
  for (const auto &e : expectations) {
    RankItemCountImpl(sorted_items, &xgboost::obj::CountNumItemsToTheLeftOf,
                      e.find_val, e.exp_val);
  }
}
|
||||
|
||||
// Table-driven check of xgboost::obj::CountNumItemsToTheRightOf.
TEST(Objective, RankItemCountOnRight) {
  // Input in descending order, including runs of equal values.
  std::vector<int> sorted_items{10, 10, 6, 4, 4, 4, 4, 1, 1, 1, 1, 1, 0};

  // Each entry: value to look up and the count the function is expected to return.
  struct Expectation {
    int find_val;
    uint32_t exp_val;
  };
  const Expectation expectations[] = {{10, 11u}, {6, 10u}, {4, 6u}, {1, 1u}, {0, 0u}};
  for (const auto &e : expectations) {
    RankItemCountImpl(sorted_items, &xgboost::obj::CountNumItemsToTheRightOf,
                      e.find_val, e.exp_val);
  }
}
|
||||
|
||||
TEST(Objective, NDCGLambdaWeightComputerTest) {
|
||||
std::vector<float> hlabels = {3.1f, 1.2f, 2.3f, 4.4f, // Labels
|
||||
7.8f, 5.01f, 6.96f,
|
||||
|
||||
Reference in New Issue
Block a user