Implement NDCG cache. (#8893)

parent 9bade7203a
commit 8be6095ece

src/common/ranking_utils.cc
@@ -6,9 +6,7 @@
#include <algorithm>   // for copy_n, max, min, none_of, all_of
#include <cstddef>     // for size_t
#include <cstdio>      // for sscanf
#include <exception>   // for exception
#include <functional>  // for greater
#include <iterator>    // for reverse_iterator
#include <string>      // for char_traits, string

#include "algorithm.h"  // for ArgSort
@@ -18,10 +16,102 @@
#include "xgboost/base.h"     // for bst_group_t
#include "xgboost/context.h"  // for Context
#include "xgboost/data.h"     // for MetaInfo
-#include "xgboost/linalg.h"   // for All, TensorView, Range, Tensor, Vector
-#include "xgboost/logging.h"  // for Error, LogCheck_EQ, CHECK_EQ
+#include "xgboost/linalg.h"   // for All, TensorView, Range
+#include "xgboost/logging.h"  // for CHECK_EQ

namespace xgboost::ltr {
void RankingCache::InitOnCPU(Context const* ctx, MetaInfo const& info) {
  if (info.group_ptr_.empty()) {
    group_ptr_.Resize(2, 0);
    group_ptr_.HostVector()[1] = info.num_row_;
  } else {
    group_ptr_.HostVector() = info.group_ptr_;
  }

  auto const& gptr = group_ptr_.ConstHostVector();
  for (std::size_t i = 1; i < gptr.size(); ++i) {
    std::size_t n = gptr[i] - gptr[i - 1];
    max_group_size_ = std::max(max_group_size_, n);
  }

  double sum_weights = 0;
  auto n_groups = Groups();
  auto weight = common::MakeOptionalWeights(ctx, info.weights_);
  for (bst_omp_uint k = 0; k < n_groups; ++k) {
    sum_weights += weight[k];
  }
  weight_norm_ = static_cast<double>(n_groups) / sum_weights;
}
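A note on the normalization at the end of InitOnCPU: weight_norm_ rescales query weights so that they average to one over the groups. For example, two groups with weights {1, 3} give weight_norm_ = 2 / 4 = 0.5, so the effective weights become {0.5, 1.5} and sum back to the number of groups.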

common::Span<std::size_t const> RankingCache::MakeRankOnCPU(Context const* ctx,
                                                            common::Span<float const> predt) {
  auto gptr = this->DataGroupPtr(ctx);
  auto rank = this->sorted_idx_cache_.HostSpan();
  CHECK_EQ(rank.size(), predt.size());

  common::ParallelFor(this->Groups(), ctx->Threads(), [&](auto g) {
    auto cnt = gptr[g + 1] - gptr[g];
    auto g_predt = predt.subspan(gptr[g], cnt);
    auto g_rank = rank.subspan(gptr[g], cnt);
    auto sorted_idx = common::ArgSort<std::size_t>(
        ctx, g_predt.data(), g_predt.data() + g_predt.size(), std::greater<>{});
    CHECK_EQ(g_rank.size(), sorted_idx.size());
    std::copy_n(sorted_idx.data(), sorted_idx.size(), g_rank.data());
  });

  return rank;
}

#if !defined(XGBOOST_USE_CUDA)
void RankingCache::InitOnCUDA(Context const*, MetaInfo const&) { common::AssertGPUSupport(); }
common::Span<std::size_t const> RankingCache::MakeRankOnCUDA(Context const*,
                                                             common::Span<float const>) {
  common::AssertGPUSupport();
  return {};
}
#endif  // !defined(XGBOOST_USE_CUDA)

void NDCGCache::InitOnCPU(Context const* ctx, MetaInfo const& info) {
  auto const h_group_ptr = this->DataGroupPtr(ctx);

  discounts_.Resize(MaxGroupSize(), 0);
  auto& h_discounts = discounts_.HostVector();
  for (std::size_t i = 0; i < MaxGroupSize(); ++i) {
    h_discounts[i] = CalcDCGDiscount(i);
  }

  auto n_groups = h_group_ptr.size() - 1;
  auto h_labels = info.labels.HostView().Slice(linalg::All(), 0);

  CheckNDCGLabels(this->Param(), h_labels,
                  [](auto beg, auto end, auto op) { return std::none_of(beg, end, op); });

  inv_idcg_.Reshape(n_groups);
  auto h_inv_idcg = inv_idcg_.HostView();
  std::size_t topk = this->Param().TopK();
  auto const exp_gain = this->Param().ndcg_exp_gain;

  common::ParallelFor(n_groups, ctx->Threads(), [&](auto g) {
    auto g_labels = h_labels.Slice(linalg::Range(h_group_ptr[g], h_group_ptr[g + 1]));
    auto sorted_idx = common::ArgSort<std::size_t>(ctx, linalg::cbegin(g_labels),
                                                   linalg::cend(g_labels), std::greater<>{});

    double idcg{0.0};
    for (std::size_t i = 0; i < std::min(g_labels.Size(), topk); ++i) {
      if (exp_gain) {
        idcg += h_discounts[i] * CalcDCGGain(g_labels(sorted_idx[i]));
      } else {
        idcg += h_discounts[i] * g_labels(sorted_idx[i]);
      }
    }
    h_inv_idcg(g) = CalcInvIDCG(idcg);
  });
}

#if !defined(XGBOOST_USE_CUDA)
void NDCGCache::InitOnCUDA(Context const*, MetaInfo const&) { common::AssertGPUSupport(); }
#endif  // !defined(XGBOOST_USE_CUDA)

DMLC_REGISTER_PARAMETER(LambdaRankParam);

std::string ParseMetricName(StringView name, StringView param, position_t* topn, bool* minus) {

src/common/ranking_utils.cu (new file, 207 lines)
@@ -0,0 +1,207 @@
/**
 * Copyright 2023 by XGBoost Contributors
 */
#include <thrust/functional.h>                  // for maximum
#include <thrust/iterator/counting_iterator.h>  // for make_counting_iterator
#include <thrust/logical.h>                     // for none_of, all_of
#include <thrust/pair.h>                        // for pair, make_pair
#include <thrust/reduce.h>                      // for reduce
#include <thrust/scan.h>                        // for inclusive_scan

#include <cstddef>  // for size_t

#include "algorithm.cuh"        // for SegmentedArgSort
#include "cuda_context.cuh"     // for CUDAContext
#include "device_helpers.cuh"   // for MakeTransformIterator, LaunchN
#include "optional_weight.h"    // for MakeOptionalWeights, OptionalWeights
#include "ranking_utils.cuh"    // for ThreadsForMean
#include "ranking_utils.h"
#include "threading_utils.cuh"  // for SegmentedTrapezoidThreads
#include "xgboost/base.h"       // for XGBOOST_DEVICE, bst_group_t
#include "xgboost/context.h"    // for Context
#include "xgboost/linalg.h"     // for VectorView, All, Range
#include "xgboost/logging.h"    // for CHECK
#include "xgboost/span.h"       // for Span

namespace xgboost::ltr {
namespace cuda_impl {
void CalcQueriesDCG(Context const* ctx, linalg::VectorView<float const> d_labels,
                    common::Span<std::size_t const> d_sorted_idx, bool exp_gain,
                    common::Span<bst_group_t const> d_group_ptr, std::size_t k,
                    linalg::VectorView<double> out_dcg) {
  CHECK_EQ(d_group_ptr.size() - 1, out_dcg.Size());
  using IdxGroup = thrust::pair<std::size_t, std::size_t>;
  auto group_it = dh::MakeTransformIterator<IdxGroup>(
      thrust::make_counting_iterator(0ull), [=] XGBOOST_DEVICE(std::size_t idx) {
        return thrust::make_pair(idx, dh::SegmentId(d_group_ptr, idx));  // NOLINT
      });
  auto value_it = dh::MakeTransformIterator<double>(
      group_it,
      [exp_gain, d_labels, d_group_ptr, k,
       d_sorted_idx] XGBOOST_DEVICE(IdxGroup const& l) -> double {
        auto g_begin = d_group_ptr[l.second];
        auto g_size = d_group_ptr[l.second + 1] - g_begin;

        auto idx_in_group = l.first - g_begin;
        if (idx_in_group >= k) {
          return 0.0;
        }
        double gain{0.0};
        auto g_sorted_idx = d_sorted_idx.subspan(g_begin, g_size);
        auto g_labels = d_labels.Slice(linalg::Range(g_begin, g_begin + g_size));

        if (exp_gain) {
          gain = ltr::CalcDCGGain(g_labels(g_sorted_idx[idx_in_group]));
        } else {
          gain = g_labels(g_sorted_idx[idx_in_group]);
        }
        double discount = CalcDCGDiscount(idx_in_group);
        return gain * discount;
      });

  CHECK(out_dcg.Contiguous());
  std::size_t bytes;
  cub::DeviceSegmentedReduce::Sum(nullptr, bytes, value_it, out_dcg.Values().data(),
                                  d_group_ptr.size() - 1, d_group_ptr.data(),
                                  d_group_ptr.data() + 1, ctx->CUDACtx()->Stream());
  dh::TemporaryArray<char> temp(bytes);
  cub::DeviceSegmentedReduce::Sum(temp.data().get(), bytes, value_it, out_dcg.Values().data(),
                                  d_group_ptr.size() - 1, d_group_ptr.data(),
                                  d_group_ptr.data() + 1, ctx->CUDACtx()->Stream());
}
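The two cub::DeviceSegmentedReduce::Sum calls above follow CUB's usual two-pass idiom; only the second call performs the per-query reduction of the gain * discount contributions. A minimal sketch of the idiom, assuming d_in, d_out, offsets, n_segments, and stream are already set up:

    std::size_t bytes = 0;
    // Pass 1: null scratch pointer; CUB only writes the required scratch size into `bytes`.
    cub::DeviceSegmentedReduce::Sum(nullptr, bytes, d_in, d_out, n_segments, offsets, offsets + 1, stream);
    dh::TemporaryArray<char> temp(bytes);  // allocate device scratch
    // Pass 2: same arguments with real scratch; the segmented reduction actually runs.
    cub::DeviceSegmentedReduce::Sum(temp.data().get(), bytes, d_in, d_out, n_segments, offsets, offsets + 1, stream);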

void CalcQueriesInvIDCG(Context const* ctx, linalg::VectorView<float const> d_labels,
                        common::Span<bst_group_t const> d_group_ptr,
                        linalg::VectorView<double> out_inv_IDCG, ltr::LambdaRankParam const& p) {
  CHECK_GE(d_group_ptr.size(), 2ul);
  size_t n_groups = d_group_ptr.size() - 1;
  CHECK_EQ(out_inv_IDCG.Size(), n_groups);
  dh::device_vector<std::size_t> sorted_idx(d_labels.Size());
  auto d_sorted_idx = dh::ToSpan(sorted_idx);
  common::SegmentedArgSort<false, true>(ctx, d_labels.Values(), d_group_ptr, d_sorted_idx);
  CalcQueriesDCG(ctx, d_labels, d_sorted_idx, p.ndcg_exp_gain, d_group_ptr, p.TopK(), out_inv_IDCG);
  dh::LaunchN(out_inv_IDCG.Size(), ctx->CUDACtx()->Stream(),
              [out_inv_IDCG] XGBOOST_DEVICE(size_t idx) mutable {
                double idcg = out_inv_IDCG(idx);
                out_inv_IDCG(idx) = CalcInvIDCG(idcg);
              });
}
}  // namespace cuda_impl

namespace {
struct CheckNDCGOp {
  CUDAContext const* cuctx;
  template <typename It, typename Op>
  bool operator()(It beg, It end, Op op) {
    return thrust::none_of(cuctx->CTP(), beg, end, op);
  }
};
struct CheckMAPOp {
  CUDAContext const* cuctx;
  template <typename It, typename Op>
  bool operator()(It beg, It end, Op op) {
    return thrust::all_of(cuctx->CTP(), beg, end, op);
  }
};

struct ThreadGroupOp {
  common::Span<bst_group_t const> d_group_ptr;
  std::size_t n_pairs;

  common::Span<std::size_t> out_thread_group_ptr;

  XGBOOST_DEVICE void operator()(std::size_t i) {
    out_thread_group_ptr[i + 1] =
        cuda_impl::ThreadsForMean(d_group_ptr[i + 1] - d_group_ptr[i], n_pairs);
  }
};

struct GroupSizeOp {
  common::Span<bst_group_t const> d_group_ptr;

  XGBOOST_DEVICE auto operator()(std::size_t i) -> std::size_t {
    return d_group_ptr[i + 1] - d_group_ptr[i];
  }
};

struct WeightOp {
  common::OptionalWeights d_weight;
  XGBOOST_DEVICE auto operator()(std::size_t i) -> double { return d_weight[i]; }
};
}  // anonymous namespace

void RankingCache::InitOnCUDA(Context const* ctx, MetaInfo const& info) {
  CUDAContext const* cuctx = ctx->CUDACtx();

  group_ptr_.SetDevice(ctx->gpu_id);
  if (info.group_ptr_.empty()) {
    group_ptr_.Resize(2, 0);
    group_ptr_.HostVector()[1] = info.num_row_;
  } else {
    auto const& h_group_ptr = info.group_ptr_;
    group_ptr_.Resize(h_group_ptr.size());
    auto d_group_ptr = group_ptr_.DeviceSpan();
    dh::safe_cuda(cudaMemcpyAsync(d_group_ptr.data(), h_group_ptr.data(), d_group_ptr.size_bytes(),
                                  cudaMemcpyHostToDevice, cuctx->Stream()));
  }

  auto d_group_ptr = DataGroupPtr(ctx);
  std::size_t n_groups = Groups();

  auto it = dh::MakeTransformIterator<std::size_t>(thrust::make_counting_iterator(0ul),
                                                   GroupSizeOp{d_group_ptr});
  max_group_size_ =
      thrust::reduce(cuctx->CTP(), it, it + n_groups, 0ul, thrust::maximum<std::size_t>{});

  threads_group_ptr_.SetDevice(ctx->gpu_id);
  threads_group_ptr_.Resize(n_groups + 1, 0);
  auto d_threads_group_ptr = threads_group_ptr_.DeviceSpan();
  if (param_.HasTruncation()) {
    n_cuda_threads_ =
        common::SegmentedTrapezoidThreads(d_group_ptr, d_threads_group_ptr, Param().NumPair());
  } else {
    auto n_pairs = Param().NumPair();
    dh::LaunchN(n_groups, cuctx->Stream(),
                ThreadGroupOp{d_group_ptr, n_pairs, d_threads_group_ptr});
    thrust::inclusive_scan(cuctx->CTP(), dh::tcbegin(d_threads_group_ptr),
                           dh::tcend(d_threads_group_ptr), dh::tbegin(d_threads_group_ptr));
    n_cuda_threads_ = info.num_row_ * param_.NumPair();
  }

  sorted_idx_cache_.SetDevice(ctx->gpu_id);
  sorted_idx_cache_.Resize(info.labels.Size(), 0);

  auto weight = common::MakeOptionalWeights(ctx, info.weights_);
  auto w_it =
      dh::MakeTransformIterator<double>(thrust::make_counting_iterator(0ul), WeightOp{weight});
  weight_norm_ = static_cast<double>(n_groups) / thrust::reduce(w_it, w_it + n_groups);
}
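To make the thread-layout logic concrete: in the non-truncation branch, two groups of sizes {3, 5} with lambdarank_num_pair_per_sample = 2 make ThreadGroupOp write per-group thread counts into positions 1 and 2, giving [0, 6, 10], which the inclusive scan turns into the offsets [0, 6, 16]; with 8 rows in total, n_cuda_threads_ is likewise 8 * 2 = 16.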

common::Span<std::size_t const> RankingCache::MakeRankOnCUDA(Context const* ctx,
                                                             common::Span<float const> predt) {
  auto d_sorted_idx = sorted_idx_cache_.DeviceSpan();
  auto d_group_ptr = DataGroupPtr(ctx);
  common::SegmentedArgSort<false, true>(ctx, predt, d_group_ptr, d_sorted_idx);
  return d_sorted_idx;
}

void NDCGCache::InitOnCUDA(Context const* ctx, MetaInfo const& info) {
  CUDAContext const* cuctx = ctx->CUDACtx();
  auto labels = info.labels.View(ctx->gpu_id).Slice(linalg::All(), 0);
  CheckNDCGLabels(this->Param(), labels, CheckNDCGOp{cuctx});

  auto d_group_ptr = this->DataGroupPtr(ctx);

  std::size_t n_groups = d_group_ptr.size() - 1;
  inv_idcg_ = linalg::Zeros<double>(ctx, n_groups);
  auto d_inv_idcg = inv_idcg_.View(ctx->gpu_id);
  cuda_impl::CalcQueriesInvIDCG(ctx, labels, d_group_ptr, d_inv_idcg, this->Param());
  CHECK_GE(this->Param().NumPair(), 1ul);

  discounts_.SetDevice(ctx->gpu_id);
  discounts_.Resize(MaxGroupSize());
  auto d_discount = discounts_.DeviceSpan();
  dh::LaunchN(MaxGroupSize(), cuctx->Stream(),
              [=] XGBOOST_DEVICE(std::size_t i) { d_discount[i] = CalcDCGDiscount(i); });
}
}  // namespace xgboost::ltr
src/common/ranking_utils.cuh (new file, 40 lines)
@@ -0,0 +1,40 @@
/**
 * Copyright 2023 by XGBoost Contributors
 */
#ifndef XGBOOST_COMMON_RANKING_UTILS_CUH_
#define XGBOOST_COMMON_RANKING_UTILS_CUH_

#include <cstddef>  // for size_t

#include "ranking_utils.h"    // for LambdaRankParam
#include "xgboost/base.h"     // for bst_group_t, XGBOOST_DEVICE
#include "xgboost/context.h"  // for Context
#include "xgboost/linalg.h"   // for VectorView
#include "xgboost/span.h"     // for Span

namespace xgboost {
namespace ltr {
namespace cuda_impl {
void CalcQueriesDCG(Context const *ctx, linalg::VectorView<float const> d_labels,
                    common::Span<std::size_t const> d_sorted_idx, bool exp_gain,
                    common::Span<bst_group_t const> d_group_ptr, std::size_t k,
                    linalg::VectorView<double> out_dcg);

void CalcQueriesInvIDCG(Context const *ctx, linalg::VectorView<float const> d_labels,
                        common::Span<bst_group_t const> d_group_ptr,
                        linalg::VectorView<double> out_inv_IDCG, ltr::LambdaRankParam const &p);

// Functions for computing the number of CUDA threads for a group, and for getting back
// the number of pairs from the number of threads.
XGBOOST_DEVICE __forceinline__ std::size_t ThreadsForMean(std::size_t group_size,
                                                          std::size_t n_pairs) {
  return group_size * n_pairs;
}
XGBOOST_DEVICE __forceinline__ std::size_t PairsForGroup(std::size_t n_threads,
                                                         std::size_t group_size) {
  return n_threads / group_size;
}
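As a quick sanity check on this pair of helpers: a group of 8 documents with n_pairs = 2 is assigned ThreadsForMean(8, 2) == 16 threads, and PairsForGroup(16, 8) == 2 recovers the pair count, so the two are inverses whenever the thread count is an exact multiple of the group size.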
}  // namespace cuda_impl
}  // namespace ltr
}  // namespace xgboost
#endif  // XGBOOST_COMMON_RANKING_UTILS_CUH_
src/common/ranking_utils.h
@@ -11,7 +11,6 @@
#include <string>  // for char_traits, string
#include <vector>  // for vector

#include "./math.h"          // for CloseTo
#include "dmlc/parameter.h"  // for FieldEntry, DMLC_DECLARE_FIELD
#include "error_msg.h"       // for GroupWeight, GroupSize
#include "xgboost/base.h"    // for XGBOOST_DEVICE, bst_group_t
@@ -19,7 +18,7 @@
#include "xgboost/data.h"                // for MetaInfo
#include "xgboost/host_device_vector.h"  // for HostDeviceVector
#include "xgboost/linalg.h"              // for Vector, VectorView, Tensor
-#include "xgboost/logging.h"             // for LogCheck_EQ, CHECK_EQ, CHECK
+#include "xgboost/logging.h"             // for CHECK_EQ, CHECK
#include "xgboost/parameter.h"           // for XGBoostParameter
#include "xgboost/span.h"                // for Span
#include "xgboost/string_view.h"         // for StringView
@@ -34,6 +33,25 @@ using rel_degree_t = std::uint32_t;  // NOLINT
 */
using position_t = std::uint32_t;  // NOLINT

/**
 * \brief Maximum relevance degree for NDCG
 */
constexpr std::size_t MaxRel() { return sizeof(rel_degree_t) * 8 - 1; }
static_assert(MaxRel() == 31);

XGBOOST_DEVICE inline double CalcDCGGain(rel_degree_t label) {
  return static_cast<double>((1u << label) - 1);
}

XGBOOST_DEVICE inline double CalcDCGDiscount(std::size_t idx) {
  return 1.0 / std::log2(static_cast<double>(idx) + 2.0);
}

XGBOOST_DEVICE inline double CalcInvIDCG(double idcg) {
  auto inv_idcg = (idcg == 0.0 ? 0.0 : (1.0 / idcg));  // handle irrelevant document
  return inv_idcg;
}
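For reference, the three helpers above implement the standard NDCG ingredients. With zero-based position i and relevance label y:

    gain(y) = 2^y - 1          (ndcg_exp_gain = true; otherwise gain(y) = y)
    d(i)    = 1 / log2(i + 2)
    DCG@k   = sum_{i < k} gain(y_(i)) * d(i)

where y_(i) is the label at rank i. IDCG is the DCG of the labels sorted in decreasing order, and the cache stores 1/IDCG (zero for a query with no relevant documents) so that NDCG = DCG * inv_idcg requires no per-query division. MaxRel() == 31 bounds the label so that the `1u << label` shift in CalcDCGGain stays within a 32-bit unsigned.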

enum class PairMethod : std::int32_t {
  kTopK = 0,
  kMean = 1,
@@ -115,7 +133,7 @@ struct LambdaRankParam : public XGBoostParameter<LambdaRankParam> {
        .describe("Number of pairs for each sample in the list.");
    DMLC_DECLARE_FIELD(lambdarank_unbiased)
        .set_default(false)
-       .describe("Unbiased lambda mart. Use IPW to debias click position");
+       .describe("Unbiased lambda mart. Use extended IPW to debias click position");
    DMLC_DECLARE_FIELD(lambdarank_bias_norm)
        .set_default(2.0)
        .set_lower_bound(0.0)
@@ -126,6 +144,220 @@ struct LambdaRankParam : public XGBoostParameter<LambdaRankParam> {
  }
};

/**
 * \brief Common cached items for ranking tasks.
 */
class RankingCache {
 private:
  void InitOnCPU(Context const* ctx, MetaInfo const& info);
  void InitOnCUDA(Context const* ctx, MetaInfo const& info);
  // Cached parameter
  LambdaRankParam param_;
  // offset to data groups.
  HostDeviceVector<bst_group_t> group_ptr_;
  // store the sorted index of prediction.
  HostDeviceVector<std::size_t> sorted_idx_cache_;
  // Maximum size of group
  std::size_t max_group_size_{0};
  // Normalization for weight
  double weight_norm_{1.0};
  /**
   * CUDA cache
   */
  // offset to threads assigned to each group for gradient calculation
  HostDeviceVector<std::size_t> threads_group_ptr_;
  // Sorted index of label for finding buckets.
  HostDeviceVector<std::size_t> y_sorted_idx_cache_;
  // Cached labels sorted by the model
  HostDeviceVector<float> y_ranked_by_model_;
  // store rounding factor for objective for each group
  linalg::Vector<GradientPair> roundings_;
  // rounding factor for cost
  HostDeviceVector<double> cost_rounding_;
  // temporary storage for creating rounding factors. Stored as bytes to avoid having a CUDA
  // data structure in here.
  HostDeviceVector<std::uint8_t> max_lambdas_;
  // total number of CUDA threads used for gradient calculation
  std::size_t n_cuda_threads_{0};

  // Create model rank list on GPU
  common::Span<std::size_t const> MakeRankOnCUDA(Context const* ctx,
                                                 common::Span<float const> predt);
  // Create model rank list on CPU
  common::Span<std::size_t const> MakeRankOnCPU(Context const* ctx,
                                                common::Span<float const> predt);

 protected:
  [[nodiscard]] std::size_t MaxGroupSize() const { return max_group_size_; }

 public:
  RankingCache(Context const* ctx, MetaInfo const& info, LambdaRankParam const& p) : param_{p} {
    CHECK(param_.GetInitialised());
    if (!info.group_ptr_.empty()) {
      CHECK_EQ(info.group_ptr_.back(), info.labels.Size())
          << error::GroupSize() << "the size of label.";
    }
    if (ctx->IsCPU()) {
      this->InitOnCPU(ctx, info);
    } else {
      this->InitOnCUDA(ctx, info);
    }
    if (!info.weights_.Empty()) {
      CHECK_EQ(Groups(), info.weights_.Size()) << error::GroupWeight();
    }
  }
  [[nodiscard]] std::size_t MaxPositionSize() const {
    // Use truncation level as bound.
    if (param_.HasTruncation()) {
      return param_.NumPair();
    }
    // Hardcoded maximum size of positions to track. We don't need too many of them, as the
    // bias decreases exponentially.
    return std::min(max_group_size_, static_cast<std::size_t>(32));
  }
  // Constructed as [1, n_samples] if group ptr is not supplied by the user
  common::Span<bst_group_t const> DataGroupPtr(Context const* ctx) const {
    group_ptr_.SetDevice(ctx->gpu_id);
    return ctx->IsCPU() ? group_ptr_.ConstHostSpan() : group_ptr_.ConstDeviceSpan();
  }

  [[nodiscard]] auto const& Param() const { return param_; }
  [[nodiscard]] std::size_t Groups() const { return group_ptr_.Size() - 1; }
  [[nodiscard]] double WeightNorm() const { return weight_norm_; }

  // Create a rank list by model prediction
  common::Span<std::size_t const> SortedIdx(Context const* ctx, common::Span<float const> predt) {
    if (sorted_idx_cache_.Empty()) {
      sorted_idx_cache_.SetDevice(ctx->gpu_id);
      sorted_idx_cache_.Resize(predt.size());
    }
    if (ctx->IsCPU()) {
      return this->MakeRankOnCPU(ctx, predt);
    } else {
      return this->MakeRankOnCUDA(ctx, predt);
    }
  }
  // The function simply returns an uninitialized buffer, as this is only used by the
  // objective for creating pairs.
  common::Span<std::size_t> SortedIdxY(Context const* ctx, std::size_t n_samples) {
    CHECK(ctx->IsCUDA());
    if (y_sorted_idx_cache_.Empty()) {
      y_sorted_idx_cache_.SetDevice(ctx->gpu_id);
      y_sorted_idx_cache_.Resize(n_samples);
    }
    return y_sorted_idx_cache_.DeviceSpan();
  }
  common::Span<float> RankedY(Context const* ctx, std::size_t n_samples) {
    CHECK(ctx->IsCUDA());
    if (y_ranked_by_model_.Empty()) {
      y_ranked_by_model_.SetDevice(ctx->gpu_id);
      y_ranked_by_model_.Resize(n_samples);
    }
    return y_ranked_by_model_.DeviceSpan();
  }

  // CUDA cache getters. The cache is shared between metric and objective; some of these
  // fields are lazily initialized to avoid unnecessary allocation.
  [[nodiscard]] common::Span<std::size_t const> CUDAThreadsGroupPtr() const {
    CHECK(!threads_group_ptr_.Empty());
    return threads_group_ptr_.ConstDeviceSpan();
  }
  [[nodiscard]] std::size_t CUDAThreads() const { return n_cuda_threads_; }

  linalg::VectorView<GradientPair> CUDARounding(Context const* ctx) {
    if (roundings_.Size() == 0) {
      roundings_.SetDevice(ctx->gpu_id);
      roundings_.Reshape(Groups());
    }
    return roundings_.View(ctx->gpu_id);
  }
  common::Span<double> CUDACostRounding(Context const* ctx) {
    if (cost_rounding_.Size() == 0) {
      cost_rounding_.SetDevice(ctx->gpu_id);
      cost_rounding_.Resize(1);
    }
    return cost_rounding_.DeviceSpan();
  }
  template <typename Type>
  common::Span<Type> MaxLambdas(Context const* ctx, std::size_t n) {
    max_lambdas_.SetDevice(ctx->gpu_id);
    std::size_t bytes = n * sizeof(Type);
    if (bytes != max_lambdas_.Size()) {
      max_lambdas_.Resize(bytes);
    }
    return common::Span<Type>{reinterpret_cast<Type*>(max_lambdas_.DevicePointer()), n};
  }
};

class NDCGCache : public RankingCache {
  // NDCG discount
  HostDeviceVector<double> discounts_;
  // 1.0 / IDCG
  linalg::Vector<double> inv_idcg_;
  /**
   * CUDA cache
   */
  // store the intermediate DCG calculation result for metric
  linalg::Vector<double> dcg_;

 public:
  void InitOnCPU(Context const* ctx, MetaInfo const& info);
  void InitOnCUDA(Context const* ctx, MetaInfo const& info);

 public:
  NDCGCache(Context const* ctx, MetaInfo const& info, LambdaRankParam const& p)
      : RankingCache{ctx, info, p} {
    if (ctx->IsCPU()) {
      this->InitOnCPU(ctx, info);
    } else {
      this->InitOnCUDA(ctx, info);
    }
  }

  linalg::VectorView<double const> InvIDCG(Context const* ctx) const {
    return inv_idcg_.View(ctx->gpu_id);
  }
  common::Span<double const> Discount(Context const* ctx) const {
    return ctx->IsCPU() ? discounts_.ConstHostSpan() : discounts_.ConstDeviceSpan();
  }
  linalg::VectorView<double> Dcg(Context const* ctx) {
    if (dcg_.Size() == 0) {
      dcg_.SetDevice(ctx->gpu_id);
      dcg_.Reshape(this->Groups());
    }
    return dcg_.View(ctx->gpu_id);
  }
};
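A minimal usage sketch of the cache API above, mirroring the unit tests added later in this commit (ctx, info, and predt, a common::Span<float const> of predictions, are assumed to be prepared by the caller):

    LambdaRankParam param;
    param.UpdateAllowUnknown(Args{});              // default ranking parameters
    NDCGCache cache{&ctx, info, param};            // validates labels, precomputes 1/IDCG per query
    auto rank_idx = cache.SortedIdx(&ctx, predt);  // per-group descending ArgSort of predictions
    auto inv_idcg = cache.InvIDCG(&ctx);           // one entry per query group
    auto discount = cache.Discount(&ctx);          // shared position discounts 1/log2(i + 2)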

/**
 * \brief Validate label for NDCG
 *
 * \tparam NoneOf Implementation of std::none_of. Specified as a parameter to reuse the
 *                check for both CPU and GPU.
 */
template <typename NoneOf>
void CheckNDCGLabels(ltr::LambdaRankParam const& p, linalg::VectorView<float const> labels,
                     NoneOf none_of) {
  auto d_labels = labels.Values();
  if (p.ndcg_exp_gain) {
    auto label_is_integer =
        none_of(d_labels.data(), d_labels.data() + d_labels.size(), [] XGBOOST_DEVICE(float v) {
          auto l = std::floor(v);
          return std::fabs(l - v) > kRtEps || v < 0.0f;
        });
    CHECK(label_is_integer)
        << "When using relevance degree as target, label must be either 0 or a positive integer.";
  }

  if (p.ndcg_exp_gain) {
    auto label_is_valid = none_of(d_labels.data(), d_labels.data() + d_labels.size(),
                                  [] XGBOOST_DEVICE(ltr::rel_degree_t v) { return v > MaxRel(); });
    CHECK(label_is_valid) << "Relevance degrees must be less than or equal to " << MaxRel()
                          << " when the exponential NDCG gain function is used. "
                          << "Set `ndcg_exp_gain` to false to use a custom DCG gain.";
  }
}

/**
 * \brief Parse name for ranking metric given parameters.
 *

tests/cpp/common/test_ranking_utils.cc
@@ -1,16 +1,25 @@
/**
 * Copyright 2023 by XGBoost Contributors
 */
#include <gtest/gtest.h>   // for Test, AssertionResult, Message, TestPartR...
#include <gtest/gtest.h>   // for ASSERT_NEAR, ASSERT_T...
#include <xgboost/base.h>  // for Args
#include "test_ranking_utils.h"

#include <gtest/gtest.h>
#include <xgboost/base.h>                // for Args, bst_group_t, kRtEps
#include <xgboost/context.h>             // for Context
#include <xgboost/data.h>                // for MetaInfo, DMatrix
#include <xgboost/host_device_vector.h>  // for HostDeviceVector
#include <xgboost/logging.h>             // for Error
#include <xgboost/string_view.h>         // for StringView

#include <cstddef>  // for size_t
#include <cstdint>  // for uint32_t
#include <utility>  // for pair
#include <numeric>  // for iota
#include <utility>  // for move
#include <vector>   // for vector

#include "../../../src/common/numeric.h"         // for Iota
#include "../../../src/common/ranking_utils.h"   // for LambdaRankParam, ParseMetricName, MakeMet...
#include "../helpers.h"                          // for EmptyDMatrix

namespace xgboost::ltr {
TEST(RankingUtils, LambdaRankParam) {
@@ -66,4 +75,106 @@ TEST(RankingUtils, MakeMetricName) {
  name = MakeMetricName("map", 2, false);
  ASSERT_EQ(name, "map@2");
}

void TestRankingCache(Context const* ctx) {
  auto p_fmat = EmptyDMatrix();
  MetaInfo& info = p_fmat->Info();

  info.num_row_ = 16;
  info.labels.Reshape(info.num_row_);
  auto& h_label = info.labels.Data()->HostVector();
  for (std::size_t i = 0; i < h_label.size(); ++i) {
    h_label[i] = i % 2;
  }

  LambdaRankParam param;
  param.UpdateAllowUnknown(Args{});

  RankingCache cache{ctx, info, param};

  HostDeviceVector<float> predt(info.num_row_, 0);
  auto& h_predt = predt.HostVector();
  std::iota(h_predt.begin(), h_predt.end(), 0.0f);
  predt.SetDevice(ctx->gpu_id);

  auto rank_idx =
      cache.SortedIdx(ctx, ctx->IsCPU() ? predt.ConstHostSpan() : predt.ConstDeviceSpan());

  for (std::size_t i = 0; i < rank_idx.size(); ++i) {
    ASSERT_EQ(rank_idx[i], rank_idx.size() - i - 1);
  }
}

TEST(RankingCache, InitFromCPU) {
  Context ctx;
  TestRankingCache(&ctx);
}

void TestNDCGCache(Context const* ctx) {
  auto p_fmat = EmptyDMatrix();
  MetaInfo& info = p_fmat->Info();
  LambdaRankParam param;
  param.UpdateAllowUnknown(Args{});

  {
    // empty
    NDCGCache cache{ctx, info, param};
    ASSERT_EQ(cache.DataGroupPtr(ctx).size(), 2);
  }

  info.num_row_ = 3;
  info.group_ptr_ = {static_cast<bst_group_t>(0), static_cast<bst_group_t>(info.num_row_)};

  {
    auto fail = [&]() { NDCGCache cache{ctx, info, param}; };
    // empty label
    ASSERT_THROW(fail(), dmlc::Error);
    info.labels = linalg::Matrix<float>{{0.0f, 0.1f, 0.2f}, {3}, Context::kCpuId};
    // invalid label
    ASSERT_THROW(fail(), dmlc::Error);
    auto h_labels = info.labels.HostView();
    for (std::size_t i = 0; i < h_labels.Size(); ++i) {
      h_labels(i) *= 10;
    }
    param.UpdateAllowUnknown(Args{{"ndcg_exp_gain", "false"}});
    NDCGCache cache{ctx, info, param};
    Context cpuctx;
    auto inv_idcg = cache.InvIDCG(&cpuctx);
    ASSERT_EQ(inv_idcg.Size(), 1);
    ASSERT_NEAR(1.0 / inv_idcg(0), 2.63093, kRtEps);
  }

  {
    param.UpdateAllowUnknown(Args{{"lambdarank_unbiased", "false"}});

    std::vector<float> h_data(32);

    common::Iota(ctx, h_data.begin(), h_data.end(), 0.0f);
    info.labels.Reshape(h_data.size());
    info.num_row_ = h_data.size();
    info.group_ptr_.back() = info.num_row_;
    info.labels.Data()->HostVector() = std::move(h_data);

    {
      NDCGCache cache{ctx, info, param};
      Context cpuctx;
      auto inv_idcg = cache.InvIDCG(&cpuctx);
      ASSERT_NEAR(inv_idcg(0), 0.00551782, kRtEps);
    }

    param.UpdateAllowUnknown(
        Args{{"lambdarank_num_pair_per_sample", "3"}, {"lambdarank_pair_method", "topk"}});
    {
      NDCGCache cache{ctx, info, param};
      Context cpuctx;
      auto inv_idcg = cache.InvIDCG(&cpuctx);
      ASSERT_NEAR(inv_idcg(0), 0.01552123, kRtEps);
    }
  }
}
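As a check on the expected constants: with ndcg_exp_gain = false the gain is the label itself, so for the scaled labels {0, 1, 2} the ideal ordering is (2, 1, 0) and IDCG = 2/log2(2) + 1/log2(3) + 0/log2(4) ≈ 2 + 0.63093 = 2.63093, matching the first assertion. For the 32 iota labels, IDCG = sum over i < 32 of (31 - i)/log2(i + 2) ≈ 181.23, whose reciprocal is ≈ 0.00551782; truncating to the top k = 3 gives 31 + 30/log2(3) + 29/2 ≈ 64.428, i.e. ≈ 0.01552123 after inversion.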

TEST(NDCGCache, InitFromCPU) {
  Context ctx;
  TestNDCGCache(&ctx);
}
}  // namespace xgboost::ltr

tests/cpp/common/test_ranking_utils.cu (new file, 98 lines)
@@ -0,0 +1,98 @@
/**
 * Copyright 2023 by XGBoost Contributors
 */
#include <gtest/gtest.h>
#include <xgboost/base.h>     // for Args, XGBOOST_DEVICE, bst_group_t, kRtEps
#include <xgboost/context.h>  // for Context
#include <xgboost/linalg.h>   // for MakeTensorView, Vector

#include <cstddef>  // for size_t
#include <memory>   // for shared_ptr
#include <numeric>  // for iota
#include <vector>   // for vector

#include "../../../src/common/algorithm.cuh"       // for SegmentedSequence
#include "../../../src/common/cuda_context.cuh"    // for CUDAContext
#include "../../../src/common/device_helpers.cuh"  // for device_vector, ToSpan
#include "../../../src/common/ranking_utils.cuh"   // for CalcQueriesInvIDCG
#include "../../../src/common/ranking_utils.h"     // for LambdaRankParam, RankingCache
#include "../helpers.h"                            // for EmptyDMatrix
#include "test_ranking_utils.h"                    // for TestNDCGCache
#include "xgboost/data.h"                          // for MetaInfo
#include "xgboost/host_device_vector.h"            // for HostDeviceVector

namespace xgboost::ltr {
void TestCalcQueriesInvIDCG() {
  Context ctx;
  ctx.UpdateAllowUnknown(Args{{"gpu_id", "0"}});
  std::size_t n_groups = 5, n_samples_per_group = 32;

  dh::device_vector<float> scores(n_samples_per_group * n_groups);
  dh::device_vector<bst_group_t> group_ptr(n_groups + 1);
  auto d_group_ptr = dh::ToSpan(group_ptr);
  dh::LaunchN(d_group_ptr.size(), ctx.CUDACtx()->Stream(),
              [=] XGBOOST_DEVICE(std::size_t i) { d_group_ptr[i] = i * n_samples_per_group; });

  auto d_scores = dh::ToSpan(scores);
  common::SegmentedSequence(&ctx, d_group_ptr, d_scores);

  linalg::Vector<double> inv_IDCG({n_groups}, ctx.gpu_id);

  ltr::LambdaRankParam p;
  p.UpdateAllowUnknown(Args{{"ndcg_exp_gain", "false"}});

  cuda_impl::CalcQueriesInvIDCG(&ctx, linalg::MakeTensorView(&ctx, d_scores, d_scores.size()),
                                dh::ToSpan(group_ptr), inv_IDCG.View(ctx.gpu_id), p);
  for (std::size_t i = 0; i < n_groups; ++i) {
    double inv_idcg = inv_IDCG(i);
    ASSERT_NEAR(inv_idcg, 0.00551782, kRtEps);
  }
}

TEST(RankingUtils, CalcQueriesInvIDCG) { TestCalcQueriesInvIDCG(); }

namespace {
void TestRankingCache(Context const* ctx) {
  auto p_fmat = EmptyDMatrix();
  MetaInfo& info = p_fmat->Info();

  info.num_row_ = 16;
  info.labels.Reshape(info.num_row_);
  auto& h_label = info.labels.Data()->HostVector();
  for (std::size_t i = 0; i < h_label.size(); ++i) {
    h_label[i] = i % 2;
  }

  LambdaRankParam param;
  param.UpdateAllowUnknown(Args{});

  RankingCache cache{ctx, info, param};

  HostDeviceVector<float> predt(info.num_row_, 0);
  auto& h_predt = predt.HostVector();
  std::iota(h_predt.begin(), h_predt.end(), 0.0f);
  predt.SetDevice(ctx->gpu_id);

  auto rank_idx =
      cache.SortedIdx(ctx, ctx->IsCPU() ? predt.ConstHostSpan() : predt.ConstDeviceSpan());

  std::vector<std::size_t> h_rank_idx(rank_idx.size());
  dh::CopyDeviceSpanToVector(&h_rank_idx, rank_idx);
  for (std::size_t i = 0; i < rank_idx.size(); ++i) {
    ASSERT_EQ(h_rank_idx[i], h_rank_idx.size() - i - 1);
  }
}
}  // namespace

TEST(RankingCache, InitFromGPU) {
  Context ctx;
  ctx.UpdateAllowUnknown(Args{{"gpu_id", "0"}});
  TestRankingCache(&ctx);
}

TEST(NDCGCache, InitFromGPU) {
  Context ctx;
  ctx.UpdateAllowUnknown(Args{{"gpu_id", "0"}});
  TestNDCGCache(&ctx);
}
}  // namespace xgboost::ltr
tests/cpp/common/test_ranking_utils.h (new file, 9 lines)
@@ -0,0 +1,9 @@
/**
 * Copyright 2023 by XGBoost Contributors
 */
#pragma once
#include <xgboost/context.h>  // for Context

namespace xgboost::ltr {
void TestNDCGCache(Context const* ctx);
}  // namespace xgboost::ltr