Implement NDCG cache. (#8893)
This commit is contained in:
@@ -11,7 +11,6 @@
|
||||
#include <string> // for char_traits, string
|
||||
#include <vector> // for vector
|
||||
|
||||
#include "./math.h" // for CloseTo
|
||||
#include "dmlc/parameter.h" // for FieldEntry, DMLC_DECLARE_FIELD
|
||||
#include "error_msg.h" // for GroupWeight, GroupSize
|
||||
#include "xgboost/base.h" // for XGBOOST_DEVICE, bst_group_t
|
||||
@@ -19,7 +18,7 @@
|
||||
#include "xgboost/data.h" // for MetaInfo
|
||||
#include "xgboost/host_device_vector.h" // for HostDeviceVector
|
||||
#include "xgboost/linalg.h" // for Vector, VectorView, Tensor
|
||||
#include "xgboost/logging.h" // for LogCheck_EQ, CHECK_EQ, CHECK
|
||||
#include "xgboost/logging.h" // for CHECK_EQ, CHECK
|
||||
#include "xgboost/parameter.h" // for XGBoostParameter
|
||||
#include "xgboost/span.h" // for Span
|
||||
#include "xgboost/string_view.h" // for StringView
|
||||
@@ -34,6 +33,25 @@ using rel_degree_t = std::uint32_t; // NOLINT
|
||||
*/
|
||||
using position_t = std::uint32_t; // NOLINT
|
||||
|
||||
/**
|
||||
* \brief Maximum relevance degree for NDCG
|
||||
*/
|
||||
constexpr std::size_t MaxRel() { return sizeof(rel_degree_t) * 8 - 1; }
|
||||
static_assert(MaxRel() == 31);
|
||||
|
||||
XGBOOST_DEVICE inline double CalcDCGGain(rel_degree_t label) {
|
||||
return static_cast<double>((1u << label) - 1);
|
||||
}
|
||||
|
||||
XGBOOST_DEVICE inline double CalcDCGDiscount(std::size_t idx) {
|
||||
return 1.0 / std::log2(static_cast<double>(idx) + 2.0);
|
||||
}
|
||||
|
||||
XGBOOST_DEVICE inline double CalcInvIDCG(double idcg) {
|
||||
auto inv_idcg = (idcg == 0.0 ? 0.0 : (1.0 / idcg)); // handle irrelevant document
|
||||
return inv_idcg;
|
||||
}
|
||||
|
||||
enum class PairMethod : std::int32_t {
|
||||
kTopK = 0,
|
||||
kMean = 1,
|
||||
@@ -115,7 +133,7 @@ struct LambdaRankParam : public XGBoostParameter<LambdaRankParam> {
|
||||
.describe("Number of pairs for each sample in the list.");
|
||||
DMLC_DECLARE_FIELD(lambdarank_unbiased)
|
||||
.set_default(false)
|
||||
.describe("Unbiased lambda mart. Use IPW to debias click position");
|
||||
.describe("Unbiased lambda mart. Use extended IPW to debias click position");
|
||||
DMLC_DECLARE_FIELD(lambdarank_bias_norm)
|
||||
.set_default(2.0)
|
||||
.set_lower_bound(0.0)
|
||||
@@ -126,6 +144,220 @@ struct LambdaRankParam : public XGBoostParameter<LambdaRankParam> {
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* \brief Common cached items for ranking tasks.
|
||||
*/
|
||||
class RankingCache {
|
||||
private:
|
||||
void InitOnCPU(Context const* ctx, MetaInfo const& info);
|
||||
void InitOnCUDA(Context const* ctx, MetaInfo const& info);
|
||||
// Cached parameter
|
||||
LambdaRankParam param_;
|
||||
// offset to data groups.
|
||||
HostDeviceVector<bst_group_t> group_ptr_;
|
||||
// store the sorted index of prediction.
|
||||
HostDeviceVector<std::size_t> sorted_idx_cache_;
|
||||
// Maximum size of group
|
||||
std::size_t max_group_size_{0};
|
||||
// Normalization for weight
|
||||
double weight_norm_{1.0};
|
||||
/**
|
||||
* CUDA cache
|
||||
*/
|
||||
// offset to threads assigned to each group for gradient calculation
|
||||
HostDeviceVector<std::size_t> threads_group_ptr_;
|
||||
// Sorted index of label for finding buckets.
|
||||
HostDeviceVector<std::size_t> y_sorted_idx_cache_;
|
||||
// Cached labels sorted by the model
|
||||
HostDeviceVector<float> y_ranked_by_model_;
|
||||
// store rounding factor for objective for each group
|
||||
linalg::Vector<GradientPair> roundings_;
|
||||
// rounding factor for cost
|
||||
HostDeviceVector<double> cost_rounding_;
|
||||
// temporary storage for creating rounding factors. Stored as byte to avoid having cuda
|
||||
// data structure in here.
|
||||
HostDeviceVector<std::uint8_t> max_lambdas_;
|
||||
// total number of cuda threads used for gradient calculation
|
||||
std::size_t n_cuda_threads_{0};
|
||||
|
||||
// Create model rank list on GPU
|
||||
common::Span<std::size_t const> MakeRankOnCUDA(Context const* ctx,
|
||||
common::Span<float const> predt);
|
||||
// Create model rank list on CPU
|
||||
common::Span<std::size_t const> MakeRankOnCPU(Context const* ctx,
|
||||
common::Span<float const> predt);
|
||||
|
||||
protected:
|
||||
[[nodiscard]] std::size_t MaxGroupSize() const { return max_group_size_; }
|
||||
|
||||
public:
|
||||
RankingCache(Context const* ctx, MetaInfo const& info, LambdaRankParam const& p) : param_{p} {
|
||||
CHECK(param_.GetInitialised());
|
||||
if (!info.group_ptr_.empty()) {
|
||||
CHECK_EQ(info.group_ptr_.back(), info.labels.Size())
|
||||
<< error::GroupSize() << "the size of label.";
|
||||
}
|
||||
if (ctx->IsCPU()) {
|
||||
this->InitOnCPU(ctx, info);
|
||||
} else {
|
||||
this->InitOnCUDA(ctx, info);
|
||||
}
|
||||
if (!info.weights_.Empty()) {
|
||||
CHECK_EQ(Groups(), info.weights_.Size()) << error::GroupWeight();
|
||||
}
|
||||
}
|
||||
[[nodiscard]] std::size_t MaxPositionSize() const {
|
||||
// Use truncation level as bound.
|
||||
if (param_.HasTruncation()) {
|
||||
return param_.NumPair();
|
||||
}
|
||||
// Hardcoded maximum size of positions to track. We don't need too many of them as the
|
||||
// bias decreases exponentially.
|
||||
return std::min(max_group_size_, static_cast<std::size_t>(32));
|
||||
}
|
||||
// Constructed as [1, n_samples] if group ptr is not supplied by the user
|
||||
common::Span<bst_group_t const> DataGroupPtr(Context const* ctx) const {
|
||||
group_ptr_.SetDevice(ctx->gpu_id);
|
||||
return ctx->IsCPU() ? group_ptr_.ConstHostSpan() : group_ptr_.ConstDeviceSpan();
|
||||
}
|
||||
|
||||
[[nodiscard]] auto const& Param() const { return param_; }
|
||||
[[nodiscard]] std::size_t Groups() const { return group_ptr_.Size() - 1; }
|
||||
[[nodiscard]] double WeightNorm() const { return weight_norm_; }
|
||||
|
||||
// Create a rank list by model prediction
|
||||
common::Span<std::size_t const> SortedIdx(Context const* ctx, common::Span<float const> predt) {
|
||||
if (sorted_idx_cache_.Empty()) {
|
||||
sorted_idx_cache_.SetDevice(ctx->gpu_id);
|
||||
sorted_idx_cache_.Resize(predt.size());
|
||||
}
|
||||
if (ctx->IsCPU()) {
|
||||
return this->MakeRankOnCPU(ctx, predt);
|
||||
} else {
|
||||
return this->MakeRankOnCUDA(ctx, predt);
|
||||
}
|
||||
}
|
||||
// The function simply returns a uninitialized buffer as this is only used by the
|
||||
// objective for creating pairs.
|
||||
common::Span<std::size_t> SortedIdxY(Context const* ctx, std::size_t n_samples) {
|
||||
CHECK(ctx->IsCUDA());
|
||||
if (y_sorted_idx_cache_.Empty()) {
|
||||
y_sorted_idx_cache_.SetDevice(ctx->gpu_id);
|
||||
y_sorted_idx_cache_.Resize(n_samples);
|
||||
}
|
||||
return y_sorted_idx_cache_.DeviceSpan();
|
||||
}
|
||||
common::Span<float> RankedY(Context const* ctx, std::size_t n_samples) {
|
||||
CHECK(ctx->IsCUDA());
|
||||
if (y_ranked_by_model_.Empty()) {
|
||||
y_ranked_by_model_.SetDevice(ctx->gpu_id);
|
||||
y_ranked_by_model_.Resize(n_samples);
|
||||
}
|
||||
return y_ranked_by_model_.DeviceSpan();
|
||||
}
|
||||
|
||||
// CUDA cache getters, the cache is shared between metric and objective, some of these
|
||||
// fields are lazy initialized to avoid unnecessary allocation.
|
||||
[[nodiscard]] common::Span<std::size_t const> CUDAThreadsGroupPtr() const {
|
||||
CHECK(!threads_group_ptr_.Empty());
|
||||
return threads_group_ptr_.ConstDeviceSpan();
|
||||
}
|
||||
[[nodiscard]] std::size_t CUDAThreads() const { return n_cuda_threads_; }
|
||||
|
||||
linalg::VectorView<GradientPair> CUDARounding(Context const* ctx) {
|
||||
if (roundings_.Size() == 0) {
|
||||
roundings_.SetDevice(ctx->gpu_id);
|
||||
roundings_.Reshape(Groups());
|
||||
}
|
||||
return roundings_.View(ctx->gpu_id);
|
||||
}
|
||||
common::Span<double> CUDACostRounding(Context const* ctx) {
|
||||
if (cost_rounding_.Size() == 0) {
|
||||
cost_rounding_.SetDevice(ctx->gpu_id);
|
||||
cost_rounding_.Resize(1);
|
||||
}
|
||||
return cost_rounding_.DeviceSpan();
|
||||
}
|
||||
template <typename Type>
|
||||
common::Span<Type> MaxLambdas(Context const* ctx, std::size_t n) {
|
||||
max_lambdas_.SetDevice(ctx->gpu_id);
|
||||
std::size_t bytes = n * sizeof(Type);
|
||||
if (bytes != max_lambdas_.Size()) {
|
||||
max_lambdas_.Resize(bytes);
|
||||
}
|
||||
return common::Span<Type>{reinterpret_cast<Type*>(max_lambdas_.DevicePointer()), n};
|
||||
}
|
||||
};
|
||||
|
||||
class NDCGCache : public RankingCache {
|
||||
// NDCG discount
|
||||
HostDeviceVector<double> discounts_;
|
||||
// 1.0 / IDCG
|
||||
linalg::Vector<double> inv_idcg_;
|
||||
/**
|
||||
* CUDA cache
|
||||
*/
|
||||
// store the intermediate DCG calculation result for metric
|
||||
linalg::Vector<double> dcg_;
|
||||
|
||||
public:
|
||||
void InitOnCPU(Context const* ctx, MetaInfo const& info);
|
||||
void InitOnCUDA(Context const* ctx, MetaInfo const& info);
|
||||
|
||||
public:
|
||||
NDCGCache(Context const* ctx, MetaInfo const& info, LambdaRankParam const& p)
|
||||
: RankingCache{ctx, info, p} {
|
||||
if (ctx->IsCPU()) {
|
||||
this->InitOnCPU(ctx, info);
|
||||
} else {
|
||||
this->InitOnCUDA(ctx, info);
|
||||
}
|
||||
}
|
||||
|
||||
linalg::VectorView<double const> InvIDCG(Context const* ctx) const {
|
||||
return inv_idcg_.View(ctx->gpu_id);
|
||||
}
|
||||
common::Span<double const> Discount(Context const* ctx) const {
|
||||
return ctx->IsCPU() ? discounts_.ConstHostSpan() : discounts_.ConstDeviceSpan();
|
||||
}
|
||||
linalg::VectorView<double> Dcg(Context const* ctx) {
|
||||
if (dcg_.Size() == 0) {
|
||||
dcg_.SetDevice(ctx->gpu_id);
|
||||
dcg_.Reshape(this->Groups());
|
||||
}
|
||||
return dcg_.View(ctx->gpu_id);
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* \brief Validate label for NDCG
|
||||
*
|
||||
* \tparam NoneOf Implementation of std::none_of. Specified as a parameter to reuse the
|
||||
* check for both CPU and GPU.
|
||||
*/
|
||||
template <typename NoneOf>
|
||||
void CheckNDCGLabels(ltr::LambdaRankParam const& p, linalg::VectorView<float const> labels,
|
||||
NoneOf none_of) {
|
||||
auto d_labels = labels.Values();
|
||||
if (p.ndcg_exp_gain) {
|
||||
auto label_is_integer =
|
||||
none_of(d_labels.data(), d_labels.data() + d_labels.size(), [] XGBOOST_DEVICE(float v) {
|
||||
auto l = std::floor(v);
|
||||
return std::fabs(l - v) > kRtEps || v < 0.0f;
|
||||
});
|
||||
CHECK(label_is_integer)
|
||||
<< "When using relevance degree as target, label must be either 0 or positive integer.";
|
||||
}
|
||||
|
||||
if (p.ndcg_exp_gain) {
|
||||
auto label_is_valid = none_of(d_labels.data(), d_labels.data() + d_labels.size(),
|
||||
[] XGBOOST_DEVICE(ltr::rel_degree_t v) { return v > MaxRel(); });
|
||||
CHECK(label_is_valid) << "Relevance degress must be lesser than or equal to " << MaxRel()
|
||||
<< " when the exponential NDCG gain function is used. "
|
||||
<< "Set `ndcg_exp_gain` to false to use custom DCG gain.";
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Parse name for ranking metric given parameters.
|
||||
*
|
||||
|
||||
Reference in New Issue
Block a user