Define a new ranking parameter. (#8887)
This commit is contained in:
parent
e8a69013e6
commit
46dfcc7d22
24
src/common/error_msg.h
Normal file
24
src/common/error_msg.h
Normal file
@ -0,0 +1,24 @@
|
||||
/**
|
||||
* Copyright 2023 by XGBoost contributors
|
||||
*
|
||||
* \brief Common error message for various checks.
|
||||
*/
|
||||
#ifndef XGBOOST_COMMON_ERROR_MSG_H_
|
||||
#define XGBOOST_COMMON_ERROR_MSG_H_
|
||||
|
||||
#include "xgboost/string_view.h" // for StringView
|
||||
|
||||
namespace xgboost::error {
|
||||
constexpr StringView GroupWeight() {
|
||||
return "Size of weight must equal to the number of query groups when ranking group is used.";
|
||||
}
|
||||
|
||||
constexpr StringView GroupSize() {
|
||||
return "Invalid query group structure. The number of rows obtained from group doesn't equal to ";
|
||||
}
|
||||
|
||||
constexpr StringView LabelScoreSize() {
|
||||
return "The size of label doesn't match the size of prediction.";
|
||||
}
|
||||
} // namespace xgboost::error
|
||||
#endif // XGBOOST_COMMON_ERROR_MSG_H_
|
||||
@ -3,15 +3,28 @@
|
||||
*/
|
||||
#include "ranking_utils.h"
|
||||
|
||||
#include <cstdint> // std::uint32_t
|
||||
#include <sstream> // std::ostringstream
|
||||
#include <string> // std::string,std::sscanf
|
||||
#include <algorithm> // for copy_n, max, min, none_of, all_of
|
||||
#include <cstddef> // for size_t
|
||||
#include <cstdio> // for sscanf
|
||||
#include <exception> // for exception
|
||||
#include <functional> // for greater
|
||||
#include <iterator> // for reverse_iterator
|
||||
#include <string> // for char_traits, string
|
||||
|
||||
#include "xgboost/string_view.h" // StringView
|
||||
#include "algorithm.h" // for ArgSort
|
||||
#include "linalg_op.h" // for cbegin, cend
|
||||
#include "optional_weight.h" // for MakeOptionalWeights
|
||||
#include "threading_utils.h" // for ParallelFor
|
||||
#include "xgboost/base.h" // for bst_group_t
|
||||
#include "xgboost/context.h" // for Context
|
||||
#include "xgboost/data.h" // for MetaInfo
|
||||
#include "xgboost/linalg.h" // for All, TensorView, Range, Tensor, Vector
|
||||
#include "xgboost/logging.h" // for Error, LogCheck_EQ, CHECK_EQ
|
||||
|
||||
namespace xgboost {
|
||||
namespace ltr {
|
||||
std::string MakeMetricName(StringView name, StringView param, std::uint32_t* topn, bool* minus) {
|
||||
namespace xgboost::ltr {
|
||||
// Registers LambdaRankParam with dmlc's parameter machinery. NOTE(review): presumably this is the
// out-of-line counterpart of the DMLC_DECLARE_PARAMETER block in ranking_utils.h and must appear
// in exactly one translation unit -- confirm against dmlc/parameter.h.
DMLC_REGISTER_PARAMETER(LambdaRankParam);
|
||||
|
||||
std::string ParseMetricName(StringView name, StringView param, position_t* topn, bool* minus) {
|
||||
std::string out_name;
|
||||
if (!param.empty()) {
|
||||
std::ostringstream os;
|
||||
@ -30,5 +43,18 @@ std::string MakeMetricName(StringView name, StringView param, std::uint32_t* top
|
||||
}
|
||||
return out_name;
|
||||
}
|
||||
} // namespace ltr
|
||||
} // namespace xgboost
|
||||
|
||||
/**
 * \brief Assemble a metric name of the form `name[@topn][-]`.
 *
 * \param name  Base name of the metric.
 * \param topn  Top-k position; omitted from the result when equal to LambdaRankParam::NotSet().
 * \param minus Whether to append the trailing `-`.
 *
 * \return The assembled name, e.g. `map`, `map-`, `map@2` or `map@2-`.
 */
std::string MakeMetricName(StringView name, position_t topn, bool minus) {
  std::ostringstream builder;
  builder << name;
  // NotSet() is the sentinel for "no k given"; only a real k is written after the '@'.
  if (topn != LambdaRankParam::NotSet()) {
    builder << "@" << topn;
  }
  if (minus) {
    builder << "-";
  }
  return builder.str();
}
|
||||
} // namespace xgboost::ltr
|
||||
|
||||
@ -3,17 +3,131 @@
|
||||
*/
|
||||
#ifndef XGBOOST_COMMON_RANKING_UTILS_H_
|
||||
#define XGBOOST_COMMON_RANKING_UTILS_H_
|
||||
#include <algorithm> // for min
|
||||
#include <cmath> // for log2, fabs, floor
|
||||
#include <cstddef> // for size_t
|
||||
#include <cstdint> // for uint32_t, uint8_t, int32_t
|
||||
#include <limits> // for numeric_limits
|
||||
#include <string> // for char_traits, string
|
||||
#include <vector> // for vector
|
||||
|
||||
#include <cstddef> // std::size_t
|
||||
#include <cstdint> // std::uint32_t
|
||||
#include <string> // std::string
|
||||
#include "./math.h" // for CloseTo
|
||||
#include "dmlc/parameter.h" // for FieldEntry, DMLC_DECLARE_FIELD
|
||||
#include "error_msg.h" // for GroupWeight, GroupSize
|
||||
#include "xgboost/base.h" // for XGBOOST_DEVICE, bst_group_t
|
||||
#include "xgboost/context.h" // for Context
|
||||
#include "xgboost/data.h" // for MetaInfo
|
||||
#include "xgboost/host_device_vector.h" // for HostDeviceVector
|
||||
#include "xgboost/linalg.h" // for Vector, VectorView, Tensor
|
||||
#include "xgboost/logging.h" // for LogCheck_EQ, CHECK_EQ, CHECK
|
||||
#include "xgboost/parameter.h" // for XGBoostParameter
|
||||
#include "xgboost/span.h" // for Span
|
||||
#include "xgboost/string_view.h" // for StringView
|
||||
|
||||
#include "xgboost/string_view.h" // StringView
|
||||
|
||||
namespace xgboost {
|
||||
namespace ltr {
|
||||
namespace xgboost::ltr {
/**
 * \brief Relevance degree
 */
using rel_degree_t = std::uint32_t;  // NOLINT
/**
 * \brief top-k position
 */
using position_t = std::uint32_t;  // NOLINT

/**
 * \brief Method for constructing pairs.
 *
 * The numeric values are spelled out explicitly and stored as std::int32_t.
 */
enum class PairMethod : std::int32_t {
  kTopK = 0,  // exposed to users as "topk"
  kMean = 1,  // exposed to users as "mean"
};
}  // namespace xgboost::ltr
|
||||
|
||||
// Makes PairMethod usable as a parameter field type. The surrounding namespace is closed before
// this line and reopened after it; NOTE(review): presumably the macro has to be expanded at
// global scope -- confirm against xgboost/parameter.h.
DECLARE_FIELD_ENUM_CLASS(xgboost::ltr::PairMethod);
|
||||
|
||||
namespace xgboost::ltr {
|
||||
/**
 * \brief Parameters for lambdarank: pair construction, the unbiased option and the NDCG
 *        gain type.
 *
 * The pair-related fields are protected and should be read through NumPair(),
 * HasTruncation() and TopK(), which substitute a default when the user has not set a count.
 */
struct LambdaRankParam : public XGBoostParameter<LambdaRankParam> {
 private:
  // Pair count used for PairMethod::kTopK when the user has not set one.
  static constexpr position_t DefaultK() { return 32; }
  // Pair count used for PairMethod::kMean when the user has not set one.
  static constexpr position_t DefaultSamplePairs() { return 1; }

 protected:
  // pairs
  // should be accessed by getter for auto configuration.
  // nolint so that we can keep the string name.
  PairMethod lambdarank_pair_method{PairMethod::kMean};  // NOLINT
  std::size_t lambdarank_num_pair_per_sample{NotSet()};  // NOLINT

 public:
  /**
   * \brief Sentinel meaning "no value given": the largest representable position_t.
   */
  static constexpr position_t NotSet() { return std::numeric_limits<position_t>::max(); }

  // unbiased
  bool lambdarank_unbiased{false};
  double lambdarank_bias_norm{2.0};
  // ndcg
  bool ndcg_exp_gain{true};

  bool operator==(LambdaRankParam const& that) const {
    return lambdarank_pair_method == that.lambdarank_pair_method &&
           lambdarank_num_pair_per_sample == that.lambdarank_num_pair_per_sample &&
           lambdarank_unbiased == that.lambdarank_unbiased &&
           lambdarank_bias_norm == that.lambdarank_bias_norm && ndcg_exp_gain == that.ndcg_exp_gain;
  }
  bool operator!=(LambdaRankParam const& that) const { return !(*this == that); }

  /**
   * \brief 1 / (1 + lambdarank_bias_norm).
   */
  [[nodiscard]] double Regularizer() const { return 1.0 / (1.0 + this->lambdarank_bias_norm); }

  /**
   * \brief Get number of pairs for each sample
   *
   * Returns the user-supplied count when one was given, otherwise the default that matches
   * the configured pair method.
   */
  [[nodiscard]] position_t NumPair() const {
    if (lambdarank_num_pair_per_sample == NotSet()) {
      switch (lambdarank_pair_method) {
        case PairMethod::kMean:
          return DefaultSamplePairs();
        case PairMethod::kTopK:
          return DefaultK();
      }
    } else {
      // The field is declared with the range [1, NotSet()], so the stored value always fits
      // into position_t; the cast only makes the narrowing from std::size_t explicit.
      return static_cast<position_t>(lambdarank_num_pair_per_sample);
    }
    LOG(FATAL) << "Unreachable.";
    return 0;
  }

  /**
   * \brief Whether the pair method is top-k.
   */
  [[nodiscard]] bool HasTruncation() const { return lambdarank_pair_method == PairMethod::kTopK; }

  // Used for evaluation metric and cache initialization, iterate through top-k or the whole list
  [[nodiscard]] auto TopK() const {
    if (HasTruncation()) {
      return NumPair();
    } else {
      return NotSet();
    }
  }

  DMLC_DECLARE_PARAMETER(LambdaRankParam) {
    DMLC_DECLARE_FIELD(lambdarank_pair_method)
        .set_default(PairMethod::kMean)
        .add_enum("mean", PairMethod::kMean)
        .add_enum("topk", PairMethod::kTopK)
        .describe("Method for constructing pairs.");
    // The storage type is std::size_t while every consumer reads the value as position_t.
    // Bounding the field from above rejects values that would otherwise be silently
    // truncated in NumPair(); NotSet() itself must stay legal because it is the default.
    DMLC_DECLARE_FIELD(lambdarank_num_pair_per_sample)
        .set_default(NotSet())
        .set_range(1, NotSet())
        .describe("Number of pairs for each sample in the list.");
    DMLC_DECLARE_FIELD(lambdarank_unbiased)
        .set_default(false)
        .describe("Unbiased lambda mart. Use IPW to debias click position");
    DMLC_DECLARE_FIELD(lambdarank_bias_norm)
        .set_default(2.0)
        .set_lower_bound(0.0)
        .describe("Lp regularization for unbiased lambdarank.");
    DMLC_DECLARE_FIELD(ndcg_exp_gain)
        .set_default(true)
        .describe("When set to true, the label gain is 2^rel - 1, otherwise it's rel.");
  }
};
|
||||
|
||||
/**
|
||||
* \brief Parse name for ranking metric given parameters.
|
||||
*
|
||||
* \param [in] name Null terminated string for metric name
|
||||
* \param [in] param Null terminated string for parameter like the `3-` in `ndcg@3-`.
|
||||
@ -23,7 +137,11 @@ namespace ltr {
|
||||
*
|
||||
* \return The name of the metric.
|
||||
*/
|
||||
std::string MakeMetricName(StringView name, StringView param, std::uint32_t* topn, bool* minus);
|
||||
} // namespace ltr
|
||||
} // namespace xgboost
|
||||
std::string ParseMetricName(StringView name, StringView param, position_t* topn, bool* minus);
|
||||
|
||||
/**
|
||||
* \brief Construct the name of a ranking metric from its components, e.g. `map@2-`.
|
||||
*/
|
||||
std::string MakeMetricName(StringView name, position_t topn, bool minus);
|
||||
} // namespace xgboost::ltr
|
||||
#endif // XGBOOST_COMMON_RANKING_UTILS_H_
|
||||
|
||||
@ -43,27 +43,24 @@ XGBOOST_DEVICE inline std::size_t DiscreteTrapezoidArea(std::size_t n, std::size
|
||||
* with h <= n
|
||||
*/
|
||||
template <typename U>
|
||||
inline size_t
|
||||
SegmentedTrapezoidThreads(xgboost::common::Span<U> group_ptr,
|
||||
xgboost::common::Span<size_t> out_group_threads_ptr,
|
||||
size_t h) {
|
||||
std::size_t SegmentedTrapezoidThreads(xgboost::common::Span<U> group_ptr,
|
||||
xgboost::common::Span<std::size_t> out_group_threads_ptr,
|
||||
std::size_t h) {
|
||||
CHECK_GE(group_ptr.size(), 1);
|
||||
CHECK_EQ(group_ptr.size(), out_group_threads_ptr.size());
|
||||
dh::LaunchN(
|
||||
group_ptr.size(), [=] XGBOOST_DEVICE(size_t idx) {
|
||||
dh::LaunchN(group_ptr.size(), [=] XGBOOST_DEVICE(std::size_t idx) {
|
||||
if (idx == 0) {
|
||||
out_group_threads_ptr[0] = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
size_t cnt = static_cast<size_t>(group_ptr[idx] - group_ptr[idx - 1]);
|
||||
std::size_t cnt = static_cast<std::size_t>(group_ptr[idx] - group_ptr[idx - 1]);
|
||||
out_group_threads_ptr[idx] = DiscreteTrapezoidArea(cnt, h);
|
||||
});
|
||||
dh::InclusiveSum(out_group_threads_ptr.data(), out_group_threads_ptr.data(),
|
||||
out_group_threads_ptr.size());
|
||||
size_t total = 0;
|
||||
dh::safe_cuda(cudaMemcpy(
|
||||
&total, out_group_threads_ptr.data() + out_group_threads_ptr.size() - 1,
|
||||
std::size_t total = 0;
|
||||
dh::safe_cuda(cudaMemcpy(&total, out_group_threads_ptr.data() + out_group_threads_ptr.size() - 1,
|
||||
sizeof(total), cudaMemcpyDeviceToHost));
|
||||
return total;
|
||||
}
|
||||
@ -71,8 +68,8 @@ SegmentedTrapezoidThreads(xgboost::common::Span<U> group_ptr,
|
||||
/**
|
||||
* Called inside kernel to obtain coordinate from trapezoid grid.
|
||||
*/
|
||||
XGBOOST_DEVICE inline void UnravelTrapeziodIdx(size_t i_idx, size_t n,
|
||||
size_t *out_i, size_t *out_j) {
|
||||
XGBOOST_DEVICE inline void UnravelTrapeziodIdx(std::size_t i_idx, std::size_t n, std::size_t *out_i,
|
||||
std::size_t *out_j) {
|
||||
auto &i = *out_i;
|
||||
auto &j = *out_j;
|
||||
double idx = static_cast<double>(i_idx);
|
||||
|
||||
@ -234,7 +234,7 @@ struct EvalRank : public MetricNoCache, public EvalRankConfig {
|
||||
|
||||
protected:
|
||||
// Parses the `name` / `param` pieces of a metric specification (e.g. "ndcg" and "3-") into
// the stored metric name, `topn` and `minus`. NOTE(review): `topn` and `minus` are presumably
// members inherited from EvalRankConfig -- confirm.
explicit EvalRank(const char* name, const char* param) {
  this->name = ltr::ParseMetricName(name, param, &topn, &minus);
}
|
||||
|
||||
virtual double EvalGroup(PredIndPairContainer *recptr) const = 0;
|
||||
|
||||
@ -1,38 +1,69 @@
|
||||
/**
|
||||
* Copyright 2023 by XGBoost Contributors
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
#include <gtest/gtest.h> // for Test, AssertionResult, Message, TestPartR...
|
||||
#include <gtest/gtest.h> // for ASSERT_NEAR, ASSERT_T...
|
||||
#include <xgboost/base.h> // for Args
|
||||
#include <xgboost/context.h> // for Context
|
||||
#include <xgboost/string_view.h> // for StringView
|
||||
|
||||
#include <cstdint> // std::uint32_t
|
||||
#include <cstdint> // for uint32_t
|
||||
#include <utility> // for pair
|
||||
|
||||
#include "../../../src/common/ranking_utils.h"
|
||||
#include "../../../src/common/ranking_utils.h" // for LambdaRankParam, ParseMetricName, MakeMet...
|
||||
|
||||
namespace xgboost {
|
||||
namespace ltr {
|
||||
TEST(RankingUtils, MakeMetricName) {
|
||||
namespace xgboost::ltr {
|
||||
TEST(RankingUtils, LambdaRankParam) {
|
||||
// make sure no memory is shared in dmlc parameter.
|
||||
LambdaRankParam p0;
|
||||
p0.UpdateAllowUnknown(Args{{"lambdarank_num_pair_per_sample", "3"}});
|
||||
ASSERT_EQ(p0.NumPair(), 3);
|
||||
|
||||
LambdaRankParam p1;
|
||||
p1.UpdateAllowUnknown(Args{{"lambdarank_num_pair_per_sample", "8"}});
|
||||
|
||||
ASSERT_EQ(p0.NumPair(), 3);
|
||||
ASSERT_EQ(p1.NumPair(), 8);
|
||||
|
||||
p0.UpdateAllowUnknown(Args{{"lambdarank_num_pair_per_sample", "17"}});
|
||||
ASSERT_EQ(p0.NumPair(), 17);
|
||||
ASSERT_EQ(p1.NumPair(), 8);
|
||||
}
|
||||
|
||||
TEST(RankingUtils, ParseMetricName) {
|
||||
std::uint32_t topn{32};
|
||||
bool minus{false};
|
||||
auto name = MakeMetricName("ndcg", "3-", &topn, &minus);
|
||||
auto name = ParseMetricName("ndcg", "3-", &topn, &minus);
|
||||
ASSERT_EQ(name, "ndcg@3-");
|
||||
ASSERT_EQ(topn, 3);
|
||||
ASSERT_TRUE(minus);
|
||||
|
||||
name = MakeMetricName("ndcg", "6", &topn, &minus);
|
||||
name = ParseMetricName("ndcg", "6", &topn, &minus);
|
||||
ASSERT_EQ(topn, 6);
|
||||
ASSERT_TRUE(minus); // unchanged
|
||||
|
||||
minus = false;
|
||||
name = MakeMetricName("ndcg", "-", &topn, &minus);
|
||||
name = ParseMetricName("ndcg", "-", &topn, &minus);
|
||||
ASSERT_EQ(topn, 6); // unchanged
|
||||
ASSERT_TRUE(minus);
|
||||
|
||||
name = MakeMetricName("ndcg", nullptr, &topn, &minus);
|
||||
name = ParseMetricName("ndcg", nullptr, &topn, &minus);
|
||||
ASSERT_EQ(topn, 6); // unchanged
|
||||
ASSERT_TRUE(minus); // unchanged
|
||||
|
||||
name = MakeMetricName("ndcg", StringView{}, &topn, &minus);
|
||||
name = ParseMetricName("ndcg", StringView{}, &topn, &minus);
|
||||
ASSERT_EQ(topn, 6); // unchanged
|
||||
ASSERT_TRUE(minus); // unchanged
|
||||
}
|
||||
} // namespace ltr
|
||||
} // namespace xgboost
|
||||
|
||||
TEST(RankingUtils, MakeMetricName) {
|
||||
auto name = MakeMetricName("map", LambdaRankParam::NotSet(), true);
|
||||
ASSERT_EQ(name, "map-");
|
||||
name = MakeMetricName("map", LambdaRankParam::NotSet(), false);
|
||||
ASSERT_EQ(name, "map");
|
||||
name = MakeMetricName("map", 2, true);
|
||||
ASSERT_EQ(name, "map@2-");
|
||||
name = MakeMetricName("map", 2, false);
|
||||
ASSERT_EQ(name, "map@2");
|
||||
}
|
||||
} // namespace xgboost::ltr
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user