Define a new ranking parameter. (#8887)
This commit is contained in:
parent
e8a69013e6
commit
46dfcc7d22
24
src/common/error_msg.h
Normal file
24
src/common/error_msg.h
Normal file
@ -0,0 +1,24 @@
|
|||||||
|
/**
|
||||||
|
* Copyright 2023 by XGBoost contributors
|
||||||
|
*
|
||||||
|
* \brief Common error message for various checks.
|
||||||
|
*/
|
||||||
|
#ifndef XGBOOST_COMMON_ERROR_MSG_H_
|
||||||
|
#define XGBOOST_COMMON_ERROR_MSG_H_
|
||||||
|
|
||||||
|
#include "xgboost/string_view.h" // for StringView
|
||||||
|
|
||||||
|
namespace xgboost::error {
|
||||||
|
/**
 * \brief Error message for a weight vector whose size differs from the number of query
 *        groups when ranking groups are in use.
 */
constexpr StringView GroupWeight() {
|
||||||
|
return "Size of weight must equal to the number of query groups when ranking group is used.";
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * \brief Leading part of the error message for an invalid query group structure.
 *
 * The text deliberately ends with "equal to " (note the trailing space); presumably the
 * caller appends the expected quantity -- confirm at the call sites.
 */
constexpr StringView GroupSize() {
|
||||||
|
return "Invalid query group structure. The number of rows obtained from group doesn't equal to ";
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * \brief Error message for a label whose size does not match the size of the prediction.
 */
constexpr StringView LabelScoreSize() {
|
||||||
|
return "The size of label doesn't match the size of prediction.";
|
||||||
|
}
|
||||||
|
} // namespace xgboost::error
|
||||||
|
#endif // XGBOOST_COMMON_ERROR_MSG_H_
|
||||||
@ -3,15 +3,28 @@
|
|||||||
*/
|
*/
|
||||||
#include "ranking_utils.h"
|
#include "ranking_utils.h"
|
||||||
|
|
||||||
#include <cstdint> // std::uint32_t
|
#include <algorithm> // for copy_n, max, min, none_of, all_of
|
||||||
#include <sstream> // std::ostringstream
|
#include <cstddef> // for size_t
|
||||||
#include <string> // std::string,std::sscanf
|
#include <cstdio> // for sscanf
|
||||||
|
#include <exception> // for exception
|
||||||
|
#include <functional> // for greater
|
||||||
|
#include <iterator> // for reverse_iterator
|
||||||
|
#include <string> // for char_traits, string
|
||||||
|
|
||||||
#include "xgboost/string_view.h" // StringView
|
#include "algorithm.h" // for ArgSort
|
||||||
|
#include "linalg_op.h" // for cbegin, cend
|
||||||
|
#include "optional_weight.h" // for MakeOptionalWeights
|
||||||
|
#include "threading_utils.h" // for ParallelFor
|
||||||
|
#include "xgboost/base.h" // for bst_group_t
|
||||||
|
#include "xgboost/context.h" // for Context
|
||||||
|
#include "xgboost/data.h" // for MetaInfo
|
||||||
|
#include "xgboost/linalg.h" // for All, TensorView, Range, Tensor, Vector
|
||||||
|
#include "xgboost/logging.h" // for Error, LogCheck_EQ, CHECK_EQ
|
||||||
|
|
||||||
namespace xgboost {
|
namespace xgboost::ltr {
|
||||||
namespace ltr {
|
DMLC_REGISTER_PARAMETER(LambdaRankParam);
|
||||||
std::string MakeMetricName(StringView name, StringView param, std::uint32_t* topn, bool* minus) {
|
|
||||||
|
std::string ParseMetricName(StringView name, StringView param, position_t* topn, bool* minus) {
|
||||||
std::string out_name;
|
std::string out_name;
|
||||||
if (!param.empty()) {
|
if (!param.empty()) {
|
||||||
std::ostringstream os;
|
std::ostringstream os;
|
||||||
@ -30,5 +43,18 @@ std::string MakeMetricName(StringView name, StringView param, std::uint32_t* top
|
|||||||
}
|
}
|
||||||
return out_name;
|
return out_name;
|
||||||
}
|
}
|
||||||
} // namespace ltr
|
|
||||||
} // namespace xgboost
|
/**
 * \brief Assemble the name of a ranking metric from its components.
 *
 * The result is `name`, followed by `@<topn>` unless `topn` equals
 * LambdaRankParam::NotSet(), followed by `-` when `minus` is true.
 *
 * \param name  Base name of the metric.
 * \param topn  Top-n position, or LambdaRankParam::NotSet() to omit the `@<topn>` part.
 * \param minus Whether to append the trailing `-`.
 *
 * \return The assembled metric name.
 */
std::string MakeMetricName(StringView name, position_t topn, bool minus) {
  std::ostringstream builder;
  builder << name;
  // NotSet() marks an unspecified top-n, in which case no `@k` suffix is written.
  if (topn != LambdaRankParam::NotSet()) {
    builder << "@" << topn;
  }
  if (minus) {
    builder << "-";
  }
  return builder.str();
}
|
||||||
|
} // namespace xgboost::ltr
|
||||||
|
|||||||
@ -3,17 +3,131 @@
|
|||||||
*/
|
*/
|
||||||
#ifndef XGBOOST_COMMON_RANKING_UTILS_H_
|
#ifndef XGBOOST_COMMON_RANKING_UTILS_H_
|
||||||
#define XGBOOST_COMMON_RANKING_UTILS_H_
|
#define XGBOOST_COMMON_RANKING_UTILS_H_
|
||||||
|
#include <algorithm> // for min
|
||||||
|
#include <cmath> // for log2, fabs, floor
|
||||||
|
#include <cstddef> // for size_t
|
||||||
|
#include <cstdint> // for uint32_t, uint8_t, int32_t
|
||||||
|
#include <limits> // for numeric_limits
|
||||||
|
#include <string> // for char_traits, string
|
||||||
|
#include <vector> // for vector
|
||||||
|
|
||||||
#include <cstddef> // std::size_t
|
#include "./math.h" // for CloseTo
|
||||||
#include <cstdint> // std::uint32_t
|
#include "dmlc/parameter.h" // for FieldEntry, DMLC_DECLARE_FIELD
|
||||||
#include <string> // std::string
|
#include "error_msg.h" // for GroupWeight, GroupSize
|
||||||
|
#include "xgboost/base.h" // for XGBOOST_DEVICE, bst_group_t
|
||||||
|
#include "xgboost/context.h" // for Context
|
||||||
|
#include "xgboost/data.h" // for MetaInfo
|
||||||
|
#include "xgboost/host_device_vector.h" // for HostDeviceVector
|
||||||
|
#include "xgboost/linalg.h" // for Vector, VectorView, Tensor
|
||||||
|
#include "xgboost/logging.h" // for LogCheck_EQ, CHECK_EQ, CHECK
|
||||||
|
#include "xgboost/parameter.h" // for XGBoostParameter
|
||||||
|
#include "xgboost/span.h" // for Span
|
||||||
|
#include "xgboost/string_view.h" // for StringView
|
||||||
|
|
||||||
#include "xgboost/string_view.h" // StringView
|
namespace xgboost::ltr {
|
||||||
|
|
||||||
namespace xgboost {
|
|
||||||
namespace ltr {
|
|
||||||
/**
|
/**
|
||||||
* \brief Construct name for ranking metric given parameters.
|
* \brief Relevance degree
|
||||||
|
*/
|
||||||
|
using rel_degree_t = std::uint32_t; // NOLINT
|
||||||
|
/**
|
||||||
|
* \brief top-k position
|
||||||
|
*/
|
||||||
|
using position_t = std::uint32_t; // NOLINT
|
||||||
|
|
||||||
|
/**
 * \brief Method for constructing pairs.
 *
 * Registered for the `lambdarank_pair_method` field of LambdaRankParam under the string
 * names "topk" (kTopK) and "mean" (kMean).
 */
enum class PairMethod : std::int32_t {
|
||||||
|
kTopK = 0,
|
||||||
|
kMean = 1,
|
||||||
|
};
|
||||||
|
} // namespace xgboost::ltr
|
||||||
|
|
||||||
|
DECLARE_FIELD_ENUM_CLASS(xgboost::ltr::PairMethod);
|
||||||
|
|
||||||
|
namespace xgboost::ltr {
|
||||||
|
/**
 * \brief Parameters for pair construction, position debiasing and NDCG gain in ranking.
 *
 * The member names are also the string keys of the parameters, which is why some of them
 * carry a NOLINT marker instead of being renamed.
 */
struct LambdaRankParam : public XGBoostParameter<LambdaRankParam> {
 private:
  // Pair counts returned by NumPair() when the user did not set one explicitly.
  static constexpr position_t DefaultK() { return 32; }
  static constexpr position_t DefaultSamplePairs() { return 1; }

 protected:
  // Pair construction. Read these through the getters below so that the automatic
  // defaults are applied; NOLINT keeps the names identical to the parameter keys.
  PairMethod lambdarank_pair_method{PairMethod::kMean};  // NOLINT
  std::size_t lambdarank_num_pair_per_sample{NotSet()};  // NOLINT

 public:
  /** \brief Sentinel value meaning that no number of pairs / top-k has been specified. */
  static constexpr position_t NotSet() { return std::numeric_limits<position_t>::max(); }

  // Unbiased lambdarank.
  bool lambdarank_unbiased{false};
  double lambdarank_bias_norm{2.0};
  // NDCG.
  bool ndcg_exp_gain{true};

  /** \brief Field-by-field equality. */
  bool operator==(LambdaRankParam const& that) const {
    if (lambdarank_pair_method != that.lambdarank_pair_method) {
      return false;
    }
    if (lambdarank_num_pair_per_sample != that.lambdarank_num_pair_per_sample) {
      return false;
    }
    if (lambdarank_unbiased != that.lambdarank_unbiased) {
      return false;
    }
    if (lambdarank_bias_norm != that.lambdarank_bias_norm) {
      return false;
    }
    return ndcg_exp_gain == that.ndcg_exp_gain;
  }
  bool operator!=(LambdaRankParam const& that) const { return !this->operator==(that); }

  /** \brief Returns 1 / (1 + lambdarank_bias_norm). */
  [[nodiscard]] double Regularizer() const {
    double const denominator = 1.0 + this->lambdarank_bias_norm;
    return 1.0 / denominator;
  }

  /**
   * \brief Get number of pairs for each sample.
   *
   * An explicitly configured value wins; otherwise the default depends on the pair method.
   * The stored value is a std::size_t and is converted to position_t on return.
   */
  [[nodiscard]] position_t NumPair() const {
    if (lambdarank_num_pair_per_sample != NotSet()) {
      return static_cast<position_t>(lambdarank_num_pair_per_sample);
    }
    switch (lambdarank_pair_method) {
      case PairMethod::kMean:
        return DefaultSamplePairs();
      case PairMethod::kTopK:
        return DefaultK();
    }
    LOG(FATAL) << "Unreachable.";
    return 0;
  }

  /** \brief Whether the pair method is top-k. */
  [[nodiscard]] bool HasTruncation() const { return lambdarank_pair_method == PairMethod::kTopK; }

  // Used for evaluation metric and cache initialization: NumPair() for the top-k method,
  // NotSet() (the whole list) otherwise.
  [[nodiscard]] auto TopK() const { return HasTruncation() ? NumPair() : NotSet(); }

  DMLC_DECLARE_PARAMETER(LambdaRankParam) {
    DMLC_DECLARE_FIELD(lambdarank_pair_method)
        .set_default(PairMethod::kMean)
        .add_enum("mean", PairMethod::kMean)
        .add_enum("topk", PairMethod::kTopK)
        .describe("Method for constructing pairs.");
    DMLC_DECLARE_FIELD(lambdarank_num_pair_per_sample)
        .set_default(NotSet())
        .set_lower_bound(1)
        .describe("Number of pairs for each sample in the list.");
    DMLC_DECLARE_FIELD(lambdarank_unbiased)
        .set_default(false)
        .describe("Unbiased lambda mart. Use IPW to debias click position");
    DMLC_DECLARE_FIELD(lambdarank_bias_norm)
        .set_default(2.0)
        .set_lower_bound(0.0)
        .describe("Lp regularization for unbiased lambdarank.");
    DMLC_DECLARE_FIELD(ndcg_exp_gain)
        .set_default(true)
        .describe("When set to true, the label gain is 2^rel - 1, otherwise it's rel.");
  }
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* \brief Parse name for ranking metric given parameters.
|
||||||
*
|
*
|
||||||
* \param [in] name Null terminated string for metric name
|
* \param [in] name Null terminated string for metric name
|
||||||
* \param [in] param Null terminated string for parameter like the `3-` in `ndcg@3-`.
|
* \param [in] param Null terminated string for parameter like the `3-` in `ndcg@3-`.
|
||||||
@ -23,7 +137,11 @@ namespace ltr {
|
|||||||
*
|
*
|
||||||
* \return The name of the metric.
|
* \return The name of the metric.
|
||||||
*/
|
*/
|
||||||
std::string MakeMetricName(StringView name, StringView param, std::uint32_t* topn, bool* minus);
|
std::string ParseMetricName(StringView name, StringView param, position_t* topn, bool* minus);
|
||||||
} // namespace ltr
|
|
||||||
} // namespace xgboost
|
/**
|
||||||
|
* \brief Construct name for ranking metric given parameters.
|
||||||
|
*/
|
||||||
|
std::string MakeMetricName(StringView name, position_t topn, bool minus);
|
||||||
|
} // namespace xgboost::ltr
|
||||||
#endif // XGBOOST_COMMON_RANKING_UTILS_H_
|
#endif // XGBOOST_COMMON_RANKING_UTILS_H_
|
||||||
|
|||||||
@ -43,36 +43,33 @@ XGBOOST_DEVICE inline std::size_t DiscreteTrapezoidArea(std::size_t n, std::size
|
|||||||
* with h <= n
|
* with h <= n
|
||||||
*/
|
*/
|
||||||
template <typename U>
|
template <typename U>
|
||||||
inline size_t
|
std::size_t SegmentedTrapezoidThreads(xgboost::common::Span<U> group_ptr,
|
||||||
SegmentedTrapezoidThreads(xgboost::common::Span<U> group_ptr,
|
xgboost::common::Span<std::size_t> out_group_threads_ptr,
|
||||||
xgboost::common::Span<size_t> out_group_threads_ptr,
|
std::size_t h) {
|
||||||
size_t h) {
|
|
||||||
CHECK_GE(group_ptr.size(), 1);
|
CHECK_GE(group_ptr.size(), 1);
|
||||||
CHECK_EQ(group_ptr.size(), out_group_threads_ptr.size());
|
CHECK_EQ(group_ptr.size(), out_group_threads_ptr.size());
|
||||||
dh::LaunchN(
|
dh::LaunchN(group_ptr.size(), [=] XGBOOST_DEVICE(std::size_t idx) {
|
||||||
group_ptr.size(), [=] XGBOOST_DEVICE(size_t idx) {
|
if (idx == 0) {
|
||||||
if (idx == 0) {
|
out_group_threads_ptr[0] = 0;
|
||||||
out_group_threads_ptr[0] = 0;
|
return;
|
||||||
return;
|
}
|
||||||
}
|
|
||||||
|
|
||||||
size_t cnt = static_cast<size_t>(group_ptr[idx] - group_ptr[idx - 1]);
|
std::size_t cnt = static_cast<std::size_t>(group_ptr[idx] - group_ptr[idx - 1]);
|
||||||
out_group_threads_ptr[idx] = DiscreteTrapezoidArea(cnt, h);
|
out_group_threads_ptr[idx] = DiscreteTrapezoidArea(cnt, h);
|
||||||
});
|
});
|
||||||
dh::InclusiveSum(out_group_threads_ptr.data(), out_group_threads_ptr.data(),
|
dh::InclusiveSum(out_group_threads_ptr.data(), out_group_threads_ptr.data(),
|
||||||
out_group_threads_ptr.size());
|
out_group_threads_ptr.size());
|
||||||
size_t total = 0;
|
std::size_t total = 0;
|
||||||
dh::safe_cuda(cudaMemcpy(
|
dh::safe_cuda(cudaMemcpy(&total, out_group_threads_ptr.data() + out_group_threads_ptr.size() - 1,
|
||||||
&total, out_group_threads_ptr.data() + out_group_threads_ptr.size() - 1,
|
sizeof(total), cudaMemcpyDeviceToHost));
|
||||||
sizeof(total), cudaMemcpyDeviceToHost));
|
|
||||||
return total;
|
return total;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Called inside kernel to obtain coordinate from trapezoid grid.
|
* Called inside kernel to obtain coordinate from trapezoid grid.
|
||||||
*/
|
*/
|
||||||
XGBOOST_DEVICE inline void UnravelTrapeziodIdx(size_t i_idx, size_t n,
|
XGBOOST_DEVICE inline void UnravelTrapeziodIdx(std::size_t i_idx, std::size_t n, std::size_t *out_i,
|
||||||
size_t *out_i, size_t *out_j) {
|
std::size_t *out_j) {
|
||||||
auto &i = *out_i;
|
auto &i = *out_i;
|
||||||
auto &j = *out_j;
|
auto &j = *out_j;
|
||||||
double idx = static_cast<double>(i_idx);
|
double idx = static_cast<double>(i_idx);
|
||||||
|
|||||||
@ -234,7 +234,7 @@ struct EvalRank : public MetricNoCache, public EvalRankConfig {
|
|||||||
|
|
||||||
protected:
|
protected:
|
||||||
explicit EvalRank(const char* name, const char* param) {
|
explicit EvalRank(const char* name, const char* param) {
|
||||||
this->name = ltr::MakeMetricName(name, param, &topn, &minus);
|
this->name = ltr::ParseMetricName(name, param, &topn, &minus);
|
||||||
}
|
}
|
||||||
|
|
||||||
virtual double EvalGroup(PredIndPairContainer *recptr) const = 0;
|
virtual double EvalGroup(PredIndPairContainer *recptr) const = 0;
|
||||||
|
|||||||
@ -1,38 +1,69 @@
|
|||||||
/**
|
/**
|
||||||
* Copyright 2023 by XGBoost Contributors
|
* Copyright 2023 by XGBoost Contributors
|
||||||
*/
|
*/
|
||||||
#include <gtest/gtest.h>
|
#include <gtest/gtest.h> // for Test, AssertionResult, Message, TestPartR...
|
||||||
|
#include <gtest/gtest.h> // for ASSERT_NEAR, ASSERT_T...
|
||||||
|
#include <xgboost/base.h> // for Args
|
||||||
|
#include <xgboost/context.h> // for Context
|
||||||
|
#include <xgboost/string_view.h> // for StringView
|
||||||
|
|
||||||
#include <cstdint> // std::uint32_t
|
#include <cstdint> // for uint32_t
|
||||||
|
#include <utility> // for pair
|
||||||
|
|
||||||
#include "../../../src/common/ranking_utils.h"
|
#include "../../../src/common/ranking_utils.h" // for LambdaRankParam, ParseMetricName, MakeMet...
|
||||||
|
|
||||||
namespace xgboost {
|
namespace xgboost::ltr {
|
||||||
namespace ltr {
|
TEST(RankingUtils, LambdaRankParam) {
|
||||||
TEST(RankingUtils, MakeMetricName) {
|
// make sure no memory is shared in dmlc parameter.
|
||||||
|
LambdaRankParam p0;
|
||||||
|
p0.UpdateAllowUnknown(Args{{"lambdarank_num_pair_per_sample", "3"}});
|
||||||
|
ASSERT_EQ(p0.NumPair(), 3);
|
||||||
|
|
||||||
|
LambdaRankParam p1;
|
||||||
|
p1.UpdateAllowUnknown(Args{{"lambdarank_num_pair_per_sample", "8"}});
|
||||||
|
|
||||||
|
ASSERT_EQ(p0.NumPair(), 3);
|
||||||
|
ASSERT_EQ(p1.NumPair(), 8);
|
||||||
|
|
||||||
|
p0.UpdateAllowUnknown(Args{{"lambdarank_num_pair_per_sample", "17"}});
|
||||||
|
ASSERT_EQ(p0.NumPair(), 17);
|
||||||
|
ASSERT_EQ(p1.NumPair(), 8);
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(RankingUtils, ParseMetricName) {
|
||||||
std::uint32_t topn{32};
|
std::uint32_t topn{32};
|
||||||
bool minus{false};
|
bool minus{false};
|
||||||
auto name = MakeMetricName("ndcg", "3-", &topn, &minus);
|
auto name = ParseMetricName("ndcg", "3-", &topn, &minus);
|
||||||
ASSERT_EQ(name, "ndcg@3-");
|
ASSERT_EQ(name, "ndcg@3-");
|
||||||
ASSERT_EQ(topn, 3);
|
ASSERT_EQ(topn, 3);
|
||||||
ASSERT_TRUE(minus);
|
ASSERT_TRUE(minus);
|
||||||
|
|
||||||
name = MakeMetricName("ndcg", "6", &topn, &minus);
|
name = ParseMetricName("ndcg", "6", &topn, &minus);
|
||||||
ASSERT_EQ(topn, 6);
|
ASSERT_EQ(topn, 6);
|
||||||
ASSERT_TRUE(minus); // unchanged
|
ASSERT_TRUE(minus); // unchanged
|
||||||
|
|
||||||
minus = false;
|
minus = false;
|
||||||
name = MakeMetricName("ndcg", "-", &topn, &minus);
|
name = ParseMetricName("ndcg", "-", &topn, &minus);
|
||||||
ASSERT_EQ(topn, 6); // unchanged
|
ASSERT_EQ(topn, 6); // unchanged
|
||||||
ASSERT_TRUE(minus);
|
ASSERT_TRUE(minus);
|
||||||
|
|
||||||
name = MakeMetricName("ndcg", nullptr, &topn, &minus);
|
name = ParseMetricName("ndcg", nullptr, &topn, &minus);
|
||||||
ASSERT_EQ(topn, 6); // unchanged
|
ASSERT_EQ(topn, 6); // unchanged
|
||||||
ASSERT_TRUE(minus); // unchanged
|
ASSERT_TRUE(minus); // unchanged
|
||||||
|
|
||||||
name = MakeMetricName("ndcg", StringView{}, &topn, &minus);
|
name = ParseMetricName("ndcg", StringView{}, &topn, &minus);
|
||||||
ASSERT_EQ(topn, 6); // unchanged
|
ASSERT_EQ(topn, 6); // unchanged
|
||||||
ASSERT_TRUE(minus); // unchanged
|
ASSERT_TRUE(minus); // unchanged
|
||||||
}
|
}
|
||||||
} // namespace ltr
|
|
||||||
} // namespace xgboost
|
// Covers a set and an unset top-n, each with and without the trailing minus sign.
TEST(RankingUtils, MakeMetricName) {
  auto const unset = LambdaRankParam::NotSet();
  ASSERT_EQ(MakeMetricName("map", unset, true), "map-");
  ASSERT_EQ(MakeMetricName("map", unset, false), "map");
  ASSERT_EQ(MakeMetricName("map", 2, true), "map@2-");
  ASSERT_EQ(MakeMetricName("map", 2, false), "map@2");
}
|
||||||
|
} // namespace xgboost::ltr
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user