Define a new ranking parameter. (#8887)

This commit is contained in:
Jiaming Yuan 2023-03-09 17:46:24 +08:00 committed by GitHub
parent e8a69013e6
commit 46dfcc7d22
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 249 additions and 53 deletions

24
src/common/error_msg.h Normal file
View File

@ -0,0 +1,24 @@
/**
* Copyright 2023 by XGBoost contributors
*
* \brief Common error message for various checks.
*/
#ifndef XGBOOST_COMMON_ERROR_MSG_H_
#define XGBOOST_COMMON_ERROR_MSG_H_
#include "xgboost/string_view.h" // for StringView
// Shared, compile-time error message fragments, so that different checks emit the same text.
namespace xgboost::error {
/**
 * \brief Message stating that the weight size must match the number of query groups when
 *        ranking groups are used.
 */
constexpr StringView GroupWeight() {
return "Size of weight must equal to the number of query groups when ranking group is used.";
}
/**
 * \brief Message prefix for an invalid query group structure.
 *
 * The text deliberately ends with "equal to " and a trailing space; presumably the caller
 * appends the expected quantity -- confirm at the call sites.
 */
constexpr StringView GroupSize() {
return "Invalid query group structure. The number of rows obtained from group doesn't equal to ";
}
/**
 * \brief Message stating that the label size and the prediction size differ.
 */
constexpr StringView LabelScoreSize() {
return "The size of label doesn't match the size of prediction.";
}
} // namespace xgboost::error
#endif // XGBOOST_COMMON_ERROR_MSG_H_

View File

@ -3,15 +3,28 @@
*/
#include "ranking_utils.h"
#include <cstdint> // std::uint32_t
#include <sstream> // std::ostringstream
#include <string> // std::string,std::sscanf
#include <algorithm> // for copy_n, max, min, none_of, all_of
#include <cstddef> // for size_t
#include <cstdio> // for sscanf
#include <exception> // for exception
#include <functional> // for greater
#include <iterator> // for reverse_iterator
#include <string> // for char_traits, string
#include "xgboost/string_view.h" // StringView
#include "algorithm.h" // for ArgSort
#include "linalg_op.h" // for cbegin, cend
#include "optional_weight.h" // for MakeOptionalWeights
#include "threading_utils.h" // for ParallelFor
#include "xgboost/base.h" // for bst_group_t
#include "xgboost/context.h" // for Context
#include "xgboost/data.h" // for MetaInfo
#include "xgboost/linalg.h" // for All, TensorView, Range, Tensor, Vector
#include "xgboost/logging.h" // for Error, LogCheck_EQ, CHECK_EQ
namespace xgboost {
namespace ltr {
std::string MakeMetricName(StringView name, StringView param, std::uint32_t* topn, bool* minus) {
namespace xgboost::ltr {
DMLC_REGISTER_PARAMETER(LambdaRankParam);
std::string ParseMetricName(StringView name, StringView param, position_t* topn, bool* minus) {
std::string out_name;
if (!param.empty()) {
std::ostringstream os;
@ -30,5 +43,18 @@ std::string MakeMetricName(StringView name, StringView param, std::uint32_t* top
}
return out_name;
}
} // namespace ltr
} // namespace xgboost
/**
 * \brief Assemble the display name of a ranking metric.
 *
 * The result is `name`, followed by `@topn` unless `topn` equals
 * LambdaRankParam::NotSet(), followed by `-` when `minus` is true.
 */
std::string MakeMetricName(StringView name, position_t topn, bool minus) {
  std::ostringstream builder;
  // The base name is always emitted first.
  builder << name;
  // The truncation level is appended only when one has been specified.
  if (topn != LambdaRankParam::NotSet()) {
    builder << "@" << topn;
  }
  if (minus) {
    builder << "-";
  }
  return builder.str();
}
} // namespace xgboost::ltr

View File

@ -3,17 +3,131 @@
*/
#ifndef XGBOOST_COMMON_RANKING_UTILS_H_
#define XGBOOST_COMMON_RANKING_UTILS_H_
#include <algorithm> // for min
#include <cmath> // for log2, fabs, floor
#include <cstddef> // for size_t
#include <cstdint> // for uint32_t, uint8_t, int32_t
#include <limits> // for numeric_limits
#include <string> // for char_traits, string
#include <vector> // for vector
#include <cstddef> // std::size_t
#include <cstdint> // std::uint32_t
#include <string> // std::string
#include "./math.h" // for CloseTo
#include "dmlc/parameter.h" // for FieldEntry, DMLC_DECLARE_FIELD
#include "error_msg.h" // for GroupWeight, GroupSize
#include "xgboost/base.h" // for XGBOOST_DEVICE, bst_group_t
#include "xgboost/context.h" // for Context
#include "xgboost/data.h" // for MetaInfo
#include "xgboost/host_device_vector.h" // for HostDeviceVector
#include "xgboost/linalg.h" // for Vector, VectorView, Tensor
#include "xgboost/logging.h" // for LogCheck_EQ, CHECK_EQ, CHECK
#include "xgboost/parameter.h" // for XGBoostParameter
#include "xgboost/span.h" // for Span
#include "xgboost/string_view.h" // for StringView
#include "xgboost/string_view.h" // StringView
namespace xgboost {
namespace ltr {
namespace xgboost::ltr {
/**
* \brief Construct name for ranking metric given parameters.
* \brief Relevance degree
*/
using rel_degree_t = std::uint32_t; // NOLINT
/**
* \brief top-k position
*/
using position_t = std::uint32_t; // NOLINT
/**
 * \brief Method used for constructing pairs in LambdaRank.
 *
 * Exposed to users through the `lambdarank_pair_method` parameter of LambdaRankParam.
 */
enum class PairMethod : std::int32_t {
// Selected by the parameter value "topk"; LambdaRankParam::HasTruncation() is true for it.
kTopK = 0,
// Selected by the parameter value "mean"; the declared default of the parameter.
kMean = 1,
};
} // namespace xgboost::ltr
DECLARE_FIELD_ENUM_CLASS(xgboost::ltr::PairMethod);
namespace xgboost::ltr {
/**
 * \brief Parameter set for LambdaRank: pair construction, position debiasing and NDCG gain.
 *
 * The pair-related fields are protected; use NumPair(), HasTruncation() and TopK() to read
 * them so that unset values are replaced by the method-specific defaults.
 */
struct LambdaRankParam : public XGBoostParameter<LambdaRankParam> {
 private:
  // Pair count used by NumPair() for the top-k method when the user did not set one.
  static constexpr position_t DefaultK() { return 32; }
  // Pair count used by NumPair() for the mean method when the user did not set one.
  static constexpr position_t DefaultSamplePairs() { return 1; }

 protected:
  // Pair construction settings. Read them through the getters so the automatic
  // configuration applies. NOLINT keeps the field names equal to the parameter names.
  PairMethod lambdarank_pair_method{PairMethod::kMean};  // NOLINT
  std::size_t lambdarank_num_pair_per_sample{NotSet()};  // NOLINT

 public:
  /**
   * \brief Sentinel value meaning "not specified": the largest value of position_t.
   */
  static constexpr position_t NotSet() { return std::numeric_limits<position_t>::max(); }

  // Settings for unbiased LambdaRank.
  bool lambdarank_unbiased{false};
  double lambdarank_bias_norm{2.0};
  // Setting for the NDCG label gain.
  bool ndcg_exp_gain{true};

  /**
   * \brief Field-wise equality over every parameter declared in this struct.
   */
  bool operator==(LambdaRankParam const& that) const {
    bool const same_pair_config =
        lambdarank_pair_method == that.lambdarank_pair_method &&
        lambdarank_num_pair_per_sample == that.lambdarank_num_pair_per_sample;
    bool const same_debias_config = lambdarank_unbiased == that.lambdarank_unbiased &&
                                    lambdarank_bias_norm == that.lambdarank_bias_norm;
    return same_pair_config && same_debias_config && ndcg_exp_gain == that.ndcg_exp_gain;
  }
  bool operator!=(LambdaRankParam const& that) const { return !this->operator==(that); }

  /**
   * \brief Returns 1 / (1 + lambdarank_bias_norm).
   */
  [[nodiscard]] double Regularizer() const {
    double const denominator = 1.0 + this->lambdarank_bias_norm;
    return 1.0 / denominator;
  }

  /**
   * \brief Get number of pairs for each sample.
   *
   * Returns the user-supplied value when there is one, otherwise the default of the
   * configured pair method.
   */
  [[nodiscard]] position_t NumPair() const {
    // An explicit user value takes precedence over the per-method defaults.
    if (lambdarank_num_pair_per_sample != NotSet()) {
      return lambdarank_num_pair_per_sample;
    }
    switch (lambdarank_pair_method) {
      case PairMethod::kTopK:
        return DefaultK();
      case PairMethod::kMean:
        return DefaultSamplePairs();
    }
    // Only reached when the enum holds a value outside the declared enumerators.
    LOG(FATAL) << "Unreachable.";
    return 0;
  }

  /**
   * \brief Whether the configured pair method is top-k.
   */
  [[nodiscard]] bool HasTruncation() const { return PairMethod::kTopK == lambdarank_pair_method; }

  /**
   * \brief Used for evaluation metric and cache initialization: NumPair() when truncating at
   *        top-k, NotSet() when the whole list is iterated.
   */
  [[nodiscard]] auto TopK() const { return HasTruncation() ? NumPair() : NotSet(); }

  DMLC_DECLARE_PARAMETER(LambdaRankParam) {
    DMLC_DECLARE_FIELD(lambdarank_pair_method)
        .set_default(PairMethod::kMean)
        .add_enum("mean", PairMethod::kMean)
        .add_enum("topk", PairMethod::kTopK)
        .describe("Method for constructing pairs.");
    DMLC_DECLARE_FIELD(lambdarank_num_pair_per_sample)
        .set_default(NotSet())
        .set_lower_bound(1)
        .describe("Number of pairs for each sample in the list.");
    DMLC_DECLARE_FIELD(lambdarank_unbiased)
        .set_default(false)
        .describe("Unbiased lambda mart. Use IPW to debias click position");
    DMLC_DECLARE_FIELD(lambdarank_bias_norm)
        .set_default(2.0)
        .set_lower_bound(0.0)
        .describe("Lp regularization for unbiased lambdarank.");
    DMLC_DECLARE_FIELD(ndcg_exp_gain)
        .set_default(true)
        .describe("When set to true, the label gain is 2^rel - 1, otherwise it's rel.");
  }
};
/**
* \brief Parse name for ranking metric given parameters.
*
* \param [in] name Null terminated string for metric name
* \param [in] param Null terminated string for parameter like the `3-` in `ndcg@3-`.
@ -23,7 +137,11 @@ namespace ltr {
*
* \return The name of the metric.
*/
std::string MakeMetricName(StringView name, StringView param, std::uint32_t* topn, bool* minus);
} // namespace ltr
} // namespace xgboost
std::string ParseMetricName(StringView name, StringView param, position_t* topn, bool* minus);
/**
 * \brief Construct name for ranking metric given parameters.
 *
 * \param [in] name  Base name of the metric.
 * \param [in] topn  Truncation level, appended as `@topn` unless it equals
 *                   LambdaRankParam::NotSet().
 * \param [in] minus Whether to append a trailing `-` to the name.
 *
 * \return The name of the metric.
 */
std::string MakeMetricName(StringView name, position_t topn, bool minus);
} // namespace xgboost::ltr
#endif // XGBOOST_COMMON_RANKING_UTILS_H_

View File

@ -43,36 +43,33 @@ XGBOOST_DEVICE inline std::size_t DiscreteTrapezoidArea(std::size_t n, std::size
* with h <= n
*/
template <typename U>
inline size_t
SegmentedTrapezoidThreads(xgboost::common::Span<U> group_ptr,
xgboost::common::Span<size_t> out_group_threads_ptr,
size_t h) {
std::size_t SegmentedTrapezoidThreads(xgboost::common::Span<U> group_ptr,
xgboost::common::Span<std::size_t> out_group_threads_ptr,
std::size_t h) {
CHECK_GE(group_ptr.size(), 1);
CHECK_EQ(group_ptr.size(), out_group_threads_ptr.size());
dh::LaunchN(
group_ptr.size(), [=] XGBOOST_DEVICE(size_t idx) {
if (idx == 0) {
out_group_threads_ptr[0] = 0;
return;
}
dh::LaunchN(group_ptr.size(), [=] XGBOOST_DEVICE(std::size_t idx) {
if (idx == 0) {
out_group_threads_ptr[0] = 0;
return;
}
size_t cnt = static_cast<size_t>(group_ptr[idx] - group_ptr[idx - 1]);
out_group_threads_ptr[idx] = DiscreteTrapezoidArea(cnt, h);
});
std::size_t cnt = static_cast<std::size_t>(group_ptr[idx] - group_ptr[idx - 1]);
out_group_threads_ptr[idx] = DiscreteTrapezoidArea(cnt, h);
});
dh::InclusiveSum(out_group_threads_ptr.data(), out_group_threads_ptr.data(),
out_group_threads_ptr.size());
size_t total = 0;
dh::safe_cuda(cudaMemcpy(
&total, out_group_threads_ptr.data() + out_group_threads_ptr.size() - 1,
sizeof(total), cudaMemcpyDeviceToHost));
std::size_t total = 0;
dh::safe_cuda(cudaMemcpy(&total, out_group_threads_ptr.data() + out_group_threads_ptr.size() - 1,
sizeof(total), cudaMemcpyDeviceToHost));
return total;
}
/**
* Called inside kernel to obtain coordinate from trapezoid grid.
*/
XGBOOST_DEVICE inline void UnravelTrapeziodIdx(size_t i_idx, size_t n,
size_t *out_i, size_t *out_j) {
XGBOOST_DEVICE inline void UnravelTrapeziodIdx(std::size_t i_idx, std::size_t n, std::size_t *out_i,
std::size_t *out_j) {
auto &i = *out_i;
auto &j = *out_j;
double idx = static_cast<double>(i_idx);

View File

@ -234,7 +234,7 @@ struct EvalRank : public MetricNoCache, public EvalRankConfig {
protected:
explicit EvalRank(const char* name, const char* param) {
this->name = ltr::MakeMetricName(name, param, &topn, &minus);
this->name = ltr::ParseMetricName(name, param, &topn, &minus);
}
virtual double EvalGroup(PredIndPairContainer *recptr) const = 0;

View File

@ -1,38 +1,69 @@
/**
* Copyright 2023 by XGBoost Contributors
*/
#include <gtest/gtest.h>
#include <gtest/gtest.h> // for Test, AssertionResult, Message, TestPartR...
#include <gtest/gtest.h> // for ASSERT_NEAR, ASSERT_T...
#include <xgboost/base.h> // for Args
#include <xgboost/context.h> // for Context
#include <xgboost/string_view.h> // for StringView
#include <cstdint> // std::uint32_t
#include <cstdint> // for uint32_t
#include <utility> // for pair
#include "../../../src/common/ranking_utils.h"
#include "../../../src/common/ranking_utils.h" // for LambdaRankParam, ParseMetricName, MakeMet...
namespace xgboost {
namespace ltr {
TEST(RankingUtils, MakeMetricName) {
namespace xgboost::ltr {
// Checks that two LambdaRankParam instances keep independent values when updated in turn.
TEST(RankingUtils, LambdaRankParam) {
// make sure no memory is shared in dmlc parameter.
LambdaRankParam p0;
p0.UpdateAllowUnknown(Args{{"lambdarank_num_pair_per_sample", "3"}});
ASSERT_EQ(p0.NumPair(), 3);
LambdaRankParam p1;
p1.UpdateAllowUnknown(Args{{"lambdarank_num_pair_per_sample", "8"}});
// Updating p1 must leave p0 untouched.
ASSERT_EQ(p0.NumPair(), 3);
ASSERT_EQ(p1.NumPair(), 8);
// Updating p0 a second time must leave p1 untouched.
p0.UpdateAllowUnknown(Args{{"lambdarank_num_pair_per_sample", "17"}});
ASSERT_EQ(p0.NumPair(), 17);
ASSERT_EQ(p1.NumPair(), 8);
}
TEST(RankingUtils, ParseMetricName) {
std::uint32_t topn{32};
bool minus{false};
auto name = MakeMetricName("ndcg", "3-", &topn, &minus);
auto name = ParseMetricName("ndcg", "3-", &topn, &minus);
ASSERT_EQ(name, "ndcg@3-");
ASSERT_EQ(topn, 3);
ASSERT_TRUE(minus);
name = MakeMetricName("ndcg", "6", &topn, &minus);
name = ParseMetricName("ndcg", "6", &topn, &minus);
ASSERT_EQ(topn, 6);
ASSERT_TRUE(minus); // unchanged
minus = false;
name = MakeMetricName("ndcg", "-", &topn, &minus);
name = ParseMetricName("ndcg", "-", &topn, &minus);
ASSERT_EQ(topn, 6); // unchanged
ASSERT_TRUE(minus);
name = MakeMetricName("ndcg", nullptr, &topn, &minus);
name = ParseMetricName("ndcg", nullptr, &topn, &minus);
ASSERT_EQ(topn, 6); // unchanged
ASSERT_TRUE(minus); // unchanged
name = MakeMetricName("ndcg", StringView{}, &topn, &minus);
name = ParseMetricName("ndcg", StringView{}, &topn, &minus);
ASSERT_EQ(topn, 6); // unchanged
ASSERT_TRUE(minus); // unchanged
}
} // namespace ltr
} // namespace xgboost
// Covers the four combinations of (truncation level set / not set) x (minus suffix on / off).
TEST(RankingUtils, MakeMetricName) {
  // No truncation level: only the optional `-` suffix follows the base name.
  ASSERT_EQ(MakeMetricName("map", LambdaRankParam::NotSet(), true), "map-");
  ASSERT_EQ(MakeMetricName("map", LambdaRankParam::NotSet(), false), "map");

  // With a truncation level: `@<topn>` is inserted before the optional suffix.
  ASSERT_EQ(MakeMetricName("map", 2, true), "map@2-");
  ASSERT_EQ(MakeMetricName("map", 2, false), "map@2");
}
} // namespace xgboost::ltr