Fix pairwise objective with NDCG metric along with custom gain. (#10100)

* Fix pairwise objective with NDCG metric.

- Allow setting `ndcg_exp_gain` for `rank:pairwise`.

This is useful when using `rank:pairwise` as the objective but NDCG as the evaluation metric.
Jiaming Yuan, 2024-03-11 14:54:10 +08:00 (committed by GitHub)
commit 1450aebb74, parent 06c9702028
3 changed files with 26 additions and 2 deletions
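For context, a minimal usage sketch of the combination this commit enables (not part of the commit itself; synthetic data stands in for the test helper `tm.make_ltr`, and labels above 31 are used deliberately since the exponential NDCG gain would not accept them):

# Sketch: pairwise objective with the default NDCG metric using a linear gain.
# ndcg_exp_gain=False was previously rejected for rank:pairwise; this commit allows it.
import numpy as np
import xgboost

rng = np.random.default_rng(2024)
X = rng.normal(size=(1024, 4))
y = rng.integers(0, 33, size=1024)            # relevance labels may exceed 31 with linear gain
qid = np.sort(rng.integers(0, 3, size=1024))  # three query groups, sorted as XGBRanker expects

ranker = xgboost.XGBRanker(
    objective="rank:pairwise",
    ndcg_exp_gain=False,   # linear gain for the NDCG metric
    n_estimators=10,
    tree_method="hist",
)
ranker.fit(X, y, qid=qid, eval_set=[(X, y)], eval_qid=[qid])
print(ranker.evals_result()["validation_0"])   # NDCG history on the training data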


@@ -474,7 +474,6 @@ class LambdaRankMAP : public LambdaRankObj<LambdaRankMAP, ltr::MAPCache> {
  public:
   void GetGradientImpl(std::int32_t iter, const HostDeviceVector<float>& predt,
                        const MetaInfo& info, linalg::Matrix<GradientPair>* out_gpair) {
-    CHECK(param_.ndcg_exp_gain) << "NDCG gain can not be set for the MAP objective.";
     if (ctx_->IsCUDA()) {
       return cuda_impl::LambdaRankGetGradientMAP(
           ctx_, iter, predt, info, GetCache(), ti_plus_.View(ctx_->Device()),
@@ -564,7 +563,6 @@ class LambdaRankPairwise : public LambdaRankObj<LambdaRankPairwise, ltr::Ranking
  public:
   void GetGradientImpl(std::int32_t iter, const HostDeviceVector<float>& predt,
                        const MetaInfo& info, linalg::Matrix<GradientPair>* out_gpair) {
-    CHECK(param_.ndcg_exp_gain) << "NDCG gain can not be set for the pairwise objective.";
     if (ctx_->IsCUDA()) {
       return cuda_impl::LambdaRankGetGradientPairwise(
           ctx_, iter, predt, info, GetCache(), ti_plus_.View(ctx_->Device()),
@@ -610,6 +608,13 @@ class LambdaRankPairwise : public LambdaRankObj<LambdaRankPairwise, ltr::Ranking
   [[nodiscard]] const char* DefaultEvalMetric() const override {
     return this->RankEvalMetric("ndcg");
   }
+
+  [[nodiscard]] Json DefaultMetricConfig() const override {
+    Json config{Object{}};
+    config["name"] = String{DefaultEvalMetric()};
+    config["lambdarank_param"] = ToJson(param_);
+    return config;
+  }
 };

 #if !defined(XGBOOST_USE_CUDA)
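The new `DefaultMetricConfig` override appears to forward the objective's LambdaRank parameters to the default `ndcg` metric, so a user-set `ndcg_exp_gain` is respected by the metric as well. A rough sketch of the returned structure, written as a Python dict for readability (an assumption based on the code above, not literal library output):

# Illustrative shape only; exact keys and value encoding come from ToJson(param_).
default_metric_config = {
    "name": "ndcg",          # DefaultEvalMetric() for rank:pairwise
    "lambdarank_param": {
        # the objective's LambdaRank parameters, e.g. ndcg_exp_gain,
        # as serialized by ToJson(param_)
    },
}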


@@ -97,4 +97,9 @@ TEST(XGBoostParameter, Update) {
     ASSERT_NEAR(p.f, 2.71828f, kRtEps);
     ASSERT_NEAR(p.d, 2.71828, kRtEps);  // default
   }
+
+  // Just in case dmlc's use of global memory has any impact in parameters.
+  UpdatableParam a, b;
+  a.UpdateAllowUnknown(xgboost::Args{{"f", "2.71828"}});
+  ASSERT_NE(a.f, b.f);
 }


@@ -54,6 +54,20 @@ def test_ndcg_custom_gain():
     assert byxgb.evals_result() == bynp.evals_result()
     assert byxgb_json == bynp_json
 
+    # test pairwise can handle max_rel > 31, while ndcg metric is using custom gain
+    X, y, q, w = tm.make_ltr(n_samples=1024, n_features=4, n_query_groups=3, max_rel=33)
+    ranknet = xgboost.XGBRanker(
+        tree_method="hist",
+        ndcg_exp_gain=False,
+        n_estimators=10,
+        objective="rank:pairwise",
+    )
+    ranknet.fit(X, y, qid=q, eval_set=[(X, y)], eval_qid=[q])
+    history = ranknet.evals_result()
+    assert (
+        history["validation_0"]["ndcg@32"][0] < history["validation_0"]["ndcg@32"][-1]
+    )
+
 
 def test_ranking_with_unweighted_data():
     Xrow = np.array([1, 2, 6, 8, 11, 14, 16, 17])