Rework the MAP metric. (#8931)

- The new implementation is more strict as only binary labels are accepted. The previous implementation converts values greater than 1 to 1. - Deterministic GPU. (no atomic add). - Fix top-k handling. - Precise definition of MAP. (There are other variants on how to handle top-k). - Refactor GPU ranking tests.
2023-03-22 17:45:20 +08:00
parent b240f055d3
commit 5891f752c8
18 changed files with 458 additions and 323 deletions
--- a/src/common/error_msg.h
+++ b/src/common/error_msg.h
@@ -22,7 +22,7 @@ constexpr StringView LabelScoreSize() {
 }

 constexpr StringView InfInData() {
-  return "Input data contains `inf` while `missing` is not set to `inf`";
+  return "Input data contains `inf` or a value too large, while `missing` is not set to `inf`";
 }
 }  // namespace xgboost::error
 #endif  // XGBOOST_COMMON_ERROR_MSG_H_
--- a/src/common/numeric.h
+++ b/src/common/numeric.h
@@ -1,13 +1,15 @@
-/*!
- * Copyright 2022, XGBoost contributors.
+/**
+ * Copyright 2022-2023 by XGBoost contributors.
 */
 #ifndef XGBOOST_COMMON_NUMERIC_H_
 #define XGBOOST_COMMON_NUMERIC_H_

 #include <dmlc/common.h>  // OMPException

-#include <algorithm>  // std::max
-#include <iterator>   // std::iterator_traits
+#include <algorithm>  // for std::max
+#include <cstddef>    // for size_t
+#include <cstdint>    // for int32_t
+#include <iterator>   // for iterator_traits
 #include <vector>

 #include "common.h"                      // AssertGPUSupport
@@ -15,8 +17,7 @@
 #include "xgboost/context.h"             // Context
 #include "xgboost/host_device_vector.h"  // HostDeviceVector

-namespace xgboost {
-namespace common {
+namespace xgboost::common {

 /**
 * \brief Run length encode on CPU, input must be sorted.
@@ -111,11 +112,11 @@ inline double Reduce(Context const*, HostDeviceVector<float> const&) {
 namespace cpu_impl {
 template <typename It, typename V = typename It::value_type>
 V Reduce(Context const* ctx, It first, It second, V const& init) {
-  size_t n = std::distance(first, second);
-  common::MemStackAllocator<V, common::DefaultMaxThreads()> result_tloc(ctx->Threads(), init);
-  common::ParallelFor(n, ctx->Threads(),
-                      [&](auto i) { result_tloc[omp_get_thread_num()] += first[i]; });
-  auto result = std::accumulate(result_tloc.cbegin(), result_tloc.cbegin() + ctx->Threads(), init);
+  std::size_t n = std::distance(first, second);
+  auto n_threads = static_cast<std::size_t>(std::min(n, static_cast<std::size_t>(ctx->Threads())));
+  common::MemStackAllocator<V, common::DefaultMaxThreads()> result_tloc(n_threads, init);
+  common::ParallelFor(n, n_threads, [&](auto i) { result_tloc[omp_get_thread_num()] += first[i]; });
+  auto result = std::accumulate(result_tloc.cbegin(), result_tloc.cbegin() + n_threads, init);
  return result;
 }
 }  // namespace cpu_impl
@@ -144,7 +145,6 @@ void Iota(Context const* ctx, It first, It last,
    });
  }
 }
-}  // namespace common
-}  // namespace xgboost
+}  // namespace xgboost::common

 #endif  // XGBOOST_COMMON_NUMERIC_H_
--- a/src/common/ranking_utils.cc
+++ b/src/common/ranking_utils.cc
@@ -114,6 +114,15 @@ void NDCGCache::InitOnCUDA(Context const*, MetaInfo const&) { common::AssertGPUS

 DMLC_REGISTER_PARAMETER(LambdaRankParam);

+void MAPCache::InitOnCPU(Context const*, MetaInfo const& info) {
+  auto const& h_label = info.labels.HostView().Slice(linalg::All(), 0);
+  CheckMapLabels(h_label, [](auto beg, auto end, auto op) { return std::all_of(beg, end, op); });
+}
+
+#if !defined(XGBOOST_USE_CUDA)
+void MAPCache::InitOnCUDA(Context const*, MetaInfo const&) { common::AssertGPUSupport(); }
+#endif  // !defined(XGBOOST_USE_CUDA)
+
 std::string ParseMetricName(StringView name, StringView param, position_t* topn, bool* minus) {
  std::string out_name;
  if (!param.empty()) {
--- a/src/common/ranking_utils.cu
+++ b/src/common/ranking_utils.cu
@@ -204,4 +204,9 @@ void NDCGCache::InitOnCUDA(Context const* ctx, MetaInfo const& info) {
  dh::LaunchN(MaxGroupSize(), cuctx->Stream(),
              [=] XGBOOST_DEVICE(std::size_t i) { d_discount[i] = CalcDCGDiscount(i); });
 }
+
+void MAPCache::InitOnCUDA(Context const* ctx, MetaInfo const& info) {
+  auto const d_label = info.labels.View(ctx->gpu_id).Slice(linalg::All(), 0);
+  CheckMapLabels(d_label, CheckMAPOp{ctx->CUDACtx()});
+}
 }  // namespace xgboost::ltr
--- a/src/common/ranking_utils.h
+++ b/src/common/ranking_utils.h
@@ -358,6 +358,71 @@ void CheckNDCGLabels(ltr::LambdaRankParam const& p, linalg::VectorView<float con
  }
 }

+template <typename AllOf>
+bool IsBinaryRel(linalg::VectorView<float const> label, AllOf all_of) {
+  auto s_label = label.Values();
+  return all_of(s_label.data(), s_label.data() + s_label.size(), [] XGBOOST_DEVICE(float y) {
+    return std::abs(y - 1.0f) < kRtEps || std::abs(y - 0.0f) < kRtEps;
+  });
+}
+/**
+ * \brief Validate label for MAP
+ *
+ * \tparam Implementation of std::all_of. Specified as a parameter to reuse the check for
+ *         both CPU and GPU.
+ */
+template <typename AllOf>
+void CheckMapLabels(linalg::VectorView<float const> label, AllOf all_of) {
+  auto s_label = label.Values();
+  auto is_binary = IsBinaryRel(label, all_of);
+  CHECK(is_binary) << "MAP can only be used with binary labels.";
+}
+
+class MAPCache : public RankingCache {
+  // Total number of relevant documents for each group
+  HostDeviceVector<double> n_rel_;
+  // \sum l_k/k
+  HostDeviceVector<double> acc_;
+  HostDeviceVector<double> map_;
+  // Number of samples in this dataset.
+  std::size_t n_samples_{0};
+
+  void InitOnCPU(Context const* ctx, MetaInfo const& info);
+  void InitOnCUDA(Context const* ctx, MetaInfo const& info);
+
+ public:
+  MAPCache(Context const* ctx, MetaInfo const& info, LambdaRankParam const& p)
+      : RankingCache{ctx, info, p}, n_samples_{static_cast<std::size_t>(info.num_row_)} {
+    if (ctx->IsCPU()) {
+      this->InitOnCPU(ctx, info);
+    } else {
+      this->InitOnCUDA(ctx, info);
+    }
+  }
+
+  common::Span<double> NumRelevant(Context const* ctx) {
+    if (n_rel_.Empty()) {
+      n_rel_.SetDevice(ctx->gpu_id);
+      n_rel_.Resize(n_samples_);
+    }
+    return ctx->IsCPU() ? n_rel_.HostSpan() : n_rel_.DeviceSpan();
+  }
+  common::Span<double> Acc(Context const* ctx) {
+    if (acc_.Empty()) {
+      acc_.SetDevice(ctx->gpu_id);
+      acc_.Resize(n_samples_);
+    }
+    return ctx->IsCPU() ? acc_.HostSpan() : acc_.DeviceSpan();
+  }
+  common::Span<double> Map(Context const* ctx) {
+    if (map_.Empty()) {
+      map_.SetDevice(ctx->gpu_id);
+      map_.Resize(this->Groups());
+    }
+    return ctx->IsCPU() ? map_.HostSpan() : map_.DeviceSpan();
+  }
+};
+
 /**
 * \brief Parse name for ranking metric given parameters.
 *
--- a/src/common/threading_utils.h
+++ b/src/common/threading_utils.h
@@ -8,9 +8,11 @@
 #include <dmlc/omp.h>

 #include <algorithm>
-#include <cstdint>  // std::int32_t
+#include <cstdint>  // for int32_t
+#include <cstdlib>  // for malloc, free
 #include <limits>
-#include <type_traits>  // std::is_signed
+#include <new>          // for bad_alloc
+#include <type_traits>  // for is_signed
 #include <vector>

 #include "xgboost/logging.h"
@@ -266,7 +268,7 @@ class MemStackAllocator {
    if (MaxStackSize >= required_size_) {
      ptr_ = stack_mem_;
    } else {
-      ptr_ = reinterpret_cast<T*>(malloc(required_size_ * sizeof(T)));
+      ptr_ = reinterpret_cast<T*>(std::malloc(required_size_ * sizeof(T)));
    }
    if (!ptr_) {
      throw std::bad_alloc{};
@@ -278,7 +280,7 @@ class MemStackAllocator {

  ~MemStackAllocator() {
    if (required_size_ > MaxStackSize) {
-      free(ptr_);
+      std::free(ptr_);
    }
  }
  T& operator[](size_t i) { return ptr_[i]; }