Pass DMatrix into metric for caching. (#8790)

2023-02-13 22:15:05 +08:00
parent 31d3ec07af
commit 81b2ee1153
17 changed files with 95 additions and 70 deletions
--- a/src/learner.cc
+++ b/src/learner.cc
@@ -1339,7 +1339,7 @@ class LearnerImpl : public LearnerIO {

      obj_->EvalTransform(&out);
      for (auto& ev : metrics_) {
-        os << '\t' << data_names[i] << '-' << ev->Name() << ':' << ev->Eval(out, m->Info());
+        os << '\t' << data_names[i] << '-' << ev->Name() << ':' << ev->Evaluate(out, m);
      }
    }

--- a/src/metric/auc.cc
+++ b/src/metric/auc.cc
@@ -16,6 +16,7 @@

 #include "../common/math.h"
 #include "../common/optional_weight.h"  // OptionalWeights
+#include "metric_common.h"              // MetricNoCache
 #include "xgboost/host_device_vector.h"
 #include "xgboost/linalg.h"
 #include "xgboost/metric.h"
@@ -253,7 +254,7 @@ std::pair<double, uint32_t> RankingAUC(std::vector<float> const &predts,
 }

 template <typename Curve>
-class EvalAUC : public Metric {
+class EvalAUC : public MetricNoCache {
  double Eval(const HostDeviceVector<bst_float> &preds, const MetaInfo &info) override {
    double auc {0};
    if (ctx_->gpu_id != Context::kCpuId) {
--- a/src/metric/elementwise_metric.cu
+++ b/src/metric/elementwise_metric.cu
@@ -11,7 +11,7 @@
 #include <cmath>

 #include "../collective/communicator-inl.h"
-#include "../common/common.h"
+#include "../common/common.h"           // NOTE(review): MetricNoCache is declared in metric_common.h, not common.h
 #include "../common/math.h"
 #include "../common/optional_weight.h"  // OptionalWeights
 #include "../common/pseudo_huber.h"
@@ -23,8 +23,8 @@
 #if defined(XGBOOST_USE_CUDA)
 #include <thrust/execution_policy.h>  // thrust::cuda::par
 #include <thrust/functional.h>        // thrust::plus<>
-#include <thrust/transform_reduce.h>
 #include <thrust/iterator/counting_iterator.h>
+#include <thrust/transform_reduce.h>

 #include "../common/device_helpers.cuh"
 #endif  // XGBOOST_USE_CUDA
@@ -167,7 +167,7 @@ struct EvalRowLogLoss {
  }
 };

-class PseudoErrorLoss : public Metric {
+class PseudoErrorLoss : public MetricNoCache {
  PesudoHuberParam param_;

 public:
@@ -339,7 +339,7 @@ struct EvalTweedieNLogLik {
 * \tparam Derived the name of subclass
 */
 template <typename Policy>
-struct EvalEWiseBase : public Metric {
+struct EvalEWiseBase : public MetricNoCache {
  EvalEWiseBase() = default;
  explicit EvalEWiseBase(char const* policy_param) : policy_{policy_param} {}

--- a/src/metric/metric.cc
+++ b/src/metric/metric.cc
@@ -53,20 +53,21 @@ Metric::Create(const std::string& name, Context const* ctx) {
  return metric;
 }

-Metric *
-GPUMetric::CreateGPUMetric(const std::string& name, Context const* ctx) {
+GPUMetric* GPUMetric::CreateGPUMetric(const std::string& name, Context const* ctx) {
  auto metric = CreateMetricImpl<MetricGPUReg>(name);
  if (metric == nullptr) {
    LOG(WARNING) << "Cannot find a GPU metric builder for metric " << name
                 << ". Resorting to the CPU builder";
-    return metric;
+    return nullptr;
  }

  // Narrowing reference only for the compiler to allow assignment to a base class member.
  // As such, using this narrowed reference to refer to derived members will be an illegal op.
  // This is moot, as this type is stateless.
-  static_cast<GPUMetric *>(metric)->ctx_ = ctx;
-  return metric;
+  auto casted = static_cast<GPUMetric*>(metric);
+  CHECK(casted);
+  casted->ctx_ = ctx;
+  return casted;
 }
 }  // namespace xgboost

--- a/src/metric/metric_common.h
+++ b/src/metric/metric_common.h
@@ -13,12 +13,21 @@

 namespace xgboost {
 struct Context;
+// Metric that doesn't need to cache anything based on input data.
+class MetricNoCache : public Metric {
+ public:
+  virtual double Eval(HostDeviceVector<float> const &predts, MetaInfo const &info) = 0;
+
+  double Evaluate(HostDeviceVector<float> const &predts, std::shared_ptr<DMatrix> p_fmat) final {
+    return this->Eval(predts, p_fmat->Info());
+  }
+};

 // This creates a GPU metric instance dynamically and adds it to the GPU metric registry, if not
 // present already. This is created when there is a device ordinal present and if xgboost
 // is compiled with CUDA support
-struct GPUMetric : Metric {
-  static Metric *CreateGPUMetric(const std::string &name, Context const *tparam);
+struct GPUMetric : public MetricNoCache {
+  static GPUMetric *CreateGPUMetric(const std::string &name, Context const *tparam);
 };

 /*!
--- a/src/metric/multiclass_metric.cu
+++ b/src/metric/multiclass_metric.cu
@@ -9,16 +9,16 @@
 #include <atomic>
 #include <cmath>

-#include "metric_common.h"
 #include "../collective/communicator-inl.h"
 #include "../common/math.h"
 #include "../common/threading_utils.h"
+#include "metric_common.h"  // MetricNoCache

 #if defined(XGBOOST_USE_CUDA)
 #include <thrust/execution_policy.h>  // thrust::cuda::par
 #include <thrust/functional.h>        // thrust::plus<>
-#include <thrust/transform_reduce.h>
 #include <thrust/iterator/counting_iterator.h>
+#include <thrust/transform_reduce.h>

 #include "../common/device_helpers.cuh"
 #endif  // XGBOOST_USE_CUDA
@@ -162,7 +162,7 @@ class MultiClassMetricsReduction {
 * \tparam Derived the name of subclass
 */
 template<typename Derived>
-struct EvalMClassBase : public Metric {
+struct EvalMClassBase : public MetricNoCache {
  double Eval(const HostDeviceVector<float> &preds, const MetaInfo &info) override {
    if (info.labels.Size() == 0) {
      CHECK_EQ(preds.Size(), 0);
--- a/src/metric/rank_metric.cc
+++ b/src/metric/rank_metric.cc
@@ -92,7 +92,7 @@ namespace metric {
 DMLC_REGISTRY_FILE_TAG(rank_metric);

 /*! \brief AMS: also records best threshold */
-struct EvalAMS : public Metric {
+struct EvalAMS : public MetricNoCache {
 public:
  explicit EvalAMS(const char* param) {
    CHECK(param != nullptr)  // NOLINT
@@ -155,10 +155,10 @@ struct EvalAMS : public Metric {
 };

 /*! \brief Evaluate rank list */
-struct EvalRank : public Metric, public EvalRankConfig {
+struct EvalRank : public MetricNoCache, public EvalRankConfig {
 private:
  // This is used to compute the ranking metrics on the GPU - for training jobs that run on the GPU.
-  std::unique_ptr<xgboost::Metric> rank_gpu_;
+  std::unique_ptr<MetricNoCache> rank_gpu_;

 public:
  double Eval(const HostDeviceVector<bst_float>& preds, const MetaInfo& info) override {
@@ -322,7 +322,7 @@ struct EvalMAP : public EvalRank {
 };

 /*! \brief Cox: Partial likelihood of the Cox proportional hazards model */
-struct EvalCox : public Metric {
+struct EvalCox : public MetricNoCache {
 public:
  EvalCox() = default;
  double Eval(const HostDeviceVector<bst_float>& preds, const MetaInfo& info) override {
--- a/src/metric/rank_metric.cu
+++ b/src/metric/rank_metric.cu
@@ -1,21 +1,20 @@
 /**
 * Copyright 2020-2023 by XGBoost Contributors
- * \file rank_metric.cu
- * \brief prediction rank based metrics.
- * \author Kailong Chen, Tianqi Chen
 */
 #include <dmlc/registry.h>
-
+#include <thrust/iterator/counting_iterator.h>  // make_counting_iterator
+#include <thrust/reduce.h>                      // reduce
 #include <xgboost/metric.h>
-#include <xgboost/host_device_vector.h>
-#include <thrust/iterator/discard_iterator.h>

-#include <vector>
+#include <cstddef>                       // std::size_t
+#include <memory>                        // std::shared_ptr

+#include "../common/cuda_context.cuh"    // CUDAContext
 #include "metric_common.h"
-
-#include "../common/math.h"
-#include "../common/device_helpers.cuh"
+#include "xgboost/base.h"                // XGBOOST_DEVICE
+#include "xgboost/context.h"             // Context
+#include "xgboost/data.h"                // MetaInfo
+#include "xgboost/host_device_vector.h"  // HostDeviceVector

 namespace xgboost {
 namespace metric {
--- a/src/metric/survival_metric.cu
+++ b/src/metric/survival_metric.cu
@@ -10,15 +10,14 @@
 #include <memory>
 #include <vector>

-#include "xgboost/json.h"
-#include "xgboost/metric.h"
-#include "xgboost/host_device_vector.h"
-
-#include "metric_common.h"
 #include "../collective/communicator-inl.h"
 #include "../common/math.h"
 #include "../common/survival_util.h"
-#include  "../common/threading_utils.h"
+#include "../common/threading_utils.h"
+#include "metric_common.h"  // MetricNoCache
+#include "xgboost/host_device_vector.h"
+#include "xgboost/json.h"
+#include "xgboost/metric.h"

 #if defined(XGBOOST_USE_CUDA)
 #include <thrust/execution_policy.h>  // thrust::cuda::par
@@ -194,10 +193,9 @@ struct EvalAFTNLogLik {
  AFTParam param_;
 };

-template <typename Policy> struct EvalEWiseSurvivalBase : public Metric {
-  explicit EvalEWiseSurvivalBase(Context const *ctx) {
-    ctx_ = ctx;
-  }
+template <typename Policy>
+struct EvalEWiseSurvivalBase : public MetricNoCache {
+  explicit EvalEWiseSurvivalBase(Context const* ctx) { ctx_ = ctx; }
  EvalEWiseSurvivalBase() = default;

  void Configure(const Args& args) override {
@@ -230,7 +228,7 @@ template <typename Policy> struct EvalEWiseSurvivalBase : public Metric {

 // This class exists because we want to perform dispatch according to the distribution type at
 // configuration time, not at prediction time.
-struct AFTNLogLikDispatcher : public Metric {
+struct AFTNLogLikDispatcher : public MetricNoCache {
  const char* Name() const override {
    return "aft-nloglik";
  }
@@ -270,7 +268,7 @@ struct AFTNLogLikDispatcher : public Metric {

 private:
  AFTParam param_;
-  std::unique_ptr<Metric> metric_;
+  std::unique_ptr<MetricNoCache> metric_;
 };

 XGBOOST_REGISTER_METRIC(AFTNLogLik, "aft-nloglik")