merge 23Mar01

2023-05-02 00:05:58 +02:00
parent 313a74b582 08ce495b5d
commit 5446c501af
258 changed files with 7471 additions and 5379 deletions
--- a/src/metric/auc.cc
+++ b/src/metric/auc.cc
@@ -116,8 +116,7 @@ double MultiClassOVR(Context const *ctx, common::Span<float const> predts, MetaI

  // we have 2 averages going in here, first is among workers, second is among
  // classes. allreduce sums up fp/tp auc for each class.
-  collective::Allreduce<collective::Operation::kSum>(results.Values().data(),
-                                                     results.Values().size());
+  collective::GlobalSum(info, &results.Values());
  double auc_sum{0};
  double tp_sum{0};
  for (size_t c = 0; c < n_classes; ++c) {
@@ -268,7 +267,9 @@ class EvalAUC : public MetricNoCache {
    }
    //  We use the global size to handle empty dataset.
    std::array<size_t, 2> meta{info.labels.Size(), preds.Size()};
-    collective::Allreduce<collective::Operation::kMax>(meta.data(), meta.size());
+    if (!info.IsVerticalFederated()) {
+      collective::Allreduce<collective::Operation::kMax>(meta.data(), meta.size());
+    }
    if (meta[0] == 0) {
      // Empty across all workers, which is not supported.
      auc = std::numeric_limits<double>::quiet_NaN();
@@ -289,15 +290,8 @@ class EvalAUC : public MetricNoCache {
        InvalidGroupAUC();
      }

-      std::array<double, 2> results{auc, static_cast<double>(valid_groups)};
-      collective::Allreduce<collective::Operation::kSum>(results.data(), results.size());
-      auc = results[0];
-      valid_groups = static_cast<uint32_t>(results[1]);
-
-      if (valid_groups <= 0) {
-        auc = std::numeric_limits<double>::quiet_NaN();
-      } else {
-        auc /= valid_groups;
+      auc = collective::GlobalRatio(info, auc, static_cast<double>(valid_groups));
+      if (!std::isnan(auc)) {
        CHECK_LE(auc, 1) << "Total AUC across groups: " << auc * valid_groups
                         << ", valid groups: " << valid_groups;
      }
@@ -317,17 +311,9 @@ class EvalAUC : public MetricNoCache {
        std::tie(fp, tp, auc) =
            static_cast<Curve *>(this)->EvalBinary(preds, info);
      }
-      double local_area = fp * tp;
-      std::array<double, 2> result{auc, local_area};
-      collective::Allreduce<collective::Operation::kSum>(result.data(), result.size());
-      std::tie(auc, local_area) = common::UnpackArr(std::move(result));
-      if (local_area <= 0) {
-        // the dataset across all workers have only positive or negative sample
-        auc = std::numeric_limits<double>::quiet_NaN();
-      } else {
-        CHECK_LE(auc, local_area);
-        // normalization
-        auc = auc / local_area;
+      auc = collective::GlobalRatio(info, auc, fp * tp);
+      if (!std::isnan(auc)) {
+        CHECK_LE(auc, 1.0);
      }
    }
    if (std::isnan(auc)) {
--- a/src/metric/elementwise_metric.cu
+++ b/src/metric/elementwise_metric.cu
@@ -8,6 +8,7 @@
 */
 #include <dmlc/registry.h>

+#include <array>
 #include <cmath>

 #include "../collective/communicator-inl.h"
@@ -213,10 +214,8 @@ class PseudoErrorLoss : public MetricNoCache {
          auto v = common::Sqr(slope) * (std::sqrt((1 + common::Sqr(a / slope))) - 1) * wt;
          return std::make_tuple(v, wt);
        });
-    double dat[2]{result.Residue(), result.Weights()};
-    if (collective::IsDistributed()) {
-      collective::Allreduce<collective::Operation::kSum>(dat, 2);
-    }
+    std::array<double, 2> dat{result.Residue(), result.Weights()};
+    collective::GlobalSum(info, &dat);
    return EvalRowMAPE::GetFinal(dat[0], dat[1]);
  }
 };
@@ -233,7 +232,7 @@ struct EvalError {
    }
  }
  const char *Name() const {
-    static std::string name;
+    static thread_local std::string name;
    if (has_param_) {
      std::ostringstream os;
      os << "error";
@@ -331,7 +330,7 @@ struct EvalTweedieNLogLik {
        << "tweedie variance power must be in interval [1, 2)";
  }
  const char *Name() const {
-    static std::string name;
+    static thread_local std::string name;
    std::ostringstream os;
    os << "tweedie-nloglik@" << rho_;
    name = os.str();
@@ -382,8 +381,8 @@ struct EvalEWiseBase : public MetricNoCache {
          return std::make_tuple(residue, wt);
        });

-    double dat[2]{result.Residue(), result.Weights()};
-    collective::Allreduce<collective::Operation::kSum>(dat, 2);
+    std::array<double, 2> dat{result.Residue(), result.Weights()};
+    collective::GlobalSum(info, &dat);
    return Policy::GetFinal(dat[0], dat[1]);
  }

@@ -454,8 +453,8 @@ class QuantileError : public MetricNoCache {
    CHECK(!alpha_.Empty());
    if (info.num_row_ == 0) {
      // empty DMatrix on distributed env
-      double dat[2]{0.0, 0.0};
-      collective::Allreduce<collective::Operation::kSum>(dat, 2);
+      std::array<double, 2> dat{0.0, 0.0};
+      collective::GlobalSum(info, &dat);
      CHECK_GT(dat[1], 0);
      return dat[0] / dat[1];
    }
@@ -492,8 +491,8 @@ class QuantileError : public MetricNoCache {
              loss(y_predt(sample_id, quantile_id, target_id), y_true(sample_id, target_id)) * w;
          return std::make_tuple(l, w);
        });
-    double dat[2]{result.Residue(), result.Weights()};
-    collective::Allreduce<collective::Operation::kSum>(dat, 2);
+    std::array<double, 2> dat{result.Residue(), result.Weights()};
+    collective::GlobalSum(info, &dat);
    CHECK_GT(dat[1], 0);
    return dat[0] / dat[1];
  }
--- a/src/metric/metric_common.h
+++ b/src/metric/metric_common.h
@@ -9,6 +9,8 @@
 #include <memory>  // shared_ptr
 #include <string>

+#include "../collective/aggregator.h"
+#include "../collective/communicator-inl.h"
 #include "../common/common.h"
 #include "xgboost/metric.h"

@@ -20,7 +22,12 @@ class MetricNoCache : public Metric {
  virtual double Eval(HostDeviceVector<float> const &predts, MetaInfo const &info) = 0;

  double Evaluate(HostDeviceVector<float> const &predts, std::shared_ptr<DMatrix> p_fmat) final {
-    return this->Eval(predts, p_fmat->Info());
+    double result{0.0};
+    auto const& info = p_fmat->Info();
+    collective::ApplyWithLabels(info, &result, sizeof(double), [&] {
+      result = this->Eval(predts, info);
+    });
+    return result;
  }
 };

--- a/src/metric/multiclass_metric.cu
+++ b/src/metric/multiclass_metric.cu
@@ -6,6 +6,7 @@
 */
 #include <xgboost/metric.h>

+#include <array>
 #include <atomic>
 #include <cmath>

@@ -196,7 +197,7 @@ struct EvalMClassBase : public MetricNoCache {
    } else {
      CHECK(preds.Size() % info.labels.Size() == 0) << "label and prediction size not match";
    }
-    double dat[2] { 0.0, 0.0 };
+    std::array<double, 2> dat{0.0, 0.0};
    if (info.labels.Size() != 0) {
      const size_t nclass = preds.Size() / info.labels.Size();
      CHECK_GE(nclass, 1U)
@@ -208,7 +209,7 @@ struct EvalMClassBase : public MetricNoCache {
      dat[0] = result.Residue();
      dat[1] = result.Weights();
    }
-    collective::Allreduce<collective::Operation::kSum>(dat, 2);
+    collective::GlobalSum(info, &dat);
    return Derived::GetFinal(dat[0], dat[1]);
  }
  /*!
--- a/src/metric/rank_metric.cc
+++ b/src/metric/rank_metric.cc
@@ -28,9 +28,8 @@
 #include <algorithm>                         // for stable_sort, copy, fill_n, min, max
 #include <array>                             // for array
 #include <cmath>                             // for log, sqrt
-#include <cstddef>                           // for size_t, std
-#include <cstdint>                           // for uint32_t
 #include <functional>                        // for less, greater
+#include <limits>                            // for numeric_limits
 #include <map>                               // for operator!=, _Rb_tree_const_iterator
 #include <memory>                            // for allocator, unique_ptr, shared_ptr, __shared_...
 #include <numeric>                           // for accumulate
@@ -39,15 +38,11 @@
 #include <utility>                           // for pair, make_pair
 #include <vector>                            // for vector

-#include "../collective/communicator-inl.h"  // for IsDistributed, Allreduce
-#include "../collective/communicator.h"      // for Operation
+#include "../collective/aggregator.h"        // for ApplyWithLabels
 #include "../common/algorithm.h"             // for ArgSort, Sort
 #include "../common/linalg_op.h"             // for cbegin, cend
 #include "../common/math.h"                  // for CmpFirst
 #include "../common/optional_weight.h"       // for OptionalWeights, MakeOptionalWeights
-#include "../common/ranking_utils.h"         // for LambdaRankParam, NDCGCache, ParseMetricName
-#include "../common/threading_utils.h"       // for ParallelFor
-#include "../common/transform_iterator.h"    // for IndexTransformIter
 #include "dmlc/common.h"                     // for OMPException
 #include "metric_common.h"                   // for MetricNoCache, GPUMetric, PackedReduceResult
 #include "xgboost/base.h"                    // for bst_float, bst_omp_uint, bst_group_t, Args
@@ -59,7 +54,6 @@
 #include "xgboost/linalg.h"                  // for Tensor, TensorView, Range, VectorView, MakeT...
 #include "xgboost/logging.h"                 // for CHECK, ConsoleLogger, LOG_INFO, CHECK_EQ
 #include "xgboost/metric.h"                  // for MetricReg, XGBOOST_REGISTER_METRIC, Metric
-#include "xgboost/span.h"                    // for Span, operator!=
 #include "xgboost/string_view.h"             // for StringView

 namespace {
@@ -244,14 +238,7 @@ struct EvalRank : public MetricNoCache, public EvalRankConfig {
      exc.Rethrow();
    }

-    if (collective::IsDistributed()) {
-      double dat[2]{sum_metric, static_cast<double>(ngroups)};
-      // approximately estimate the metric using mean
-      collective::Allreduce<collective::Operation::kSum>(dat, 2);
-      return dat[0] / dat[1];
-    } else {
-      return sum_metric / ngroups;
-    }
+    return collective::GlobalRatio(info, sum_metric, static_cast<double>(ngroups));
  }

  const char* Name() const override {
@@ -385,15 +372,19 @@ class EvalRankWithCache : public Metric {
  }

  double Evaluate(HostDeviceVector<float> const& preds, std::shared_ptr<DMatrix> p_fmat) override {
+    double result{0.0};
    auto const& info = p_fmat->Info();
-    auto p_cache = cache_.CacheItem(p_fmat, ctx_, info, param_);
-    if (p_cache->Param() != param_) {
-      p_cache = cache_.ResetItem(p_fmat, ctx_, info, param_);
-    }
-    CHECK(p_cache->Param() == param_);
-    CHECK_EQ(preds.Size(), info.labels.Size());
+    collective::ApplyWithLabels(info, &result, sizeof(double), [&] {
+      auto p_cache = cache_.CacheItem(p_fmat, ctx_, info, param_);
+      if (p_cache->Param() != param_) {
+        p_cache = cache_.ResetItem(p_fmat, ctx_, info, param_);
+      }
+      CHECK(p_cache->Param() == param_);
+      CHECK_EQ(preds.Size(), info.labels.Size());

-    return this->Eval(preds, info, p_cache);
+      result = this->Eval(preds, info, p_cache);
+    });
+    return result;
  }

  virtual double Eval(HostDeviceVector<float> const& preds, MetaInfo const& info,
@@ -401,9 +392,10 @@ class EvalRankWithCache : public Metric {
 };

 namespace {
-double Finalize(double score, double sw) {
+double Finalize(MetaInfo const& info, double score, double sw) {
  std::array<double, 2> dat{score, sw};
-  collective::Allreduce<collective::Operation::kSum>(dat.data(), dat.size());
+  collective::GlobalSum(info, &dat);
+  std::tie(score, sw) = std::tuple_cat(dat);
  if (sw > 0.0) {
    score = score / sw;
  }
@@ -430,7 +422,7 @@ class EvalNDCG : public EvalRankWithCache<ltr::NDCGCache> {
              std::shared_ptr<ltr::NDCGCache> p_cache) override {
    if (ctx_->IsCUDA()) {
      auto ndcg = cuda_impl::NDCGScore(ctx_, info, preds, minus_, p_cache);
-      return Finalize(ndcg.Residue(), ndcg.Weights());
+      return Finalize(info, ndcg.Residue(), ndcg.Weights());
    }

    // group local ndcg
@@ -476,7 +468,7 @@ class EvalNDCG : public EvalRankWithCache<ltr::NDCGCache> {
      sum_w = std::accumulate(weights.weights.cbegin(), weights.weights.cend(), 0.0);
    }
    auto ndcg = std::accumulate(linalg::cbegin(ndcg_gloc), linalg::cend(ndcg_gloc), 0.0);
-    return Finalize(ndcg, sum_w);
+    return Finalize(info, ndcg, sum_w);
  }
 };

@@ -489,7 +481,7 @@ class EvalMAPScore : public EvalRankWithCache<ltr::MAPCache> {
              std::shared_ptr<ltr::MAPCache> p_cache) override {
    if (ctx_->IsCUDA()) {
      auto map = cuda_impl::MAPScore(ctx_, info, predt, minus_, p_cache);
-      return Finalize(map.Residue(), map.Weights());
+      return Finalize(info, map.Residue(), map.Weights());
    }

    auto gptr = p_cache->DataGroupPtr(ctx_);
@@ -501,7 +493,6 @@ class EvalMAPScore : public EvalRankWithCache<ltr::MAPCache> {
    auto rank_idx = p_cache->SortedIdx(ctx_, predt.ConstHostSpan());

    common::ParallelFor(p_cache->Groups(), ctx_->Threads(), [&](auto g) {
-      auto g_predt = h_predt.Slice(linalg::Range(gptr[g], gptr[g + 1]));
      auto g_label = h_label.Slice(linalg::Range(gptr[g], gptr[g + 1]));
      auto g_rank = rank_idx.subspan(gptr[g]);

@@ -532,7 +523,7 @@ class EvalMAPScore : public EvalRankWithCache<ltr::MAPCache> {
      sw += weight[i];
    }
    auto sum = std::accumulate(map_gloc.cbegin(), map_gloc.cend(), 0.0);
-    return Finalize(sum, sw);
+    return Finalize(info, sum, sw);
  }
 };

--- a/src/metric/survival_metric.cu
+++ b/src/metric/survival_metric.cu
@@ -7,6 +7,7 @@

 #include <dmlc/registry.h>

+#include <array>
 #include <memory>
 #include <vector>

@@ -234,8 +235,8 @@ struct EvalEWiseSurvivalBase : public MetricNoCache {
    auto result = reducer_.Reduce(*ctx_, info.weights_, info.labels_lower_bound_,
                                  info.labels_upper_bound_, preds);

-    double dat[2]{result.Residue(), result.Weights()};
-    collective::Allreduce<collective::Operation::kSum>(dat, 2);
+    std::array<double, 2> dat{result.Residue(), result.Weights()};
+    collective::GlobalSum(info, &dat);
    return Policy::GetFinal(dat[0], dat[1]);
  }