More refactoring to take advantage of collective aggregators (#9081)

2023-04-25 12:36:09 -07:00
parent 49ccae7fb9
commit a320b402a5
10 changed files with 81 additions and 81 deletions
--- a/src/common/hist_util.cc
+++ b/src/common/hist_util.cc
@@ -45,20 +45,18 @@ HistogramCuts SketchOnDMatrix(DMatrix *m, int32_t max_bins, int32_t n_threads, b

  if (!use_sorted) {
    HostSketchContainer container(max_bins, m->Info().feature_types.ConstHostSpan(), reduced,
-                                  HostSketchContainer::UseGroup(info),
-                                  m->Info().IsColumnSplit(), n_threads);
+                                  HostSketchContainer::UseGroup(info), n_threads);
    for (auto const& page : m->GetBatches<SparsePage>()) {
      container.PushRowPage(page, info, hessian);
    }
-    container.MakeCuts(&out);
+    container.MakeCuts(m->Info(), &out);
  } else {
    SortedSketchContainer container{max_bins, m->Info().feature_types.ConstHostSpan(), reduced,
-                                    HostSketchContainer::UseGroup(info),
-                                    m->Info().IsColumnSplit(), n_threads};
+                                    HostSketchContainer::UseGroup(info), n_threads};
    for (auto const& page : m->GetBatches<SortedCSCPage>()) {
      container.PushColPage(page, info, hessian);
    }
-    container.MakeCuts(&out);
+    container.MakeCuts(m->Info(), &out);
  }

  return out;
--- a/src/common/quantile.cc
+++ b/src/common/quantile.cc
@@ -6,6 +6,7 @@
 #include <limits>
 #include <utility>

+#include "../collective/aggregator.h"
 #include "../collective/communicator-inl.h"
 #include "../data/adapter.h"
 #include "categorical.h"
@@ -18,13 +19,12 @@ template <typename WQSketch>
 SketchContainerImpl<WQSketch>::SketchContainerImpl(std::vector<bst_row_t> columns_size,
                                                   int32_t max_bins,
                                                   Span<FeatureType const> feature_types,
-                                                   bool use_group, bool col_split,
+                                                   bool use_group,
                                                   int32_t n_threads)
    : feature_types_(feature_types.cbegin(), feature_types.cend()),
      columns_size_{std::move(columns_size)},
      max_bins_{max_bins},
      use_group_ind_{use_group},
-      col_split_{col_split},
      n_threads_{n_threads} {
  monitor_.Init(__func__);
  CHECK_NE(columns_size_.size(), 0);
@@ -202,10 +202,10 @@ void SketchContainerImpl<WQSketch>::GatherSketchInfo(
 }

 template <typename WQSketch>
-void SketchContainerImpl<WQSketch>::AllreduceCategories() {
+void SketchContainerImpl<WQSketch>::AllreduceCategories(MetaInfo const& info) {
  auto world_size = collective::GetWorldSize();
  auto rank = collective::GetRank();
-  if (world_size == 1 || col_split_) {
+  if (world_size == 1 || info.IsColumnSplit()) {
    return;
  }

@@ -273,6 +273,7 @@ void SketchContainerImpl<WQSketch>::AllreduceCategories() {

 template <typename WQSketch>
 void SketchContainerImpl<WQSketch>::AllReduce(
+    MetaInfo const& info,
    std::vector<typename WQSketch::SummaryContainer> *p_reduced,
    std::vector<int32_t>* p_num_cuts) {
  monitor_.Start(__func__);
@@ -281,7 +282,7 @@ void SketchContainerImpl<WQSketch>::AllReduce(
  collective::Allreduce<collective::Operation::kMax>(&n_columns, 1);
  CHECK_EQ(n_columns, sketches_.size()) << "Number of columns differs across workers";

-  AllreduceCategories();
+  AllreduceCategories(info);

  auto& num_cuts = *p_num_cuts;
  CHECK_EQ(num_cuts.size(), 0);
@@ -292,10 +293,7 @@ void SketchContainerImpl<WQSketch>::AllReduce(

  // Prune the intermediate num cuts for synchronization.
  std::vector<bst_row_t> global_column_size(columns_size_);
-  if (!col_split_) {
-    collective::Allreduce<collective::Operation::kSum>(global_column_size.data(),
-                                                       global_column_size.size());
-  }
+  collective::GlobalSum(info, &global_column_size);

  ParallelFor(sketches_.size(), n_threads_, [&](size_t i) {
    int32_t intermediate_num_cuts = static_cast<int32_t>(
@@ -316,7 +314,7 @@ void SketchContainerImpl<WQSketch>::AllReduce(
  });

  auto world = collective::GetWorldSize();
-  if (world == 1 || col_split_) {
+  if (world == 1 || info.IsColumnSplit()) {
    monitor_.Stop(__func__);
    return;
  }
@@ -382,11 +380,11 @@ auto AddCategories(std::set<float> const &categories, HistogramCuts *cuts) {
 }

 template <typename WQSketch>
-void SketchContainerImpl<WQSketch>::MakeCuts(HistogramCuts* cuts) {
+void SketchContainerImpl<WQSketch>::MakeCuts(MetaInfo const& info, HistogramCuts* cuts) {
  monitor_.Start(__func__);
  std::vector<typename WQSketch::SummaryContainer> reduced;
  std::vector<int32_t> num_cuts;
-  this->AllReduce(&reduced, &num_cuts);
+  this->AllReduce(info, &reduced, &num_cuts);

  cuts->min_vals_.HostVector().resize(sketches_.size(), 0.0f);
  std::vector<typename WQSketch::SummaryContainer> final_summaries(reduced.size());
@@ -443,8 +441,8 @@ template class SketchContainerImpl<WXQuantileSketch<float, float>>;

 HostSketchContainer::HostSketchContainer(int32_t max_bins, common::Span<FeatureType const> ft,
                                         std::vector<size_t> columns_size, bool use_group,
-                                         bool col_split, int32_t n_threads)
-    : SketchContainerImpl{columns_size, max_bins, ft, use_group, col_split, n_threads} {
+                                         int32_t n_threads)
+    : SketchContainerImpl{columns_size, max_bins, ft, use_group, n_threads} {
  monitor_.Init(__func__);
  ParallelFor(sketches_.size(), n_threads_, Sched::Auto(), [&](auto i) {
    auto n_bins = std::min(static_cast<size_t>(max_bins_), columns_size_[i]);
--- a/src/common/quantile.h
+++ b/src/common/quantile.h
@@ -789,7 +789,6 @@ class SketchContainerImpl {
  std::vector<bst_row_t> columns_size_;
  int32_t max_bins_;
  bool use_group_ind_{false};
-  bool col_split_;
  int32_t n_threads_;
  bool has_categorical_{false};
  Monitor monitor_;
@@ -802,7 +801,7 @@ class SketchContainerImpl {
   * \param use_group whether is assigned to group to data instance.
   */
  SketchContainerImpl(std::vector<bst_row_t> columns_size, int32_t max_bins,
-                      common::Span<FeatureType const> feature_types, bool use_group, bool col_split,
+                      common::Span<FeatureType const> feature_types, bool use_group,
                      int32_t n_threads);

  static bool UseGroup(MetaInfo const &info) {
@@ -829,7 +828,7 @@ class SketchContainerImpl {
                        std::vector<bst_row_t> *p_sketches_scan,
                        std::vector<typename WQSketch::Entry> *p_global_sketches);
  // Merge sketches from all workers.
-  void AllReduce(std::vector<typename WQSketch::SummaryContainer> *p_reduced,
+  void AllReduce(MetaInfo const& info, std::vector<typename WQSketch::SummaryContainer> *p_reduced,
                 std::vector<int32_t> *p_num_cuts);

  template <typename Batch, typename IsValid>
@@ -883,11 +882,11 @@ class SketchContainerImpl {
  /* \brief Push a CSR matrix. */
  void PushRowPage(SparsePage const &page, MetaInfo const &info, Span<float const> hessian = {});

-  void MakeCuts(HistogramCuts* cuts);
+  void MakeCuts(MetaInfo const& info, HistogramCuts* cuts);

 private:
  // Merge all categories from other workers.
-  void AllreduceCategories();
+  void AllreduceCategories(MetaInfo const& info);
 };

 class HostSketchContainer : public SketchContainerImpl<WQuantileSketch<float, float>> {
@@ -896,8 +895,7 @@ class HostSketchContainer : public SketchContainerImpl<WQuantileSketch<float, fl

 public:
  HostSketchContainer(int32_t max_bins, common::Span<FeatureType const> ft,
-                      std::vector<size_t> columns_size, bool use_group, bool col_split,
-                      int32_t n_threads);
+                      std::vector<size_t> columns_size, bool use_group, int32_t n_threads);

  template <typename Batch>
  void PushAdapterBatch(Batch const &batch, size_t base_rowid, MetaInfo const &info, float missing);
@@ -993,9 +991,9 @@ class SortedSketchContainer : public SketchContainerImpl<WXQuantileSketch<float,

 public:
  explicit SortedSketchContainer(int32_t max_bins, common::Span<FeatureType const> ft,
-                                 std::vector<size_t> columns_size, bool use_group, bool col_split,
+                                 std::vector<size_t> columns_size, bool use_group,
                                 int32_t n_threads)
-      : SketchContainerImpl{columns_size, max_bins, ft, use_group, col_split, n_threads} {
+      : SketchContainerImpl{columns_size, max_bins, ft, use_group, n_threads} {
    monitor_.Init(__func__);
    sketches_.resize(columns_size.size());
    size_t i = 0;