More collective aggregators (#9060)
This commit is contained in:
@@ -6,8 +6,9 @@
|
||||
#include <algorithm>
|
||||
#include <cstdint> // std::int32_t
|
||||
#include <limits>
|
||||
#include <vector> // std::vector
|
||||
#include <vector> // std::vector
|
||||
|
||||
#include "../collective/aggregator.h"
|
||||
#include "../collective/communicator-inl.h"
|
||||
#include "../common/common.h"
|
||||
#include "xgboost/base.h" // bst_node_t
|
||||
@@ -41,10 +42,7 @@ inline void UpdateLeafValues(std::vector<float>* p_quantiles, std::vector<bst_no
|
||||
auto& quantiles = *p_quantiles;
|
||||
auto const& h_node_idx = nidx;
|
||||
|
||||
size_t n_leaf{h_node_idx.size()};
|
||||
if (info.IsRowSplit()) {
|
||||
collective::Allreduce<collective::Operation::kMax>(&n_leaf, 1);
|
||||
}
|
||||
size_t n_leaf = collective::GlobalMax(info, h_node_idx.size());
|
||||
CHECK(quantiles.empty() || quantiles.size() == n_leaf);
|
||||
if (quantiles.empty()) {
|
||||
quantiles.resize(n_leaf, std::numeric_limits<float>::quiet_NaN());
|
||||
@@ -54,16 +52,12 @@ inline void UpdateLeafValues(std::vector<float>* p_quantiles, std::vector<bst_no
|
||||
std::vector<int32_t> n_valids(quantiles.size());
|
||||
std::transform(quantiles.cbegin(), quantiles.cend(), n_valids.begin(),
|
||||
[](float q) { return static_cast<int32_t>(!std::isnan(q)); });
|
||||
if (info.IsRowSplit()) {
|
||||
collective::Allreduce<collective::Operation::kSum>(n_valids.data(), n_valids.size());
|
||||
}
|
||||
collective::GlobalSum(info, &n_valids);
|
||||
// convert to 0 for all reduce
|
||||
std::replace_if(
|
||||
quantiles.begin(), quantiles.end(), [](float q) { return std::isnan(q); }, 0.f);
|
||||
// use the mean value
|
||||
if (info.IsRowSplit()) {
|
||||
collective::Allreduce<collective::Operation::kSum>(quantiles.data(), quantiles.size());
|
||||
}
|
||||
collective::GlobalSum(info, &quantiles);
|
||||
for (size_t i = 0; i < n_leaf; ++i) {
|
||||
if (n_valids[i] > 0) {
|
||||
quantiles[i] /= static_cast<float>(n_valids[i]);
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
/**
|
||||
* Copyright 2023 by XGBoost contributors
|
||||
*/
|
||||
#include <array> // std::array
|
||||
#include <cstddef> // std::size_t
|
||||
#include <cstdint> // std::int32_t
|
||||
#include <vector> // std::vector
|
||||
@@ -170,10 +171,9 @@ class QuantileRegression : public ObjFunction {
|
||||
common::Mean(ctx_, *base_score, &temp);
|
||||
double meanq = temp(0) * sw;
|
||||
|
||||
if (info.IsRowSplit()) {
|
||||
collective::Allreduce<collective::Operation::kSum>(&meanq, 1);
|
||||
collective::Allreduce<collective::Operation::kSum>(&sw, 1);
|
||||
}
|
||||
std::array<double, 2> dat{meanq, sw};
|
||||
collective::GlobalSum(info, &dat);
|
||||
std::tie(meanq, sw) = std::tuple_cat(dat);
|
||||
meanq /= (sw + kRtEps);
|
||||
base_score->Reshape(1);
|
||||
base_score->Data()->Fill(meanq);
|
||||
|
||||
@@ -728,10 +728,8 @@ class MeanAbsoluteError : public ObjFunction {
|
||||
std::transform(linalg::cbegin(out), linalg::cend(out), linalg::begin(out),
|
||||
[w](float v) { return v * w; });
|
||||
|
||||
if (info.IsRowSplit()) {
|
||||
collective::Allreduce<collective::Operation::kSum>(out.Values().data(), out.Values().size());
|
||||
collective::Allreduce<collective::Operation::kSum>(&w, 1);
|
||||
}
|
||||
collective::GlobalSum(info, &out.Values());
|
||||
collective::GlobalSum(info, &w, 1);
|
||||
|
||||
if (common::CloseTo(w, 0.0)) {
|
||||
// Mostly for handling empty dataset test.
|
||||
|
||||
Reference in New Issue
Block a user