[Breaking] Switch from rabit to the collective communicator (#8257)
* Switch from rabit to the collective communicator * fix size_t specialization * really fix size_t * try again * add include * more include * fix lint errors * remove rabit includes * fix pylint error * return dict from communicator context * fix communicator shutdown * fix dask test * reset communicator mocklist * fix distributed tests * do not save device communicator * fix jvm gpu tests * add python test for federated communicator * Update gputreeshap submodule Co-authored-by: Hyunsu Philip Cho <chohyu01@cs.washington.edu>
This commit is contained in:
@@ -7,8 +7,8 @@
|
||||
#include <limits>
|
||||
#include <vector>
|
||||
|
||||
#include "../collective/communicator-inl.h"
|
||||
#include "../common/common.h"
|
||||
#include "rabit/rabit.h"
|
||||
#include "xgboost/generic_parameters.h"
|
||||
#include "xgboost/host_device_vector.h"
|
||||
#include "xgboost/tree_model.h"
|
||||
@@ -39,7 +39,7 @@ inline void UpdateLeafValues(std::vector<float>* p_quantiles, std::vector<bst_no
|
||||
auto const& h_node_idx = nidx;
|
||||
|
||||
size_t n_leaf{h_node_idx.size()};
|
||||
rabit::Allreduce<rabit::op::Max>(&n_leaf, 1);
|
||||
collective::Allreduce<collective::Operation::kMax>(&n_leaf, 1);
|
||||
CHECK(quantiles.empty() || quantiles.size() == n_leaf);
|
||||
if (quantiles.empty()) {
|
||||
quantiles.resize(n_leaf, std::numeric_limits<float>::quiet_NaN());
|
||||
@@ -49,12 +49,12 @@ inline void UpdateLeafValues(std::vector<float>* p_quantiles, std::vector<bst_no
|
||||
std::vector<int32_t> n_valids(quantiles.size());
|
||||
std::transform(quantiles.cbegin(), quantiles.cend(), n_valids.begin(),
|
||||
[](float q) { return static_cast<int32_t>(!std::isnan(q)); });
|
||||
rabit::Allreduce<rabit::op::Sum>(n_valids.data(), n_valids.size());
|
||||
collective::Allreduce<collective::Operation::kSum>(n_valids.data(), n_valids.size());
|
||||
// convert to 0 for all reduce
|
||||
std::replace_if(
|
||||
quantiles.begin(), quantiles.end(), [](float q) { return std::isnan(q); }, 0.f);
|
||||
// use the mean value
|
||||
rabit::Allreduce<rabit::op::Sum>(quantiles.data(), quantiles.size());
|
||||
collective::Allreduce<collective::Operation::kSum>(quantiles.data(), quantiles.size());
|
||||
for (size_t i = 0; i < n_leaf; ++i) {
|
||||
if (n_valids[i] > 0) {
|
||||
quantiles[i] /= static_cast<float>(n_valids[i]);
|
||||
|
||||
@@ -724,8 +724,8 @@ class MeanAbsoluteError : public ObjFunction {
|
||||
}
|
||||
|
||||
// Weighted average base score across all workers
|
||||
rabit::Allreduce<rabit::op::Sum>(out.Values().data(), out.Values().size());
|
||||
rabit::Allreduce<rabit::op::Sum>(&w, 1);
|
||||
collective::Allreduce<collective::Operation::kSum>(out.Values().data(), out.Values().size());
|
||||
collective::Allreduce<collective::Operation::kSum>(&w, 1);
|
||||
|
||||
std::transform(linalg::cbegin(out), linalg::cend(out), linalg::begin(out),
|
||||
[w](float v) { return v / w; });
|
||||
|
||||
Reference in New Issue
Block a user