Support bitwise allreduce operations in the communicator (#8623)
This commit is contained in:
@@ -58,7 +58,14 @@ inline std::size_t GetTypeSize(DataType data_type) {
|
||||
}
|
||||
|
||||
/**
 * @brief Defines the reduction operation applied by an allreduce.
 *
 * The numeric values are spelled out explicitly so they stay stable when new
 * operations are appended. The bitwise operations are only meaningful for
 * integral element types.
 */
enum class Operation {
  kMax = 0,         ///< Maximum of the contributed values.
  kMin = 1,         ///< Minimum of the contributed values.
  kSum = 2,         ///< Sum of the contributed values.
  kBitwiseAND = 3,  ///< Bitwise AND of the contributed values.
  kBitwiseOR = 4,   ///< Bitwise OR of the contributed values.
  kBitwiseXOR = 5   ///< Bitwise XOR of the contributed values.
};
|
||||
|
||||
class DeviceCommunicator;
|
||||
|
||||
|
||||
@@ -30,6 +30,29 @@ class AllreduceFunctor {
|
||||
}
|
||||
|
||||
private:
|
||||
template <class T, std::enable_if_t<std::is_integral<T>::value>* = nullptr>
|
||||
void AccumulateBitwise(T* buffer, T const* input, std::size_t size,
|
||||
Operation reduce_operation) const {
|
||||
switch (reduce_operation) {
|
||||
case Operation::kBitwiseAND:
|
||||
std::transform(buffer, buffer + size, input, buffer, std::bit_and<T>());
|
||||
break;
|
||||
case Operation::kBitwiseOR:
|
||||
std::transform(buffer, buffer + size, input, buffer, std::bit_or<T>());
|
||||
break;
|
||||
case Operation::kBitwiseXOR:
|
||||
std::transform(buffer, buffer + size, input, buffer, std::bit_xor<T>());
|
||||
break;
|
||||
default:
|
||||
throw std::invalid_argument("Invalid reduce operation");
|
||||
}
|
||||
}
|
||||
|
||||
template <class T, std::enable_if_t<std::is_floating_point<T>::value>* = nullptr>
|
||||
void AccumulateBitwise(T*, T const*, std::size_t, Operation) const {
|
||||
LOG(FATAL) << "Floating point types do not support bitwise operations.";
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void Accumulate(T* buffer, T const* input, std::size_t size, Operation reduce_operation) const {
|
||||
switch (reduce_operation) {
|
||||
@@ -44,6 +67,11 @@ class AllreduceFunctor {
|
||||
case Operation::kSum:
|
||||
std::transform(buffer, buffer + size, input, buffer, std::plus<T>());
|
||||
break;
|
||||
case Operation::kBitwiseAND:
|
||||
case Operation::kBitwiseOR:
|
||||
case Operation::kBitwiseXOR:
|
||||
AccumulateBitwise(buffer, input, size, reduce_operation);
|
||||
break;
|
||||
default:
|
||||
throw std::invalid_argument("Invalid reduce operation");
|
||||
}
|
||||
|
||||
@@ -96,11 +96,33 @@ class RabitCommunicator : public Communicator {
|
||||
void Print(const std::string &message) override { rabit::TrackerPrint(message); }
|
||||
|
||||
protected:
|
||||
/** @brief Shuts the communicator down by calling rabit::Finalize(). */
void Shutdown() override { rabit::Finalize(); }
|
||||
|
||||
private:
|
||||
template <typename DType, std::enable_if_t<std::is_integral<DType>::value> * = nullptr>
|
||||
void DoBitwiseAllReduce(void *send_receive_buffer, std::size_t count, Operation op) {
|
||||
switch (op) {
|
||||
case Operation::kBitwiseAND:
|
||||
rabit::Allreduce<rabit::op::BitAND, DType>(static_cast<DType *>(send_receive_buffer),
|
||||
count);
|
||||
break;
|
||||
case Operation::kBitwiseOR:
|
||||
rabit::Allreduce<rabit::op::BitOR, DType>(static_cast<DType *>(send_receive_buffer), count);
|
||||
break;
|
||||
case Operation::kBitwiseXOR:
|
||||
rabit::Allreduce<rabit::op::BitXOR, DType>(static_cast<DType *>(send_receive_buffer),
|
||||
count);
|
||||
break;
|
||||
default:
|
||||
LOG(FATAL) << "Unknown allreduce operation";
|
||||
}
|
||||
}
|
||||
|
||||
template <typename DType, std::enable_if_t<std::is_floating_point<DType>::value> * = nullptr>
|
||||
void DoBitwiseAllReduce(void *send_receive_buffer, std::size_t count, Operation op) {
|
||||
LOG(FATAL) << "Floating point types do not support bitwise operations.";
|
||||
}
|
||||
|
||||
template <typename DType>
|
||||
void DoAllReduce(void *send_receive_buffer, std::size_t count, Operation op) {
|
||||
switch (op) {
|
||||
@@ -113,6 +135,11 @@ class RabitCommunicator : public Communicator {
|
||||
case Operation::kSum:
|
||||
rabit::Allreduce<rabit::op::Sum, DType>(static_cast<DType *>(send_receive_buffer), count);
|
||||
break;
|
||||
case Operation::kBitwiseAND:
|
||||
case Operation::kBitwiseOR:
|
||||
case Operation::kBitwiseXOR:
|
||||
DoBitwiseAllReduce<DType>(send_receive_buffer, count, op);
|
||||
break;
|
||||
default:
|
||||
LOG(FATAL) << "Unknown allreduce operation";
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user