Improve allgather functions (#9649)

This commit is contained in:
Rong Ou
2023-10-12 08:31:43 -07:00
committed by GitHub
parent d1dee4ad99
commit e164d51c43
20 changed files with 346 additions and 122 deletions

View File

@@ -23,7 +23,7 @@ class ServerForTest {
std::unique_ptr<grpc::Server> server_;
public:
explicit ServerForTest(std::int32_t world_size) {
explicit ServerForTest(std::size_t world_size) {
server_thread_.reset(new std::thread([this, world_size] {
grpc::ServerBuilder builder;
xgboost::federated::FederatedService service{world_size};

View File

@@ -19,6 +19,11 @@ class FederatedCommunicatorTest : public BaseFederatedTest {
CheckAllgather(comm, rank);
}
static void VerifyAllgatherV(int rank, const std::string &server_address) {
FederatedCommunicator comm{kWorldSize, rank, server_address};
CheckAllgatherV(comm, rank);
}
static void VerifyAllreduce(int rank, const std::string &server_address) {
FederatedCommunicator comm{kWorldSize, rank, server_address};
CheckAllreduce(comm);
@@ -31,14 +36,19 @@ class FederatedCommunicatorTest : public BaseFederatedTest {
protected:
// Exercises AllGather on `comm` for the given `rank` and checks the gathered
// contents element by element.
// NOTE(review): this span appears to interleave the removed and the added lines
// of a diff whose +/- markers were stripped: a raw int-buffer path (`buffer`)
// and a string-based path (`input`/`output`) are both present. Presumably only
// the string-based lines belong to the post-change function -- confirm against
// the real file before relying on this text.
static void CheckAllgather(FederatedCommunicator &comm, int rank) {
// Int-buffer path: this rank fills only its own slot before gathering.
int buffer[kWorldSize] = {0, 0};
buffer[rank] = rank;
comm.AllGather(buffer, sizeof(buffer));
// String path: this rank contributes the single character '0' + rank.
std::string input{static_cast<char>('0' + rank)};
auto output = comm.AllGather(input);
for (auto i = 0; i < kWorldSize; i++) {
// Slot i is expected to hold the value contributed for index i.
EXPECT_EQ(buffer[i], i);
EXPECT_EQ(output[i], static_cast<char>('0' + i));
}
}
// Calls AllGatherV on `comm` with the piece selected by `rank` and expects the
// result to be "Federated Learning!!!", i.e. the two pieces joined in index
// order. Only two pieces are defined, so `rank` must be 0 or 1 (presumably
// kWorldSize is 2 -- confirm against the fixture).
static void CheckAllgatherV(FederatedCommunicator &comm, int rank) {
  std::vector<std::string_view> pieces{"Federated", " Learning!!!"};
  std::string_view mine = pieces[rank];
  auto const gathered = comm.AllGatherV(mine);
  EXPECT_EQ(gathered, "Federated Learning!!!");
}
static void CheckAllreduce(FederatedCommunicator &comm) {
int buffer[] = {1, 2, 3, 4, 5};
comm.AllReduce(buffer, sizeof(buffer) / sizeof(buffer[0]), DataType::kInt32, Operation::kSum);
@@ -119,6 +129,16 @@ TEST_F(FederatedCommunicatorTest, Allgather) {
}
}
// Starts one thread per rank in [0, kWorldSize), each running VerifyAllgatherV
// with server_->Address(), then joins every thread before the test returns.
TEST_F(FederatedCommunicatorTest, AllgatherV) {
  std::vector<std::thread> workers;
  for (int rank = 0; rank < kWorldSize; ++rank) {
    workers.emplace_back(&FederatedCommunicatorTest::VerifyAllgatherV, rank, server_->Address());
  }
  for (std::thread &worker : workers) {
    worker.join();
  }
}
TEST_F(FederatedCommunicatorTest, Allreduce) {
std::vector<std::thread> threads;
for (auto rank = 0; rank < kWorldSize; rank++) {

View File

@@ -18,6 +18,11 @@ class FederatedServerTest : public BaseFederatedTest {
CheckAllgather(client, rank);
}
static void VerifyAllgatherV(int rank, const std::string& server_address) {
federated::FederatedClient client{server_address, rank};
CheckAllgatherV(client, rank);
}
static void VerifyAllreduce(int rank, const std::string& server_address) {
federated::FederatedClient client{server_address, rank};
CheckAllreduce(client);
@@ -39,8 +44,7 @@ class FederatedServerTest : public BaseFederatedTest {
protected:
static void CheckAllgather(federated::FederatedClient& client, int rank) {
int data[kWorldSize] = {0, 0};
data[rank] = rank;
int data[] = {rank};
std::string send_buffer(reinterpret_cast<char const*>(data), sizeof(data));
auto reply = client.Allgather(send_buffer);
auto const* result = reinterpret_cast<int const*>(reply.data());
@@ -49,6 +53,12 @@ class FederatedServerTest : public BaseFederatedTest {
}
}
// Calls AllgatherV on `client` with the piece selected by `rank` and expects
// the reply to be "Hello, World!", i.e. the two pieces joined in index order.
// Only two pieces are defined, so `rank` must be 0 or 1 (presumably kWorldSize
// is 2 -- confirm against the fixture).
static void CheckAllgatherV(federated::FederatedClient& client, int rank) {
  std::vector<std::string_view> pieces{"Hello,", " World!"};
  std::string_view mine = pieces[rank];
  auto const gathered = client.AllgatherV(mine);
  EXPECT_EQ(gathered, "Hello, World!");
}
static void CheckAllreduce(federated::FederatedClient& client) {
int data[] = {1, 2, 3, 4, 5};
std::string send_buffer(reinterpret_cast<char const*>(data), sizeof(data));
@@ -80,6 +90,16 @@ TEST_F(FederatedServerTest, Allgather) {
}
}
// Starts one thread per rank in [0, kWorldSize), each running VerifyAllgatherV
// with server_->Address(), then joins every thread before the test returns.
TEST_F(FederatedServerTest, AllgatherV) {
  std::vector<std::thread> workers;
  for (int rank = 0; rank < kWorldSize; ++rank) {
    workers.emplace_back(&FederatedServerTest::VerifyAllgatherV, rank, server_->Address());
  }
  for (std::thread& worker : workers) {
    worker.join();
  }
}
TEST_F(FederatedServerTest, Allreduce) {
std::vector<std::thread> threads;
for (auto rank = 0; rank < kWorldSize; rank++) {