Fix device communicator dependency (#9346)
This commit is contained in:
@@ -9,6 +9,7 @@
|
||||
#include <thread>
|
||||
|
||||
#include "../../../plugin/federated/federated_communicator.h"
|
||||
#include "../../../src/collective/communicator-inl.cuh"
|
||||
#include "../../../src/collective/device_communicator_adapter.cuh"
|
||||
#include "./helpers.h"
|
||||
|
||||
@@ -17,67 +18,63 @@ namespace xgboost::collective {
|
||||
class FederatedAdapterTest : public BaseFederatedTest {};
|
||||
|
||||
TEST(FederatedAdapterSimpleTest, ThrowOnInvalidDeviceOrdinal) {
|
||||
auto construct = []() { DeviceCommunicatorAdapter adapter{-1, nullptr}; };
|
||||
auto construct = []() { DeviceCommunicatorAdapter adapter{-1}; };
|
||||
EXPECT_THROW(construct(), dmlc::Error);
|
||||
}
|
||||
|
||||
TEST(FederatedAdapterSimpleTest, ThrowOnInvalidCommunicator) {
|
||||
auto construct = []() { DeviceCommunicatorAdapter adapter{0, nullptr}; };
|
||||
EXPECT_THROW(construct(), dmlc::Error);
|
||||
}
|
||||
|
||||
TEST_F(FederatedAdapterTest, DeviceAllReduceSum) {
|
||||
std::vector<std::thread> threads;
|
||||
for (auto rank = 0; rank < kWorldSize; rank++) {
|
||||
threads.emplace_back([rank, server_address = server_->Address()] {
|
||||
FederatedCommunicator comm{kWorldSize, rank, server_address};
|
||||
// Assign device 0 to all workers, since we run gtest in a single-GPU machine
|
||||
DeviceCommunicatorAdapter adapter{0, &comm};
|
||||
int count = 3;
|
||||
thrust::device_vector<double> buffer(count, 0);
|
||||
thrust::sequence(buffer.begin(), buffer.end());
|
||||
adapter.AllReduce(buffer.data().get(), count, DataType::kDouble, Operation::kSum);
|
||||
thrust::host_vector<double> host_buffer = buffer;
|
||||
EXPECT_EQ(host_buffer.size(), count);
|
||||
for (auto i = 0; i < count; i++) {
|
||||
EXPECT_EQ(host_buffer[i], i * kWorldSize);
|
||||
}
|
||||
});
|
||||
}
|
||||
for (auto& thread : threads) {
|
||||
thread.join();
|
||||
namespace {
|
||||
void VerifyAllReduceSum() {
|
||||
auto const world_size = collective::GetWorldSize();
|
||||
auto const rank = collective::GetRank();
|
||||
int count = 3;
|
||||
thrust::device_vector<double> buffer(count, 0);
|
||||
thrust::sequence(buffer.begin(), buffer.end());
|
||||
collective::AllReduce<collective::Operation::kSum>(rank, buffer.data().get(), count);
|
||||
thrust::host_vector<double> host_buffer = buffer;
|
||||
EXPECT_EQ(host_buffer.size(), count);
|
||||
for (auto i = 0; i < count; i++) {
|
||||
EXPECT_EQ(host_buffer[i], i * world_size);
|
||||
}
|
||||
}
|
||||
} // anonymous namespace
|
||||
|
||||
TEST_F(FederatedAdapterTest, DeviceAllGatherV) {
|
||||
std::vector<std::thread> threads;
|
||||
for (auto rank = 0; rank < kWorldSize; rank++) {
|
||||
threads.emplace_back([rank, server_address = server_->Address()] {
|
||||
FederatedCommunicator comm{kWorldSize, rank, server_address};
|
||||
// Assign device 0 to all workers, since we run gtest in a single-GPU machine
|
||||
DeviceCommunicatorAdapter adapter{0, &comm};
|
||||
|
||||
int const count = rank + 2;
|
||||
thrust::device_vector<char> buffer(count, 0);
|
||||
thrust::sequence(buffer.begin(), buffer.end());
|
||||
std::vector<std::size_t> segments(kWorldSize);
|
||||
dh::caching_device_vector<char> receive_buffer{};
|
||||
|
||||
adapter.AllGatherV(buffer.data().get(), count, &segments, &receive_buffer);
|
||||
|
||||
EXPECT_EQ(segments[0], 2);
|
||||
EXPECT_EQ(segments[1], 3);
|
||||
thrust::host_vector<char> host_buffer = receive_buffer;
|
||||
EXPECT_EQ(host_buffer.size(), 9);
|
||||
int expected[] = {0, 1, 0, 1, 2, 0, 1, 2, 3};
|
||||
for (auto i = 0; i < 9; i++) {
|
||||
EXPECT_EQ(host_buffer[i], expected[i]);
|
||||
}
|
||||
});
|
||||
TEST_F(FederatedAdapterTest, MGPUAllReduceSum) {
|
||||
auto const n_gpus = common::AllVisibleGPUs();
|
||||
if (n_gpus <= 1) {
|
||||
GTEST_SKIP() << "Skipping MGPUAllReduceSum test with # GPUs = " << n_gpus;
|
||||
}
|
||||
for (auto& thread : threads) {
|
||||
thread.join();
|
||||
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyAllReduceSum);
|
||||
}
|
||||
|
||||
namespace {
|
||||
void VerifyAllGatherV() {
|
||||
auto const world_size = collective::GetWorldSize();
|
||||
auto const rank = collective::GetRank();
|
||||
int const count = rank + 2;
|
||||
thrust::device_vector<char> buffer(count, 0);
|
||||
thrust::sequence(buffer.begin(), buffer.end());
|
||||
std::vector<std::size_t> segments(world_size);
|
||||
dh::caching_device_vector<char> receive_buffer{};
|
||||
|
||||
collective::AllGatherV(rank, buffer.data().get(), count, &segments, &receive_buffer);
|
||||
|
||||
EXPECT_EQ(segments[0], 2);
|
||||
EXPECT_EQ(segments[1], 3);
|
||||
thrust::host_vector<char> host_buffer = receive_buffer;
|
||||
EXPECT_EQ(host_buffer.size(), 5);
|
||||
int expected[] = {0, 1, 0, 1, 2};
|
||||
for (auto i = 0; i < 5; i++) {
|
||||
EXPECT_EQ(host_buffer[i], expected[i]);
|
||||
}
|
||||
}
|
||||
} // anonymous namespace
|
||||
|
||||
TEST_F(FederatedAdapterTest, MGPUAllGatherV) {
|
||||
auto const n_gpus = common::AllVisibleGPUs();
|
||||
if (n_gpus <= 1) {
|
||||
GTEST_SKIP() << "Skipping MGPUAllGatherV test with # GPUs = " << n_gpus;
|
||||
}
|
||||
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyAllGatherV);
|
||||
}
|
||||
|
||||
} // namespace xgboost::collective
|
||||
|
||||
Reference in New Issue
Block a user