Merge branch 'master'
This commit is contained in:
94
tests/cpp/plugin/federated/test_federated_coll.cc
Normal file
94
tests/cpp/plugin/federated/test_federated_coll.cc
Normal file
@@ -0,0 +1,94 @@
|
||||
/**
|
||||
* Copyright 2022-2023, XGBoost contributors
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
#include <xgboost/span.h> // for Span
|
||||
|
||||
#include <array> // for array
|
||||
|
||||
#include "../../../../src/common/type.h" // for EraseType
|
||||
#include "../../collective/test_worker.h" // for SocketTest
|
||||
#include "federated_coll.h" // for FederatedColl
|
||||
#include "federated_comm.h" // for FederatedComm
|
||||
#include "test_worker.h" // for TestFederated
|
||||
|
||||
namespace xgboost::collective {
|
||||
namespace {
|
||||
// Fixture for the FederatedColl tests in this file. It adds nothing of its own; everything it
// provides comes from the SocketTest base class.
class FederatedCollTest : public SocketTest {};
|
||||
} // namespace
|
||||
|
||||
/**
 * Sum-allreduce over the federated communicator.
 *
 * Every worker contributes the same five integers, so after a kSum reduction each element is
 * expected to equal the original value multiplied by the number of workers.
 */
TEST_F(FederatedCollTest, Allreduce) {
  std::int32_t n_workers = std::min(std::thread::hardware_concurrency(), 3u);
  TestFederated(n_workers, [=](std::shared_ptr<FederatedComm> comm, std::int32_t) {
    std::array<std::int32_t, 5> values = {1, 2, 3, 4, 5};

    // Reference result, computed locally before the buffer is reduced in place.
    std::array<std::int32_t, 5> reference;
    for (std::size_t k = 0; k < values.size(); ++k) {
      reference[k] = values[k] * n_workers;
    }

    auto coll = std::make_shared<FederatedColl>();
    auto rc = coll->Allreduce(*comm, common::EraseType(common::Span{values.data(), values.size()}),
                              ArrayInterfaceHandler::kI4, Op::kSum);
    ASSERT_TRUE(rc.OK());

    for (std::size_t k = 0; k < values.size(); ++k) {
      ASSERT_EQ(values[k], reference[k]);
    }
  });
}
|
||||
|
||||
/**
 * Broadcast a short string from rank 0 to every other worker.
 *
 * The receive buffer on non-root ranks must be exactly as long as the message sent by the root:
 * Broadcast is handed a span over the buffer, so a shorter buffer can never end up equal to the
 * full message.
 */
TEST_F(FederatedCollTest, Broadcast) {
  std::int32_t n_workers = std::min(std::thread::hardware_concurrency(), 3u);
  TestFederated(n_workers, [=](std::shared_ptr<FederatedComm> comm, std::int32_t) {
    FederatedColl coll{};
    std::string const message{"hello"};

    // The root starts with the payload; everyone else starts with a blank buffer of the same
    // length so that the comparison below only passes if the broadcast really filled it.
    std::string buffer = comm->Rank() == 0 ? message : std::string(message.size(), ' ');

    auto rc = coll.Broadcast(*comm, common::EraseType(common::Span{buffer.data(), buffer.size()}), 0);
    // Check the status first: the buffer content is only meaningful when the call succeeded.
    ASSERT_TRUE(rc.OK());
    ASSERT_EQ(buffer, message);
  });
}
|
||||
|
||||
/**
 * Fixed-size allgather: each worker writes its own rank into its slot of a zeroed buffer and,
 * after the gather, slot i is expected to hold i.
 */
TEST_F(FederatedCollTest, Allgather) {
  std::int32_t n_workers = std::min(std::thread::hardware_concurrency(), 3u);
  TestFederated(n_workers, [=](std::shared_ptr<FederatedComm> comm, std::int32_t) {
    FederatedColl coll{};

    auto const rank = comm->Rank();
    std::vector<std::int32_t> gathered(n_workers, 0);
    gathered[rank] = rank;

    // The last argument is the size passed for each worker's segment.
    auto rc = coll.Allgather(
        *comm, common::EraseType(common::Span{gathered.data(), gathered.size()}), sizeof(int));
    ASSERT_TRUE(rc.OK());

    std::int32_t expected_rank = 0;
    for (auto const value : gathered) {
      ASSERT_EQ(value, expected_rank);
      ++expected_rank;
    }
  });
}
|
||||
|
||||
/**
 * Variable-size allgather with two workers: worker 0 contributes "Federated", worker 1
 * contributes " Learning!!!", and the gathered result is expected to be their concatenation.
 */
TEST_F(FederatedCollTest, AllgatherV) {
  std::int32_t n_workers = 2;
  TestFederated(n_workers, [=](std::shared_ptr<FederatedComm> comm, std::int32_t) {
    FederatedColl coll{};

    std::vector<std::string_view> inputs{"Federated", " Learning!!!"};
    auto const& mine = inputs[comm->Rank()];

    // Per-worker byte counts, plus an output buffer large enough for both pieces.
    std::vector<std::int64_t> sizes{static_cast<std::int64_t>(inputs[0].size()),
                                    static_cast<std::int64_t>(inputs[1].size())};
    std::vector<std::int64_t> recv_segments(inputs.size() + 1, 0);
    std::string gathered;
    gathered.resize(sizes[0] + sizes[1]);

    auto rc = coll.AllgatherV(*comm, common::EraseType(common::Span{mine.data(), mine.size()}),
                              common::Span{sizes.data(), sizes.size()}, recv_segments,
                              common::EraseType(common::Span{gathered.data(), gathered.size()}),
                              AllgatherVAlgo::kRing);

    EXPECT_EQ(gathered, "Federated Learning!!!");
    ASSERT_TRUE(rc.OK());
  });
}
|
||||
} // namespace xgboost::collective
|
||||
131
tests/cpp/plugin/federated/test_federated_coll.cu
Normal file
131
tests/cpp/plugin/federated/test_federated_coll.cu
Normal file
@@ -0,0 +1,131 @@
|
||||
/**
|
||||
* Copyright 2022-2023, XGBoost contributors
|
||||
*/
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
#include <xgboost/collective/result.h> // for Result
|
||||
|
||||
#include "../../../../src/common/common.h" // for AllVisibleGPUs
|
||||
#include "../../../../src/common/device_helpers.cuh" // for device_vector
|
||||
#include "../../../../src/common/type.h" // for EraseType
|
||||
#include "../../collective/test_worker.h" // for SocketTest
|
||||
#include "../../helpers.h" // for MakeCUDACtx
|
||||
#include "federated_coll.cuh"
|
||||
#include "federated_comm.cuh"
|
||||
#include "test_worker.h" // for TestFederated
|
||||
|
||||
namespace xgboost::collective {
|
||||
namespace {
|
||||
// Fixture for the CUDA federated collective tests in this file. It adds nothing of its own;
// everything it provides comes from the SocketTest base class.
class FederatedCollTestGPU : public SocketTest {};
|
||||
|
||||
struct Worker {
|
||||
std::shared_ptr<FederatedColl> impl;
|
||||
std::shared_ptr<Comm> nccl_comm;
|
||||
std::shared_ptr<CUDAFederatedColl> coll;
|
||||
|
||||
Worker(std::shared_ptr<FederatedComm> comm, std::int32_t rank) {
|
||||
auto ctx = MakeCUDACtx(rank);
|
||||
impl = std::make_shared<FederatedColl>();
|
||||
nccl_comm.reset(comm->MakeCUDAVar(&ctx, impl));
|
||||
coll = std::make_shared<CUDAFederatedColl>(impl);
|
||||
}
|
||||
};
|
||||
|
||||
void TestAllreduce(std::shared_ptr<FederatedComm> comm, std::int32_t rank, std::int32_t n_workers) {
|
||||
Worker w{comm, rank};
|
||||
|
||||
dh::device_vector<std::int32_t> buffer{std::vector<std::int32_t>{1, 2, 3, 4, 5}};
|
||||
dh::device_vector<std::int32_t> expected(buffer.size());
|
||||
thrust::transform(buffer.cbegin(), buffer.cend(), expected.begin(),
|
||||
[=] XGBOOST_DEVICE(std::int32_t i) { return i * n_workers; });
|
||||
|
||||
auto rc = w.coll->Allreduce(*w.nccl_comm, common::EraseType(dh::ToSpan(buffer)),
|
||||
ArrayInterfaceHandler::kI4, Op::kSum);
|
||||
ASSERT_TRUE(rc.OK());
|
||||
for (auto i = 0; i < 5; i++) {
|
||||
ASSERT_EQ(buffer[i], expected[i]);
|
||||
}
|
||||
}
|
||||
|
||||
void TestBroadcast(std::shared_ptr<FederatedComm> comm, std::int32_t rank) {
|
||||
Worker w{comm, rank};
|
||||
|
||||
auto rc = Success();
|
||||
std::vector<std::int32_t> expect{0, 1, 2, 3};
|
||||
|
||||
if (comm->Rank() == 0) {
|
||||
dh::device_vector<std::int32_t> buffer{expect};
|
||||
rc = w.coll->Broadcast(*w.nccl_comm, common::EraseType(dh::ToSpan(buffer)), 0);
|
||||
std::vector<std::int32_t> expect{0, 1, 2, 3};
|
||||
ASSERT_EQ(buffer, expect);
|
||||
} else {
|
||||
dh::device_vector<std::int32_t> buffer(std::vector<std::int32_t>{4, 5, 6, 7});
|
||||
rc = w.coll->Broadcast(*w.nccl_comm, common::EraseType(dh::ToSpan(buffer)), 0);
|
||||
ASSERT_EQ(buffer, expect);
|
||||
}
|
||||
ASSERT_TRUE(rc.OK());
|
||||
}
|
||||
|
||||
void TestAllgather(std::shared_ptr<FederatedComm> comm, std::int32_t rank, std::int32_t n_workers) {
|
||||
Worker w{comm, rank};
|
||||
|
||||
dh::device_vector<std::int32_t> buffer(n_workers, 0);
|
||||
buffer[comm->Rank()] = comm->Rank();
|
||||
auto rc = w.coll->Allgather(*w.nccl_comm, common::EraseType(dh::ToSpan(buffer)), sizeof(int));
|
||||
ASSERT_TRUE(rc.OK());
|
||||
for (auto i = 0; i < n_workers; i++) {
|
||||
ASSERT_EQ(buffer[i], i);
|
||||
}
|
||||
}
|
||||
|
||||
void TestAllgatherV(std::shared_ptr<FederatedComm> comm, std::int32_t rank) {
|
||||
Worker w{comm, rank};
|
||||
|
||||
std::vector<dh::device_vector<std::int32_t>> inputs{std::vector<std::int32_t>{1, 2, 3},
|
||||
std::vector<std::int32_t>{4, 5}};
|
||||
std::vector<std::int64_t> recv_segments(inputs.size() + 1, 0);
|
||||
dh::device_vector<std::int32_t> r;
|
||||
std::vector<std::int64_t> sizes{static_cast<std::int64_t>(inputs[0].size()),
|
||||
static_cast<std::int64_t>(inputs[1].size())};
|
||||
r.resize(sizes[0] + sizes[1]);
|
||||
|
||||
auto rc = w.coll->AllgatherV(*w.nccl_comm, common::EraseType(dh::ToSpan(inputs[comm->Rank()])),
|
||||
common::Span{sizes.data(), sizes.size()}, recv_segments,
|
||||
common::EraseType(dh::ToSpan(r)), AllgatherVAlgo::kRing);
|
||||
ASSERT_TRUE(rc.OK());
|
||||
|
||||
ASSERT_EQ(r[0], 1);
|
||||
for (std::size_t i = 1; i < r.size(); ++i) {
|
||||
ASSERT_EQ(r[i], r[i - 1] + 1);
|
||||
}
|
||||
}
|
||||
} // namespace
|
||||
|
||||
// Runs TestAllreduce with the worker count reported by common::AllVisibleGPUs().
TEST_F(FederatedCollTestGPU, Allreduce) {
  std::int32_t const n_workers = common::AllVisibleGPUs();
  auto worker_fn = [=](std::shared_ptr<FederatedComm> comm, std::int32_t rank) {
    TestAllreduce(comm, rank, n_workers);
  };
  TestFederated(n_workers, worker_fn);
}
|
||||
|
||||
// Runs TestBroadcast with the worker count reported by common::AllVisibleGPUs().
TEST_F(FederatedCollTestGPU, Broadcast) {
  std::int32_t const n_workers = common::AllVisibleGPUs();
  auto worker_fn = [=](std::shared_ptr<FederatedComm> comm, std::int32_t rank) {
    TestBroadcast(comm, rank);
  };
  TestFederated(n_workers, worker_fn);
}
|
||||
|
||||
// Runs TestAllgather with the worker count reported by common::AllVisibleGPUs().
TEST_F(FederatedCollTestGPU, Allgather) {
  std::int32_t const n_workers = common::AllVisibleGPUs();
  auto worker_fn = [=](std::shared_ptr<FederatedComm> comm, std::int32_t rank) {
    TestAllgather(comm, rank, n_workers);
  };
  TestFederated(n_workers, worker_fn);
}
|
||||
|
||||
// Runs TestAllgatherV with exactly two workers, matching the two input vectors it defines.
TEST_F(FederatedCollTestGPU, AllgatherV) {
  std::int32_t const n_workers = 2;
  auto worker_fn = [=](std::shared_ptr<FederatedComm> comm, std::int32_t rank) {
    TestAllgatherV(comm, rank);
  };
  TestFederated(n_workers, worker_fn);
}
|
||||
} // namespace xgboost::collective
|
||||
@@ -7,10 +7,10 @@
|
||||
#include <thread> // for thread
|
||||
|
||||
#include "../../../../plugin/federated/federated_comm.h"
|
||||
#include "../../collective/net_test.h" // for SocketTest
|
||||
#include "../../helpers.h" // for ExpectThrow
|
||||
#include "test_worker.h" // for TestFederated
|
||||
#include "xgboost/json.h" // for Json
|
||||
#include "../../collective/test_worker.h" // for SocketTest
|
||||
#include "../../helpers.h" // for ExpectThrow
|
||||
#include "test_worker.h" // for TestFederated
|
||||
#include "xgboost/json.h" // for Json
|
||||
|
||||
namespace xgboost::collective {
|
||||
namespace {
|
||||
@@ -71,14 +71,9 @@ TEST_F(FederatedCommTest, IsDistributed) {
|
||||
|
||||
/**
 * Checks that every worker's communicator reports the rank it was started with and the total
 * number of workers.
 *
 * The callback receives an already constructed FederatedComm together with the worker rank, so
 * the test does not build the communicator configuration itself.
 */
TEST_F(FederatedCommTest, InsecureTracker) {
  std::int32_t n_workers = std::min(std::thread::hardware_concurrency(), 3u);
  TestFederated(n_workers, [=](std::shared_ptr<FederatedComm> comm, std::int32_t rank) {
    ASSERT_EQ(comm->Rank(), rank);
    ASSERT_EQ(comm->World(), n_workers);
  });
}
|
||||
} // namespace xgboost::collective
|
||||
|
||||
@@ -9,7 +9,8 @@
|
||||
#include <thread> // for thread
|
||||
|
||||
#include "../../../../plugin/federated/federated_tracker.h"
|
||||
#include "xgboost/json.h" // for Json
|
||||
#include "federated_comm.h" // for FederatedComm
|
||||
#include "xgboost/json.h" // for Json
|
||||
|
||||
namespace xgboost::collective {
|
||||
template <typename WorkerFn>
|
||||
@@ -28,7 +29,15 @@ void TestFederated(std::int32_t n_workers, WorkerFn&& fn) {
|
||||
std::int32_t port = tracker.Port();
|
||||
|
||||
for (std::int32_t i = 0; i < n_workers; ++i) {
|
||||
workers.emplace_back([=] { fn(port, i); });
|
||||
workers.emplace_back([=] {
|
||||
Json config{Object{}};
|
||||
config["federated_world_size"] = n_workers;
|
||||
config["federated_rank"] = i;
|
||||
config["federated_server_address"] = "0.0.0.0:" + std::to_string(port);
|
||||
auto comm = std::make_shared<FederatedComm>(config);
|
||||
|
||||
fn(comm, i);
|
||||
});
|
||||
}
|
||||
|
||||
for (auto& t : workers) {
|
||||
|
||||
Reference in New Issue
Block a user