Merge branch 'master' into sync-condition-2023Oct11

This commit is contained in:
Hui Liu
2023-10-30 13:19:33 -07:00
41 changed files with 1486 additions and 156 deletions

View File

@@ -14,6 +14,7 @@
#include <vector> // for vector
#include "../../../src/collective/allgather.h" // for RingAllgather
#include "../../../src/collective/coll.h" // for Coll
#include "../../../src/collective/comm.h" // for RabitComm
#include "gtest/gtest.h" // for AssertionR...
#include "test_worker.h" // for TestDistri...
@@ -63,37 +64,79 @@ class Worker : public WorkerForTest {
}
}
void TestV() {
{
// basic test
std::int32_t n{comm_.Rank()};
std::vector<std::int32_t> result;
auto rc = RingAllgatherV(comm_, common::Span{&n, 1}, &result);
ASSERT_TRUE(rc.OK()) << rc.Report();
for (std::int32_t i = 0; i < comm_.World(); ++i) {
ASSERT_EQ(result[i], i);
}
}
{
// V test
std::vector<std::int32_t> data(comm_.Rank() + 1, comm_.Rank());
std::vector<std::int32_t> result;
auto rc = RingAllgatherV(comm_, common::Span{data.data(), data.size()}, &result);
ASSERT_TRUE(rc.OK()) << rc.Report();
ASSERT_EQ(result.size(), (1 + comm_.World()) * comm_.World() / 2);
std::int32_t k{0};
for (std::int32_t r = 0; r < comm_.World(); ++r) {
auto seg = common::Span{result.data(), result.size()}.subspan(k, (r + 1));
if (comm_.Rank() == 0) {
for (auto v : seg) {
ASSERT_EQ(v, r);
}
k += seg.size();
/**
 * @brief Validate the output of a variable-length allgather.
 *
 * The callers in this file have rank r contribute (r + 1) elements, each equal to r, so
 * the gathered buffer is expected to start with 0, 1, 1, 2, 2, 2, ...
 *
 * The check runs on every rank. Previously both the comparison and the offset update
 * were guarded by `Rank() == 0`, which left all other workers unverified.
 *
 * @param result Gathered buffer; only its leading World() * (World() + 1) / 2 elements
 *               are inspected.
 */
void CheckV(common::Span<std::int32_t> result) {
  std::size_t offset{0};
  for (std::int32_t r = 0; r < comm_.World(); ++r) {
    // Segment contributed by rank r.
    auto seg = result.subspan(offset, static_cast<std::size_t>(r + 1));
    for (auto v : seg) {
      ASSERT_EQ(v, r);
    }
    offset += seg.size();
  }
}
/**
 * @brief Variable-length ring allgather where this rank sends (Rank() + 1) copies of
 *        its own rank.
 */
void TestVRing() {
  std::vector<std::int32_t> local(comm_.Rank() + 1, comm_.Rank());
  std::vector<std::int32_t> gathered;

  auto status = RingAllgatherV(comm_, common::Span{local.data(), local.size()}, &gathered);
  ASSERT_TRUE(status.OK()) << status.Report();

  // 1 + 2 + ... + World() elements in total.
  auto expected_size = (1 + comm_.World()) * comm_.World() / 2;
  ASSERT_EQ(gathered.size(), expected_size);

  CheckV(gathered);
}
/**
 * @brief Allgather-V where every rank contributes exactly one element (its own rank).
 *
 * The gathered vector must therefore contain one element per worker, with the value at
 * index i equal to i.
 */
void TestVBasic() {
  // basic test
  std::int32_t n{comm_.Rank()};
  std::vector<std::int32_t> result;
  auto rc = RingAllgatherV(comm_, common::Span{&n, 1}, &result);
  ASSERT_TRUE(rc.OK()) << rc.Report();
  // Guard the indexing below: a short result would otherwise be read out of bounds.
  ASSERT_EQ(result.size(), static_cast<std::size_t>(comm_.World()));
  for (std::int32_t i = 0; i < comm_.World(); ++i) {
    ASSERT_EQ(result[i], i);
  }
}
/**
 * @brief Exercise Coll::AllgatherV with explicit algorithms, including in-place input.
 *
 * Rank r contributes (r + 1) int32 values equal to r. Sizes and segment offsets handed
 * to AllgatherV are expressed in bytes, while the typed receive buffer is indexed in
 * elements; the two units are converted explicitly below.
 */
void TestVAlgo() {
  // V test, broadcast
  std::vector<std::int32_t> data(comm_.Rank() + 1, comm_.Rank());
  auto s_data = common::Span{data.data(), data.size()};

  // Exchange the per-rank payload size in bytes.
  std::vector<std::int64_t> sizes(comm_.World(), 0);
  sizes[comm_.Rank()] = s_data.size_bytes();
  auto rc = RingAllgather(comm_, common::Span{sizes.data(), sizes.size()}, 1);
  ASSERT_TRUE(rc.OK()) << rc.Report();
  std::shared_ptr<Coll> pcoll{new Coll{}};

  std::vector<std::int64_t> recv_segments(comm_.World() + 1, 0);
  // Accumulate in 64 bit (the element type of `sizes`) and convert bytes to elements so
  // that the receive buffer has exactly the gathered size.
  auto n_total_bytes = std::accumulate(sizes.cbegin(), sizes.cend(), std::int64_t{0});
  std::vector<std::int32_t> recv(static_cast<std::size_t>(n_total_bytes) / sizeof(std::int32_t));
  auto s_recv = common::Span{recv.data(), recv.size()};

  rc = pcoll->AllgatherV(comm_, common::EraseType(s_data),
                         common::Span{sizes.data(), sizes.size()},
                         common::Span{recv_segments.data(), recv_segments.size()},
                         common::EraseType(s_recv), AllgatherVAlgo::kBcast);
  ASSERT_TRUE(rc.OK()) << rc.Report();
  CheckV(s_recv);

  // Test inplace
  auto test_inplace = [&](AllgatherVAlgo algo) {
    std::fill_n(s_recv.data(), s_recv.size(), 0);
    // recv_segments holds byte offsets; convert to an element index before slicing the
    // int32 span so that `current` is exactly this rank's segment of the output.
    auto const beg =
        static_cast<std::size_t>(recv_segments[comm_.Rank()]) / sizeof(std::int32_t);
    auto current = s_recv.subspan(beg, data.size());
    std::copy_n(data.data(), data.size(), current.data());
    rc = pcoll->AllgatherV(comm_, common::EraseType(current),
                           common::Span{sizes.data(), sizes.size()},
                           common::Span{recv_segments.data(), recv_segments.size()},
                           common::EraseType(s_recv), algo);
    ASSERT_TRUE(rc.OK()) << rc.Report();
    CheckV(s_recv);
  };

  test_inplace(AllgatherVAlgo::kBcast);
  test_inplace(AllgatherVAlgo::kRing);
}
};
} // namespace
@@ -106,12 +149,30 @@ TEST_F(AllgatherTest, Basic) {
});
}
TEST_F(AllgatherTest, V) {
TEST_F(AllgatherTest, VBasic) {
std::int32_t n_workers = std::min(7u, std::thread::hardware_concurrency());
TestDistributed(n_workers, [=](std::string host, std::int32_t port, std::chrono::seconds timeout,
std::int32_t r) {
Worker worker{host, port, timeout, n_workers, r};
worker.TestV();
worker.TestVBasic();
});
}
// Variable-length ring allgather across up to 7 workers.
TEST_F(AllgatherTest, VRing) {
  // hardware_concurrency() may return 0 when the value cannot be determined; always
  // start at least one worker so the test never runs with an empty world.
  std::int32_t n_workers = std::max(1u, std::min(7u, std::thread::hardware_concurrency()));
  TestDistributed(n_workers, [=](std::string host, std::int32_t port, std::chrono::seconds timeout,
                                 std::int32_t r) {
    Worker worker{host, port, timeout, n_workers, r};
    worker.TestVRing();
  });
}
// AllgatherV with explicitly selected algorithms across up to 7 workers.
TEST_F(AllgatherTest, VAlgo) {
  // hardware_concurrency() may return 0 when the value cannot be determined; always
  // start at least one worker so the test never runs with an empty world.
  std::int32_t n_workers = std::max(1u, std::min(7u, std::thread::hardware_concurrency()));
  TestDistributed(n_workers, [=](std::string host, std::int32_t port, std::chrono::seconds timeout,
                                 std::int32_t r) {
    Worker worker{host, port, timeout, n_workers, r};
    worker.TestVAlgo();
  });
}
} // namespace xgboost::collective

View File

@@ -0,0 +1,117 @@
/**
* Copyright 2023, XGBoost Contributors
*/
#if defined(XGBOOST_USE_NCCL)
#include <gtest/gtest.h>
#include <thrust/device_vector.h> // for device_vector
#include <thrust/equal.h> // for equal
#include <xgboost/span.h> // for Span
#include <cstddef> // for size_t
#include <cstdint> // for int32_t, int64_t
#include <vector> // for vector
#include "../../../src/collective/allgather.h" // for RingAllgather
#include "../../../src/common/device_helpers.cuh" // for ToSpan, device_vector
#include "../../../src/common/type.h" // for EraseType
#include "test_worker.cuh" // for NCCLWorkerForTest
#include "test_worker.h" // for TestDistributed, WorkerForTest
namespace xgboost::collective {
namespace {
/**
 * @brief Test worker running variable-length allgather on device buffers through
 *        nccl_coll_ / nccl_comm_.
 */
class Worker : public NCCLWorkerForTest {
 public:
  using NCCLWorkerForTest::NCCLWorkerForTest;

  /**
   * @brief Run AllgatherV with the given algorithm and verify the gathered device data.
   *
   * Two scenarios are covered: one element per rank, and `n * rank` elements per rank
   * (so rank 0 contributes nothing).
   *
   * @param algo Algorithm forwarded to AllgatherV.
   */
  void TestV(AllgatherVAlgo algo) {
    {
      // basic test
      std::size_t n = 1;
      // create data
      dh::device_vector<std::int32_t> data(n, comm_.Rank());
      auto s_data = common::EraseType(common::Span{data.data().get(), data.size()});
      // get size
      std::vector<std::int64_t> sizes(comm_.World(), -1);
      sizes[comm_.Rank()] = s_data.size_bytes();
      auto rc = RingAllgather(comm_, common::Span{sizes.data(), sizes.size()}, 1);
      ASSERT_TRUE(rc.OK()) << rc.Report();
      // create result
      dh::device_vector<std::int32_t> result(comm_.World(), -1);
      auto s_result = common::EraseType(dh::ToSpan(result));

      std::vector<std::int64_t> recv_seg(nccl_comm_->World() + 1, 0);
      rc = nccl_coll_->AllgatherV(*nccl_comm_, s_data, common::Span{sizes.data(), sizes.size()},
                                  common::Span{recv_seg.data(), recv_seg.size()}, s_result, algo);
      ASSERT_TRUE(rc.OK()) << rc.Report();

      for (std::int32_t i = 0; i < comm_.World(); ++i) {
        ASSERT_EQ(result[i], i);
      }
    }
    {
      // V test
      std::size_t n = 256 * 256;
      // create data
      dh::device_vector<std::int32_t> data(n * nccl_comm_->Rank(), nccl_comm_->Rank());
      auto s_data = common::EraseType(common::Span{data.data().get(), data.size()});
      // get size
      std::vector<std::int64_t> sizes(nccl_comm_->World(), 0);
      sizes[comm_.Rank()] = dh::ToSpan(data).size_bytes();
      auto rc = RingAllgather(comm_, common::Span{sizes.data(), sizes.size()}, 1);
      ASSERT_TRUE(rc.OK()) << rc.Report();
      // Sum in 64 bit: an `int` initial value would make std::accumulate narrow every
      // partial sum of the int64 byte sizes.
      auto n_bytes = std::accumulate(sizes.cbegin(), sizes.cend(), std::int64_t{0});
      // create result
      dh::device_vector<std::int32_t> result(
          static_cast<std::size_t>(n_bytes) / sizeof(std::int32_t), -1);
      auto s_result = common::EraseType(dh::ToSpan(result));

      std::vector<std::int64_t> recv_seg(nccl_comm_->World() + 1, 0);
      rc = nccl_coll_->AllgatherV(*nccl_comm_, s_data, common::Span{sizes.data(), sizes.size()},
                                  common::Span{recv_seg.data(), recv_seg.size()}, s_result, algo);
      ASSERT_TRUE(rc.OK()) << rc.Report();
      // check segment size
      if (algo != AllgatherVAlgo::kBcast) {
        auto size = recv_seg[nccl_comm_->Rank() + 1] - recv_seg[nccl_comm_->Rank()];
        ASSERT_EQ(size, n * nccl_comm_->Rank() * sizeof(std::int32_t));
        ASSERT_EQ(size, sizes[nccl_comm_->Rank()]);
      }
      // check data
      std::size_t k{0};
      for (std::int32_t r = 0; r < nccl_comm_->World(); ++r) {
        // Rank r contributed n * r elements, all equal to r.
        std::size_t s = n * r;
        auto current = dh::ToSpan(result).subspan(k, s);
        std::vector<std::int32_t> h_data(current.size());
        dh::CopyDeviceSpanToVector(&h_data, current);
        for (auto v : h_data) {
          ASSERT_EQ(v, r);
        }
        k += s;
      }
    }
  }
};
// Fixture for the GPU allgather tests below; adds nothing on top of SocketTest.
class AllgatherTestGPU : public SocketTest {};
} // namespace
// One worker per visible GPU; each worker runs TestV with kRing and then with kBcast.
// NOTE(review): the kBcast run is also covered by MGPUTestVBcast; confirm that running it
// here after kRing is intentional.
TEST_F(AllgatherTestGPU, MGPUTestVRing) {
  auto n_workers = common::AllVisibleGPUs();
  auto run_worker = [=](std::string host, std::int32_t port, std::chrono::seconds timeout,
                        std::int32_t rank) {
    Worker worker{host, port, timeout, n_workers, rank};
    worker.Setup();
    worker.TestV(AllgatherVAlgo::kRing);
    worker.TestV(AllgatherVAlgo::kBcast);
  };
  TestDistributed(n_workers, run_worker);
}
// One worker per visible GPU; each worker runs TestV with the kBcast algorithm.
TEST_F(AllgatherTestGPU, MGPUTestVBcast) {
  auto n_workers = common::AllVisibleGPUs();
  auto run_worker = [=](std::string host, std::int32_t port, std::chrono::seconds timeout,
                        std::int32_t rank) {
    Worker worker{host, port, timeout, n_workers, rank};
    worker.Setup();
    worker.TestV(AllgatherVAlgo::kBcast);
  };
  TestDistributed(n_workers, run_worker);
}
} // namespace xgboost::collective
#endif // defined(XGBOOST_USE_NCCL)

View File

@@ -6,10 +6,10 @@
#include "../../../src/collective/allreduce.h"
#include "../../../src/collective/coll.h" // for Coll
#include "../../../src/collective/tracker.h"
#include "test_worker.h" // for WorkerForTest, TestDistributed
#include "../../../src/common/type.h" // for EraseType
#include "test_worker.h" // for WorkerForTest, TestDistributed
namespace xgboost::collective {
namespace {
class AllreduceWorker : public WorkerForTest {
public:
@@ -50,11 +50,10 @@ class AllreduceWorker : public WorkerForTest {
}
void BitOr() {
Context ctx;
std::vector<std::uint32_t> data(comm_.World(), 0);
data[comm_.Rank()] = ~std::uint32_t{0};
auto pcoll = std::shared_ptr<Coll>{new Coll{}};
auto rc = pcoll->Allreduce(&ctx, comm_, EraseType(common::Span{data.data(), data.size()}),
auto rc = pcoll->Allreduce(comm_, common::EraseType(common::Span{data.data(), data.size()}),
ArrayInterfaceHandler::kU4, Op::kBitwiseOR);
ASSERT_TRUE(rc.OK()) << rc.Report();
for (auto v : data) {

View File

@@ -0,0 +1,70 @@
/**
* Copyright 2023, XGBoost Contributors
*/
#if defined(XGBOOST_USE_NCCL)
#include <gtest/gtest.h>
#include <thrust/host_vector.h> // for host_vector
#include "../../../src/collective/coll.h" // for Coll
#include "../../../src/common/common.h"
#include "../../../src/common/device_helpers.cuh" // for ToSpan, device_vector
#include "../../../src/common/type.h" // for EraseType
#include "../helpers.h" // for MakeCUDACtx
#include "test_worker.cuh" // for NCCLWorkerForTest
#include "test_worker.h" // for WorkerForTest, TestDistributed
namespace xgboost::collective {
namespace {
// Fixture for the GPU allreduce tests below; adds nothing on top of SocketTest.
class AllreduceTestGPU : public SocketTest {};
/**
 * @brief Test worker running allreduce on device buffers through nccl_coll_ / nccl_comm_.
 */
class Worker : public NCCLWorkerForTest {
 public:
  using NCCLWorkerForTest::NCCLWorkerForTest;

  /**
   * @brief Bitwise-OR allreduce: each rank sets all bits of its own slot, so every slot
   *        must be all ones afterwards.
   */
  void BitOr() {
    dh::device_vector<std::uint32_t> data(comm_.World(), 0);
    data[comm_.Rank()] = ~std::uint32_t{0};
    auto rc = nccl_coll_->Allreduce(*nccl_comm_, common::EraseType(dh::ToSpan(data)),
                                    ArrayInterfaceHandler::kU4, Op::kBitwiseOR);
    ASSERT_TRUE(rc.OK()) << rc.Report();
    thrust::host_vector<std::uint32_t> h_data(data.size());
    thrust::copy(data.cbegin(), data.cend(), h_data.begin());
    for (auto v : h_data) {
      ASSERT_EQ(v, ~std::uint32_t{0});
    }
  }

  /**
   * @brief Sum allreduce over 314 doubles initialised to 1.5 on every rank; each element
   *        must equal 1.5 * World() afterwards.
   */
  void Acc() {
    dh::device_vector<double> data(314, 1.5);
    auto rc = nccl_coll_->Allreduce(*nccl_comm_, common::EraseType(dh::ToSpan(data)),
                                    ArrayInterfaceHandler::kF8, Op::kSum);
    ASSERT_TRUE(rc.OK()) << rc.Report();
    // Copy the result to the host once (as BitOr does) instead of reading the device
    // vector element by element.
    thrust::host_vector<double> h_data(data.size());
    thrust::copy(data.cbegin(), data.cend(), h_data.begin());
    auto const expected = 1.5 * static_cast<double>(comm_.World());
    for (std::size_t i = 0; i < h_data.size(); ++i) {
      ASSERT_EQ(h_data[i], expected) << i;
    }
  }
};
} // namespace
// One worker per visible GPU; each worker runs the bitwise-OR allreduce check.
TEST_F(AllreduceTestGPU, BitOr) {
  auto n_workers = common::AllVisibleGPUs();
  auto run_worker = [=](std::string host, std::int32_t port, std::chrono::seconds timeout,
                        std::int32_t rank) {
    Worker worker{host, port, timeout, n_workers, rank};
    worker.Setup();
    worker.BitOr();
  };
  TestDistributed(n_workers, run_worker);
}
// One worker per visible GPU; each worker runs the sum allreduce check.
TEST_F(AllreduceTestGPU, Sum) {
  auto n_workers = common::AllVisibleGPUs();
  auto run_worker = [=](std::string host, std::int32_t port, std::chrono::seconds timeout,
                        std::int32_t rank) {
    Worker worker{host, port, timeout, n_workers, rank};
    worker.Setup();
    worker.Acc();
  };
  TestDistributed(n_workers, run_worker);
}
} // namespace xgboost::collective
#endif // defined(XGBOOST_USE_NCCL)

View File

@@ -47,5 +47,5 @@ TEST_F(BroadcastTest, Basic) {
Worker worker{host, port, timeout, n_workers, r};
worker.Run();
});
}
} // namespace
} // namespace xgboost::collective

View File

@@ -0,0 +1,32 @@
/**
* Copyright 2023, XGBoost Contributors
*/
#pragma once
#include <memory> // for shared_ptr
#include "../../../src/collective/coll.h" // for Coll
#include "../../../src/collective/comm.h" // for Comm
#include "test_worker.h"
#include "xgboost/context.h" // for Context
namespace xgboost::collective {
/**
 * @brief WorkerForTest extended with CUDA variants of the communicator and collective.
 *
 * Setup() must be called before nccl_comm_ or nccl_coll_ is used; until then both are
 * null.
 */
class NCCLWorkerForTest : public WorkerForTest {
 protected:
  // Host-side collective, also handed to MakeCUDAVar when building nccl_comm_.
  std::shared_ptr<Coll> coll_;
  // CUDA variant of comm_.
  std::shared_ptr<xgboost::collective::Comm> nccl_comm_;
  // CUDA variant of coll_.
  std::shared_ptr<Coll> nccl_coll_;
  // CUDA context created from this worker's rank.
  Context ctx_;

 public:
  using WorkerForTest::WorkerForTest;

  /**
   * @brief Create the CUDA context, communicator and collective, then check that the
   *        CUDA communicator reports the same world size and rank as comm_.
   */
  void Setup() {
    ctx_ = MakeCUDACtx(comm_.Rank());
    coll_ = std::shared_ptr<Coll>{new Coll{}};
    nccl_comm_.reset(comm_.MakeCUDAVar(&ctx_, coll_));
    nccl_coll_.reset(coll_->MakeCUDAVar());

    ASSERT_EQ(comm_.World(), nccl_comm_->World());
    ASSERT_EQ(comm_.Rank(), nccl_comm_->Rank());
  }
};
} // namespace xgboost::collective

View File

@@ -1,6 +1,7 @@
/**
* Copyright 2023, XGBoost Contributors
*/
#pragma once
#include <gtest/gtest.h>
#include <chrono> // for seconds

View File

@@ -97,4 +97,29 @@ TEST(BitField, Clear) {
TestBitFieldClear<RBitField8>(19);
}
}
// Count-trailing-zeros checks for TrailingZeroBits and detail::TrailingZeroBitsImpl.
TEST(BitField, CTZ) {
  // Zero input: the expected count is the full width of a 32-bit word.
  auto cnt_zero = TrailingZeroBits(0);
  ASSERT_EQ(cnt_zero, sizeof(std::uint32_t) * 8);

  // Lowest set bit at position 2.
  auto cnt_two = TrailingZeroBits(0b00011100);
  ASSERT_EQ(cnt_two, 2);
  cnt_two = detail::TrailingZeroBitsImpl(0b00011100);
  ASSERT_EQ(cnt_two, 2);

  // Lowest bit set: no trailing zeros.
  auto cnt_none = TrailingZeroBits(0b00011101);
  ASSERT_EQ(cnt_none, 0);
  cnt_none = detail::TrailingZeroBitsImpl(0b00011101);
  ASSERT_EQ(cnt_none, 0);

  // Only bit 15 set.
  auto cnt_high = TrailingZeroBits(0b1000000000000000);
  ASSERT_EQ(cnt_high, 15);
  cnt_high = detail::TrailingZeroBitsImpl(0b1000000000000000);
  ASSERT_EQ(cnt_high, 15);
}
} // namespace xgboost

View File

@@ -572,4 +572,31 @@ class BaseMGPUTest : public ::testing::Test {
class DeclareUnifiedDistributedTest(MetricTest) : public BaseMGPUTest{};
inline DeviceOrd FstCU() { return DeviceOrd::CUDA(0); }
/**
 * @brief poor man's gmock for message matching.
 *
 * Calls `fn` and fails the current test unless it throws an `Error` whose `what()`
 * contains `submsg`. Any other outcome (no exception, a different std::exception, or a
 * throwable unrelated to std::exception) is reported as a fatal test failure.
 *
 * @tparam Error The type of expected exception.
 *
 * @param submsg A substring of the actual error message.
 * @param fn     The function that throws Error
 */
template <typename Error, typename Fn>
void ExpectThrow(std::string submsg, Fn&& fn) {
  try {
    fn();
  } catch (Error const& exc) {
    auto actual = std::string{exc.what()};
    ASSERT_NE(actual.find(submsg), std::string::npos)
        << "Expecting substring `" << submsg << "` from the error message."
        << " Got:\n"
        << actual << "\n";
    return;
  } catch (std::exception const& exc) {
    auto actual = exc.what();
    ASSERT_TRUE(false) << "An unexpected type of exception is thrown. what:" << actual;
    return;
  } catch (...) {
    // Throwables that do not derive from std::exception would otherwise escape this
    // helper without any diagnostic from it.
    ASSERT_TRUE(false) << "An unexpected type of exception is thrown.";
    return;
  }
  ASSERT_TRUE(false) << "No exception is thrown";
}
} // namespace xgboost

View File

@@ -0,0 +1,84 @@
/**
* Copyright 2022-2023, XGBoost contributors
*/
#include <gtest/gtest.h>
#include <string> // for string
#include <thread> // for thread
#include "../../../../plugin/federated/federated_comm.h"
#include "../../collective/net_test.h" // for SocketTest
#include "../../helpers.h" // for ExpectThrow
#include "test_worker.h" // for TestFederated
#include "xgboost/json.h" // for Json
namespace xgboost::collective {
namespace {
// Fixture for the FederatedComm tests below; adds nothing on top of SocketTest.
class FederatedCommTest : public SocketTest {};
} // namespace
// Constructing with the third argument set to 0 must be rejected as an invalid world size.
TEST_F(FederatedCommTest, ThrowOnWorldSizeTooSmall) {
  ExpectThrow<dmlc::Error>("Invalid world size.",
                           [] { FederatedComm federated{"localhost", 0, 0, 0}; });
}
// A last argument of -1 must be rejected as an invalid worker rank.
TEST_F(FederatedCommTest, ThrowOnRankTooSmall) {
  ExpectThrow<dmlc::Error>("Invalid worker rank.",
                           [] { FederatedComm federated{"localhost", 0, 1, -1}; });
}
// With the third argument at 1, a last argument of 1 must be rejected as an invalid
// worker rank.
TEST_F(FederatedCommTest, ThrowOnRankTooBig) {
  ExpectThrow<dmlc::Error>("Invalid worker rank.",
                           [] { FederatedComm federated{"localhost", 0, 1, 1}; });
}
// The world size is supplied as a string in the JSON config; construction must fail with
// a message mentioning the `String` type.
TEST_F(FederatedCommTest, ThrowOnWorldSizeNotInteger) {
  ExpectThrow<dmlc::Error>("got: `String`", [] {
    Json cfg{Object{}};
    cfg["federated_server_address"] = std::string("localhost:0");
    cfg["federated_world_size"] = std::string("1");
    cfg["federated_rank"] = Integer(0);
    FederatedComm federated(cfg);
  });
}
// The rank is supplied as a string in the JSON config; construction must fail with a
// message mentioning the `String` type.
TEST_F(FederatedCommTest, ThrowOnRankNotInteger) {
  ExpectThrow<dmlc::Error>("got: `String`", [] {
    Json cfg{Object{}};
    cfg["federated_server_address"] = std::string("localhost:0");
    cfg["federated_world_size"] = 1;
    cfg["federated_rank"] = std::string("0");
    FederatedComm federated(cfg);
  });
}
// World size and rank given in the JSON config must be reported back by the communicator.
TEST_F(FederatedCommTest, GetWorldSizeAndRank) {
  Json cfg{Object{}};
  cfg["federated_world_size"] = 6;
  cfg["federated_rank"] = 3;
  cfg["federated_server_address"] = String{"localhost:0"};

  FederatedComm federated{cfg};
  EXPECT_EQ(federated.World(), 6);
  EXPECT_EQ(federated.Rank(), 3);
}
// A communicator constructed with the arguments below must report itself as distributed.
TEST_F(FederatedCommTest, IsDistributed) {
  FederatedComm federated{"localhost", 0, 2, 1};
  EXPECT_TRUE(federated.IsDistributed());
}
// Start an insecure tracker with up to 3 workers; each worker builds a FederatedComm
// from a JSON config and checks the reported rank and world size.
TEST_F(FederatedCommTest, InsecureTracker) {
  // hardware_concurrency() may return 0 when the value cannot be determined; always
  // start at least one worker so the test never runs with an empty world.
  std::int32_t n_workers = std::max(1u, std::min(std::thread::hardware_concurrency(), 3u));
  TestFederated(n_workers, [=](std::int32_t port, std::int32_t rank) {
    Json config{Object{}};
    config["federated_world_size"] = n_workers;
    config["federated_rank"] = rank;
    config["federated_server_address"] = "0.0.0.0:" + std::to_string(port);
    FederatedComm comm{config};
    ASSERT_EQ(comm.Rank(), rank);
    ASSERT_EQ(comm.World(), n_workers);
  });
}
} // namespace xgboost::collective

View File

@@ -0,0 +1,42 @@
/**
* Copyright 2022-2023, XGBoost contributors
*/
#pragma once
#include <gtest/gtest.h>
#include <chrono> // for ms
#include <thread> // for thread
#include "../../../../plugin/federated/federated_tracker.h"
#include "xgboost/json.h" // for Json
namespace xgboost::collective {
/**
 * @brief Run `fn(port, rank)` on `n_workers` threads against a freshly started
 *        insecure FederatedTracker, then shut the tracker down.
 *
 * @param n_workers Number of worker threads; also passed to the tracker as "n_workers".
 * @param fn        Callable invoked as fn(port, rank) on each worker thread.
 */
template <typename WorkerFn>
void TestFederated(std::int32_t n_workers, WorkerFn&& fn) {
  Json config{Object()};
  config["federated_secure"] = Boolean{false};
  config["n_workers"] = Integer{n_workers};
  FederatedTracker tracker{config};
  auto tracker_result = tracker.Run();

  // Poll until the tracker reports a non-zero port.
  for (; tracker.Port() == 0;) {
    std::this_thread::sleep_for(std::chrono::milliseconds{100});
  }
  std::int32_t port = tracker.Port();

  std::vector<std::thread> threads;
  for (std::int32_t rank = 0; rank < n_workers; ++rank) {
    threads.emplace_back([=] { fn(port, rank); });
  }
  for (auto& th : threads) {
    th.join();
  }

  auto rc = tracker.Shutdown();
  ASSERT_TRUE(rc.OK()) << rc.Report();
  ASSERT_TRUE(tracker_result.get().OK());
}
} // namespace xgboost::collective

View File

@@ -1,5 +1,5 @@
/*!
* Copyright 2022-2023 XGBoost contributors
/**
* Copyright 2022-2023, XGBoost contributors
*/
#pragma once
@@ -26,7 +26,7 @@ class ServerForTest {
explicit ServerForTest(std::size_t world_size) {
server_thread_.reset(new std::thread([this, world_size] {
grpc::ServerBuilder builder;
xgboost::federated::FederatedService service{world_size};
xgboost::federated::FederatedService service{static_cast<std::int32_t>(world_size)};
int selected_port;
builder.AddListeningPort("localhost:0", grpc::InsecureServerCredentials(), &selected_port);
builder.RegisterService(&service);