temp merge, disable 1 line, SetValid

This commit is contained in:
Your Name
2023-10-12 16:16:44 -07:00
492 changed files with 15533 additions and 9376 deletions

View File

@@ -37,7 +37,14 @@ class ServerForTest {
}
// Shuts the test server down and joins its thread. Each handle is polled every 100ms until it
// tests true before it is dereferenced.
// NOTE(review): presumably both handles are assigned from another thread - confirm, and confirm
// that these unsynchronised reads are safe for the member types.
~ServerForTest() {
  auto const nap = [] { std::this_thread::sleep_for(std::chrono::milliseconds{100}); };

  // Wait for the server handle before asking it to stop.
  while (!server_) {
    nap();
  }
  server_->Shutdown();

  // Wait for the thread handle before joining it.
  while (!server_thread_) {
    nap();
  }
  server_thread_->join();
}
@@ -56,7 +63,7 @@ class BaseFederatedTest : public ::testing::Test {
void TearDown() override { server_.reset(nullptr); }
static int constexpr kWorldSize{3};
static int constexpr kWorldSize{2};
std::unique_ptr<ServerForTest> server_;
};

View File

@@ -9,7 +9,9 @@
#include <thread>
#include "../../../plugin/federated/federated_communicator.h"
#include "../../../src/collective/communicator-inl.cuh"
#include "../../../src/collective/device_communicator_adapter.cuh"
#include "../helpers.h"
#include "./helpers.h"
namespace xgboost::collective {
@@ -17,67 +19,80 @@ namespace xgboost::collective {
class FederatedAdapterTest : public BaseFederatedTest {};
TEST(FederatedAdapterSimpleTest, ThrowOnInvalidDeviceOrdinal) {
auto construct = []() { DeviceCommunicatorAdapter adapter{-1, nullptr}; };
auto construct = []() { DeviceCommunicatorAdapter adapter{-1}; };
EXPECT_THROW(construct(), dmlc::Error);
}
TEST(FederatedAdapterSimpleTest, ThrowOnInvalidCommunicator) {
auto construct = []() { DeviceCommunicatorAdapter adapter{0, nullptr}; };
EXPECT_THROW(construct(), dmlc::Error);
}
TEST_F(FederatedAdapterTest, DeviceAllReduceSum) {
std::vector<std::thread> threads;
for (auto rank = 0; rank < kWorldSize; rank++) {
threads.emplace_back([rank, server_address = server_->Address()] {
FederatedCommunicator comm{kWorldSize, rank, server_address};
// Assign device 0 to all workers, since we run gtest in a single-GPU machine
DeviceCommunicatorAdapter adapter{0, &comm};
int count = 3;
thrust::device_vector<double> buffer(count, 0);
thrust::sequence(buffer.begin(), buffer.end());
adapter.AllReduce(buffer.data().get(), count, DataType::kDouble, Operation::kSum);
thrust::host_vector<double> host_buffer = buffer;
EXPECT_EQ(host_buffer.size(), count);
for (auto i = 0; i < count; i++) {
EXPECT_EQ(host_buffer[i], i * kWorldSize);
}
});
}
for (auto& thread : threads) {
thread.join();
namespace {
// Worker body for the all-reduce (sum) test.
//
// Fills a 3-element device buffer with the sequence 0, 1, 2, sum-reduces it across all workers
// and checks on the host that element i equals i * world_size (every worker contributes the same
// sequence).
// NOTE(review): GPUIDX is defined outside this view; presumably it selects the device ordinal
// for the current worker - confirm.
void VerifyAllReduceSum() {
  auto const world_size = collective::GetWorldSize();
  auto const device = GPUIDX;
  int const count = 3;
  common::SetDevice(device);
  thrust::device_vector<double> buffer(count, 0);
  thrust::sequence(buffer.begin(), buffer.end());
  collective::AllReduce<collective::Operation::kSum>(device, buffer.data().get(), count);
  // Copy the reduced values back to the host for checking.
  thrust::host_vector<double> host_buffer = buffer;
  EXPECT_EQ(host_buffer.size(), count);
  for (auto i = 0; i < count; i++) {
    EXPECT_EQ(host_buffer[i], i * world_size);
  }
}
} // anonymous namespace
TEST_F(FederatedAdapterTest, DeviceAllGatherV) {
std::vector<std::thread> threads;
for (auto rank = 0; rank < kWorldSize; rank++) {
threads.emplace_back([rank, server_address = server_->Address()] {
FederatedCommunicator comm{kWorldSize, rank, server_address};
// Assign device 0 to all workers, since we run gtest in a single-GPU machine
DeviceCommunicatorAdapter adapter{0, &comm};
int const count = rank + 2;
thrust::device_vector<char> buffer(count, 0);
thrust::sequence(buffer.begin(), buffer.end());
std::vector<std::size_t> segments(kWorldSize);
dh::caching_device_vector<char> receive_buffer{};
adapter.AllGatherV(buffer.data().get(), count, &segments, &receive_buffer);
EXPECT_EQ(segments[0], 2);
EXPECT_EQ(segments[1], 3);
thrust::host_vector<char> host_buffer = receive_buffer;
EXPECT_EQ(host_buffer.size(), 9);
int expected[] = {0, 1, 0, 1, 2, 0, 1, 2, 3};
for (auto i = 0; i < 9; i++) {
EXPECT_EQ(host_buffer[i], expected[i]);
}
});
}
for (auto& thread : threads) {
thread.join();
}
// Hands VerifyAllReduceSum to RunWithFederatedCommunicator together with kWorldSize and the
// fixture server's address.
// NOTE(review): presumably the helper runs the callback once per rank - confirm in helpers.h.
TEST_F(FederatedAdapterTest, MGPUAllReduceSum) {
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyAllReduceSum);
}
namespace {
void VerifyAllGather() {
auto const world_size = collective::GetWorldSize();
auto const rank = collective::GetRank();
auto const device = GPUIDX;
common::SetDevice(device);
thrust::device_vector<double> send_buffer(1, rank);
thrust::device_vector<double> receive_buffer(world_size, 0);
collective::AllGather(device, send_buffer.data().get(), receive_buffer.data().get(),
sizeof(double));
thrust::host_vector<double> host_buffer = receive_buffer;
EXPECT_EQ(host_buffer.size(), world_size);
for (auto i = 0; i < world_size; i++) {
EXPECT_EQ(host_buffer[i], i);
}
}
} // anonymous namespace
// Hands VerifyAllGather to RunWithFederatedCommunicator together with kWorldSize and the
// fixture server's address.
// NOTE(review): presumably the helper runs the callback once per rank - confirm in helpers.h.
TEST_F(FederatedAdapterTest, MGPUAllGather) {
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyAllGather);
}
namespace {
void VerifyAllGatherV() {
auto const world_size = collective::GetWorldSize();
auto const rank = collective::GetRank();
auto const device = GPUIDX;
int const count = rank + 2;
common::SetDevice(device);
thrust::device_vector<char> buffer(count, 0);
thrust::sequence(buffer.begin(), buffer.end());
std::vector<std::size_t> segments(world_size);
dh::caching_device_vector<char> receive_buffer{};
collective::AllGatherV(device, buffer.data().get(), count, &segments, &receive_buffer);
EXPECT_EQ(segments[0], 2);
EXPECT_EQ(segments[1], 3);
thrust::host_vector<char> host_buffer = receive_buffer;
EXPECT_EQ(host_buffer.size(), 5);
int expected[] = {0, 1, 0, 1, 2};
for (auto i = 0; i < 5; i++) {
EXPECT_EQ(host_buffer[i], expected[i]);
}
}
} // anonymous namespace
// Hands VerifyAllGatherV to RunWithFederatedCommunicator together with kWorldSize and the
// fixture server's address.
// NOTE(review): VerifyAllGatherV hard-codes expectations for two workers, so this presumably
// relies on kWorldSize being 2 - confirm against the fixture.
TEST_F(FederatedAdapterTest, MGPUAllGatherV) {
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyAllGatherV);
}
} // namespace xgboost::collective

View File

@@ -31,7 +31,7 @@ class FederatedCommunicatorTest : public BaseFederatedTest {
protected:
static void CheckAllgather(FederatedCommunicator &comm, int rank) {
int buffer[kWorldSize] = {0, 0, 0};
int buffer[kWorldSize] = {0, 0};
buffer[rank] = rank;
comm.AllGather(buffer, sizeof(buffer));
for (auto i = 0; i < kWorldSize; i++) {
@@ -42,7 +42,7 @@ class FederatedCommunicatorTest : public BaseFederatedTest {
static void CheckAllreduce(FederatedCommunicator &comm) {
int buffer[] = {1, 2, 3, 4, 5};
comm.AllReduce(buffer, sizeof(buffer) / sizeof(buffer[0]), DataType::kInt32, Operation::kSum);
int expected[] = {3, 6, 9, 12, 15};
int expected[] = {2, 4, 6, 8, 10};
for (auto i = 0; i < 5; i++) {
EXPECT_EQ(buffer[i], expected[i]);
}

View File

@@ -30,7 +30,7 @@ void VerifyLoadUri() {
std::string uri = path + "?format=csv";
dmat.reset(DMatrix::Load(uri, false, DataSplitMode::kCol));
ASSERT_EQ(dmat->Info().num_col_, 8 * collective::GetWorldSize() + 3);
ASSERT_EQ(dmat->Info().num_col_, 8 * collective::GetWorldSize() + 1);
ASSERT_EQ(dmat->Info().num_row_, kRows);
for (auto const& page : dmat->GetBatches<SparsePage>()) {

View File

@@ -15,9 +15,11 @@
namespace xgboost {
namespace {
auto MakeModel(std::string tree_method, std::string objective, std::shared_ptr<DMatrix> dmat) {
auto MakeModel(std::string tree_method, std::string device, std::string objective,
std::shared_ptr<DMatrix> dmat) {
std::unique_ptr<Learner> learner{Learner::Create({dmat})};
learner->SetParam("tree_method", tree_method);
learner->SetParam("device", device);
learner->SetParam("objective", objective);
if (objective.find("quantile") != std::string::npos) {
learner->SetParam("quantile_alpha", "0.5");
@@ -35,7 +37,7 @@ auto MakeModel(std::string tree_method, std::string objective, std::shared_ptr<D
}
void VerifyObjective(size_t rows, size_t cols, float expected_base_score, Json expected_model,
std::string tree_method, std::string objective) {
std::string tree_method, std::string device, std::string objective) {
auto const world_size = collective::GetWorldSize();
auto const rank = collective::GetRank();
std::shared_ptr<DMatrix> dmat{RandomDataGenerator{rows, cols, 0}.GenerateDMatrix(rank == 0)};
@@ -61,14 +63,14 @@ void VerifyObjective(size_t rows, size_t cols, float expected_base_score, Json e
}
std::shared_ptr<DMatrix> sliced{dmat->SliceCol(world_size, rank)};
auto model = MakeModel(tree_method, objective, sliced);
auto model = MakeModel(tree_method, device, objective, sliced);
auto base_score = GetBaseScore(model);
ASSERT_EQ(base_score, expected_base_score);
ASSERT_EQ(model, expected_model);
ASSERT_EQ(base_score, expected_base_score) << " rank " << rank;
ASSERT_EQ(model, expected_model) << " rank " << rank;
}
} // namespace
class FederatedLearnerTest : public ::testing::TestWithParam<std::string> {
class VerticalFederatedLearnerTest : public ::testing::TestWithParam<std::string> {
std::unique_ptr<ServerForTest> server_;
static int constexpr kWorldSize{3};
@@ -76,7 +78,7 @@ class FederatedLearnerTest : public ::testing::TestWithParam<std::string> {
void SetUp() override { server_ = std::make_unique<ServerForTest>(kWorldSize); }
void TearDown() override { server_.reset(nullptr); }
void Run(std::string tree_method, std::string objective) {
void Run(std::string tree_method, std::string device, std::string objective) {
static auto constexpr kRows{16};
static auto constexpr kCols{16};
@@ -99,27 +101,35 @@ class FederatedLearnerTest : public ::testing::TestWithParam<std::string> {
}
}
auto model = MakeModel(tree_method, objective, dmat);
auto model = MakeModel(tree_method, device, objective, dmat);
auto score = GetBaseScore(model);
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyObjective, kRows, kCols,
score, model, tree_method, objective);
score, model, tree_method, device, objective);
}
};
TEST_P(FederatedLearnerTest, Approx) {
TEST_P(VerticalFederatedLearnerTest, Approx) {
std::string objective = GetParam();
this->Run("approx", objective);
this->Run("approx", "cpu", objective);
}
TEST_P(FederatedLearnerTest, Hist) {
TEST_P(VerticalFederatedLearnerTest, Hist) {
std::string objective = GetParam();
this->Run("hist", objective);
this->Run("hist", "cpu", objective);
}
INSTANTIATE_TEST_SUITE_P(FederatedLearnerObjective, FederatedLearnerTest,
::testing::ValuesIn(MakeObjNamesForTest()),
[](const ::testing::TestParamInfo<FederatedLearnerTest::ParamType> &info) {
return ObjTestNameGenerator(info);
});
// GPU variant: calls the fixture's Run() with tree method "hist" on device "cuda:0" for the
// parameterised objective. Only compiled when XGBOOST_USE_CUDA or XGBOOST_USE_HIP is defined.
#if defined(XGBOOST_USE_CUDA) || defined(XGBOOST_USE_HIP)
TEST_P(VerticalFederatedLearnerTest, GPUHist) {
std::string objective = GetParam();
this->Run("hist", "cuda:0", objective);
}
#endif // defined(XGBOOST_USE_CUDA) || defined(XGBOOST_USE_HIP)
// Instantiates VerticalFederatedLearnerTest once per value returned by MakeObjNamesForTest();
// the lambda forwards the gtest param info to ObjTestNameGenerator to name each instance.
INSTANTIATE_TEST_SUITE_P(
FederatedLearnerObjective, VerticalFederatedLearnerTest,
::testing::ValuesIn(MakeObjNamesForTest()),
[](const ::testing::TestParamInfo<VerticalFederatedLearnerTest::ParamType> &info) {
return ObjTestNameGenerator(info);
});
} // namespace xgboost

View File

@@ -39,7 +39,7 @@ class FederatedServerTest : public BaseFederatedTest {
protected:
static void CheckAllgather(federated::FederatedClient& client, int rank) {
int data[kWorldSize] = {0, 0, 0};
int data[kWorldSize] = {0, 0};
data[rank] = rank;
std::string send_buffer(reinterpret_cast<char const*>(data), sizeof(data));
auto reply = client.Allgather(send_buffer);
@@ -54,7 +54,7 @@ class FederatedServerTest : public BaseFederatedTest {
std::string send_buffer(reinterpret_cast<char const*>(data), sizeof(data));
auto reply = client.Allreduce(send_buffer, federated::INT32, federated::SUM);
auto const* result = reinterpret_cast<int const*>(reply.data());
int expected[] = {3, 6, 9, 12, 15};
int expected[] = {2, 4, 6, 8, 10};
for (auto i = 0; i < 5; i++) {
EXPECT_EQ(result[i], expected[i]);
}

View File

@@ -148,7 +148,7 @@ TEST(Plugin, CPUvsOneAPI) {
{
// CPU
ctx.gpu_id = -1;
ctx = ctx.MakeCPU();
obj_cpu->GetGradient(preds, info, 0, &cpu_out_preds);
}
{