Common interface for collective communication (#8057)
* implement broadcast for federated communicator
* implement allreduce
* add communicator factory
* add device adapter
* add device communicator to factory
* add rabit communicator
* add rabit communicator to the factory
* add nccl device communicator
* add synchronize to device communicator
* add back print and getprocessorname
* add python wrapper and c api
* clean up types
* fix non-gpu build
* try to fix ci
* fix std::size_t
* portable string compare ignore case
* c style size_t
* fix lint errors
* cross platform setenv
* fix memory leak
* fix lint errors
* address review feedback
* add python test for rabit communicator
* fix failing gtest
* use json to configure communicators
* fix lint error
* get rid of factories
* fix cpu build
* fix include
* fix python import
* don't export collective.py yet
* skip collective communicator pytest on windows
* add review feedback
* update documentation
* remove mpi communicator type
* fix tests
* shutdown the communicator separately

Co-authored-by: Hyunsu Cho <chohyu01@cs.washington.edu>
This commit is contained in:
54
tests/cpp/collective/test_communicator.cc
Normal file
54
tests/cpp/collective/test_communicator.cc
Normal file
@@ -0,0 +1,54 @@
|
||||
/*!
|
||||
* Copyright 2022 XGBoost contributors
|
||||
*/
|
||||
#include <dmlc/parameter.h>
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include "../../../src/collective/communicator.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace collective {
|
||||
|
||||
// Checks how Communicator::GetTypeFromEnv() reacts to different values of the
// XGBOOST_COMMUNICATOR environment variable.
TEST(CommunicatorFactory, TypeFromEnv) {
  // Writes the given communicator name into XGBOOST_COMMUNICATOR.
  auto select_communicator = [](char const* name) {
    dmlc::SetEnv<std::string>("XGBOOST_COMMUNICATOR", name);
  };

  // Queried before this test has written the variable.
  EXPECT_EQ(CommunicatorType::kUnknown, Communicator::GetTypeFromEnv());

  select_communicator("rabit");
  EXPECT_EQ(CommunicatorType::kRabit, Communicator::GetTypeFromEnv());

  // Written with a capital 'F'; still expected to resolve to kFederated.
  select_communicator("Federated");
  EXPECT_EQ(CommunicatorType::kFederated, Communicator::GetTypeFromEnv());

  // An unrecognised name is expected to raise dmlc::Error.
  select_communicator("foo");
  EXPECT_THROW(Communicator::GetTypeFromEnv(), dmlc::Error);
  // NOTE(review): XGBOOST_COMMUNICATOR is still set to "foo" when this test
  // returns -- confirm no later test in the same process depends on it.
}
|
||||
|
||||
// Checks Communicator::GetTypeFromConfig() when the communicator is named
// under the lower-case "xgboost_communicator" key of a JSON object.
TEST(CommunicatorFactory, TypeFromArgs) {
  std::string const key{"xgboost_communicator"};
  Json cfg{JsonObject()};

  // An empty object carries no communicator entry.
  EXPECT_EQ(CommunicatorType::kUnknown, Communicator::GetTypeFromConfig(cfg));

  cfg[key] = String("rabit");
  EXPECT_EQ(CommunicatorType::kRabit, Communicator::GetTypeFromConfig(cfg));

  cfg[key] = String("federated");
  EXPECT_EQ(CommunicatorType::kFederated, Communicator::GetTypeFromConfig(cfg));

  // An unrecognised name is expected to raise dmlc::Error.
  cfg[key] = String("foo");
  EXPECT_THROW(Communicator::GetTypeFromConfig(cfg), dmlc::Error);
}
|
||||
|
||||
// Same checks as the lower-case variant, but the communicator is named under
// the upper-case "XGBOOST_COMMUNICATOR" key of the JSON object.
TEST(CommunicatorFactory, TypeFromArgsUpperCase) {
  std::string const key{"XGBOOST_COMMUNICATOR"};
  Json cfg{JsonObject()};

  // An empty object carries no communicator entry.
  EXPECT_EQ(CommunicatorType::kUnknown, Communicator::GetTypeFromConfig(cfg));

  cfg[key] = String("rabit");
  EXPECT_EQ(CommunicatorType::kRabit, Communicator::GetTypeFromConfig(cfg));

  cfg[key] = String("federated");
  EXPECT_EQ(CommunicatorType::kFederated, Communicator::GetTypeFromConfig(cfg));

  // An unrecognised name is expected to raise dmlc::Error.
  cfg[key] = String("foo");
  EXPECT_THROW(Communicator::GetTypeFromConfig(cfg), dmlc::Error);
}
|
||||
|
||||
} // namespace collective
|
||||
} // namespace xgboost
|
||||
26
tests/cpp/collective/test_nccl_device_communicator.cu
Normal file
26
tests/cpp/collective/test_nccl_device_communicator.cu
Normal file
@@ -0,0 +1,26 @@
|
||||
/*!
|
||||
* Copyright 2022 XGBoost contributors
|
||||
*/
|
||||
#ifdef XGBOOST_USE_NCCL
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include "../../../src/collective/nccl_device_communicator.cuh"
|
||||
|
||||
namespace xgboost {
|
||||
namespace collective {
|
||||
|
||||
// Constructing the NCCL device communicator with -1 as its first argument is
// expected to raise dmlc::Error.
TEST(NcclDeviceCommunicatorSimpleTest, ThrowOnInvalidDeviceOrdinal) {
  // Wrapped in a lambda so the construction can be handed to EXPECT_THROW.
  auto make_with_bad_ordinal = []() {
    NcclDeviceCommunicator comm{-1, nullptr};
  };
  EXPECT_THROW(make_with_bad_ordinal(), dmlc::Error);
}
|
||||
|
||||
// Constructing the NCCL device communicator with 0 as its first argument and
// a null second argument is expected to raise dmlc::Error.
TEST(NcclDeviceCommunicatorSimpleTest, ThrowOnInvalidCommunicator) {
  // Wrapped in a lambda so the construction can be handed to EXPECT_THROW.
  auto make_with_null_communicator = []() {
    NcclDeviceCommunicator comm{0, nullptr};
  };
  EXPECT_THROW(make_with_null_communicator(), dmlc::Error);
}
|
||||
|
||||
} // namespace collective
|
||||
} // namespace xgboost
|
||||
|
||||
#endif
|
||||
39
tests/cpp/collective/test_rabit_communicator.cc
Normal file
39
tests/cpp/collective/test_rabit_communicator.cc
Normal file
@@ -0,0 +1,39 @@
|
||||
/*!
|
||||
* Copyright 2022 XGBoost contributors
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include "../../../src/collective/rabit_communicator.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace collective {
|
||||
|
||||
// Constructing a RabitCommunicator with arguments {0, 0} is expected to raise
// dmlc::Error.
TEST(RabitCommunicatorSimpleTest, ThrowOnWorldSizeTooSmall) {
  // Wrapped in a lambda so the construction can be handed to EXPECT_THROW.
  auto make_with_zero_world = []() {
    RabitCommunicator comm{0, 0};
  };
  EXPECT_THROW(make_with_zero_world(), dmlc::Error);
}
|
||||
|
||||
// Constructing a RabitCommunicator with arguments {1, -1} is expected to raise
// dmlc::Error.
TEST(RabitCommunicatorSimpleTest, ThrowOnRankTooSmall) {
  // Wrapped in a lambda so the construction can be handed to EXPECT_THROW.
  auto make_with_negative_rank = []() {
    RabitCommunicator comm{1, -1};
  };
  EXPECT_THROW(make_with_negative_rank(), dmlc::Error);
}
|
||||
|
||||
// Constructing a RabitCommunicator with arguments {1, 1} is expected to raise
// dmlc::Error.
TEST(RabitCommunicatorSimpleTest, ThrowOnRankTooBig) {
  // Wrapped in a lambda so the construction can be handed to EXPECT_THROW.
  auto make_with_oversized_rank = []() {
    RabitCommunicator comm{1, 1};
  };
  EXPECT_THROW(make_with_oversized_rank(), dmlc::Error);
}
|
||||
|
||||
// A communicator built from {6, 3} is expected to report 6 from
// GetWorldSize() and 3 from GetRank().
TEST(RabitCommunicatorSimpleTest, GetWorldSizeAndRank) {
  RabitCommunicator communicator{6, 3};

  EXPECT_EQ(communicator.GetWorldSize(), 6);
  EXPECT_EQ(communicator.GetRank(), 3);
}
|
||||
|
||||
// A directly constructed communicator is expected to report that it is not
// distributed.
TEST(RabitCommunicatorSimpleTest, IsNotDistributed) {
  RabitCommunicator communicator{2, 1};

  // Rabit is only distributed with a tracker.
  EXPECT_FALSE(communicator.IsDistributed());
}
|
||||
|
||||
} // namespace collective
|
||||
} // namespace xgboost
|
||||
Reference in New Issue
Block a user