[CI] Test federated learning plugin in the CI (#8325)
This commit is contained in:
parent
97a5b088a5
commit
2faa744aba
@ -4,6 +4,7 @@ include(cmake/Utils.cmake)
|
|||||||
list(APPEND CMAKE_MODULE_PATH "${xgboost_SOURCE_DIR}/cmake/modules")
|
list(APPEND CMAKE_MODULE_PATH "${xgboost_SOURCE_DIR}/cmake/modules")
|
||||||
cmake_policy(SET CMP0022 NEW)
|
cmake_policy(SET CMP0022 NEW)
|
||||||
cmake_policy(SET CMP0079 NEW)
|
cmake_policy(SET CMP0079 NEW)
|
||||||
|
cmake_policy(SET CMP0076 NEW)
|
||||||
set(CMAKE_POLICY_DEFAULT_CMP0063 NEW)
|
set(CMAKE_POLICY_DEFAULT_CMP0063 NEW)
|
||||||
cmake_policy(SET CMP0063 NEW)
|
cmake_policy(SET CMP0063 NEW)
|
||||||
|
|
||||||
@ -117,6 +118,20 @@ endif (BUILD_STATIC_LIB AND (R_LIB OR JVM_BINDINGS))
|
|||||||
if (PLUGIN_RMM AND (NOT BUILD_WITH_CUDA_CUB))
|
if (PLUGIN_RMM AND (NOT BUILD_WITH_CUDA_CUB))
|
||||||
message(SEND_ERROR "Cannot build with RMM using cub submodule.")
|
message(SEND_ERROR "Cannot build with RMM using cub submodule.")
|
||||||
endif (PLUGIN_RMM AND (NOT BUILD_WITH_CUDA_CUB))
|
endif (PLUGIN_RMM AND (NOT BUILD_WITH_CUDA_CUB))
|
||||||
|
#-- Federated learning plugin: reject configurations it cannot be combined with.
# Each check reports through SEND_ERROR, so every violated constraint is listed
# in one configure run before generation is aborted.

# The plugin cannot be cross compiled.
if (PLUGIN_FEDERATED AND CMAKE_CROSSCOMPILING)
  message(SEND_ERROR "Cannot cross compile with federated learning support")
endif ()

# The plugin cannot be combined with a static libxgboost.
if (PLUGIN_FEDERATED AND BUILD_STATIC_LIB)
  message(SEND_ERROR "Cannot build static lib with federated learning support")
endif ()

# The R and JVM packages cannot be built together with the plugin.
if (PLUGIN_FEDERATED AND (R_LIB OR JVM_BINDINGS))
  message(SEND_ERROR "Cannot enable federated learning support when R or JVM packages are enabled.")
endif ()

# Windows is not a supported platform for the plugin.
if (PLUGIN_FEDERATED AND WIN32)
  message(SEND_ERROR "Federated learning not supported for Windows platform")
endif ()
|
||||||
|
|
||||||
#-- Sanitizer
|
#-- Sanitizer
|
||||||
if (USE_SANITIZER)
|
if (USE_SANITIZER)
|
||||||
|
|||||||
@ -1,7 +1,9 @@
|
|||||||
# gRPC needs to be installed first. See README.md.
|
# gRPC needs to be installed first. See README.md.
|
||||||
|
set(protobuf_MODULE_COMPATIBLE TRUE)
|
||||||
|
set(protobuf_BUILD_SHARED_LIBS TRUE)
|
||||||
find_package(Protobuf CONFIG REQUIRED)
|
find_package(Protobuf CONFIG REQUIRED)
|
||||||
find_package(gRPC CONFIG REQUIRED)
|
find_package(gRPC CONFIG REQUIRED)
|
||||||
find_package(Threads)
|
message(STATUS "Found gRPC: ${gRPC_CONFIG}")
|
||||||
|
|
||||||
# Generated code from the protobuf definition.
|
# Generated code from the protobuf definition.
|
||||||
add_library(federated_proto federated.proto)
|
add_library(federated_proto federated.proto)
|
||||||
@ -9,13 +11,16 @@ target_link_libraries(federated_proto PUBLIC protobuf::libprotobuf gRPC::grpc gR
|
|||||||
target_include_directories(federated_proto PUBLIC ${CMAKE_CURRENT_BINARY_DIR})
|
target_include_directories(federated_proto PUBLIC ${CMAKE_CURRENT_BINARY_DIR})
|
||||||
xgboost_target_properties(federated_proto)
|
xgboost_target_properties(federated_proto)
|
||||||
|
|
||||||
get_target_property(grpc_cpp_plugin_location gRPC::grpc_cpp_plugin LOCATION)
|
protobuf_generate(
|
||||||
protobuf_generate(TARGET federated_proto LANGUAGE cpp)
|
TARGET federated_proto
|
||||||
|
LANGUAGE cpp
|
||||||
|
PROTOC_OUT_DIR "${PROTO_BINARY_DIR}")
|
||||||
protobuf_generate(
|
protobuf_generate(
|
||||||
TARGET federated_proto
|
TARGET federated_proto
|
||||||
LANGUAGE grpc
|
LANGUAGE grpc
|
||||||
GENERATE_EXTENSIONS .grpc.pb.h .grpc.pb.cc
|
GENERATE_EXTENSIONS .grpc.pb.h .grpc.pb.cc
|
||||||
PLUGIN "protoc-gen-grpc=${grpc_cpp_plugin_location}")
|
PLUGIN "protoc-gen-grpc=\$<TARGET_FILE:gRPC::grpc_cpp_plugin>"
|
||||||
|
PROTOC_OUT_DIR "${PROTO_BINARY_DIR}")
|
||||||
|
|
||||||
# Wrapper for the gRPC client.
|
# Wrapper for the gRPC client.
|
||||||
add_library(federated_client INTERFACE)
|
add_library(federated_client INTERFACE)
|
||||||
|
|||||||
@ -5,14 +5,7 @@ This folder contains the plugin for federated learning. Follow these steps to bu
|
|||||||
|
|
||||||
Install gRPC
|
Install gRPC
|
||||||
------------
|
------------
|
||||||
```shell
|
Refer to the [installation guide from the gRPC website](https://grpc.io/docs/languages/cpp/quickstart/).
|
||||||
sudo apt-get install build-essential autoconf libtool pkg-config cmake ninja-build
|
|
||||||
git clone -b v1.47.0 https://github.com/grpc/grpc
|
|
||||||
cd grpc
|
|
||||||
git submodule update --init
|
|
||||||
cmake -S . -B build -GNinja -DABSL_PROPAGATE_CXX_STD=ON
|
|
||||||
cmake --build build --target install
|
|
||||||
```
|
|
||||||
|
|
||||||
Build the Plugin
|
Build the Plugin
|
||||||
----------------
|
----------------
|
||||||
@ -20,16 +13,16 @@ Build the Plugin
|
|||||||
# Under xgboost source tree.
|
# Under xgboost source tree.
|
||||||
mkdir build
|
mkdir build
|
||||||
cd build
|
cd build
|
||||||
# For now NCCL needs to be turned off.
|
cmake .. -GNinja \
|
||||||
cmake .. -GNinja\
|
-DPLUGIN_FEDERATED=ON \
|
||||||
-DPLUGIN_FEDERATED=ON\
|
-DBUILD_WITH_CUDA_CUB=ON \
|
||||||
-DUSE_CUDA=ON\
|
-DUSE_CUDA=ON\
|
||||||
-DBUILD_WITH_CUDA_CUB=ON\
|
-DUSE_NCCL=ON
|
||||||
-DUSE_NCCL=OFF
|
|
||||||
ninja
|
ninja
|
||||||
cd ../python-package
|
cd ../python-package
|
||||||
pip install -e . # or equivalently python setup.py develop
|
pip install -e . # or equivalently python setup.py develop
|
||||||
```
|
```
|
||||||
|
If CMake fails to locate gRPC, you may need to pass `-DCMAKE_PREFIX_PATH=<grpc path>` to CMake.
|
||||||
|
|
||||||
Test Federated XGBoost
|
Test Federated XGBoost
|
||||||
----------------------
|
----------------------
|
||||||
|
|||||||
@ -6,7 +6,7 @@ set -x
|
|||||||
if [ "$#" -lt 1 ]
|
if [ "$#" -lt 1 ]
|
||||||
then
|
then
|
||||||
echo "Usage: $0 [container to build]"
|
echo "Usage: $0 [container to build]"
|
||||||
return 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
container=$1
|
container=$1
|
||||||
|
|
||||||
@ -17,18 +17,21 @@ echo "--- Build container ${container}"
|
|||||||
BUILD_ARGS=""
|
BUILD_ARGS=""
|
||||||
|
|
||||||
case "${container}" in
|
case "${container}" in
|
||||||
|
cpu)
|
||||||
|
;;
|
||||||
|
|
||||||
gpu|rmm)
|
gpu|rmm)
|
||||||
BUILD_ARGS="$BUILD_ARGS --build-arg CUDA_VERSION_ARG=$CUDA_VERSION"
|
BUILD_ARGS="$BUILD_ARGS --build-arg CUDA_VERSION_ARG=$CUDA_VERSION"
|
||||||
BUILD_ARGS="$BUILD_ARGS --build-arg RAPIDS_VERSION_ARG=$RAPIDS_VERSION"
|
BUILD_ARGS="$BUILD_ARGS --build-arg RAPIDS_VERSION_ARG=$RAPIDS_VERSION"
|
||||||
;;
|
;;
|
||||||
|
|
||||||
jvm_gpu_build)
|
gpu_build_centos7|jvm_gpu_build)
|
||||||
BUILD_ARGS="$BUILD_ARGS --build-arg CUDA_VERSION_ARG=$CUDA_VERSION"
|
BUILD_ARGS="$BUILD_ARGS --build-arg CUDA_VERSION_ARG=$CUDA_VERSION"
|
||||||
;;
|
;;
|
||||||
|
|
||||||
*)
|
*)
|
||||||
echo "Unrecognized container ID: ${container}"
|
echo "Unrecognized container ID: ${container}"
|
||||||
return 2
|
exit 2
|
||||||
;;
|
;;
|
||||||
esac
|
esac
|
||||||
|
|
||||||
|
|||||||
@ -14,7 +14,8 @@ $command_wrapper rm -fv dmlc-core/include/dmlc/build_config_default.h
|
|||||||
# the configured header build/dmlc/build_config.h instead of
|
# the configured header build/dmlc/build_config.h instead of
|
||||||
# include/dmlc/build_config_default.h.
|
# include/dmlc/build_config_default.h.
|
||||||
echo "--- Build libxgboost from the source"
|
echo "--- Build libxgboost from the source"
|
||||||
$command_wrapper tests/ci_build/build_via_cmake.sh -DPLUGIN_DENSE_PARSER=ON
|
$command_wrapper tests/ci_build/build_via_cmake.sh -DCMAKE_PREFIX_PATH=/opt/grpc \
|
||||||
|
-DPLUGIN_DENSE_PARSER=ON -DPLUGIN_FEDERATED=ON
|
||||||
echo "--- Run Google Test"
|
echo "--- Run Google Test"
|
||||||
$command_wrapper bash -c "cd build && ctest --extra-verbose"
|
$command_wrapper bash -c "cd build && ctest --extra-verbose"
|
||||||
echo "--- Stash XGBoost CLI executable"
|
echo "--- Stash XGBoost CLI executable"
|
||||||
|
|||||||
@ -20,10 +20,10 @@ command_wrapper="tests/ci_build/ci_build.sh gpu_build_centos7 docker --build-arg
|
|||||||
|
|
||||||
echo "--- Build libxgboost from the source"
|
echo "--- Build libxgboost from the source"
|
||||||
$command_wrapper tests/ci_build/prune_libnccl.sh
|
$command_wrapper tests/ci_build/prune_libnccl.sh
|
||||||
$command_wrapper tests/ci_build/build_via_cmake.sh -DUSE_CUDA=ON -DUSE_NCCL=ON \
|
$command_wrapper tests/ci_build/build_via_cmake.sh -DCMAKE_PREFIX_PATH=/opt/grpc \
|
||||||
-DUSE_OPENMP=ON -DHIDE_CXX_SYMBOLS=ON -DUSE_NCCL_LIB_PATH=ON \
|
-DUSE_CUDA=ON -DUSE_NCCL=ON -DUSE_OPENMP=ON -DHIDE_CXX_SYMBOLS=ON -DPLUGIN_FEDERATED=ON \
|
||||||
-DNCCL_INCLUDE_DIR=/usr/include -DNCCL_LIBRARY=/workspace/libnccl_static.a \
|
-DUSE_NCCL_LIB_PATH=ON -DNCCL_INCLUDE_DIR=/usr/include \
|
||||||
${arch_flag}
|
-DNCCL_LIBRARY=/workspace/libnccl_static.a ${arch_flag}
|
||||||
echo "--- Build binary wheel"
|
echo "--- Build binary wheel"
|
||||||
$command_wrapper bash -c \
|
$command_wrapper bash -c \
|
||||||
"cd python-package && rm -rf dist/* && python setup.py bdist_wheel --universal"
|
"cd python-package && rm -rf dist/* && python setup.py bdist_wheel --universal"
|
||||||
|
|||||||
@ -17,6 +17,7 @@ steps:
|
|||||||
- label: ":docker: Build containers"
|
- label: ":docker: Build containers"
|
||||||
commands:
|
commands:
|
||||||
- "tests/buildkite/build-containers.sh gpu"
|
- "tests/buildkite/build-containers.sh gpu"
|
||||||
|
- "tests/buildkite/build-containers.sh gpu_build_centos7"
|
||||||
- "tests/buildkite/build-containers.sh jvm_gpu_build"
|
- "tests/buildkite/build-containers.sh jvm_gpu_build"
|
||||||
key: build-containers
|
key: build-containers
|
||||||
agents:
|
agents:
|
||||||
|
|||||||
@ -13,7 +13,9 @@ steps:
|
|||||||
#### -------- CONTAINER BUILD --------
|
#### -------- CONTAINER BUILD --------
|
||||||
- label: ":docker: Build containers"
|
- label: ":docker: Build containers"
|
||||||
commands:
|
commands:
|
||||||
|
- "tests/buildkite/build-containers.sh cpu"
|
||||||
- "tests/buildkite/build-containers.sh gpu"
|
- "tests/buildkite/build-containers.sh gpu"
|
||||||
|
- "tests/buildkite/build-containers.sh gpu_build_centos7"
|
||||||
- "tests/buildkite/build-containers.sh rmm"
|
- "tests/buildkite/build-containers.sh rmm"
|
||||||
key: build-containers
|
key: build-containers
|
||||||
agents:
|
agents:
|
||||||
|
|||||||
@ -26,6 +26,15 @@ ENV CPP=cpp-8
|
|||||||
ENV GOSU_VERSION 1.10
|
ENV GOSU_VERSION 1.10
|
||||||
ENV JAVA_HOME /usr/lib/jvm/java-8-openjdk-amd64/
|
ENV JAVA_HOME /usr/lib/jvm/java-8-openjdk-amd64/
|
||||||
|
|
||||||
|
# Install gRPC
|
||||||
|
RUN git clone -b v1.49.1 https://github.com/grpc/grpc.git \
|
||||||
|
--recurse-submodules --depth 1 --shallow-submodules && \
|
||||||
|
pushd grpc && \
|
||||||
|
cmake -S . -B build -GNinja -DCMAKE_INSTALL_PREFIX=/opt/grpc && \
|
||||||
|
cmake --build build --target install && \
|
||||||
|
popd && \
|
||||||
|
rm -rf grpc
|
||||||
|
|
||||||
# Create new Conda environment
|
# Create new Conda environment
|
||||||
COPY conda_env/cpu_test.yml /scripts/
|
COPY conda_env/cpu_test.yml /scripts/
|
||||||
RUN mamba env create -n cpu_test --file=/scripts/cpu_test.yml
|
RUN mamba env create -n cpu_test --file=/scripts/cpu_test.yml
|
||||||
|
|||||||
@ -1,49 +0,0 @@
|
|||||||
ARG CUDA_VERSION_ARG
|
|
||||||
FROM nvidia/cuda:$CUDA_VERSION_ARG-devel-ubuntu16.04
|
|
||||||
ARG CUDA_VERSION_ARG
|
|
||||||
|
|
||||||
# Environment
|
|
||||||
ENV DEBIAN_FRONTEND noninteractive
|
|
||||||
SHELL ["/bin/bash", "-c"] # Use Bash as shell
|
|
||||||
|
|
||||||
# Install all basic requirements
|
|
||||||
RUN \
|
|
||||||
apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1604/x86_64/3bf863cc.pub && \
|
|
||||||
apt-get update && \
|
|
||||||
apt-get install -y software-properties-common && \
|
|
||||||
add-apt-repository ppa:ubuntu-toolchain-r/test && \
|
|
||||||
apt-get update && \
|
|
||||||
apt-get install -y tar unzip wget bzip2 libgomp1 git build-essential doxygen graphviz llvm libasan2 libidn11 ninja-build gcc-8 g++-8 && \
|
|
||||||
# CMake
|
|
||||||
wget -nv -nc https://cmake.org/files/v3.14/cmake-3.14.0-Linux-x86_64.sh --no-check-certificate && \
|
|
||||||
bash cmake-3.14.0-Linux-x86_64.sh --skip-license --prefix=/usr && \
|
|
||||||
# Python
|
|
||||||
wget -nv -O Miniconda3.sh https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh && \
|
|
||||||
bash Miniconda3.sh -b -p /opt/python
|
|
||||||
|
|
||||||
# NCCL2 (License: https://docs.nvidia.com/deeplearning/sdk/nccl-sla/index.html)
|
|
||||||
RUN \
|
|
||||||
export CUDA_SHORT=`echo $CUDA_VERSION_ARG | grep -o -E '[0-9]+\.[0-9]'` && \
|
|
||||||
export NCCL_VERSION=2.13.4-1 && \
|
|
||||||
apt-get update && \
|
|
||||||
apt-get install -y --allow-downgrades --allow-change-held-packages libnccl2=${NCCL_VERSION}+cuda${CUDA_SHORT} libnccl-dev=${NCCL_VERSION}+cuda${CUDA_SHORT}
|
|
||||||
|
|
||||||
ENV PATH=/opt/python/bin:$PATH
|
|
||||||
ENV CC=gcc-8
|
|
||||||
ENV CXX=g++-8
|
|
||||||
ENV CPP=cpp-8
|
|
||||||
|
|
||||||
ENV GOSU_VERSION 1.10
|
|
||||||
|
|
||||||
# Install lightweight sudo (not bound to TTY)
|
|
||||||
RUN set -ex; \
|
|
||||||
wget -nv -O /usr/local/bin/gosu "https://github.com/tianon/gosu/releases/download/$GOSU_VERSION/gosu-amd64" && \
|
|
||||||
chmod +x /usr/local/bin/gosu && \
|
|
||||||
gosu nobody true
|
|
||||||
|
|
||||||
# Default entry-point to use if running locally
|
|
||||||
# It will preserve attributes of created files
|
|
||||||
COPY entrypoint.sh /scripts/
|
|
||||||
|
|
||||||
WORKDIR /workspace
|
|
||||||
ENTRYPOINT ["/scripts/entrypoint.sh"]
|
|
||||||
@ -35,6 +35,15 @@ ENV CPP=/opt/rh/devtoolset-8/root/usr/bin/cpp
|
|||||||
|
|
||||||
ENV GOSU_VERSION 1.10
|
ENV GOSU_VERSION 1.10
|
||||||
|
|
||||||
|
# Install gRPC
|
||||||
|
RUN git clone -b v1.49.1 https://github.com/grpc/grpc.git \
|
||||||
|
--recurse-submodules --depth 1 && \
|
||||||
|
pushd grpc && \
|
||||||
|
cmake -S . -B build -GNinja -DCMAKE_INSTALL_PREFIX=/opt/grpc && \
|
||||||
|
cmake --build build --target install && \
|
||||||
|
popd && \
|
||||||
|
rm -rf grpc
|
||||||
|
|
||||||
# Install lightweight sudo (not bound to TTY)
|
# Install lightweight sudo (not bound to TTY)
|
||||||
RUN set -ex; \
|
RUN set -ex; \
|
||||||
wget -nv -nc -O /usr/local/bin/gosu "https://github.com/tianon/gosu/releases/download/$GOSU_VERSION/gosu-amd64" && \
|
wget -nv -nc -O /usr/local/bin/gosu "https://github.com/tianon/gosu/releases/download/$GOSU_VERSION/gosu-amd64" && \
|
||||||
|
|||||||
19
tests/cpp/plugin/helpers.cc
Normal file
19
tests/cpp/plugin/helpers.cc
Normal file
@ -0,0 +1,19 @@
|
|||||||
|
#include <chrono>
|
||||||
|
#include <thread>
|
||||||
|
#include <random>
|
||||||
|
#include <cstdint>
|
||||||
|
|
||||||
|
#include "helpers.h"
|
||||||
|
|
||||||
|
using namespace std::chrono_literals;
|
||||||
|
|
||||||
|
/*!
 * \brief Draw a pseudo-random port number from the closed interval [low, high].
 *
 * The generator is seeded from std::random_device mixed with the current
 * steady-clock tick count, so consecutive calls receive different seeds without
 * any artificial delay, and (where std::random_device is non-deterministic)
 * test processes started at the same instant do not pick the same port.
 *
 * \param low  One end of the port range (inclusive).
 * \param high The other end of the port range (inclusive). Bounds given in the
 *             wrong order are swapped rather than triggering undefined behaviour.
 * \return A value p with min(low, high) <= p <= max(low, high). The port is not
 *         probed, so it may already be in use.
 */
int GenerateRandomPort(int low, int high) {
  // std::uniform_int_distribution requires its lower bound to be <= its upper bound.
  int const lower = low < high ? low : high;
  int const upper = low < high ? high : low;

  std::random_device entropy;
  // Mix in a monotonic clock reading in case random_device is deterministic on
  // this platform.
  auto const ticks = static_cast<std::uint64_t>(
      std::chrono::steady_clock::now().time_since_epoch().count());
  std::seed_seq seed{static_cast<std::uint32_t>(entropy()),
                     static_cast<std::uint32_t>(entropy()),
                     static_cast<std::uint32_t>(ticks),
                     static_cast<std::uint32_t>(ticks >> 32)};

  std::mt19937_64 rng(seed);
  std::uniform_int_distribution<int> dist(lower, upper);
  return dist(rng);
}
|
||||||
10
tests/cpp/plugin/helpers.h
Normal file
10
tests/cpp/plugin/helpers.h
Normal file
@ -0,0 +1,10 @@
|
|||||||
|
/*!
|
||||||
|
* Copyright 2022 XGBoost contributors
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef XGBOOST_TESTS_CPP_PLUGIN_HELPERS_H_
|
||||||
|
#define XGBOOST_TESTS_CPP_PLUGIN_HELPERS_H_
|
||||||
|
|
||||||
|
/*!
 * \brief Pick a pseudo-random port number for a test server.
 *
 * \param low  Lower bound of the port range (inclusive).
 * \param high Upper bound of the port range (inclusive).
 * \return An integer drawn uniformly from [low, high]. The port is not checked
 *         for availability.
 */
int GenerateRandomPort(int low, int high);
|
||||||
|
|
||||||
|
#endif // XGBOOST_TESTS_CPP_PLUGIN_HELPERS_H_
|
||||||
@ -5,24 +5,36 @@
|
|||||||
#include <gtest/gtest.h>
|
#include <gtest/gtest.h>
|
||||||
#include <thrust/host_vector.h>
|
#include <thrust/host_vector.h>
|
||||||
|
|
||||||
|
#include <iostream>
|
||||||
#include <thread>
|
#include <thread>
|
||||||
|
#include <ctime>
|
||||||
|
|
||||||
|
#include "./helpers.h"
|
||||||
#include "../../../plugin/federated/federated_communicator.h"
|
#include "../../../plugin/federated/federated_communicator.h"
|
||||||
#include "../../../plugin/federated/federated_server.h"
|
#include "../../../plugin/federated/federated_server.h"
|
||||||
#include "../../../src/collective/device_communicator_adapter.cuh"
|
#include "../../../src/collective/device_communicator_adapter.cuh"
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
|
std::string GetServerAddress() {
|
||||||
|
int port = GenerateRandomPort(50000, 60000);
|
||||||
|
std::string address = std::string("localhost:") + std::to_string(port);
|
||||||
|
return address;
|
||||||
|
}
|
||||||
|
|
||||||
|
} // anonymous namespace
|
||||||
|
|
||||||
namespace xgboost {
|
namespace xgboost {
|
||||||
namespace collective {
|
namespace collective {
|
||||||
|
|
||||||
std::string const kServerAddress{"localhost:56789"}; // NOLINT(cert-err58-cpp)
|
|
||||||
|
|
||||||
class FederatedAdapterTest : public ::testing::Test {
|
class FederatedAdapterTest : public ::testing::Test {
|
||||||
protected:
|
protected:
|
||||||
void SetUp() override {
|
void SetUp() override {
|
||||||
|
server_address_ = GetServerAddress();
|
||||||
server_thread_.reset(new std::thread([this] {
|
server_thread_.reset(new std::thread([this] {
|
||||||
grpc::ServerBuilder builder;
|
grpc::ServerBuilder builder;
|
||||||
federated::FederatedService service{kWorldSize};
|
federated::FederatedService service{kWorldSize};
|
||||||
builder.AddListeningPort(kServerAddress, grpc::InsecureServerCredentials());
|
builder.AddListeningPort(server_address_, grpc::InsecureServerCredentials());
|
||||||
builder.RegisterService(&service);
|
builder.RegisterService(&service);
|
||||||
server_ = builder.BuildAndStart();
|
server_ = builder.BuildAndStart();
|
||||||
server_->Wait();
|
server_->Wait();
|
||||||
@ -35,6 +47,7 @@ class FederatedAdapterTest : public ::testing::Test {
|
|||||||
}
|
}
|
||||||
|
|
||||||
static int const kWorldSize{2};
|
static int const kWorldSize{2};
|
||||||
|
std::string server_address_;
|
||||||
std::unique_ptr<std::thread> server_thread_;
|
std::unique_ptr<std::thread> server_thread_;
|
||||||
std::unique_ptr<grpc::Server> server_;
|
std::unique_ptr<grpc::Server> server_;
|
||||||
};
|
};
|
||||||
@ -52,9 +65,10 @@ TEST(FederatedAdapterSimpleTest, ThrowOnInvalidCommunicator) {
|
|||||||
TEST_F(FederatedAdapterTest, DeviceAllReduceSum) {
|
TEST_F(FederatedAdapterTest, DeviceAllReduceSum) {
|
||||||
std::vector<std::thread> threads;
|
std::vector<std::thread> threads;
|
||||||
for (auto rank = 0; rank < kWorldSize; rank++) {
|
for (auto rank = 0; rank < kWorldSize; rank++) {
|
||||||
threads.emplace_back(std::thread([rank] {
|
threads.emplace_back(std::thread([rank, server_address=server_address_] {
|
||||||
FederatedCommunicator comm{kWorldSize, rank, kServerAddress};
|
FederatedCommunicator comm{kWorldSize, rank, server_address};
|
||||||
DeviceCommunicatorAdapter adapter{rank, &comm};
|
// Assign device 0 to all workers, since we run gtest on a single-GPU machine
|
||||||
|
DeviceCommunicatorAdapter adapter{0, &comm};
|
||||||
int const count = 3;
|
int const count = 3;
|
||||||
thrust::device_vector<double> buffer(count, 0);
|
thrust::device_vector<double> buffer(count, 0);
|
||||||
thrust::sequence(buffer.begin(), buffer.end());
|
thrust::sequence(buffer.begin(), buffer.end());
|
||||||
@ -74,9 +88,10 @@ TEST_F(FederatedAdapterTest, DeviceAllReduceSum) {
|
|||||||
TEST_F(FederatedAdapterTest, DeviceAllGatherV) {
|
TEST_F(FederatedAdapterTest, DeviceAllGatherV) {
|
||||||
std::vector<std::thread> threads;
|
std::vector<std::thread> threads;
|
||||||
for (auto rank = 0; rank < kWorldSize; rank++) {
|
for (auto rank = 0; rank < kWorldSize; rank++) {
|
||||||
threads.emplace_back(std::thread([rank] {
|
threads.emplace_back(std::thread([rank, server_address=server_address_] {
|
||||||
FederatedCommunicator comm{kWorldSize, rank, kServerAddress};
|
FederatedCommunicator comm{kWorldSize, rank, server_address};
|
||||||
DeviceCommunicatorAdapter adapter{rank, &comm};
|
// Assign device 0 to all workers, since we run gtest on a single-GPU machine
|
||||||
|
DeviceCommunicatorAdapter adapter{0, &comm};
|
||||||
|
|
||||||
int const count = rank + 2;
|
int const count = rank + 2;
|
||||||
thrust::device_vector<char> buffer(count, 0);
|
thrust::device_vector<char> buffer(count, 0);
|
||||||
|
|||||||
@ -5,34 +5,46 @@
|
|||||||
#include <grpcpp/server_builder.h>
|
#include <grpcpp/server_builder.h>
|
||||||
#include <gtest/gtest.h>
|
#include <gtest/gtest.h>
|
||||||
|
|
||||||
|
#include <iostream>
|
||||||
#include <thread>
|
#include <thread>
|
||||||
|
#include <ctime>
|
||||||
|
|
||||||
|
#include "helpers.h"
|
||||||
#include "../../../plugin/federated/federated_communicator.h"
|
#include "../../../plugin/federated/federated_communicator.h"
|
||||||
#include "../../../plugin/federated/federated_server.h"
|
#include "../../../plugin/federated/federated_server.h"
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
|
std::string GetServerAddress() {
|
||||||
|
int port = GenerateRandomPort(50000, 60000);
|
||||||
|
std::string address = std::string("localhost:") + std::to_string(port);
|
||||||
|
return address;
|
||||||
|
}
|
||||||
|
|
||||||
|
} // anonymous namespace
|
||||||
|
|
||||||
namespace xgboost {
|
namespace xgboost {
|
||||||
namespace collective {
|
namespace collective {
|
||||||
|
|
||||||
std::string const kServerAddress{"localhost:56789"}; // NOLINT(cert-err58-cpp)
|
|
||||||
|
|
||||||
class FederatedCommunicatorTest : public ::testing::Test {
|
class FederatedCommunicatorTest : public ::testing::Test {
|
||||||
public:
|
public:
|
||||||
static void VerifyAllreduce(int rank) {
|
static void VerifyAllreduce(int rank, const std::string& server_address) {
|
||||||
FederatedCommunicator comm{kWorldSize, rank, kServerAddress};
|
FederatedCommunicator comm{kWorldSize, rank, server_address};
|
||||||
CheckAllreduce(comm);
|
CheckAllreduce(comm);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void VerifyBroadcast(int rank) {
|
static void VerifyBroadcast(int rank, const std::string& server_address) {
|
||||||
FederatedCommunicator comm{kWorldSize, rank, kServerAddress};
|
FederatedCommunicator comm{kWorldSize, rank, server_address};
|
||||||
CheckBroadcast(comm, rank);
|
CheckBroadcast(comm, rank);
|
||||||
}
|
}
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
void SetUp() override {
|
void SetUp() override {
|
||||||
|
server_address_ = GetServerAddress();
|
||||||
server_thread_.reset(new std::thread([this] {
|
server_thread_.reset(new std::thread([this] {
|
||||||
grpc::ServerBuilder builder;
|
grpc::ServerBuilder builder;
|
||||||
federated::FederatedService service{kWorldSize};
|
federated::FederatedService service{kWorldSize};
|
||||||
builder.AddListeningPort(kServerAddress, grpc::InsecureServerCredentials());
|
builder.AddListeningPort(server_address_, grpc::InsecureServerCredentials());
|
||||||
builder.RegisterService(&service);
|
builder.RegisterService(&service);
|
||||||
server_ = builder.BuildAndStart();
|
server_ = builder.BuildAndStart();
|
||||||
server_->Wait();
|
server_->Wait();
|
||||||
@ -66,29 +78,40 @@ class FederatedCommunicatorTest : public ::testing::Test {
|
|||||||
}
|
}
|
||||||
|
|
||||||
static int const kWorldSize{3};
|
static int const kWorldSize{3};
|
||||||
|
std::string server_address_;
|
||||||
std::unique_ptr<std::thread> server_thread_;
|
std::unique_ptr<std::thread> server_thread_;
|
||||||
std::unique_ptr<grpc::Server> server_;
|
std::unique_ptr<grpc::Server> server_;
|
||||||
};
|
};
|
||||||
|
|
||||||
TEST(FederatedCommunicatorSimpleTest, ThrowOnWorldSizeTooSmall) {
|
TEST(FederatedCommunicatorSimpleTest, ThrowOnWorldSizeTooSmall) {
|
||||||
auto construct = []() { FederatedCommunicator comm{0, 0, kServerAddress, "", "", ""}; };
|
std::string server_address{GetServerAddress()};
|
||||||
|
auto construct = [server_address]() {
|
||||||
|
FederatedCommunicator comm{0, 0, server_address, "", "", ""};
|
||||||
|
};
|
||||||
EXPECT_THROW(construct(), dmlc::Error);
|
EXPECT_THROW(construct(), dmlc::Error);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(FederatedCommunicatorSimpleTest, ThrowOnRankTooSmall) {
|
TEST(FederatedCommunicatorSimpleTest, ThrowOnRankTooSmall) {
|
||||||
auto construct = []() { FederatedCommunicator comm{1, -1, kServerAddress, "", "", ""}; };
|
std::string server_address{GetServerAddress()};
|
||||||
|
auto construct = [server_address]() {
|
||||||
|
FederatedCommunicator comm{1, -1, server_address, "", "", ""};
|
||||||
|
};
|
||||||
EXPECT_THROW(construct(), dmlc::Error);
|
EXPECT_THROW(construct(), dmlc::Error);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(FederatedCommunicatorSimpleTest, ThrowOnRankTooBig) {
|
TEST(FederatedCommunicatorSimpleTest, ThrowOnRankTooBig) {
|
||||||
auto construct = []() { FederatedCommunicator comm{1, 1, kServerAddress, "", "", ""}; };
|
std::string server_address{GetServerAddress()};
|
||||||
|
auto construct = [server_address]() {
|
||||||
|
FederatedCommunicator comm{1, 1, server_address, "", "", ""};
|
||||||
|
};
|
||||||
EXPECT_THROW(construct(), dmlc::Error);
|
EXPECT_THROW(construct(), dmlc::Error);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(FederatedCommunicatorSimpleTest, ThrowOnWorldSizeNotInteger) {
|
TEST(FederatedCommunicatorSimpleTest, ThrowOnWorldSizeNotInteger) {
|
||||||
auto construct = []() {
|
std::string server_address{GetServerAddress()};
|
||||||
|
auto construct = [server_address]() {
|
||||||
Json config{JsonObject()};
|
Json config{JsonObject()};
|
||||||
config["federated_server_address"] = kServerAddress;
|
config["federated_server_address"] = server_address;
|
||||||
config["federated_world_size"] = std::string("1");
|
config["federated_world_size"] = std::string("1");
|
||||||
config["federated_rank"] = Integer(0);
|
config["federated_rank"] = Integer(0);
|
||||||
auto *comm = FederatedCommunicator::Create(config);
|
auto *comm = FederatedCommunicator::Create(config);
|
||||||
@ -97,9 +120,10 @@ TEST(FederatedCommunicatorSimpleTest, ThrowOnWorldSizeNotInteger) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
TEST(FederatedCommunicatorSimpleTest, ThrowOnRankNotInteger) {
|
TEST(FederatedCommunicatorSimpleTest, ThrowOnRankNotInteger) {
|
||||||
auto construct = []() {
|
std::string server_address{GetServerAddress()};
|
||||||
|
auto construct = [server_address]() {
|
||||||
Json config{JsonObject()};
|
Json config{JsonObject()};
|
||||||
config["federated_server_address"] = kServerAddress;
|
config["federated_server_address"] = server_address;
|
||||||
config["federated_world_size"] = 1;
|
config["federated_world_size"] = 1;
|
||||||
config["federated_rank"] = std::string("0");
|
config["federated_rank"] = std::string("0");
|
||||||
auto *comm = FederatedCommunicator::Create(config);
|
auto *comm = FederatedCommunicator::Create(config);
|
||||||
@ -108,20 +132,23 @@ TEST(FederatedCommunicatorSimpleTest, ThrowOnRankNotInteger) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
TEST(FederatedCommunicatorSimpleTest, GetWorldSizeAndRank) {
|
TEST(FederatedCommunicatorSimpleTest, GetWorldSizeAndRank) {
|
||||||
FederatedCommunicator comm{6, 3, kServerAddress};
|
std::string server_address{GetServerAddress()};
|
||||||
|
FederatedCommunicator comm{6, 3, server_address};
|
||||||
EXPECT_EQ(comm.GetWorldSize(), 6);
|
EXPECT_EQ(comm.GetWorldSize(), 6);
|
||||||
EXPECT_EQ(comm.GetRank(), 3);
|
EXPECT_EQ(comm.GetRank(), 3);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(FederatedCommunicatorSimpleTest, IsDistributed) {
|
TEST(FederatedCommunicatorSimpleTest, IsDistributed) {
|
||||||
FederatedCommunicator comm{2, 1, kServerAddress};
|
std::string server_address{GetServerAddress()};
|
||||||
|
FederatedCommunicator comm{2, 1, server_address};
|
||||||
EXPECT_TRUE(comm.IsDistributed());
|
EXPECT_TRUE(comm.IsDistributed());
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_F(FederatedCommunicatorTest, Allreduce) {
|
TEST_F(FederatedCommunicatorTest, Allreduce) {
|
||||||
std::vector<std::thread> threads;
|
std::vector<std::thread> threads;
|
||||||
for (auto rank = 0; rank < kWorldSize; rank++) {
|
for (auto rank = 0; rank < kWorldSize; rank++) {
|
||||||
threads.emplace_back(std::thread(&FederatedCommunicatorTest::VerifyAllreduce, rank));
|
threads.emplace_back(
|
||||||
|
std::thread(&FederatedCommunicatorTest::VerifyAllreduce, rank, server_address_));
|
||||||
}
|
}
|
||||||
for (auto &thread : threads) {
|
for (auto &thread : threads) {
|
||||||
thread.join();
|
thread.join();
|
||||||
@ -131,7 +158,8 @@ TEST_F(FederatedCommunicatorTest, Allreduce) {
|
|||||||
TEST_F(FederatedCommunicatorTest, Broadcast) {
|
TEST_F(FederatedCommunicatorTest, Broadcast) {
|
||||||
std::vector<std::thread> threads;
|
std::vector<std::thread> threads;
|
||||||
for (auto rank = 0; rank < kWorldSize; rank++) {
|
for (auto rank = 0; rank < kWorldSize; rank++) {
|
||||||
threads.emplace_back(std::thread(&FederatedCommunicatorTest::VerifyBroadcast, rank));
|
threads.emplace_back(
|
||||||
|
std::thread(&FederatedCommunicatorTest::VerifyBroadcast, rank, server_address_));
|
||||||
}
|
}
|
||||||
for (auto &thread : threads) {
|
for (auto &thread : threads) {
|
||||||
thread.join();
|
thread.join();
|
||||||
|
|||||||
@ -4,32 +4,45 @@
|
|||||||
#include <grpcpp/server_builder.h>
|
#include <grpcpp/server_builder.h>
|
||||||
#include <gtest/gtest.h>
|
#include <gtest/gtest.h>
|
||||||
|
|
||||||
|
#include <iostream>
|
||||||
#include <thread>
|
#include <thread>
|
||||||
|
#include <ctime>
|
||||||
|
|
||||||
|
#include "helpers.h"
|
||||||
#include "federated_client.h"
|
#include "federated_client.h"
|
||||||
#include "federated_server.h"
|
#include "federated_server.h"
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
|
std::string GetServerAddress() {
|
||||||
|
int port = GenerateRandomPort(50000, 60000);
|
||||||
|
std::string address = std::string("localhost:") + std::to_string(port);
|
||||||
|
return address;
|
||||||
|
}
|
||||||
|
|
||||||
|
} // anonymous namespace
|
||||||
|
|
||||||
namespace xgboost {
|
namespace xgboost {
|
||||||
|
|
||||||
class FederatedServerTest : public ::testing::Test {
|
class FederatedServerTest : public ::testing::Test {
|
||||||
public:
|
public:
|
||||||
static void VerifyAllgather(int rank) {
|
static void VerifyAllgather(int rank, const std::string& server_address) {
|
||||||
federated::FederatedClient client{kServerAddress, rank};
|
federated::FederatedClient client{server_address, rank};
|
||||||
CheckAllgather(client, rank);
|
CheckAllgather(client, rank);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void VerifyAllreduce(int rank) {
|
static void VerifyAllreduce(int rank, const std::string& server_address) {
|
||||||
federated::FederatedClient client{kServerAddress, rank};
|
federated::FederatedClient client{server_address, rank};
|
||||||
CheckAllreduce(client);
|
CheckAllreduce(client);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void VerifyBroadcast(int rank) {
|
static void VerifyBroadcast(int rank, const std::string& server_address) {
|
||||||
federated::FederatedClient client{kServerAddress, rank};
|
federated::FederatedClient client{server_address, rank};
|
||||||
CheckBroadcast(client, rank);
|
CheckBroadcast(client, rank);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void VerifyMixture(int rank) {
|
static void VerifyMixture(int rank, const std::string& server_address) {
|
||||||
federated::FederatedClient client{kServerAddress, rank};
|
federated::FederatedClient client{server_address, rank};
|
||||||
for (auto i = 0; i < 10; i++) {
|
for (auto i = 0; i < 10; i++) {
|
||||||
CheckAllgather(client, rank);
|
CheckAllgather(client, rank);
|
||||||
CheckAllreduce(client);
|
CheckAllreduce(client);
|
||||||
@ -39,10 +52,11 @@ class FederatedServerTest : public ::testing::Test {
|
|||||||
|
|
||||||
protected:
|
protected:
|
||||||
void SetUp() override {
|
void SetUp() override {
|
||||||
|
server_address_ = GetServerAddress();
|
||||||
server_thread_.reset(new std::thread([this] {
|
server_thread_.reset(new std::thread([this] {
|
||||||
grpc::ServerBuilder builder;
|
grpc::ServerBuilder builder;
|
||||||
federated::FederatedService service{kWorldSize};
|
federated::FederatedService service{kWorldSize};
|
||||||
builder.AddListeningPort(kServerAddress, grpc::InsecureServerCredentials());
|
builder.AddListeningPort(server_address_, grpc::InsecureServerCredentials());
|
||||||
builder.RegisterService(&service);
|
builder.RegisterService(&service);
|
||||||
server_ = builder.BuildAndStart();
|
server_ = builder.BuildAndStart();
|
||||||
server_->Wait();
|
server_->Wait();
|
||||||
@ -80,17 +94,15 @@ class FederatedServerTest : public ::testing::Test {
|
|||||||
}
|
}
|
||||||
|
|
||||||
static int const kWorldSize{3};
|
static int const kWorldSize{3};
|
||||||
static std::string const kServerAddress;
|
std::string server_address_;
|
||||||
std::unique_ptr<std::thread> server_thread_;
|
std::unique_ptr<std::thread> server_thread_;
|
||||||
std::unique_ptr<grpc::Server> server_;
|
std::unique_ptr<grpc::Server> server_;
|
||||||
};
|
};
|
||||||
|
|
||||||
std::string const FederatedServerTest::kServerAddress{"localhost:56789"}; // NOLINT(cert-err58-cpp)
|
|
||||||
|
|
||||||
TEST_F(FederatedServerTest, Allgather) {
|
TEST_F(FederatedServerTest, Allgather) {
|
||||||
std::vector<std::thread> threads;
|
std::vector<std::thread> threads;
|
||||||
for (auto rank = 0; rank < kWorldSize; rank++) {
|
for (auto rank = 0; rank < kWorldSize; rank++) {
|
||||||
threads.emplace_back(std::thread(&FederatedServerTest::VerifyAllgather, rank));
|
threads.emplace_back(std::thread(&FederatedServerTest::VerifyAllgather, rank, server_address_));
|
||||||
}
|
}
|
||||||
for (auto& thread : threads) {
|
for (auto& thread : threads) {
|
||||||
thread.join();
|
thread.join();
|
||||||
@ -100,7 +112,7 @@ TEST_F(FederatedServerTest, Allgather) {
|
|||||||
TEST_F(FederatedServerTest, Allreduce) {
|
TEST_F(FederatedServerTest, Allreduce) {
|
||||||
std::vector<std::thread> threads;
|
std::vector<std::thread> threads;
|
||||||
for (auto rank = 0; rank < kWorldSize; rank++) {
|
for (auto rank = 0; rank < kWorldSize; rank++) {
|
||||||
threads.emplace_back(std::thread(&FederatedServerTest::VerifyAllreduce, rank));
|
threads.emplace_back(std::thread(&FederatedServerTest::VerifyAllreduce, rank, server_address_));
|
||||||
}
|
}
|
||||||
for (auto& thread : threads) {
|
for (auto& thread : threads) {
|
||||||
thread.join();
|
thread.join();
|
||||||
@ -110,7 +122,7 @@ TEST_F(FederatedServerTest, Allreduce) {
|
|||||||
TEST_F(FederatedServerTest, Broadcast) {
|
TEST_F(FederatedServerTest, Broadcast) {
|
||||||
std::vector<std::thread> threads;
|
std::vector<std::thread> threads;
|
||||||
for (auto rank = 0; rank < kWorldSize; rank++) {
|
for (auto rank = 0; rank < kWorldSize; rank++) {
|
||||||
threads.emplace_back(std::thread(&FederatedServerTest::VerifyBroadcast, rank));
|
threads.emplace_back(std::thread(&FederatedServerTest::VerifyBroadcast, rank, server_address_));
|
||||||
}
|
}
|
||||||
for (auto& thread : threads) {
|
for (auto& thread : threads) {
|
||||||
thread.join();
|
thread.join();
|
||||||
@ -120,7 +132,7 @@ TEST_F(FederatedServerTest, Broadcast) {
|
|||||||
TEST_F(FederatedServerTest, Mixture) {
|
TEST_F(FederatedServerTest, Mixture) {
|
||||||
std::vector<std::thread> threads;
|
std::vector<std::thread> threads;
|
||||||
for (auto rank = 0; rank < kWorldSize; rank++) {
|
for (auto rank = 0; rank < kWorldSize; rank++) {
|
||||||
threads.emplace_back(std::thread(&FederatedServerTest::VerifyMixture, rank));
|
threads.emplace_back(std::thread(&FederatedServerTest::VerifyMixture, rank, server_address_));
|
||||||
}
|
}
|
||||||
for (auto& thread : threads) {
|
for (auto& thread : threads) {
|
||||||
thread.join();
|
thread.join();
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user