[rabit] Drop support for MPI backend. (#9525)
- Add checks in cmake. - Remove mpi related code.
This commit is contained in:
parent
c3574d932f
commit
90ef250ea1
@ -47,7 +47,6 @@ option(USE_OPENMP "Build with OpenMP support." ON)
|
|||||||
option(BUILD_STATIC_LIB "Build static library" OFF)
|
option(BUILD_STATIC_LIB "Build static library" OFF)
|
||||||
option(BUILD_DEPRECATED_CLI "Build the deprecated command line interface" OFF)
|
option(BUILD_DEPRECATED_CLI "Build the deprecated command line interface" OFF)
|
||||||
option(FORCE_SHARED_CRT "Build with dynamic CRT on Windows (/MD)" OFF)
|
option(FORCE_SHARED_CRT "Build with dynamic CRT on Windows (/MD)" OFF)
|
||||||
option(RABIT_BUILD_MPI "Build MPI" OFF)
|
|
||||||
## Bindings
|
## Bindings
|
||||||
option(JVM_BINDINGS "Build JVM bindings" OFF)
|
option(JVM_BINDINGS "Build JVM bindings" OFF)
|
||||||
option(R_LIB "Build shared library for R package" OFF)
|
option(R_LIB "Build shared library for R package" OFF)
|
||||||
@ -106,12 +105,6 @@ if (R_LIB AND GOOGLE_TEST)
|
|||||||
message(WARNING "Some C++ unittests will fail with `R_LIB` enabled,
|
message(WARNING "Some C++ unittests will fail with `R_LIB` enabled,
|
||||||
as R package redirects some functions to R runtime implementation.")
|
as R package redirects some functions to R runtime implementation.")
|
||||||
endif (R_LIB AND GOOGLE_TEST)
|
endif (R_LIB AND GOOGLE_TEST)
|
||||||
if (USE_AVX)
|
|
||||||
message(SEND_ERROR "The option 'USE_AVX' is deprecated as experimental AVX features have been removed from XGBoost.")
|
|
||||||
endif (USE_AVX)
|
|
||||||
if (PLUGIN_LZ4)
|
|
||||||
message(SEND_ERROR "The option 'PLUGIN_LZ4' is removed from XGBoost.")
|
|
||||||
endif (PLUGIN_LZ4)
|
|
||||||
if (PLUGIN_RMM AND NOT (USE_CUDA))
|
if (PLUGIN_RMM AND NOT (USE_CUDA))
|
||||||
message(SEND_ERROR "`PLUGIN_RMM` must be enabled with `USE_CUDA` flag.")
|
message(SEND_ERROR "`PLUGIN_RMM` must be enabled with `USE_CUDA` flag.")
|
||||||
endif (PLUGIN_RMM AND NOT (USE_CUDA))
|
endif (PLUGIN_RMM AND NOT (USE_CUDA))
|
||||||
@ -144,6 +137,26 @@ if (PLUGIN_FEDERATED)
|
|||||||
endif ()
|
endif ()
|
||||||
endif ()
|
endif ()
|
||||||
|
|
||||||
|
#-- Removed options
|
||||||
|
if (USE_AVX)
|
||||||
|
message(SEND_ERROR "The option `USE_AVX` is deprecated as experimental AVX features have been removed from XGBoost.")
|
||||||
|
endif (USE_AVX)
|
||||||
|
if (PLUGIN_LZ4)
|
||||||
|
message(SEND_ERROR "The option `PLUGIN_LZ4` is removed from XGBoost.")
|
||||||
|
endif (PLUGIN_LZ4)
|
||||||
|
if (RABIT_BUILD_MPI)
|
||||||
|
message(SEND_ERROR "The option `RABIT_BUILD_MPI` has been removed from XGBoost.")
|
||||||
|
endif (RABIT_BUILD_MPI)
|
||||||
|
if (USE_S3)
|
||||||
|
message(SEND_ERROR "The option `USE_S3` has been removed from XGBoost")
|
||||||
|
endif (USE_S3)
|
||||||
|
if (USE_AZURE)
|
||||||
|
message(SEND_ERROR "The option `USE_AZURE` has been removed from XGBoost")
|
||||||
|
endif (USE_AZURE)
|
||||||
|
if (USE_HDFS)
|
||||||
|
message(SEND_ERROR "The option `USE_HDFS` has been removed from XGBoost")
|
||||||
|
endif (USE_HDFS)
|
||||||
|
|
||||||
#-- Sanitizer
|
#-- Sanitizer
|
||||||
if (USE_SANITIZER)
|
if (USE_SANITIZER)
|
||||||
include(cmake/Sanitizer.cmake)
|
include(cmake/Sanitizer.cmake)
|
||||||
@ -222,9 +235,6 @@ endif (MSVC)
|
|||||||
|
|
||||||
# rabit
|
# rabit
|
||||||
add_subdirectory(rabit)
|
add_subdirectory(rabit)
|
||||||
if (RABIT_BUILD_MPI)
|
|
||||||
find_package(MPI REQUIRED)
|
|
||||||
endif (RABIT_BUILD_MPI)
|
|
||||||
|
|
||||||
# core xgboost
|
# core xgboost
|
||||||
add_subdirectory(${xgboost_SOURCE_DIR}/src)
|
add_subdirectory(${xgboost_SOURCE_DIR}/src)
|
||||||
|
|||||||
@ -295,10 +295,6 @@ macro(xgboost_target_link_libraries target)
|
|||||||
target_link_libraries(${target} PRIVATE CUDA::nvToolsExt)
|
target_link_libraries(${target} PRIVATE CUDA::nvToolsExt)
|
||||||
endif (USE_NVTX)
|
endif (USE_NVTX)
|
||||||
|
|
||||||
if (RABIT_BUILD_MPI)
|
|
||||||
target_link_libraries(${target} PRIVATE MPI::MPI_CXX)
|
|
||||||
endif (RABIT_BUILD_MPI)
|
|
||||||
|
|
||||||
if (MINGW)
|
if (MINGW)
|
||||||
target_link_libraries(${target} PRIVATE wsock32 ws2_32)
|
target_link_libraries(${target} PRIVATE wsock32 ws2_32)
|
||||||
endif (MINGW)
|
endif (MINGW)
|
||||||
|
|||||||
@ -6,9 +6,7 @@ set(RABIT_SOURCES
|
|||||||
${CMAKE_CURRENT_LIST_DIR}/src/allreduce_base.cc
|
${CMAKE_CURRENT_LIST_DIR}/src/allreduce_base.cc
|
||||||
${CMAKE_CURRENT_LIST_DIR}/src/rabit_c_api.cc)
|
${CMAKE_CURRENT_LIST_DIR}/src/rabit_c_api.cc)
|
||||||
|
|
||||||
if (RABIT_BUILD_MPI)
|
if (RABIT_MOCK)
|
||||||
list(APPEND RABIT_SOURCES ${CMAKE_CURRENT_LIST_DIR}/src/engine_mpi.cc)
|
|
||||||
elseif (RABIT_MOCK)
|
|
||||||
list(APPEND RABIT_SOURCES ${CMAKE_CURRENT_LIST_DIR}/src/engine_mock.cc)
|
list(APPEND RABIT_SOURCES ${CMAKE_CURRENT_LIST_DIR}/src/engine_mock.cc)
|
||||||
else ()
|
else ()
|
||||||
list(APPEND RABIT_SOURCES ${CMAKE_CURRENT_LIST_DIR}/src/engine.cc)
|
list(APPEND RABIT_SOURCES ${CMAKE_CURRENT_LIST_DIR}/src/engine.cc)
|
||||||
|
|||||||
@ -1,162 +0,0 @@
|
|||||||
/*!
|
|
||||||
* Copyright (c) 2014 by Contributors
|
|
||||||
* \file engine_mpi.cc
|
|
||||||
* \brief this file gives an implementation of engine interface using MPI,
|
|
||||||
* this will allow rabit programs to run with MPI, but does not come with fault tolerance
|
|
||||||
*
|
|
||||||
* \author Tianqi Chen
|
|
||||||
*/
|
|
||||||
#define NOMINMAX
|
|
||||||
#include <mpi.h>
|
|
||||||
#include <rabit/base.h>
|
|
||||||
#include <cstdio>
|
|
||||||
#include <string>
|
|
||||||
#include "rabit/internal/engine.h"
|
|
||||||
#include "rabit/internal/utils.h"
|
|
||||||
|
|
||||||
namespace rabit {
|
|
||||||
namespace engine {
|
|
||||||
/*! \brief implementation of engine using MPI */
|
|
||||||
class MPIEngine : public IEngine {
|
|
||||||
public:
|
|
||||||
MPIEngine(void) {
|
|
||||||
version_number = 0;
|
|
||||||
}
|
|
||||||
void Allgather(void *sendrecvbuf_, size_t total_size, size_t slice_begin,
|
|
||||||
size_t slice_end, size_t size_prev_slice) override {
|
|
||||||
utils::Error("MPIEngine:: Allgather is not supported");
|
|
||||||
}
|
|
||||||
void Allreduce(void *sendrecvbuf_, size_t type_nbytes, size_t count,
|
|
||||||
ReduceFunction reducer, PreprocFunction prepare_fun,
|
|
||||||
void *prepare_arg) override {
|
|
||||||
utils::Error("MPIEngine:: Allreduce is not supported,"\
|
|
||||||
"use Allreduce_ instead");
|
|
||||||
}
|
|
||||||
int GetRingPrevRank(void) const override {
|
|
||||||
utils::Error("MPIEngine:: GetRingPrevRank is not supported");
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
void Broadcast(void *sendrecvbuf_, size_t size, int root) override {
|
|
||||||
MPI::COMM_WORLD.Bcast(sendrecvbuf_, size, MPI::CHAR, root);
|
|
||||||
}
|
|
||||||
virtual void InitAfterException(void) {
|
|
||||||
utils::Error("MPI is not fault tolerant");
|
|
||||||
}
|
|
||||||
virtual int LoadCheckPoint(Serializable *global_model,
|
|
||||||
Serializable *local_model = NULL) {
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
virtual void CheckPoint(const Serializable *global_model,
|
|
||||||
const Serializable *local_model = NULL) {
|
|
||||||
version_number += 1;
|
|
||||||
}
|
|
||||||
virtual void LazyCheckPoint(const Serializable *global_model) {
|
|
||||||
version_number += 1;
|
|
||||||
}
|
|
||||||
virtual int VersionNumber(void) const {
|
|
||||||
return version_number;
|
|
||||||
}
|
|
||||||
/*! \brief get rank of current node */
|
|
||||||
virtual int GetRank(void) const {
|
|
||||||
return MPI::COMM_WORLD.Get_rank();
|
|
||||||
}
|
|
||||||
/*! \brief get total number of nodes (world size) */
|
|
||||||
virtual int GetWorldSize(void) const {
|
|
||||||
return MPI::COMM_WORLD.Get_size();
|
|
||||||
}
|
|
||||||
/*! \brief whether it is distributed */
|
|
||||||
virtual bool IsDistributed(void) const {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
/*! \brief get the host name of current node */
|
|
||||||
virtual std::string GetHost(void) const {
|
|
||||||
int len;
|
|
||||||
char name[MPI_MAX_PROCESSOR_NAME];
|
|
||||||
MPI::Get_processor_name(name, len);
|
|
||||||
name[len] = '\0';
|
|
||||||
return std::string(name);
|
|
||||||
}
|
|
||||||
virtual void TrackerPrint(const std::string &msg) {
|
|
||||||
// simply print information into the tracker
|
|
||||||
if (GetRank() == 0) {
|
|
||||||
utils::Printf("%s", msg.c_str());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private:
|
|
||||||
int version_number;
|
|
||||||
};
|
|
||||||
|
|
||||||
// singleton sync manager
|
|
||||||
MPIEngine manager;
|
|
||||||
|
|
||||||
/*! \brief initialize the synchronization module */
|
|
||||||
bool Init(int argc, char *argv[]) {
|
|
||||||
try {
|
|
||||||
MPI::Init(argc, argv);
|
|
||||||
return true;
|
|
||||||
} catch (const std::exception& e) {
|
|
||||||
fprintf(stderr, " failed in MPI Init %s\n", e.what());
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
/*! \brief finalize synchronization module */
|
|
||||||
bool Finalize(void) {
|
|
||||||
try {
|
|
||||||
MPI::Finalize();
|
|
||||||
return true;
|
|
||||||
} catch (const std::exception& e) {
|
|
||||||
fprintf(stderr, "failed in MPI shutdown %s\n", e.what());
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/*! \brief singleton method to get engine */
|
|
||||||
IEngine *GetEngine(void) {
|
|
||||||
return &manager;
|
|
||||||
}
|
|
||||||
// transform enum to MPI data type
|
|
||||||
inline MPI::Datatype GetType(mpi::DataType dtype) {
|
|
||||||
using namespace mpi;
|
|
||||||
switch (dtype) {
|
|
||||||
case kChar: return MPI::CHAR;
|
|
||||||
case kUChar: return MPI::BYTE;
|
|
||||||
case kInt: return MPI::INT;
|
|
||||||
case kUInt: return MPI::UNSIGNED;
|
|
||||||
case kLong: return MPI::LONG;
|
|
||||||
case kULong: return MPI::UNSIGNED_LONG;
|
|
||||||
case kFloat: return MPI::FLOAT;
|
|
||||||
case kDouble: return MPI::DOUBLE;
|
|
||||||
case kLongLong: return MPI::LONG_LONG;
|
|
||||||
case kULongLong: return MPI::UNSIGNED_LONG_LONG;
|
|
||||||
}
|
|
||||||
utils::Error("unknown mpi::DataType");
|
|
||||||
return MPI::CHAR;
|
|
||||||
}
|
|
||||||
// transform enum to MPI OP
|
|
||||||
inline MPI::Op GetOp(mpi::OpType otype) {
|
|
||||||
using namespace mpi;
|
|
||||||
switch (otype) {
|
|
||||||
case kMax: return MPI::MAX;
|
|
||||||
case kMin: return MPI::MIN;
|
|
||||||
case kSum: return MPI::SUM;
|
|
||||||
case kBitwiseOR: return MPI::BOR;
|
|
||||||
}
|
|
||||||
utils::Error("unknown mpi::OpType");
|
|
||||||
return MPI::MAX;
|
|
||||||
}
|
|
||||||
// perform in-place allreduce, on sendrecvbuf
|
|
||||||
void Allreduce_(void *sendrecvbuf,
|
|
||||||
size_t type_nbytes,
|
|
||||||
size_t count,
|
|
||||||
IEngine::ReduceFunction red,
|
|
||||||
mpi::DataType dtype,
|
|
||||||
mpi::OpType op,
|
|
||||||
IEngine::PreprocFunction prepare_fun,
|
|
||||||
void *prepare_arg) {
|
|
||||||
if (prepare_fun != NULL) prepare_fun(prepare_arg);
|
|
||||||
MPI::COMM_WORLD.Allreduce(MPI_IN_PLACE, sendrecvbuf,
|
|
||||||
count, GetType(dtype), GetOp(op));
|
|
||||||
}
|
|
||||||
} // namespace engine
|
|
||||||
} // namespace rabit
|
|
||||||
Loading…
x
Reference in New Issue
Block a user