support bootstrap allreduce/broadcast (#98)

* support run rabit tests as xgboost subproject using xgboost/dmlc-core

* support tracker config set/get

* remove redundant printf

* remove redundant printf

* add c++0x declaration

* log allreduce/broadcast caller, engine should track caller stack for
investigation

* tracker support binary config format

* Revert "tracker support binary config format"

This reverts commit 2a28e5e2b55c200cb621af8d19f17ab1bc62503b.

* remove caller, prototype fetch allreduce/broadcast results from resbuf

* store cached allreduce/broadcast seq_no to tracker

* allow restore all caches from other nodes

* try new rabit collective cache, todo: recv_link seems down

* link up cache restore with main recovery

* cleanup load cache state

* update cache api

* pass test.mk

* have a working tests

* try to unify check into actionsummary

* more logging to debug distributed hist three method issue

* update rabit interface to support caller signature matching

* split seq_counter and cur_cache_seq into different variables

* still see issue with inf loop

* support debug print caller as well as allreduce op

* cleanup

* remove get/set cache from model_recover, adding recover in
loadcheckpoint

* clarify rabit cache strategy, cache is set only by successful collective
call involving all nodes with unique cache key. if all nodes call
getcache at same time, we keep rabit run collective call. If some nodes
call getcache while others not, we backfill cache from those nodes with
most entries

* revert caller logs

* fix lint error

* fix engine mpi signature

* support getcache by ref

* allow result buffer to persist to filestream

* add logging

* try fix checkpoint failure recovery case

* use int64_t to avoid overflow caused seq fault

* try avoid int overflow

* try fix checkpoint failure recovery case

* try to avoid seqno overflowing to negative by offsetting the special flag value;
add cache seq no to checkpoint/load checkpoint/checkpoint ack to avoid
confusion from cache recovery

* fix cache seq assert error

* remove logging, handle edge case

* add extensive log to checkpoint state  with different seq no

* fix lint errors

* clean up comments before merge back to master

* add logs to allreduce/broadcast/checkpoint

* use unsigned int 32 and give seq no larger range

* address remove allreduce dropseq code segment

* using caller signature to filter bootstrapallreduces

* remove get/set cache from empty

* apply signature to reducer

* apply signature to broadcast

* add key to broadcast log

* fix broadcast signature

* fix default _line value for non linux system

* adding comments, remove sleep(1)

* fix osx build issue

* try fix mpi

* fix doc

* fix engine_empty api

* logging, adding more logs, restore immutable assertion

* print unsigned int with ud

* fix lint

* rename seqtype to kSeq and KCache indicating its usage
apply kDiffSeq check to load_cache routine

* comment allreduce/broadcast log

* allow tests run on arm

* enable flag to turn on / off cache

* add log info alert if user choose to enable rabit bootstrap cache

* add rabit_debug setting so user can use config to turn on

* log flags when user turn on rabit_debug

* force rabit restart if tracker assign -1 rank

* use OPENMP to vectorize reducer

* address comment

* Revert "address comment"

This reverts commit 1dc61f33e7357dad8fa65528abeb81db92c5f9ed.

* fix checkpoint size print 0

* per feedback, remove DISABLEOPEMP, address race condition

* - remove openmp from this pr
- update name from cache to bootstrapcache

* add default value of signature macros

* remove openmp from cmake file

* Update src/allreduce_robust.cc

Co-Authored-By: Philip Hyunsu Cho <chohyu01@cs.washington.edu>

* Update src/allreduce_robust.cc

Co-Authored-By: Philip Hyunsu Cho <chohyu01@cs.washington.edu>

* run test with cmake

* remove openmp

* fix cmake based tests

* use cmake test fix darwin .dylib issue

* move around rabit_signature definition due to windows build

* misc, add c++ check in CMakeFile

* per feedback

* resolve CMake file

* update rabit version
This commit is contained in:
Chen Qin
2019-08-27 18:12:33 -07:00
committed by Philip Hyunsu Cho
parent dba32d54d1
commit 5797dcb64e
20 changed files with 927 additions and 232 deletions

View File

@@ -9,6 +9,40 @@
#include <string>
#include "../serializable.h"
// keeps rabit api caller signature
// _FILE, _LINE and _CALLER are used as the default values of the
// _file / _line / _caller parameters of the collective APIs declared in
// this header, where they are documented as inputs to a unique cache key.
// When the compiler provides __has_builtin and reports the matching builtin,
// each macro expands to __builtin_FILE() / __builtin_LINE() /
// __builtin_FUNCTION(); otherwise it falls back to "N/A" / -1 / "N/A".
// The RABIT_API_CALLER_SIGNATURE guard makes this section define the macros
// only once.
#ifndef RABIT_API_CALLER_SIGNATURE
#define RABIT_API_CALLER_SIGNATURE
#ifdef __has_builtin
// each builtin is probed separately, so any subset may be available
#if __has_builtin(__builtin_FILE)
#define _FILE __builtin_FILE()
#else
#define _FILE "N/A"
#endif // __has_builtin(__builtin_FILE)
#if __has_builtin(__builtin_LINE)
#define _LINE __builtin_LINE()
#else
#define _LINE -1
#endif // __has_builtin(__builtin_LINE)
#if __has_builtin(__builtin_FUNCTION)
#define _CALLER __builtin_FUNCTION()
#else
#define _CALLER "N/A"
#endif // __has_builtin(__builtin_FUNCTION)
#else
// no __has_builtin support at all: use the placeholder values
#define _FILE "N/A"
#define _LINE -1
#define _CALLER "N/A"
#endif // __has_builtin
#endif // RABIT_API_CALLER_SIGNATURE
namespace MPI {
/*! \brief MPI data type just to be compatible with MPI reduce function*/
class Datatype;
@@ -54,20 +88,36 @@ class IEngine {
* will be called by the function before performing Allreduce in order to initialize the data in sendrecvbuf.
* If the result of Allreduce can be recovered directly, then prepare_func will NOT be called
* \param prepare_arg argument used to pass into the lazy preprocessing function
* \param is_bootstrap if this allreduce is needed to bootstrap failed node
* \param _file caller file name used to generate unique cache key
* \param _line caller line number used to generate unique cache key
* \param _caller caller function name used to generate unique cache key
*/
virtual void Allreduce(void *sendrecvbuf_,
size_t type_nbytes,
size_t count,
ReduceFunction reducer,
PreprocFunction prepare_fun = NULL,
void *prepare_arg = NULL) = 0;
void *prepare_arg = NULL,
bool is_bootstrap = false,
const char* _file = _FILE,
const int _line = _LINE,
const char* _caller = _CALLER) = 0;
/*!
* \brief broadcasts data from root to every other node
* \param sendrecvbuf_ buffer for both sending and receiving data
* \param size the size of the data to be broadcast
* \param root the root worker id to broadcast the data
* \param is_bootstrap if this broadcast is needed to bootstrap failed node (defaults to false)
* \param _file caller file name used to generate unique cache key (defaults to _FILE)
* \param _line caller line number used to generate unique cache key (defaults to _LINE)
* \param _caller caller function name used to generate unique cache key (defaults to _CALLER)
*/
virtual void Broadcast(void *sendrecvbuf_, size_t size, int root) = 0;
virtual void Broadcast(void *sendrecvbuf_, size_t size, int root,
bool is_bootstrap = false,
const char* _file = _FILE,
const int _line = _LINE,
const char* _caller = _CALLER) = 0;
/*!
* \brief explicitly re-initialize everything before calling LoadCheckPoint
* call this function when IEngine throws an exception,
@@ -204,6 +254,10 @@ enum DataType {
* will be called by the function before performing Allreduce, to initialize the data in sendrecvbuf_.
* If the result of Allreduce can be recovered directly, then prepare_func will NOT be called
* \param prepare_arg argument used to pass into the lazy preprocessing function.
* \param is_bootstrap if this allreduce is needed to bootstrap failed node
* \param _file caller file name used to generate unique cache key
* \param _line caller line number used to generate unique cache key
* \param _caller caller function name used to generate unique cache key
*/
void Allreduce_(void *sendrecvbuf,
size_t type_nbytes,
@@ -212,8 +266,11 @@ void Allreduce_(void *sendrecvbuf,
mpi::DataType dtype,
mpi::OpType op,
IEngine::PreprocFunction prepare_fun = NULL,
void *prepare_arg = NULL);
void *prepare_arg = NULL,
bool is_bootstrap = false,
const char* _file = _FILE,
const int _line = _LINE,
const char* _caller = _CALLER);
/*!
* \brief handle for customized reducer, used to handle customized reduce
* this class is mainly created for compatibility issues with MPI's customized reduce
@@ -239,12 +296,20 @@ class ReduceHandle {
* will be called by the function before performing Allreduce in order to initialize the data in sendrecvbuf_.
* If the result of Allreduce can be recovered directly, then prepare_func will NOT be called
* \param prepare_arg argument used to pass into the lazy preprocessing function
* \param is_bootstrap if this allreduce is needed to bootstrap failed node
* \param _file caller file name used to generate unique cache key
* \param _line caller line number used to generate unique cache key
* \param _caller caller function name used to generate unique cache key
*/
void Allreduce(void *sendrecvbuf,
size_t type_nbytes,
size_t count,
IEngine::PreprocFunction prepare_fun = NULL,
void *prepare_arg = NULL);
void *prepare_arg = NULL,
bool is_bootstrap = false,
const char* _file = _FILE,
const int _line = _LINE,
const char* _caller = _CALLER);
/*! \return the number of bytes occupied by the type */
static int TypeSize(const MPI::Datatype &dtype);

View File

@@ -96,6 +96,7 @@ template<typename OP, typename DType>
// Element-wise reduction over two untyped buffers.
// Both buffers are viewed as arrays of DType and OP::Reduce(dst[i], src[i])
// is applied for each of the len elements, in index order.
// The dtype argument is not used in the body.
inline void Reducer(const void *src_, void *dst_, int len, const MPI::Datatype &dtype) {
const DType *src = (const DType*)src_;
DType *dst = (DType*)dst_; // NOLINT(*)
for (int i = 0; i < len; ++i) {
OP::Reduce(dst[i], src[i]);
}
@@ -127,28 +128,43 @@ inline std::string GetProcessorName(void) {
return engine::GetEngine()->GetHost();
}
// broadcast data to all other nodes from root
// Thin wrapper: the buffer, its size, the root id, the bootstrap flag and the
// caller signature (_file, _line, _caller) are forwarded unchanged to
// engine::GetEngine()->Broadcast.
inline void Broadcast(void *sendrecv_data, size_t size, int root) {
engine::GetEngine()->Broadcast(sendrecv_data, size, root);
inline void Broadcast(void *sendrecv_data, size_t size, int root,
bool is_bootstrap,
const char* _file,
const int _line,
const char* _caller) {
engine::GetEngine()->Broadcast(sendrecv_data, size, root,
is_bootstrap, _file, _line, _caller);
}
// Broadcast a std::vector<DType> from root in two steps:
// 1) broadcast the element count, then resize the local vector if its size
//    differs from the received count;
// 2) if the count is non-zero, broadcast size * sizeof(DType) bytes starting
//    at the first element.
// Both steps pass the same is_bootstrap flag and caller signature.
template<typename DType>
inline void Broadcast(std::vector<DType> *sendrecv_data, int root) {
inline void Broadcast(std::vector<DType> *sendrecv_data, int root,
bool is_bootstrap,
const char* _file,
const int _line,
const char* _caller) {
size_t size = sendrecv_data->size();
Broadcast(&size, sizeof(size), root);
Broadcast(&size, sizeof(size), root, is_bootstrap, _file, _line, _caller);
if (sendrecv_data->size() != size) {
sendrecv_data->resize(size);
}
// skip the payload broadcast for an empty vector
if (size != 0) {
Broadcast(&(*sendrecv_data)[0], size * sizeof(DType), root);
Broadcast(&(*sendrecv_data)[0], size * sizeof(DType), root,
is_bootstrap, _file, _line, _caller);
}
}
// Broadcast a std::string from root in two steps:
// 1) broadcast the length, then resize the local string if its length
//    differs from the received one;
// 2) if the length is non-zero, broadcast the character data starting at
//    the first character.
// Both steps pass the same is_bootstrap flag and caller signature.
inline void Broadcast(std::string *sendrecv_data, int root) {
inline void Broadcast(std::string *sendrecv_data, int root,
bool is_bootstrap,
const char* _file,
const int _line,
const char* _caller) {
size_t size = sendrecv_data->length();
Broadcast(&size, sizeof(size), root);
Broadcast(&size, sizeof(size), root, is_bootstrap, _file, _line, _caller);
if (sendrecv_data->length() != size) {
sendrecv_data->resize(size);
}
// skip the payload broadcast for an empty string
if (size != 0) {
Broadcast(&(*sendrecv_data)[0], size * sizeof(char), root);
Broadcast(&(*sendrecv_data)[0], size * sizeof(char), root,
is_bootstrap, _file, _line, _caller);
}
}
@@ -156,9 +172,14 @@ inline void Broadcast(std::string *sendrecv_data, int root) {
// Typed Allreduce with a function-pointer prepare callback.
// Forwards to engine::Allreduce_ with the element size sizeof(DType), the
// reducer op::Reducer<OP, DType>, the MPI data type from
// engine::mpi::GetType<DType>() and the operation OP::kType; prepare_fun,
// prepare_arg, is_bootstrap and the caller signature are passed through
// unchanged.
template<typename OP, typename DType>
inline void Allreduce(DType *sendrecvbuf, size_t count,
void (*prepare_fun)(void *arg),
void *prepare_arg) {
void *prepare_arg,
bool is_bootstrap,
const char* _file,
const int _line,
const char* _caller) {
engine::Allreduce_(sendrecvbuf, sizeof(DType), count, op::Reducer<OP, DType>,
engine::mpi::GetType<DType>(), OP::kType, prepare_fun, prepare_arg);
engine::mpi::GetType<DType>(), OP::kType, prepare_fun, prepare_arg,
is_bootstrap, _file, _line, _caller);
}
// C++11 support for lambda prepare function
@@ -167,9 +188,15 @@ inline void InvokeLambda_(void *fun) {
(*static_cast<std::function<void()>*>(fun))();
}
// Typed Allreduce taking the prepare step as a std::function<void()>.
// The callable is handed to engine::Allreduce_ through the C-style callback
// pair: InvokeLambda_ as the prepare function and the address of the
// by-value prepare_fun parameter as its argument.
// is_bootstrap and the caller signature are passed through unchanged.
template<typename OP, typename DType>
inline void Allreduce(DType *sendrecvbuf, size_t count, std::function<void()> prepare_fun) {
inline void Allreduce(DType *sendrecvbuf, size_t count,
std::function<void()> prepare_fun,
bool is_bootstrap,
const char* _file,
const int _line,
const char* _caller) {
engine::Allreduce_(sendrecvbuf, sizeof(DType), count, op::Reducer<OP, DType>,
engine::mpi::GetType<DType>(), OP::kType, InvokeLambda_, &prepare_fun);
engine::mpi::GetType<DType>(), OP::kType, InvokeLambda_, &prepare_fun,
is_bootstrap, _file, _line, _caller);
}
#endif // C++11
@@ -188,6 +215,7 @@ inline void TrackerPrintf(const char *fmt, ...) {
msg.resize(strlen(msg.c_str()));
TrackerPrint(msg);
}
#endif // RABIT_STRICT_CXX98_
// load latest check point
inline int LoadCheckPoint(Serializable *global_model,
@@ -216,8 +244,8 @@ inline void ReducerSafe_(const void *src_, void *dst_, int len_, const MPI::Data
const size_t kUnit = sizeof(DType);
const char *psrc = reinterpret_cast<const char*>(src_);
char *pdst = reinterpret_cast<char*>(dst_);
DType tdst, tsrc;
for (int i = 0; i < len_; ++i) {
DType tdst, tsrc;
// use memcpy to avoid alignment issue
std::memcpy(&tdst, pdst + i * kUnit, sizeof(tdst));
std::memcpy(&tsrc, psrc + i * kUnit, sizeof(tsrc));
@@ -247,8 +275,13 @@ inline Reducer<DType, freduce>::Reducer(void) {
// Allreduce for the customized Reducer: forwards the buffer, the element size
// sizeof(DType), the element count, the prepare callback with its argument,
// the bootstrap flag and the caller signature to handle_.Allreduce.
template<typename DType, void (*freduce)(DType &dst, const DType &src)> // NOLINT(*)
inline void Reducer<DType, freduce>::Allreduce(DType *sendrecvbuf, size_t count,
void (*prepare_fun)(void *arg),
void *prepare_arg) {
handle_.Allreduce(sendrecvbuf, sizeof(DType), count, prepare_fun, prepare_arg);
void *prepare_arg,
bool is_bootstrap,
const char* _file,
const int _line,
const char* _caller) {
handle_.Allreduce(sendrecvbuf, sizeof(DType), count, prepare_fun,
prepare_arg, is_bootstrap, _file, _line, _caller);
}
// function to perform reduction for SerializeReducer
template<typename DType>
@@ -256,8 +289,8 @@ inline void SerializeReducerFunc_(const void *src_, void *dst_,
int len_, const MPI::Datatype &dtype) {
int nbytes = engine::ReduceHandle::TypeSize(dtype);
// temp space
DType tsrc, tdst;
for (int i = 0; i < len_; ++i) {
DType tsrc, tdst;
utils::MemoryFixSizeBuffer fsrc((char*)(src_) + i * nbytes, nbytes); // NOLINT(*)
utils::MemoryFixSizeBuffer fdst((char*)(dst_) + i * nbytes, nbytes); // NOLINT(*)
tsrc.Load(fsrc);
@@ -296,7 +329,11 @@ template<typename DType>
inline void SerializeReducer<DType>::Allreduce(DType *sendrecvobj,
size_t max_nbyte, size_t count,
void (*prepare_fun)(void *arg),
void *prepare_arg) {
void *prepare_arg,
bool is_bootstrap,
const char* _file,
const int _line,
const char* _caller) {
buffer_.resize(max_nbyte * count);
// setup closure
SerializeReduceClosure<DType> c;
@@ -304,7 +341,8 @@ inline void SerializeReducer<DType>::Allreduce(DType *sendrecvobj,
c.prepare_fun = prepare_fun; c.prepare_arg = prepare_arg; c.p_buffer = &buffer_;
// invoke here
handle_.Allreduce(BeginPtr(buffer_), max_nbyte, count,
SerializeReduceClosure<DType>::Invoke, &c);
SerializeReduceClosure<DType>::Invoke, &c,
is_bootstrap, _file, _line, _caller);
for (size_t i = 0; i < count; ++i) {
utils::MemoryFixSizeBuffer fs(BeginPtr(buffer_) + i * max_nbyte, max_nbyte);
sendrecvobj[i].Load(fs);
@@ -314,14 +352,24 @@ inline void SerializeReducer<DType>::Allreduce(DType *sendrecvobj,
#if DMLC_USE_CXX11
// std::function overload of Reducer::Allreduce: delegates to the
// function-pointer overload, passing InvokeLambda_ as the prepare function
// and the address of the by-value prepare_fun parameter as its argument.
// is_bootstrap and the caller signature are passed through unchanged.
template<typename DType, void (*freduce)(DType &dst, const DType &src)> // NOLINT(*)g
inline void Reducer<DType, freduce>::Allreduce(DType *sendrecvbuf, size_t count,
std::function<void()> prepare_fun) {
this->Allreduce(sendrecvbuf, count, InvokeLambda_, &prepare_fun);
std::function<void()> prepare_fun,
bool is_bootstrap,
const char* _file,
const int _line,
const char* _caller) {
this->Allreduce(sendrecvbuf, count, InvokeLambda_, &prepare_fun,
is_bootstrap, _file, _line, _caller);
}
// std::function overload of SerializeReducer::Allreduce: delegates to the
// function-pointer overload with the same max_nbytes and count, passing
// InvokeLambda_ as the prepare function and the address of the by-value
// prepare_fun parameter as its argument.
// is_bootstrap and the caller signature are passed through unchanged.
template<typename DType>
inline void SerializeReducer<DType>::Allreduce(DType *sendrecvobj,
size_t max_nbytes, size_t count,
std::function<void()> prepare_fun) {
this->Allreduce(sendrecvobj, max_nbytes, count, InvokeLambda_, &prepare_fun);
std::function<void()> prepare_fun,
bool is_bootstrap,
const char* _file,
const int _line,
const char* _caller) {
this->Allreduce(sendrecvobj, max_nbytes, count, InvokeLambda_, &prepare_fun,
is_bootstrap, _file, _line, _caller);
}
#endif // DMLC_USE_CXX11
} // namespace rabit

View File

@@ -96,9 +96,15 @@ inline void HandleCheckError(const char *msg) {
inline void HandlePrint(const char *msg) {
printf("%s", msg);
}
// HandleLogPrint: writes msg verbatim to stderr and flushes the stream.
inline void HandleLogPrint(const char *msg) {
fprintf(stderr, "%s", msg);
fflush(stderr);
// HandleLogInfo: printf-style logging helper.
// Formats fmt and its variadic arguments with vsnprintf into a buffer of
// kPrintBuffer bytes (so longer output is truncated), then writes the result
// to stdout and flushes the stream.
inline void HandleLogInfo(const char *fmt, ...) {
std::string msg(kPrintBuffer, '\0');
va_list args;
va_start(args, fmt);
vsnprintf(&msg[0], kPrintBuffer, fmt, args);
va_end(args);
// c_str() stops at the first '\0', so the unused tail of the buffer is not printed
fprintf(stdout, "%s", msg.c_str());
fflush(stdout);
}
#else
#ifndef RABIT_STRICT_CXX98_