* support run rabit tests as xgboost subproject using xgboost/dmlc-core * support tracker config set/get * remove redudant printf * remove redudant printf * add c++0x declaration * log allreduce/broadcast caller, engine should track caller stack for investigation * tracker support binary config format * Revert "tracker support binary config format" This reverts commit 2a28e5e2b55c200cb621af8d19f17ab1bc62503b. * remove caller, prototype fetch allreduce/broadcast results from resbuf * store cached allreduce/broadcast seq_no to tracker * allow restore all caches from other nodes * try new rabit collective cache, todo: recv_link seems down * link up cache restore with main recovery * cleanup load cache state * update cache api * pass test.mk * have a working tests * try to unify check into actionsummary * more logging to debug distributed hist three method issue * update rabit interface to support caller signature matching * splite seq_counter from cur_cache_seq to different variables * still see issue with inf loop * support debug print caller as well as allreduce op * cleanup * remove get/set cache from model_recover, adding recover in loadcheckpoint * clarify rabit cache strategy, cache is set only by successful collective call involving all nodes with unique cache key. if all nodes call getcache at same time, we keep rabit run collective call. 
If some nodes call getcache while others not, we backfill cache from those nodes with most entries * revert caller logs * fix lint error * fix engine mpi signature * support getcache by ref * allow result buffer presiet to filestream * add loging * try fix checkpoint failure recovery case * use int64_t to avoid overflow caused seq fault * try avoid int overflow * try fix checkpoint failure recovery case * try avoid seqno overflow to negative by offseting specifial flag value adding cache seq no to checkpoint/load checkpoint/check point ack to avoid confusion from cache recovery * fix cache seq assert error * remove loging, handle edge case * add extensive log to checkpoint state with different seq no * fix lint errors * clean up comments before merge back to master * add logs to allreduce/broadcast/checkpoint * use unsinged int 32 and give seq no larger range * address remove allreduce dropseq code segment * using caller signature to filter bootstrapallreduces * remove get/set cache from empty * apply signature to reducer * apply signature to broadcast * add key to broadcat log * fix broadcast signature * fix default _line value for non linux system * adding comments, remove sleep(1) * fix osx build issue * try fix mpi * fix doc * fix engine_empty api * logging, adding more logs, restore immutable assertion * print unsinged int with ud * fix lint * rename seqtype to kSeq and KCache indicating it's usage apply kDiffSeq check to load_cache routine * comment allreduce/broadcast log * allow tests run on arm * enable flag to turn on / off cache * add log info alert if user choose to enable rabit bootstrap cache * add rabit_debug setting so user can use config to turn on * log flags when user turn on rabit_debug * force rabit restart if tracker assign -1 rank * use OPENMP to vecotrize reducer * address comment * Revert "address comment" This reverts commit 1dc61f33e7357dad8fa65528abeb81db92c5f9ed. 
* fix checkpoint size print 0 * per feedback, remove DISABLEOPEMP, address race condition * - remove openmp from this pr - update name from cache to boostrapcache * add default value of signature macros * remove openmp from cmake file * Update src/allreduce_robust.cc Co-Authored-By: Philip Hyunsu Cho <chohyu01@cs.washington.edu> * Update src/allreduce_robust.cc Co-Authored-By: Philip Hyunsu Cho <chohyu01@cs.washington.edu> * run test with cmake * remove openmp * fix cmake based tests * use cmake test fix darwin .dylib issue * move around rabit_signature definition due to windows build * misc, add c++ check in CMakeFile * per feedback * resolve CMake file * update rabit version
329 lines
13 KiB
C++
329 lines
13 KiB
C++
/*!
 *  Copyright (c) 2014 by Contributors
 * \file engine.h
 * \brief This file defines the core interface of rabit library
 * \author Tianqi Chen, Nacho, Tianyi
 */
|
|
#ifndef RABIT_INTERNAL_ENGINE_H_
|
|
#define RABIT_INTERNAL_ENGINE_H_
|
|
#include <cstddef>
#include <string>
#include "../serializable.h"
|
|
|
|
// keeps rabit api caller signature
//
// _FILE, _LINE and _CALLER are used in this header as the default values of
// the trailing `_file`, `_line` and `_caller` parameters of the collective
// calls.  When the compiler provides `__has_builtin` and the corresponding
// builtin, each macro expands to __builtin_FILE() / __builtin_LINE() /
// __builtin_FUNCTION(); otherwise it falls back to the placeholder
// "N/A" (for the two strings) or -1 (for the line number).
//
// The whole group is skipped when RABIT_API_CALLER_SIGNATURE is already
// defined, so whoever pre-defines it must supply the three macros as well.
//
// NOTE(review): names that start with an underscore followed by an uppercase
// letter are reserved for the implementation in C++.  They are kept as-is
// because code outside this header may reference them.
#ifndef RABIT_API_CALLER_SIGNATURE
#define RABIT_API_CALLER_SIGNATURE

#ifdef __has_builtin

#if __has_builtin(__builtin_FILE)
#define _FILE __builtin_FILE()
#else
#define _FILE "N/A"
#endif  // __has_builtin(__builtin_FILE)

#if __has_builtin(__builtin_LINE)
#define _LINE __builtin_LINE()
#else
#define _LINE -1
#endif  // __has_builtin(__builtin_LINE)

#if __has_builtin(__builtin_FUNCTION)
#define _CALLER __builtin_FUNCTION()
#else
#define _CALLER "N/A"
#endif  // __has_builtin(__builtin_FUNCTION)

#else

// no __has_builtin support at all: use the placeholders unconditionally
#define _FILE "N/A"
#define _LINE -1
#define _CALLER "N/A"

#endif  // __has_builtin

#endif  // RABIT_API_CALLER_SIGNATURE
|
|
|
|
namespace MPI {
/*!
 * \brief MPI data type just to be compatible with MPI reduce function
 *
 * Only forward-declared here; this header uses it solely by reference.
 */
class Datatype;
}  // namespace MPI
|
|
|
|
/*! \brief namespace of rabit */
|
|
namespace rabit {
|
|
/*! \brief core interface of the engine */
|
|
namespace engine {
|
|
/*! \brief interface of core Allreduce engine */
|
|
class IEngine {
|
|
public:
|
|
/*!
|
|
* \brief Preprocessing function, that is called before AllReduce,
|
|
* used to prepare the data used by AllReduce
|
|
* \param arg additional possible argument used to invoke the preprocessor
|
|
*/
|
|
typedef void (PreprocFunction) (void *arg);
|
|
/*!
|
|
* \brief reduce function, the same form of MPI reduce function is used,
|
|
* to be compatible with MPI interface
|
|
* In all the functions, the memory is ensured to aligned to 64-bit
|
|
* which means it is OK to cast src,dst to double* int* etc
|
|
* \param src pointer to source space
|
|
* \param dst pointer to destination reduction
|
|
* \param count total number of elements to be reduced (note this is total number of elements instead of bytes)
|
|
* the definition of the reduce function should be type aware
|
|
* \param dtype the data type object, to be compatible with MPI reduce
|
|
*/
|
|
typedef void (ReduceFunction) (const void *src,
|
|
void *dst, int count,
|
|
const MPI::Datatype &dtype);
|
|
/*! \brief virtual destructor */
|
|
virtual ~IEngine() {}
|
|
/*!
|
|
* \brief performs in-place Allreduce, on sendrecvbuf
|
|
* this function is NOT thread-safe
|
|
* \param sendrecvbuf_ buffer for both sending and receiving data
|
|
* \param type_nbytes the number of bytes the type has
|
|
* \param count number of elements to be reduced
|
|
* \param reducer reduce function
|
|
* \param prepare_func Lazy preprocessing function, if it is not NULL, prepare_fun(prepare_arg)
|
|
* will be called by the function before performing Allreduce in order to initialize the data in sendrecvbuf.
|
|
* If the result of Allreduce can be recovered directly, then prepare_func will NOT be called
|
|
* \param prepare_arg argument used to pass into the lazy preprocessing function
|
|
* \param is_bootstrap if this allreduce is needed to bootstrap failed node
|
|
* \param _file caller file name used to generate unique cache key
|
|
* \param _line caller line number used to generate unique cache key
|
|
* \param _caller caller function name used to generate unique cache key
|
|
*/
|
|
virtual void Allreduce(void *sendrecvbuf_,
|
|
size_t type_nbytes,
|
|
size_t count,
|
|
ReduceFunction reducer,
|
|
PreprocFunction prepare_fun = NULL,
|
|
void *prepare_arg = NULL,
|
|
bool is_bootstrap = false,
|
|
const char* _file = _FILE,
|
|
const int _line = _LINE,
|
|
const char* _caller = _CALLER) = 0;
|
|
/*!
|
|
* \brief broadcasts data from root to every other node
|
|
* \param sendrecvbuf_ buffer for both sending and receiving data
|
|
* \param size the size of the data to be broadcasted
|
|
* \param root the root worker id to broadcast the data
|
|
* \param is_bootstrap if this broadcast is needed to bootstrap failed node
|
|
* \param _file caller file name used to generate unique cache key
|
|
* \param _line caller line number used to generate unique cache key
|
|
* \param _caller caller function name used to generate unique cache key
|
|
*/
|
|
virtual void Broadcast(void *sendrecvbuf_, size_t size, int root,
|
|
bool is_bootstrap = false,
|
|
const char* _file = _FILE,
|
|
const int _line = _LINE,
|
|
const char* _caller = _CALLER) = 0;
|
|
/*!
|
|
* \brief explicitly re-initialize everything before calling LoadCheckPoint
|
|
* call this function when IEngine throws an exception,
|
|
* this function should only be used for test purposes
|
|
*/
|
|
virtual void InitAfterException(void) = 0;
|
|
/*!
|
|
* \brief loads the latest check point
|
|
* \param global_model pointer to the globally shared model/state
|
|
* when calling this function, the caller needs to guarantee that the global_model
|
|
* is the same in all nodes
|
|
* \param local_model pointer to the local model that is specific to current node/rank
|
|
* this can be NULL when no local model is needed
|
|
*
|
|
* \return the version number of the model loaded
|
|
* if returned version == 0, this means no model has been CheckPointed
|
|
* the p_model is not touched, users should do necessary initialization by themselves
|
|
*
|
|
* Common usage example:
|
|
* int iter = rabit::LoadCheckPoint(&model);
|
|
* if (iter == 0) model.InitParameters();
|
|
* for (i = iter; i < max_iter; ++i) {
|
|
* do many things, include allreduce
|
|
* rabit::CheckPoint(model);
|
|
* }
|
|
*
|
|
* \sa CheckPoint, VersionNumber
|
|
*/
|
|
virtual int LoadCheckPoint(Serializable *global_model,
|
|
Serializable *local_model = NULL) = 0;
|
|
/*!
|
|
* \brief checkpoints the model, meaning a stage of execution was finished
|
|
* every time we call check point, a version number increases by ones
|
|
*
|
|
* \param global_model pointer to the globally shared model/state
|
|
* when calling this function, the caller needs to guarantee that the global_model
|
|
* is the same in every node
|
|
* \param local_model pointer to the local model that is specific to current node/rank
|
|
* this can be NULL when no local state is needed
|
|
*
|
|
* NOTE: local_model requires explicit replication of the model for fault-tolerance, which will
|
|
* bring replication cost in CheckPoint function. global_model does not need explicit replication.
|
|
* So, only CheckPoint with global_model if possible
|
|
*
|
|
* \sa LoadCheckPoint, VersionNumber
|
|
*/
|
|
virtual void CheckPoint(const Serializable *global_model,
|
|
const Serializable *local_model = NULL) = 0;
|
|
/*!
|
|
* \brief This function can be used to replace CheckPoint for global_model only,
|
|
* when certain condition is met (see detailed explanation).
|
|
*
|
|
* This is a "lazy" checkpoint such that only the pointer to global_model is
|
|
* remembered and no memory copy is taken. To use this function, the user MUST ensure that:
|
|
* The global_model must remain unchanged until the last call of Allreduce/Broadcast in the current version finishes.
|
|
* In other words, global_model can be changed only between the last call of
|
|
* Allreduce/Broadcast and LazyCheckPoint in the current version
|
|
*
|
|
* For example, suppose the calling sequence is:
|
|
* LazyCheckPoint, code1, Allreduce, code2, Broadcast, code3, LazyCheckPoint
|
|
*
|
|
* If the user can only change global_model in code3, then LazyCheckPoint can be used to
|
|
* improve the efficiency of the program.
|
|
* \param global_model pointer to the globally shared model/state
|
|
* when calling this function, the caller needs to guarantee that global_model
|
|
* is the same in every node
|
|
* \sa LoadCheckPoint, CheckPoint, VersionNumber
|
|
*/
|
|
virtual void LazyCheckPoint(const Serializable *global_model) = 0;
|
|
/*!
|
|
* \return version number of the current stored model,
|
|
* which means how many calls to CheckPoint we made so far
|
|
* \sa LoadCheckPoint, CheckPoint
|
|
*/
|
|
virtual int VersionNumber(void) const = 0;
|
|
/*! \brief gets rank of current node */
|
|
virtual int GetRank(void) const = 0;
|
|
/*! \brief gets total number of nodes */
|
|
virtual int GetWorldSize(void) const = 0;
|
|
/*! \brief whether we run in distribted mode */
|
|
virtual bool IsDistributed(void) const = 0;
|
|
/*! \brief gets the host name of the current node */
|
|
virtual std::string GetHost(void) const = 0;
|
|
/*!
|
|
* \brief prints the msg in the tracker,
|
|
* this function can be used to communicate progress information to
|
|
* the user who monitors the tracker
|
|
* \param msg message to be printed in the tracker
|
|
*/
|
|
virtual void TrackerPrint(const std::string &msg) = 0;
|
|
};
|
|
|
|
/*! \brief initializes the engine module */
|
|
bool Init(int argc, char *argv[]);
|
|
/*! \brief finalizes the engine module */
|
|
bool Finalize(void);
|
|
/*! \brief singleton method to get engine */
|
|
IEngine *GetEngine(void);
|
|
|
|
/*! \brief namespace that contains stubs to be compatible with MPI */
namespace mpi {
/*!
 * \brief enum of all operators
 *
 * The numeric values are spelled out explicitly; keep them stable, since
 * other code may rely on the exact numbers.
 */
enum OpType {
  kMax = 0,
  kMin = 1,
  kSum = 2,
  kBitwiseOR = 3
};
/*!
 * \brief enum of supported data types
 *
 * The numeric values are spelled out explicitly; keep them stable, since
 * other code may rely on the exact numbers.
 */
enum DataType {
  kChar = 0,
  kUChar = 1,
  kInt = 2,
  kUInt = 3,
  kLong = 4,
  kULong = 5,
  kFloat = 6,
  kDouble = 7,
  kLongLong = 8,
  kULongLong = 9
};
}  // namespace mpi
|
|
/*!
|
|
* \brief perform in-place Allreduce, on sendrecvbuf
|
|
* this is an internal function used by rabit to be able to compile with MPI
|
|
* do not use this function directly
|
|
* \param sendrecvbuf buffer for both sending and receiving data
|
|
* \param type_nbytes the number of bytes the type has
|
|
* \param count number of elements to be reduced
|
|
* \param reducer reduce function
|
|
* \param dtype the data type
|
|
* \param op the reduce operator type
|
|
* \param prepare_func Lazy preprocessing function, lazy prepare_fun(prepare_arg)
|
|
* will be called by the function before performing Allreduce, to initialize the data in sendrecvbuf_.
|
|
* If the result of Allreduce can be recovered directly, then prepare_func will NOT be called
|
|
* \param prepare_arg argument used to pass into the lazy preprocessing function.
|
|
* \param is_bootstrap if this allreduce is needed to bootstrap failed node
|
|
* \param _file caller file name used to generate unique cache key
|
|
* \param _line caller line number used to generate unique cache key
|
|
* \param _caller caller function name used to generate unique cache key
|
|
*/
|
|
void Allreduce_(void *sendrecvbuf,
|
|
size_t type_nbytes,
|
|
size_t count,
|
|
IEngine::ReduceFunction red,
|
|
mpi::DataType dtype,
|
|
mpi::OpType op,
|
|
IEngine::PreprocFunction prepare_fun = NULL,
|
|
void *prepare_arg = NULL,
|
|
bool is_bootstrap = false,
|
|
const char* _file = _FILE,
|
|
const int _line = _LINE,
|
|
const char* _caller = _CALLER);
|
|
/*!
|
|
* \brief handle for customized reducer, used to handle customized reduce
|
|
* this class is mainly created for compatiblity issues with MPI's customized reduce
|
|
*/
|
|
class ReduceHandle {
|
|
public:
|
|
// constructor
|
|
ReduceHandle(void);
|
|
// destructor
|
|
~ReduceHandle(void);
|
|
/*!
|
|
* \brief initialize the reduce function,
|
|
* with the type the reduce function needs to deal with
|
|
* the reduce function MUST be communicative
|
|
*/
|
|
void Init(IEngine::ReduceFunction redfunc, size_t type_nbytes);
|
|
/*!
|
|
* \brief customized in-place all reduce operation
|
|
* \param sendrecvbuf the in place send-recv buffer
|
|
* \param type_n4bytes size of the type, in terms of 4bytes
|
|
* \param count number of elements to send
|
|
* \param prepare_func Lazy preprocessing function, lazy prepare_fun(prepare_arg)
|
|
* will be called by the function before performing Allreduce in order to initialize the data in sendrecvbuf_.
|
|
* If the result of Allreduce can be recovered directly, then prepare_func will NOT be called
|
|
* \param prepare_arg argument used to pass into the lazy preprocessing function
|
|
* \param is_bootstrap if this allreduce is needed to bootstrap failed node
|
|
* \param _file caller file name used to generate unique cache key
|
|
* \param _line caller line number used to generate unique cache key
|
|
* \param _caller caller function name used to generate unique cache key
|
|
*/
|
|
void Allreduce(void *sendrecvbuf,
|
|
size_t type_nbytes,
|
|
size_t count,
|
|
IEngine::PreprocFunction prepare_fun = NULL,
|
|
void *prepare_arg = NULL,
|
|
bool is_bootstrap = false,
|
|
const char* _file = _FILE,
|
|
const int _line = _LINE,
|
|
const char* _caller = _CALLER);
|
|
/*! \return the number of bytes occupied by the type */
|
|
static int TypeSize(const MPI::Datatype &dtype);
|
|
|
|
protected:
|
|
// handle function field
|
|
void *handle_;
|
|
// reduce function of the reducer
|
|
IEngine::ReduceFunction *redfunc_;
|
|
// handle to the type field
|
|
void *htype_;
|
|
// the created type in 4 bytes
|
|
size_t created_type_nbytes_;
|
|
};
|
|
} // namespace engine
|
|
} // namespace rabit
|
|
#endif // RABIT_INTERNAL_ENGINE_H_
|