updating doc
This commit is contained in:
parent
b87da8fe9a
commit
aebb7998a3
@ -1,7 +1,7 @@
|
||||
Library Header Files of Rabit
|
||||
Library Header Files
|
||||
====
|
||||
* This folder contains all the header needed to use rabit libary
|
||||
* To use it, add include to the search path of the compiler
|
||||
* User only need to know [rabit.h](rabit.h) and [rabit_serializable.h](rabit_serializable.h) to use the library
|
||||
* Folder [rabit](rabit) contains headers for internal engine and implementation of template
|
||||
* Not all .h files in the projects are contained in include, .h files that are internally used by library remains at [src](../src)
|
||||
* This folder contains all the header needed to use the library
|
||||
* To use it, add the "include" folder to the search path of the compiler
|
||||
* Users only need to know [rabit.h](rabit.h) and [rabit_serializable.h](rabit_serializable.h) in order to use the library
|
||||
* Folder [rabit](rabit) contains headers for internal engine and template's implementation
|
||||
* Not all .h files in the project are in the "include" folder; .h files that are used internally by the library remain in [src](../src)
|
||||
|
||||
156
include/rabit.h
156
include/rabit.h
@ -1,18 +1,18 @@
|
||||
/*!
|
||||
* Copyright (c) 2014 by Contributors
|
||||
* \file rabit.h
|
||||
* \brief This file defines unified Allreduce/Broadcast interface of rabit
|
||||
* The actual implementation is redirected to rabit engine
|
||||
* Code only using this header can also compiled with MPI Allreduce(with no fault recovery),
|
||||
* \brief This file defines rabit's Allreduce/Broadcast interface
|
||||
* The rabit engine contains the actual implementation
|
||||
* Code that only uses this header can also be compiled with MPI Allreduce (non fault-tolerant),
|
||||
*
|
||||
* rabit.h and serializable.h is all the user need to use rabit interface
|
||||
* rabit.h and serializable.h are all the user needs to use the rabit interface
|
||||
* \author Tianqi Chen, Ignacio Cano, Tianyi Zhou
|
||||
*/
|
||||
#ifndef RABIT_RABIT_H_
|
||||
#define RABIT_RABIT_H_
|
||||
#include <string>
|
||||
#include <vector>
|
||||
// optionally support of lambda function in C++11, if available
|
||||
// optional support for lambda functions in C++11, if available
|
||||
#if __cplusplus >= 201103L
|
||||
#include <functional>
|
||||
#endif // C++11
|
||||
@ -20,13 +20,13 @@
|
||||
#include "./rabit_serializable.h"
|
||||
// engine definition of rabit, defines internal implementation
|
||||
// to use rabit interface, there is no need to read engine.h
|
||||
// rabit.h and serializable.h are suffice to use the interface
|
||||
// rabit.h and serializable.h are enough to use the interface
|
||||
#include "./rabit/engine.h"
|
||||
|
||||
/*! \brief namespace of rabit */
|
||||
/*! \brief rabit namespace */
|
||||
namespace rabit {
|
||||
/*!
|
||||
* \brief namespace of reduction operators
|
||||
* \brief reduction operators namespace
|
||||
*/
|
||||
namespace op {
|
||||
/*!
|
||||
@ -46,90 +46,90 @@ struct Min;
|
||||
struct Sum;
|
||||
/*!
|
||||
* \class rabit::op::BitOR
|
||||
* \brief bitwise or reduction operator
|
||||
* \brief bitwise OR reduction operator
|
||||
*/
|
||||
struct BitOR;
|
||||
} // namespace op
|
||||
/*!
|
||||
* \brief intialize the rabit module, call this once before using anything
|
||||
* \brief initializes rabit, call this once at the beginning of your program
|
||||
* \param argc number of arguments in argv
|
||||
* \param argv the array of input arguments
|
||||
*/
|
||||
inline void Init(int argc, char *argv[]);
|
||||
/*!
|
||||
* \brief finalize the rabit engine, call this function after you finished all jobs
|
||||
* \brief finalizes the rabit engine, call this function after you have finished all the jobs
|
||||
*/
|
||||
inline void Finalize(void);
|
||||
/*! \brief get rank of current process */
|
||||
/*! \brief gets rank of the current process */
|
||||
inline int GetRank(void);
|
||||
/*! \brief get total number of process */
|
||||
/*! \brief gets total number of processes */
|
||||
inline int GetWorldSize(void);
|
||||
/*!
 * \brief checks whether rabit is running in distributed mode
 * \return true when GetWorldSize() is not 1, false otherwise
 */
|
||||
inline bool IsDistributed(void) {
|
||||
return GetWorldSize() != 1;
|
||||
}
|
||||
/*! \brief get name of processor */
|
||||
/*! \brief gets processor's name */
|
||||
inline std::string GetProcessorName(void);
|
||||
/*!
|
||||
* \brief print the msg to the tracker,
|
||||
* this function can be used to communicate the information of the progress to
|
||||
* \brief prints the msg to the tracker,
|
||||
* this function can be used to communicate progress information to
|
||||
* the user who monitors the tracker
|
||||
* \param msg the message to be printed
|
||||
*/
|
||||
inline void TrackerPrint(const std::string &msg);
|
||||
#ifndef RABIT_STRICT_CXX98_
|
||||
/*!
|
||||
* \brief print the msg to the tracker, this function may not be available
|
||||
* in very strict c++98 compilers, but is available most of the time
|
||||
* this function can be used to communicate the information of the progress to
|
||||
* \brief prints the msg to the tracker, this function may not be available
|
||||
* in very strict c++98 compilers, though it usually is.
|
||||
* this function can be used to communicate progress information to
|
||||
* the user who monitors the tracker
|
||||
* \param fmt the format string
|
||||
*/
|
||||
inline void TrackerPrintf(const char *fmt, ...);
|
||||
#endif
|
||||
/*!
|
||||
* \brief broadcast an memory region to all others from root
|
||||
* \brief broadcasts a memory region to every node from the root
|
||||
*
|
||||
* Example: int a = 1; Broadcast(&a, sizeof(a), root);
|
||||
* \param sendrecv_data the pointer to send or recive buffer,
|
||||
* \param size the size of the data
|
||||
* \param root the root of process
|
||||
* \param sendrecv_data the pointer to the send/receive buffer,
|
||||
* \param size the data size
|
||||
* \param root the process root
|
||||
*/
|
||||
inline void Broadcast(void *sendrecv_data, size_t size, int root);
|
||||
/*!
|
||||
* \brief broadcast an std::vector<DType> to all others from root
|
||||
* \param sendrecv_data the pointer to send or recive vector,
|
||||
* for receiver, the vector does not need to be pre-allocated
|
||||
* \param root the root of process
|
||||
* \tparam DType the data type stored in vector, have to be simple data type
|
||||
* that can be directly send by sending the sizeof(DType) data
|
||||
* \brief broadcasts an std::vector<DType> to every node from root
|
||||
* \param sendrecv_data the pointer to send/receive vector,
|
||||
* for the receiver, the vector does not need to be pre-allocated
|
||||
* \param root the process root
|
||||
* \tparam DType the data type stored in the vector, has to be a simple data type
|
||||
* that can be directly transmitted by sending the sizeof(DType)
|
||||
*/
|
||||
template<typename DType>
|
||||
inline void Broadcast(std::vector<DType> *sendrecv_data, int root);
|
||||
/*!
|
||||
* \brief broadcast an std::string to all others from root
|
||||
* \param sendrecv_data the pointer to send or recive vector,
|
||||
* for receiver, the vector does not need to be pre-allocated
|
||||
* \param root the root of process
|
||||
* \brief broadcasts a std::string to every node from the root
|
||||
* \param sendrecv_data the pointer to the send/receive buffer,
|
||||
* for the receiver, the string does not need to be pre-allocated
|
||||
* \param root the process root
|
||||
*/
|
||||
inline void Broadcast(std::string *sendrecv_data, int root);
|
||||
/*!
|
||||
* \brief perform in-place allreduce, on sendrecvbuf
|
||||
* \brief performs in-place Allreduce on sendrecvbuf
|
||||
* this function is NOT thread-safe
|
||||
*
|
||||
* Example Usage: the following code gives sum of the result
|
||||
* Example Usage: the following code does an Allreduce and outputs the sum as the result
|
||||
* vector<int> data(10);
|
||||
* ...
|
||||
* Allreduce<op::Sum>(&data[0], data.size());
|
||||
* ...
|
||||
* \param sendrecvbuf buffer for both sending and recving data
|
||||
* \param sendrecvbuf buffer for both sending and receiving data
|
||||
* \param count number of elements to be reduced
|
||||
* \param prepare_fun Lazy preprocessing function, if it is not NULL, prepare_fun(prepare_arg)
|
||||
* will be called by the function before performing Allreduce, to intialize the data in sendrecvbuf_.
|
||||
* will be called by the function before performing Allreduce in order to initialize the data in sendrecvbuf.
|
||||
* If the result of Allreduce can be recovered directly, then prepare_fun will NOT be called
|
||||
* \param prepare_arg argument used to passed into the lazy preprocessing function
|
||||
* \param prepare_arg argument used to pass into the lazy preprocessing function
|
||||
* \tparam OP see namespace op, reduce operator
|
||||
* \tparam DType type of data
|
||||
* \tparam DType data type
|
||||
*/
|
||||
template<typename OP, typename DType>
|
||||
inline void Allreduce(DType *sendrecvbuf, size_t count,
|
||||
@ -139,10 +139,10 @@ inline void Allreduce(DType *sendrecvbuf, size_t count,
|
||||
// C++11 support for lambda prepare function
|
||||
#if __cplusplus >= 201103L
|
||||
/*!
|
||||
* \brief perform in-place allreduce, on sendrecvbuf
|
||||
* with a prepare function specified by lambda function
|
||||
* \brief performs in-place Allreduce, on sendrecvbuf
|
||||
* with a prepare function specified by a lambda function
|
||||
*
|
||||
* Example Usage: the following code gives sum of the result
|
||||
* Example Usage: the following code does an Allreduce and outputs the sum as the result
|
||||
* vector<int> data(10);
|
||||
* ...
|
||||
* Allreduce<op::Sum>(&data[0], data.size(), [&]() {
|
||||
@ -151,29 +151,29 @@ inline void Allreduce(DType *sendrecvbuf, size_t count,
|
||||
* }
|
||||
* });
|
||||
* ...
|
||||
* \param sendrecvbuf buffer for both sending and recving data
|
||||
* \param sendrecvbuf buffer for both sending and receiving data
|
||||
* \param count number of elements to be reduced
|
||||
* \param prepare_fun Lazy lambda preprocessing function, prepare_fun() will be invoked
|
||||
* will be called by the function before performing Allreduce, to intialize the data in sendrecvbuf_.
|
||||
* by the function before performing Allreduce in order to initialize the data in sendrecvbuf.
|
||||
* If the result of Allreduce can be recovered directly, then prepare_fun will NOT be called
|
||||
* \tparam OP see namespace op, reduce operator
|
||||
* \tparam DType type of data
|
||||
* \tparam DType data type
|
||||
*/
|
||||
template<typename OP, typename DType>
|
||||
inline void Allreduce(DType *sendrecvbuf, size_t count,
|
||||
std::function<void()> prepare_fun);
|
||||
#endif // C++11
|
||||
/*!
|
||||
* \brief load latest check point
|
||||
* \brief loads the latest check point
|
||||
* \param global_model pointer to the globally shared model/state
|
||||
* when calling this function, the caller need to gauranttees that global_model
|
||||
* is the same in all nodes
|
||||
* \param local_model pointer to local model, that is specific to current node/rank
|
||||
* when calling this function, the caller needs to guarantee that the global_model
|
||||
* is the same in every node
|
||||
* \param local_model pointer to the local model that is specific to the current node/rank
|
||||
* this can be NULL when no local model is needed
|
||||
*
|
||||
* \return the version number of check point loaded
|
||||
* \return the version number of the check point loaded
|
||||
* if returned version == 0, this means no model has been CheckPointed
|
||||
* the p_model is not touched, user should do necessary initialization by themselves
|
||||
* the p_model is not touched, users should do the necessary initialization by themselves
|
||||
*
|
||||
* Common usage example:
|
||||
* int iter = rabit::LoadCheckPoint(&model);
|
||||
@ -188,45 +188,45 @@ inline void Allreduce(DType *sendrecvbuf, size_t count,
|
||||
inline int LoadCheckPoint(ISerializable *global_model,
|
||||
ISerializable *local_model = NULL);
|
||||
/*!
|
||||
* \brief checkpoint the model, meaning we finished a stage of execution
|
||||
* every time we call check point, there is a version number which will increase by one
|
||||
* \brief checkpoints the model, meaning a stage of execution has finished.
|
||||
* every time we call check point, a version number will be increased by one
|
||||
*
|
||||
* \param global_model pointer to the globally shared model/state
|
||||
* when calling this function, the caller need to gauranttees that global_model
|
||||
* is the same in all nodes
|
||||
* \param local_model pointer to local model, that is specific to current node/rank
|
||||
* when calling this function, the caller needs to guarantee that the global_model
|
||||
* is the same in every node
|
||||
* \param local_model pointer to the local model that is specific to the current node/rank
|
||||
* this can be NULL when no local state is needed
|
||||
* NOTE: local_model requires explicit replication of the model for fault-tolerance, which will
|
||||
* bring replication cost in CheckPoint function. global_model do not need explicit replication.
|
||||
* So only CheckPoint with global_model if possible
|
||||
* bring replication cost in the CheckPoint function. global_model does not need explicit replication.
|
||||
* So, only CheckPoint with the global_model if possible
|
||||
* \sa LoadCheckPoint, VersionNumber
|
||||
*/
|
||||
inline void CheckPoint(const ISerializable *global_model,
|
||||
const ISerializable *local_model = NULL);
|
||||
/*!
|
||||
* \brief This function can be used to replace CheckPoint for global_model only,
|
||||
* when certain condition is met(see detailed expplaination).
|
||||
* when a certain condition is met (see detailed explanation).
|
||||
*
|
||||
* This is a "lazy" checkpoint such that only the pointer to global_model is
|
||||
* This is a "lazy" checkpoint such that only the pointer to the global_model is
|
||||
* remembered and no memory copy is taken. To use this function, the user MUST ensure that:
|
||||
* The global_model must remain unchanged util last call of Allreduce/Broadcast in current version finishs.
|
||||
* In another words, global_model model can be changed only between last call of
|
||||
* Allreduce/Broadcast and LazyCheckPoint in current version
|
||||
* The global_model must remain unchanged until the last call of Allreduce/Broadcast in the current version finishes.
|
||||
* In other words, the global_model can be changed only between the last call of
|
||||
* Allreduce/Broadcast and LazyCheckPoint, both in the same version
|
||||
*
|
||||
* For example, suppose the calling sequence is:
|
||||
* LazyCheckPoint, code1, Allreduce, code2, Broadcast, code3, LazyCheckPoint
|
||||
*
|
||||
* If user want to use LazyCheckPoint, then the user MUST only change global_model in code3.
|
||||
* If the user wants to use LazyCheckPoint, then she MUST only change the global_model in code3.
|
||||
*
|
||||
* The Use of LazyCheckPoint instead of CheckPoint will improve efficiency of the program.
|
||||
* The use of LazyCheckPoint instead of CheckPoint will improve the efficiency of the program.
|
||||
* \param global_model pointer to the globally shared model/state
|
||||
* when calling this function, the caller need to gauranttees that global_model
|
||||
* is the same in all nodes
|
||||
* when calling this function, the caller needs to guarantee that the global_model
|
||||
* is the same in every node
|
||||
* \sa LoadCheckPoint, CheckPoint, VersionNumber
|
||||
*/
|
||||
inline void LazyCheckPoint(const ISerializable *global_model);
|
||||
/*!
|
||||
* \return version number of current stored model,
|
||||
* \return version number of the current stored model,
|
||||
* which means how many calls to CheckPoint we made so far
|
||||
* \sa LoadCheckPoint, CheckPoint
|
||||
*/
|
||||
@ -238,9 +238,9 @@ class ReduceHandle;
|
||||
} // namespace engine
|
||||
/*!
|
||||
* \brief template class to make customized reduce and all reduce easy
|
||||
* Do not use reducer directly in the function you call Finalize, because the destructor can happen after Finalize
|
||||
* Do not use reducer directly in the function you call Finalize, because the destructor can execute after Finalize
|
||||
* \tparam DType data type to be reduced
|
||||
* DType must be a struct, with no pointer, and contains a function Reduce(const DType &d);
|
||||
* DType must be a struct, with no pointer, and contain a function Reduce(const DType &d);
|
||||
*/
|
||||
template<typename DType>
|
||||
class Reducer {
|
||||
@ -251,9 +251,9 @@ class Reducer {
|
||||
* \param sendrecvbuf the in place send-recv buffer
|
||||
* \param count number of elements to be reduced
|
||||
* \param prepare_fun Lazy preprocessing function, if it is not NULL, prepare_fun(prepare_arg)
|
||||
* will be called by the function before performing Allreduce, to intialize the data in sendrecvbuf_.
|
||||
* will be called by the function before performing Allreduce, to initialize the data in sendrecvbuf.
|
||||
* If the result of Allreduce can be recovered directly, then prepare_fun will NOT be called
|
||||
* \param prepare_arg argument used to passed into the lazy preprocessing function
|
||||
* \param prepare_arg argument used to pass into the lazy preprocessing function
|
||||
*/
|
||||
inline void Allreduce(DType *sendrecvbuf, size_t count,
|
||||
void (*prepare_fun)(void *arg) = NULL,
|
||||
@ -276,10 +276,10 @@ class Reducer {
|
||||
/*!
|
||||
* \brief template class to make customized reduce,
|
||||
* this class defines a complex reducer that handles any data structure that can be
|
||||
* serialized/deserialzed into fixed size buffer
|
||||
* Do not use reducer directly in the function you call Finalize, because the destructor can happen after Finalize
|
||||
* serialized/deserialized into fixed size buffer
|
||||
* Do not use reducer directly in the function you call Finalize, because the destructor can execute after Finalize
|
||||
*
|
||||
* \tparam DType data type that to be reduced, DType must contain following functions:
|
||||
* \tparam DType data type to be reduced, DType must contain the following functions:
|
||||
* (1) Save(IStream &fs) (2) Load(IStream &fs) (3) Reduce(const DType &d);
|
||||
*/
|
||||
template<typename DType>
|
||||
@ -293,9 +293,9 @@ class SerializeReducer {
|
||||
* this includes budget limit for intermediate and final result
|
||||
* \param count number of elements to be reduced
|
||||
* \param prepare_fun Lazy preprocessing function, if it is not NULL, prepare_fun(prepare_arg)
|
||||
* will be called by the function before performing Allreduce, to intialize the data in sendrecvbuf_.
|
||||
* If the result of Allreduce can be recovered directly, then prepare_func will NOT be called
|
||||
* \param prepare_arg argument used to passed into the lazy preprocessing function
|
||||
* will be called by the function before performing Allreduce, to initialize the data in sendrecvbuf.
|
||||
* If the result of Allreduce can be recovered directly, then prepare_fun will NOT be called
|
||||
* \param prepare_arg argument used to pass into the lazy preprocessing function
|
||||
*/
|
||||
inline void Allreduce(DType *sendrecvobj,
|
||||
size_t max_nbyte, size_t count,
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
/*!
|
||||
* Copyright (c) 2014 by Contributors
|
||||
* \file engine.h
|
||||
* \brief This file defines the core interface of allreduce library
|
||||
* \brief This file defines the core interface of the rabit library
|
||||
* \author Tianqi Chen, Nacho, Tianyi
|
||||
*/
|
||||
#ifndef RABIT_ENGINE_H_
|
||||
@ -16,7 +16,7 @@ class Datatype;
|
||||
|
||||
/*! \brief namespace of rabit */
|
||||
namespace rabit {
|
||||
/*! \brief core interface of engine */
|
||||
/*! \brief core interface of the engine */
|
||||
namespace engine {
|
||||
/*! \brief interface of core Allreduce engine */
|
||||
class IEngine {
|
||||
@ -34,24 +34,24 @@ class IEngine {
|
||||
* which means it is OK to cast src,dst to double* int* etc
|
||||
* \param src pointer to source space
|
||||
* \param dst pointer to destination reduction
|
||||
* \param count total number of elements to be reduced(note this is total number of elements instead of bytes)
|
||||
* the definition of reduce function should be type aware
|
||||
* \param count total number of elements to be reduced (note this is total number of elements instead of bytes)
|
||||
* the definition of the reduce function should be type aware
|
||||
* \param dtype the data type object, to be compatible with MPI reduce
|
||||
*/
|
||||
typedef void (ReduceFunction) (const void *src,
|
||||
void *dst, int count,
|
||||
const MPI::Datatype &dtype);
|
||||
/*!
|
||||
* \brief perform in-place allreduce, on sendrecvbuf
|
||||
* \brief performs in-place Allreduce, on sendrecvbuf
|
||||
* this function is NOT thread-safe
|
||||
* \param sendrecvbuf_ buffer for both sending and recving data
|
||||
* \param type_nbytes the unit number of bytes the type have
|
||||
* \param sendrecvbuf_ buffer for both sending and receiving data
|
||||
* \param type_nbytes the number of bytes the type has
|
||||
* \param count number of elements to be reduced
|
||||
* \param reducer reduce function
|
||||
* \param prepare_fun Lazy preprocessing function, if it is not NULL, prepare_fun(prepare_arg)
|
||||
* will be called by the function before performing Allreduce, to intialize the data in sendrecvbuf_.
|
||||
* will be called by the function before performing Allreduce in order to initialize the data in sendrecvbuf.
|
||||
* If the result of Allreduce can be recovered directly, then prepare_fun will NOT be called
|
||||
* \param prepare_arg argument used to passed into the lazy preprocessing function
|
||||
* \param prepare_arg argument used to pass into the lazy preprocessing function
|
||||
*/
|
||||
virtual void Allreduce(void *sendrecvbuf_,
|
||||
size_t type_nbytes,
|
||||
@ -60,29 +60,29 @@ class IEngine {
|
||||
PreprocFunction prepare_fun = NULL,
|
||||
void *prepare_arg = NULL) = 0;
|
||||
/*!
|
||||
* \brief broadcast data from root to all nodes
|
||||
* \param sendrecvbuf_ buffer for both sending and recving data
|
||||
* \brief broadcasts data from root to every other node
|
||||
* \param sendrecvbuf_ buffer for both sending and receiving data
|
||||
* \param size the size of the data to be broadcasted
|
||||
* \param root the root worker id to broadcast the data
|
||||
*/
|
||||
virtual void Broadcast(void *sendrecvbuf_, size_t size, int root) = 0;
|
||||
/*!
|
||||
* \brief explicitly re-init everything before calling LoadCheckPoint
|
||||
* call this function when IEngine throw an exception out,
|
||||
* this function is only used for test purpose
|
||||
* \brief explicitly re-initialize everything before calling LoadCheckPoint
|
||||
* call this function when IEngine throws an exception,
|
||||
* this function should only be used for test purposes
|
||||
*/
|
||||
virtual void InitAfterException(void) = 0;
|
||||
/*!
|
||||
* \brief load latest check point
|
||||
* \brief loads the latest check point
|
||||
* \param global_model pointer to the globally shared model/state
|
||||
* when calling this function, the caller need to gauranttees that global_model
|
||||
* when calling this function, the caller needs to guarantee that the global_model
|
||||
* is the same in all nodes
|
||||
* \param local_model pointer to local model, that is specific to current node/rank
|
||||
* \param local_model pointer to the local model that is specific to current node/rank
|
||||
* this can be NULL when no local model is needed
|
||||
*
|
||||
* \return the version number of check point loaded
|
||||
* \return the version number of the model loaded
|
||||
* if returned version == 0, this means no model has been CheckPointed
|
||||
* the p_model is not touched, user should do necessary initialization by themselves
|
||||
* the p_model is not touched, users should do necessary initialization by themselves
|
||||
*
|
||||
* Common usage example:
|
||||
* int iter = rabit::LoadCheckPoint(&model);
|
||||
@ -97,18 +97,18 @@ class IEngine {
|
||||
virtual int LoadCheckPoint(ISerializable *global_model,
|
||||
ISerializable *local_model = NULL) = 0;
|
||||
/*!
|
||||
* \brief checkpoint the model, meaning we finished a stage of execution
|
||||
* every time we call check point, there is a version number which will increase by one
|
||||
* \brief checkpoints the model, meaning a stage of execution was finished
|
||||
* every time we call check point, a version number increases by one
|
||||
*
|
||||
* \param global_model pointer to the globally shared model/state
|
||||
* when calling this function, the caller need to gauranttees that global_model
|
||||
* is the same in all nodes
|
||||
* \param local_model pointer to local model, that is specific to current node/rank
|
||||
* when calling this function, the caller needs to guarantee that the global_model
|
||||
* is the same in every node
|
||||
* \param local_model pointer to the local model that is specific to current node/rank
|
||||
* this can be NULL when no local state is needed
|
||||
*
|
||||
* NOTE: local_model requires explicit replication of the model for fault-tolerance, which will
|
||||
* bring replication cost in CheckPoint function. global_model do not need explicit replication.
|
||||
* So only CheckPoint with global_model if possible
|
||||
* bring replication cost in CheckPoint function. global_model does not need explicit replication.
|
||||
* So, only CheckPoint with global_model if possible
|
||||
*
|
||||
* \sa LoadCheckPoint, VersionNumber
|
||||
*/
|
||||
@ -116,54 +116,54 @@ class IEngine {
|
||||
const ISerializable *local_model = NULL) = 0;
|
||||
/*!
|
||||
* \brief This function can be used to replace CheckPoint for global_model only,
|
||||
* when certain condition is met(see detailed expplaination.
|
||||
* when a certain condition is met (see detailed explanation).
|
||||
*
|
||||
* This is a "lazy" checkpoint such that only the pointer to global_model is
|
||||
* remembered and no memory copy is taken. To use this function, the user MUST ensure that:
|
||||
* The global_model must remain unchanged util last call of Allreduce/Broadcast in current version finishs.
|
||||
* In another words, global_model model can be changed only between last call of
|
||||
* Allreduce/Broadcast and LazyCheckPoint in current version
|
||||
* The global_model must remain unchanged until the last call of Allreduce/Broadcast in the current version finishes.
|
||||
* In other words, global_model can be changed only between the last call of
|
||||
* Allreduce/Broadcast and LazyCheckPoint in the current version
|
||||
*
|
||||
* For example, suppose the calling sequence is:
|
||||
* LazyCheckPoint, code1, Allreduce, code2, Broadcast, code3, LazyCheckPoint
|
||||
*
|
||||
* If user can only changes global_model in code3, then LazyCheckPoint can be used to
|
||||
* improve efficiency of the program.
|
||||
* If the user changes global_model only in code3, then LazyCheckPoint can be used to
|
||||
* improve the efficiency of the program.
|
||||
* \param global_model pointer to the globally shared model/state
|
||||
* when calling this function, the caller need to gauranttees that global_model
|
||||
* is the same in all nodes
|
||||
* when calling this function, the caller needs to guarantee that global_model
|
||||
* is the same in every node
|
||||
* \sa LoadCheckPoint, CheckPoint, VersionNumber
|
||||
*/
|
||||
virtual void LazyCheckPoint(const ISerializable *global_model) = 0;
|
||||
/*!
|
||||
* \return version number of current stored model,
|
||||
* \return version number of the current stored model,
|
||||
* which means how many calls to CheckPoint we made so far
|
||||
* \sa LoadCheckPoint, CheckPoint
|
||||
*/
|
||||
virtual int VersionNumber(void) const = 0;
|
||||
/*! \brief get rank of current node */
|
||||
/*! \brief gets rank of current node */
|
||||
virtual int GetRank(void) const = 0;
|
||||
/*! \brief get total number of */
|
||||
/*! \brief gets total number of nodes */
|
||||
virtual int GetWorldSize(void) const = 0;
|
||||
/*! \brief get the host name of current node */
|
||||
/*! \brief gets the host name of the current node */
|
||||
virtual std::string GetHost(void) const = 0;
|
||||
/*!
|
||||
* \brief print the msg in the tracker,
|
||||
* this function can be used to communicate the information of the progress to
|
||||
* \brief prints the msg in the tracker,
|
||||
* this function can be used to communicate progress information to
|
||||
* the user who monitors the tracker
|
||||
* \param msg message to be printed in the tracker
|
||||
*/
|
||||
virtual void TrackerPrint(const std::string &msg) = 0;
|
||||
};
|
||||
|
||||
/*! \brief intiialize the engine module */
|
||||
/*! \brief initializes the engine module */
|
||||
void Init(int argc, char *argv[]);
|
||||
/*! \brief finalize engine module */
|
||||
/*! \brief finalizes the engine module */
|
||||
void Finalize(void);
|
||||
/*! \brief singleton method to get engine */
|
||||
IEngine *GetEngine(void);
|
||||
|
||||
/*! \brief namespace that contains staffs to be compatible with MPI */
|
||||
/*! \brief namespace that contains stubs to be compatible with MPI */
|
||||
namespace mpi {
|
||||
/*!\brief enum of all operators */
|
||||
enum OpType {
|
||||
@ -185,19 +185,19 @@ enum DataType {
|
||||
};
|
||||
} // namespace mpi
|
||||
/*!
|
||||
* \brief perform in-place allreduce, on sendrecvbuf
|
||||
* \brief perform in-place Allreduce, on sendrecvbuf
|
||||
* this is an internal function used by rabit to be able to compile with MPI
|
||||
* do not use this function directly
|
||||
* \param sendrecvbuf buffer for both sending and recving data
|
||||
* \param type_nbytes the unit number of bytes the type have
|
||||
* \param sendrecvbuf buffer for both sending and receiving data
|
||||
* \param type_nbytes the number of bytes the type has
|
||||
* \param count number of elements to be reduced
|
||||
* \param reducer reduce function
|
||||
* \param dtype the data type
|
||||
* \param op the reduce operator type
|
||||
* \param prepare_func Lazy preprocessing function, lazy prepare_fun(prepare_arg)
|
||||
* will be called by the function before performing Allreduce, to intialize the data in sendrecvbuf_.
|
||||
* will be called by the function before performing Allreduce, to initialize the data in sendrecvbuf_.
|
||||
* If the result of Allreduce can be recovered directly, then prepare_func will NOT be called
|
||||
* \param prepare_arg argument used to passed into the lazy preprocessing function *
|
||||
* \param prepare_arg argument used to pass into the lazy preprocessing function.
|
||||
*/
|
||||
void Allreduce_(void *sendrecvbuf,
|
||||
size_t type_nbytes,
|
||||
@ -210,7 +210,7 @@ void Allreduce_(void *sendrecvbuf,
|
||||
|
||||
/*!
|
||||
* \brief handle for customized reducer, used to handle customized reduce
|
||||
* this class is mainly created for compatiblity issue with MPI's customized reduce
|
||||
* this class is mainly created for compatibility issues with MPI's customized reduce
|
||||
*/
|
||||
class ReduceHandle {
|
||||
public:
|
||||
@ -220,19 +220,19 @@ class ReduceHandle {
|
||||
~ReduceHandle(void);
|
||||
/*!
|
||||
* \brief initialize the reduce function,
|
||||
* with the type the reduce function need to deal with
|
||||
* with the type the reduce function needs to deal with
|
||||
* the reduce function MUST be commutative
|
||||
*/
|
||||
void Init(IEngine::ReduceFunction redfunc, size_t type_nbytes);
|
||||
/*!
|
||||
* \brief customized in-place all reduce operation
|
||||
* \param sendrecvbuf the in place send-recv buffer
|
||||
* \param type_n4bytes unit size of the type, in terms of 4bytes
|
||||
* \param type_n4bytes size of the type, in terms of 4bytes
|
||||
* \param count number of elements to send
|
||||
* \param prepare_func Lazy preprocessing function, lazy prepare_func(prepare_arg)
|
||||
* will be called by the function before performing Allreduce, to intialize the data in sendrecvbuf_.
|
||||
* will be called by the function before performing Allreduce in order to initialize the data in sendrecvbuf_.
|
||||
* If the result of Allreduce can be recovered directly, then prepare_func will NOT be called
|
||||
* \param prepare_arg argument used to passed into the lazy preprocessing function
|
||||
* \param prepare_arg argument to be passed into the lazy preprocessing function
|
||||
*/
|
||||
void Allreduce(void *sendrecvbuf,
|
||||
size_t type_nbytes, size_t count,
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
/*!
|
||||
* Copyright (c) 2014 by Contributors
|
||||
* \file io.h
|
||||
* \brief utilities that implements different serializable interface
|
||||
* \brief utilities with different serializable implementations
|
||||
* \author Tianqi Chen
|
||||
*/
|
||||
#ifndef RABIT_UTILS_IO_H_
|
||||
|
||||
@ -10,9 +10,8 @@
|
||||
|
||||
namespace rabit {
|
||||
namespace utils {
|
||||
// TODO not net cross platform, avoid to use this in most places
|
||||
/*!
|
||||
* \brief return time in seconds
|
||||
* \brief return time in seconds; not cross-platform, avoid using this in most places
|
||||
*/
|
||||
inline double GetTime(void) {
|
||||
timespec ts;
|
||||
|
||||
@ -60,7 +60,7 @@ const int kPrintBuffer = 1 << 12;
|
||||
|
||||
#ifndef RABIT_CUSTOMIZE_MSG_
|
||||
/*!
|
||||
* \brief handling of Assert error, caused by in-apropriate input
|
||||
* \brief handling of Assert error, caused by inappropriate input
|
||||
* \param msg error message
|
||||
*/
|
||||
inline void HandleAssertError(const char *msg) {
|
||||
@ -68,7 +68,7 @@ inline void HandleAssertError(const char *msg) {
|
||||
exit(-1);
|
||||
}
|
||||
/*!
|
||||
* \brief handling of Check error, caused by in-apropriate input
|
||||
* \brief handling of Check error, caused by inappropriate input
|
||||
* \param msg error message
|
||||
*/
|
||||
inline void HandleCheckError(const char *msg) {
|
||||
@ -98,7 +98,7 @@ extern "C" void (*Assert)(int exp, const char *fmt, ...);
|
||||
extern "C" void (*Check)(int exp, const char *fmt, ...);
|
||||
extern "C" void (*Error)(const char *fmt, ...);
|
||||
#else
|
||||
/*! \brief printf, print message to the console */
|
||||
/*! \brief printf, prints messages to the console */
|
||||
inline void Printf(const char *fmt, ...) {
|
||||
std::string msg(kPrintBuffer, '\0');
|
||||
va_list args;
|
||||
@ -116,7 +116,7 @@ inline int SPrintf(char *buf, size_t size, const char *fmt, ...) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*! \brief assert an condition is true, use this to handle debug information */
|
||||
/*! \brief assert a condition is true, use this to handle debug information */
|
||||
inline void Assert(bool exp, const char *fmt, ...) {
|
||||
if (!exp) {
|
||||
std::string msg(kPrintBuffer, '\0');
|
||||
@ -128,7 +128,7 @@ inline void Assert(bool exp, const char *fmt, ...) {
|
||||
}
|
||||
}
|
||||
|
||||
/*!\brief same as assert, but this is intended to be used as message for user*/
|
||||
/*!\brief same as assert, but this is intended to be used as a message for users */
|
||||
inline void Check(bool exp, const char *fmt, ...) {
|
||||
if (!exp) {
|
||||
std::string msg(kPrintBuffer, '\0');
|
||||
@ -160,7 +160,7 @@ inline std::FILE *FopenCheck(const char *fname, const char *flag) {
|
||||
return fp;
|
||||
}
|
||||
} // namespace utils
|
||||
// easy utils that can be directly acessed in xgboost
|
||||
// easy utils that can be directly accessed in xgboost
|
||||
/*! \brief get the beginning address of a vector */
|
||||
template<typename T>
|
||||
inline T *BeginPtr(std::vector<T> &vec) {
|
||||
|
||||
@ -17,26 +17,26 @@ namespace rabit {
|
||||
class IStream {
|
||||
public:
|
||||
/*!
|
||||
* \brief read data from stream
|
||||
* \param ptr pointer to memory buffer
|
||||
* \param size size of block
|
||||
* \return usually is the size of data readed
|
||||
* \brief reads data from a stream
|
||||
* \param ptr pointer to a memory buffer
|
||||
* \param size block size
|
||||
* \return the size of data read
|
||||
*/
|
||||
virtual size_t Read(void *ptr, size_t size) = 0;
|
||||
/*!
|
||||
* \brief write data to stream
|
||||
* \param ptr pointer to memory buffer
|
||||
* \param size size of block
|
||||
* \brief writes data to a stream
|
||||
* \param ptr pointer to a memory buffer
|
||||
* \param size block size
|
||||
*/
|
||||
virtual void Write(const void *ptr, size_t size) = 0;
|
||||
/*! \brief virtual destructor */
|
||||
virtual ~IStream(void) {}
|
||||
|
||||
public:
|
||||
// helper functions to write various of data structures
|
||||
// helper functions to write/read different data structures
|
||||
/*!
|
||||
* \brief binary serialize a vector
|
||||
* \param vec vector to be serialized
|
||||
* \brief writes a vector
|
||||
* \param vec vector to be written/serialized
|
||||
*/
|
||||
template<typename T>
|
||||
inline void Write(const std::vector<T> &vec) {
|
||||
@ -47,9 +47,9 @@ class IStream {
|
||||
}
|
||||
}
|
||||
/*!
|
||||
* \brief binary load a vector
|
||||
* \param out_vec vector to be loaded
|
||||
* \return whether load is successfull
|
||||
* \brief loads a vector
|
||||
* \param out_vec vector to be loaded/deserialized
|
||||
* \return whether the load was successful
|
||||
*/
|
||||
template<typename T>
|
||||
inline bool Read(std::vector<T> *out_vec) {
|
||||
@ -62,8 +62,8 @@ class IStream {
|
||||
return true;
|
||||
}
|
||||
/*!
|
||||
* \brief binary serialize a string
|
||||
* \param str the string to be serialized
|
||||
* \brief writes a string
|
||||
* \param str the string to be written/serialized
|
||||
*/
|
||||
inline void Write(const std::string &str) {
|
||||
uint64_t sz = static_cast<uint64_t>(str.length());
|
||||
@ -73,9 +73,9 @@ class IStream {
|
||||
}
|
||||
}
|
||||
/*!
|
||||
* \brief binary load a string
|
||||
* \param out_str string to be loaded
|
||||
* \return whether load is successful
|
||||
* \brief loads a string
|
||||
* \param out_str string to be loaded/deserialized
|
||||
* \return whether the load/deserialization was successful
|
||||
*/
|
||||
inline bool Read(std::string *out_str) {
|
||||
uint64_t sz;
|
||||
@ -88,12 +88,18 @@ class IStream {
|
||||
}
|
||||
};
|
||||
|
||||
/*! \brief interface of serializable objects */
|
||||
/*! \brief interface for serializable objects */
|
||||
class ISerializable {
|
||||
public:
|
||||
/*! \brief load the model from file */
|
||||
/*!
|
||||
* \brief load the model from a stream
|
||||
* \param fi stream to load the model from
|
||||
*/
|
||||
virtual void Load(IStream &fi) = 0;
|
||||
/*! \brief save the model to the stream*/
|
||||
/*!
|
||||
* \brief saves the model to a stream
|
||||
* \param fo stream to save the model to
|
||||
*/
|
||||
virtual void Save(IStream &fo) const = 0;
|
||||
};
|
||||
} // namespace rabit
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user