e81a11d Merge pull request #25 from daiyl0320/master 35c3b37 add retry mechanism to ConnectTracker and modify Listen backlog to 128 in rabit_traker.py c71ed6f try deply doxygen 62e5647 try deply doxygen 732f1c6 try 2fa6e02 ok 0537665 minor 7b59dcb minor 5934950 new doc f538187 ok 44b6049 new doc 387339b add more 9d4397a chg 2879a48 chg 30e3110 ok 9ff0301 add link translation 6b629c2 k 32e1955 ok 8f4839d fix 93137b2 ok 7eeeb79 reload recommonmark a8f00cc minor 19b0f01 ok dd01184 minor c1cdc19 minor fcf0f43 try rst cbc21ae try 62ddfa7 tiny aefc05c final change 2aee9b4 minor fe4e7c2 ok 8001983 change to subtitle 5ca33e4 ok 88f7d24 update guide 29d43ab add code fe8bb3b minor hack for readthedocs 229c71d Merge branch 'master' of ssh://github.com/dmlc/rabit 7424218 ok d1d45bb Update README.md 1e8813f Update README.md 1ccc990 Update README.md 0323e06 remove readme 679a835 remove theme 7ea5b7c remove numpydoc to napoleon b73e2be Merge branch 'master' of ssh://github.com/dmlc/rabit 1742283 ok 1838e25 Update python-requirements.txt bc4e957 ok fba6fc2 ok 0251101 ok d50b905 ok d4f2509 ok cdf401a ok fef0ef2 new doc cef360d ok c125d2a ok 270a49e add requirments 744f901 get the basic doc 1cb5cad Merge branch 'master' of ssh://github.com/dmlc/rabit 8cc07ba minor d74f126 Update .travis.yml 52b3dcd Update .travis.yml 099581b Update .travis.yml 1258046 Update .travis.yml 7addac9 Update Makefile 0ea7adf Update .travis.yml f858856 Update travis_script.sh d8eac4a Update README.md 3cc49ad lint and travis ceedf4e fix fd8920c fix win32 8bbed35 modify 9520b90 Merge pull request #14 from dmlc/hjk41 df14bb1 fix type f441dc7 replace tab with blankspace 2467942 remove unnecessary include 181ef47 defined long long and ulonglong 1582180 use int32_t to define int and int64_t to define long. in VC long is 32bit e0b7da0 fix git-subtree-dir: subtree/rabit git-subtree-split: e81a11dd7ee3cff87a38a42901315821df018bae
127 lines
5.2 KiB
C
127 lines
5.2 KiB
C
/*!
 *  Copyright by Contributors
 * \file rabit_wrapper.h
 * \author Tianqi Chen
 * \brief a C-style wrapper of rabit,
 *  which can be used to create wrappers for other languages
 */
#ifndef RABIT_WRAPPER_H_
#define RABIT_WRAPPER_H_

/* size_t appears in the RabitAllreduce prototype, so make the header self-contained */
#include <stddef.h>

/*!
 * \brief decoration placed in front of every declaration in this header:
 *  expands to __declspec(dllexport) when compiled with MSVC and to nothing otherwise
 */
#ifdef _MSC_VER
#define RABIT_DLL __declspec(dllexport)
#else
#define RABIT_DLL
#endif
// manually define unsigned long;
// this alias is the type of the length arguments in the declarations below
typedef unsigned long rbt_ulong;  // NOLINT(*)

/* when included from C++, give every declaration below C linkage */
#ifdef __cplusplus
extern "C" {
#endif
/*!
|
|
* \brief intialize the rabit module, call this once before using anything
|
|
* \param argc number of arguments in argv
|
|
* \param argv the array of input arguments
|
|
*/
|
|
RABIT_DLL void RabitInit(int argc, char *argv[]);
|
|
/*!
|
|
* \brief finalize the rabit engine, call this function after you finished all jobs
|
|
*/
|
|
RABIT_DLL void RabitFinalize(void);
|
|
/*! \brief get rank of current process */
|
|
RABIT_DLL int RabitGetRank(void);
|
|
/*! \brief get total number of process */
|
|
RABIT_DLL int RabitGetWorldSize(void);
|
|
/*!
|
|
* \brief print the msg to the tracker,
|
|
* this function can be used to communicate the information of the progress to
|
|
* the user who monitors the tracker
|
|
* \param msg the message to be printed
|
|
*/
|
|
RABIT_DLL void RabitTrackerPrint(const char *msg);
|
|
/*!
|
|
* \brief get name of processor
|
|
* \param out_name hold output string
|
|
* \param out_len hold length of output string
|
|
* \param max_len maximum buffer length of input
|
|
*/
|
|
RABIT_DLL void RabitGetProcessorName(char *out_name,
|
|
rbt_ulong *out_len,
|
|
rbt_ulong max_len);
|
|
/*!
|
|
* \brief broadcast an memory region to all others from root
|
|
*
|
|
* Example: int a = 1; Broadcast(&a, sizeof(a), root);
|
|
* \param sendrecv_data the pointer to send or recive buffer,
|
|
* \param size the size of the data
|
|
* \param root the root of process
|
|
*/
|
|
RABIT_DLL void RabitBroadcast(void *sendrecv_data,
|
|
rbt_ulong size, int root);
|
|
/*!
|
|
* \brief perform in-place allreduce, on sendrecvbuf
|
|
* this function is NOT thread-safe
|
|
*
|
|
* Example Usage: the following code gives sum of the result
|
|
* vector<int> data(10);
|
|
* ...
|
|
* Allreduce<op::Sum>(&data[0], data.size());
|
|
* ...
|
|
* \param sendrecvbuf buffer for both sending and recving data
|
|
* \param count number of elements to be reduced
|
|
* \param enum_dtype the enumeration of data type, see rabit::engine::mpi::DataType in engine.h of rabit include
|
|
* \param enum_op the enumeration of operation type, see rabit::engine::mpi::OpType in engine.h of rabit
|
|
* \param prepare_fun Lazy preprocessing function, if it is not NULL, prepare_fun(prepare_arg)
|
|
* will be called by the function before performing Allreduce, to intialize the data in sendrecvbuf_.
|
|
* If the result of Allreduce can be recovered directly, then prepare_func will NOT be called
|
|
* \param prepare_arg argument used to passed into the lazy preprocessing function
|
|
*/
|
|
RABIT_DLL void RabitAllreduce(void *sendrecvbuf,
|
|
size_t count,
|
|
int enum_dtype,
|
|
int enum_op,
|
|
void (*prepare_fun)(void *arg),
|
|
void *prepare_arg);
|
|
|
|
/*!
|
|
* \brief load latest check point
|
|
* \param out_global_model hold output of serialized global_model
|
|
* \param out_global_len the output length of serialized global model
|
|
* \param out_local_model hold output of serialized local_model, can be NULL
|
|
* \param out_local_len the output length of serialized local model, can be NULL
|
|
*
|
|
* \return the version number of check point loaded
|
|
* if returned version == 0, this means no model has been CheckPointed
|
|
* nothing will be touched
|
|
*/
|
|
RABIT_DLL int RabitLoadCheckPoint(char **out_global_model,
|
|
rbt_ulong *out_global_len,
|
|
char **out_local_model,
|
|
rbt_ulong *out_local_len);
|
|
/*!
|
|
* \brief checkpoint the model, meaning we finished a stage of execution
|
|
* every time we call check point, there is a version number which will increase by one
|
|
*
|
|
* \param global_model hold content of serialized global_model
|
|
* \param global_len the content length of serialized global model
|
|
* \param local_model hold content of serialized local_model, can be NULL
|
|
* \param local_len the content length of serialized local model, can be NULL
|
|
*
|
|
* NOTE: local_model requires explicit replication of the model for fault-tolerance, which will
|
|
* bring replication cost in CheckPoint function. global_model do not need explicit replication.
|
|
* So only CheckPoint with global_model if possible
|
|
*/
|
|
RABIT_DLL void RabitCheckPoint(const char *global_model,
|
|
rbt_ulong global_len,
|
|
const char *local_model,
|
|
rbt_ulong local_len);
|
|
/*!
|
|
* \return version number of current stored model,
|
|
* which means how many calls to CheckPoint we made so far
|
|
*/
|
|
RABIT_DLL int RabitVersionNumber(void);
|
|
/* close the C linkage block opened for C++ consumers */
#ifdef __cplusplus
}  // extern "C"
#endif
#endif  // RABIT_WRAPPER_H_