Squashed 'subtree/rabit/' content from commit c7282ac
git-subtree-dir: subtree/rabit
git-subtree-split: c7282acb2a
This commit is contained in:
306
wrapper/rabit.py
Normal file
306
wrapper/rabit.py
Normal file
@@ -0,0 +1,306 @@
|
||||
"""
|
||||
Python interface for rabit
|
||||
Reliable Allreduce and Broadcast Library
|
||||
Author: Tianqi Chen
|
||||
"""
|
||||
import cPickle as pickle
|
||||
import ctypes
|
||||
import os
|
||||
import sys
|
||||
import warnings
|
||||
import numpy as np
|
||||
|
||||
# Path template of the compiled wrapper library, relative to this module.
# The '%s' placeholder is filled in by loadlib__ with a build suffix
# ('', '_mock' or '_mpi').
WRAPPER_PATH = os.path.dirname(__file__) + (
    '\\..\\windows\\x64\\Release\\rabit_wrapper%s.dll' if os.name == 'nt'
    else '/librabit_wrapper%s.so')
# handle of the loaded wrapper library, None while nothing is loaded
rbtlib = None
|
||||
|
||||
# load in rabit wrapper library
|
||||
def loadlib__(lib = 'standard'):
    """
    load the rabit wrapper shared library into the module level rbtlib handle
    Arguments:
        lib: string [default='standard']
            which build of the wrapper to load, can be standard, mock, mpi
    Raises:
        Exception when lib is not one of the names listed above

    When a library is already loaded, a warning is issued and the call
    returns without loading anything.
    """
    global rbtlib
    if rbtlib is not None:
        # stacklevel = 2 attributes the warning to the caller of this helper
        warnings.warn('rabit.init call was ignored because it has already been initialized',
                      stacklevel = 2)
        return
    # suffix that is substituted into the WRAPPER_PATH template
    suffix = {'standard': '', 'mock': '_mock', 'mpi': '_mpi'}
    if lib not in suffix:
        raise Exception('unknown rabit lib %s, can be standard, mock, mpi' % lib)
    rbtlib = ctypes.cdll.LoadLibrary(WRAPPER_PATH % suffix[lib])
    # declare the return types of the functions whose result is read by Python
    rbtlib.RabitGetRank.restype = ctypes.c_int
    rbtlib.RabitGetWorldSize.restype = ctypes.c_int
    rbtlib.RabitVersionNumber.restype = ctypes.c_int
|
||||
|
||||
def unloadlib__():
    """
    forget the loaded wrapper library and reset the module level handle
    to None, so that loadlib__ can load a library again
    """
    global rbtlib
    # remove the binding that references the library object ...
    del rbtlib
    # ... and put back the "nothing loaded" marker
    rbtlib = None
|
||||
|
||||
# reduction operators accepted by allreduce
# NOTE(review): presumably these codes mirror rabit::engine::mpi::OpType on
# the C++ side -- confirm against engine.h before changing them
MAX, MIN, SUM, BITOR = range(4)
|
||||
|
||||
def check_err__():
    """
    placeholder hook that is called after every call into the wrapper library,
    it currently performs no check and always returns None
    """
    return None
|
||||
|
||||
def init(args = None, lib = 'standard'):
    """
    initialize the rabit module, call this once before using anything
    Arguments:
        args: list(string) [default=None]
            the list of arguments used to initialize rabit,
            when None the value of sys.argv at call time is used
        lib: string [default='standard']
            which build of the wrapper library to load,
            can be standard, mock, mpi
    """
    if args is None:
        # resolve at call time so a sys.argv replaced after import is honoured
        args = sys.argv
    loadlib__(lib)
    # build a C array of char* that holds the arguments
    arr = (ctypes.c_char_p * len(args))()
    arr[:] = args
    rbtlib.RabitInit(len(args), arr)
    check_err__()
|
||||
|
||||
def finalize():
    """
    shut down the rabit engine and release the wrapper library,
    call this function after all jobs are finished
    """
    rbtlib.RabitFinalize()
    check_err__()
    # drop the library handle so that init can be called again
    unloadlib__()
|
||||
|
||||
def get_rank():
    """
    Returns the rank of the current process, as reported by RabitGetRank
    """
    rank = rbtlib.RabitGetRank()
    check_err__()
    return rank
|
||||
|
||||
def get_world_size():
    """
    Returns the total number of processes, as reported by RabitGetWorldSize
    """
    # the exported symbol is RabitGetWorldSize, its restype is set in loadlib__
    ret = rbtlib.RabitGetWorldSize()
    check_err__()
    return ret
|
||||
|
||||
def tracker_print(msg):
    """
    print message to the tracker
    this function can be used to communicate the information of the progress
    to the tracker
    Arguments:
        msg: any object
            the message to print, objects that are not strings
            are converted with str()
    """
    if not isinstance(msg, str):
        msg = str(msg)
    # c_char_p wraps a byte string, so text has to be encoded before wrapping;
    # under Python 2 str already is a byte string and is passed unchanged
    if not isinstance(msg, bytes):
        msg = msg.encode('utf-8')
    rbtlib.RabitTrackerPrint(ctypes.c_char_p(msg))
    check_err__()
|
||||
|
||||
def get_processor_name():
    """
    Returns the name of processor(host)

    The name is written by the library into a buffer of 256 bytes.
    """
    capacity = 256
    name_buf = ctypes.create_string_buffer(capacity)
    # receives the length reported by the library, it is not used afterwards
    name_len = ctypes.c_ulong()
    rbtlib.RabitGetProcessorName(name_buf, ctypes.byref(name_len), capacity)
    check_err__()
    # .value yields the buffer content up to the first NUL byte
    return name_buf.value
|
||||
|
||||
def broadcast(data, root):
    """
    send a picklable object from the root node to every other node
    and return the broadcasted object on all nodes

    Example: the following example broadcast hello from rank 0 to all other nodes
    ```python
    rabit.init()
    n = 3
    rank = rabit.get_rank()
    s = None
    if rank == 0:
        s = {'hello world':100, 2:3}
    print '@node[%d] before-broadcast: s=\"%s\"' % (rank, str(s))
    s = rabit.broadcast(s, 0)
    print '@node[%d] after-broadcast: s=\"%s\"' % (rank, str(s))
    rabit.finalize()
    ```

    Arguments:
        data: anytype that can be pickled
            the object to send, only read on the root node,
            every other node may pass None
        root: int
            rank of the node to broadcast data from
    Returns:
        the result of broadcast
    """
    is_root = (root == get_rank())
    nbytes = ctypes.c_ulong()
    if is_root:
        assert data is not None, 'need to pass in data when broadcasting'
        payload = pickle.dumps(data, protocol = pickle.HIGHEST_PROTOCOL)
        nbytes.value = len(payload)
    # stage one: tell every node how long the pickled payload is
    rbtlib.RabitBroadcast(ctypes.byref(nbytes),
                          ctypes.sizeof(ctypes.c_ulong),
                          root)
    check_err__()
    # stage two: ship the payload itself
    if is_root:
        rbtlib.RabitBroadcast(ctypes.cast(ctypes.c_char_p(payload), ctypes.c_void_p),
                              nbytes.value, root)
        check_err__()
        # the root hands back the object it was given
        return data
    recv_buf = (ctypes.c_char * nbytes.value)()
    rbtlib.RabitBroadcast(ctypes.cast(recv_buf, ctypes.c_void_p),
                          nbytes.value, root)
    check_err__()
    return pickle.loads(recv_buf.raw)
|
||||
|
||||
# maps every numpy dtype that allreduce supports to the integer code that is
# handed to RabitAllreduce, the code is the position in the tuple below
# NOTE(review): presumably the codes mirror rabit::engine::mpi::DataType --
# confirm against engine.h before reordering
DTYPE_ENUM__ = dict(
    (np.dtype(type_name), type_code)
    for type_code, type_name in enumerate((
        'int8', 'uint8',
        'int32', 'uint32',
        'int64', 'uint64',
        'float32', 'float64')))
|
||||
|
||||
def allreduce(data, op, prepare_fun = None):
    """
    run an allreduce over data and return the reduced values,
    this function is not thread-safe
    Arguments:
        data: numpy ndarray
            input data
        op: int
            reduction operators, can be MIN, MAX, SUM, BITOR
        prepare_fun: lambda data
            Lazy preprocessing function, when it is not None it is handed to
            the library as a callback that runs prepare_fun(data).
            According to the wrapper header, it is called before the allreduce
            to initialize the data, and is NOT called when the result of the
            allreduce can be recovered directly
    Returns:
        the flattened buffer that holds the result of allreduce
    Raises:
        Exception when data is not a numpy.ndarray or its dtype is not
        listed in DTYPE_ENUM__
    """
    if not isinstance(data, np.ndarray):
        raise Exception('allreduce only takes in numpy.ndarray')
    flat = data.ravel()
    if flat.base is data.base:
        flat = flat.copy()
    if flat.dtype not in DTYPE_ENUM__:
        raise Exception('data type %s not supported' % str(flat.dtype))
    if prepare_fun is None:
        callback = None
    else:
        def pfunc(args):
            # NOTE(review): this prepares `data`, while the buffer passed to
            # the library is `flat`, which may be a copy -- confirm intended
            prepare_fun(data)
        callback = ctypes.CFUNCTYPE(None, ctypes.c_void_p)(pfunc)
    rbtlib.RabitAllreduce(flat.ctypes.data_as(ctypes.c_void_p),
                          flat.size, DTYPE_ENUM__[flat.dtype],
                          op, callback, None)
    check_err__()
    return flat
|
||||
|
||||
|
||||
def load_model__(ptr, length):
    """
    Internal function used by the module,
    unpickle the object stored in the memory region given by ptr and length
    Arguments:
        ptr: ctypes.POINTER(ctypes.c_char)
            pointer to the first byte of the buffer
        length: int
            number of bytes in the buffer
    Returns:
        the unpickled object
    """
    start = ctypes.addressof(ptr.contents)
    # view exactly `length` bytes starting at the address ptr refers to
    region = (ctypes.c_char * length).from_address(start)
    return pickle.loads(region.raw)
|
||||
|
||||
def load_checkpoint(with_local = False):
    """
    load the latest checkpoint
    Arguments:
        with_local: boolean [default = False]
            whether the checkpoint contains local model
    Returns:
        if with_local: return (version, global_model, local_model)
        else return (version, global_model)
        if the returned version == 0, no model has been checkpointed
        and the returned global_model, local_model are None
    """
    global_ptr = ctypes.POINTER(ctypes.c_char)()
    global_len = ctypes.c_ulong()
    if not with_local:
        # passing None for both local outputs skips the local model
        version = rbtlib.RabitLoadCheckPoint(
            ctypes.byref(global_ptr),
            ctypes.byref(global_len),
            None, None)
        check_err__()
        if version == 0:
            return (version, None)
        return (version, load_model__(global_ptr, global_len.value))
    local_ptr = ctypes.POINTER(ctypes.c_char)()
    local_len = ctypes.c_ulong()
    version = rbtlib.RabitLoadCheckPoint(
        ctypes.byref(global_ptr),
        ctypes.byref(global_len),
        ctypes.byref(local_ptr),
        ctypes.byref(local_len))
    check_err__()
    if version == 0:
        return (version, None, None)
    return (version,
            load_model__(global_ptr, global_len.value),
            load_model__(local_ptr, local_len.value))
|
||||
|
||||
def checkpoint(global_model, local_model = None):
    """
    checkpoint the model, meaning we finished a stage of execution
    every call to checkpoint increases the version number by one

    Arguments:
        global_model: anytype that can be pickled
            globally shared model/state when calling this function,
            the caller needs to guarantee that global_model is the same in all nodes
        local_model: anytype that can be pickled
            local model, that is specific to current node/rank.
            This can be None when no local state is needed.
            local_model requires explicit replication of the model for fault-tolerance,
            which will bring replication cost in checkpoint function,
            while global_model does not need explicit replication.
            It is recommended to use global_model if possible
    """
    global_bytes = pickle.dumps(global_model)
    if local_model is None:
        # no local state: hand a null pointer with length zero to the library
        local_bytes, local_len = None, 0
    else:
        local_bytes = pickle.dumps(local_model)
        local_len = len(local_bytes)
    rbtlib.RabitCheckPoint(global_bytes, len(global_bytes), local_bytes, local_len)
    check_err__()
|
||||
|
||||
def version_number():
    """
    Returns version number of current stored model,
    which means how many calls to CheckPoint we made so far
    """
    version = rbtlib.RabitVersionNumber()
    check_err__()
    return version
|
||||
239
wrapper/rabit_wrapper.cc
Normal file
239
wrapper/rabit_wrapper.cc
Normal file
@@ -0,0 +1,239 @@
|
||||
// implementation of the C API that is called through ctypes
|
||||
#define _CRT_SECURE_NO_WARNINGS
|
||||
#define _CRT_SECURE_NO_DEPRECATE
|
||||
|
||||
#include <rabit.h>
|
||||
#include <cstring>
|
||||
#include <string>
|
||||
#include "./rabit_wrapper.h"
|
||||
namespace rabit {
|
||||
namespace wrapper {
|
||||
// helper use to avoid BitOR operator
|
||||
template<typename OP, typename DType>
|
||||
struct FHelper {
|
||||
inline static void
|
||||
Allreduce(DType *senrecvbuf_,
|
||||
size_t count,
|
||||
void (*prepare_fun)(void *arg),
|
||||
void *prepare_arg) {
|
||||
rabit::Allreduce<OP>(senrecvbuf_, count,
|
||||
prepare_fun, prepare_arg);
|
||||
}
|
||||
};
|
||||
template<typename DType>
|
||||
struct FHelper<op::BitOR, DType> {
|
||||
inline static void
|
||||
Allreduce(DType *senrecvbuf_,
|
||||
size_t count,
|
||||
void (*prepare_fun)(void *arg),
|
||||
void *prepare_arg) {
|
||||
utils::Error("DataType does not support bitwise or operation");
|
||||
}
|
||||
};
|
||||
template<typename OP>
|
||||
inline void Allreduce_(void *sendrecvbuf_,
|
||||
size_t count,
|
||||
engine::mpi::DataType enum_dtype,
|
||||
void (*prepare_fun)(void *arg),
|
||||
void *prepare_arg) {
|
||||
using namespace engine::mpi;
|
||||
switch (enum_dtype) {
|
||||
case kChar:
|
||||
rabit::Allreduce<OP>
|
||||
(static_cast<char*>(sendrecvbuf_),
|
||||
count, prepare_fun, prepare_arg);
|
||||
return;
|
||||
case kUChar:
|
||||
rabit::Allreduce<OP>
|
||||
(static_cast<unsigned char*>(sendrecvbuf_),
|
||||
count, prepare_fun, prepare_arg);
|
||||
return;
|
||||
case kInt:
|
||||
rabit::Allreduce<OP>
|
||||
(static_cast<int*>(sendrecvbuf_),
|
||||
count, prepare_fun, prepare_arg);
|
||||
return;
|
||||
case kUInt:
|
||||
rabit::Allreduce<OP>
|
||||
(static_cast<unsigned*>(sendrecvbuf_),
|
||||
count, prepare_fun, prepare_arg);
|
||||
return;
|
||||
case kLong:
|
||||
rabit::Allreduce<OP>
|
||||
(static_cast<long*>(sendrecvbuf_),
|
||||
count, prepare_fun, prepare_arg);
|
||||
return;
|
||||
case kULong:
|
||||
rabit::Allreduce<OP>
|
||||
(static_cast<unsigned long*>(sendrecvbuf_),
|
||||
count, prepare_fun, prepare_arg);
|
||||
return;
|
||||
case kFloat:
|
||||
FHelper<OP, float>::Allreduce
|
||||
(static_cast<float*>(sendrecvbuf_),
|
||||
count, prepare_fun, prepare_arg);
|
||||
return;
|
||||
case kDouble:
|
||||
FHelper<OP, double>::Allreduce
|
||||
(static_cast<double*>(sendrecvbuf_),
|
||||
count, prepare_fun, prepare_arg);
|
||||
return;
|
||||
default: utils::Error("unknown data_type");
|
||||
}
|
||||
}
|
||||
inline void Allreduce(void *sendrecvbuf,
|
||||
size_t count,
|
||||
engine::mpi::DataType enum_dtype,
|
||||
engine::mpi::OpType enum_op,
|
||||
void (*prepare_fun)(void *arg),
|
||||
void *prepare_arg) {
|
||||
using namespace engine::mpi;
|
||||
switch (enum_op) {
|
||||
case kMax:
|
||||
Allreduce_<op::Max>
|
||||
(sendrecvbuf,
|
||||
count, enum_dtype,
|
||||
prepare_fun, prepare_arg);
|
||||
return;
|
||||
case kMin:
|
||||
Allreduce_<op::Min>
|
||||
(sendrecvbuf,
|
||||
count, enum_dtype,
|
||||
prepare_fun, prepare_arg);
|
||||
return;
|
||||
case kSum:
|
||||
Allreduce_<op::Sum>
|
||||
(sendrecvbuf,
|
||||
count, enum_dtype,
|
||||
prepare_fun, prepare_arg);
|
||||
return;
|
||||
case kBitwiseOR:
|
||||
Allreduce_<op::BitOR>
|
||||
(sendrecvbuf,
|
||||
count, enum_dtype,
|
||||
prepare_fun, prepare_arg);
|
||||
return;
|
||||
default: utils::Error("unknown enum_op");
|
||||
}
|
||||
}
|
||||
// Temporary storage for the serialized global and local model.
// RabitLoadCheckPoint fills these strings and returns pointers into them.
std::string global_buffer;
std::string local_buffer;
|
||||
// wrapper for serialization
|
||||
struct ReadWrapper : public ISerializable {
|
||||
std::string *p_str;
|
||||
explicit ReadWrapper(std::string *p_str)
|
||||
: p_str(p_str) {}
|
||||
virtual void Load(IStream &fi) {
|
||||
uint64_t sz;
|
||||
utils::Assert(fi.Read(&sz, sizeof(sz)) != 0,
|
||||
"Read pickle string");
|
||||
p_str->resize(sz);
|
||||
if (sz != 0) {
|
||||
utils::Assert(fi.Read(&(*p_str)[0], sizeof(char) * sz) != 0,
|
||||
"Read pickle string");
|
||||
}
|
||||
}
|
||||
virtual void Save(IStream &fo) const {
|
||||
utils::Error("not implemented");
|
||||
}
|
||||
};
|
||||
struct WriteWrapper : public ISerializable {
|
||||
const char *data;
|
||||
size_t length;
|
||||
explicit WriteWrapper(const char *data,
|
||||
size_t length)
|
||||
: data(data), length(length) {
|
||||
}
|
||||
virtual void Load(IStream &fi) {
|
||||
utils::Error("not implemented");
|
||||
}
|
||||
virtual void Save(IStream &fo) const {
|
||||
uint64_t sz = static_cast<uint16_t>(length);
|
||||
fo.Write(&sz, sizeof(sz));
|
||||
fo.Write(data, length * sizeof(char));
|
||||
}
|
||||
};
|
||||
} // namespace wrapper
|
||||
} // namespace rabit
|
||||
extern "C" {
|
||||
// C entry point: forwards the argument list to rabit::Init.
void RabitInit(int argc, char *argv[]) {
  rabit::Init(argc, argv);
}
|
||||
// C entry point: forwards to rabit::Finalize.
void RabitFinalize(void) {
  rabit::Finalize();
}
|
||||
int RabitGetRank(void) {
|
||||
return rabit::GetRank();
|
||||
}
|
||||
int RabitGetWorldSize(void) {
|
||||
return rabit::GetWorldSize();
|
||||
}
|
||||
// C entry point: copies the NUL-terminated message into a std::string and
// passes it to rabit::TrackerPrint.
void RabitTrackerPrint(const char *msg) {
  std::string text(msg);
  rabit::TrackerPrint(text);
}
|
||||
void RabitGetProcessorName(char *out_name,
|
||||
rbt_ulong *out_len,
|
||||
rbt_ulong max_len) {
|
||||
std::string s = rabit::GetProcessorName();
|
||||
if (s.length() > max_len) {
|
||||
s.resize(max_len - 1);
|
||||
}
|
||||
strcpy(out_name, s.c_str());
|
||||
*out_len = static_cast<rbt_ulong>(s.length());
|
||||
}
|
||||
void RabitBroadcast(void *sendrecv_data,
|
||||
rbt_ulong size, int root) {
|
||||
rabit::Broadcast(sendrecv_data, size, root);
|
||||
}
|
||||
void RabitAllreduce(void *sendrecvbuf,
|
||||
size_t count,
|
||||
int enum_dtype,
|
||||
int enum_op,
|
||||
void (*prepare_fun)(void *arg),
|
||||
void *prepare_arg) {
|
||||
rabit::wrapper::Allreduce
|
||||
(sendrecvbuf, count,
|
||||
static_cast<rabit::engine::mpi::DataType>(enum_dtype),
|
||||
static_cast<rabit::engine::mpi::OpType>(enum_op),
|
||||
prepare_fun, prepare_arg);
|
||||
}
|
||||
int RabitLoadCheckPoint(char **out_global_model,
|
||||
rbt_ulong *out_global_len,
|
||||
char **out_local_model,
|
||||
rbt_ulong *out_local_len) {
|
||||
using rabit::BeginPtr;
|
||||
using namespace rabit::wrapper;
|
||||
ReadWrapper sg(&global_buffer);
|
||||
ReadWrapper sl(&local_buffer);
|
||||
int version;
|
||||
if (out_local_model == NULL) {
|
||||
version = rabit::LoadCheckPoint(&sg, NULL);
|
||||
*out_global_model = BeginPtr(global_buffer);
|
||||
*out_global_len = static_cast<rbt_ulong>(global_buffer.length());
|
||||
} else {
|
||||
version = rabit::LoadCheckPoint(&sg, &sl);
|
||||
*out_global_model = BeginPtr(global_buffer);
|
||||
*out_global_len = static_cast<rbt_ulong>(global_buffer.length());
|
||||
*out_local_model = BeginPtr(local_buffer);
|
||||
*out_local_len = static_cast<rbt_ulong>(local_buffer.length());
|
||||
}
|
||||
return version;
|
||||
}
|
||||
void RabitCheckPoint(const char *global_model,
|
||||
rbt_ulong global_len,
|
||||
const char *local_model,
|
||||
rbt_ulong local_len) {
|
||||
using namespace rabit::wrapper;
|
||||
WriteWrapper sg(global_model, global_len);
|
||||
WriteWrapper sl(local_model, local_len);
|
||||
if (local_model == NULL) {
|
||||
rabit::CheckPoint(&sg, NULL);
|
||||
} else {
|
||||
rabit::CheckPoint(&sg, &sl);
|
||||
}
|
||||
}
|
||||
int RabitVersionNumber(void) {
|
||||
return rabit::VersionNumber();
|
||||
}
|
||||
}
|
||||
125
wrapper/rabit_wrapper.h
Normal file
125
wrapper/rabit_wrapper.h
Normal file
@@ -0,0 +1,125 @@
|
||||
#ifndef RABIT_WRAPPER_H_
#define RABIT_WRAPPER_H_
/*!
 * \file rabit_wrapper.h
 * \author Tianqi Chen
 * \brief a C style wrapper of rabit
 *  can be used to create wrapper of other languages
 */
/* size_t is used in the declarations below */
#include <stddef.h>

#ifdef _MSC_VER
#define RABIT_DLL __declspec(dllexport)
#else
#define RABIT_DLL
#endif
/* manually define unsigned long */
typedef unsigned long rbt_ulong;

#ifdef __cplusplus
extern "C" {
#endif
  /*!
   * \brief initialize the rabit module, call this once before using anything
   * \param argc number of arguments in argv
   * \param argv the array of input arguments
   */
  RABIT_DLL void RabitInit(int argc, char *argv[]);
  /*!
   * \brief finalize the rabit engine, call this function after you finished all jobs
   */
  RABIT_DLL void RabitFinalize(void);
  /*! \brief get rank of current process */
  RABIT_DLL int RabitGetRank(void);
  /*! \brief get total number of processes */
  RABIT_DLL int RabitGetWorldSize(void);
  /*!
   * \brief print the msg to the tracker,
   *    this function can be used to communicate the information of the progress to
   *    the user who monitors the tracker
   * \param msg the message to be printed
   */
  RABIT_DLL void RabitTrackerPrint(const char *msg);
  /*!
   * \brief get name of processor
   * \param out_name hold output string
   * \param out_len hold length of output string
   * \param max_len maximum buffer length of out_name
   */
  RABIT_DLL void RabitGetProcessorName(char *out_name,
                                       rbt_ulong *out_len,
                                       rbt_ulong max_len);
  /*!
   * \brief broadcast a memory region to all others from root
   *
   *     Example: int a = 1; Broadcast(&a, sizeof(a), root);
   * \param sendrecv_data the pointer to the send or receive buffer
   * \param size the size of the data
   * \param root the rank of the process that sends the data
   */
  RABIT_DLL void RabitBroadcast(void *sendrecv_data,
                                rbt_ulong size, int root);
  /*!
   * \brief perform in-place allreduce, on sendrecvbuf
   *        this function is NOT thread-safe
   *
   * Example Usage: the following code gives sum of the result
   *     vector<int> data(10);
   *     ...
   *     Allreduce<op::Sum>(&data[0], data.size());
   *     ...
   * \param sendrecvbuf buffer for both sending and receiving data
   * \param count number of elements to be reduced
   * \param enum_dtype the enumeration of data type, see rabit::engine::mpi::DataType in engine.h of rabit include
   * \param enum_op the enumeration of operation type, see rabit::engine::mpi::OpType in engine.h of rabit
   * \param prepare_fun Lazy preprocessing function, if it is not NULL, prepare_fun(prepare_arg)
   *                    will be called by the function before performing Allreduce, to initialize the data in sendrecvbuf.
   *                     If the result of Allreduce can be recovered directly, then prepare_fun will NOT be called
   * \param prepare_arg argument passed into the lazy preprocessing function
   */
  RABIT_DLL void RabitAllreduce(void *sendrecvbuf,
                                size_t count,
                                int enum_dtype,
                                int enum_op,
                                void (*prepare_fun)(void *arg),
                                void *prepare_arg);

  /*!
   * \brief load latest check point
   * \param out_global_model hold output of serialized global_model
   * \param out_global_len the output length of serialized global model
   * \param out_local_model hold output of serialized local_model, can be NULL
   * \param out_local_len the output length of serialized local model, can be NULL
   *
   * \return the version number of check point loaded
   *     if returned version == 0, this means no model has been CheckPointed
   *     and the outputs should not be used
   */
  RABIT_DLL int RabitLoadCheckPoint(char **out_global_model,
                                    rbt_ulong *out_global_len,
                                    char **out_local_model,
                                    rbt_ulong *out_local_len);
  /*!
   * \brief checkpoint the model, meaning we finished a stage of execution
   *  every time we call check point, there is a version number which will increase by one
   *
   * \param global_model hold content of serialized global_model
   * \param global_len the content length of serialized global model
   * \param local_model hold content of serialized local_model, can be NULL
   * \param local_len the content length of serialized local model, ignored when local_model is NULL
   *
   * NOTE: local_model requires explicit replication of the model for fault-tolerance, which will
   *       bring replication cost in CheckPoint function. global_model does not need explicit replication.
   *       So only CheckPoint with global_model if possible
   */
  RABIT_DLL void RabitCheckPoint(const char *global_model,
                                 rbt_ulong global_len,
                                 const char *local_model,
                                 rbt_ulong local_len);
  /*!
   * \return version number of current stored model,
   *         which means how many calls to CheckPoint we made so far
   */
  RABIT_DLL int RabitVersionNumber(void);
#ifdef __cplusplus
}  // C
#endif
#endif  // RABIT_WRAPPER_H_
|
||||
Reference in New Issue
Block a user