add requirments
This commit is contained in:
parent
744f9015bb
commit
270a49ee75
4
Makefile
4
Makefile
@ -28,7 +28,7 @@ ALIB= lib/librabit.a lib/librabit_mpi.a lib/librabit_empty.a lib/librabit_mock.a
|
|||||||
HEADERS=src/*.h include/*.h include/rabit/*.h
|
HEADERS=src/*.h include/*.h include/rabit/*.h
|
||||||
DMLC=dmlc-core
|
DMLC=dmlc-core
|
||||||
|
|
||||||
.PHONY: clean all install mpi python lint doc
|
.PHONY: clean all install mpi python lint doc doxygen
|
||||||
|
|
||||||
all: lib/librabit.a lib/librabit_mock.a wrapper/librabit_wrapper.so wrapper/librabit_wrapper_mock.so lib/librabit_base.a
|
all: lib/librabit.a lib/librabit_mock.a wrapper/librabit_wrapper.so wrapper/librabit_wrapper_mock.so lib/librabit_base.a
|
||||||
mpi: lib/librabit_mpi.a wrapper/librabit_wrapper_mpi.so
|
mpi: lib/librabit_mpi.a wrapper/librabit_wrapper_mpi.so
|
||||||
@ -68,7 +68,7 @@ $(SLIB) :
|
|||||||
lint:
|
lint:
|
||||||
$(DMLC)/scripts/lint.py rabit $(LINT_LANG) src include wrapper
|
$(DMLC)/scripts/lint.py rabit $(LINT_LANG) src include wrapper
|
||||||
|
|
||||||
doc:
|
doc doxygen:
|
||||||
cd include; doxygen ../doc/Doxyfile; cd -
|
cd include; doxygen ../doc/Doxyfile; cd -
|
||||||
|
|
||||||
clean:
|
clean:
|
||||||
|
|||||||
@ -138,7 +138,7 @@ inline void Broadcast(std::string *sendrecv_data, int root);
|
|||||||
*/
|
*/
|
||||||
template<typename OP, typename DType>
|
template<typename OP, typename DType>
|
||||||
inline void Allreduce(DType *sendrecvbuf, size_t count,
|
inline void Allreduce(DType *sendrecvbuf, size_t count,
|
||||||
void (*prepare_fun)(void *arg) = NULL,
|
void (*prepare_fun)(void *) = NULL,
|
||||||
void *prepare_arg = NULL);
|
void *prepare_arg = NULL);
|
||||||
// C++11 support for lambda prepare function
|
// C++11 support for lambda prepare function
|
||||||
#if DMLC_USE_CXX11
|
#if DMLC_USE_CXX11
|
||||||
@ -262,7 +262,7 @@ class Reducer {
|
|||||||
* \param prepare_arg argument used to pass into the lazy preprocessing function
|
* \param prepare_arg argument used to pass into the lazy preprocessing function
|
||||||
*/
|
*/
|
||||||
inline void Allreduce(DType *sendrecvbuf, size_t count,
|
inline void Allreduce(DType *sendrecvbuf, size_t count,
|
||||||
void (*prepare_fun)(void *arg) = NULL,
|
void (*prepare_fun)(void *) = NULL,
|
||||||
void *prepare_arg = NULL);
|
void *prepare_arg = NULL);
|
||||||
#if DMLC_USE_CXX11
|
#if DMLC_USE_CXX11
|
||||||
/*!
|
/*!
|
||||||
@ -306,7 +306,7 @@ class SerializeReducer {
|
|||||||
*/
|
*/
|
||||||
inline void Allreduce(DType *sendrecvobj,
|
inline void Allreduce(DType *sendrecvobj,
|
||||||
size_t max_nbyte, size_t count,
|
size_t max_nbyte, size_t count,
|
||||||
void (*prepare_fun)(void *arg) = NULL,
|
void (*prepare_fun)(void *) = NULL,
|
||||||
void *prepare_arg = NULL);
|
void *prepare_arg = NULL);
|
||||||
// C++11 support for lambda prepare function
|
// C++11 support for lambda prepare function
|
||||||
#if DMLC_USE_CXX11
|
#if DMLC_USE_CXX11
|
||||||
|
|||||||
291
wrapper/rabit.py
291
wrapper/rabit.py
@ -1,6 +1,6 @@
|
|||||||
"""
|
"""
|
||||||
Python interface for rabit
|
Reliable Allreduce and Broadcast Library.
|
||||||
Reliable Allreduce and Broadcast Library
|
|
||||||
Author: Tianqi Chen
|
Author: Tianqi Chen
|
||||||
"""
|
"""
|
||||||
# pylint: disable=unused-argument,invalid-name,global-statement,dangerous-default-value,
|
# pylint: disable=unused-argument,invalid-name,global-statement,dangerous-default-value,
|
||||||
@ -11,37 +11,42 @@ import sys
|
|||||||
import warnings
|
import warnings
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
|
# version information about the doc
|
||||||
|
__version__ = '1.0'
|
||||||
|
|
||||||
|
|
||||||
if os.name == 'nt':
|
if os.name == 'nt':
|
||||||
WRAPPER_PATH = os.path.dirname(__file__) + '\\..\\windows\\x64\\Release\\rabit_wrapper%s.dll'
|
WRAPPER_PATH = os.path.dirname(__file__) + '\\..\\windows\\x64\\Release\\rabit_wrapper%s.dll'
|
||||||
else:
|
else:
|
||||||
WRAPPER_PATH = os.path.dirname(__file__) + '/librabit_wrapper%s.so'
|
WRAPPER_PATH = os.path.dirname(__file__) + '/librabit_wrapper%s.so'
|
||||||
rbtlib = None
|
|
||||||
|
_LIB = None
|
||||||
|
|
||||||
# load in xgboost library
|
# load in xgboost library
|
||||||
def loadlib__(lib='standard'):
|
def _loadlib(lib='standard'):
|
||||||
"""Load rabit library"""
|
"""Load rabit library."""
|
||||||
global rbtlib
|
global _LIB
|
||||||
if rbtlib != None:
|
if _LIB != None:
|
||||||
warnings.warn('rabit.int call was ignored because it has'\
|
warnings.warn('rabit.int call was ignored because it has'\
|
||||||
' already been initialized', level=2)
|
' already been initialized', level=2)
|
||||||
return
|
return
|
||||||
if lib == 'standard':
|
if lib == 'standard':
|
||||||
rbtlib = ctypes.cdll.LoadLibrary(WRAPPER_PATH % '')
|
_LIB = ctypes.cdll.LoadLibrary(WRAPPER_PATH % '')
|
||||||
elif lib == 'mock':
|
elif lib == 'mock':
|
||||||
rbtlib = ctypes.cdll.LoadLibrary(WRAPPER_PATH % '_mock')
|
_LIB = ctypes.cdll.LoadLibrary(WRAPPER_PATH % '_mock')
|
||||||
elif lib == 'mpi':
|
elif lib == 'mpi':
|
||||||
rbtlib = ctypes.cdll.LoadLibrary(WRAPPER_PATH % '_mpi')
|
_LIB = ctypes.cdll.LoadLibrary(WRAPPER_PATH % '_mpi')
|
||||||
else:
|
else:
|
||||||
raise Exception('unknown rabit lib %s, can be standard, mock, mpi' % lib)
|
raise Exception('unknown rabit lib %s, can be standard, mock, mpi' % lib)
|
||||||
rbtlib.RabitGetRank.restype = ctypes.c_int
|
_LIB.RabitGetRank.restype = ctypes.c_int
|
||||||
rbtlib.RabitGetWorldSize.restype = ctypes.c_int
|
_LIB.RabitGetWorldSize.restype = ctypes.c_int
|
||||||
rbtlib.RabitVersionNumber.restype = ctypes.c_int
|
_LIB.RabitVersionNumber.restype = ctypes.c_int
|
||||||
|
|
||||||
def unloadlib__():
|
def _unloadlib():
|
||||||
"""Unload rabit library"""
|
"""Unload rabit library."""
|
||||||
global rbtlib
|
global _LIB
|
||||||
del rbtlib
|
del _LIB
|
||||||
rbtlib = None
|
_LIB = None
|
||||||
|
|
||||||
# reduction operators
|
# reduction operators
|
||||||
MAX = 0
|
MAX = 0
|
||||||
@ -49,101 +54,110 @@ MIN = 1
|
|||||||
SUM = 2
|
SUM = 2
|
||||||
BITOR = 3
|
BITOR = 3
|
||||||
|
|
||||||
def check_err__():
|
def _check_err():
|
||||||
"""
|
"""Reserved function used to check error.
|
||||||
reserved function used to check error
|
|
||||||
"""
|
"""
|
||||||
return
|
return
|
||||||
|
|
||||||
def init(args=sys.argv, lib='standard'):
|
def init(args=None, lib='standard'):
|
||||||
|
"""Intialize the rabit module, call this once before using anything.
|
||||||
|
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
args: list of str, optional
|
||||||
|
The list of arguments used to initialized the rabit
|
||||||
|
usually you need to pass in sys.argv.
|
||||||
|
Defaults to sys.argv when it is None.
|
||||||
|
lib: {'standard', 'mock', 'mpi'}
|
||||||
|
Type of library we want to load
|
||||||
"""
|
"""
|
||||||
intialize the rabit module, call this once before using anything
|
if args is None:
|
||||||
Arguments:
|
args = sys.argv
|
||||||
args: list(string) [default=sys.argv]
|
_loadlib(lib)
|
||||||
the list of arguments used to initialized the rabit
|
|
||||||
usually you need to pass in sys.argv
|
|
||||||
with_mock: boolean [default=False]
|
|
||||||
Whether initialize the mock test module
|
|
||||||
"""
|
|
||||||
loadlib__(lib)
|
|
||||||
arr = (ctypes.c_char_p * len(args))()
|
arr = (ctypes.c_char_p * len(args))()
|
||||||
arr[:] = args
|
arr[:] = args
|
||||||
rbtlib.RabitInit(len(args), arr)
|
_LIB.RabitInit(len(args), arr)
|
||||||
check_err__()
|
_check_err()
|
||||||
|
|
||||||
def finalize():
|
def finalize():
|
||||||
|
"""Finalize the rabit engine.
|
||||||
|
|
||||||
|
Call this function after you finished all jobs.
|
||||||
"""
|
"""
|
||||||
finalize the rabit engine, call this function after you finished all jobs
|
_LIB.RabitFinalize()
|
||||||
"""
|
_check_err()
|
||||||
rbtlib.RabitFinalize()
|
_unloadlib()
|
||||||
check_err__()
|
|
||||||
unloadlib__()
|
|
||||||
|
|
||||||
def get_rank():
|
def get_rank():
|
||||||
|
"""Get rank of current process.
|
||||||
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
rank : int
|
||||||
|
Rank of current process.
|
||||||
"""
|
"""
|
||||||
Returns rank of current process
|
ret = _LIB.RabitGetRank()
|
||||||
"""
|
_check_err()
|
||||||
ret = rbtlib.RabitGetRank()
|
|
||||||
check_err__()
|
|
||||||
return ret
|
return ret
|
||||||
|
|
||||||
def get_world_size():
|
def get_world_size():
|
||||||
|
"""Get total number workers.
|
||||||
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
n : int
|
||||||
|
Total number of process.
|
||||||
"""
|
"""
|
||||||
Returns get total number of process
|
ret = _LIB.RabitGetWorldSize()
|
||||||
"""
|
_check_err()
|
||||||
ret = rbtlib.RabitGetWorldSize()
|
|
||||||
check_err__()
|
|
||||||
return ret
|
return ret
|
||||||
|
|
||||||
def tracker_print(msg):
|
def tracker_print(msg):
|
||||||
"""
|
"""Print message to the tracker.
|
||||||
print message to the tracker
|
|
||||||
this function can be used to communicate the information of the progress
|
This function can be used to communicate the information of
|
||||||
to the tracker
|
the progress to the tracker
|
||||||
|
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
msg : str
|
||||||
|
The message to be printed to tracker.
|
||||||
"""
|
"""
|
||||||
if not isinstance(msg, str):
|
if not isinstance(msg, str):
|
||||||
msg = str(msg)
|
msg = str(msg)
|
||||||
rbtlib.RabitTrackerPrint(ctypes.c_char_p(msg).encode('utf-8'))
|
_LIB.RabitTrackerPrint(ctypes.c_char_p(msg).encode('utf-8'))
|
||||||
check_err__()
|
_check_err()
|
||||||
|
|
||||||
def get_processor_name():
|
def get_processor_name():
|
||||||
"""
|
"""Get the processor name.
|
||||||
Returns the name of processor(host)
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
name : str
|
||||||
|
the name of processor(host)
|
||||||
"""
|
"""
|
||||||
mxlen = 256
|
mxlen = 256
|
||||||
length = ctypes.c_ulong()
|
length = ctypes.c_ulong()
|
||||||
buf = ctypes.create_string_buffer(mxlen)
|
buf = ctypes.create_string_buffer(mxlen)
|
||||||
rbtlib.RabitGetProcessorName(buf, ctypes.byref(length),
|
_LIB.RabitGetProcessorName(buf, ctypes.byref(length),
|
||||||
mxlen)
|
mxlen)
|
||||||
check_err__()
|
_check_err()
|
||||||
return buf.value
|
return buf.value
|
||||||
|
|
||||||
def broadcast(data, root):
|
def broadcast(data, root):
|
||||||
"""
|
"""Broadcast object from one node to all other nodes.
|
||||||
broadcast object from one node to all other nodes
|
|
||||||
this function will return the broadcasted object
|
|
||||||
|
|
||||||
Example: the following example broadcast hello from rank 0 to all other nodes
|
Parameters
|
||||||
```python
|
----------
|
||||||
rabit.init()
|
data : any type that can be pickled
|
||||||
n = 3
|
Input data, if current rank does not equal root, this can be None
|
||||||
rank = rabit.get_rank()
|
root : int
|
||||||
s = None
|
Rank of the node to broadcast data from.
|
||||||
if rank == 0:
|
|
||||||
s = {'hello world':100, 2:3}
|
|
||||||
print '@node[%d] before-broadcast: s=\"%s\"' % (rank, str(s))
|
|
||||||
s = rabit.broadcast(s, 0)
|
|
||||||
print '@node[%d] after-broadcast: s=\"%s\"' % (rank, str(s))
|
|
||||||
rabit.finalize()
|
|
||||||
```
|
|
||||||
|
|
||||||
Arguments:
|
Returns
|
||||||
data: anytype that can be pickled
|
-------
|
||||||
input data, if current rank does not equal root, this can be None
|
object : int
|
||||||
root: int
|
the result of broadcast.
|
||||||
rank of the node to broadcast data from
|
|
||||||
Returns:
|
|
||||||
the result of broadcast
|
|
||||||
"""
|
"""
|
||||||
rank = get_rank()
|
rank = get_rank()
|
||||||
length = ctypes.c_ulong()
|
length = ctypes.c_ulong()
|
||||||
@ -152,22 +166,22 @@ def broadcast(data, root):
|
|||||||
s = pickle.dumps(data, protocol=pickle.HIGHEST_PROTOCOL)
|
s = pickle.dumps(data, protocol=pickle.HIGHEST_PROTOCOL)
|
||||||
length.value = len(s)
|
length.value = len(s)
|
||||||
# run first broadcast
|
# run first broadcast
|
||||||
rbtlib.RabitBroadcast(ctypes.byref(length),
|
_LIB.RabitBroadcast(ctypes.byref(length),
|
||||||
ctypes.sizeof(ctypes.c_ulong),
|
ctypes.sizeof(ctypes.c_ulong),
|
||||||
root)
|
root)
|
||||||
check_err__()
|
_check_err()
|
||||||
if root != rank:
|
if root != rank:
|
||||||
dptr = (ctypes.c_char * length.value)()
|
dptr = (ctypes.c_char * length.value)()
|
||||||
# run second
|
# run second
|
||||||
rbtlib.RabitBroadcast(ctypes.cast(dptr, ctypes.c_void_p),
|
_LIB.RabitBroadcast(ctypes.cast(dptr, ctypes.c_void_p),
|
||||||
length.value, root)
|
length.value, root)
|
||||||
check_err__()
|
_check_err()
|
||||||
data = pickle.loads(dptr.raw)
|
data = pickle.loads(dptr.raw)
|
||||||
del dptr
|
del dptr
|
||||||
else:
|
else:
|
||||||
rbtlib.RabitBroadcast(ctypes.cast(ctypes.c_char_p(s), ctypes.c_void_p),
|
_LIB.RabitBroadcast(ctypes.cast(ctypes.c_char_p(s), ctypes.c_void_p),
|
||||||
length.value, root)
|
length.value, root)
|
||||||
check_err__()
|
_check_err()
|
||||||
del s
|
del s
|
||||||
return data
|
return data
|
||||||
|
|
||||||
@ -184,20 +198,28 @@ DTYPE_ENUM__ = {
|
|||||||
}
|
}
|
||||||
|
|
||||||
def allreduce(data, op, prepare_fun=None):
|
def allreduce(data, op, prepare_fun=None):
|
||||||
"""
|
"""Perform allreduce, return the result.
|
||||||
perform allreduce, return the result, this function is not thread-safe
|
|
||||||
Arguments:
|
Parameters
|
||||||
data: numpy ndarray
|
----------
|
||||||
input data
|
data: numpy array
|
||||||
|
Input data.
|
||||||
op: int
|
op: int
|
||||||
reduction operators, can be MIN, MAX, SUM, BITOR
|
Reduction operators, can be MIN, MAX, SUM, BITOR
|
||||||
prepare_fun: lambda data
|
prepare_fun: function
|
||||||
Lazy preprocessing function, if it is not None, prepare_fun(data)
|
Lazy preprocessing function, if it is not None, prepare_fun(data)
|
||||||
will be called by the function before performing allreduce, to intialize the data
|
will be called by the function before performing allreduce, to intialize the data
|
||||||
If the result of Allreduce can be recovered directly,
|
If the result of Allreduce can be recovered directly,
|
||||||
then prepare_fun will NOT be called
|
then prepare_fun will NOT be called
|
||||||
Returns:
|
|
||||||
the result of allreduce, have same shape as data
|
Returns
|
||||||
|
-------
|
||||||
|
result : array_like
|
||||||
|
The result of allreduce, have same shape as data
|
||||||
|
|
||||||
|
Notes
|
||||||
|
-----
|
||||||
|
This function is not thread-safe.
|
||||||
"""
|
"""
|
||||||
if not isinstance(data, np.ndarray):
|
if not isinstance(data, np.ndarray):
|
||||||
raise Exception('allreduce only takes in numpy.ndarray')
|
raise Exception('allreduce only takes in numpy.ndarray')
|
||||||
@ -207,7 +229,7 @@ def allreduce(data, op, prepare_fun=None):
|
|||||||
if buf.dtype not in DTYPE_ENUM__:
|
if buf.dtype not in DTYPE_ENUM__:
|
||||||
raise Exception('data type %s not supported' % str(buf.dtype))
|
raise Exception('data type %s not supported' % str(buf.dtype))
|
||||||
if prepare_fun is None:
|
if prepare_fun is None:
|
||||||
rbtlib.RabitAllreduce(buf.ctypes.data_as(ctypes.c_void_p),
|
_LIB.RabitAllreduce(buf.ctypes.data_as(ctypes.c_void_p),
|
||||||
buf.size, DTYPE_ENUM__[buf.dtype],
|
buf.size, DTYPE_ENUM__[buf.dtype],
|
||||||
op, None, None)
|
op, None, None)
|
||||||
else:
|
else:
|
||||||
@ -215,14 +237,14 @@ def allreduce(data, op, prepare_fun=None):
|
|||||||
def pfunc(args):
|
def pfunc(args):
|
||||||
"""prepare function."""
|
"""prepare function."""
|
||||||
prepare_fun(data)
|
prepare_fun(data)
|
||||||
rbtlib.RabitAllreduce(buf.ctypes.data_as(ctypes.c_void_p),
|
_LIB.RabitAllreduce(buf.ctypes.data_as(ctypes.c_void_p),
|
||||||
buf.size, DTYPE_ENUM__[buf.dtype],
|
buf.size, DTYPE_ENUM__[buf.dtype],
|
||||||
op, func_ptr(pfunc), None)
|
op, func_ptr(pfunc), None)
|
||||||
check_err__()
|
_check_err()
|
||||||
return buf
|
return buf
|
||||||
|
|
||||||
|
|
||||||
def load_model__(ptr, length):
|
def _load_model(ptr, length):
|
||||||
"""
|
"""
|
||||||
Internal function used by the module,
|
Internal function used by the module,
|
||||||
unpickle a model from a buffer specified by ptr, length
|
unpickle a model from a buffer specified by ptr, length
|
||||||
@ -236,12 +258,16 @@ def load_model__(ptr, length):
|
|||||||
return pickle.loads(data.raw)
|
return pickle.loads(data.raw)
|
||||||
|
|
||||||
def load_checkpoint(with_local=False):
|
def load_checkpoint(with_local=False):
|
||||||
"""
|
"""Load latest check point.
|
||||||
load latest check point
|
|
||||||
Arguments:
|
Parameters
|
||||||
with_local: boolean [default = False]
|
----------
|
||||||
|
with_local: bool, optional
|
||||||
whether the checkpoint contains local model
|
whether the checkpoint contains local model
|
||||||
Returns:
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
tuple : tuple
|
||||||
if with_local: return (version, gobal_model, local_model)
|
if with_local: return (version, gobal_model, local_model)
|
||||||
else return (version, gobal_model)
|
else return (version, gobal_model)
|
||||||
if returned version == 0, this means no model has been CheckPointed
|
if returned version == 0, this means no model has been CheckPointed
|
||||||
@ -252,62 +278,73 @@ def load_checkpoint(with_local=False):
|
|||||||
if with_local:
|
if with_local:
|
||||||
lptr = ctypes.POINTER(ctypes.c_char)()
|
lptr = ctypes.POINTER(ctypes.c_char)()
|
||||||
local_len = ctypes.c_ulong()
|
local_len = ctypes.c_ulong()
|
||||||
version = rbtlib.RabitLoadCheckPoint(
|
version = _LIB.RabitLoadCheckPoint(
|
||||||
ctypes.byref(gptr),
|
ctypes.byref(gptr),
|
||||||
ctypes.byref(global_len),
|
ctypes.byref(global_len),
|
||||||
ctypes.byref(lptr),
|
ctypes.byref(lptr),
|
||||||
ctypes.byref(local_len))
|
ctypes.byref(local_len))
|
||||||
check_err__()
|
_check_err()
|
||||||
if version == 0:
|
if version == 0:
|
||||||
return (version, None, None)
|
return (version, None, None)
|
||||||
return (version,
|
return (version,
|
||||||
load_model__(gptr, global_len.value),
|
_load_model(gptr, global_len.value),
|
||||||
load_model__(lptr, local_len.value))
|
_load_model(lptr, local_len.value))
|
||||||
else:
|
else:
|
||||||
version = rbtlib.RabitLoadCheckPoint(
|
version = _LIB.RabitLoadCheckPoint(
|
||||||
ctypes.byref(gptr),
|
ctypes.byref(gptr),
|
||||||
ctypes.byref(global_len),
|
ctypes.byref(global_len),
|
||||||
None, None)
|
None, None)
|
||||||
check_err__()
|
_check_err()
|
||||||
if version == 0:
|
if version == 0:
|
||||||
return (version, None)
|
return (version, None)
|
||||||
return (version,
|
return (version,
|
||||||
load_model__(gptr, global_len.value))
|
_load_model(gptr, global_len.value))
|
||||||
|
|
||||||
def checkpoint(global_model, local_model=None):
|
def checkpoint(global_model, local_model=None):
|
||||||
"""
|
"""Checkpoint the model.
|
||||||
checkpoint the model, meaning we finished a stage of execution
|
|
||||||
every time we call check point, there is a version number which will increase by one
|
|
||||||
|
|
||||||
Arguments:
|
This means we finished a stage of execution.
|
||||||
|
Every time we call check point, there is a version number which will increase by one.
|
||||||
|
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
global_model: anytype that can be pickled
|
global_model: anytype that can be pickled
|
||||||
globally shared model/state when calling this function,
|
globally shared model/state when calling this function,
|
||||||
the caller need to gauranttees that global_model is the same in all nodes
|
the caller need to gauranttees that global_model is the same in all nodes
|
||||||
|
|
||||||
local_model: anytype that can be pickled
|
local_model: anytype that can be pickled
|
||||||
local model, that is specific to current node/rank.
|
Local model, that is specific to current node/rank.
|
||||||
This can be None when no local state is needed.
|
This can be None when no local state is needed.
|
||||||
local_model requires explicit replication of the model for fault-tolerance,
|
|
||||||
which will bring replication cost in checkpoint function,
|
Notes
|
||||||
|
-----
|
||||||
|
local_model requires explicit replication of the model for fault-tolerance.
|
||||||
|
This will bring replication cost in checkpoint function.
|
||||||
while global_model do not need explicit replication.
|
while global_model do not need explicit replication.
|
||||||
It is recommended to use global_model if possible
|
It is recommended to use global_model if possible.
|
||||||
"""
|
"""
|
||||||
sglobal = pickle.dumps(global_model)
|
sglobal = pickle.dumps(global_model)
|
||||||
if local_model is None:
|
if local_model is None:
|
||||||
rbtlib.RabitCheckPoint(sglobal, len(sglobal), None, 0)
|
_LIB.RabitCheckPoint(sglobal, len(sglobal), None, 0)
|
||||||
check_err__()
|
_check_err()
|
||||||
del sglobal
|
del sglobal
|
||||||
else:
|
else:
|
||||||
slocal = pickle.dumps(local_model)
|
slocal = pickle.dumps(local_model)
|
||||||
rbtlib.RabitCheckPoint(sglobal, len(sglobal), slocal, len(slocal))
|
_LIB.RabitCheckPoint(sglobal, len(sglobal), slocal, len(slocal))
|
||||||
check_err__()
|
_check_err()
|
||||||
del slocal
|
del slocal
|
||||||
del sglobal
|
del sglobal
|
||||||
|
|
||||||
def version_number():
|
def version_number():
|
||||||
|
"""Returns version number of current stored model.
|
||||||
|
|
||||||
|
This means how many calls to CheckPoint we made so far.
|
||||||
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
version : int
|
||||||
|
Version number of currently stored model
|
||||||
"""
|
"""
|
||||||
Returns version number of current stored model,
|
ret = _LIB.RabitVersionNumber()
|
||||||
which means how many calls to CheckPoint we made so far
|
_check_err()
|
||||||
"""
|
|
||||||
ret = rbtlib.RabitVersionNumber()
|
|
||||||
check_err__()
|
|
||||||
return ret
|
return ret
|
||||||
|
|||||||
2
wrapper/requirements.txt
Normal file
2
wrapper/requirements.txt
Normal file
@ -0,0 +1,2 @@
|
|||||||
|
numpy==1.8.1
|
||||||
|
|
||||||
Loading…
x
Reference in New Issue
Block a user