From 7479791f6a3922bbca3187a3d712c3a1ebaa384a Mon Sep 17 00:00:00 2001 From: tqchen Date: Sat, 27 Feb 2016 10:14:26 -0800 Subject: [PATCH] refactor: librabit --- Makefile | 32 ++- doc/Doxyfile | 2 +- guide/basic.py | 4 +- include/README.md | 7 - include/rabit/c_api.h | 135 +++++++++++++ include/rabit/{ => internal}/engine.h | 11 +- include/rabit/{ => internal}/io.h | 8 +- include/rabit/{ => internal}/rabit-inl.h | 6 +- include/rabit/{ => internal}/timer.h | 6 +- include/rabit/{ => internal}/utils.h | 6 +- include/{ => rabit}/rabit.h | 34 ++-- .../serializable.h} | 6 +- {wrapper => python}/rabit.py | 65 ++++-- src/allreduce_base.h | 4 +- src/allreduce_mock.h | 4 +- src/allreduce_robust.cc | 9 +- src/allreduce_robust.h | 2 +- wrapper/rabit_wrapper.cc => src/c_api.cc | 185 ++++++++++-------- src/engine.cc | 2 +- src/socket.h | 2 +- test/Makefile | 22 +-- test/lazy_recover.cc | 19 +- test/local_recover.cc | 25 ++- test/model_recover.cc | 19 +- test/speed_test.cc | 11 +- test/test.mk | 18 +- windows/.gitignore | 9 - windows/README.md | 12 -- windows/basic/basic.vcxproj | 118 ----------- windows/rabit.sln | 50 ----- windows/rabit/rabit.vcxproj | 133 ------------- windows/rabit_wrapper/rabit_wrapper.vcxproj | 121 ------------ wrapper/rabit_wrapper.h | 126 ------------ 33 files changed, 412 insertions(+), 801 deletions(-) delete mode 100644 include/README.md create mode 100644 include/rabit/c_api.h rename include/rabit/{ => internal}/engine.h (98%) rename include/rabit/{ => internal}/io.h (96%) rename include/rabit/{ => internal}/rabit-inl.h (99%) rename include/rabit/{ => internal}/timer.h (91%) rename include/rabit/{ => internal}/utils.h (97%) rename include/{ => rabit}/rabit.h (96%) rename include/{rabit_serializable.h => rabit/serializable.h} (87%) rename {wrapper => python}/rabit.py (83%) rename wrapper/rabit_wrapper.cc => src/c_api.cc (60%) delete mode 100644 windows/.gitignore delete mode 100644 windows/README.md delete mode 100644 windows/basic/basic.vcxproj delete 
mode 100644 windows/rabit.sln delete mode 100644 windows/rabit/rabit.vcxproj delete mode 100644 windows/rabit_wrapper/rabit_wrapper.vcxproj delete mode 100644 wrapper/rabit_wrapper.h diff --git a/Makefile b/Makefile index 8c9d9f403..d494b9f37 100644 --- a/Makefile +++ b/Makefile @@ -22,17 +22,16 @@ BPATH=. # objectives that makes up rabit library MPIOBJ= $(BPATH)/engine_mpi.o OBJ= $(BPATH)/allreduce_base.o $(BPATH)/allreduce_robust.o $(BPATH)/engine.o $(BPATH)/engine_empty.o $(BPATH)/engine_mock.o\ - $(BPATH)/rabit_wrapper.o $(BPATH)/engine_base.o -SLIB= wrapper/librabit_wrapper.so wrapper/librabit_wrapper_mock.so wrapper/librabit_wrapper_mpi.so + $(BPATH)/c_api.o $(BPATH)/engine_base.o +SLIB= lib/librabit.so lib/librabit_mpi.so lib/librabit_mock.so lib/librabit_base.so ALIB= lib/librabit.a lib/librabit_mpi.a lib/librabit_empty.a lib/librabit_mock.a lib/librabit_base.a -HEADERS=src/*.h include/*.h include/rabit/*.h +HEADERS=src/*.h include/rabit/*.h include/rabit/internal/*.h DMLC=dmlc-core .PHONY: clean all install mpi python lint doc doxygen -all: lib/librabit.a lib/librabit_mock.a wrapper/librabit_wrapper.so wrapper/librabit_wrapper_mock.so lib/librabit_base.a -mpi: lib/librabit_mpi.a wrapper/librabit_wrapper_mpi.so -python: wrapper/librabit_wrapper.so wrapper/librabit_wrapper_mock.so +all: lib/librabit.a lib/librabit_mock.a lib/librabit.so lib/librabit_base.a lib/librabit_mock.so +mpi: lib/librabit_mpi.a lib/librabit_mpi.so $(BPATH)/allreduce_base.o: src/allreduce_base.cc $(HEADERS) $(BPATH)/engine.o: src/engine.cc $(HEADERS) @@ -41,17 +40,13 @@ $(BPATH)/engine_mpi.o: src/engine_mpi.cc $(HEADERS) $(BPATH)/engine_empty.o: src/engine_empty.cc $(HEADERS) $(BPATH)/engine_mock.o: src/engine_mock.cc $(HEADERS) $(BPATH)/engine_base.o: src/engine_base.cc $(HEADERS) +$(BPATH)/c_api.o: src/c_api.cc $(HEADERS) -lib/librabit.a: $(BPATH)/allreduce_base.o $(BPATH)/allreduce_robust.o $(BPATH)/engine.o -lib/librabit_base.a: $(BPATH)/allreduce_base.o $(BPATH)/engine_base.o 
-lib/librabit_mock.a: $(BPATH)/allreduce_base.o $(BPATH)/allreduce_robust.o $(BPATH)/engine_mock.o -lib/librabit_empty.a: $(BPATH)/engine_empty.o -lib/librabit_mpi.a: $(MPIOBJ) -# wrapper code -$(BPATH)/rabit_wrapper.o: wrapper/rabit_wrapper.cc -wrapper/librabit_wrapper.so: $(BPATH)/rabit_wrapper.o lib/librabit.a -wrapper/librabit_wrapper_mock.so: $(BPATH)/rabit_wrapper.o lib/librabit_mock.a -wrapper/librabit_wrapper_mpi.so: $(BPATH)/rabit_wrapper.o lib/librabit_mpi.a +lib/librabit.a lib/librabit.so: $(BPATH)/allreduce_base.o $(BPATH)/allreduce_robust.o $(BPATH)/engine.o $(BPATH)/c_api.o +lib/librabit_base.a lib/librabit_base.so: $(BPATH)/allreduce_base.o $(BPATH)/engine_base.o $(BPATH)/c_api.o +lib/librabit_mock.a lib/librabit_mock.so: $(BPATH)/allreduce_base.o $(BPATH)/allreduce_robust.o $(BPATH)/engine_mock.o $(BPATH)/c_api.o +lib/librabit_empty.a: $(BPATH)/engine_empty.o $(BPATH)/c_api.o +lib/librabit_mpi.a lib/librabit_mpi.so: $(MPIOBJ) $(OBJ) : $(CXX) -c $(CFLAGS) -o $@ $(firstword $(filter %.cpp %.c %.cc, $^) ) @@ -66,11 +61,10 @@ $(SLIB) : $(CXX) $(CFLAGS) -shared -o $@ $(filter %.cpp %.o %.c %.cc %.a, $^) $(LDFLAGS) lint: - $(DMLC)/scripts/lint.py rabit $(LINT_LANG) src include wrapper + $(DMLC)/scripts/lint.py rabit $(LINT_LANG) src include doc doxygen: cd include; doxygen ../doc/Doxyfile; cd - clean: - $(RM) $(OBJ) $(MPIOBJ) $(ALIB) $(MPIALIB) $(SLIB) *~ src/*~ include/*~ include/*/*~ wrapper/*~ - + $(RM) $(OBJ) $(MPIOBJ) $(ALIB) $(MPIALIB) $(SLIB) *~ src/*~ include/*~ include/*/*~ diff --git a/doc/Doxyfile b/doc/Doxyfile index 2c9c64ea7..74cd68ea5 100644 --- a/doc/Doxyfile +++ b/doc/Doxyfile @@ -95,7 +95,7 @@ WARN_LOGFILE = #--------------------------------------------------------------------------- # configuration options related to the input files #--------------------------------------------------------------------------- -INPUT = . 
dmlc +INPUT = rabit dmlc INPUT_ENCODING = UTF-8 FILE_PATTERNS = RECURSIVE = NO diff --git a/guide/basic.py b/guide/basic.py index becdae07d..c1f1c2f4b 100755 --- a/guide/basic.py +++ b/guide/basic.py @@ -7,7 +7,7 @@ import sys import numpy as np # import rabit, the tracker script will setup the lib path correctly # for normal run without tracker script, add following line -# sys.path.append(os.path.dirname(__file__) + '/../wrapper') +# sys.path.append(os.path.dirname(__file__) + '/../python') import rabit rabit.init() @@ -16,7 +16,7 @@ rank = rabit.get_rank() a = np.zeros(n) for i in xrange(n): a[i] = rank + i - + print '@node[%d] before-allreduce: a=%s' % (rank, str(a)) a = rabit.allreduce(a, rabit.MAX) print '@node[%d] after-allreduce-max: a=%s' % (rank, str(a)) diff --git a/include/README.md b/include/README.md deleted file mode 100644 index 2512edc78..000000000 --- a/include/README.md +++ /dev/null @@ -1,7 +0,0 @@ -Library Header Files -==== -* This folder contains all the header needed to use the library -* To use it, add the "include" folder to the search path of the compiler -* User only needs to know [rabit.h](rabit.h) and [rabit_serializable.h](rabit_serializable.h) in order to use the library -* Folder [rabit](rabit) contains headers for internal engine and template's implementation -* Not all .h files in the project are in the "include" folder, .h files that are internally used by the library remain at [src](../src) diff --git a/include/rabit/c_api.h b/include/rabit/c_api.h new file mode 100644 index 000000000..10820fad1 --- /dev/null +++ b/include/rabit/c_api.h @@ -0,0 +1,135 @@ +/*! + * Copyright by Contributors + * \file rabit_c_api.h + * \author Tianqi Chen + * \brief a C style API of rabit. 
+ */ +#ifndef RABIT_C_API_H_ +#define RABIT_C_API_H_ + +#ifdef __cplusplus +#define RABIT_EXTERN_C extern "C" +#endif + +#if defined(_MSC_VER) || defined(_WIN32) +#define RABIT_DLL RABIT_EXTERN_C __declspec(dllexport) +#else +#define RABIT_DLL RABIT_EXTERN_C +#endif + +// manually define unsign long +typedef unsigned long rbt_ulong; // NOLINT(*) + +/*! + * \brief intialize the rabit module, + * call this once before using anything + * The additional arguments is not necessary. + * Usually rabit will detect settings + * from environment variables. + * \param argc number of arguments in argv + * \param argv the array of input arguments + */ +RABIT_DLL void RabitInit(int argc, char *argv[]); + +/*! + * \brief finalize the rabit engine, + * call this function after you finished all jobs. + */ +RABIT_DLL void RabitFinalize(); + +/*! \brief get rank of current process */ +RABIT_DLL int RabitGetRank(); + +/*! \brief get total number of process */ +RABIT_DLL int RabitGetWorldSize(); + +/*! + * \brief print the msg to the tracker, + * this function can be used to communicate the information of the progress to + * the user who monitors the tracker + * \param msg the message to be printed + */ +RABIT_DLL void RabitTrackerPrint(const char *msg); +/*! + * \brief get name of processor + * \param out_name hold output string + * \param out_len hold length of output string + * \param max_len maximum buffer length of input + */ +RABIT_DLL void RabitGetProcessorName(char *out_name, + rbt_ulong *out_len, + rbt_ulong max_len); +/*! + * \brief broadcast an memory region to all others from root + * + * Example: int a = 1; Broadcast(&a, sizeof(a), root); + * \param sendrecv_data the pointer to send or recive buffer, + * \param size the size of the data + * \param root the root of process + */ +RABIT_DLL void RabitBroadcast(void *sendrecv_data, + rbt_ulong size, int root); +/*! 
+ * \brief perform in-place allreduce, on sendrecvbuf + * this function is NOT thread-safe + * + * Example Usage: the following code gives sum of the result + * vector data(10); + * ... + * Allreduce(&data[0], data.size()); + * ... + * \param sendrecvbuf buffer for both sending and recving data + * \param count number of elements to be reduced + * \param enum_dtype the enumeration of data type, see rabit::engine::mpi::DataType in engine.h of rabit include + * \param enum_op the enumeration of operation type, see rabit::engine::mpi::OpType in engine.h of rabit + * \param prepare_fun Lazy preprocessing function, if it is not NULL, prepare_fun(prepare_arg) + * will be called by the function before performing Allreduce, to intialize the data in sendrecvbuf_. + * If the result of Allreduce can be recovered directly, then prepare_func will NOT be called + * \param prepare_arg argument used to passed into the lazy preprocessing function + */ +RABIT_DLL void RabitAllreduce(void *sendrecvbuf, + size_t count, + int enum_dtype, + int enum_op, + void (*prepare_fun)(void *arg), + void *prepare_arg); + +/*! + * \brief load latest check point + * \param out_global_model hold output of serialized global_model + * \param out_global_len the output length of serialized global model + * \param out_local_model hold output of serialized local_model, can be NULL + * \param out_local_len the output length of serialized local model, can be NULL + * + * \return the version number of check point loaded + * if returned version == 0, this means no model has been CheckPointed + * nothing will be touched + */ +RABIT_DLL int RabitLoadCheckPoint(char **out_global_model, + rbt_ulong *out_global_len, + char **out_local_model, + rbt_ulong *out_local_len); +/*! 
+ * \brief checkpoint the model, meaning we finished a stage of execution + * every time we call check point, there is a version number which will increase by one + * + * \param global_model hold content of serialized global_model + * \param global_len the content length of serialized global model + * \param local_model hold content of serialized local_model, can be NULL + * \param local_len the content length of serialized local model, can be NULL + * + * NOTE: local_model requires explicit replication of the model for fault-tolerance, which will + * bring replication cost in CheckPoint function. global_model do not need explicit replication. + * So only CheckPoint with global_model if possible + */ +RABIT_DLL void RabitCheckPoint(const char *global_model, + rbt_ulong global_len, + const char *local_model, + rbt_ulong local_len); +/*! + * \return version number of current stored model, + * which means how many calls to CheckPoint we made so far + */ +RABIT_DLL int RabitVersionNumber(); + +#endif // RABIT_C_API_H_ diff --git a/include/rabit/engine.h b/include/rabit/internal/engine.h similarity index 98% rename from include/rabit/engine.h rename to include/rabit/internal/engine.h index 360334808..6a7dfe4a3 100644 --- a/include/rabit/engine.h +++ b/include/rabit/internal/engine.h @@ -4,10 +4,10 @@ * \brief This file defines the core interface of rabit library * \author Tianqi Chen, Nacho, Tianyi */ -#ifndef RABIT_ENGINE_H_ -#define RABIT_ENGINE_H_ +#ifndef RABIT_INTERNAL_ENGINE_H_ +#define RABIT_INTERNAL_ENGINE_H_ #include -#include "../rabit_serializable.h" +#include "../serializable.h" namespace MPI { /*! 
\brief MPI data type just to be compatible with MPI reduce function*/ @@ -241,7 +241,8 @@ class ReduceHandle { * \param prepare_arg argument used to pass into the lazy preprocessing function */ void Allreduce(void *sendrecvbuf, - size_t type_nbytes, size_t count, + size_t type_nbytes, + size_t count, IEngine::PreprocFunction prepare_fun = NULL, void *prepare_arg = NULL); /*! \return the number of bytes occupied by the type */ @@ -259,4 +260,4 @@ class ReduceHandle { }; } // namespace engine } // namespace rabit -#endif // RABIT_ENGINE_H_ +#endif // RABIT_INTERNAL_ENGINE_H_ diff --git a/include/rabit/io.h b/include/rabit/internal/io.h similarity index 96% rename from include/rabit/io.h rename to include/rabit/internal/io.h index 7ffca38f2..92810e4e2 100644 --- a/include/rabit/io.h +++ b/include/rabit/internal/io.h @@ -4,15 +4,15 @@ * \brief utilities with different serializable implementations * \author Tianqi Chen */ -#ifndef RABIT_IO_H_ -#define RABIT_IO_H_ +#ifndef RABIT_INTERNAL_IO_H_ +#define RABIT_INTERNAL_IO_H_ #include #include #include #include #include #include "./utils.h" -#include "../rabit_serializable.h" +#include "../serializable.h" namespace rabit { namespace utils { @@ -103,4 +103,4 @@ struct MemoryBufferStream : public SeekStream { }; // class MemoryBufferStream } // namespace utils } // namespace rabit -#endif // RABIT_IO_H_ +#endif // RABIT_INTERNAL_IO_H_ diff --git a/include/rabit/rabit-inl.h b/include/rabit/internal/rabit-inl.h similarity index 99% rename from include/rabit/rabit-inl.h rename to include/rabit/internal/rabit-inl.h index e82b5a9a0..7536c184f 100644 --- a/include/rabit/rabit-inl.h +++ b/include/rabit/internal/rabit-inl.h @@ -5,8 +5,8 @@ * * \author Tianqi Chen */ -#ifndef RABIT_RABIT_INL_H_ -#define RABIT_RABIT_INL_H_ +#ifndef RABIT_INTERNAL_RABIT_INL_H_ +#define RABIT_INTERNAL_RABIT_INL_H_ // use engine for implementation #include #include @@ -325,4 +325,4 @@ inline void SerializeReducer::Allreduce(DType *sendrecvobj, } #endif } 
// namespace rabit -#endif // RABIT_RABIT_INL_H_ +#endif // RABIT_INTERNAL_RABIT_INL_H_ diff --git a/include/rabit/timer.h b/include/rabit/internal/timer.h similarity index 91% rename from include/rabit/timer.h rename to include/rabit/internal/timer.h index 1f135add6..c838028cf 100644 --- a/include/rabit/timer.h +++ b/include/rabit/internal/timer.h @@ -4,8 +4,8 @@ * \brief This file defines the utils for timing * \author Tianqi Chen, Nacho, Tianyi */ -#ifndef RABIT_TIMER_H_ -#define RABIT_TIMER_H_ +#ifndef RABIT_INTERNAL_TIMER_H_ +#define RABIT_INTERNAL_TIMER_H_ #include #ifdef __MACH__ #include @@ -38,4 +38,4 @@ inline double GetTime(void) { } } // namespace utils } // namespace rabit -#endif // RABIT_TIMER_H_ +#endif // RABIT_INTERNAL_TIMER_H_ diff --git a/include/rabit/utils.h b/include/rabit/internal/utils.h similarity index 97% rename from include/rabit/utils.h rename to include/rabit/internal/utils.h index 28709ee7d..cf8fab67d 100644 --- a/include/rabit/utils.h +++ b/include/rabit/internal/utils.h @@ -4,8 +4,8 @@ * \brief simple utils to support the code * \author Tianqi Chen */ -#ifndef RABIT_UTILS_H_ -#define RABIT_UTILS_H_ +#ifndef RABIT_INTERNAL_UTILS_H_ +#define RABIT_INTERNAL_UTILS_H_ #define _CRT_SECURE_NO_WARNINGS #include #include @@ -188,4 +188,4 @@ inline const char* BeginPtr(const std::string &str) { return &str[0]; } } // namespace rabit -#endif // RABIT_UTILS_H_ +#endif // RABIT_INTERNAL_UTILS_H_ diff --git a/include/rabit.h b/include/rabit/rabit.h similarity index 96% rename from include/rabit.h rename to include/rabit/rabit.h index b0f1df39c..70660eafe 100644 --- a/include/rabit.h +++ b/include/rabit/rabit.h @@ -22,15 +22,24 @@ #if DMLC_USE_CXX11 #include #endif // C++11 -// contains definition of Serializable -#include "./rabit_serializable.h" // engine definition of rabit, defines internal implementation // to use rabit interface, there is no need to read engine.h // rabit.h and serializable.h are enough to use the interface -#include 
"./rabit/engine.h" +#include "./internal/engine.h" /*! \brief rabit namespace */ namespace rabit { +/*! + * \brief defines stream used in rabit + * see definition of Stream in dmlc/io.h + */ +typedef dmlc::Stream Stream; +/*! + * \brief defines serializable objects used in rabit + * see definition of Serializable in dmlc/io.h + */ +typedef dmlc::Serializable Serializable; + /*! * \brief reduction operators namespace */ @@ -65,16 +74,16 @@ inline void Init(int argc, char *argv[]); /*! * \brief finalizes the rabit engine, call this function after you finished with all the jobs */ -inline void Finalize(void); +inline void Finalize(); /*! \brief gets rank of the current process */ -inline int GetRank(void); +inline int GetRank(); /*! \brief gets total number of processes */ -inline int GetWorldSize(void); +inline int GetWorldSize(); /*! \brief whether rabit env is in distributed mode */ -inline bool IsDistributed(void); +inline bool IsDistributed(); /*! \brief gets processor's name */ -inline std::string GetProcessorName(void); +inline std::string GetProcessorName(); /*! * \brief prints the msg to the tracker, * this function can be used to communicate progress information to @@ -241,7 +250,7 @@ inline void LazyCheckPoint(const Serializable *global_model); * which means how many calls to CheckPoint we made so far * \sa LoadCheckPoint, CheckPoint */ -inline int VersionNumber(void); +inline int VersionNumber(); // ----- extensions that allow customized reducer ------ // helper class to do customized reduce, user do not need to know the type namespace engine { @@ -258,7 +267,7 @@ class ReduceHandle; template // NOLINT(*) class Reducer { public: - Reducer(void); + Reducer(); /*! * \brief customized in-place all reduce operation * \param sendrecvbuf the in place send-recv buffer @@ -299,7 +308,7 @@ class Reducer { template class SerializeReducer { public: - SerializeReducer(void); + SerializeReducer(); /*! 
* \brief customized in-place all reduce operation * \param sendrecvobj pointer to the array of objects to be reduced @@ -338,5 +347,6 @@ class SerializeReducer { }; } // namespace rabit // implementation of template functions -#include "./rabit/rabit-inl.h" +#include "./internal/ +rabit-inl.h" #endif // RABIT_RABIT_H_ // NOLINT(*) diff --git a/include/rabit_serializable.h b/include/rabit/serializable.h similarity index 87% rename from include/rabit_serializable.h rename to include/rabit/serializable.h index c9199bba1..4a3c2a115 100644 --- a/include/rabit_serializable.h +++ b/include/rabit/serializable.h @@ -1,6 +1,6 @@ /*! * Copyright (c) 2014 by Contributors - * \file rabit_serializable.h + * \file serializable.h * \brief defines serializable interface of rabit * \author Tianqi Chen */ @@ -8,8 +8,8 @@ #define RABIT_SERIALIZABLE_H_ #include #include -#include "./rabit/utils.h" -#include "./dmlc/io.h" +#include "./internal/utils.h" +#include "../dmlc/io.h" namespace rabit { /*! diff --git a/wrapper/rabit.py b/python/rabit.py similarity index 83% rename from wrapper/rabit.py rename to python/rabit.py index 91ce3e6ae..d57587baa 100644 --- a/wrapper/rabit.py +++ b/python/rabit.py @@ -14,29 +14,58 @@ import numpy as np # version information about the doc __version__ = '1.0' -if os.name == 'nt': - WRAPPER_PATH = os.path.dirname(__file__) + '\\..\\windows\\x64\\Release\\rabit_wrapper%s.dll' -else: - WRAPPER_PATH = os.path.dirname(__file__) + '/librabit_wrapper%s.so' - _LIB = None +def _find_lib_path(dll_name): + """Find the rabit dynamic library files. 
+ + Returns + ------- + lib_path: list(string) + List of all found library path to rabit + """ + curr_path = os.path.dirname(os.path.abspath(os.path.expanduser(__file__))) + # make pythonpack hack: copy this directory one level upper for setup.py + dll_path = [curr_path, + os.path.join(curr_path, '../lib/'), + os.path.join(curr_path, './lib/')] + if os.name == 'nt': + dll_path = [os.path.join(p, dll_name) for p in dll_path] + else: + dll_path = [os.path.join(p, dll_name) for p in dll_path] + lib_path = [p for p in dll_path if os.path.exists(p) and os.path.isfile(p)] + #From github issues, most of installation errors come from machines w/o compilers + if len(lib_path) == 0 and not os.environ.get('XGBOOST_BUILD_DOC', False): + raise RuntimeError( + 'Cannot find Rabit Libarary in the candicate path, ' + + 'did you install compilers and run build.sh in root path?\n' + 'List of candidates:\n' + ('\n'.join(dll_path))) + return lib_path + # load in xgboost library -def _loadlib(lib='standard'): +def _loadlib(lib='standard', lib_dll=None): """Load rabit library.""" global _LIB - if _LIB != None: + if _LIB is not None: warnings.warn('rabit.int call was ignored because it has'\ ' already been initialized', level=2) return + + if lib_dll is not None: + _LIB = lib_dll + return + if lib == 'standard': - _LIB = ctypes.cdll.LoadLibrary(WRAPPER_PATH % '') - elif lib == 'mock': - _LIB = ctypes.cdll.LoadLibrary(WRAPPER_PATH % '_mock') - elif lib == 'mpi': - _LIB = ctypes.cdll.LoadLibrary(WRAPPER_PATH % '_mpi') + dll_name = 'librabit' else: - raise Exception('unknown rabit lib %s, can be standard, mock, mpi' % lib) + dll_name = 'librabit_' + lib + + if os.name == 'nt': + dll_name += '.dll' + else: + dll_name += '.so' + + _LIB = ctypes.cdll.LoadLibrary(_find_lib_path(dll_name)[0]) _LIB.RabitGetRank.restype = ctypes.c_int _LIB.RabitGetWorldSize.restype = ctypes.c_int _LIB.RabitVersionNumber.restype = ctypes.c_int @@ -53,7 +82,7 @@ MIN = 1 SUM = 2 BITOR = 3 -def init(args=None, 
lib='standard'): +def init(args=None, lib='standard', lib_dll=None): """Intialize the rabit module, call this once before using anything. Parameters @@ -62,12 +91,16 @@ def init(args=None, lib='standard'): The list of arguments used to initialized the rabit usually you need to pass in sys.argv. Defaults to sys.argv when it is None. - lib: {'standard', 'mock', 'mpi'} + lib: {'standard', 'mock', 'mpi'}, optional Type of library we want to load + When cdll is specified + lib_dll: ctypes.DLL, optional + The DLL object used as lib. + When this is presented argument lib will be ignored. """ if args is None: args = sys.argv - _loadlib(lib) + _loadlib(lib, lib_dll) arr = (ctypes.c_char_p * len(args))() arr[:] = args _LIB.RabitInit(len(args), arr) diff --git a/src/allreduce_base.h b/src/allreduce_base.h index 63acd75d5..9a2cb3fb9 100644 --- a/src/allreduce_base.h +++ b/src/allreduce_base.h @@ -15,8 +15,8 @@ #include #include #include -#include "../include/rabit/utils.h" -#include "../include/rabit/engine.h" +#include "../include/rabit/internal/utils.h" +#include "../include/rabit/internal/engine.h" #include "./socket.h" namespace MPI { diff --git a/src/allreduce_mock.h b/src/allreduce_mock.h index c3f9f4f1d..68590d1a1 100644 --- a/src/allreduce_mock.h +++ b/src/allreduce_mock.h @@ -11,8 +11,8 @@ #include #include #include -#include "../include/rabit/engine.h" -#include "../include/rabit/timer.h" +#include "../include/rabit/internal/engine.h" +#include "../include/rabit/internal/timer.h" #include "./allreduce_robust.h" namespace rabit { diff --git a/src/allreduce_robust.cc b/src/allreduce_robust.cc index 8137c955a..3fd76782a 100644 --- a/src/allreduce_robust.cc +++ b/src/allreduce_robust.cc @@ -10,10 +10,10 @@ #define NOMINMAX #include #include -#include "../include/rabit/io.h" -#include "../include/rabit/utils.h" -#include "../include/rabit/engine.h" -#include "../include/rabit/rabit-inl.h" +#include "../include/rabit/internal/io.h" +#include 
"../include/rabit/internal/utils.h" +#include "../include/rabit/internal/engine.h" +#include "../include/rabit/internal/rabit-inl.h" #include "./allreduce_robust.h" namespace rabit { @@ -1180,4 +1180,3 @@ AllreduceRobust::RingPassing(void *sendrecvbuf_, } } // namespace engine } // namespace rabit - diff --git a/src/allreduce_robust.h b/src/allreduce_robust.h index caf2e57af..46e9f69c4 100644 --- a/src/allreduce_robust.h +++ b/src/allreduce_robust.h @@ -13,7 +13,7 @@ #include #include #include -#include "../include/rabit/engine.h" +#include "../include/rabit/internal/engine.h" #include "./allreduce_base.h" namespace rabit { diff --git a/wrapper/rabit_wrapper.cc b/src/c_api.cc similarity index 60% rename from wrapper/rabit_wrapper.cc rename to src/c_api.cc index 7025b3ffe..c268fea72 100644 --- a/wrapper/rabit_wrapper.cc +++ b/src/c_api.cc @@ -5,10 +5,11 @@ #include #include -#include "../include/rabit.h" -#include "./rabit_wrapper.h" +#include "../include/rabit/rabit.h" +#include "../include/rabit/c_api.h" + namespace rabit { -namespace wrapper { +namespace c_api { // helper use to avoid BitOR operator template struct FHelper { @@ -21,6 +22,7 @@ struct FHelper { prepare_fun, prepare_arg); } }; + template struct FHelper { inline static void @@ -31,6 +33,7 @@ struct FHelper { utils::Error("DataType does not support bitwise or operation"); } }; + template inline void Allreduce_(void *sendrecvbuf_, size_t count, @@ -117,8 +120,9 @@ inline void Allreduce(void *sendrecvbuf, default: utils::Error("unknown enum_op"); } } -// temporal memory for global and local model -std::string global_buffer, local_buffer; + + + // wrapper for serialization struct ReadWrapper : public Serializable { std::string *p_str; @@ -138,6 +142,7 @@ struct ReadWrapper : public Serializable { utils::Error("not implemented"); } }; + struct WriteWrapper : public Serializable { const char *data; size_t length; @@ -154,87 +159,101 @@ struct WriteWrapper : public Serializable { fo->Write(data, length * 
sizeof(char)); } }; -} // namespace wrapper +} // namespace c_api } // namespace rabit -extern "C" { - void RabitInit(int argc, char *argv[]) { - rabit::Init(argc, argv); + +void RabitInit(int argc, char *argv[]) { + rabit::Init(argc, argv); +} + +void RabitFinalize() { + rabit::Finalize(); +} + +int RabitGetRank() { + return rabit::GetRank(); +} + +int RabitGetWorldSize() { + return rabit::GetWorldSize(); +} + +void RabitTrackerPrint(const char *msg) { + std::string m(msg); + rabit::TrackerPrint(m); +} + +void RabitGetProcessorName(char *out_name, + rbt_ulong *out_len, + rbt_ulong max_len) { + std::string s = rabit::GetProcessorName(); + if (s.length() > max_len) { + s.resize(max_len - 1); } - void RabitFinalize(void) { - rabit::Finalize(); - } - int RabitGetRank(void) { - return rabit::GetRank(); - } - int RabitGetWorldSize(void) { - return rabit::GetWorldSize(); - } - void RabitTrackerPrint(const char *msg) { - std::string m(msg); - rabit::TrackerPrint(m); - } - void RabitGetProcessorName(char *out_name, - rbt_ulong *out_len, - rbt_ulong max_len) { - std::string s = rabit::GetProcessorName(); - if (s.length() > max_len) { - s.resize(max_len - 1); - } - strcpy(out_name, s.c_str()); // NOLINT(*) - *out_len = static_cast(s.length()); - } - void RabitBroadcast(void *sendrecv_data, - rbt_ulong size, int root) { - rabit::Broadcast(sendrecv_data, size, root); - } - void RabitAllreduce(void *sendrecvbuf, + strcpy(out_name, s.c_str()); // NOLINT(*) + *out_len = static_cast(s.length()); +} + +void RabitBroadcast(void *sendrecv_data, + rbt_ulong size, int root) { + rabit::Broadcast(sendrecv_data, size, root); +} + +void RabitAllreduce(void *sendrecvbuf, size_t count, - int enum_dtype, - int enum_op, - void (*prepare_fun)(void *arg), - void *prepare_arg) { - rabit::wrapper::Allreduce - (sendrecvbuf, count, - static_cast(enum_dtype), - static_cast(enum_op), - prepare_fun, prepare_arg); + int enum_dtype, + int enum_op, + void (*prepare_fun)(void *arg), + void *prepare_arg) { 
+ rabit::c_api::Allreduce + (sendrecvbuf, count, + static_cast(enum_dtype), + static_cast(enum_op), + prepare_fun, prepare_arg); +} + +int RabitLoadCheckPoint(char **out_global_model, + rbt_ulong *out_global_len, + char **out_local_model, + rbt_ulong *out_local_len) { + // NOTE: this function is not thread-safe + using rabit::BeginPtr; + using namespace rabit::c_api; // NOLINT(*) + static std::string global_buffer; + static std::string local_buffer; + + ReadWrapper sg(&global_buffer); + ReadWrapper sl(&local_buffer); + int version; + + if (out_local_model == NULL) { + version = rabit::LoadCheckPoint(&sg, NULL); + *out_global_model = BeginPtr(global_buffer); + *out_global_len = static_cast(global_buffer.length()); + } else { + version = rabit::LoadCheckPoint(&sg, &sl); + *out_global_model = BeginPtr(global_buffer); + *out_global_len = static_cast(global_buffer.length()); + *out_local_model = BeginPtr(local_buffer); + *out_local_len = static_cast(local_buffer.length()); } - int RabitLoadCheckPoint(char **out_global_model, - rbt_ulong *out_global_len, - char **out_local_model, - rbt_ulong *out_local_len) { - using rabit::BeginPtr; - using namespace rabit::wrapper; - ReadWrapper sg(&global_buffer); - ReadWrapper sl(&local_buffer); - int version; - if (out_local_model == NULL) { - version = rabit::LoadCheckPoint(&sg, NULL); - *out_global_model = BeginPtr(global_buffer); - *out_global_len = static_cast(global_buffer.length()); - } else { - version = rabit::LoadCheckPoint(&sg, &sl); - *out_global_model = BeginPtr(global_buffer); - *out_global_len = static_cast(global_buffer.length()); - *out_local_model = BeginPtr(local_buffer); - *out_local_len = static_cast(local_buffer.length()); - } - return version; - } - void RabitCheckPoint(const char *global_model, - rbt_ulong global_len, - const char *local_model, - rbt_ulong local_len) { - using namespace rabit::wrapper; - WriteWrapper sg(global_model, global_len); - WriteWrapper sl(local_model, local_len); - if (local_model == 
NULL) { - rabit::CheckPoint(&sg, NULL); - } else { - rabit::CheckPoint(&sg, &sl); - } - } - int RabitVersionNumber(void) { - return rabit::VersionNumber(); + return version; +} + +void RabitCheckPoint(const char *global_model, + rbt_ulong global_len, + const char *local_model, + rbt_ulong local_len) { + using namespace rabit::c_api; // NOLINT(*) + WriteWrapper sg(global_model, global_len); + WriteWrapper sl(local_model, local_len); + if (local_model == NULL) { + rabit::CheckPoint(&sg, NULL); + } else { + rabit::CheckPoint(&sg, &sl); } } + +int RabitVersionNumber() { + return rabit::VersionNumber(); +} diff --git a/src/engine.cc b/src/engine.cc index 0f4770fe2..296775d85 100644 --- a/src/engine.cc +++ b/src/engine.cc @@ -10,7 +10,7 @@ #define _CRT_SECURE_NO_DEPRECATE #define NOMINMAX -#include "../include/rabit/engine.h" +#include "../include/rabit/internal/engine.h" #include "./allreduce_base.h" #include "./allreduce_robust.h" diff --git a/src/socket.h b/src/socket.h index 6df7a7b78..f605ab028 100644 --- a/src/socket.h +++ b/src/socket.h @@ -25,7 +25,7 @@ #endif #include #include -#include "../include/rabit/utils.h" +#include "../include/rabit/internal/utils.h" #if defined(_WIN32) typedef int ssize_t; diff --git a/test/Makefile b/test/Makefile index 62e4e17f0..2171c269e 100644 --- a/test/Makefile +++ b/test/Makefile @@ -1,7 +1,7 @@ export CC = gcc export CXX = g++ export MPICXX = mpicxx -export LDFLAGS= -L../lib -pthread -lm -lrt +export LDFLAGS= -L../lib -pthread -lm -lrt export CFLAGS = -Wall -O3 -msse2 -Wno-unknown-pragmas -fPIC -I../include -std=c++0x # specify tensor path @@ -10,16 +10,16 @@ OBJ = $(RABIT_OBJ) speed_test.o model_recover.o local_recover.o lazy_recover.o MPIBIN = speed_test.mpi .PHONY: clean all lib mpi -all: $(BIN) $(MPIBIN) +all: $(BIN) lib: cd ..;make;cd - mpi: cd ..;make mpi;cd - -# programs -speed_test.o: speed_test.cc ../include/*.h lib mpi -model_recover.o: model_recover.cc ../include/*.h lib -local_recover.o: local_recover.cc 
../include/*.h lib -lazy_recover.o: lazy_recover.cc ../include/*.h lib +# programs +speed_test.o: speed_test.cc ../include/rabit/*.h lib mpi +model_recover.o: model_recover.cc ../include/rabit/*.h lib +local_recover.o: local_recover.cc ../include/rabit/*.h lib +lazy_recover.o: lazy_recover.cc ../include/rabit/*.h lib # we can link against MPI version to get use MPI speed_test: speed_test.o $(RABIT_OBJ) @@ -28,13 +28,13 @@ model_recover: model_recover.o $(RABIT_OBJ) local_recover: local_recover.o $(RABIT_OBJ) lazy_recover: lazy_recover.o $(RABIT_OBJ) -$(BIN) : - $(CXX) $(CFLAGS) -o $@ $(filter %.cpp %.o %.c %.cc, $^) -lrabit_mock $(LDFLAGS) +$(BIN) : + $(CXX) $(CFLAGS) -o $@ $(filter %.cpp %.o %.c %.cc, $^) -lrabit_mock $(LDFLAGS) -$(OBJ) : +$(OBJ) : $(CXX) -c $(CFLAGS) -o $@ $(firstword $(filter %.cpp %.c %.cc, $^) ) -$(MPIBIN) : +$(MPIBIN) : $(MPICXX) $(CFLAGS) -o $@ $(filter %.cpp %.o %.c %.cc, $^) $(LDFLAGS) -lrabit_mpi clean: diff --git a/test/lazy_recover.cc b/test/lazy_recover.cc index 610a20664..dd64294bf 100644 --- a/test/lazy_recover.cc +++ b/test/lazy_recover.cc @@ -1,7 +1,6 @@ -// this is a test case to test whether rabit can recover model when +// this is a test case to test whether rabit can recover model when // facing an exception -#include -#include +#include #include #include #include @@ -35,7 +34,7 @@ inline void TestMax(Model *model, int ntrial, int iter) { for (size_t i = 0; i < ndata.size(); ++i) { ndata[i] = (i * (rank+1)) % z + model->data[i]; } - rabit::Allreduce(&ndata[0], ndata.size()); + rabit::Allreduce(&ndata[0], ndata.size()); for (size_t i = 0; i < ndata.size(); ++i) { float rmax = (i * 1) % z + model->data[i]; @@ -70,7 +69,7 @@ inline void TestSum(Model *model, int ntrial, int iter) { inline void TestBcast(size_t n, int root, int ntrial, int iter) { int rank = rabit::GetRank(); - std::string s; s.resize(n); + std::string s; s.resize(n); for (size_t i = 0; i < n; ++i) { s[i] = char(i % 126 + 1); } @@ -94,13 +93,13 @@ int main(int 
argc, char *argv[]) { int rank = rabit::GetRank(); int nproc = rabit::GetWorldSize(); std::string name = rabit::GetProcessorName(); - Model model; + Model model; srand(0); int ntrial = 0; for (int i = 1; i < argc; ++i) { int n; - if (sscanf(argv[i], "rabit_num_trial=%d", &n) == 1) ntrial = n; - } + if (sscanf(argv[i], "rabit_num_trial=%d", &n) == 1) ntrial = n; + } int iter = rabit::LoadCheckPoint(&model); if (iter == 0) { model.InitModel(n); @@ -108,9 +107,9 @@ int main(int argc, char *argv[]) { } else { printf("[%d] reload-trail=%d, init iter=%d\n", rank, ntrial, iter); } - for (int r = iter; r < 3; ++r) { + for (int r = iter; r < 3; ++r) { TestMax(&model, ntrial, r); - printf("[%d] !!!TestMax pass, iter=%d\n", rank, r); + printf("[%d] !!!TestMax pass, iter=%d\n", rank, r); int step = std::max(nproc / 3, 1); for (int i = 0; i < nproc; i += step) { TestBcast(n, i, ntrial, r); diff --git a/test/local_recover.cc b/test/local_recover.cc index 5162d5a2d..a63bd2f88 100644 --- a/test/local_recover.cc +++ b/test/local_recover.cc @@ -1,7 +1,6 @@ -// this is a test case to test whether rabit can recover model when +// this is a test case to test whether rabit can recover model when // facing an exception -#include -#include +#include #include #include #include @@ -30,15 +29,15 @@ class Model : public rabit::Serializable { inline void TestMax(Model *model, Model *local, int ntrial, int iter) { int rank = rabit::GetRank(); int nproc = rabit::GetWorldSize(); - const int z = iter + 111; + const int z = iter + 111; std::vector ndata(model->data.size()); - rabit::Allreduce(&ndata[0], ndata.size(), + rabit::Allreduce(&ndata[0], ndata.size(), [&]() { // use lambda expression to prepare the data for (size_t i = 0; i < ndata.size(); ++i) { ndata[i] = (i * (rank+1)) % z + local->data[i]; } - }); + }); for (size_t i = 0; i < ndata.size(); ++i) { float rmax = (i * 1) % z + model->data[i]; @@ -64,7 +63,7 @@ inline void TestSum(Model *model, Model *local, int ntrial, int iter) { ndata[i] 
= (i * (rank+1)) % z + local->data[i]; } Allreduce(&ndata[0], ndata.size()); - + for (size_t i = 0; i < ndata.size(); ++i) { float rsum = 0.0f; for (int r = 0; r < nproc; ++r) { @@ -81,7 +80,7 @@ inline void TestSum(Model *model, Model *local, int ntrial, int iter) { inline void TestBcast(size_t n, int root, int ntrial, int iter) { int rank = rabit::GetRank(); - std::string s; s.resize(n); + std::string s; s.resize(n); for (size_t i = 0; i < n; ++i) { s[i] = char(i % 126 + 1); } @@ -105,13 +104,13 @@ int main(int argc, char *argv[]) { int rank = rabit::GetRank(); int nproc = rabit::GetWorldSize(); std::string name = rabit::GetProcessorName(); - Model model, local; + Model model, local; srand(0); int ntrial = 0; for (int i = 1; i < argc; ++i) { int n; - if (sscanf(argv[i], "repeat=%d", &n) == 1) ntrial = n; - } + if (sscanf(argv[i], "repeat=%d", &n) == 1) ntrial = n; + } int iter = rabit::LoadCheckPoint(&model, &local); if (iter == 0) { model.InitModel(n, 1.0f); @@ -120,9 +119,9 @@ int main(int argc, char *argv[]) { } else { printf("[%d] reload-trail=%d, init iter=%d\n", rank, ntrial, iter); } - for (int r = iter; r < 3; ++r) { + for (int r = iter; r < 3; ++r) { TestMax(&model, &local, ntrial, r); - printf("[%d] !!!TestMax pass, iter=%d\n", rank, r); + printf("[%d] !!!TestMax pass, iter=%d\n", rank, r); int step = std::max(nproc / 3, 1); for (int i = 0; i < nproc; i += step) { TestBcast(n, i, ntrial, r); diff --git a/test/model_recover.cc b/test/model_recover.cc index f833ef295..a2709f892 100644 --- a/test/model_recover.cc +++ b/test/model_recover.cc @@ -1,7 +1,6 @@ -// this is a test case to test whether rabit can recover model when +// this is a test case to test whether rabit can recover model when // facing an exception -#include -#include +#include #include #include #include @@ -35,7 +34,7 @@ inline void TestMax(Model *model, int ntrial, int iter) { for (size_t i = 0; i < ndata.size(); ++i) { ndata[i] = (i * (rank+1)) % z + model->data[i]; } - 
rabit::Allreduce(&ndata[0], ndata.size()); + rabit::Allreduce(&ndata[0], ndata.size()); for (size_t i = 0; i < ndata.size(); ++i) { float rmax = (i * 1) % z + model->data[i]; @@ -71,7 +70,7 @@ inline void TestSum(Model *model, int ntrial, int iter) { inline void TestBcast(size_t n, int root, int ntrial, int iter) { int rank = rabit::GetRank(); - std::string s; s.resize(n); + std::string s; s.resize(n); for (size_t i = 0; i < n; ++i) { s[i] = char(i % 126 + 1); } @@ -95,13 +94,13 @@ int main(int argc, char *argv[]) { int rank = rabit::GetRank(); int nproc = rabit::GetWorldSize(); std::string name = rabit::GetProcessorName(); - Model model; + Model model; srand(0); int ntrial = 0; for (int i = 1; i < argc; ++i) { int n; - if (sscanf(argv[i], "rabit_num_trial=%d", &n) == 1) ntrial = n; - } + if (sscanf(argv[i], "rabit_num_trial=%d", &n) == 1) ntrial = n; + } int iter = rabit::LoadCheckPoint(&model); if (iter == 0) { model.InitModel(n); @@ -109,9 +108,9 @@ int main(int argc, char *argv[]) { } else { printf("[%d] reload-trail=%d, init iter=%d\n", rank, ntrial, iter); } - for (int r = iter; r < 3; ++r) { + for (int r = iter; r < 3; ++r) { TestMax(&model, ntrial, r); - printf("[%d] !!!TestMax pass, iter=%d\n", rank, r); + printf("[%d] !!!TestMax pass, iter=%d\n", rank, r); int step = std::max(nproc / 3, 1); for (int i = 0; i < nproc; i += step) { TestBcast(n, i, ntrial, r); diff --git a/test/speed_test.cc b/test/speed_test.cc index 68891bd31..8eb543def 100644 --- a/test/speed_test.cc +++ b/test/speed_test.cc @@ -1,7 +1,6 @@ // This program is used to test the speed of rabit API -#include -#include -#include +#include +#include #include #include #include @@ -30,7 +29,7 @@ inline void TestSum(size_t n) { ndata[i] = (i * (rank+1)) % z; } double tstart = utils::GetTime(); - rabit::Allreduce(&ndata[0], ndata.size()); + rabit::Allreduce(&ndata[0], ndata.size()); sum_tdiff += utils::GetTime() - tstart; } @@ -45,9 +44,9 @@ inline void TestBcast(size_t n, int root) { if (root == 
rank) { res = s; } - double tstart = utils::GetTime(); + double tstart = utils::GetTime(); rabit::Broadcast(&res[0], res.length(), root); - bcast_tdiff += utils::GetTime() - tstart; + bcast_tdiff += utils::GetTime() - tstart; } inline void PrintStats(const char *name, double tdiff, int n, int nrep, size_t size) { diff --git a/test/test.mk b/test/test.mk index 282a82bc4..9dadadff0 100644 --- a/test/test.mk +++ b/test/test.mk @@ -1,29 +1,29 @@ # this is a makefile used to show testcases of rabit .PHONY: all -all: model_recover_10_10k model_recover_10_10k_die_same +all: model_recover_10_10k model_recover_10_10k_die_same model_recover_10_10k_die_hard local_recover_10_10k # this experiment test recovery with actually process exit, use keepalive to keep program alive model_recover_10_10k: - ../tracker/rabit_demo.py -n 10 model_recover 10000 mock=0,0,1,0 mock=1,1,1,0 + ../dmlc-core/tracker/dmlc-submit --cluster local --num-workers=10 model_recover 10000 mock=0,0,1,0 mock=1,1,1,0 model_recover_10_10k_die_same: - ../tracker/rabit_demo.py -n 10 model_recover 10000 mock=0,0,1,0 mock=1,1,1,0 mock=0,1,1,0 mock=4,1,1,0 mock=9,1,1,0 + ../dmlc-core/tracker/dmlc-submit --cluster local --num-workers=10 model_recover 10000 mock=0,0,1,0 mock=1,1,1,0 mock=0,1,1,0 mock=4,1,1,0 mock=9,1,1,0 model_recover_10_10k_die_hard: - ../tracker/rabit_demo.py -n 10 model_recover 10000 mock=0,0,1,0 mock=1,1,1,0 mock=1,1,1,1 mock=0,1,1,0 mock=4,1,1,0 mock=9,1,1,0 mock=8,1,2,0 mock=4,1,3,0 + ../dmlc-core/tracker/dmlc-submit --cluster local --num-workers=10 model_recover 10000 mock=0,0,1,0 mock=1,1,1,0 mock=1,1,1,1 mock=0,1,1,0 mock=4,1,1,0 mock=9,1,1,0 mock=8,1,2,0 mock=4,1,3,0 local_recover_10_10k: - ../tracker/rabit_demo.py -n 10 local_recover 10000 mock=0,0,1,0 mock=1,1,1,0 mock=0,1,1,0 mock=4,1,1,0 mock=9,1,1,0 mock=1,1,1,1 + ../dmlc-core/tracker/dmlc-submit --cluster local --num-workers=10 local_recover 10000 mock=0,0,1,0 mock=1,1,1,0 mock=0,1,1,0 mock=4,1,1,0 mock=9,1,1,0 mock=1,1,1,1 
pylocal_recover_10_10k: - ../tracker/rabit_demo.py -n 10 ./local_recover.py 10000 mock=0,0,1,0 mock=1,1,1,0 mock=0,1,1,0 mock=4,1,1,0 mock=9,1,1,0 mock=1,1,1,1 + ../dmlc-core/tracker/dmlc-submit --cluster local --num-workers=10 ./local_recover.py 10000 mock=0,0,1,0 mock=1,1,1,0 mock=0,1,1,0 mock=4,1,1,0 mock=9,1,1,0 mock=1,1,1,1 lazy_recover_10_10k_die_hard: - ../tracker/rabit_demo.py -n 10 lazy_recover 10000 mock=0,0,1,0 mock=1,1,1,0 mock=1,1,1,1 mock=0,1,1,0 mock=4,1,1,0 mock=9,1,1,0 mock=8,1,2,0 mock=4,1,3,0 + ../dmlc-core/tracker/dmlc-submit --cluster local --num-workers=10 lazy_recover 10000 mock=0,0,1,0 mock=1,1,1,0 mock=1,1,1,1 mock=0,1,1,0 mock=4,1,1,0 mock=9,1,1,0 mock=8,1,2,0 mock=4,1,3,0 lazy_recover_10_10k_die_same: - ../tracker/rabit_demo.py -n 10 lazy_recover 10000 mock=0,0,1,0 mock=1,1,1,0 mock=0,1,1,0 mock=4,1,1,0 mock=9,1,1,0 + ../dmlc-core/tracker/dmlc-submit --cluster local --num-workers=10 lazy_recover 10000 mock=0,0,1,0 mock=1,1,1,0 mock=0,1,1,0 mock=4,1,1,0 mock=9,1,1,0 ringallreduce_10_10k: - ../tracker/rabit_demo.py -v 1 -n 10 model_recover 100 rabit_reduce_ring_mincount=10 + ../dmlc-core/tracker/dmlc-submit --cluster local --num-workers=10 model_recover 100 rabit_reduce_ring_mincount=10 diff --git a/windows/.gitignore b/windows/.gitignore deleted file mode 100644 index 3bc83e45f..000000000 --- a/windows/.gitignore +++ /dev/null @@ -1,9 +0,0 @@ -*.suo -*.exp -*sdf -*.exe -ipch -x64 -*.filters -Release -*.user diff --git a/windows/README.md b/windows/README.md deleted file mode 100644 index 9bdeb7988..000000000 --- a/windows/README.md +++ /dev/null @@ -1,12 +0,0 @@ -The solution has been created with Visual Studio Express 2010. 
-Make sure to compile the Release version - -Build -==== -* Build the project ```rabit``` , this will give you ```rabit.lib``` in ```x64\Release``` - -Build Your code with rabit -==== -* Add include to the dependency path of your project -* Add ```rabit.lib``` to the linker dependency -* The project basic is an example to show you how to build rabit with basic.cc diff --git a/windows/basic/basic.vcxproj b/windows/basic/basic.vcxproj deleted file mode 100644 index 109c405ef..000000000 --- a/windows/basic/basic.vcxproj +++ /dev/null @@ -1,118 +0,0 @@ - - - - - Debug - Win32 - - - Debug - x64 - - - Release - Win32 - - - Release - x64 - - - - {A6A95246-EB0A-46BA-9471-5939CB6B0006} - basic - - - - Application - true - MultiByte - - - Application - true - MultiByte - - - Application - false - true - MultiByte - - - Application - false - true - MultiByte - - - - - - - - - - - - - - - - - - - - - Level3 - Disabled - - - true - - - - - Level3 - Disabled - - - true - - - - - Level3 - MaxSpeed - true - true - - - true - true - true - - - - - Level3 - MaxSpeed - true - true - ..\..\include - MultiThreaded - - - true - true - true - $(OutDir)\rabit.lib;%(AdditionalDependencies) - - - - - - - - - \ No newline at end of file diff --git a/windows/rabit.sln b/windows/rabit.sln deleted file mode 100644 index bf61256d6..000000000 --- a/windows/rabit.sln +++ /dev/null @@ -1,50 +0,0 @@ - -Microsoft Visual Studio Solution File, Format Version 11.00 -# Visual Studio 2010 -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "rabit", "rabit\rabit.vcxproj", "{D7B77D06-4F5F-4BD7-B81E-7CC8EBBE684F}" -EndProject -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "basic", "basic\basic.vcxproj", "{A6A95246-EB0A-46BA-9471-5939CB6B0006}" - ProjectSection(ProjectDependencies) = postProject - {D7B77D06-4F5F-4BD7-B81E-7CC8EBBE684F} = {D7B77D06-4F5F-4BD7-B81E-7CC8EBBE684F} - EndProjectSection -EndProject -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "rabit_wrapper", 
"rabit_wrapper\rabit_wrapper.vcxproj", "{2F89A7C5-CA4F-4D77-A728-6702D9F33F9F}" - ProjectSection(ProjectDependencies) = postProject - {D7B77D06-4F5F-4BD7-B81E-7CC8EBBE684F} = {D7B77D06-4F5F-4BD7-B81E-7CC8EBBE684F} - EndProjectSection -EndProject -Global - GlobalSection(SolutionConfigurationPlatforms) = preSolution - Debug|Win32 = Debug|Win32 - Debug|x64 = Debug|x64 - Release|Win32 = Release|Win32 - Release|x64 = Release|x64 - EndGlobalSection - GlobalSection(ProjectConfigurationPlatforms) = postSolution - {D7B77D06-4F5F-4BD7-B81E-7CC8EBBE684F}.Debug|Win32.ActiveCfg = Debug|Win32 - {D7B77D06-4F5F-4BD7-B81E-7CC8EBBE684F}.Debug|Win32.Build.0 = Debug|Win32 - {D7B77D06-4F5F-4BD7-B81E-7CC8EBBE684F}.Debug|x64.ActiveCfg = Debug|x64 - {D7B77D06-4F5F-4BD7-B81E-7CC8EBBE684F}.Debug|x64.Build.0 = Debug|x64 - {D7B77D06-4F5F-4BD7-B81E-7CC8EBBE684F}.Release|Win32.ActiveCfg = Release|Win32 - {D7B77D06-4F5F-4BD7-B81E-7CC8EBBE684F}.Release|Win32.Build.0 = Release|Win32 - {D7B77D06-4F5F-4BD7-B81E-7CC8EBBE684F}.Release|x64.ActiveCfg = Release|x64 - {D7B77D06-4F5F-4BD7-B81E-7CC8EBBE684F}.Release|x64.Build.0 = Release|x64 - {A6A95246-EB0A-46BA-9471-5939CB6B0006}.Debug|Win32.ActiveCfg = Debug|Win32 - {A6A95246-EB0A-46BA-9471-5939CB6B0006}.Debug|Win32.Build.0 = Debug|Win32 - {A6A95246-EB0A-46BA-9471-5939CB6B0006}.Debug|x64.ActiveCfg = Debug|Win32 - {A6A95246-EB0A-46BA-9471-5939CB6B0006}.Release|Win32.ActiveCfg = Release|Win32 - {A6A95246-EB0A-46BA-9471-5939CB6B0006}.Release|Win32.Build.0 = Release|Win32 - {A6A95246-EB0A-46BA-9471-5939CB6B0006}.Release|x64.ActiveCfg = Release|x64 - {A6A95246-EB0A-46BA-9471-5939CB6B0006}.Release|x64.Build.0 = Release|x64 - {2F89A7C5-CA4F-4D77-A728-6702D9F33F9F}.Debug|Win32.ActiveCfg = Debug|Win32 - {2F89A7C5-CA4F-4D77-A728-6702D9F33F9F}.Debug|Win32.Build.0 = Debug|Win32 - {2F89A7C5-CA4F-4D77-A728-6702D9F33F9F}.Debug|x64.ActiveCfg = Debug|Win32 - {2F89A7C5-CA4F-4D77-A728-6702D9F33F9F}.Release|Win32.ActiveCfg = Release|Win32 - 
{2F89A7C5-CA4F-4D77-A728-6702D9F33F9F}.Release|Win32.Build.0 = Release|Win32 - {2F89A7C5-CA4F-4D77-A728-6702D9F33F9F}.Release|x64.ActiveCfg = Release|x64 - {2F89A7C5-CA4F-4D77-A728-6702D9F33F9F}.Release|x64.Build.0 = Release|x64 - EndGlobalSection - GlobalSection(SolutionProperties) = preSolution - HideSolutionNode = FALSE - EndGlobalSection -EndGlobal diff --git a/windows/rabit/rabit.vcxproj b/windows/rabit/rabit.vcxproj deleted file mode 100644 index 5948e4c17..000000000 --- a/windows/rabit/rabit.vcxproj +++ /dev/null @@ -1,133 +0,0 @@ - - - - - Debug - Win32 - - - Debug - x64 - - - Release - Win32 - - - Release - x64 - - - - {D7B77D06-4F5F-4BD7-B81E-7CC8EBBE684F} - rabit - - - - Application - true - MultiByte - - - Application - true - MultiByte - - - StaticLibrary - false - true - MultiByte - - - StaticLibrary - false - true - MultiByte - - - - - - - - - - - - - - - - - - - - - Level3 - Disabled - - - true - - - - - Level3 - Disabled - - - true - - - - - Level3 - MaxSpeed - true - true - MultiThreaded - - - true - true - true - - - - - Level3 - MaxSpeed - true - true - ..\..\include;%(AdditionalIncludeDirectories) - MultiThreaded - - - true - true - true - - - - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/windows/rabit_wrapper/rabit_wrapper.vcxproj b/windows/rabit_wrapper/rabit_wrapper.vcxproj deleted file mode 100644 index 73eb5abb4..000000000 --- a/windows/rabit_wrapper/rabit_wrapper.vcxproj +++ /dev/null @@ -1,121 +0,0 @@ - - - - - Debug - Win32 - - - Debug - x64 - - - Release - Win32 - - - Release - x64 - - - - {2F89A7C5-CA4F-4D77-A728-6702D9F33F9F} - rabit_wrapper - - - - Application - true - MultiByte - - - Application - true - MultiByte - - - DynamicLibrary - false - true - MultiByte - - - DynamicLibrary - false - true - MultiByte - - - - - - - - - - - - - - - - - - - - - Level3 - Disabled - - - true - - - - - Level3 - Disabled - - - true - - - - - Level3 - MaxSpeed - true - true - - - true - true - true - 
..\..\x64\Release\rabit.lib;%(AdditionalDependencies) - - - - - Level3 - MaxSpeed - true - true - ..\..\include - - - true - true - true - $(OutDir)\rabit.lib;%(AdditionalDependencies) - - - - - - - - - - - - \ No newline at end of file diff --git a/wrapper/rabit_wrapper.h b/wrapper/rabit_wrapper.h deleted file mode 100644 index d00a31fda..000000000 --- a/wrapper/rabit_wrapper.h +++ /dev/null @@ -1,126 +0,0 @@ -/*! - * Copyright by Contributors - * \file rabit_wrapper.h - * \author Tianqi Chen - * \brief a C style wrapper of rabit - * can be used to create wrapper of other languages - */ -#ifndef RABIT_WRAPPER_H_ -#define RABIT_WRAPPER_H_ -#ifdef _MSC_VER -#define RABIT_DLL __declspec(dllexport) -#else -#define RABIT_DLL -#endif -// manually define unsign long -typedef unsigned long rbt_ulong; // NOLINT(*) - -#ifdef __cplusplus -extern "C" { -#endif -/*! - * \brief intialize the rabit module, call this once before using anything - * \param argc number of arguments in argv - * \param argv the array of input arguments - */ - RABIT_DLL void RabitInit(int argc, char *argv[]); - /*! - * \brief finalize the rabit engine, call this function after you finished all jobs - */ - RABIT_DLL void RabitFinalize(void); - /*! \brief get rank of current process */ - RABIT_DLL int RabitGetRank(void); - /*! \brief get total number of process */ - RABIT_DLL int RabitGetWorldSize(void); - /*! - * \brief print the msg to the tracker, - * this function can be used to communicate the information of the progress to - * the user who monitors the tracker - * \param msg the message to be printed - */ - RABIT_DLL void RabitTrackerPrint(const char *msg); - /*! - * \brief get name of processor - * \param out_name hold output string - * \param out_len hold length of output string - * \param max_len maximum buffer length of input - */ - RABIT_DLL void RabitGetProcessorName(char *out_name, - rbt_ulong *out_len, - rbt_ulong max_len); - /*! 
- * \brief broadcast an memory region to all others from root - * - * Example: int a = 1; Broadcast(&a, sizeof(a), root); - * \param sendrecv_data the pointer to send or recive buffer, - * \param size the size of the data - * \param root the root of process - */ - RABIT_DLL void RabitBroadcast(void *sendrecv_data, - rbt_ulong size, int root); - /*! - * \brief perform in-place allreduce, on sendrecvbuf - * this function is NOT thread-safe - * - * Example Usage: the following code gives sum of the result - * vector data(10); - * ... - * Allreduce(&data[0], data.size()); - * ... - * \param sendrecvbuf buffer for both sending and recving data - * \param count number of elements to be reduced - * \param enum_dtype the enumeration of data type, see rabit::engine::mpi::DataType in engine.h of rabit include - * \param enum_op the enumeration of operation type, see rabit::engine::mpi::OpType in engine.h of rabit - * \param prepare_fun Lazy preprocessing function, if it is not NULL, prepare_fun(prepare_arg) - * will be called by the function before performing Allreduce, to intialize the data in sendrecvbuf_. - * If the result of Allreduce can be recovered directly, then prepare_func will NOT be called - * \param prepare_arg argument used to passed into the lazy preprocessing function - */ - RABIT_DLL void RabitAllreduce(void *sendrecvbuf, - size_t count, - int enum_dtype, - int enum_op, - void (*prepare_fun)(void *arg), - void *prepare_arg); - - /*! 
- * \brief load latest check point - * \param out_global_model hold output of serialized global_model - * \param out_global_len the output length of serialized global model - * \param out_local_model hold output of serialized local_model, can be NULL - * \param out_local_len the output length of serialized local model, can be NULL - * - * \return the version number of check point loaded - * if returned version == 0, this means no model has been CheckPointed - * nothing will be touched - */ - RABIT_DLL int RabitLoadCheckPoint(char **out_global_model, - rbt_ulong *out_global_len, - char **out_local_model, - rbt_ulong *out_local_len); - /*! - * \brief checkpoint the model, meaning we finished a stage of execution - * every time we call check point, there is a version number which will increase by one - * - * \param global_model hold content of serialized global_model - * \param global_len the content length of serialized global model - * \param local_model hold content of serialized local_model, can be NULL - * \param local_len the content length of serialized local model, can be NULL - * - * NOTE: local_model requires explicit replication of the model for fault-tolerance, which will - * bring replication cost in CheckPoint function. global_model do not need explicit replication. - * So only CheckPoint with global_model if possible - */ - RABIT_DLL void RabitCheckPoint(const char *global_model, - rbt_ulong global_len, - const char *local_model, - rbt_ulong local_len); - /*! - * \return version number of current stored model, - * which means how many calls to CheckPoint we made so far - */ - RABIT_DLL int RabitVersionNumber(void); -#ifdef __cplusplus -} // C -#endif -#endif // RABIT_WRAPPER_H_